ia64/linux-2.6.18-xen.hg

view drivers/pci/setup-bus.c @ 696:2b5cc22ab406

xen/dom0: Reassign memory resources to device for pci passthrough.

This patch adds the function that reassign page-aligned memory
resources, to dom0 linux. The function is useful when we assign I/O
device to HVM domain using pci passthrough.

When we assign a device to an HVM domain using pci passthrough,
the device needs to be assigned page-aligned memory resources. If a
memory resource is not page-aligned, the following error occurs.

Error: pci: 0000:00:1d.7: non-page-aligned MMIO BAR found.

On many systems, the BIOS assigns memory resources to the device and
enables it. So my patch disables the device and releases its resources,
then assigns page-aligned memory resources to the device.

To reassign resources, please add boot parameters of dom0 linux as
follows.

reassign_resources reassigndev=00:1d.7,01:00.0

reassign_resources
Enables reassigning resources.

reassigndev= Specifies the devices (I/O devices and
PCI-PCI bridges) whose resources should be
reassigned. A PCI-PCI bridge can be specified
if its resource windows need to be expanded.

Signed-off-by: Yuji Shimada <shimada-yxb@necst.nec.co.jp>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Oct 09 11:10:43 2008 +0100 (2008-10-09)
parents 831230e53067
children 16c549b3ba59
line source
1 /*
2 * drivers/pci/setup-bus.c
3 *
4 * Extruded from code written by
5 * Dave Rusling (david.rusling@reo.mts.dec.com)
6 * David Mosberger (davidm@cs.arizona.edu)
7 * David Miller (davem@redhat.com)
8 *
9 * Support routines for initializing a PCI subsystem.
10 */
12 /*
13 * Nov 2000, Ivan Kokshaysky <ink@jurassic.park.msu.ru>
14 * PCI-PCI bridges cleanup, sorted resource allocation.
15 * Feb 2002, Ivan Kokshaysky <ink@jurassic.park.msu.ru>
16 * Converted to allocation in 3 passes, which gives
17 * tighter packing. Prefetchable range support.
18 */
20 #include <linux/init.h>
21 #include <linux/kernel.h>
22 #include <linux/module.h>
23 #include <linux/pci.h>
24 #include <linux/errno.h>
25 #include <linux/ioport.h>
26 #include <linux/cache.h>
27 #include <linux/slab.h>
29 #include "pci.h"
/* DBG() expands to printk() while DEBUG_CONFIG is non-zero, else to nothing. */
#define DEBUG_CONFIG 1
#if DEBUG_CONFIG
#define DBG(x...) printk(x)
#else
#define DBG(x...)
#endif

/* Round x up to the next multiple of a; a must be a power of two. */
#define ROUND_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

/*
 * FIXME: IO should be max 256 bytes. However, since we may
 * have a P2P bridge below a cardbus bridge, we need 4K.
 */
#define CARDBUS_IO_SIZE (256)
#define CARDBUS_MEM_SIZE (32*1024*1024)
47 static void __devinit
48 pbus_assign_resources_sorted(struct pci_bus *bus)
49 {
50 struct pci_dev *dev;
51 struct resource *res;
52 struct resource_list head, *list, *tmp;
53 int idx;
55 head.next = NULL;
56 list_for_each_entry(dev, &bus->devices, bus_list) {
57 u16 class = dev->class >> 8;
59 /* Don't touch classless devices or host bridges or ioapics. */
60 if (class == PCI_CLASS_NOT_DEFINED ||
61 class == PCI_CLASS_BRIDGE_HOST ||
62 class == PCI_CLASS_SYSTEM_PIC)
63 continue;
65 pdev_sort_resources(dev, &head);
66 }
68 for (list = head.next; list;) {
69 res = list->res;
70 idx = res - &list->dev->resource[0];
71 if (pci_assign_resource(list->dev, idx)) {
72 res->start = 0;
73 res->end = 0;
74 res->flags = 0;
75 }
76 tmp = list;
77 list = list->next;
78 kfree(tmp);
79 }
80 }
82 void pci_setup_cardbus(struct pci_bus *bus)
83 {
84 struct pci_dev *bridge = bus->self;
85 struct pci_bus_region region;
87 printk("PCI: Bus %d, cardbus bridge: %s\n",
88 bus->number, pci_name(bridge));
90 pcibios_resource_to_bus(bridge, &region, bus->resource[0]);
91 if (bus->resource[0]->flags & IORESOURCE_IO) {
92 /*
93 * The IO resource is allocated a range twice as large as it
94 * would normally need. This allows us to set both IO regs.
95 */
96 printk(" IO window: %08lx-%08lx\n",
97 region.start, region.end);
98 pci_write_config_dword(bridge, PCI_CB_IO_BASE_0,
99 region.start);
100 pci_write_config_dword(bridge, PCI_CB_IO_LIMIT_0,
101 region.end);
102 }
104 pcibios_resource_to_bus(bridge, &region, bus->resource[1]);
105 if (bus->resource[1]->flags & IORESOURCE_IO) {
106 printk(" IO window: %08lx-%08lx\n",
107 region.start, region.end);
108 pci_write_config_dword(bridge, PCI_CB_IO_BASE_1,
109 region.start);
110 pci_write_config_dword(bridge, PCI_CB_IO_LIMIT_1,
111 region.end);
112 }
114 pcibios_resource_to_bus(bridge, &region, bus->resource[2]);
115 if (bus->resource[2]->flags & IORESOURCE_MEM) {
116 printk(" PREFETCH window: %08lx-%08lx\n",
117 region.start, region.end);
118 pci_write_config_dword(bridge, PCI_CB_MEMORY_BASE_0,
119 region.start);
120 pci_write_config_dword(bridge, PCI_CB_MEMORY_LIMIT_0,
121 region.end);
122 }
124 pcibios_resource_to_bus(bridge, &region, bus->resource[3]);
125 if (bus->resource[3]->flags & IORESOURCE_MEM) {
126 printk(" MEM window: %08lx-%08lx\n",
127 region.start, region.end);
128 pci_write_config_dword(bridge, PCI_CB_MEMORY_BASE_1,
129 region.start);
130 pci_write_config_dword(bridge, PCI_CB_MEMORY_LIMIT_1,
131 region.end);
132 }
133 }
134 EXPORT_SYMBOL(pci_setup_cardbus);
/* Initialize bridges with base/limit values we have collected.
   PCI-to-PCI Bridge Architecture Specification rev. 1.1 (1998)
   requires that if there is no I/O ports or memory behind the
   bridge, corresponding range must be turned off by writing base
   value greater than limit to the bridge's base/limit registers.

   Note: care must be taken when updating I/O base/limit registers
   of bridges which support 32-bit I/O. This update requires two
   config space writes, so it's quite possible that an I/O window of
   the bridge will have some undesirable address (e.g. 0) after the
   first write. Ditto 64-bit prefetchable MMIO. */
static void __devinit
pci_setup_bridge(struct pci_bus *bus)
{
	struct pci_dev *bridge = bus->self;
	struct pci_bus_region region;
	u32 l, io_upper16;

	DBG(KERN_INFO "PCI: Bridge: %s\n", pci_name(bridge));

	/* Set up the top and bottom of the PCI I/O segment for this bus. */
	pcibios_resource_to_bus(bridge, &region, bus->resource[0]);
	if (bus->resource[0]->flags & IORESOURCE_IO) {
		/* Preserve whatever sits in the upper half of the dword
		   (ISA enable etc.), fill in base/limit bits 15:12. */
		pci_read_config_dword(bridge, PCI_IO_BASE, &l);
		l &= 0xffff0000;
		l |= (region.start >> 8) & 0x00f0;
		l |= region.end & 0xf000;
		/* Set up upper 16 bits of I/O base/limit. */
		io_upper16 = (region.end & 0xffff0000) | (region.start >> 16);
		DBG(KERN_INFO " IO window: %04lx-%04lx\n",
		    region.start, region.end);
	}
	else {
		/* Clear upper 16 bits of I/O base/limit. */
		io_upper16 = 0;
		/* base > limit turns the I/O range off, per the bridge spec. */
		l = 0x00f0;
		DBG(KERN_INFO " IO window: disabled.\n");
	}
	/* Temporarily disable the I/O range before updating PCI_IO_BASE. */
	pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, 0x0000ffff);
	/* Update lower 16 bits of I/O base/limit. */
	pci_write_config_dword(bridge, PCI_IO_BASE, l);
	/* Update upper 16 bits of I/O base/limit. */
	pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, io_upper16);

	/* Set up the top and bottom of the PCI Memory segment
	   for this bus. */
	pcibios_resource_to_bus(bridge, &region, bus->resource[1]);
	if (bus->resource[1]->flags & IORESOURCE_MEM) {
		/* Base in bits 15:4 (>>20, stored <<4); limit in 31:20. */
		l = (region.start >> 16) & 0xfff0;
		l |= region.end & 0xfff00000;
		DBG(KERN_INFO " MEM window: %08lx-%08lx\n",
		    region.start, region.end);
	}
	else {
		/* base > limit: memory window disabled. */
		l = 0x0000fff0;
		DBG(KERN_INFO " MEM window: disabled.\n");
	}
	pci_write_config_dword(bridge, PCI_MEMORY_BASE, l);

	/* Clear out the upper 32 bits of PREF limit.
	   If PCI_PREF_BASE_UPPER32 was non-zero, this temporarily
	   disables PREF range, which is ok. */
	pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, 0);

	/* Set up PREF base/limit. */
	pcibios_resource_to_bus(bridge, &region, bus->resource[2]);
	if (bus->resource[2]->flags & IORESOURCE_PREFETCH) {
		l = (region.start >> 16) & 0xfff0;
		l |= region.end & 0xfff00000;
		DBG(KERN_INFO " PREFETCH window: %08lx-%08lx\n",
		    region.start, region.end);
	}
	else {
		/* base > limit: prefetch window disabled. */
		l = 0x0000fff0;
		DBG(KERN_INFO " PREFETCH window: disabled.\n");
	}
	pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, l);

	/* Clear out the upper 32 bits of PREF base. */
	pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, 0);

	pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, bus->bridge_ctl);
}
/* Check whether the bridge supports optional I/O and
   prefetchable memory ranges. If not, the respective
   base/limit registers must be read-only and read as 0. */
static void __devinit
pci_bridge_check_ranges(struct pci_bus *bus)
{
	u16 io;
	u32 pmem;
	struct pci_dev *bridge = bus->self;
	struct resource *b_res;

	b_res = &bridge->resource[PCI_BRIDGE_RESOURCES];
	/* Non-prefetchable memory forwarding is mandatory on all bridges. */
	b_res[1].flags |= IORESOURCE_MEM;

	/* Probe the I/O base/limit register: write a non-zero pattern,
	   read it back, then restore zero.  If any bits stick, the
	   window is implemented. */
	pci_read_config_word(bridge, PCI_IO_BASE, &io);
	if (!io) {
		pci_write_config_word(bridge, PCI_IO_BASE, 0xf0f0);
		pci_read_config_word(bridge, PCI_IO_BASE, &io);
		pci_write_config_word(bridge, PCI_IO_BASE, 0x0);
	}
	if (io)
		b_res[0].flags |= IORESOURCE_IO;
	/* DECchip 21050 pass 2 errata: the bridge may miss an address
	   disconnect boundary by one PCI data phase.
	   Workaround: do not use prefetching on this device. */
	if (bridge->vendor == PCI_VENDOR_ID_DEC && bridge->device == 0x0001)
		return;
	/* Same probe trick for the prefetchable base/limit pair. */
	pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem);
	if (!pmem) {
		pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE,
				       0xfff0fff0);
		pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem);
		pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, 0x0);
	}
	if (pmem)
		b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH;
}
259 /* Helper function for sizing routines: find first available
260 bus resource of a given type. Note: we intentionally skip
261 the bus resources which have already been assigned (that is,
262 have non-NULL parent resource). */
263 static struct resource * __devinit
264 find_free_bus_resource(struct pci_bus *bus, unsigned long type)
265 {
266 int i;
267 struct resource *r;
268 unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
269 IORESOURCE_PREFETCH;
271 for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
272 r = bus->resource[i];
273 if (r == &ioport_resource || r == &iomem_resource)
274 continue;
275 if (r && (r->flags & type_mask) == type && !r->parent)
276 return r;
277 }
278 return NULL;
279 }
281 /* Sizing the IO windows of the PCI-PCI bridge is trivial,
282 since these windows have 4K granularity and the IO ranges
283 of non-bridge PCI devices are limited to 256 bytes.
284 We must be careful with the ISA aliasing though. */
285 static void __devinit
286 pbus_size_io(struct pci_bus *bus)
287 {
288 struct pci_dev *dev;
289 struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO);
290 unsigned long size = 0, size1 = 0;
292 if (!b_res)
293 return;
295 list_for_each_entry(dev, &bus->devices, bus_list) {
296 int i;
298 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
299 struct resource *r = &dev->resource[i];
300 unsigned long r_size;
302 if (r->parent || !(r->flags & IORESOURCE_IO))
303 continue;
304 r_size = r->end - r->start + 1;
306 if (r_size < 0x400)
307 /* Might be re-aligned for ISA */
308 size += r_size;
309 else
310 size1 += r_size;
311 }
312 }
313 /* To be fixed in 2.5: we should have sort of HAVE_ISA
314 flag in the struct pci_bus. */
315 #if defined(CONFIG_ISA) || defined(CONFIG_EISA)
316 size = (size & 0xff) + ((size & ~0xffUL) << 2);
317 #endif
318 size = ROUND_UP(size + size1, 4096);
319 if (!size) {
320 b_res->flags = 0;
321 return;
322 }
323 /* Alignment of the IO window is always 4K */
324 b_res->start = 4096;
325 b_res->end = b_res->start + size - 1;
326 }
/* Calculate the size of the bus and minimal alignment which
   guarantees that all child resources fit in this size. */
static int __devinit
pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long type)
{
	struct pci_dev *dev;
	unsigned long min_align, align, size;
	unsigned long aligns[12];	/* Alignments from 1Mb to 2Gb */
	int order, max_order;
	struct resource *b_res = find_free_bus_resource(bus, type);

	if (!b_res)
		return 0;

	memset(aligns, 0, sizeof(aligns));
	max_order = 0;
	size = 0;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		int i;
		/* reassign_resources / is_reassigndev are provided by the Xen
		   pci-passthrough patch elsewhere in the tree; presumably they
		   select devices named on the "reassigndev=" boot parameter --
		   confirm against that code. */
		int reassign = reassign_resources ? is_reassigndev(dev) : 0;

		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
			struct resource *r = &dev->resource[i];
			unsigned long r_size;

			/* Skip already-assigned resources and wrong-type ones. */
			if (r->parent || (r->flags & mask) != type)
				continue;
			r_size = r->end - r->start + 1;

			if (reassign) {
				/* Round the size up so the BAR can later be
				   page-aligned (needed for PCI passthrough). */
				r_size = ROUND_UP_TO_PAGESIZE(r_size);
			}

			/* For bridges size != alignment */
			align = (i < PCI_BRIDGE_RESOURCES) ? r_size : r->start;
			/* order 0 corresponds to 1MB alignment; anything
			   above 2GB (order > 11) cannot be represented. */
			order = __ffs(align) - 20;
			if (order > 11) {
				printk(KERN_WARNING "PCI: region %s/%d "
				       "too large: %llx-%llx\n",
				       pci_name(dev), i,
				       (unsigned long long)r->start,
				       (unsigned long long)r->end);
				r->flags = 0;
				continue;
			}
			size += r_size;
			if (order < 0)
				order = 0;
			/* Exclude ranges with size > align from
			   calculation of the alignment. */
			if (r_size == align)
				aligns[order] += align;
			if (order > max_order)
				max_order = order;
		}
	}

	/* Derive the minimal window alignment from the per-order totals
	   accumulated above. */
	align = 0;
	min_align = 0;
	for (order = 0; order <= max_order; order++) {
		unsigned long align1 = 1UL << (order + 20);

		if (!align)
			min_align = align1;
		else if (ROUND_UP(align + min_align, min_align) < align1)
			min_align = align1 >> 1;
		align += aligns[order];
	}
	size = ROUND_UP(size, min_align);
	if (!size) {
		/* Nothing to map: disable this window. */
		b_res->flags = 0;
		return 1;
	}
	b_res->start = min_align;
	b_res->end = size + min_align - 1;
	return 1;
}
407 static void __devinit
408 pci_bus_size_cardbus(struct pci_bus *bus)
409 {
410 struct pci_dev *bridge = bus->self;
411 struct resource *b_res = &bridge->resource[PCI_BRIDGE_RESOURCES];
412 u16 ctrl;
414 /*
415 * Reserve some resources for CardBus. We reserve
416 * a fixed amount of bus space for CardBus bridges.
417 */
418 b_res[0].start = CARDBUS_IO_SIZE;
419 b_res[0].end = b_res[0].start + CARDBUS_IO_SIZE - 1;
420 b_res[0].flags |= IORESOURCE_IO;
422 b_res[1].start = CARDBUS_IO_SIZE;
423 b_res[1].end = b_res[1].start + CARDBUS_IO_SIZE - 1;
424 b_res[1].flags |= IORESOURCE_IO;
426 /*
427 * Check whether prefetchable memory is supported
428 * by this bridge.
429 */
430 pci_read_config_word(bridge, PCI_CB_BRIDGE_CONTROL, &ctrl);
431 if (!(ctrl & PCI_CB_BRIDGE_CTL_PREFETCH_MEM0)) {
432 ctrl |= PCI_CB_BRIDGE_CTL_PREFETCH_MEM0;
433 pci_write_config_word(bridge, PCI_CB_BRIDGE_CONTROL, ctrl);
434 pci_read_config_word(bridge, PCI_CB_BRIDGE_CONTROL, &ctrl);
435 }
437 /*
438 * If we have prefetchable memory support, allocate
439 * two regions. Otherwise, allocate one region of
440 * twice the size.
441 */
442 if (ctrl & PCI_CB_BRIDGE_CTL_PREFETCH_MEM0) {
443 b_res[2].start = CARDBUS_MEM_SIZE;
444 b_res[2].end = b_res[2].start + CARDBUS_MEM_SIZE - 1;
445 b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH;
447 b_res[3].start = CARDBUS_MEM_SIZE;
448 b_res[3].end = b_res[3].start + CARDBUS_MEM_SIZE - 1;
449 b_res[3].flags |= IORESOURCE_MEM;
450 } else {
451 b_res[3].start = CARDBUS_MEM_SIZE * 2;
452 b_res[3].end = b_res[3].start + CARDBUS_MEM_SIZE * 2 - 1;
453 b_res[3].flags |= IORESOURCE_MEM;
454 }
455 }
/* Recursively compute window sizes for @bus and every subordinate bus.
   Children are sized first (depth-first), then this bridge itself. */
void __devinit
pci_bus_size_bridges(struct pci_bus *bus)
{
	struct pci_dev *dev;
	unsigned long mask, prefmask;

	/* Size all subordinate buses before sizing this one. */
	list_for_each_entry(dev, &bus->devices, bus_list) {
		struct pci_bus *b = dev->subordinate;
		if (!b)
			continue;

		switch (dev->class >> 8) {
		case PCI_CLASS_BRIDGE_CARDBUS:
			pci_bus_size_cardbus(b);
			break;

		case PCI_CLASS_BRIDGE_PCI:
		default:
			pci_bus_size_bridges(b);
			break;
		}
	}

	/* The root bus? */
	if (!bus->self)
		return;

	switch (bus->self->class >> 8) {
	case PCI_CLASS_BRIDGE_CARDBUS:
		/* don't size cardbuses yet. */
		break;

	case PCI_CLASS_BRIDGE_PCI:
		pci_bridge_check_ranges(bus);
		/* fall through: PCI bridges are sized like the default case */
	default:
		pbus_size_io(bus);
		/* If the bridge supports prefetchable range, size it
		   separately. If it doesn't, or its prefetchable window
		   has already been allocated by arch code, try
		   non-prefetchable range for both types of PCI memory
		   resources. */
		mask = IORESOURCE_MEM;
		prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH;
		if (pbus_size_mem(bus, prefmask, prefmask))
			mask = prefmask; /* Success, size non-prefetch only. */
		pbus_size_mem(bus, mask, IORESOURCE_MEM);
		break;
	}
}
EXPORT_SYMBOL(pci_bus_size_bridges);
508 void __devinit
509 pci_bus_assign_resources(struct pci_bus *bus)
510 {
511 struct pci_bus *b;
512 struct pci_dev *dev;
514 pbus_assign_resources_sorted(bus);
516 list_for_each_entry(dev, &bus->devices, bus_list) {
517 b = dev->subordinate;
518 if (!b)
519 continue;
521 pci_bus_assign_resources(b);
523 switch (dev->class >> 8) {
524 case PCI_CLASS_BRIDGE_PCI:
525 pci_setup_bridge(b);
526 break;
528 case PCI_CLASS_BRIDGE_CARDBUS:
529 pci_setup_cardbus(b);
530 break;
532 default:
533 printk(KERN_INFO "PCI: not setting up bridge %s "
534 "for bus %d\n", pci_name(dev), b->number);
535 break;
536 }
537 }
538 }
539 EXPORT_SYMBOL(pci_bus_assign_resources);
541 void __init
542 pci_assign_unassigned_resources(void)
543 {
544 struct pci_bus *bus;
546 /* Depth first, calculate sizes and alignments of all
547 subordinate buses. */
548 list_for_each_entry(bus, &pci_root_buses, node) {
549 pci_bus_size_bridges(bus);
550 }
551 /* Depth last, allocate resources and update the hardware. */
552 list_for_each_entry(bus, &pci_root_buses, node) {
553 pci_bus_assign_resources(bus);
554 pci_enable_bridges(bus);
555 }
556 }