ia64/linux-2.6.18-xen.hg

view drivers/pci/setup-bus.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However, it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well-behaved
toolstack to ask a domain to balloon to more than its allocation, nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), we may as well keep
those pages rather than returning them to Xen.
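
For illustration, a rough sketch of the retry-on-timer idea described
above (hypothetical names and structure; the real change is in the
balloon driver, not in the file shown below):

	/* Sketch only -- illustrative, not the actual balloon driver change. */
	static void balloon_worker(unsigned long unused)
	{
		/* target_pages, current_pages and try_increase_reservation()
		 * are hypothetical stand-ins for the driver's own state. */
		long want = target_pages - current_pages;

		if (want > 0 && try_increase_reservation(want) < want) {
			/* Keep whatever pages were granted and retry later,
			 * rather than recording a "hard limit" and giving up. */
			mod_timer(&balloon_retry_timer, jiffies + HZ);
		}
	}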

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents c7c92f868aa1

/*
 *	drivers/pci/setup-bus.c
 *
 * Extruded from code written by
 *	Dave Rusling (david.rusling@reo.mts.dec.com)
 *	David Mosberger (davidm@cs.arizona.edu)
 *	David Miller (davem@redhat.com)
 *
 * Support routines for initializing a PCI subsystem.
 */
/*
 * Nov 2000, Ivan Kokshaysky <ink@jurassic.park.msu.ru>
 *	     PCI-PCI bridges cleanup, sorted resource allocation.
 * Feb 2002, Ivan Kokshaysky <ink@jurassic.park.msu.ru>
 *	     Converted to allocation in 3 passes, which gives
 *	     tighter packing. Prefetchable range support.
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/errno.h>
#include <linux/ioport.h>
#include <linux/cache.h>
#include <linux/slab.h>

#include "pci.h"

#define DEBUG_CONFIG 1
#if DEBUG_CONFIG
#define DBG(x...) printk(x)
#else
#define DBG(x...)
#endif
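
/* Round x up to the next multiple of a (a must be a power of two). */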
#define ROUND_UP(x, a)		(((x) + (a) - 1) & ~((a) - 1))

/*
 * FIXME: IO should be max 256 bytes.  However, since we may
 * have a P2P bridge below a cardbus bridge, we need 4K.
 */
#define CARDBUS_IO_SIZE		(256)
#define CARDBUS_MEM_SIZE	(32*1024*1024)

static void __devinit
pbus_assign_resources_sorted(struct pci_bus *bus)
{
	struct pci_dev *dev;
	struct resource *res;
	struct resource_list head, *list, *tmp;
	int idx;

	head.next = NULL;
	list_for_each_entry(dev, &bus->devices, bus_list) {
		u16 class = dev->class >> 8;

		/* Don't touch classless devices or host bridges or ioapics. */
		if (class == PCI_CLASS_NOT_DEFINED ||
		    class == PCI_CLASS_BRIDGE_HOST ||
		    class == PCI_CLASS_SYSTEM_PIC)
			continue;

		pdev_sort_resources(dev, &head);
	}

	for (list = head.next; list;) {
		res = list->res;
		idx = res - &list->dev->resource[0];
		if (pci_assign_resource(list->dev, idx)) {
			res->start = 0;
			res->end = 0;
			res->flags = 0;
		}
		tmp = list;
		list = list->next;
		kfree(tmp);
	}
}

void pci_setup_cardbus(struct pci_bus *bus)
{
	struct pci_dev *bridge = bus->self;
	struct pci_bus_region region;

	printk("PCI: Bus %d, cardbus bridge: %s\n",
		bus->number, pci_name(bridge));

	pcibios_resource_to_bus(bridge, &region, bus->resource[0]);
	if (bus->resource[0]->flags & IORESOURCE_IO) {
		/*
		 * The IO resource is allocated a range twice as large as it
		 * would normally need.  This allows us to set both IO regs.
		 */
		printk("  IO window: %08lx-%08lx\n",
			region.start, region.end);
		pci_write_config_dword(bridge, PCI_CB_IO_BASE_0,
					region.start);
		pci_write_config_dword(bridge, PCI_CB_IO_LIMIT_0,
					region.end);
	}

	pcibios_resource_to_bus(bridge, &region, bus->resource[1]);
	if (bus->resource[1]->flags & IORESOURCE_IO) {
		printk("  IO window: %08lx-%08lx\n",
			region.start, region.end);
		pci_write_config_dword(bridge, PCI_CB_IO_BASE_1,
					region.start);
		pci_write_config_dword(bridge, PCI_CB_IO_LIMIT_1,
					region.end);
	}

	pcibios_resource_to_bus(bridge, &region, bus->resource[2]);
	if (bus->resource[2]->flags & IORESOURCE_MEM) {
		printk("  PREFETCH window: %08lx-%08lx\n",
			region.start, region.end);
		pci_write_config_dword(bridge, PCI_CB_MEMORY_BASE_0,
					region.start);
		pci_write_config_dword(bridge, PCI_CB_MEMORY_LIMIT_0,
					region.end);
	}

	pcibios_resource_to_bus(bridge, &region, bus->resource[3]);
	if (bus->resource[3]->flags & IORESOURCE_MEM) {
		printk("  MEM window: %08lx-%08lx\n",
			region.start, region.end);
		pci_write_config_dword(bridge, PCI_CB_MEMORY_BASE_1,
					region.start);
		pci_write_config_dword(bridge, PCI_CB_MEMORY_LIMIT_1,
					region.end);
	}
}
EXPORT_SYMBOL(pci_setup_cardbus);

/* Initialize bridges with base/limit values we have collected.
   PCI-to-PCI Bridge Architecture Specification rev. 1.1 (1998)
   requires that if there is no I/O ports or memory behind the
   bridge, corresponding range must be turned off by writing base
   value greater than limit to the bridge's base/limit registers.

   Note: care must be taken when updating I/O base/limit registers
   of bridges which support 32-bit I/O. This update requires two
   config space writes, so it's quite possible that an I/O window of
   the bridge will have some undesirable address (e.g. 0) after the
   first write. Ditto 64-bit prefetchable MMIO.  */
static void __devinit
pci_setup_bridge(struct pci_bus *bus)
{
	struct pci_dev *bridge = bus->self;
	struct pci_bus_region region;
	u32 l, io_upper16;

	DBG(KERN_INFO "PCI: Bridge: %s\n", pci_name(bridge));

	/* Set up the top and bottom of the PCI I/O segment for this bus. */
	pcibios_resource_to_bus(bridge, &region, bus->resource[0]);
	if (bus->resource[0]->flags & IORESOURCE_IO) {
		pci_read_config_dword(bridge, PCI_IO_BASE, &l);
		l &= 0xffff0000;
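		/*
		 * In the dword at PCI_IO_BASE, bits 7:4 hold I/O base
		 * address bits 15:12 and bits 15:12 hold I/O limit address
		 * bits 15:12; the upper 16 bits are the secondary status.
		 */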
		l |= (region.start >> 8) & 0x00f0;
		l |= region.end & 0xf000;
		/* Set up upper 16 bits of I/O base/limit. */
		io_upper16 = (region.end & 0xffff0000) | (region.start >> 16);
		DBG(KERN_INFO "  IO window: %04lx-%04lx\n",
			region.start, region.end);
	}
	else {
		/* Clear upper 16 bits of I/O base/limit. */
		io_upper16 = 0;
		l = 0x00f0;
		DBG(KERN_INFO "  IO window: disabled.\n");
	}
	/* Temporarily disable the I/O range before updating PCI_IO_BASE. */
	pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, 0x0000ffff);
	/* Update lower 16 bits of I/O base/limit. */
	pci_write_config_dword(bridge, PCI_IO_BASE, l);
	/* Update upper 16 bits of I/O base/limit. */
	pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, io_upper16);

	/* Set up the top and bottom of the PCI Memory segment
	   for this bus. */
	pcibios_resource_to_bus(bridge, &region, bus->resource[1]);
	if (bus->resource[1]->flags & IORESOURCE_MEM) {
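		/*
		 * PCI_MEMORY_BASE is programmed as a single dword: bits 15:4
		 * hold memory base address bits 31:20, and bits 31:20 hold
		 * the limit's address bits 31:20.
		 */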
		l = (region.start >> 16) & 0xfff0;
		l |= region.end & 0xfff00000;
		DBG(KERN_INFO "  MEM window: %08lx-%08lx\n",
			region.start, region.end);
	}
	else {
		l = 0x0000fff0;
		DBG(KERN_INFO "  MEM window: disabled.\n");
	}
	pci_write_config_dword(bridge, PCI_MEMORY_BASE, l);

	/* Clear out the upper 32 bits of PREF limit.
	   If PCI_PREF_BASE_UPPER32 was non-zero, this temporarily
	   disables PREF range, which is ok. */
	pci_write_config_dword(bridge, PCI_PREF_LIMIT_UPPER32, 0);

	/* Set up PREF base/limit. */
	pcibios_resource_to_bus(bridge, &region, bus->resource[2]);
	if (bus->resource[2]->flags & IORESOURCE_PREFETCH) {
		l = (region.start >> 16) & 0xfff0;
		l |= region.end & 0xfff00000;
		DBG(KERN_INFO "  PREFETCH window: %08lx-%08lx\n",
			region.start, region.end);
	}
	else {
		l = 0x0000fff0;
		DBG(KERN_INFO "  PREFETCH window: disabled.\n");
	}
	pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, l);

	/* Clear out the upper 32 bits of PREF base. */
	pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, 0);

	pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, bus->bridge_ctl);
}

/* Check whether the bridge supports optional I/O and
   prefetchable memory ranges. If not, the respective
   base/limit registers must be read-only and read as 0. */
static void __devinit
pci_bridge_check_ranges(struct pci_bus *bus)
{
	u16 io;
	u32 pmem;
	struct pci_dev *bridge = bus->self;
	struct resource *b_res;

	b_res = &bridge->resource[PCI_BRIDGE_RESOURCES];
	b_res[1].flags |= IORESOURCE_MEM;

	pci_read_config_word(bridge, PCI_IO_BASE, &io);
	if (!io) {
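		/* Probe: write a non-zero pattern and read it back; a
		   read-only (unimplemented) register still reads as 0. */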
		pci_write_config_word(bridge, PCI_IO_BASE, 0xf0f0);
		pci_read_config_word(bridge, PCI_IO_BASE, &io);
		pci_write_config_word(bridge, PCI_IO_BASE, 0x0);
	}
	if (io)
		b_res[0].flags |= IORESOURCE_IO;
	/* DECchip 21050 pass 2 errata: the bridge may miss an address
	   disconnect boundary by one PCI data phase.
	   Workaround: do not use prefetching on this device. */
	if (bridge->vendor == PCI_VENDOR_ID_DEC && bridge->device == 0x0001)
		return;
	pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem);
	if (!pmem) {
		pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE,
					       0xfff0fff0);
		pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem);
		pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, 0x0);
	}
	if (pmem)
		b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH;
}

/* Helper function for sizing routines: find first available
   bus resource of a given type. Note: we intentionally skip
   the bus resources which have already been assigned (that is,
   have non-NULL parent resource). */
static struct resource * __devinit
find_free_bus_resource(struct pci_bus *bus, unsigned long type)
{
	int i;
	struct resource *r;
	unsigned long type_mask = IORESOURCE_IO | IORESOURCE_MEM |
				  IORESOURCE_PREFETCH;

	for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
		r = bus->resource[i];
		if (r == &ioport_resource || r == &iomem_resource)
			continue;
		if (r && (r->flags & type_mask) == type && !r->parent)
			return r;
	}
	return NULL;
}

/* Sizing the IO windows of the PCI-PCI bridge is trivial,
   since these windows have 4K granularity and the IO ranges
   of non-bridge PCI devices are limited to 256 bytes.
   We must be careful with the ISA aliasing though. */
static void __devinit
pbus_size_io(struct pci_bus *bus)
{
	struct pci_dev *dev;
	struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO);
	unsigned long size = 0, size1 = 0;

	if (!b_res)
		return;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		int i;

		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
			struct resource *r = &dev->resource[i];
			unsigned long r_size;

			if (r->parent || !(r->flags & IORESOURCE_IO))
				continue;
			r_size = r->end - r->start + 1;

			if (r_size < 0x400)
				/* Might be re-aligned for ISA */
				size += r_size;
			else
				size1 += r_size;
		}
	}
	/* To be fixed in 2.5: we should have sort of HAVE_ISA
	   flag in the struct pci_bus. */
#if defined(CONFIG_ISA) || defined(CONFIG_EISA)
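	/* ISA decodes only 10 I/O address bits, so addresses alias every
	   1K; keep the first 256 bytes as-is and allow roughly 4x the
	   space for the rest, so small regions can be placed on
	   ISA-safe addresses. */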
	size = (size & 0xff) + ((size & ~0xffUL) << 2);
#endif
	size = ROUND_UP(max(size + size1, pci_reserve_size_io(bus)), 4096);
	if (!size) {
		b_res->flags = 0;
		return;
	}
	/* Alignment of the IO window is always 4K */
	b_res->start = 4096;
	b_res->end = b_res->start + size - 1;
}

/* Calculate the size of the bus and minimal alignment which
   guarantees that all child resources fit in this size. */
static int __devinit
pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long type)
{
	struct pci_dev *dev;
	unsigned long min_align, align, size;
	unsigned long aligns[12];	/* Alignments from 1Mb to 2Gb */
	int order, max_order;
	struct resource *b_res = find_free_bus_resource(bus, type);

	if (!b_res)
		return 0;

	memset(aligns, 0, sizeof(aligns));
	max_order = 0;
	size = 0;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		int i;
		int reassign = pci_is_reassigndev(dev);

		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
			struct resource *r = &dev->resource[i];
			unsigned long r_size;

			if (r->parent || (r->flags & mask) != type)
				continue;
			r_size = r->end - r->start + 1;

			if ((i < PCI_BRIDGE_RESOURCES) && reassign)
				r_size = ALIGN(r_size, PAGE_SIZE);

			/* For bridges size != alignment */
			align = (i < PCI_BRIDGE_RESOURCES) ? r_size : r->start;
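			/* Map the alignment to an index into aligns[]:
			   order 0 is 1MB (2^20), order 11 is 2GB. */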
			order = __ffs(align) - 20;
			if (order > 11) {
				printk(KERN_WARNING "PCI: region %s/%d "
				       "too large: %llx-%llx\n",
					pci_name(dev), i,
					(unsigned long long)r->start,
					(unsigned long long)r->end);
				r->flags = 0;
				continue;
			}
			size += r_size;
			if (order < 0)
				order = 0;
			/* Exclude ranges with size > align from
			   calculation of the alignment. */
			if (r_size == align)
				aligns[order] += align;
			if (order > max_order)
				max_order = order;
		}
	}

	align = 0;
	min_align = 0;
	for (order = 0; order <= max_order; order++) {
		unsigned long align1 = 1UL << (order + 20);

		if (!align)
			min_align = align1;
		else if (ROUND_UP(align + min_align, min_align) < align1)
			min_align = align1 >> 1;
		align += aligns[order];
	}
	size = ROUND_UP(max(size, pci_reserve_size_mem(bus)), min_align);
	if (!size) {
		b_res->flags = 0;
		return 1;
	}
	b_res->start = min_align;
	b_res->end = size + min_align - 1;
	return 1;
}

static void __devinit
pci_bus_size_cardbus(struct pci_bus *bus)
{
	struct pci_dev *bridge = bus->self;
	struct resource *b_res = &bridge->resource[PCI_BRIDGE_RESOURCES];
	u16 ctrl;

	/*
	 * Reserve some resources for CardBus.  We reserve
	 * a fixed amount of bus space for CardBus bridges.
	 */
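	/* b_res[0] and b_res[1] are the bridge's two I/O windows;
	   b_res[2] and b_res[3] are its two memory windows. */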
	b_res[0].start = CARDBUS_IO_SIZE;
	b_res[0].end = b_res[0].start + CARDBUS_IO_SIZE - 1;
	b_res[0].flags |= IORESOURCE_IO;

	b_res[1].start = CARDBUS_IO_SIZE;
	b_res[1].end = b_res[1].start + CARDBUS_IO_SIZE - 1;
	b_res[1].flags |= IORESOURCE_IO;

	/*
	 * Check whether prefetchable memory is supported
	 * by this bridge.
	 */
	pci_read_config_word(bridge, PCI_CB_BRIDGE_CONTROL, &ctrl);
	if (!(ctrl & PCI_CB_BRIDGE_CTL_PREFETCH_MEM0)) {
		ctrl |= PCI_CB_BRIDGE_CTL_PREFETCH_MEM0;
		pci_write_config_word(bridge, PCI_CB_BRIDGE_CONTROL, ctrl);
		pci_read_config_word(bridge, PCI_CB_BRIDGE_CONTROL, &ctrl);
	}

	/*
	 * If we have prefetchable memory support, allocate
	 * two regions.  Otherwise, allocate one region of
	 * twice the size.
	 */
	if (ctrl & PCI_CB_BRIDGE_CTL_PREFETCH_MEM0) {
		b_res[2].start = CARDBUS_MEM_SIZE;
		b_res[2].end = b_res[2].start + CARDBUS_MEM_SIZE - 1;
		b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH;

		b_res[3].start = CARDBUS_MEM_SIZE;
		b_res[3].end = b_res[3].start + CARDBUS_MEM_SIZE - 1;
		b_res[3].flags |= IORESOURCE_MEM;
	} else {
		b_res[3].start = CARDBUS_MEM_SIZE * 2;
		b_res[3].end = b_res[3].start + CARDBUS_MEM_SIZE * 2 - 1;
		b_res[3].flags |= IORESOURCE_MEM;
	}
}

void __devinit
pci_bus_size_bridges(struct pci_bus *bus)
{
	struct pci_dev *dev;
	unsigned long mask, prefmask;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		struct pci_bus *b = dev->subordinate;
		if (!b)
			continue;

		switch (dev->class >> 8) {
		case PCI_CLASS_BRIDGE_CARDBUS:
			pci_bus_size_cardbus(b);
			break;

		case PCI_CLASS_BRIDGE_PCI:
		default:
			pci_bus_size_bridges(b);
			break;
		}
	}

	/* The root bus? */
	if (!bus->self)
		return;

	switch (bus->self->class >> 8) {
	case PCI_CLASS_BRIDGE_CARDBUS:
		/* don't size cardbuses yet. */
		break;

	case PCI_CLASS_BRIDGE_PCI:
		pci_bridge_check_ranges(bus);
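		/* fall through to the common sizing below */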
	default:
		pbus_size_io(bus);
		/* If the bridge supports prefetchable range, size it
		   separately. If it doesn't, or its prefetchable window
		   has already been allocated by arch code, try
		   non-prefetchable range for both types of PCI memory
		   resources. */
		mask = IORESOURCE_MEM;
		prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH;
		if (pbus_size_mem(bus, prefmask, prefmask))
			mask = prefmask; /* Success, size non-prefetch only. */
		pbus_size_mem(bus, mask, IORESOURCE_MEM);
		break;
	}
}
EXPORT_SYMBOL(pci_bus_size_bridges);

void __devinit
pci_bus_assign_resources(struct pci_bus *bus)
{
	struct pci_bus *b;
	struct pci_dev *dev;

	pbus_assign_resources_sorted(bus);

	list_for_each_entry(dev, &bus->devices, bus_list) {
		b = dev->subordinate;
		if (!b)
			continue;

		pci_bus_assign_resources(b);

		switch (dev->class >> 8) {
		case PCI_CLASS_BRIDGE_PCI:
			pci_setup_bridge(b);
			break;

		case PCI_CLASS_BRIDGE_CARDBUS:
			pci_setup_cardbus(b);
			break;

		default:
			printk(KERN_INFO "PCI: not setting up bridge %s "
			       "for bus %d\n", pci_name(dev), b->number);
			break;
		}
	}
}
EXPORT_SYMBOL(pci_bus_assign_resources);

void __init
pci_assign_unassigned_resources(void)
{
	struct pci_bus *bus;

	/* Depth first, calculate sizes and alignments of all
	   subordinate buses. */
	list_for_each_entry(bus, &pci_root_buses, node) {
		pci_bus_size_bridges(bus);
	}
	/* Depth last, allocate resources and update the hardware. */
	list_for_each_entry(bus, &pci_root_buses, node) {
		pci_bus_assign_resources(bus);
		pci_enable_bridges(bus);
	}
}