ia64/linux-2.6.18-xen.hg

view arch/alpha/kernel/pci.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /*
2 * linux/arch/alpha/kernel/pci.c
3 *
4 * Extruded from code written by
5 * Dave Rusling (david.rusling@reo.mts.dec.com)
6 * David Mosberger (davidm@cs.arizona.edu)
7 */
9 /* 2.3.x PCI/resources, 1999 Andrea Arcangeli <andrea@suse.de> */
11 /*
12 * Nov 2000, Ivan Kokshaysky <ink@jurassic.park.msu.ru>
13 * PCI-PCI bridges cleanup
14 */
15 #include <linux/string.h>
16 #include <linux/pci.h>
17 #include <linux/init.h>
18 #include <linux/ioport.h>
19 #include <linux/kernel.h>
20 #include <linux/bootmem.h>
21 #include <linux/module.h>
22 #include <linux/cache.h>
23 #include <linux/slab.h>
24 #include <asm/machvec.h>
26 #include "proto.h"
27 #include "pci_impl.h"
/*
 * Some string constants used by the various core logics.
 */

/* Resource names for the per-hose PCI I/O port windows. */
const char *const pci_io_names[] = {
	"PCI IO bus 0", "PCI IO bus 1", "PCI IO bus 2", "PCI IO bus 3",
	"PCI IO bus 4", "PCI IO bus 5", "PCI IO bus 6", "PCI IO bus 7"
};

/* Resource names for the per-hose PCI memory windows. */
const char *const pci_mem_names[] = {
	"PCI mem bus 0", "PCI mem bus 1", "PCI mem bus 2", "PCI mem bus 3",
	"PCI mem bus 4", "PCI mem bus 5", "PCI mem bus 6", "PCI mem bus 7"
};

const char pci_hae0_name[] = "HAE0";

/* Indicate whether we respect the PCI setup left by console. */
/*
 * Make this long-lived so that we know when shutting down
 * whether we probed only or not.
 */
int pci_probe_only;

/*
 * The PCI controller list.  hose_tail always points at the `next'
 * field of the last controller, so appending is O(1).
 */

struct pci_controller *hose_head, **hose_tail = &hose_head;
struct pci_controller *pci_isa_hose;

/*
 * Quirks.
 */
static void __init
quirk_isa_bridge(struct pci_dev *dev)
{
	/* Force the Intel 82378's class code to plain ISA bridge so the
	   rest of PCI setup treats it as one. */
	dev->class = PCI_CLASS_BRIDGE_ISA << 8;
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82378, quirk_isa_bridge);
/* Work around the Cypress CY82C693's broken IDE decode and its
   always-on response window at the top of the 32-bit address space. */
static void __init
quirk_cypress(struct pci_dev *dev)
{
	/* The Notorious Cy82C693 chip. */

	/* The Cypress IDE controller doesn't support native mode, but it
	   has programmable addresses of IDE command/control registers.
	   This violates PCI specifications, confuses the IDE subsystem and
	   causes resource conflicts between the primary HD_CMD register and
	   the floppy controller.  Ugh.  Fix that. */
	if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE) {
		/* Clearing the flags makes the generic code ignore
		   these BARs entirely. */
		dev->resource[0].flags = 0;
		dev->resource[1].flags = 0;
	}

	/* The Cypress bridge responds on the PCI bus in the address range
	   0xffff0000-0xffffffff (conventional x86 BIOS ROM).  There is no
	   way to turn this off.  The bridge also supports several extended
	   BIOS ranges (disabled after power-up), and some consoles do turn
	   them on.  So if we use a large direct-map window, or a large SG
	   window, we must avoid the entire 0xfff00000-0xffffffff region. */
	else if (dev->class >> 8 == PCI_CLASS_BRIDGE_ISA) {
		if (__direct_map_base + __direct_map_size >= 0xfff00000UL)
			__direct_map_size = 0xfff00000UL - __direct_map_base;
		else {
			/* Direct-map window is clear of the region; shrink
			   the scatter-gather PCI window instead if needed. */
			struct pci_controller *hose = dev->sysdata;
			struct pci_iommu_arena *pci = hose->sg_pci;
			if (pci && pci->dma_base + pci->size >= 0xfff00000UL)
				pci->size = 0xfff00000UL - pci->dma_base;
		}
	}
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CONTAQ, PCI_DEVICE_ID_CONTAQ_82C693, quirk_cypress);
105 /* Called for each device after PCI setup is done. */
106 static void __init
107 pcibios_fixup_final(struct pci_dev *dev)
108 {
109 unsigned int class = dev->class >> 8;
111 if (class == PCI_CLASS_BRIDGE_ISA || class == PCI_CLASS_BRIDGE_EISA) {
112 dev->dma_mask = MAX_ISA_DMA_ADDRESS - 1;
113 isa_bridge = dev;
114 }
115 }
116 DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_final);
/* Just declaring that the power-of-ten prefixes are actually the
   power-of-two ones doesn't make it true :) */
#define KB 1024
#define MB (1024*KB)
#define GB (1024*MB)

/*
 * Choose an aligned start for a resource allocation.  DATA is the
 * pci_dev being configured; the chosen address is written back to
 * res->start.  Enforces per-hose minimums and, for sparse-capable
 * hoses, avoids the aliased first 16MB octant of each 128MB segment.
 */
void
pcibios_align_resource(void *data, struct resource *res,
		       resource_size_t size, resource_size_t align)
{
	struct pci_dev *dev = data;
	struct pci_controller *hose = dev->sysdata;
	unsigned long alignto;
	resource_size_t start = res->start;

	if (res->flags & IORESOURCE_IO) {
		/* Make sure we start at our min on all hoses */
		if (start - hose->io_space->start < PCIBIOS_MIN_IO)
			start = PCIBIOS_MIN_IO + hose->io_space->start;

		/*
		 * Put everything into 0x00-0xff region modulo 0x400
		 */
		if (start & 0x300)
			start = (start + 0x3ff) & ~0x3ff;
	}
	else if (res->flags & IORESOURCE_MEM) {
		/* Make sure we start at our min on all hoses */
		if (start - hose->mem_space->start < PCIBIOS_MIN_MEM)
			start = PCIBIOS_MIN_MEM + hose->mem_space->start;

		/*
		 * The following holds at least for the Low Cost
		 * Alpha implementation of the PCI interface:
		 *
		 * In sparse memory address space, the first
		 * octant (16MB) of every 128MB segment is
		 * aliased to the very first 16 MB of the
		 * address space (i.e., it aliases the ISA
		 * memory address space).  Thus, we try to
		 * avoid allocating PCI devices in that range.
		 * Can be allocated in 2nd-7th octant only.
		 * Devices that need more than 112MB of
		 * address space must be accessed through
		 * dense memory space only!
		 */

		/* Align to multiple of size of minimum base. */
		alignto = max(0x1000UL, align);
		start = ALIGN(start, alignto);
		if (hose->sparse_mem_base && size <= 7 * 16*MB) {
			/* Don't begin in the aliased first octant; bump
			   to the second octant of this segment. */
			if (((start / (16*MB)) & 0x7) == 0) {
				start &= ~(128*MB - 1);
				start += 16*MB;
				start = ALIGN(start, alignto);
			}
			/* Don't straddle a 128MB segment boundary; move
			   to the 2nd octant of the next segment. */
			if (start/(128*MB) != (start + size - 1)/(128*MB)) {
				start &= ~(128*MB - 1);
				start += (128 + 16)*MB;
				start = ALIGN(start, alignto);
			}
		}
	}

	res->start = start;
}
#undef KB
#undef MB
#undef GB
188 static int __init
189 pcibios_init(void)
190 {
191 if (alpha_mv.init_pci)
192 alpha_mv.init_pci();
193 return 0;
194 }
196 subsys_initcall(pcibios_init);
/* Handle "pci=" boot options.  Alpha consumes none of them, so hand
   the whole string back for generic code to process. */
char * __init
pcibios_setup(char *str)
{
	return str;
}
#ifdef ALPHA_RESTORE_SRM_SETUP
/* Singly-linked list of per-device config-space snapshots, used to
   restore the SRM console's setup at shutdown. */
static struct pdev_srm_saved_conf *srm_saved_configs;

/* Snapshot DEV's current (SRM-programmed) config space so it can be
   put back later by pci_restore_srm_config(). */
void __init
pdev_save_srm_config(struct pci_dev *dev)
{
	struct pdev_srm_saved_conf *tmp;
	static int printed = 0;

	/* Only worth saving when SRM set things up and we are about
	   to reprogram the devices ourselves. */
	if (!alpha_using_srm || pci_probe_only)
		return;

	if (!printed) {
		printk(KERN_INFO "pci: enabling save/restore of SRM state\n");
		printed = 1;
	}

	tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
	if (!tmp) {
		/* Best-effort: skip this device rather than fail boot. */
		printk(KERN_ERR "%s: kmalloc() failed!\n", __FUNCTION__);
		return;
	}
	tmp->next = srm_saved_configs;
	tmp->dev = dev;

	pci_save_state(dev);

	srm_saved_configs = tmp;
}
234 void
235 pci_restore_srm_config(void)
236 {
237 struct pdev_srm_saved_conf *tmp;
239 /* No need to restore if probed only. */
240 if (pci_probe_only)
241 return;
243 /* Restore SRM config. */
244 for (tmp = srm_saved_configs; tmp; tmp = tmp->next) {
245 pci_restore_state(tmp->dev);
246 }
247 }
248 #endif
/* Rebase RES from bus-relative to absolute by adding the start of the
   hose window ROOT to both ends. */
void __init
pcibios_fixup_resource(struct resource *res, struct resource *root)
{
	res->start += root->start;
	res->end += root->start;
}
257 void __init
258 pcibios_fixup_device_resources(struct pci_dev *dev, struct pci_bus *bus)
259 {
260 /* Update device resources. */
261 struct pci_controller *hose = (struct pci_controller *)bus->sysdata;
262 int i;
264 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
265 if (!dev->resource[i].start)
266 continue;
267 if (dev->resource[i].flags & IORESOURCE_IO)
268 pcibios_fixup_resource(&dev->resource[i],
269 hose->io_space);
270 else if (dev->resource[i].flags & IORESOURCE_MEM)
271 pcibios_fixup_resource(&dev->resource[i],
272 hose->mem_space);
273 }
274 }
void __init
pcibios_fixup_bus(struct pci_bus *bus)
{
	/* Propagate hose info into the subordinate devices. */

	struct pci_controller *hose = bus->sysdata;
	struct pci_dev *dev = bus->self;

	if (!dev) {
		/* Root bus. */
		u32 pci_mem_end;
		/* ~0 when there is no SG window, so the min() below is
		   then governed by the direct-map base alone. */
		u32 sg_base = hose->sg_pci ? hose->sg_pci->dma_base : ~0;
		unsigned long end;

		bus->resource[0] = hose->io_space;
		bus->resource[1] = hose->mem_space;

		/* Adjust hose mem_space limit to prevent PCI allocations
		   in the iommu windows. */
		pci_mem_end = min((u32)__direct_map_base, sg_base) - 1;
		end = hose->mem_space->start + pci_mem_end;
		if (hose->mem_space->end > end)
			hose->mem_space->end = end;
	} else if (pci_probe_only &&
		   (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
		/* Probe-only: trust the console's bridge windows and
		   just read them back. */
		pci_read_bridge_bases(bus);
		pcibios_fixup_device_resources(dev, bus);
	}

	list_for_each_entry(dev, &bus->devices, bus_list) {
		pdev_save_srm_config(dev);
		if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
			pcibios_fixup_device_resources(dev, bus);
	}
}
/* Record the assigned IRQ in the device's interrupt-line register. */
void __init
pcibios_update_irq(struct pci_dev *dev, int irq)
{
	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
}
318 /* Most Alphas have straight-forward swizzling needs. */
320 u8 __init
321 common_swizzle(struct pci_dev *dev, u8 *pinp)
322 {
323 u8 pin = *pinp;
325 while (dev->bus->parent) {
326 pin = bridge_swizzle(pin, PCI_SLOT(dev->devfn));
327 /* Move up the chain of bridges. */
328 dev = dev->bus->self;
329 }
330 *pinp = pin;
332 /* The slot is the slot of the last bridge. */
333 return PCI_SLOT(dev->devfn);
334 }
336 void __devinit
337 pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
338 struct resource *res)
339 {
340 struct pci_controller *hose = (struct pci_controller *)dev->sysdata;
341 unsigned long offset = 0;
343 if (res->flags & IORESOURCE_IO)
344 offset = hose->io_space->start;
345 else if (res->flags & IORESOURCE_MEM)
346 offset = hose->mem_space->start;
348 region->start = res->start - offset;
349 region->end = res->end - offset;
350 }
352 void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
353 struct pci_bus_region *region)
354 {
355 struct pci_controller *hose = (struct pci_controller *)dev->sysdata;
356 unsigned long offset = 0;
358 if (res->flags & IORESOURCE_IO)
359 offset = hose->io_space->start;
360 else if (res->flags & IORESOURCE_MEM)
361 offset = hose->mem_space->start;
363 res->start = region->start + offset;
364 res->end = region->end + offset;
365 }
#ifdef CONFIG_HOTPLUG
/* Hotplug drivers can be modular, so export the translation helpers. */
EXPORT_SYMBOL(pcibios_resource_to_bus);
EXPORT_SYMBOL(pcibios_bus_to_resource);
#endif
372 int
373 pcibios_enable_device(struct pci_dev *dev, int mask)
374 {
375 u16 cmd, oldcmd;
376 int i;
378 pci_read_config_word(dev, PCI_COMMAND, &cmd);
379 oldcmd = cmd;
381 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
382 struct resource *res = &dev->resource[i];
384 if (res->flags & IORESOURCE_IO)
385 cmd |= PCI_COMMAND_IO;
386 else if (res->flags & IORESOURCE_MEM)
387 cmd |= PCI_COMMAND_MEMORY;
388 }
390 if (cmd != oldcmd) {
391 printk(KERN_DEBUG "PCI: Enabling device: (%s), cmd %x\n",
392 pci_name(dev), cmd);
393 /* Enable the appropriate bits in the PCI command register. */
394 pci_write_config_word(dev, PCI_COMMAND, cmd);
395 }
396 return 0;
397 }
399 /*
400 * If we set up a device for bus mastering, we need to check the latency
401 * timer as certain firmware forgets to set it properly, as seen
402 * on SX164 and LX164 with SRM.
403 */
404 void
405 pcibios_set_master(struct pci_dev *dev)
406 {
407 u8 lat;
408 pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
409 if (lat >= 16) return;
410 printk("PCI: Setting latency timer of device %s to 64\n",
411 pci_name(dev));
412 pci_write_config_byte(dev, PCI_LATENCY_TIMER, 64);
413 }
415 static void __init
416 pcibios_claim_one_bus(struct pci_bus *b)
417 {
418 struct pci_dev *dev;
419 struct pci_bus *child_bus;
421 list_for_each_entry(dev, &b->devices, bus_list) {
422 int i;
424 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
425 struct resource *r = &dev->resource[i];
427 if (r->parent || !r->start || !r->flags)
428 continue;
429 pci_claim_resource(dev, i);
430 }
431 }
433 list_for_each_entry(child_bus, &b->children, node)
434 pcibios_claim_one_bus(child_bus);
435 }
437 static void __init
438 pcibios_claim_console_setup(void)
439 {
440 struct pci_bus *b;
442 list_for_each_entry(b, &pci_root_buses, node)
443 pcibios_claim_one_bus(b);
444 }
/* Common PCI bring-up: scan every registered hose, number the buses,
   honor console setup in probe-only mode, then assign resources and
   fix up IRQs. */
void __init
common_init_pci(void)
{
	struct pci_controller *hose;
	struct pci_bus *bus;
	int next_busno;
	int need_domain_info = 0;

	/* Scan all of the recorded PCI controllers. */
	for (next_busno = 0, hose = hose_head; hose; hose = hose->next) {
		bus = pci_scan_bus(next_busno, alpha_mv.pci_ops, hose);
		hose->bus = bus;
		hose->need_domain_info = need_domain_info;
		next_busno = bus->subordinate + 1;
		/* Don't allow 8-bit bus number overflow inside the hose -
		   reserve some space for bridges. */
		if (next_busno > 224) {
			/* Restart numbering and switch to per-hose PCI
			   domains to keep bus numbers unique. */
			next_busno = 0;
			need_domain_info = 1;
		}
	}

	if (pci_probe_only)
		pcibios_claim_console_setup();

	pci_assign_unassigned_resources();
	pci_fixup_irqs(alpha_mv.pci_swizzle, alpha_mv.pci_map_irq);
}
476 struct pci_controller * __init
477 alloc_pci_controller(void)
478 {
479 struct pci_controller *hose;
481 hose = alloc_bootmem(sizeof(*hose));
483 *hose_tail = hose;
484 hose_tail = &hose->next;
486 return hose;
487 }
489 struct resource * __init
490 alloc_resource(void)
491 {
492 struct resource *res;
494 res = alloc_bootmem(sizeof(*res));
496 return res;
497 }
/* Provide information on locations of various I/O regions in physical
   memory.  Do this on a per-card basis so that we choose the right hose. */

asmlinkage long
sys_pciconfig_iobase(long which, unsigned long bus, unsigned long dfn)
{
	struct pci_controller *hose;
	struct pci_dev *dev;

	/* from hose or from bus.devfn */
	if (which & IOBASE_FROM_HOSE) {
		/* BUS is interpreted as a hose index in this mode. */
		for(hose = hose_head; hose; hose = hose->next)
			if (hose->index == bus) break;
		if (!hose) return -ENODEV;
	} else {
		/* Special hook for ISA access. */
		if (bus == 0 && dfn == 0) {
			/* NOTE(review): pci_isa_hose may still be NULL
			   here, which the switch below would dereference
			   — confirm callers cannot hit this early. */
			hose = pci_isa_hose;
		} else {
			dev = pci_find_slot(bus, dfn);
			if (!dev)
				return -ENODEV;
			hose = dev->sysdata;
		}
	}

	/* Return the base address the caller asked about. */
	switch (which & ~IOBASE_FROM_HOSE) {
	case IOBASE_HOSE:
		return hose->index;
	case IOBASE_SPARSE_MEM:
		return hose->sparse_mem_base;
	case IOBASE_DENSE_MEM:
		return hose->dense_mem_base;
	case IOBASE_SPARSE_IO:
		return hose->sparse_io_base;
	case IOBASE_DENSE_IO:
		return hose->dense_io_base;
	case IOBASE_ROOT_BUS:
		return hose->bus->number;
	}

	return -EOPNOTSUPP;
}
544 /* Create an __iomem token from a PCI BAR. Copied from lib/iomap.c with
545 no changes, since we don't want the other things in that object file. */
547 void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
548 {
549 unsigned long start = pci_resource_start(dev, bar);
550 unsigned long len = pci_resource_len(dev, bar);
551 unsigned long flags = pci_resource_flags(dev, bar);
553 if (!len || !start)
554 return NULL;
555 if (maxlen && len > maxlen)
556 len = maxlen;
557 if (flags & IORESOURCE_IO)
558 return ioport_map(start, len);
559 if (flags & IORESOURCE_MEM) {
560 /* Not checking IORESOURCE_CACHEABLE because alpha does
561 not distinguish between ioremap and ioremap_nocache. */
562 return ioremap(start, len);
563 }
564 return NULL;
565 }
/* Destroy that token.  Not copied from lib/iomap.c. */

void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
{
	/* Port-space tokens were not ioremap()ed; only unmap real
	   MMIO mappings. */
	if (__is_mmio(addr))
		iounmap(addr);
}

EXPORT_SYMBOL(pci_iomap);
EXPORT_SYMBOL(pci_iounmap);