ia64/linux-2.6.18-xen.hg

view arch/mips/pci/pci.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well-behaved
toolstack to ask a domain to balloon to more than its allocation, nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /*
2 * This program is free software; you can redistribute it and/or modify it
3 * under the terms of the GNU General Public License as published by the
4 * Free Software Foundation; either version 2 of the License, or (at your
5 * option) any later version.
6 *
7 * Copyright (C) 2003, 04 Ralf Baechle (ralf@linux-mips.org)
8 */
9 #include <linux/kernel.h>
10 #include <linux/mm.h>
11 #include <linux/bootmem.h>
12 #include <linux/init.h>
13 #include <linux/types.h>
14 #include <linux/pci.h>
/*
 * Non-zero when the PCI setup left behind by the firmware is respected.
 *
 * This stays around for the lifetime of the kernel so that shutdown
 * code can still tell whether we only probed.
 */
int pci_probe_only;

#define PCI_ASSIGN_ALL_BUSSES 1

/*
 * Probe option bits.  pcibios_assign_all_busses() reports whether
 * PCI_ASSIGN_ALL_BUSSES is set here; it is set by default.
 */
unsigned int pci_probe = PCI_ASSIGN_ALL_BUSSES;

/*
 * The PCI controller list.  hose_tail points at the link that
 * register_pci_controller() stores the next controller into.
 */
struct pci_controller *hose_head;
struct pci_controller **hose_tail = &hose_head;
struct pci_controller *pci_isa_hose;

/*
 * Minimum offsets, relative to a hose's I/O or memory resource start,
 * below which pcibios_align_resource() will not place a resource.
 */
unsigned long PCIBIOS_MIN_IO = 0x0000;
unsigned long PCIBIOS_MIN_MEM = 0;
38 /*
39 * We need to avoid collisions with `mirrored' VGA ports
40 * and other strange ISA hardware, so we always want the
41 * addresses to be allocated in the 0x000-0x0ff region
42 * modulo 0x400.
43 *
44 * Why? Because some silly external IO cards only decode
45 * the low 10 bits of the IO address. The 0x00-0xff region
46 * is reserved for motherboard devices that decode all 16
47 * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
48 * but we want to try to avoid allocating at 0x2900-0x2bff
49 * which might have be mirrored at 0x0100-0x03ff..
50 */
51 void
52 pcibios_align_resource(void *data, struct resource *res,
53 resource_size_t size, resource_size_t align)
54 {
55 struct pci_dev *dev = data;
56 struct pci_controller *hose = dev->sysdata;
57 resource_size_t start = res->start;
59 if (res->flags & IORESOURCE_IO) {
60 /* Make sure we start at our min on all hoses */
61 if (start < PCIBIOS_MIN_IO + hose->io_resource->start)
62 start = PCIBIOS_MIN_IO + hose->io_resource->start;
64 /*
65 * Put everything into 0x00-0xff region modulo 0x400
66 */
67 if (start & 0x300)
68 start = (start + 0x3ff) & ~0x3ff;
69 } else if (res->flags & IORESOURCE_MEM) {
70 /* Make sure we start at our min on all hoses */
71 if (start < PCIBIOS_MIN_MEM + hose->mem_resource->start)
72 start = PCIBIOS_MIN_MEM + hose->mem_resource->start;
73 }
75 res->start = start;
76 }
78 void __init register_pci_controller(struct pci_controller *hose)
79 {
80 *hose_tail = hose;
81 hose_tail = &hose->next;
82 }
84 /* Most MIPS systems have straight-forward swizzling needs. */
86 static inline u8 bridge_swizzle(u8 pin, u8 slot)
87 {
88 return (((pin - 1) + slot) % 4) + 1;
89 }
91 static u8 __init common_swizzle(struct pci_dev *dev, u8 *pinp)
92 {
93 u8 pin = *pinp;
95 while (dev->bus->parent) {
96 pin = bridge_swizzle(pin, PCI_SLOT(dev->devfn));
97 /* Move up the chain of bridges. */
98 dev = dev->bus->self;
99 }
100 *pinp = pin;
102 /* The slot is the slot of the last bridge. */
103 return PCI_SLOT(dev->devfn);
104 }
106 static int __init pcibios_init(void)
107 {
108 struct pci_controller *hose;
109 struct pci_bus *bus;
110 int next_busno;
111 int need_domain_info = 0;
113 /* Scan all of the recorded PCI controllers. */
114 for (next_busno = 0, hose = hose_head; hose; hose = hose->next) {
116 if (request_resource(&iomem_resource, hose->mem_resource) < 0)
117 goto out;
118 if (request_resource(&ioport_resource, hose->io_resource) < 0)
119 goto out_free_mem_resource;
121 if (!hose->iommu)
122 PCI_DMA_BUS_IS_PHYS = 1;
124 if (hose->get_busno && pci_probe_only)
125 next_busno = (*hose->get_busno)();
127 bus = pci_scan_bus(next_busno, hose->pci_ops, hose);
128 hose->bus = bus;
129 hose->need_domain_info = need_domain_info;
130 if (bus) {
131 next_busno = bus->subordinate + 1;
132 /* Don't allow 8-bit bus number overflow inside the hose -
133 reserve some space for bridges. */
134 if (next_busno > 224) {
135 next_busno = 0;
136 need_domain_info = 1;
137 }
138 }
139 continue;
141 out_free_mem_resource:
142 release_resource(hose->mem_resource);
144 out:
145 printk(KERN_WARNING
146 "Skipping PCI bus scan due to resource conflict\n");
147 }
149 if (!pci_probe_only)
150 pci_assign_unassigned_resources();
151 pci_fixup_irqs(common_swizzle, pcibios_map_irq);
153 return 0;
154 }
156 subsys_initcall(pcibios_init);
158 static int pcibios_enable_resources(struct pci_dev *dev, int mask)
159 {
160 u16 cmd, old_cmd;
161 int idx;
162 struct resource *r;
164 pci_read_config_word(dev, PCI_COMMAND, &cmd);
165 old_cmd = cmd;
166 for (idx=0; idx < PCI_NUM_RESOURCES; idx++) {
167 /* Only set up the requested stuff */
168 if (!(mask & (1<<idx)))
169 continue;
171 r = &dev->resource[idx];
172 if (!r->start && r->end) {
173 printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", pci_name(dev));
174 return -EINVAL;
175 }
176 if (r->flags & IORESOURCE_IO)
177 cmd |= PCI_COMMAND_IO;
178 if (r->flags & IORESOURCE_MEM)
179 cmd |= PCI_COMMAND_MEMORY;
180 }
181 if (dev->resource[PCI_ROM_RESOURCE].start)
182 cmd |= PCI_COMMAND_MEMORY;
183 if (cmd != old_cmd) {
184 printk("PCI: Enabling device %s (%04x -> %04x)\n", pci_name(dev), old_cmd, cmd);
185 pci_write_config_word(dev, PCI_COMMAND, cmd);
186 }
187 return 0;
188 }
190 /*
191 * If we set up a device for bus mastering, we need to check the latency
192 * timer as certain crappy BIOSes forget to set it properly.
193 */
194 unsigned int pcibios_max_latency = 255;
196 void pcibios_set_master(struct pci_dev *dev)
197 {
198 u8 lat;
199 pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
200 if (lat < 16)
201 lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
202 else if (lat > pcibios_max_latency)
203 lat = pcibios_max_latency;
204 else
205 return;
206 printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n",
207 pci_name(dev), lat);
208 pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
209 }
211 unsigned int pcibios_assign_all_busses(void)
212 {
213 return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0;
214 }
/*
 * Enable the resources of @dev selected by @mask, then run the
 * platform's per-device initialisation.
 *
 * Returns the negative error from pcibios_enable_resources() if that
 * fails, otherwise the result of pcibios_plat_dev_init().
 */
int pcibios_enable_device(struct pci_dev *dev, int mask)
{
	int err = pcibios_enable_resources(dev, mask);

	if (err < 0)
		return err;

	return pcibios_plat_dev_init(dev);
}
226 static void __init pcibios_fixup_device_resources(struct pci_dev *dev,
227 struct pci_bus *bus)
228 {
229 /* Update device resources. */
230 struct pci_controller *hose = (struct pci_controller *)bus->sysdata;
231 unsigned long offset = 0;
232 int i;
234 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
235 if (!dev->resource[i].start)
236 continue;
237 if (dev->resource[i].flags & IORESOURCE_IO)
238 offset = hose->io_offset;
239 else if (dev->resource[i].flags & IORESOURCE_MEM)
240 offset = hose->mem_offset;
242 dev->resource[i].start += offset;
243 dev->resource[i].end += offset;
244 }
245 }
247 void __devinit pcibios_fixup_bus(struct pci_bus *bus)
248 {
249 /* Propagate hose info into the subordinate devices. */
251 struct pci_controller *hose = bus->sysdata;
252 struct list_head *ln;
253 struct pci_dev *dev = bus->self;
255 if (!dev) {
256 bus->resource[0] = hose->io_resource;
257 bus->resource[1] = hose->mem_resource;
258 } else if (pci_probe_only &&
259 (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
260 pci_read_bridge_bases(bus);
261 pcibios_fixup_device_resources(dev, bus);
262 }
264 for (ln = bus->devices.next; ln != &bus->devices; ln = ln->next) {
265 struct pci_dev *dev = pci_dev_b(ln);
267 if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
268 pcibios_fixup_device_resources(dev, bus);
269 }
270 }
272 void __init
273 pcibios_update_irq(struct pci_dev *dev, int irq)
274 {
275 pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
276 }
278 void __devinit
279 pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
280 struct resource *res)
281 {
282 struct pci_controller *hose = (struct pci_controller *)dev->sysdata;
283 unsigned long offset = 0;
285 if (res->flags & IORESOURCE_IO)
286 offset = hose->io_offset;
287 else if (res->flags & IORESOURCE_MEM)
288 offset = hose->mem_offset;
290 region->start = res->start - offset;
291 region->end = res->end - offset;
292 }
294 void __devinit
295 pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
296 struct pci_bus_region *region)
297 {
298 struct pci_controller *hose = (struct pci_controller *)dev->sysdata;
299 unsigned long offset = 0;
301 if (res->flags & IORESOURCE_IO)
302 offset = hose->io_offset;
303 else if (res->flags & IORESOURCE_MEM)
304 offset = hose->mem_offset;
306 res->start = region->start + offset;
307 res->end = region->end + offset;
308 }
/* These symbols are exported only when CONFIG_HOTPLUG is defined. */
#ifdef CONFIG_HOTPLUG
/* Allocation minimums. */
EXPORT_SYMBOL(PCIBIOS_MIN_IO);
EXPORT_SYMBOL(PCIBIOS_MIN_MEM);
/* Resource <-> bus address translation helpers. */
EXPORT_SYMBOL(pcibios_resource_to_bus);
EXPORT_SYMBOL(pcibios_bus_to_resource);
#endif
/*
 * This implementation recognises no options: @str is not examined and
 * is handed straight back to the caller.
 */
char *pcibios_setup(char *str)
{
	char *unhandled = str;

	return unhandled;
}