ia64/linux-2.6.18-xen.hg

view drivers/pci/setup-res.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well-behaved
toolstack to ask a domain to balloon to more than its allocation, nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents dfd2adc58740
children
line source
1 /*
2 * drivers/pci/setup-res.c
3 *
4 * Extruded from code written by
5 * Dave Rusling (david.rusling@reo.mts.dec.com)
6 * David Mosberger (davidm@cs.arizona.edu)
7 * David Miller (davem@redhat.com)
8 *
9 * Support routines for initializing a PCI subsystem.
10 */
12 /* fixed for multiple pci buses, 1999 Andrea Arcangeli <andrea@suse.de> */
14 /*
15 * Nov 2000, Ivan Kokshaysky <ink@jurassic.park.msu.ru>
16 * Resource sorting
17 */
19 #include <linux/init.h>
20 #include <linux/kernel.h>
21 #include <linux/pci.h>
22 #include <linux/errno.h>
23 #include <linux/ioport.h>
24 #include <linux/cache.h>
25 #include <linux/slab.h>
26 #include "pci.h"
29 void
30 pci_update_resource(struct pci_dev *dev, int resno)
31 {
32 struct pci_bus_region region;
33 u32 new, check, mask;
34 int reg;
35 enum pci_bar_type type;
36 struct resource *res = dev->resource + resno;
38 /* Ignore resources for unimplemented BARs and unused resource slots
39 for 64 bit BARs. */
40 if (!res->flags)
41 return;
43 pcibios_resource_to_bus(dev, &region, res);
45 pr_debug(" got res [%llx:%llx] bus [%lx:%lx] flags %lx for "
46 "BAR %d of %s\n", (unsigned long long)res->start,
47 (unsigned long long)res->end,
48 region.start, region.end, res->flags, resno, pci_name(dev));
50 new = region.start | (res->flags & PCI_REGION_FLAG_MASK);
51 if (res->flags & IORESOURCE_IO)
52 mask = (u32)PCI_BASE_ADDRESS_IO_MASK;
53 else
54 mask = (u32)PCI_BASE_ADDRESS_MEM_MASK;
56 reg = pci_resource_bar(dev, resno, &type);
57 if (!reg)
58 return;
59 if (type != pci_bar_unknown) {
60 if (!(res->flags & IORESOURCE_ROM_ENABLE))
61 return;
62 new |= PCI_ROM_ADDRESS_ENABLE;
63 }
65 pci_write_config_dword(dev, reg, new);
66 pci_read_config_dword(dev, reg, &check);
68 if ((new ^ check) & mask) {
69 printk(KERN_ERR "PCI: Error while updating region "
70 "%s/%d (%08x != %08x)\n", pci_name(dev), resno,
71 new, check);
72 }
74 if ((new & (PCI_BASE_ADDRESS_SPACE|PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
75 (PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64)) {
76 new = region.start >> 16 >> 16;
77 pci_write_config_dword(dev, reg + 4, new);
78 pci_read_config_dword(dev, reg + 4, &check);
79 if (check != new) {
80 printk(KERN_ERR "PCI: Error updating region "
81 "%s/%d (high %08x != %08x)\n",
82 pci_name(dev), resno, new, check);
83 }
84 }
85 res->flags &= ~IORESOURCE_UNSET;
86 pr_debug("PCI: moved device %s resource %d (%lx) to %x\n",
87 pci_name(dev), resno, res->flags,
88 new & ~PCI_REGION_FLAG_MASK);
89 }
91 int __devinit
92 pci_claim_resource(struct pci_dev *dev, int resource)
93 {
94 struct resource *res = &dev->resource[resource];
95 struct resource *root = NULL;
96 char *dtype = resource < PCI_BRIDGE_RESOURCES ? "device" : "bridge";
97 int err;
99 root = pcibios_select_root(dev, res);
101 err = -EINVAL;
102 if (root != NULL)
103 err = insert_resource(root, res);
105 if (err) {
106 printk(KERN_ERR "PCI: %s region %d of %s %s [%llx:%llx]\n",
107 root ? "Address space collision on" :
108 "No parent found for",
109 resource, dtype, pci_name(dev),
110 (unsigned long long)res->start,
111 (unsigned long long)res->end);
112 }
114 return err;
115 }
116 EXPORT_SYMBOL_GPL(pci_claim_resource);
#ifdef CONFIG_PCI_REASSIGN
/*
 * Overwrite the memory window registers of bridge @dev with fixed
 * values, after logging the operation.  The registers are written in
 * the order listed in the table below.
 */
void pci_disable_bridge_window(struct pci_dev *dev)
{
	/* NOTE(review): 0x0000fff0 puts the base field above the limit
	   field, which presumably is what closes the window - confirm
	   against the bridge specification. */
	static const struct {
		int reg;
		u32 val;
	} window_regs[] = {
		/* MMIO Base/Limit */
		{ PCI_MEMORY_BASE,	  0x0000fff0 },
		/* Prefetchable MMIO Base/Limit */
		{ PCI_PREF_LIMIT_UPPER32, 0 },
		{ PCI_PREF_MEMORY_BASE,	  0x0000fff0 },
		{ PCI_PREF_BASE_UPPER32,  0xffffffff },
	};
	unsigned int n;

	printk(KERN_DEBUG "PCI: Disable bridge window on %s\n", pci_name(dev));

	for (n = 0; n < ARRAY_SIZE(window_regs); n++)
		pci_write_config_dword(dev, window_regs[n].reg,
				       window_regs[n].val);
}
#endif
133 int pci_assign_resource(struct pci_dev *dev, int resno)
134 {
135 struct pci_bus *bus = dev->bus;
136 struct resource *res = dev->resource + resno;
137 resource_size_t size, min, align;
138 int ret;
139 int reassigndev = pci_is_reassigndev(dev);
141 size = res->end - res->start + 1;
142 min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
143 /* The bridge resources are special, as their
144 size != alignment. Sizing routines return
145 required alignment in the "start" field. */
146 if (resno < PCI_BRIDGE_RESOURCES) {
147 align = size;
148 if ((reassigndev) &&
149 (res->flags & IORESOURCE_MEM)) {
150 align = ALIGN(align, PAGE_SIZE);
151 }
152 } else {
153 align = res->start;
154 }
156 /* First, try exact prefetching match.. */
157 ret = pci_bus_alloc_resource(bus, res, size, align, min,
158 IORESOURCE_PREFETCH,
159 pcibios_align_resource, dev);
161 if (ret < 0 && (res->flags & IORESOURCE_PREFETCH)) {
162 /*
163 * That failed.
164 *
165 * But a prefetching area can handle a non-prefetching
166 * window (it will just not perform as well).
167 */
168 ret = pci_bus_alloc_resource(bus, res, size, align, min, 0,
169 pcibios_align_resource, dev);
170 }
172 if (ret) {
173 printk(KERN_ERR "PCI: Failed to allocate %s resource "
174 "#%d:%llx@%llx for %s\n",
175 res->flags & IORESOURCE_IO ? "I/O" : "mem",
176 resno, (unsigned long long)size,
177 (unsigned long long)res->start, pci_name(dev));
178 } else if (resno < PCI_BRIDGE_RESOURCES) {
179 if (reassigndev)
180 printk(KERN_DEBUG "PCI: Assign resource(%d) on %s "
181 "%016llx - %016llx\n", resno, pci_name(dev),
182 (unsigned long long)res->start,
183 (unsigned long long)res->end);
184 pci_update_resource(dev, resno);
185 }
187 return ret;
188 }
#ifdef CONFIG_EMBEDDED
/*
 * Claim resource @resno of @dev at its current address by requesting
 * it from the first bus resource of matching type that accepts it.
 *
 * Returns 0 on success.  On failure the result of the last
 * request_resource() attempt is returned, or -EBUSY if no bus resource
 * of matching type was found; the failure is logged.
 */
int pci_assign_resource_fixed(struct pci_dev *dev, int resno)
{
	const unsigned int type_mask =
		IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH;
	struct resource *res = &dev->resource[resno];
	struct pci_bus *bus = dev->bus;
	int ret = -EBUSY;
	int i;

	for (i = 0; i < PCI_BUS_NUM_RESOURCES; i++) {
		struct resource *parent = bus->resource[i];

		/* Only try populated bus resources whose I/O, memory and
		   prefetch bits all agree with the resource being placed. */
		if (parent && !((res->flags ^ parent->flags) & type_mask)) {
			ret = request_resource(parent, res);
			if (!ret)
				break;
		}
	}

	if (ret)
		printk(KERN_ERR "PCI: Failed to allocate %s resource "
		       "#%d:%llx@%llx for %s\n",
		       res->flags & IORESOURCE_IO ? "I/O" : "mem",
		       resno, (unsigned long long)(res->end - res->start + 1),
		       (unsigned long long)res->start, pci_name(dev));
	else if (resno < PCI_BRIDGE_RESOURCES)
		pci_update_resource(dev, resno);

	return ret;
}
EXPORT_SYMBOL_GPL(pci_assign_resource_fixed);
#endif
230 /* Sort resources by alignment */
231 void __devinit
232 pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
233 {
234 int i;
235 int reassigndev = pci_is_reassigndev(dev);
237 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
238 struct resource *r;
239 struct resource_list *list, *tmp;
240 resource_size_t r_align;
242 r = &dev->resource[i];
243 r_align = r->end - r->start;
245 if (!(r->flags) || r->parent)
246 continue;
248 if (!r_align) {
249 printk(KERN_WARNING "PCI: Ignore bogus resource %d "
250 "[%llx:%llx] of %s\n",
251 i, (unsigned long long)r->start,
252 (unsigned long long)r->end, pci_name(dev));
253 continue;
254 }
255 r_align = (i < PCI_BRIDGE_RESOURCES) ? r_align + 1 : r->start;
257 if (i < PCI_BRIDGE_RESOURCES && (r->flags & IORESOURCE_MEM) &&
258 reassigndev)
259 r_align = ALIGN(r_align, PAGE_SIZE);
261 for (list = head; ; list = list->next) {
262 resource_size_t align = 0;
263 struct resource_list *ln = list->next;
264 int idx;
266 if (ln) {
267 idx = ln->res - &ln->dev->resource[0];
268 align = (idx < PCI_BRIDGE_RESOURCES) ?
269 ln->res->end - ln->res->start + 1 :
270 ln->res->start;
271 if ((idx < PCI_BRIDGE_RESOURCES) &&
272 (ln->res->flags & IORESOURCE_MEM) &&
273 pci_is_reassigndev(ln->dev))
274 align = ALIGN(align, PAGE_SIZE);
275 }
276 if (r_align > align) {
277 tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
278 if (!tmp)
279 panic("pdev_sort_resources(): "
280 "kmalloc() failed!\n");
281 tmp->next = ln;
282 tmp->res = r;
283 tmp->dev = dev;
284 list->next = tmp;
285 break;
286 }
287 }
288 }
289 }