ia64/linux-2.6.18-xen.hg

view arch/sparc64/kernel/ebus.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /* $Id: ebus.c,v 1.64 2001/11/08 04:41:33 davem Exp $
2 * ebus.c: PCI to EBus bridge device.
3 *
4 * Copyright (C) 1997 Eddie C. Dost (ecd@skynet.be)
5 * Copyright (C) 1999 David S. Miller (davem@redhat.com)
6 */
8 #include <linux/module.h>
9 #include <linux/kernel.h>
10 #include <linux/types.h>
11 #include <linux/init.h>
12 #include <linux/slab.h>
13 #include <linux/string.h>
14 #include <linux/interrupt.h>
15 #include <linux/delay.h>
17 #include <asm/system.h>
18 #include <asm/page.h>
19 #include <asm/pbm.h>
20 #include <asm/ebus.h>
21 #include <asm/oplib.h>
22 #include <asm/prom.h>
23 #include <asm/of_device.h>
24 #include <asm/bpp.h>
25 #include <asm/irq.h>
27 /* EBUS dma library. */
/* Register offsets; the code below adds them to ebus_dma_info->regs. */
#define EBDMA_CSR			0x00UL	/* Control/Status */
#define EBDMA_ADDR			0x04UL	/* DMA Address */
#define EBDMA_COUNT			0x08UL	/* DMA Count */

/* Bit masks applied to values read from / written to EBDMA_CSR. */
#define EBDMA_CSR_INT_PEND		0x00000001
#define EBDMA_CSR_ERR_PEND		0x00000002
#define EBDMA_CSR_DRAIN			0x00000004
#define EBDMA_CSR_INT_EN		0x00000010
#define EBDMA_CSR_RESET			0x00000080
#define EBDMA_CSR_WRITE			0x00000100
#define EBDMA_CSR_EN_DMA		0x00000200
#define EBDMA_CSR_CYC_PEND		0x00000400
#define EBDMA_CSR_DIAG_RD_DONE		0x00000800
#define EBDMA_CSR_DIAG_WR_DONE		0x00001000
#define EBDMA_CSR_EN_CNT		0x00002000
#define EBDMA_CSR_TC			0x00004000
#define EBDMA_CSR_DIS_CSR_DRN		0x00010000

/* Burst size field of the CSR and its encodings. */
#define EBDMA_CSR_BURST_SZ_MASK		0x000c0000
#define EBDMA_CSR_BURST_SZ_1		0x00080000
#define EBDMA_CSR_BURST_SZ_4		0x00000000
#define EBDMA_CSR_BURST_SZ_8		0x00040000
#define EBDMA_CSR_BURST_SZ_16		0x000c0000

#define EBDMA_CSR_DIAG_EN		0x00100000
#define EBDMA_CSR_DIS_ERR_PEND		0x00400000
#define EBDMA_CSR_TCI_DIS		0x00800000
#define EBDMA_CSR_EN_NEXT		0x01000000
#define EBDMA_CSR_DMA_ON		0x02000000
#define EBDMA_CSR_A_LOADED		0x04000000
#define EBDMA_CSR_NA_LOADED		0x08000000
#define EBDMA_CSR_DEV_ID_MASK		0xf0000000

/* Maximum number of CSR polls performed by __ebus_dma_reset(). */
#define EBUS_DMA_RESET_TIMEOUT		10000
62 static void __ebus_dma_reset(struct ebus_dma_info *p, int no_drain)
63 {
64 int i;
65 u32 val = 0;
67 writel(EBDMA_CSR_RESET, p->regs + EBDMA_CSR);
68 udelay(1);
70 if (no_drain)
71 return;
73 for (i = EBUS_DMA_RESET_TIMEOUT; i > 0; i--) {
74 val = readl(p->regs + EBDMA_CSR);
76 if (!(val & (EBDMA_CSR_DRAIN | EBDMA_CSR_CYC_PEND)))
77 break;
78 udelay(10);
79 }
80 }
82 static irqreturn_t ebus_dma_irq(int irq, void *dev_id, struct pt_regs *regs)
83 {
84 struct ebus_dma_info *p = dev_id;
85 unsigned long flags;
86 u32 csr = 0;
88 spin_lock_irqsave(&p->lock, flags);
89 csr = readl(p->regs + EBDMA_CSR);
90 writel(csr, p->regs + EBDMA_CSR);
91 spin_unlock_irqrestore(&p->lock, flags);
93 if (csr & EBDMA_CSR_ERR_PEND) {
94 printk(KERN_CRIT "ebus_dma(%s): DMA error!\n", p->name);
95 p->callback(p, EBUS_DMA_EVENT_ERROR, p->client_cookie);
96 return IRQ_HANDLED;
97 } else if (csr & EBDMA_CSR_INT_PEND) {
98 p->callback(p,
99 (csr & EBDMA_CSR_TC) ?
100 EBUS_DMA_EVENT_DMA : EBUS_DMA_EVENT_DEVICE,
101 p->client_cookie);
102 return IRQ_HANDLED;
103 }
105 return IRQ_NONE;
107 }
109 int ebus_dma_register(struct ebus_dma_info *p)
110 {
111 u32 csr;
113 if (!p->regs)
114 return -EINVAL;
115 if (p->flags & ~(EBUS_DMA_FLAG_USE_EBDMA_HANDLER |
116 EBUS_DMA_FLAG_TCI_DISABLE))
117 return -EINVAL;
118 if ((p->flags & EBUS_DMA_FLAG_USE_EBDMA_HANDLER) && !p->callback)
119 return -EINVAL;
120 if (!strlen(p->name))
121 return -EINVAL;
123 __ebus_dma_reset(p, 1);
125 csr = EBDMA_CSR_BURST_SZ_16 | EBDMA_CSR_EN_CNT;
127 if (p->flags & EBUS_DMA_FLAG_TCI_DISABLE)
128 csr |= EBDMA_CSR_TCI_DIS;
130 writel(csr, p->regs + EBDMA_CSR);
132 return 0;
133 }
134 EXPORT_SYMBOL(ebus_dma_register);
136 int ebus_dma_irq_enable(struct ebus_dma_info *p, int on)
137 {
138 unsigned long flags;
139 u32 csr;
141 if (on) {
142 if (p->flags & EBUS_DMA_FLAG_USE_EBDMA_HANDLER) {
143 if (request_irq(p->irq, ebus_dma_irq, IRQF_SHARED, p->name, p))
144 return -EBUSY;
145 }
147 spin_lock_irqsave(&p->lock, flags);
148 csr = readl(p->regs + EBDMA_CSR);
149 csr |= EBDMA_CSR_INT_EN;
150 writel(csr, p->regs + EBDMA_CSR);
151 spin_unlock_irqrestore(&p->lock, flags);
152 } else {
153 spin_lock_irqsave(&p->lock, flags);
154 csr = readl(p->regs + EBDMA_CSR);
155 csr &= ~EBDMA_CSR_INT_EN;
156 writel(csr, p->regs + EBDMA_CSR);
157 spin_unlock_irqrestore(&p->lock, flags);
159 if (p->flags & EBUS_DMA_FLAG_USE_EBDMA_HANDLER) {
160 free_irq(p->irq, p);
161 }
162 }
164 return 0;
165 }
166 EXPORT_SYMBOL(ebus_dma_irq_enable);
168 void ebus_dma_unregister(struct ebus_dma_info *p)
169 {
170 unsigned long flags;
171 u32 csr;
172 int irq_on = 0;
174 spin_lock_irqsave(&p->lock, flags);
175 csr = readl(p->regs + EBDMA_CSR);
176 if (csr & EBDMA_CSR_INT_EN) {
177 csr &= ~EBDMA_CSR_INT_EN;
178 writel(csr, p->regs + EBDMA_CSR);
179 irq_on = 1;
180 }
181 spin_unlock_irqrestore(&p->lock, flags);
183 if (irq_on)
184 free_irq(p->irq, p);
185 }
186 EXPORT_SYMBOL(ebus_dma_unregister);
188 int ebus_dma_request(struct ebus_dma_info *p, dma_addr_t bus_addr, size_t len)
189 {
190 unsigned long flags;
191 u32 csr;
192 int err;
194 if (len >= (1 << 24))
195 return -EINVAL;
197 spin_lock_irqsave(&p->lock, flags);
198 csr = readl(p->regs + EBDMA_CSR);
199 err = -EINVAL;
200 if (!(csr & EBDMA_CSR_EN_DMA))
201 goto out;
202 err = -EBUSY;
203 if (csr & EBDMA_CSR_NA_LOADED)
204 goto out;
206 writel(len, p->regs + EBDMA_COUNT);
207 writel(bus_addr, p->regs + EBDMA_ADDR);
208 err = 0;
210 out:
211 spin_unlock_irqrestore(&p->lock, flags);
213 return err;
214 }
215 EXPORT_SYMBOL(ebus_dma_request);
217 void ebus_dma_prepare(struct ebus_dma_info *p, int write)
218 {
219 unsigned long flags;
220 u32 csr;
222 spin_lock_irqsave(&p->lock, flags);
223 __ebus_dma_reset(p, 0);
225 csr = (EBDMA_CSR_INT_EN |
226 EBDMA_CSR_EN_CNT |
227 EBDMA_CSR_BURST_SZ_16 |
228 EBDMA_CSR_EN_NEXT);
230 if (write)
231 csr |= EBDMA_CSR_WRITE;
232 if (p->flags & EBUS_DMA_FLAG_TCI_DISABLE)
233 csr |= EBDMA_CSR_TCI_DIS;
235 writel(csr, p->regs + EBDMA_CSR);
237 spin_unlock_irqrestore(&p->lock, flags);
238 }
239 EXPORT_SYMBOL(ebus_dma_prepare);
241 unsigned int ebus_dma_residue(struct ebus_dma_info *p)
242 {
243 return readl(p->regs + EBDMA_COUNT);
244 }
245 EXPORT_SYMBOL(ebus_dma_residue);
247 unsigned int ebus_dma_addr(struct ebus_dma_info *p)
248 {
249 return readl(p->regs + EBDMA_ADDR);
250 }
251 EXPORT_SYMBOL(ebus_dma_addr);
253 void ebus_dma_enable(struct ebus_dma_info *p, int on)
254 {
255 unsigned long flags;
256 u32 orig_csr, csr;
258 spin_lock_irqsave(&p->lock, flags);
259 orig_csr = csr = readl(p->regs + EBDMA_CSR);
260 if (on)
261 csr |= EBDMA_CSR_EN_DMA;
262 else
263 csr &= ~EBDMA_CSR_EN_DMA;
264 if ((orig_csr & EBDMA_CSR_EN_DMA) !=
265 (csr & EBDMA_CSR_EN_DMA))
266 writel(csr, p->regs + EBDMA_CSR);
267 spin_unlock_irqrestore(&p->lock, flags);
268 }
269 EXPORT_SYMBOL(ebus_dma_enable);
/*
 * Head of the singly linked (->next) list of EBus bridges built by
 * ebus_init(); stays NULL when no usable bridge is found.
 */
struct linux_ebus *ebus_chain;
273 static inline void *ebus_alloc(size_t size)
274 {
275 void *mem;
277 mem = kzalloc(size, GFP_ATOMIC);
278 if (!mem)
279 panic("ebus_alloc: out of memory");
280 return mem;
281 }
283 static void __init fill_ebus_child(struct device_node *dp,
284 struct linux_ebus_child *dev,
285 int non_standard_regs)
286 {
287 struct of_device *op;
288 int *regs;
289 int i, len;
291 dev->prom_node = dp;
292 printk(" (%s)", dp->name);
294 regs = of_get_property(dp, "reg", &len);
295 if (!regs)
296 dev->num_addrs = 0;
297 else
298 dev->num_addrs = len / sizeof(regs[0]);
300 if (non_standard_regs) {
301 /* This is to handle reg properties which are not
302 * in the parent relative format. One example are
303 * children of the i2c device on CompactPCI systems.
304 *
305 * So, for such devices we just record the property
306 * raw in the child resources.
307 */
308 for (i = 0; i < dev->num_addrs; i++)
309 dev->resource[i].start = regs[i];
310 } else {
311 for (i = 0; i < dev->num_addrs; i++) {
312 int rnum = regs[i];
313 if (rnum >= dev->parent->num_addrs) {
314 prom_printf("UGH: property for %s was %d, need < %d\n",
315 dp->name, len, dev->parent->num_addrs);
316 prom_halt();
317 }
318 dev->resource[i].start = dev->parent->resource[i].start;
319 dev->resource[i].end = dev->parent->resource[i].end;
320 dev->resource[i].flags = IORESOURCE_MEM;
321 dev->resource[i].name = dp->name;
322 }
323 }
325 op = of_find_device_by_node(dp);
326 if (!op) {
327 dev->num_irqs = 0;
328 } else {
329 dev->num_irqs = op->num_irqs;
330 for (i = 0; i < dev->num_irqs; i++)
331 dev->irqs[i] = op->irqs[i];
332 }
334 if (!dev->num_irqs) {
335 /*
336 * Oh, well, some PROMs don't export interrupts
337 * property to children of EBus devices...
338 *
339 * Be smart about PS/2 keyboard and mouse.
340 */
341 if (!strcmp(dev->parent->prom_node->name, "8042")) {
342 if (!strcmp(dev->prom_node->name, "kb_ps2")) {
343 dev->num_irqs = 1;
344 dev->irqs[0] = dev->parent->irqs[0];
345 } else {
346 dev->num_irqs = 1;
347 dev->irqs[0] = dev->parent->irqs[1];
348 }
349 }
350 }
351 }
353 static int __init child_regs_nonstandard(struct linux_ebus_device *dev)
354 {
355 if (!strcmp(dev->prom_node->name, "i2c") ||
356 !strcmp(dev->prom_node->name, "SUNW,lombus"))
357 return 1;
358 return 0;
359 }
361 static void __init fill_ebus_device(struct device_node *dp, struct linux_ebus_device *dev)
362 {
363 struct linux_ebus_child *child;
364 struct of_device *op;
365 int i, len;
367 dev->prom_node = dp;
369 printk(" [%s", dp->name);
371 op = of_find_device_by_node(dp);
372 if (!op) {
373 dev->num_addrs = 0;
374 dev->num_irqs = 0;
375 } else {
376 (void) of_get_property(dp, "reg", &len);
377 dev->num_addrs = len / sizeof(struct linux_prom_registers);
379 for (i = 0; i < dev->num_addrs; i++)
380 memcpy(&dev->resource[i],
381 &op->resource[i],
382 sizeof(struct resource));
384 dev->num_irqs = op->num_irqs;
385 for (i = 0; i < dev->num_irqs; i++)
386 dev->irqs[i] = op->irqs[i];
387 }
389 dev->ofdev.node = dp;
390 dev->ofdev.dev.parent = &dev->bus->ofdev.dev;
391 dev->ofdev.dev.bus = &ebus_bus_type;
392 strcpy(dev->ofdev.dev.bus_id, dp->path_component_name);
394 /* Register with core */
395 if (of_device_register(&dev->ofdev) != 0)
396 printk(KERN_DEBUG "ebus: device registration error for %s!\n",
397 dev->ofdev.dev.bus_id);
399 dp = dp->child;
400 if (dp) {
401 printk(" ->");
402 dev->children = ebus_alloc(sizeof(struct linux_ebus_child));
404 child = dev->children;
405 child->next = NULL;
406 child->parent = dev;
407 child->bus = dev->bus;
408 fill_ebus_child(dp, child,
409 child_regs_nonstandard(dev));
411 while ((dp = dp->sibling) != NULL) {
412 child->next = ebus_alloc(sizeof(struct linux_ebus_child));
414 child = child->next;
415 child->next = NULL;
416 child->parent = dev;
417 child->bus = dev->bus;
418 fill_ebus_child(dp, child,
419 child_regs_nonstandard(dev));
420 }
421 }
422 printk("]");
423 }
425 static struct pci_dev *find_next_ebus(struct pci_dev *start, int *is_rio_p)
426 {
427 struct pci_dev *pdev = start;
429 while ((pdev = pci_get_device(PCI_VENDOR_ID_SUN, PCI_ANY_ID, pdev)))
430 if (pdev->device == PCI_DEVICE_ID_SUN_EBUS ||
431 pdev->device == PCI_DEVICE_ID_SUN_RIO_EBUS)
432 break;
434 *is_rio_p = !!(pdev && (pdev->device == PCI_DEVICE_ID_SUN_RIO_EBUS));
436 return pdev;
437 }
439 void __init ebus_init(void)
440 {
441 struct pci_pbm_info *pbm;
442 struct linux_ebus_device *dev;
443 struct linux_ebus *ebus;
444 struct pci_dev *pdev;
445 struct pcidev_cookie *cookie;
446 struct device_node *dp;
447 int is_rio;
448 int num_ebus = 0;
450 pdev = find_next_ebus(NULL, &is_rio);
451 if (!pdev) {
452 printk("ebus: No EBus's found.\n");
453 return;
454 }
456 cookie = pdev->sysdata;
457 dp = cookie->prom_node;
459 ebus_chain = ebus = ebus_alloc(sizeof(struct linux_ebus));
460 ebus->next = NULL;
461 ebus->is_rio = is_rio;
463 while (dp) {
464 struct device_node *child;
466 /* SUNW,pci-qfe uses four empty ebuses on it.
467 I think we should not consider them here,
468 as they have half of the properties this
469 code expects and once we do PCI hot-plug,
470 we'd have to tweak with the ebus_chain
471 in the runtime after initialization. -jj */
472 if (!dp->child) {
473 pdev = find_next_ebus(pdev, &is_rio);
474 if (!pdev) {
475 if (ebus == ebus_chain) {
476 ebus_chain = NULL;
477 printk("ebus: No EBus's found.\n");
478 return;
479 }
480 break;
481 }
482 ebus->is_rio = is_rio;
483 cookie = pdev->sysdata;
484 dp = cookie->prom_node;
485 continue;
486 }
487 printk("ebus%d:", num_ebus);
489 ebus->index = num_ebus;
490 ebus->prom_node = dp;
491 ebus->self = pdev;
492 ebus->parent = pbm = cookie->pbm;
494 ebus->ofdev.node = dp;
495 ebus->ofdev.dev.parent = &pdev->dev;
496 ebus->ofdev.dev.bus = &ebus_bus_type;
497 strcpy(ebus->ofdev.dev.bus_id, dp->path_component_name);
499 /* Register with core */
500 if (of_device_register(&ebus->ofdev) != 0)
501 printk(KERN_DEBUG "ebus: device registration error for %s!\n",
502 ebus->ofdev.dev.bus_id);
505 child = dp->child;
506 if (!child)
507 goto next_ebus;
509 ebus->devices = ebus_alloc(sizeof(struct linux_ebus_device));
511 dev = ebus->devices;
512 dev->next = NULL;
513 dev->children = NULL;
514 dev->bus = ebus;
515 fill_ebus_device(child, dev);
517 while ((child = child->sibling) != NULL) {
518 dev->next = ebus_alloc(sizeof(struct linux_ebus_device));
520 dev = dev->next;
521 dev->next = NULL;
522 dev->children = NULL;
523 dev->bus = ebus;
524 fill_ebus_device(child, dev);
525 }
527 next_ebus:
528 printk("\n");
530 pdev = find_next_ebus(pdev, &is_rio);
531 if (!pdev)
532 break;
534 cookie = pdev->sysdata;
535 dp = cookie->prom_node;
537 ebus->next = ebus_alloc(sizeof(struct linux_ebus));
538 ebus = ebus->next;
539 ebus->next = NULL;
540 ebus->is_rio = is_rio;
541 ++num_ebus;
542 }
543 pci_dev_put(pdev); /* XXX for the case, when ebusnd is 0, is it OK? */
544 }