ia64/linux-2.6.18-xen.hg

view arch/alpha/kernel/core_irongate.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /*
2 * linux/arch/alpha/kernel/core_irongate.c
3 *
4 * Based on code written by David A. Rusling (david.rusling@reo.mts.dec.com).
5 *
6 * Copyright (C) 1999 Alpha Processor, Inc.,
7 * (David Daniel, Stig Telfer, Soohoon Lee)
8 *
9 * Code common to all IRONGATE core logic chips.
10 */
12 #define __EXTERN_INLINE inline
13 #include <asm/io.h>
14 #include <asm/core_irongate.h>
15 #undef __EXTERN_INLINE
17 #include <linux/types.h>
18 #include <linux/pci.h>
19 #include <linux/sched.h>
20 #include <linux/init.h>
21 #include <linux/initrd.h>
22 #include <linux/bootmem.h>
24 #include <asm/ptrace.h>
25 #include <asm/pci.h>
26 #include <asm/cacheflush.h>
27 #include <asm/tlbflush.h>
29 #include "proto.h"
30 #include "pci_impl.h"
32 /*
33 * BIOS32-style PCI interface:
34 */
36 #define DEBUG_CONFIG 0
38 #if DEBUG_CONFIG
39 # define DBG_CFG(args) printk args
40 #else
41 # define DBG_CFG(args)
42 #endif
44 igcsr32 *IronECC;
46 /*
47 * Given a bus, device, and function number, compute resulting
48 * configuration space address accordingly. It is therefore not safe
49 * to have concurrent invocations to configuration space access
50 * routines, but there really shouldn't be any need for this.
51 *
52 * addr[31:24] reserved
53 * addr[23:16] bus number (8 bits = 256 possible buses)
54 * addr[15:11] Device number (5 bits)
55 * addr[10: 8] function number
56 * addr[ 7: 2] register number
57 *
58 * For IRONGATE:
59 * if (bus = addr[23:16]) == 0
60 * then
61 * type 0 config cycle:
62 * addr_on_pci[31:11] = id selection for device = addr[15:11]
63 * addr_on_pci[10: 2] = addr[10: 2] ???
64 * addr_on_pci[ 1: 0] = 00
65 * else
66 * type 1 config cycle (pass on with no decoding):
67 * addr_on_pci[31:24] = 0
68 * addr_on_pci[23: 2] = addr[23: 2]
69 * addr_on_pci[ 1: 0] = 01
70 * fi
71 *
72 * Notes:
73 * The function number selects which function of a multi-function device
74 * (e.g., SCSI and Ethernet).
75 *
76 * The register selects a DWORD (32 bit) register offset. Hence it
77 * doesn't get shifted by 2 bits as we want to "drop" the bottom two
78 * bits.
79 */
81 static int
82 mk_conf_addr(struct pci_bus *pbus, unsigned int device_fn, int where,
83 unsigned long *pci_addr, unsigned char *type1)
84 {
85 unsigned long addr;
86 u8 bus = pbus->number;
88 DBG_CFG(("mk_conf_addr(bus=%d ,device_fn=0x%x, where=0x%x, "
89 "pci_addr=0x%p, type1=0x%p)\n",
90 bus, device_fn, where, pci_addr, type1));
92 *type1 = (bus != 0);
94 addr = (bus << 16) | (device_fn << 8) | where;
95 addr |= IRONGATE_CONF;
97 *pci_addr = addr;
98 DBG_CFG(("mk_conf_addr: returning pci_addr 0x%lx\n", addr));
99 return 0;
100 }
102 static int
103 irongate_read_config(struct pci_bus *bus, unsigned int devfn, int where,
104 int size, u32 *value)
105 {
106 unsigned long addr;
107 unsigned char type1;
109 if (mk_conf_addr(bus, devfn, where, &addr, &type1))
110 return PCIBIOS_DEVICE_NOT_FOUND;
112 switch (size) {
113 case 1:
114 *value = __kernel_ldbu(*(vucp)addr);
115 break;
116 case 2:
117 *value = __kernel_ldwu(*(vusp)addr);
118 break;
119 case 4:
120 *value = *(vuip)addr;
121 break;
122 }
124 return PCIBIOS_SUCCESSFUL;
125 }
127 static int
128 irongate_write_config(struct pci_bus *bus, unsigned int devfn, int where,
129 int size, u32 value)
130 {
131 unsigned long addr;
132 unsigned char type1;
134 if (mk_conf_addr(bus, devfn, where, &addr, &type1))
135 return PCIBIOS_DEVICE_NOT_FOUND;
137 switch (size) {
138 case 1:
139 __kernel_stb(value, *(vucp)addr);
140 mb();
141 __kernel_ldbu(*(vucp)addr);
142 break;
143 case 2:
144 __kernel_stw(value, *(vusp)addr);
145 mb();
146 __kernel_ldwu(*(vusp)addr);
147 break;
148 case 4:
149 *(vuip)addr = value;
150 mb();
151 *(vuip)addr;
152 break;
153 }
155 return PCIBIOS_SUCCESSFUL;
156 }
158 struct pci_ops irongate_pci_ops =
159 {
160 .read = irongate_read_config,
161 .write = irongate_write_config,
162 };
164 int
165 irongate_pci_clr_err(void)
166 {
167 unsigned int nmi_ctl=0;
168 unsigned int IRONGATE_jd;
170 again:
171 IRONGATE_jd = IRONGATE0->stat_cmd;
172 printk("Iron stat_cmd %x\n", IRONGATE_jd);
173 IRONGATE0->stat_cmd = IRONGATE_jd; /* write again clears error bits */
174 mb();
175 IRONGATE_jd = IRONGATE0->stat_cmd; /* re-read to force write */
177 IRONGATE_jd = *IronECC;
178 printk("Iron ECC %x\n", IRONGATE_jd);
179 *IronECC = IRONGATE_jd; /* write again clears error bits */
180 mb();
181 IRONGATE_jd = *IronECC; /* re-read to force write */
183 /* Clear ALI NMI */
184 nmi_ctl = inb(0x61);
185 nmi_ctl |= 0x0c;
186 outb(nmi_ctl, 0x61);
187 nmi_ctl &= ~0x0c;
188 outb(nmi_ctl, 0x61);
190 IRONGATE_jd = *IronECC;
191 if (IRONGATE_jd & 0x300) goto again;
193 return 0;
194 }
196 #define IRONGATE_3GB 0xc0000000UL
198 /* On Albacore (aka UP1500) with 4Gb of RAM we have to reserve some
199 memory for PCI. At this point we just reserve memory above 3Gb. Most
200 of this memory will be freed after PCI setup is done. */
201 static void __init
202 albacore_init_arch(void)
203 {
204 unsigned long memtop = max_low_pfn << PAGE_SHIFT;
205 unsigned long pci_mem = (memtop + 0x1000000UL) & ~0xffffffUL;
206 struct percpu_struct *cpu;
207 int pal_rev, pal_var;
209 cpu = (struct percpu_struct*)((char*)hwrpb + hwrpb->processor_offset);
210 pal_rev = cpu->pal_revision & 0xffff;
211 pal_var = (cpu->pal_revision >> 16) & 0xff;
213 /* Consoles earlier than A5.6-18 (OSF PALcode v1.62-2) set up
214 the CPU incorrectly (leave speculative stores enabled),
215 which causes memory corruption under certain conditions.
216 Issue a warning for such consoles. */
217 if (alpha_using_srm &&
218 (pal_rev < 0x13e || (pal_rev == 0x13e && pal_var < 2)))
219 printk(KERN_WARNING "WARNING! Upgrade to SRM A5.6-19 "
220 "or later\n");
222 if (pci_mem > IRONGATE_3GB)
223 pci_mem = IRONGATE_3GB;
224 IRONGATE0->pci_mem = pci_mem;
225 alpha_mv.min_mem_address = pci_mem;
226 if (memtop > pci_mem) {
227 #ifdef CONFIG_BLK_DEV_INITRD
228 extern unsigned long initrd_start, initrd_end;
229 extern void *move_initrd(unsigned long);
231 /* Move the initrd out of the way. */
232 if (initrd_end && __pa(initrd_end) > pci_mem) {
233 unsigned long size;
235 size = initrd_end - initrd_start;
236 free_bootmem_node(NODE_DATA(0), __pa(initrd_start),
237 PAGE_ALIGN(size));
238 if (!move_initrd(pci_mem))
239 printk("irongate_init_arch: initrd too big "
240 "(%ldK)\ndisabling initrd\n",
241 size / 1024);
242 }
243 #endif
244 reserve_bootmem_node(NODE_DATA(0), pci_mem, memtop - pci_mem);
245 printk("irongate_init_arch: temporarily reserving "
246 "region %08lx-%08lx for PCI\n", pci_mem, memtop - 1);
247 }
248 }
250 static void __init
251 irongate_setup_agp(void)
252 {
253 /* Disable the GART window. AGPGART doesn't work due to yet
254 unresolved memory coherency issues... */
255 IRONGATE0->agpva = IRONGATE0->agpva & ~0xf;
256 alpha_agpgart_size = 0;
257 }
259 void __init
260 irongate_init_arch(void)
261 {
262 struct pci_controller *hose;
263 int amd761 = (IRONGATE0->dev_vendor >> 16) > 0x7006; /* Albacore? */
265 IronECC = amd761 ? &IRONGATE0->bacsr54_eccms761 : &IRONGATE0->dramms;
267 irongate_pci_clr_err();
269 if (amd761)
270 albacore_init_arch();
272 irongate_setup_agp();
274 /*
275 * Create our single hose.
276 */
278 pci_isa_hose = hose = alloc_pci_controller();
279 hose->io_space = &ioport_resource;
280 hose->mem_space = &iomem_resource;
281 hose->index = 0;
283 /* This is for userland consumption. For some reason, the 40-bit
284 PIO bias that we use in the kernel through KSEG didn't work for
285 the page table based user mappings. So make sure we get the
286 43-bit PIO bias. */
287 hose->sparse_mem_base = 0;
288 hose->sparse_io_base = 0;
289 hose->dense_mem_base
290 = (IRONGATE_MEM & 0xffffffffffUL) | 0x80000000000UL;
291 hose->dense_io_base
292 = (IRONGATE_IO & 0xffffffffffUL) | 0x80000000000UL;
294 hose->sg_isa = hose->sg_pci = NULL;
295 __direct_map_base = 0;
296 __direct_map_size = 0xffffffff;
297 }
299 /*
300 * IO map and AGP support
301 */
302 #include <linux/vmalloc.h>
303 #include <linux/agp_backend.h>
304 #include <linux/agpgart.h>
305 #include <asm/pgalloc.h>
/*
 * GATT lookup helpers for a bus address.  Bits [31:22] select the page
 * directory entry and bits [21:12] select the entry within that GATT page.
 * Every macro argument is parenthesised so that compound expressions
 * (e.g. "base | off") are shifted and masked as a whole.
 * GET_GATT() expects a local "gatt_pages" array to be in scope.
 */
#define GET_PAGE_DIR_OFF(addr) ((addr) >> 22)
#define GET_PAGE_DIR_IDX(addr) (GET_PAGE_DIR_OFF(addr))

#define GET_GATT_OFF(addr) (((addr) & 0x003ff000) >> 12)
#define GET_GATT(addr) (gatt_pages[GET_PAGE_DIR_IDX(addr)])
313 void __iomem *
314 irongate_ioremap(unsigned long addr, unsigned long size)
315 {
316 struct vm_struct *area;
317 unsigned long vaddr;
318 unsigned long baddr, last;
319 u32 *mmio_regs, *gatt_pages, *cur_gatt, pte;
320 unsigned long gart_bus_addr;
322 if (!alpha_agpgart_size)
323 return (void __iomem *)(addr + IRONGATE_MEM);
325 gart_bus_addr = (unsigned long)IRONGATE0->bar0 &
326 PCI_BASE_ADDRESS_MEM_MASK;
328 /*
329 * Check for within the AGP aperture...
330 */
331 do {
332 /*
333 * Check the AGP area
334 */
335 if (addr >= gart_bus_addr && addr + size - 1 <
336 gart_bus_addr + alpha_agpgart_size)
337 break;
339 /*
340 * Not found - assume legacy ioremap
341 */
342 return (void __iomem *)(addr + IRONGATE_MEM);
343 } while(0);
345 mmio_regs = (u32 *)(((unsigned long)IRONGATE0->bar1 &
346 PCI_BASE_ADDRESS_MEM_MASK) + IRONGATE_MEM);
348 gatt_pages = (u32 *)(phys_to_virt(mmio_regs[1])); /* FIXME */
350 /*
351 * Adjust the limits (mappings must be page aligned)
352 */
353 if (addr & ~PAGE_MASK) {
354 printk("AGP ioremap failed... addr not page aligned (0x%lx)\n",
355 addr);
356 return (void __iomem *)(addr + IRONGATE_MEM);
357 }
358 last = addr + size - 1;
359 size = PAGE_ALIGN(last) - addr;
361 #if 0
362 printk("irongate_ioremap(0x%lx, 0x%lx)\n", addr, size);
363 printk("irongate_ioremap: gart_bus_addr 0x%lx\n", gart_bus_addr);
364 printk("irongate_ioremap: gart_aper_size 0x%lx\n", gart_aper_size);
365 printk("irongate_ioremap: mmio_regs %p\n", mmio_regs);
366 printk("irongate_ioremap: gatt_pages %p\n", gatt_pages);
368 for(baddr = addr; baddr <= last; baddr += PAGE_SIZE)
369 {
370 cur_gatt = phys_to_virt(GET_GATT(baddr) & ~1);
371 pte = cur_gatt[GET_GATT_OFF(baddr)] & ~1;
372 printk("irongate_ioremap: cur_gatt %p pte 0x%x\n",
373 cur_gatt, pte);
374 }
375 #endif
377 /*
378 * Map it
379 */
380 area = get_vm_area(size, VM_IOREMAP);
381 if (!area) return NULL;
383 for(baddr = addr, vaddr = (unsigned long)area->addr;
384 baddr <= last;
385 baddr += PAGE_SIZE, vaddr += PAGE_SIZE)
386 {
387 cur_gatt = phys_to_virt(GET_GATT(baddr) & ~1);
388 pte = cur_gatt[GET_GATT_OFF(baddr)] & ~1;
390 if (__alpha_remap_area_pages(vaddr,
391 pte, PAGE_SIZE, 0)) {
392 printk("AGP ioremap: FAILED to map...\n");
393 vfree(area->addr);
394 return NULL;
395 }
396 }
398 flush_tlb_all();
400 vaddr = (unsigned long)area->addr + (addr & ~PAGE_MASK);
401 #if 0
402 printk("irongate_ioremap(0x%lx, 0x%lx) returning 0x%lx\n",
403 addr, size, vaddr);
404 #endif
405 return (void __iomem *)vaddr;
406 }
408 void
409 irongate_iounmap(volatile void __iomem *xaddr)
410 {
411 unsigned long addr = (unsigned long) xaddr;
412 if (((long)addr >> 41) == -2)
413 return; /* kseg map, nothing to do */
414 if (addr)
415 return vfree((void *)(PAGE_MASK & addr));
416 }