ia64/linux-2.6.18-xen.hg

view arch/alpha/kernel/core_tsunami.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /*
2 * linux/arch/alpha/kernel/core_tsunami.c
3 *
4 * Based on code written by David A. Rusling (david.rusling@reo.mts.dec.com).
5 *
6 * Code common to all TSUNAMI core logic chips.
7 */
9 #define __EXTERN_INLINE inline
10 #include <asm/io.h>
11 #include <asm/core_tsunami.h>
12 #undef __EXTERN_INLINE
14 #include <linux/types.h>
15 #include <linux/pci.h>
16 #include <linux/sched.h>
17 #include <linux/init.h>
18 #include <linux/bootmem.h>
20 #include <asm/ptrace.h>
21 #include <asm/smp.h>
23 #include "proto.h"
24 #include "pci_impl.h"
26 /* Save Tsunami configuration data as the console had it set up. */
/*
 * One snapshot per pchip (indexed 0..1), each holding the four window
 * register sets copied out of the chip by tsunami_init_one_pchip() and
 * written back by tsunami_kill_one_pchip().
 */
struct
{
	unsigned long wsba[4];	/* copies of pchip->wsba[n].csr */
	unsigned long wsm[4];	/* copies of pchip->wsm[n].csr */
	unsigned long tba[4];	/* copies of pchip->tba[n].csr */
} saved_config[2] __attribute__((common));
35 /*
36 * NOTE: Herein lie back-to-back mb instructions. They are magic.
37 * One plausible explanation is that the I/O controller does not properly
38 * handle the system transaction. Another involves timing. Ho hum.
39 */
41 /*
42 * BIOS32-style PCI interface:
43 */
/* Set DEBUG_CONFIG non-zero to have DBG_CFG((fmt, ...)) expand to a
   printk call; with it zero, DBG_CFG expands to nothing. */
#define DEBUG_CONFIG 0

#if !DEBUG_CONFIG
# define DBG_CFG(args)
#else
# define DBG_CFG(args) printk args
#endif
54 /*
55 * Given a bus, device, and function number, compute resulting
56 * configuration space address
57 * accordingly. It is therefore not safe to have concurrent
58 * invocations to configuration space access routines, but there
59 * really shouldn't be any need for this.
60 *
61 * Note that all config space accesses use Type 1 address format.
62 *
63 * Note also that type 1 is determined by non-zero bus number.
64 *
65 * Type 1:
66 *
67 * 3 3|3 3 2 2|2 2 2 2|2 2 2 2|1 1 1 1|1 1 1 1|1 1
68 * 3 2|1 0 9 8|7 6 5 4|3 2 1 0|9 8 7 6|5 4 3 2|1 0 9 8|7 6 5 4|3 2 1 0
69 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
70 * | | | | | | | | | | |B|B|B|B|B|B|B|B|D|D|D|D|D|F|F|F|R|R|R|R|R|R|0|1|
71 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
72 *
73 * 31:24 reserved
74 * 23:16 bus number (8 bits = 256 possible buses)
75 * 15:11 Device number (5 bits)
76 * 10:8 function number
77 * 7:2 register number
78 *
79 * Notes:
80 * The function number selects which function of a multi-function device
81 * (e.g., SCSI and Ethernet).
82 *
83 * The register selects a DWORD (32 bit) register offset. Hence it
84 * doesn't get shifted by 2 bits as we want to "drop" the bottom two
85 * bits.
86 */
88 static int
89 mk_conf_addr(struct pci_bus *pbus, unsigned int device_fn, int where,
90 unsigned long *pci_addr, unsigned char *type1)
91 {
92 struct pci_controller *hose = pbus->sysdata;
93 unsigned long addr;
94 u8 bus = pbus->number;
96 DBG_CFG(("mk_conf_addr(bus=%d ,device_fn=0x%x, where=0x%x, "
97 "pci_addr=0x%p, type1=0x%p)\n",
98 bus, device_fn, where, pci_addr, type1));
100 if (!pbus->parent) /* No parent means peer PCI bus. */
101 bus = 0;
102 *type1 = (bus != 0);
104 addr = (bus << 16) | (device_fn << 8) | where;
105 addr |= hose->config_space_base;
107 *pci_addr = addr;
108 DBG_CFG(("mk_conf_addr: returning pci_addr 0x%lx\n", addr));
109 return 0;
110 }
112 static int
113 tsunami_read_config(struct pci_bus *bus, unsigned int devfn, int where,
114 int size, u32 *value)
115 {
116 unsigned long addr;
117 unsigned char type1;
119 if (mk_conf_addr(bus, devfn, where, &addr, &type1))
120 return PCIBIOS_DEVICE_NOT_FOUND;
122 switch (size) {
123 case 1:
124 *value = __kernel_ldbu(*(vucp)addr);
125 break;
126 case 2:
127 *value = __kernel_ldwu(*(vusp)addr);
128 break;
129 case 4:
130 *value = *(vuip)addr;
131 break;
132 }
134 return PCIBIOS_SUCCESSFUL;
135 }
137 static int
138 tsunami_write_config(struct pci_bus *bus, unsigned int devfn, int where,
139 int size, u32 value)
140 {
141 unsigned long addr;
142 unsigned char type1;
144 if (mk_conf_addr(bus, devfn, where, &addr, &type1))
145 return PCIBIOS_DEVICE_NOT_FOUND;
147 switch (size) {
148 case 1:
149 __kernel_stb(value, *(vucp)addr);
150 mb();
151 __kernel_ldbu(*(vucp)addr);
152 break;
153 case 2:
154 __kernel_stw(value, *(vusp)addr);
155 mb();
156 __kernel_ldwu(*(vusp)addr);
157 break;
158 case 4:
159 *(vuip)addr = value;
160 mb();
161 *(vuip)addr;
162 break;
163 }
165 return PCIBIOS_SUCCESSFUL;
166 }
168 struct pci_ops tsunami_pci_ops =
169 {
170 .read = tsunami_read_config,
171 .write = tsunami_write_config,
172 };
174 void
175 tsunami_pci_tbi(struct pci_controller *hose, dma_addr_t start, dma_addr_t end)
176 {
177 tsunami_pchip *pchip = hose->index ? TSUNAMI_pchip1 : TSUNAMI_pchip0;
178 volatile unsigned long *csr;
179 unsigned long value;
181 /* We can invalidate up to 8 tlb entries in a go. The flush
182 matches against <31:16> in the pci address. */
183 csr = &pchip->tlbia.csr;
184 if (((start ^ end) & 0xffff0000) == 0)
185 csr = &pchip->tlbiv.csr;
187 /* For TBIA, it doesn't matter what value we write. For TBI,
188 it's the shifted tag bits. */
189 value = (start & 0xffff0000) >> 12;
191 *csr = value;
192 mb();
193 *csr;
194 }
196 #ifdef NXM_MACHINE_CHECKS_ON_TSUNAMI
197 static long __init
198 tsunami_probe_read(volatile unsigned long *vaddr)
199 {
200 long dont_care, probe_result;
201 int cpu = smp_processor_id();
202 int s = swpipl(IPL_MCHECK - 1);
204 mcheck_taken(cpu) = 0;
205 mcheck_expected(cpu) = 1;
206 mb();
207 dont_care = *vaddr;
208 draina();
209 mcheck_expected(cpu) = 0;
210 probe_result = !mcheck_taken(cpu);
211 mcheck_taken(cpu) = 0;
212 setipl(s);
214 printk("dont_care == 0x%lx\n", dont_care);
216 return probe_result;
217 }
219 static long __init
220 tsunami_probe_write(volatile unsigned long *vaddr)
221 {
222 long true_contents, probe_result = 1;
224 TSUNAMI_cchip->misc.csr |= (1L << 28); /* clear NXM... */
225 true_contents = *vaddr;
226 *vaddr = 0;
227 draina();
228 if (TSUNAMI_cchip->misc.csr & (1L << 28)) {
229 int source = (TSUNAMI_cchip->misc.csr >> 29) & 7;
230 TSUNAMI_cchip->misc.csr |= (1L << 28); /* ...and unlock NXS. */
231 probe_result = 0;
232 printk("tsunami_probe_write: unit %d at 0x%016lx\n", source,
233 (unsigned long)vaddr);
234 }
235 if (probe_result)
236 *vaddr = true_contents;
237 return probe_result;
238 }
239 #else
240 #define tsunami_probe_read(ADDR) 1
241 #endif /* NXM_MACHINE_CHECKS_ON_TSUNAMI */
243 #define FN __FUNCTION__
245 static void __init
246 tsunami_init_one_pchip(tsunami_pchip *pchip, int index)
247 {
248 struct pci_controller *hose;
250 if (tsunami_probe_read(&pchip->pctl.csr) == 0)
251 return;
253 hose = alloc_pci_controller();
254 if (index == 0)
255 pci_isa_hose = hose;
256 hose->io_space = alloc_resource();
257 hose->mem_space = alloc_resource();
259 /* This is for userland consumption. For some reason, the 40-bit
260 PIO bias that we use in the kernel through KSEG didn't work for
261 the page table based user mappings. So make sure we get the
262 43-bit PIO bias. */
263 hose->sparse_mem_base = 0;
264 hose->sparse_io_base = 0;
265 hose->dense_mem_base
266 = (TSUNAMI_MEM(index) & 0xffffffffffL) | 0x80000000000L;
267 hose->dense_io_base
268 = (TSUNAMI_IO(index) & 0xffffffffffL) | 0x80000000000L;
270 hose->config_space_base = TSUNAMI_CONF(index);
271 hose->index = index;
273 hose->io_space->start = TSUNAMI_IO(index) - TSUNAMI_IO_BIAS;
274 hose->io_space->end = hose->io_space->start + TSUNAMI_IO_SPACE - 1;
275 hose->io_space->name = pci_io_names[index];
276 hose->io_space->flags = IORESOURCE_IO;
278 hose->mem_space->start = TSUNAMI_MEM(index) - TSUNAMI_MEM_BIAS;
279 hose->mem_space->end = hose->mem_space->start + 0xffffffff;
280 hose->mem_space->name = pci_mem_names[index];
281 hose->mem_space->flags = IORESOURCE_MEM;
283 if (request_resource(&ioport_resource, hose->io_space) < 0)
284 printk(KERN_ERR "Failed to request IO on hose %d\n", index);
285 if (request_resource(&iomem_resource, hose->mem_space) < 0)
286 printk(KERN_ERR "Failed to request MEM on hose %d\n", index);
288 /*
289 * Save the existing PCI window translations. SRM will
290 * need them when we go to reboot.
291 */
293 saved_config[index].wsba[0] = pchip->wsba[0].csr;
294 saved_config[index].wsm[0] = pchip->wsm[0].csr;
295 saved_config[index].tba[0] = pchip->tba[0].csr;
297 saved_config[index].wsba[1] = pchip->wsba[1].csr;
298 saved_config[index].wsm[1] = pchip->wsm[1].csr;
299 saved_config[index].tba[1] = pchip->tba[1].csr;
301 saved_config[index].wsba[2] = pchip->wsba[2].csr;
302 saved_config[index].wsm[2] = pchip->wsm[2].csr;
303 saved_config[index].tba[2] = pchip->tba[2].csr;
305 saved_config[index].wsba[3] = pchip->wsba[3].csr;
306 saved_config[index].wsm[3] = pchip->wsm[3].csr;
307 saved_config[index].tba[3] = pchip->tba[3].csr;
309 /*
310 * Set up the PCI to main memory translation windows.
311 *
312 * Note: Window 3 is scatter-gather only
313 *
314 * Window 0 is scatter-gather 8MB at 8MB (for isa)
315 * Window 1 is scatter-gather (up to) 1GB at 1GB
316 * Window 2 is direct access 2GB at 2GB
317 *
318 * NOTE: we need the align_entry settings for Acer devices on ES40,
319 * specifically floppy and IDE when memory is larger than 2GB.
320 */
321 hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0);
322 /* Initially set for 4 PTEs, but will be overridden to 64K for ISA. */
323 hose->sg_isa->align_entry = 4;
325 hose->sg_pci = iommu_arena_new(hose, 0x40000000,
326 size_for_memory(0x40000000), 0);
327 hose->sg_pci->align_entry = 4; /* Tsunami caches 4 PTEs at a time */
329 __direct_map_base = 0x80000000;
330 __direct_map_size = 0x80000000;
332 pchip->wsba[0].csr = hose->sg_isa->dma_base | 3;
333 pchip->wsm[0].csr = (hose->sg_isa->size - 1) & 0xfff00000;
334 pchip->tba[0].csr = virt_to_phys(hose->sg_isa->ptes);
336 pchip->wsba[1].csr = hose->sg_pci->dma_base | 3;
337 pchip->wsm[1].csr = (hose->sg_pci->size - 1) & 0xfff00000;
338 pchip->tba[1].csr = virt_to_phys(hose->sg_pci->ptes);
340 pchip->wsba[2].csr = 0x80000000 | 1;
341 pchip->wsm[2].csr = (0x80000000 - 1) & 0xfff00000;
342 pchip->tba[2].csr = 0;
344 pchip->wsba[3].csr = 0;
346 /* Enable the Monster Window to make DAC pci64 possible. */
347 pchip->pctl.csr |= pctl_m_mwin;
349 tsunami_pci_tbi(hose, 0, -1);
350 }
352 void __init
353 tsunami_init_arch(void)
354 {
355 #ifdef NXM_MACHINE_CHECKS_ON_TSUNAMI
356 unsigned long tmp;
358 /* Ho hum.. init_arch is called before init_IRQ, but we need to be
359 able to handle machine checks. So install the handler now. */
360 wrent(entInt, 0);
362 /* NXMs just don't matter to Tsunami--unless they make it
363 choke completely. */
364 tmp = (unsigned long)(TSUNAMI_cchip - 1);
365 printk("%s: probing bogus address: 0x%016lx\n", FN, bogus_addr);
366 printk("\tprobe %s\n",
367 tsunami_probe_write((unsigned long *)bogus_addr)
368 ? "succeeded" : "failed");
369 #endif /* NXM_MACHINE_CHECKS_ON_TSUNAMI */
371 #if 0
372 printk("%s: CChip registers:\n", FN);
373 printk("%s: CSR_CSC 0x%lx\n", FN, TSUNAMI_cchip->csc.csr);
374 printk("%s: CSR_MTR 0x%lx\n", FN, TSUNAMI_cchip.mtr.csr);
375 printk("%s: CSR_MISC 0x%lx\n", FN, TSUNAMI_cchip->misc.csr);
376 printk("%s: CSR_DIM0 0x%lx\n", FN, TSUNAMI_cchip->dim0.csr);
377 printk("%s: CSR_DIM1 0x%lx\n", FN, TSUNAMI_cchip->dim1.csr);
378 printk("%s: CSR_DIR0 0x%lx\n", FN, TSUNAMI_cchip->dir0.csr);
379 printk("%s: CSR_DIR1 0x%lx\n", FN, TSUNAMI_cchip->dir1.csr);
380 printk("%s: CSR_DRIR 0x%lx\n", FN, TSUNAMI_cchip->drir.csr);
382 printk("%s: DChip registers:\n");
383 printk("%s: CSR_DSC 0x%lx\n", FN, TSUNAMI_dchip->dsc.csr);
384 printk("%s: CSR_STR 0x%lx\n", FN, TSUNAMI_dchip->str.csr);
385 printk("%s: CSR_DREV 0x%lx\n", FN, TSUNAMI_dchip->drev.csr);
386 #endif
387 /* With multiple PCI busses, we play with I/O as physical addrs. */
388 ioport_resource.end = ~0UL;
390 /* Find how many hoses we have, and initialize them. TSUNAMI
391 and TYPHOON can have 2, but might only have 1 (DS10). */
393 tsunami_init_one_pchip(TSUNAMI_pchip0, 0);
394 if (TSUNAMI_cchip->csc.csr & 1L<<14)
395 tsunami_init_one_pchip(TSUNAMI_pchip1, 1);
396 }
398 static void
399 tsunami_kill_one_pchip(tsunami_pchip *pchip, int index)
400 {
401 pchip->wsba[0].csr = saved_config[index].wsba[0];
402 pchip->wsm[0].csr = saved_config[index].wsm[0];
403 pchip->tba[0].csr = saved_config[index].tba[0];
405 pchip->wsba[1].csr = saved_config[index].wsba[1];
406 pchip->wsm[1].csr = saved_config[index].wsm[1];
407 pchip->tba[1].csr = saved_config[index].tba[1];
409 pchip->wsba[2].csr = saved_config[index].wsba[2];
410 pchip->wsm[2].csr = saved_config[index].wsm[2];
411 pchip->tba[2].csr = saved_config[index].tba[2];
413 pchip->wsba[3].csr = saved_config[index].wsba[3];
414 pchip->wsm[3].csr = saved_config[index].wsm[3];
415 pchip->tba[3].csr = saved_config[index].tba[3];
416 }
418 void
419 tsunami_kill_arch(int mode)
420 {
421 tsunami_kill_one_pchip(TSUNAMI_pchip0, 0);
422 if (TSUNAMI_cchip->csc.csr & 1L<<14)
423 tsunami_kill_one_pchip(TSUNAMI_pchip1, 1);
424 }
426 static inline void
427 tsunami_pci_clr_err_1(tsunami_pchip *pchip)
428 {
429 pchip->perror.csr;
430 pchip->perror.csr = 0x040;
431 mb();
432 pchip->perror.csr;
433 }
435 static inline void
436 tsunami_pci_clr_err(void)
437 {
438 tsunami_pci_clr_err_1(TSUNAMI_pchip0);
440 /* TSUNAMI and TYPHOON can have 2, but might only have 1 (DS10) */
441 if (TSUNAMI_cchip->csc.csr & 1L<<14)
442 tsunami_pci_clr_err_1(TSUNAMI_pchip1);
443 }
/*
 * Machine-check entry point for Tsunami systems.
 *
 * Clears the pchip error state and writes 0x7 via wrmces() before any
 * reporting, then passes the machine check to process_mcheck_info()
 * under the name "TSUNAMI", along with the current CPU's
 * mcheck_expected flag.
 */
void
tsunami_machine_check(unsigned long vector, unsigned long la_ptr,
		      struct pt_regs * regs)
{
	int cpu;
	int expected;

	/* Clear error before any reporting.  */
	mb();
	mb();  /* magic */
	draina();
	tsunami_pci_clr_err();
	wrmces(0x7);
	mb();

	cpu = smp_processor_id();
	expected = mcheck_expected(cpu);
	process_mcheck_info(vector, la_ptr, regs, "TSUNAMI", expected);
}