ia64/linux-2.6.18-xen.hg

view arch/mips/sgi-ip27/ip27-init.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well-behaved
toolstack to ask a domain to balloon to more than its allocation, nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /*
2 * This file is subject to the terms and conditions of the GNU General
3 * Public License. See the file "COPYING" in the main directory of this
4 * archive for more details.
5 *
6 * Copyright (C) 2000 - 2001 by Kanoj Sarcar (kanoj@sgi.com)
7 * Copyright (C) 2000 - 2001 by Silicon Graphics, Inc.
8 */
9 #include <linux/kernel.h>
10 #include <linux/init.h>
11 #include <linux/sched.h>
12 #include <linux/mm.h>
13 #include <linux/module.h>
14 #include <linux/cpumask.h>
15 #include <asm/cpu.h>
16 #include <asm/io.h>
17 #include <asm/pgtable.h>
18 #include <asm/time.h>
19 #include <asm/sn/types.h>
20 #include <asm/sn/sn0/addrs.h>
21 #include <asm/sn/sn0/hubni.h>
22 #include <asm/sn/sn0/hubio.h>
23 #include <asm/sn/klconfig.h>
24 #include <asm/sn/ioc3.h>
25 #include <asm/mipsregs.h>
26 #include <asm/sn/gda.h>
27 #include <asm/sn/hub.h>
28 #include <asm/sn/intr.h>
29 #include <asm/current.h>
30 #include <asm/smp.h>
31 #include <asm/processor.h>
32 #include <asm/mmu_context.h>
33 #include <asm/thread_info.h>
34 #include <asm/sn/launch.h>
35 #include <asm/sn/sn_private.h>
36 #include <asm/sn/sn0/ip27.h>
37 #include <asm/sn/mapped_kernel.h>
39 #define CPU_NONE (cpuid_t)-1
41 static DECLARE_BITMAP(hub_init_mask, MAX_COMPACT_NODES);
42 nasid_t master_nasid = INVALID_NASID;
44 cnodeid_t nasid_to_compact_node[MAX_NASIDS];
45 nasid_t compact_to_nasid_node[MAX_COMPACT_NODES];
46 cnodeid_t cpuid_to_compact_node[MAXCPUS];
48 EXPORT_SYMBOL(nasid_to_compact_node);
50 extern void pcibr_setup(cnodeid_t);
52 extern void xtalk_probe_node(cnodeid_t nid);
54 static void __init per_hub_init(cnodeid_t cnode)
55 {
56 struct hub_data *hub = hub_data(cnode);
57 nasid_t nasid = COMPACT_TO_NASID_NODEID(cnode);
58 int i;
60 cpu_set(smp_processor_id(), hub->h_cpus);
62 if (test_and_set_bit(cnode, hub_init_mask))
63 return;
64 /*
65 * Set CRB timeout at 5ms, (< PI timeout of 10ms)
66 */
67 REMOTE_HUB_S(nasid, IIO_ICTP, 0x800);
68 REMOTE_HUB_S(nasid, IIO_ICTO, 0xff);
70 hub_rtc_init(cnode);
71 xtalk_probe_node(cnode);
73 #ifdef CONFIG_REPLICATE_EXHANDLERS
74 /*
75 * If this is not a headless node initialization,
76 * copy over the caliased exception handlers.
77 */
78 if (get_compact_nodeid() == cnode) {
79 extern char except_vec2_generic, except_vec3_generic;
80 extern void build_tlb_refill_handler(void);
82 memcpy((void *)(CKSEG0 + 0x100), &except_vec2_generic, 0x80);
83 memcpy((void *)(CKSEG0 + 0x180), &except_vec3_generic, 0x80);
84 build_tlb_refill_handler();
85 memcpy((void *)(CKSEG0 + 0x100), (void *) CKSEG0, 0x80);
86 memcpy((void *)(CKSEG0 + 0x180), &except_vec3_generic, 0x100);
87 __flush_cache_all();
88 }
89 #endif
91 /*
92 * Some interrupts are reserved by hardware or by software convention.
93 * Mark these as reserved right away so they won't be used accidently
94 * later.
95 */
96 for (i = 0; i <= BASE_PCI_IRQ; i++) {
97 __set_bit(i, hub->irq_alloc_mask);
98 LOCAL_HUB_CLR_INTR(INT_PEND0_BASELVL + i);
99 }
101 __set_bit(IP_PEND0_6_63, hub->irq_alloc_mask);
102 LOCAL_HUB_S(PI_INT_PEND_MOD, IP_PEND0_6_63);
104 for (i = NI_BRDCAST_ERR_A; i <= MSC_PANIC_INTR; i++) {
105 __set_bit(i, hub->irq_alloc_mask);
106 LOCAL_HUB_CLR_INTR(INT_PEND1_BASELVL + i);
107 }
108 }
110 void __init per_cpu_init(void)
111 {
112 int cpu = smp_processor_id();
113 int slice = LOCAL_HUB_L(PI_CPU_NUM);
114 cnodeid_t cnode = get_compact_nodeid();
115 struct hub_data *hub = hub_data(cnode);
116 struct slice_data *si = hub->slice + slice;
117 int i;
119 if (test_and_set_bit(slice, &hub->slice_map))
120 return;
122 clear_c0_status(ST0_IM);
124 per_hub_init(cnode);
126 for (i = 0; i < LEVELS_PER_SLICE; i++)
127 si->level_to_irq[i] = -1;
129 /*
130 * We use this so we can find the local hub's data as fast as only
131 * possible.
132 */
133 cpu_data[cpu].data = si;
135 cpu_time_init();
136 install_ipi();
138 /* Install our NMI handler if symmon hasn't installed one. */
139 install_cpu_nmi_handler(cputoslice(cpu));
141 set_c0_status(SRB_DEV0 | SRB_DEV1);
142 }
144 /*
145 * get_nasid() returns the physical node id number of the caller.
146 */
147 nasid_t
148 get_nasid(void)
149 {
150 return (nasid_t)((LOCAL_HUB_L(NI_STATUS_REV_ID) & NSRI_NODEID_MASK)
151 >> NSRI_NODEID_SHFT);
152 }
154 /*
155 * Map the physical node id to a virtual node id (virtual node ids are contiguous).
156 */
157 cnodeid_t get_compact_nodeid(void)
158 {
159 return NASID_TO_COMPACT_NODEID(get_nasid());
160 }
162 /* Extracted from the IOC3 meta driver. FIXME. */
163 static inline void ioc3_sio_init(void)
164 {
165 struct ioc3 *ioc3;
166 nasid_t nid;
167 long loops;
169 nid = get_nasid();
170 ioc3 = (struct ioc3 *) KL_CONFIG_CH_CONS_INFO(nid)->memory_base;
172 ioc3->sscr_a = 0; /* PIO mode for uarta. */
173 ioc3->sscr_b = 0; /* PIO mode for uartb. */
174 ioc3->sio_iec = ~0;
175 ioc3->sio_ies = (SIO_IR_SA_INT | SIO_IR_SB_INT);
177 loops=1000000; while(loops--);
178 ioc3->sregs.uarta.iu_fcr = 0;
179 ioc3->sregs.uartb.iu_fcr = 0;
180 loops=1000000; while(loops--);
181 }
183 static inline void ioc3_eth_init(void)
184 {
185 struct ioc3 *ioc3;
186 nasid_t nid;
188 nid = get_nasid();
189 ioc3 = (struct ioc3 *) KL_CONFIG_CH_CONS_INFO(nid)->memory_base;
191 ioc3->eier = 0;
192 }
194 extern void ip27_setup_console(void);
195 extern void ip27_time_init(void);
196 extern void ip27_reboot_setup(void);
198 void __init plat_mem_setup(void)
199 {
200 hubreg_t p, e, n_mode;
201 nasid_t nid;
203 ip27_setup_console();
204 ip27_reboot_setup();
206 /*
207 * hub_rtc init and cpu clock intr enabled for later calibrate_delay.
208 */
209 nid = get_nasid();
210 printk("IP27: Running on node %d.\n", nid);
212 p = LOCAL_HUB_L(PI_CPU_PRESENT_A) & 1;
213 e = LOCAL_HUB_L(PI_CPU_ENABLE_A) & 1;
214 printk("Node %d has %s primary CPU%s.\n", nid,
215 p ? "a" : "no",
216 e ? ", CPU is running" : "");
218 p = LOCAL_HUB_L(PI_CPU_PRESENT_B) & 1;
219 e = LOCAL_HUB_L(PI_CPU_ENABLE_B) & 1;
220 printk("Node %d has %s secondary CPU%s.\n", nid,
221 p ? "a" : "no",
222 e ? ", CPU is running" : "");
224 /*
225 * Try to catch kernel missconfigurations and give user an
226 * indication what option to select.
227 */
228 n_mode = LOCAL_HUB_L(NI_STATUS_REV_ID) & NSRI_MORENODES_MASK;
229 printk("Machine is in %c mode.\n", n_mode ? 'N' : 'M');
230 #ifdef CONFIG_SGI_SN_N_MODE
231 if (!n_mode)
232 panic("Kernel compiled for M mode.");
233 #else
234 if (n_mode)
235 panic("Kernel compiled for N mode.");
236 #endif
238 ioc3_sio_init();
239 ioc3_eth_init();
240 per_cpu_init();
242 set_io_port_base(IO_BASE);
244 board_time_init = ip27_time_init;
245 }