ia64/linux-2.6.18-xen.hg

view arch/mips/sgi-ip27/ip27-smp.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /*
2 * This file is subject to the terms and conditions of the GNU General
3 * Public License. See the file "COPYING" in the main directory of this
4 * archive for more details.
5 *
6 * Copyright (C) 2000 - 2001 by Kanoj Sarcar (kanoj@sgi.com)
7 * Copyright (C) 2000 - 2001 by Silicon Graphics, Inc.
8 */
9 #include <linux/init.h>
10 #include <linux/sched.h>
11 #include <linux/nodemask.h>
12 #include <asm/page.h>
13 #include <asm/processor.h>
14 #include <asm/sn/arch.h>
15 #include <asm/sn/gda.h>
16 #include <asm/sn/intr.h>
17 #include <asm/sn/klconfig.h>
18 #include <asm/sn/launch.h>
19 #include <asm/sn/mapped_kernel.h>
20 #include <asm/sn/sn_private.h>
21 #include <asm/sn/types.h>
22 #include <asm/sn/sn0/hubpi.h>
23 #include <asm/sn/sn0/hubio.h>
24 #include <asm/sn/sn0/ip27.h>
26 /*
27 * Takes as first input the PROM assigned cpu id, and the kernel
28 * assigned cpu id as the second.
29 */
30 static void alloc_cpupda(cpuid_t cpu, int cpunum)
31 {
32 cnodeid_t node = get_cpu_cnode(cpu);
33 nasid_t nasid = COMPACT_TO_NASID_NODEID(node);
35 cputonasid(cpunum) = nasid;
36 cpu_data[cpunum].p_nodeid = node;
37 cputoslice(cpunum) = get_cpu_slice(cpu);
38 }
40 static nasid_t get_actual_nasid(lboard_t *brd)
41 {
42 klhub_t *hub;
44 if (!brd)
45 return INVALID_NASID;
47 /* find out if we are a completely disabled brd. */
48 hub = (klhub_t *)find_first_component(brd, KLSTRUCT_HUB);
49 if (!hub)
50 return INVALID_NASID;
51 if (!(hub->hub_info.flags & KLINFO_ENABLE)) /* disabled node brd */
52 return hub->hub_info.physid;
53 else
54 return brd->brd_nasid;
55 }
57 static int do_cpumask(cnodeid_t cnode, nasid_t nasid, int highest)
58 {
59 static int tot_cpus_found = 0;
60 lboard_t *brd;
61 klcpu_t *acpu;
62 int cpus_found = 0;
63 cpuid_t cpuid;
65 brd = find_lboard((lboard_t *)KL_CONFIG_INFO(nasid), KLTYPE_IP27);
67 do {
68 acpu = (klcpu_t *)find_first_component(brd, KLSTRUCT_CPU);
69 while (acpu) {
70 cpuid = acpu->cpu_info.virtid;
71 /* cnode is not valid for completely disabled brds */
72 if (get_actual_nasid(brd) == brd->brd_nasid)
73 cpuid_to_compact_node[cpuid] = cnode;
74 if (cpuid > highest)
75 highest = cpuid;
76 /* Only let it join in if it's marked enabled */
77 if ((acpu->cpu_info.flags & KLINFO_ENABLE) &&
78 (tot_cpus_found != NR_CPUS)) {
79 cpu_set(cpuid, phys_cpu_present_map);
80 alloc_cpupda(cpuid, tot_cpus_found);
81 cpus_found++;
82 tot_cpus_found++;
83 }
84 acpu = (klcpu_t *)find_component(brd, (klinfo_t *)acpu,
85 KLSTRUCT_CPU);
86 }
87 brd = KLCF_NEXT(brd);
88 if (!brd)
89 break;
91 brd = find_lboard(brd, KLTYPE_IP27);
92 } while (brd);
94 return highest;
95 }
97 void cpu_node_probe(void)
98 {
99 int i, highest = 0;
100 gda_t *gdap = GDA;
102 /*
103 * Initialize the arrays to invalid nodeid (-1)
104 */
105 for (i = 0; i < MAX_COMPACT_NODES; i++)
106 compact_to_nasid_node[i] = INVALID_NASID;
107 for (i = 0; i < MAX_NASIDS; i++)
108 nasid_to_compact_node[i] = INVALID_CNODEID;
109 for (i = 0; i < MAXCPUS; i++)
110 cpuid_to_compact_node[i] = INVALID_CNODEID;
112 /*
113 * MCD - this whole "compact node" stuff can probably be dropped,
114 * as we can handle sparse numbering now
115 */
116 nodes_clear(node_online_map);
117 for (i = 0; i < MAX_COMPACT_NODES; i++) {
118 nasid_t nasid = gdap->g_nasidtable[i];
119 if (nasid == INVALID_NASID)
120 break;
121 compact_to_nasid_node[i] = nasid;
122 nasid_to_compact_node[nasid] = i;
123 node_set_online(num_online_nodes());
124 highest = do_cpumask(i, nasid, highest);
125 }
127 printk("Discovered %d cpus on %d nodes\n", highest + 1, num_online_nodes());
128 }
130 static __init void intr_clear_all(nasid_t nasid)
131 {
132 int i;
134 REMOTE_HUB_S(nasid, PI_INT_MASK0_A, 0);
135 REMOTE_HUB_S(nasid, PI_INT_MASK0_B, 0);
136 REMOTE_HUB_S(nasid, PI_INT_MASK1_A, 0);
137 REMOTE_HUB_S(nasid, PI_INT_MASK1_B, 0);
139 for (i = 0; i < 128; i++)
140 REMOTE_HUB_CLR_INTR(nasid, i);
141 }
143 void __init plat_smp_setup(void)
144 {
145 cnodeid_t cnode;
147 for_each_online_node(cnode) {
148 if (cnode == 0)
149 continue;
150 intr_clear_all(COMPACT_TO_NASID_NODEID(cnode));
151 }
153 replicate_kernel_text();
155 /*
156 * Assumption to be fixed: we're always booted on logical / physical
157 * processor 0. While we're always running on logical processor 0
158 * this still means this is physical processor zero; it might for
159 * example be disabled in the firwware.
160 */
161 alloc_cpupda(0, 0);
162 }
164 void __init plat_prepare_cpus(unsigned int max_cpus)
165 {
166 /* We already did everything necessary earlier */
167 }
169 /*
170 * Launch a slave into smp_bootstrap(). It doesn't take an argument, and we
171 * set sp to the kernel stack of the newly created idle process, gp to the proc
172 * struct so that current_thread_info() will work.
173 */
174 void __init prom_boot_secondary(int cpu, struct task_struct *idle)
175 {
176 unsigned long gp = (unsigned long)task_thread_info(idle);
177 unsigned long sp = __KSTK_TOS(idle);
179 LAUNCH_SLAVE(cputonasid(cpu),cputoslice(cpu),
180 (launch_proc_t)MAPPED_KERN_RW_TO_K0(smp_bootstrap),
181 0, (void *) sp, (void *) gp);
182 }
/*
 * Run per_cpu_init() and then enable local interrupts, in that order.
 * NOTE(review): presumably invoked on each secondary CPU as it comes
 * up - confirm against the caller.
 */
void prom_init_secondary(void)
{
	per_cpu_init();
	local_irq_enable();
}
190 void __init prom_cpus_done(void)
191 {
192 }
/* Intentionally empty: this platform does nothing in this hook. */
void prom_smp_finish(void)
{
}
198 void core_send_ipi(int destid, unsigned int action)
199 {
200 int irq;
202 switch (action) {
203 case SMP_RESCHEDULE_YOURSELF:
204 irq = CPU_RESCHED_A_IRQ;
205 break;
206 case SMP_CALL_FUNCTION:
207 irq = CPU_CALL_A_IRQ;
208 break;
209 default:
210 panic("sendintr");
211 }
213 irq += cputoslice(destid);
215 /*
216 * Convert the compact hub number to the NASID to get the correct
217 * part of the address space. Then set the interrupt bit associated
218 * with the CPU we want to send the interrupt to.
219 */
220 REMOTE_HUB_SEND_INTR(COMPACT_TO_NASID_NODEID(cpu_to_node(destid)), irq);
221 }