ia64/linux-2.6.18-xen.hg

view arch/mips/sgi-ip27/ip27-nmi.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/mmzone.h>
#include <linux/nodemask.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <asm/atomic.h>
#include <asm/processor.h>
#include <asm/sn/types.h>
#include <asm/sn/addrs.h>
#include <asm/sn/nmi.h>
#include <asm/sn/arch.h>
#include <asm/sn/sn0/hub.h>
/*
 * Number of cpus to visit on a node.  The per-node CNODE_NUM_CPUS() variant
 * is compiled out, so the argument is currently ignored and every node is
 * treated as having CPUS_PER_NODE cpus.
 */
#if 0
#define NODE_NUM_CPUS(n)	CNODE_NUM_CPUS(n)
#else
#define NODE_NUM_CPUS(n)	CPUS_PER_NODE
#endif

/*
 * "No node" sentinel.  The expansion is fully parenthesized so that it stays
 * a single operand wherever it is used (e.g. next to sizeof or a postfix
 * operator).
 */
#define CNODEID_NONE		((cnodeid_t)-1)

/* Take nmi_lock; used to serialize cpus entering the NMI dump path. */
#define enter_panic_mode()	spin_lock(&nmi_lock)

typedef unsigned long machreg_t;

DEFINE_SPINLOCK(nmi_lock);
/* Defined further down in this file. */
void cont_nmi_dump(void);

/*
 * Handler whose address install_cpu_nmi_handler() stores in the NMI vector.
 * It does nothing but hand control to cont_nmi_dump().
 *
 * TODO: work out what else needs doing here first (set up sp, gp?).
 */
void nmi_dump(void)
{
	cont_nmi_dump();
}
37 void install_cpu_nmi_handler(int slice)
38 {
39 nmi_t *nmi_addr;
41 nmi_addr = (nmi_t *)NMI_ADDR(get_nasid(), slice);
42 if (nmi_addr->call_addr)
43 return;
44 nmi_addr->magic = NMI_MAGIC;
45 nmi_addr->call_addr = (void *)nmi_dump;
46 nmi_addr->call_addr_c =
47 (void *)(~((unsigned long)(nmi_addr->call_addr)));
48 nmi_addr->call_parm = 0;
49 }
51 /*
52 * Copy the cpu registers which have been saved in the IP27prom format
53 * into the eframe format for the node under consideration.
54 */
56 void nmi_cpu_eframe_save(nasid_t nasid, int slice)
57 {
58 struct reg_struct *nr;
59 int i;
61 /* Get the pointer to the current cpu's register set. */
62 nr = (struct reg_struct *)
63 (TO_UNCAC(TO_NODE(nasid, IP27_NMI_KREGS_OFFSET)) +
64 slice * IP27_NMI_KREGS_CPU_SIZE);
66 printk("NMI nasid %d: slice %d\n", nasid, slice);
68 /*
69 * Saved main processor registers
70 */
71 for (i = 0; i < 32; ) {
72 if ((i % 4) == 0)
73 printk("$%2d :", i);
74 printk(" %016lx", nr->gpr[i]);
76 i++;
77 if ((i % 4) == 0)
78 printk("\n");
79 }
81 printk("Hi : (value lost)\n");
82 printk("Lo : (value lost)\n");
84 /*
85 * Saved cp0 registers
86 */
87 printk("epc : %016lx ", nr->epc);
88 print_symbol("%s ", nr->epc);
89 printk("%s\n", print_tainted());
90 printk("ErrEPC: %016lx ", nr->error_epc);
91 print_symbol("%s\n", nr->error_epc);
92 printk("ra : %016lx ", nr->gpr[31]);
93 print_symbol("%s\n", nr->gpr[31]);
94 printk("Status: %08lx ", nr->sr);
96 if (nr->sr & ST0_KX)
97 printk("KX ");
98 if (nr->sr & ST0_SX)
99 printk("SX ");
100 if (nr->sr & ST0_UX)
101 printk("UX ");
103 switch (nr->sr & ST0_KSU) {
104 case KSU_USER:
105 printk("USER ");
106 break;
107 case KSU_SUPERVISOR:
108 printk("SUPERVISOR ");
109 break;
110 case KSU_KERNEL:
111 printk("KERNEL ");
112 break;
113 default:
114 printk("BAD_MODE ");
115 break;
116 }
118 if (nr->sr & ST0_ERL)
119 printk("ERL ");
120 if (nr->sr & ST0_EXL)
121 printk("EXL ");
122 if (nr->sr & ST0_IE)
123 printk("IE ");
124 printk("\n");
126 printk("Cause : %08lx\n", nr->cause);
127 printk("PrId : %08x\n", read_c0_prid());
128 printk("BadVA : %016lx\n", nr->badva);
129 printk("CErr : %016lx\n", nr->cache_err);
130 printk("NMI_SR: %016lx\n", nr->nmi_sr);
132 printk("\n");
133 }
135 void nmi_dump_hub_irq(nasid_t nasid, int slice)
136 {
137 hubreg_t mask0, mask1, pend0, pend1;
139 if (slice == 0) { /* Slice A */
140 mask0 = REMOTE_HUB_L(nasid, PI_INT_MASK0_A);
141 mask1 = REMOTE_HUB_L(nasid, PI_INT_MASK1_A);
142 } else { /* Slice B */
143 mask0 = REMOTE_HUB_L(nasid, PI_INT_MASK0_B);
144 mask1 = REMOTE_HUB_L(nasid, PI_INT_MASK1_B);
145 }
147 pend0 = REMOTE_HUB_L(nasid, PI_INT_PEND0);
148 pend1 = REMOTE_HUB_L(nasid, PI_INT_PEND1);
150 printk("PI_INT_MASK0: %16lx PI_INT_MASK1: %16lx\n", mask0, mask1);
151 printk("PI_INT_PEND0: %16lx PI_INT_PEND1: %16lx\n", pend0, pend1);
152 printk("\n\n");
153 }
155 /*
156 * Copy the cpu registers which have been saved in the IP27prom format
157 * into the eframe format for the node under consideration.
158 */
159 void nmi_node_eframe_save(cnodeid_t cnode)
160 {
161 nasid_t nasid;
162 int slice;
164 /* Make sure that we have a valid node */
165 if (cnode == CNODEID_NONE)
166 return;
168 nasid = COMPACT_TO_NASID_NODEID(cnode);
169 if (nasid == INVALID_NASID)
170 return;
172 /* Save the registers into eframe for each cpu */
173 for (slice = 0; slice < NODE_NUM_CPUS(slice); slice++) {
174 nmi_cpu_eframe_save(nasid, slice);
175 nmi_dump_hub_irq(nasid, slice);
176 }
177 }
179 /*
180 * Save the nmi cpu registers for all cpus in the system.
181 */
182 void
183 nmi_eframes_save(void)
184 {
185 cnodeid_t cnode;
187 for_each_online_node(cnode)
188 nmi_node_eframe_save(cnode);
189 }
191 void
192 cont_nmi_dump(void)
193 {
194 #ifndef REAL_NMI_SIGNAL
195 static atomic_t nmied_cpus = ATOMIC_INIT(0);
197 atomic_inc(&nmied_cpus);
198 #endif
199 /*
200 * Use enter_panic_mode to allow only 1 cpu to proceed
201 */
202 enter_panic_mode();
204 #ifdef REAL_NMI_SIGNAL
205 /*
206 * Wait up to 15 seconds for the other cpus to respond to the NMI.
207 * If a cpu has not responded after 10 sec, send it 1 additional NMI.
208 * This is for 2 reasons:
209 * - sometimes a MMSC fail to NMI all cpus.
210 * - on 512p SN0 system, the MMSC will only send NMIs to
211 * half the cpus. Unfortunately, we don't know which cpus may be
212 * NMIed - it depends on how the site chooses to configure.
213 *
214 * Note: it has been measure that it takes the MMSC up to 2.3 secs to
215 * send NMIs to all cpus on a 256p system.
216 */
217 for (i=0; i < 1500; i++) {
218 for_each_online_node(node)
219 if (NODEPDA(node)->dump_count == 0)
220 break;
221 if (node == MAX_NUMNODES)
222 break;
223 if (i == 1000) {
224 for_each_online_node(node)
225 if (NODEPDA(node)->dump_count == 0) {
226 cpu = node_to_first_cpu(node);
227 for (n=0; n < CNODE_NUM_CPUS(node); cpu++, n++) {
228 CPUMASK_SETB(nmied_cpus, cpu);
229 /*
230 * cputonasid, cputoslice
231 * needs kernel cpuid
232 */
233 SEND_NMI((cputonasid(cpu)), (cputoslice(cpu)));
234 }
235 }
237 }
238 udelay(10000);
239 }
240 #else
241 while (atomic_read(&nmied_cpus) != num_online_cpus());
242 #endif
244 /*
245 * Save the nmi cpu registers for all cpu in the eframe format.
246 */
247 nmi_eframes_save();
248 LOCAL_HUB_S(NI_PORT_RESET, NPR_PORTRESET | NPR_LOCALRESET);
249 }