ia64/linux-2.6.18-xen.hg

arch/mips/sgi-ip27/ip27-irq.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently, if the balloon driver is unable to increase the guest's
reservation, it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However, it is possible that ballooning has in fact failed due to
memory pressure in the host, and it is therefore desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up, creating temporary memory pressure while
things stabilise. You would not expect a well-behaved toolstack to ask
a domain to balloon to more than its allocation, nor would you expect
it to deliberately over-commit memory by setting balloon targets which
exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we only partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.
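
To illustrate the intended behaviour, a minimal sketch of such a retry
loop follows. This is not the literal patch: the helper names
(balloon_target, current_pages, current_pages_add, increase_reservation,
decrease_reservation, balloon_timer) and the one-second retry interval
are assumptions for illustration only.

#include <linux/timer.h>

/* Hypothetical sketch of retrying balloon-up on a timer; every helper
 * named here is illustrative, not the driver's real interface. */
static struct timer_list balloon_timer;

static void balloon_process(void)
{
        long credit = balloon_target() - current_pages();

        if (credit > 0) {
                /* May succeed only partially under host memory pressure. */
                long got = increase_reservation(credit);

                if (got > 0)
                        current_pages_add(got); /* keep partial progress */
                if (got < credit)
                        /* No "hard limit": retry later instead of giving up. */
                        mod_timer(&balloon_timer, jiffies + HZ);
        } else if (credit < 0) {
                if (decrease_reservation(-credit) < 0)
                        mod_timer(&balloon_timer, jiffies + HZ);
        }
}
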

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
/*
 * ip27-irq.c: High-level interrupt handling for IP27 architecture.
 *
 * Copyright (C) 1999, 2000 Ralf Baechle (ralf@gnu.org)
 * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
 * Copyright (C) 1999 - 2001 Kanoj Sarcar
 */

#undef DEBUG

#include <linux/init.h>
#include <linux/irq.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/interrupt.h>
#include <linux/ioport.h>
#include <linux/timex.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/smp_lock.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
#include <linux/bitops.h>

#include <asm/bootinfo.h>
#include <asm/io.h>
#include <asm/mipsregs.h>
#include <asm/system.h>

#include <asm/ptrace.h>
#include <asm/processor.h>
#include <asm/pci/bridge.h>
#include <asm/sn/addrs.h>
#include <asm/sn/agent.h>
#include <asm/sn/arch.h>
#include <asm/sn/hub.h>
#include <asm/sn/intr.h>

/*
 * Linux has a controller-independent x86 interrupt architecture.
 * Every controller has a 'controller-template' that is used
 * by the main code to do the right thing. Each driver-visible
 * interrupt source is transparently wired to the appropriate
 * controller. Thus drivers need not be aware of the
 * interrupt-controller.
 *
 * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
 * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
 * (IO-APICs assumed to be messaging to Pentium local-APICs)
 *
 * The code is designed to be easily extended with new/different
 * interrupt controllers, without having to do assembly magic.
 */

extern asmlinkage void ip27_irq(void);

extern struct bridge_controller *irq_to_bridge[];
extern int irq_to_slot[];

/*
 * Use these macros to get the encoded nasid and widget id
 * from the irq value.
 */
#define IRQ_TO_BRIDGE(i)        irq_to_bridge[(i)]
#define SLOT_FROM_PCI_IRQ(i)    irq_to_slot[i]

static inline int alloc_level(int cpu, int irq)
{
        struct hub_data *hub = hub_data(cpu_to_node(cpu));
        struct slice_data *si = cpu_data[cpu].data;
        int level;

        level = find_first_zero_bit(hub->irq_alloc_mask, LEVELS_PER_SLICE);
        if (level >= LEVELS_PER_SLICE)
                panic("Cpu %d flooded with devices\n", cpu);

        __set_bit(level, hub->irq_alloc_mask);
        si->level_to_irq[level] = irq;

        return level;
}

static inline int find_level(cpuid_t *cpunum, int irq)
{
        int cpu, i;

        for_each_online_cpu(cpu) {
                struct slice_data *si = cpu_data[cpu].data;

                for (i = BASE_PCI_IRQ; i < LEVELS_PER_SLICE; i++)
                        if (si->level_to_irq[i] == irq) {
                                *cpunum = cpu;

                                return i;
                        }
        }

        panic("Could not identify cpu/level for irq %d\n", irq);
}

/*
 * Find the most significant bit set
 */
static int ms1bit(unsigned long x)
{
        int b = 0, s;

        s = 16; if (x >> 16 == 0) s = 0; b += s; x >>= s;
        s = 8; if (x >> 8 == 0) s = 0; b += s; x >>= s;
        s = 4; if (x >> 4 == 0) s = 0; b += s; x >>= s;
        s = 2; if (x >> 2 == 0) s = 0; b += s; x >>= s;
        s = 1; if (x >> 1 == 0) s = 0; b += s;

        return b;
}

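/*
 * Worked example of the binary search above: for ms1bit(0x50),
 * 0x50 == 0b1010000, so x >> 16 and x >> 8 are both zero and b stays 0;
 * x >> 4 == 0x5 is non-zero, so b becomes 4 and x becomes 0x5;
 * x >> 2 == 0x1 is non-zero, so b becomes 6 and x becomes 0x1;
 * x >> 1 == 0, so b stays 6 and the result is 6, the index of the
 * highest set bit.
 */
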
/*
 * This code is unnecessarily complex, because we do IRQF_DISABLED
 * intr enabling. Basically, once we grab the set of intrs we need
 * to service, we must mask _all_ of these interrupts; firstly, to make
 * sure the same intr does not intr again, causing recursion that
 * can lead to stack overflow. Secondly, we cannot just mask the
 * one intr we are servicing with do_IRQ(), because the non-masked
 * intrs in the first set might intr again, causing multiple servicings
 * of the same intr. This effect is mostly seen for intercpu intrs.
 * Kanoj 05.13.00
 */

static void ip27_do_irq_mask0(struct pt_regs *regs)
{
        int irq, swlevel;
        hubreg_t pend0, mask0;
        cpuid_t cpu = smp_processor_id();
        int pi_int_mask0 =
                (cputoslice(cpu) == 0) ? PI_INT_MASK0_A : PI_INT_MASK0_B;

        /* copied from Irix intpend0() */
        pend0 = LOCAL_HUB_L(PI_INT_PEND0);
        mask0 = LOCAL_HUB_L(pi_int_mask0);

        pend0 &= mask0;         /* Pick intrs we should look at */
        if (!pend0)
                return;

        swlevel = ms1bit(pend0);
#ifdef CONFIG_SMP
        if (pend0 & (1UL << CPU_RESCHED_A_IRQ)) {
                LOCAL_HUB_CLR_INTR(CPU_RESCHED_A_IRQ);
        } else if (pend0 & (1UL << CPU_RESCHED_B_IRQ)) {
                LOCAL_HUB_CLR_INTR(CPU_RESCHED_B_IRQ);
        } else if (pend0 & (1UL << CPU_CALL_A_IRQ)) {
                LOCAL_HUB_CLR_INTR(CPU_CALL_A_IRQ);
                smp_call_function_interrupt();
        } else if (pend0 & (1UL << CPU_CALL_B_IRQ)) {
                LOCAL_HUB_CLR_INTR(CPU_CALL_B_IRQ);
                smp_call_function_interrupt();
        } else
#endif
        {
                /* "map" swlevel to irq */
                struct slice_data *si = cpu_data[cpu].data;

                irq = si->level_to_irq[swlevel];
                do_IRQ(irq, regs);
        }
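
        /*
         * Re-read the pending register, presumably to flush the interrupt
         * clears above out to the hub before returning from the exception.
         */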
        LOCAL_HUB_L(PI_INT_PEND0);
}

static void ip27_do_irq_mask1(struct pt_regs *regs)
{
        int irq, swlevel;
        hubreg_t pend1, mask1;
        cpuid_t cpu = smp_processor_id();
        int pi_int_mask1 = (cputoslice(cpu) == 0) ? PI_INT_MASK1_A : PI_INT_MASK1_B;
        struct slice_data *si = cpu_data[cpu].data;

        /* copied from Irix intpend0() */
        pend1 = LOCAL_HUB_L(PI_INT_PEND1);
        mask1 = LOCAL_HUB_L(pi_int_mask1);

        pend1 &= mask1;         /* Pick intrs we should look at */
        if (!pend1)
                return;

        swlevel = ms1bit(pend1);
        /* "map" swlevel to irq */
        irq = si->level_to_irq[swlevel];
        LOCAL_HUB_CLR_INTR(swlevel);
        do_IRQ(irq, regs);

        LOCAL_HUB_L(PI_INT_PEND1);
}

static void ip27_prof_timer(struct pt_regs *regs)
{
        panic("CPU %d got a profiling interrupt", smp_processor_id());
}

static void ip27_hub_error(struct pt_regs *regs)
{
        panic("CPU %d got a hub error interrupt", smp_processor_id());
}

static int intr_connect_level(int cpu, int bit)
{
        nasid_t nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu));
        struct slice_data *si = cpu_data[cpu].data;
        unsigned long flags;

        set_bit(bit, si->irq_enable_mask);

        local_irq_save(flags);
        if (!cputoslice(cpu)) {
                REMOTE_HUB_S(nasid, PI_INT_MASK0_A, si->irq_enable_mask[0]);
                REMOTE_HUB_S(nasid, PI_INT_MASK1_A, si->irq_enable_mask[1]);
        } else {
                REMOTE_HUB_S(nasid, PI_INT_MASK0_B, si->irq_enable_mask[0]);
                REMOTE_HUB_S(nasid, PI_INT_MASK1_B, si->irq_enable_mask[1]);
        }
        local_irq_restore(flags);

        return 0;
}

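/*
 * Note that, unlike intr_connect_level() above, the mask writes in
 * intr_disconnect_level() are not bracketed by local_irq_save()/restore().
 */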
static int intr_disconnect_level(int cpu, int bit)
{
        nasid_t nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu));
        struct slice_data *si = cpu_data[cpu].data;

        clear_bit(bit, si->irq_enable_mask);

        if (!cputoslice(cpu)) {
                REMOTE_HUB_S(nasid, PI_INT_MASK0_A, si->irq_enable_mask[0]);
                REMOTE_HUB_S(nasid, PI_INT_MASK1_A, si->irq_enable_mask[1]);
        } else {
                REMOTE_HUB_S(nasid, PI_INT_MASK0_B, si->irq_enable_mask[0]);
                REMOTE_HUB_S(nasid, PI_INT_MASK1_B, si->irq_enable_mask[1]);
        }

        return 0;
}

/* Start up one of the (PCI ...) IRQs routed over a bridge. */
static unsigned int startup_bridge_irq(unsigned int irq)
{
        struct bridge_controller *bc;
        bridgereg_t device;
        bridge_t *bridge;
        int pin, swlevel;
        cpuid_t cpu;

        pin = SLOT_FROM_PCI_IRQ(irq);
        bc = IRQ_TO_BRIDGE(irq);
        bridge = bc->base;

        pr_debug("bridge_startup(): irq= 0x%x pin=%d\n", irq, pin);
        /*
         * "map" irq to a swlevel greater than 6 since the first 6 bits
         * of INT_PEND0 are taken
         */
        swlevel = find_level(&cpu, irq);
        bridge->b_int_addr[pin].addr = (0x20000 | swlevel | (bc->nasid << 8));
        bridge->b_int_enable |= (1 << pin);
        bridge->b_int_enable |= 0x7ffffe00;     /* more stuff in int_enable */

        /*
         * Enable sending of an interrupt clear packet to the hub on a high to
         * low transition of the interrupt pin.
         *
         * IRIX sets additional bits in the address which are documented as
         * reserved in the bridge docs.
         */
        bridge->b_int_mode |= (1UL << pin);

        /*
         * We assume the bridge to have a 1:1 mapping between devices
         * (slots) and intr pins.
         */
        device = bridge->b_int_device;
        device &= ~(7 << (pin*3));
        device |= (pin << (pin*3));
        bridge->b_int_device = device;

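        /*
         * The bare read of b_wid_tflush below appears to act as a
         * write-buffer flush, pushing the posted writes above out to the
         * bridge; its value is deliberately discarded.
         */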
        bridge->b_wid_tflush;

        return 0;       /* Never anything pending. */
}

/* Shut down one of the (PCI ...) IRQs routed over a bridge. */
static void shutdown_bridge_irq(unsigned int irq)
{
        struct bridge_controller *bc = IRQ_TO_BRIDGE(irq);
        struct hub_data *hub = hub_data(cpu_to_node(bc->irq_cpu));
        bridge_t *bridge = bc->base;
        int pin, swlevel;
        cpuid_t cpu;

        pr_debug("bridge_shutdown: irq 0x%x\n", irq);
        pin = SLOT_FROM_PCI_IRQ(irq);

        /*
         * "map" irq to a swlevel greater than 6 since the first 6 bits
         * of INT_PEND0 are taken
         */
        swlevel = find_level(&cpu, irq);
        intr_disconnect_level(cpu, swlevel);

        __clear_bit(swlevel, hub->irq_alloc_mask);

        bridge->b_int_enable &= ~(1 << pin);
        bridge->b_wid_tflush;
}

static inline void enable_bridge_irq(unsigned int irq)
{
        cpuid_t cpu;
        int swlevel;

        swlevel = find_level(&cpu, irq);        /* Criminal offence */
        intr_connect_level(cpu, swlevel);
}

static inline void disable_bridge_irq(unsigned int irq)
{
        cpuid_t cpu;
        int swlevel;

        swlevel = find_level(&cpu, irq);        /* Criminal offence */
        intr_disconnect_level(cpu, swlevel);
}

static void mask_and_ack_bridge_irq(unsigned int irq)
{
        disable_bridge_irq(irq);
}

static void end_bridge_irq(unsigned int irq)
{
        if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)) &&
            irq_desc[irq].action)
                enable_bridge_irq(irq);
}

static struct irq_chip bridge_irq_type = {
        .typename       = "bridge",
        .startup        = startup_bridge_irq,
        .shutdown       = shutdown_bridge_irq,
        .enable         = enable_bridge_irq,
        .disable        = disable_bridge_irq,
        .ack            = mask_and_ack_bridge_irq,
        .end            = end_bridge_irq,
};

static unsigned long irq_map[NR_IRQS / BITS_PER_LONG];

int allocate_irqno(void)
{
        int irq;

again:
        irq = find_first_zero_bit(irq_map, NR_IRQS);

        if (irq >= NR_IRQS)
                return -ENOSPC;

        if (test_and_set_bit(irq, irq_map))
                goto again;

        return irq;
}

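/*
 * The find/test_and_set pair above makes allocation safe without a lock:
 * if another CPU claims the same bit between the search and the atomic
 * test_and_set_bit(), the set fails and the search simply restarts.
 */
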
void free_irqno(unsigned int irq)
{
        clear_bit(irq, irq_map);
}

void __devinit register_bridge_irq(unsigned int irq)
{
        irq_desc[irq].status = IRQ_DISABLED;
        irq_desc[irq].action = NULL;
        irq_desc[irq].depth = 1;
        irq_desc[irq].chip = &bridge_irq_type;
}

int __devinit request_bridge_irq(struct bridge_controller *bc)
{
        int irq = allocate_irqno();
        int swlevel, cpu;
        nasid_t nasid;

        if (irq < 0)
                return irq;

        /*
         * "map" irq to a swlevel greater than 6 since the first 6 bits
         * of INT_PEND0 are taken
         */
        cpu = bc->irq_cpu;
        swlevel = alloc_level(cpu, irq);
        if (unlikely(swlevel < 0)) {
                free_irqno(irq);

                return -EAGAIN;
        }

        /* Make sure it's not already pending when we connect it. */
        nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu));
        REMOTE_HUB_CLR_INTR(nasid, swlevel);

        intr_connect_level(cpu, swlevel);

        register_bridge_irq(irq);

        return irq;
}

extern void ip27_rt_timer_interrupt(struct pt_regs *regs);

asmlinkage void plat_irq_dispatch(struct pt_regs *regs)
{
        unsigned long pending = read_c0_cause() & read_c0_status();

        if (pending & CAUSEF_IP4)
                ip27_rt_timer_interrupt(regs);
        else if (pending & CAUSEF_IP2)  /* PI_INT_PEND_0 or CC_PEND_{A|B} */
                ip27_do_irq_mask0(regs);
        else if (pending & CAUSEF_IP3)  /* PI_INT_PEND_1 */
                ip27_do_irq_mask1(regs);
        else if (pending & CAUSEF_IP5)
                ip27_prof_timer(regs);
        else if (pending & CAUSEF_IP6)
                ip27_hub_error(regs);
}

void __init arch_init_irq(void)
{
}

void install_ipi(void)
{
        int slice = LOCAL_HUB_L(PI_CPU_NUM);
        int cpu = smp_processor_id();
        struct slice_data *si = cpu_data[cpu].data;
        struct hub_data *hub = hub_data(cpu_to_node(cpu));
        int resched, call;

        resched = CPU_RESCHED_A_IRQ + slice;
        __set_bit(resched, hub->irq_alloc_mask);
        __set_bit(resched, si->irq_enable_mask);
        LOCAL_HUB_CLR_INTR(resched);

        call = CPU_CALL_A_IRQ + slice;
        __set_bit(call, hub->irq_alloc_mask);
        __set_bit(call, si->irq_enable_mask);
        LOCAL_HUB_CLR_INTR(call);

        if (slice == 0) {
                LOCAL_HUB_S(PI_INT_MASK0_A, si->irq_enable_mask[0]);
                LOCAL_HUB_S(PI_INT_MASK1_A, si->irq_enable_mask[1]);
        } else {
                LOCAL_HUB_S(PI_INT_MASK0_B, si->irq_enable_mask[0]);
                LOCAL_HUB_S(PI_INT_MASK1_B, si->irq_enable_mask[1]);
        }
}