ia64/linux-2.6.18-xen.hg

annotate arch/mips/sgi-ip27/ip27-irq.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive less pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
rev   line source
ian@0 1 /*
ian@0 2 * ip27-irq.c: Highlevel interrupt handling for IP27 architecture.
ian@0 3 *
ian@0 4 * Copyright (C) 1999, 2000 Ralf Baechle (ralf@gnu.org)
ian@0 5 * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
ian@0 6 * Copyright (C) 1999 - 2001 Kanoj Sarcar
ian@0 7 */
ian@0 8
ian@0 9 #undef DEBUG
ian@0 10
ian@0 11 #include <linux/init.h>
ian@0 12 #include <linux/irq.h>
ian@0 13 #include <linux/errno.h>
ian@0 14 #include <linux/signal.h>
ian@0 15 #include <linux/sched.h>
ian@0 16 #include <linux/types.h>
ian@0 17 #include <linux/interrupt.h>
ian@0 18 #include <linux/ioport.h>
ian@0 19 #include <linux/timex.h>
ian@0 20 #include <linux/slab.h>
ian@0 21 #include <linux/random.h>
ian@0 22 #include <linux/smp_lock.h>
ian@0 23 #include <linux/kernel.h>
ian@0 24 #include <linux/kernel_stat.h>
ian@0 25 #include <linux/delay.h>
ian@0 26 #include <linux/bitops.h>
ian@0 27
ian@0 28 #include <asm/bootinfo.h>
ian@0 29 #include <asm/io.h>
ian@0 30 #include <asm/mipsregs.h>
ian@0 31 #include <asm/system.h>
ian@0 32
ian@0 33 #include <asm/ptrace.h>
ian@0 34 #include <asm/processor.h>
ian@0 35 #include <asm/pci/bridge.h>
ian@0 36 #include <asm/sn/addrs.h>
ian@0 37 #include <asm/sn/agent.h>
ian@0 38 #include <asm/sn/arch.h>
ian@0 39 #include <asm/sn/hub.h>
ian@0 40 #include <asm/sn/intr.h>
ian@0 41
ian@0 42 /*
ian@0 43 * Linux has a controller-independent x86 interrupt architecture.
ian@0 44 * every controller has a 'controller-template', that is used
ian@0 45 * by the main code to do the right thing. Each driver-visible
ian@0 46 * interrupt source is transparently wired to the appropriate
ian@0 47 * controller. Thus drivers need not be aware of the
ian@0 48 * interrupt-controller.
ian@0 49 *
ian@0 50 * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
ian@0 51 * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
ian@0 52 * (IO-APICs assumed to be messaging to Pentium local-APICs)
ian@0 53 *
ian@0 54 * the code is designed to be easily extended with new/different
ian@0 55 * interrupt controllers, without having to do assembly magic.
ian@0 56 */
ian@0 57
ian@0 58 extern asmlinkage void ip27_irq(void);
ian@0 59
ian@0 60 extern struct bridge_controller *irq_to_bridge[];
ian@0 61 extern int irq_to_slot[];
ian@0 62
ian@0 63 /*
ian@0 64 * use these macros to get the encoded nasid and widget id
ian@0 65 * from the irq value
ian@0 66 */
ian@0 67 #define IRQ_TO_BRIDGE(i) irq_to_bridge[(i)]
ian@0 68 #define SLOT_FROM_PCI_IRQ(i) irq_to_slot[i]
ian@0 69
ian@0 70 static inline int alloc_level(int cpu, int irq)
ian@0 71 {
ian@0 72 struct hub_data *hub = hub_data(cpu_to_node(cpu));
ian@0 73 struct slice_data *si = cpu_data[cpu].data;
ian@0 74 int level;
ian@0 75
ian@0 76 level = find_first_zero_bit(hub->irq_alloc_mask, LEVELS_PER_SLICE);
ian@0 77 if (level >= LEVELS_PER_SLICE)
ian@0 78 panic("Cpu %d flooded with devices\n", cpu);
ian@0 79
ian@0 80 __set_bit(level, hub->irq_alloc_mask);
ian@0 81 si->level_to_irq[level] = irq;
ian@0 82
ian@0 83 return level;
ian@0 84 }
ian@0 85
ian@0 86 static inline int find_level(cpuid_t *cpunum, int irq)
ian@0 87 {
ian@0 88 int cpu, i;
ian@0 89
ian@0 90 for_each_online_cpu(cpu) {
ian@0 91 struct slice_data *si = cpu_data[cpu].data;
ian@0 92
ian@0 93 for (i = BASE_PCI_IRQ; i < LEVELS_PER_SLICE; i++)
ian@0 94 if (si->level_to_irq[i] == irq) {
ian@0 95 *cpunum = cpu;
ian@0 96
ian@0 97 return i;
ian@0 98 }
ian@0 99 }
ian@0 100
ian@0 101 panic("Could not identify cpu/level for irq %d\n", irq);
ian@0 102 }
ian@0 103
/*
 * Return the index of the most significant set bit of x.
 *
 * Both x == 0 and x == 1 yield 0; the callers visible in this file test
 * for a zero mask before calling.
 *
 * The search is a binary search over the full width of unsigned long.
 * The previous open-coded version started with a 16-bit step and therefore
 * only inspected the low 32 bits, returning 31 for any value with a bit set
 * above bit 31 on platforms where unsigned long is 64 bits wide.
 */
static int ms1bit(unsigned long x)
{
	int b = 0, s;

	/*
	 * Start at half the width of x and halve the step each round.
	 * s is always smaller than the width of x, so the shifts are defined.
	 */
	for (s = (int)(sizeof(x) * 8 / 2); s > 0; s >>= 1) {
		if (x >> s) {
			b += s;
			x >>= s;
		}
	}

	return b;
}
ian@0 119
ian@0 120 /*
ian@0 121 * This code is unnecessarily complex, because we do IRQF_DISABLED
ian@0 122 * intr enabling. Basically, once we grab the set of intrs we need
ian@0 123 * to service, we must mask _all_ these interrupts; firstly, to make
ian@0 124 * sure the same intr does not intr again, causing recursion that
ian@0 125 * can lead to stack overflow. Secondly, we can not just mask the
ian@0 126 * one intr we are do_IRQing, because the non-masked intrs in the
ian@0 127 * first set might intr again, causing multiple servicings of the
ian@0 128 * same intr. This effect is mostly seen for intercpu intrs.
ian@0 129 * Kanoj 05.13.00
ian@0 130 */
ian@0 131
ian@0 132 static void ip27_do_irq_mask0(struct pt_regs *regs)
ian@0 133 {
ian@0 134 int irq, swlevel;
ian@0 135 hubreg_t pend0, mask0;
ian@0 136 cpuid_t cpu = smp_processor_id();
ian@0 137 int pi_int_mask0 =
ian@0 138 (cputoslice(cpu) == 0) ? PI_INT_MASK0_A : PI_INT_MASK0_B;
ian@0 139
ian@0 140 /* copied from Irix intpend0() */
ian@0 141 pend0 = LOCAL_HUB_L(PI_INT_PEND0);
ian@0 142 mask0 = LOCAL_HUB_L(pi_int_mask0);
ian@0 143
ian@0 144 pend0 &= mask0; /* Pick intrs we should look at */
ian@0 145 if (!pend0)
ian@0 146 return;
ian@0 147
ian@0 148 swlevel = ms1bit(pend0);
ian@0 149 #ifdef CONFIG_SMP
ian@0 150 if (pend0 & (1UL << CPU_RESCHED_A_IRQ)) {
ian@0 151 LOCAL_HUB_CLR_INTR(CPU_RESCHED_A_IRQ);
ian@0 152 } else if (pend0 & (1UL << CPU_RESCHED_B_IRQ)) {
ian@0 153 LOCAL_HUB_CLR_INTR(CPU_RESCHED_B_IRQ);
ian@0 154 } else if (pend0 & (1UL << CPU_CALL_A_IRQ)) {
ian@0 155 LOCAL_HUB_CLR_INTR(CPU_CALL_A_IRQ);
ian@0 156 smp_call_function_interrupt();
ian@0 157 } else if (pend0 & (1UL << CPU_CALL_B_IRQ)) {
ian@0 158 LOCAL_HUB_CLR_INTR(CPU_CALL_B_IRQ);
ian@0 159 smp_call_function_interrupt();
ian@0 160 } else
ian@0 161 #endif
ian@0 162 {
ian@0 163 /* "map" swlevel to irq */
ian@0 164 struct slice_data *si = cpu_data[cpu].data;
ian@0 165
ian@0 166 irq = si->level_to_irq[swlevel];
ian@0 167 do_IRQ(irq, regs);
ian@0 168 }
ian@0 169
ian@0 170 LOCAL_HUB_L(PI_INT_PEND0);
ian@0 171 }
ian@0 172
ian@0 173 static void ip27_do_irq_mask1(struct pt_regs *regs)
ian@0 174 {
ian@0 175 int irq, swlevel;
ian@0 176 hubreg_t pend1, mask1;
ian@0 177 cpuid_t cpu = smp_processor_id();
ian@0 178 int pi_int_mask1 = (cputoslice(cpu) == 0) ? PI_INT_MASK1_A : PI_INT_MASK1_B;
ian@0 179 struct slice_data *si = cpu_data[cpu].data;
ian@0 180
ian@0 181 /* copied from Irix intpend0() */
ian@0 182 pend1 = LOCAL_HUB_L(PI_INT_PEND1);
ian@0 183 mask1 = LOCAL_HUB_L(pi_int_mask1);
ian@0 184
ian@0 185 pend1 &= mask1; /* Pick intrs we should look at */
ian@0 186 if (!pend1)
ian@0 187 return;
ian@0 188
ian@0 189 swlevel = ms1bit(pend1);
ian@0 190 /* "map" swlevel to irq */
ian@0 191 irq = si->level_to_irq[swlevel];
ian@0 192 LOCAL_HUB_CLR_INTR(swlevel);
ian@0 193 do_IRQ(irq, regs);
ian@0 194
ian@0 195 LOCAL_HUB_L(PI_INT_PEND1);
ian@0 196 }
ian@0 197
/*
 * Handler for the profiling timer interrupt.  No profiling support is
 * implemented here, so receiving one is treated as fatal.  @regs is unused.
 */
static void ip27_prof_timer(struct pt_regs *regs)
{
	int cpu = smp_processor_id();

	panic("CPU %d got a profiling interrupt", cpu);
}
ian@0 202
/*
 * Handler for the hub error interrupt.  There is no recovery path: the
 * kernel panics and reports the cpu that took it.  @regs is unused.
 */
static void ip27_hub_error(struct pt_regs *regs)
{
	int cpu = smp_processor_id();

	panic("CPU %d got a hub error interrupt", cpu);
}
ian@0 207
ian@0 208 static int intr_connect_level(int cpu, int bit)
ian@0 209 {
ian@0 210 nasid_t nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu));
ian@0 211 struct slice_data *si = cpu_data[cpu].data;
ian@0 212 unsigned long flags;
ian@0 213
ian@0 214 set_bit(bit, si->irq_enable_mask);
ian@0 215
ian@0 216 local_irq_save(flags);
ian@0 217 if (!cputoslice(cpu)) {
ian@0 218 REMOTE_HUB_S(nasid, PI_INT_MASK0_A, si->irq_enable_mask[0]);
ian@0 219 REMOTE_HUB_S(nasid, PI_INT_MASK1_A, si->irq_enable_mask[1]);
ian@0 220 } else {
ian@0 221 REMOTE_HUB_S(nasid, PI_INT_MASK0_B, si->irq_enable_mask[0]);
ian@0 222 REMOTE_HUB_S(nasid, PI_INT_MASK1_B, si->irq_enable_mask[1]);
ian@0 223 }
ian@0 224 local_irq_restore(flags);
ian@0 225
ian@0 226 return 0;
ian@0 227 }
ian@0 228
ian@0 229 static int intr_disconnect_level(int cpu, int bit)
ian@0 230 {
ian@0 231 nasid_t nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu));
ian@0 232 struct slice_data *si = cpu_data[cpu].data;
ian@0 233
ian@0 234 clear_bit(bit, si->irq_enable_mask);
ian@0 235
ian@0 236 if (!cputoslice(cpu)) {
ian@0 237 REMOTE_HUB_S(nasid, PI_INT_MASK0_A, si->irq_enable_mask[0]);
ian@0 238 REMOTE_HUB_S(nasid, PI_INT_MASK1_A, si->irq_enable_mask[1]);
ian@0 239 } else {
ian@0 240 REMOTE_HUB_S(nasid, PI_INT_MASK0_B, si->irq_enable_mask[0]);
ian@0 241 REMOTE_HUB_S(nasid, PI_INT_MASK1_B, si->irq_enable_mask[1]);
ian@0 242 }
ian@0 243
ian@0 244 return 0;
ian@0 245 }
ian@0 246
ian@0 247 /* Startup one of the (PCI ...) IRQs routes over a bridge. */
ian@0 248 static unsigned int startup_bridge_irq(unsigned int irq)
ian@0 249 {
ian@0 250 struct bridge_controller *bc;
ian@0 251 bridgereg_t device;
ian@0 252 bridge_t *bridge;
ian@0 253 int pin, swlevel;
ian@0 254 cpuid_t cpu;
ian@0 255
ian@0 256 pin = SLOT_FROM_PCI_IRQ(irq);
ian@0 257 bc = IRQ_TO_BRIDGE(irq);
ian@0 258 bridge = bc->base;
ian@0 259
ian@0 260 pr_debug("bridge_startup(): irq= 0x%x pin=%d\n", irq, pin);
ian@0 261 /*
ian@0 262 * "map" irq to a swlevel greater than 6 since the first 6 bits
ian@0 263 * of INT_PEND0 are taken
ian@0 264 */
ian@0 265 swlevel = find_level(&cpu, irq);
ian@0 266 bridge->b_int_addr[pin].addr = (0x20000 | swlevel | (bc->nasid << 8));
ian@0 267 bridge->b_int_enable |= (1 << pin);
ian@0 268 bridge->b_int_enable |= 0x7ffffe00; /* more stuff in int_enable */
ian@0 269
ian@0 270 /*
ian@0 271 * Enable sending of an interrupt clear packt to the hub on a high to
ian@0 272 * low transition of the interrupt pin.
ian@0 273 *
ian@0 274 * IRIX sets additional bits in the address which are documented as
ian@0 275 * reserved in the bridge docs.
ian@0 276 */
ian@0 277 bridge->b_int_mode |= (1UL << pin);
ian@0 278
ian@0 279 /*
ian@0 280 * We assume the bridge to have a 1:1 mapping between devices
ian@0 281 * (slots) and intr pins.
ian@0 282 */
ian@0 283 device = bridge->b_int_device;
ian@0 284 device &= ~(7 << (pin*3));
ian@0 285 device |= (pin << (pin*3));
ian@0 286 bridge->b_int_device = device;
ian@0 287
ian@0 288 bridge->b_wid_tflush;
ian@0 289
ian@0 290 return 0; /* Never anything pending. */
ian@0 291 }
ian@0 292
ian@0 293 /* Shutdown one of the (PCI ...) IRQs routes over a bridge. */
ian@0 294 static void shutdown_bridge_irq(unsigned int irq)
ian@0 295 {
ian@0 296 struct bridge_controller *bc = IRQ_TO_BRIDGE(irq);
ian@0 297 struct hub_data *hub = hub_data(cpu_to_node(bc->irq_cpu));
ian@0 298 bridge_t *bridge = bc->base;
ian@0 299 int pin, swlevel;
ian@0 300 cpuid_t cpu;
ian@0 301
ian@0 302 pr_debug("bridge_shutdown: irq 0x%x\n", irq);
ian@0 303 pin = SLOT_FROM_PCI_IRQ(irq);
ian@0 304
ian@0 305 /*
ian@0 306 * map irq to a swlevel greater than 6 since the first 6 bits
ian@0 307 * of INT_PEND0 are taken
ian@0 308 */
ian@0 309 swlevel = find_level(&cpu, irq);
ian@0 310 intr_disconnect_level(cpu, swlevel);
ian@0 311
ian@0 312 __clear_bit(swlevel, hub->irq_alloc_mask);
ian@0 313
ian@0 314 bridge->b_int_enable &= ~(1 << pin);
ian@0 315 bridge->b_wid_tflush;
ian@0 316 }
ian@0 317
ian@0 318 static inline void enable_bridge_irq(unsigned int irq)
ian@0 319 {
ian@0 320 cpuid_t cpu;
ian@0 321 int swlevel;
ian@0 322
ian@0 323 swlevel = find_level(&cpu, irq); /* Criminal offence */
ian@0 324 intr_connect_level(cpu, swlevel);
ian@0 325 }
ian@0 326
ian@0 327 static inline void disable_bridge_irq(unsigned int irq)
ian@0 328 {
ian@0 329 cpuid_t cpu;
ian@0 330 int swlevel;
ian@0 331
ian@0 332 swlevel = find_level(&cpu, irq); /* Criminal offence */
ian@0 333 intr_disconnect_level(cpu, swlevel);
ian@0 334 }
ian@0 335
/*
 * Acknowledge callback for bridge IRQs.  No separate acknowledge is done
 * here; the interrupt is simply masked until end_bridge_irq() runs.
 */
static void mask_and_ack_bridge_irq(unsigned int irq)
{
	disable_bridge_irq(irq);
}
ian@0 340
ian@0 341 static void end_bridge_irq(unsigned int irq)
ian@0 342 {
ian@0 343 if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)) &&
ian@0 344 irq_desc[irq].action)
ian@0 345 enable_bridge_irq(irq);
ian@0 346 }
ian@0 347
ian@0 348 static struct irq_chip bridge_irq_type = {
ian@0 349 .typename = "bridge",
ian@0 350 .startup = startup_bridge_irq,
ian@0 351 .shutdown = shutdown_bridge_irq,
ian@0 352 .enable = enable_bridge_irq,
ian@0 353 .disable = disable_bridge_irq,
ian@0 354 .ack = mask_and_ack_bridge_irq,
ian@0 355 .end = end_bridge_irq,
ian@0 356 };
ian@0 357
ian@0 358 static unsigned long irq_map[NR_IRQS / BITS_PER_LONG];
ian@0 359
ian@0 360 int allocate_irqno(void)
ian@0 361 {
ian@0 362 int irq;
ian@0 363
ian@0 364 again:
ian@0 365 irq = find_first_zero_bit(irq_map, NR_IRQS);
ian@0 366
ian@0 367 if (irq >= NR_IRQS)
ian@0 368 return -ENOSPC;
ian@0 369
ian@0 370 if (test_and_set_bit(irq, irq_map))
ian@0 371 goto again;
ian@0 372
ian@0 373 return irq;
ian@0 374 }
ian@0 375
ian@0 376 void free_irqno(unsigned int irq)
ian@0 377 {
ian@0 378 clear_bit(irq, irq_map);
ian@0 379 }
ian@0 380
ian@0 381 void __devinit register_bridge_irq(unsigned int irq)
ian@0 382 {
ian@0 383 irq_desc[irq].status = IRQ_DISABLED;
ian@0 384 irq_desc[irq].action = 0;
ian@0 385 irq_desc[irq].depth = 1;
ian@0 386 irq_desc[irq].chip = &bridge_irq_type;
ian@0 387 }
ian@0 388
ian@0 389 int __devinit request_bridge_irq(struct bridge_controller *bc)
ian@0 390 {
ian@0 391 int irq = allocate_irqno();
ian@0 392 int swlevel, cpu;
ian@0 393 nasid_t nasid;
ian@0 394
ian@0 395 if (irq < 0)
ian@0 396 return irq;
ian@0 397
ian@0 398 /*
ian@0 399 * "map" irq to a swlevel greater than 6 since the first 6 bits
ian@0 400 * of INT_PEND0 are taken
ian@0 401 */
ian@0 402 cpu = bc->irq_cpu;
ian@0 403 swlevel = alloc_level(cpu, irq);
ian@0 404 if (unlikely(swlevel < 0)) {
ian@0 405 free_irqno(irq);
ian@0 406
ian@0 407 return -EAGAIN;
ian@0 408 }
ian@0 409
ian@0 410 /* Make sure it's not already pending when we connect it. */
ian@0 411 nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu));
ian@0 412 REMOTE_HUB_CLR_INTR(nasid, swlevel);
ian@0 413
ian@0 414 intr_connect_level(cpu, swlevel);
ian@0 415
ian@0 416 register_bridge_irq(irq);
ian@0 417
ian@0 418 return irq;
ian@0 419 }
ian@0 420
ian@0 421 extern void ip27_rt_timer_interrupt(struct pt_regs *regs);
ian@0 422
ian@0 423 asmlinkage void plat_irq_dispatch(struct pt_regs *regs)
ian@0 424 {
ian@0 425 unsigned long pending = read_c0_cause() & read_c0_status();
ian@0 426
ian@0 427 if (pending & CAUSEF_IP4)
ian@0 428 ip27_rt_timer_interrupt(regs);
ian@0 429 else if (pending & CAUSEF_IP2) /* PI_INT_PEND_0 or CC_PEND_{A|B} */
ian@0 430 ip27_do_irq_mask0(regs);
ian@0 431 else if (pending & CAUSEF_IP3) /* PI_INT_PEND_1 */
ian@0 432 ip27_do_irq_mask1(regs);
ian@0 433 else if (pending & CAUSEF_IP5)
ian@0 434 ip27_prof_timer(regs);
ian@0 435 else if (pending & CAUSEF_IP6)
ian@0 436 ip27_hub_error(regs);
ian@0 437 }
ian@0 438
ian@0 439 void __init arch_init_irq(void)
ian@0 440 {
ian@0 441 }
ian@0 442
ian@0 443 void install_ipi(void)
ian@0 444 {
ian@0 445 int slice = LOCAL_HUB_L(PI_CPU_NUM);
ian@0 446 int cpu = smp_processor_id();
ian@0 447 struct slice_data *si = cpu_data[cpu].data;
ian@0 448 struct hub_data *hub = hub_data(cpu_to_node(cpu));
ian@0 449 int resched, call;
ian@0 450
ian@0 451 resched = CPU_RESCHED_A_IRQ + slice;
ian@0 452 __set_bit(resched, hub->irq_alloc_mask);
ian@0 453 __set_bit(resched, si->irq_enable_mask);
ian@0 454 LOCAL_HUB_CLR_INTR(resched);
ian@0 455
ian@0 456 call = CPU_CALL_A_IRQ + slice;
ian@0 457 __set_bit(call, hub->irq_alloc_mask);
ian@0 458 __set_bit(call, si->irq_enable_mask);
ian@0 459 LOCAL_HUB_CLR_INTR(call);
ian@0 460
ian@0 461 if (slice == 0) {
ian@0 462 LOCAL_HUB_S(PI_INT_MASK0_A, si->irq_enable_mask[0]);
ian@0 463 LOCAL_HUB_S(PI_INT_MASK1_A, si->irq_enable_mask[1]);
ian@0 464 } else {
ian@0 465 LOCAL_HUB_S(PI_INT_MASK0_B, si->irq_enable_mask[0]);
ian@0 466 LOCAL_HUB_S(PI_INT_MASK1_B, si->irq_enable_mask[1]);
ian@0 467 }
ian@0 468 }