ia64/linux-2.6.18-xen.hg

arch/sparc64/kernel/kprobes.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently, if the balloon driver is unable to increase the guest's
reservation, it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation, and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However, it is possible that ballooning has in fact failed due to
memory pressure in the host, and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up, creating temporary memory pressure while
things stabilise. You would not expect a well-behaved toolstack to ask
a domain to balloon to more than its allocation, nor would you expect
it to deliberately over-commit memory by setting balloon targets which
exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we only partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), we may as well keep
those pages rather than returning them to Xen.
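
As a rough illustration, the retry path might now look something like
the sketch below. This is a hedged reconstruction, not the patch
itself: the helper names (current_target(), increase_reservation(),
decrease_reservation(), balloon_mutex, balloon_timer, bs) follow the
existing driver, but the body is simplified.

	static void balloon_process(void *unused)
	{
		long credit;

		down(&balloon_mutex);

		credit = current_target() - bs.current_pages;
		if (credit > 0)
			increase_reservation(credit); /* keep any partial grant */
		if (credit < 0)
			decrease_reservation(-credit);

		/* Instead of recording a "hard limit" and giving up,
		 * retry on a timer until the target is actually met. */
		if (current_target() != bs.current_pages)
			mod_timer(&balloon_timer, jiffies + HZ);

		up(&balloon_mutex);
	}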

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author	Keir Fraser <keir.fraser@citrix.com>
date	Fri Jun 05 14:01:20 2009 +0100
parents	831230e53067
/* arch/sparc64/kernel/kprobes.c
 *
 * Copyright (C) 2004 David S. Miller <davem@davemloft.net>
 */

#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/module.h>
#include <asm/kdebug.h>
#include <asm/signal.h>
#include <asm/cacheflush.h>
#include <asm/uaccess.h>

/* We do not have hardware single-stepping on sparc64.
 * So we implement software single-stepping with breakpoint
 * traps.  The top-level scheme is similar to that used
 * in the x86 kprobes implementation.
 *
 * In the kprobe->ainsn.insn[] array we store the original
 * instruction at index zero and a break instruction at
 * index one.
 *
 * When we hit a kprobe we:
 * - Run the pre-handler
 * - Remember "regs->tnpc" and the interrupt level stored in
 *   "regs->tstate" so we can restore them later
 * - Disable PIL interrupts
 * - Set regs->tpc to point to kprobe->ainsn.insn[0]
 * - Set regs->tnpc to point to kprobe->ainsn.insn[1]
 * - Mark that we are actively in a kprobe
 *
 * At this point we wait for the second breakpoint at
 * kprobe->ainsn.insn[1] to hit.  When it does we:
 * - Run the post-handler
 * - Set regs->tpc to the "remembered" regs->tnpc stored above,
 *   and restore the PIL interrupt level in "regs->tstate" as well
 * - Make any adjustments necessary to regs->tnpc in order
 *   to handle relative branches correctly.  See below.
 * - Mark that we are no longer actively in a kprobe.
 */
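
/* Usage sketch (not part of this file): a client module would attach
 * a probe roughly as below and let the arch code here do the
 * breakpoint and single-step work.  In this kernel the caller must
 * fill in kp.addr itself; the address source is left abstract.
 *
 *	static int my_pre(struct kprobe *p, struct pt_regs *regs)
 *	{
 *		printk("probe hit at %p, tpc=%lx\n", p->addr, regs->tpc);
 *		return 0;	(returning 0 lets the arch code single-step)
 *	}
 *
 *	static struct kprobe kp = { .pre_handler = my_pre };
 *
 *	kp.addr = (kprobe_opcode_t *) address_of_probed_instruction;
 *	register_kprobe(&kp);
 */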

DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);

int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
	p->ainsn.insn[0] = *p->addr;
	p->ainsn.insn[1] = BREAKPOINT_INSTRUCTION_2;
	p->opcode = *p->addr;
	return 0;
}

void __kprobes arch_arm_kprobe(struct kprobe *p)
{
	*p->addr = BREAKPOINT_INSTRUCTION;
	flushi(p->addr);
}

void __kprobes arch_disarm_kprobe(struct kprobe *p)
{
	*p->addr = p->opcode;
	flushi(p->addr);
}

static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
{
	kcb->prev_kprobe.kp = kprobe_running();
	kcb->prev_kprobe.status = kcb->kprobe_status;
	kcb->prev_kprobe.orig_tnpc = kcb->kprobe_orig_tnpc;
	kcb->prev_kprobe.orig_tstate_pil = kcb->kprobe_orig_tstate_pil;
}

static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
{
	__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
	kcb->kprobe_status = kcb->prev_kprobe.status;
	kcb->kprobe_orig_tnpc = kcb->prev_kprobe.orig_tnpc;
	kcb->kprobe_orig_tstate_pil = kcb->prev_kprobe.orig_tstate_pil;
}

static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
					 struct kprobe_ctlblk *kcb)
{
	__get_cpu_var(current_kprobe) = p;
	kcb->kprobe_orig_tnpc = regs->tnpc;
	kcb->kprobe_orig_tstate_pil = (regs->tstate & TSTATE_PIL);
}

static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs,
					 struct kprobe_ctlblk *kcb)
{
	regs->tstate |= TSTATE_PIL;

	/* Single-step inline if it is a breakpoint instruction. */
	if (p->opcode == BREAKPOINT_INSTRUCTION) {
		regs->tpc = (unsigned long) p->addr;
		regs->tnpc = kcb->kprobe_orig_tnpc;
	} else {
		regs->tpc = (unsigned long) &p->ainsn.insn[0];
		regs->tnpc = (unsigned long) &p->ainsn.insn[1];
	}
}
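
/* Setting the full PIL field in %tstate above masks all maskable
 * interrupts while we single-step, so the next kernel trap we expect
 * is the breakpoint at ainsn.insn[1] (or a fault raised by the
 * stepped instruction, handled in kprobe_fault_handler() below).
 */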

static int __kprobes kprobe_handler(struct pt_regs *regs)
{
	struct kprobe *p;
	void *addr = (void *) regs->tpc;
	int ret = 0;
	struct kprobe_ctlblk *kcb;

	/*
	 * We don't want to be preempted for the entire
	 * duration of kprobe processing.
	 */
	preempt_disable();
	kcb = get_kprobe_ctlblk();

	if (kprobe_running()) {
		p = get_kprobe(addr);
		if (p) {
			if (kcb->kprobe_status == KPROBE_HIT_SS) {
				regs->tstate = ((regs->tstate & ~TSTATE_PIL) |
					kcb->kprobe_orig_tstate_pil);
				goto no_kprobe;
			}
			/* We have reentered the kprobe_handler(), since
			 * another probe was hit while within the handler.
			 * Here we save the original kprobe variables and
			 * just single-step the instruction of the new probe
			 * without calling any user handlers.
			 */
			save_previous_kprobe(kcb);
			set_current_kprobe(p, regs, kcb);
			kprobes_inc_nmissed_count(p);
			kcb->kprobe_status = KPROBE_REENTER;
			prepare_singlestep(p, regs, kcb);
			return 1;
		} else {
			if (*(u32 *)addr != BREAKPOINT_INSTRUCTION) {
				/* The breakpoint instruction was removed by
				 * another cpu right after we hit it, so no
				 * further handling of this interrupt is
				 * appropriate.
				 */
				ret = 1;
				goto no_kprobe;
			}
			p = __get_cpu_var(current_kprobe);
			if (p->break_handler && p->break_handler(p, regs))
				goto ss_probe;
		}
		goto no_kprobe;
	}

	p = get_kprobe(addr);
	if (!p) {
		if (*(u32 *)addr != BREAKPOINT_INSTRUCTION) {
			/*
			 * The breakpoint instruction was removed right
			 * after we hit it.  Another cpu has removed
			 * either a probepoint or a debugger breakpoint
			 * at this address.  In either case, no further
			 * handling of this interrupt is appropriate.
			 */
			ret = 1;
		}
		/* Not one of ours: let the kernel handle it. */
		goto no_kprobe;
	}

	set_current_kprobe(p, regs, kcb);
	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
	if (p->pre_handler && p->pre_handler(p, regs))
		return 1;

ss_probe:
	prepare_singlestep(p, regs, kcb);
	kcb->kprobe_status = KPROBE_HIT_SS;
	return 1;

no_kprobe:
	preempt_enable_no_resched();
	return ret;
}

/* If INSN is a relative control transfer instruction,
 * return the corrected branch destination value.
 *
 * The original INSN location was REAL_PC; it actually
 * executed at PC and produced destination address NPC.
 */
static unsigned long __kprobes relbranch_fixup(u32 insn, unsigned long real_pc,
					       unsigned long pc,
					       unsigned long npc)
{
	/* Branch not taken, no mods necessary. */
	if (npc == pc + 0x4UL)
		return real_pc + 0x4UL;

	/* The three cases are call, branch w/prediction,
	 * and traditional branch.
	 */
	if ((insn & 0xc0000000) == 0x40000000 ||
	    (insn & 0xc1c00000) == 0x00400000 ||
	    (insn & 0xc1c00000) == 0x00800000) {
		/* The instruction did all the work for us
		 * already, just apply the offset to the correct
		 * instruction location.
		 */
		return (real_pc + (npc - pc));
	}

	return real_pc + 0x4UL;
}
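
/* Worked example of the fixup arithmetic above (addresses made up for
 * illustration): a branch originally at real_pc 0x1000 is copied to
 * the slot at pc 0x2000 and, when stepped there, produces npc 0x2040,
 * i.e. a taken branch with a +0x40 displacement.  PC-relative
 * displacements are unaffected by where the instruction executes, so
 * the corrected destination is
 * real_pc + (npc - pc) = 0x1000 + 0x40 = 0x1040.
 */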

/* If INSN is an instruction which writes its PC location
 * into a destination register, fix that up.
 */
static void __kprobes retpc_fixup(struct pt_regs *regs, u32 insn,
				  unsigned long real_pc)
{
	unsigned long *slot = NULL;

	/* Simplest case is call, which always uses %o7. */
	if ((insn & 0xc0000000) == 0x40000000) {
		slot = &regs->u_regs[UREG_I7];
	}

	/* Jmpl encodes the register inside of the opcode. */
	if ((insn & 0xc1f80000) == 0x81c00000) {
		unsigned long rd = ((insn >> 25) & 0x1f);

		if (rd <= 15) {
			slot = &regs->u_regs[rd];
		} else {
			/* Hard case, it goes onto the stack. */
			flushw_all();

			rd -= 16;
			slot = (unsigned long *)
				(regs->u_regs[UREG_FP] + STACK_BIAS);
			slot += rd;
		}
	}
	if (slot != NULL)
		*slot = real_pc;
}
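
/* Why this matters: "call" writes the address of the call instruction
 * itself into %o7.  Since we stepped a copy out of the ainsn slot,
 * %o7 would otherwise hold the slot address, and the callee would
 * return into the slot instead of to the original call site;
 * rewriting it with real_pc fixes the return path.
 */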

/*
 * Called after single-stepping.  p->addr is the address of the
 * instruction that has been replaced by the breakpoint
 * instruction.  To avoid the SMP problems that can occur when we
 * temporarily put back the original opcode to single-step, we
 * single-stepped a copy of the instruction.  The address of this
 * copy is p->ainsn.insn.
 *
 * This function prepares to return from the post-single-step
 * breakpoint trap.
 */
static void __kprobes resume_execution(struct kprobe *p,
		struct pt_regs *regs, struct kprobe_ctlblk *kcb)
{
	u32 insn = p->ainsn.insn[0];

	regs->tpc = kcb->kprobe_orig_tnpc;
	regs->tnpc = relbranch_fixup(insn,
				     (unsigned long) p->addr,
				     (unsigned long) &p->ainsn.insn[0],
				     regs->tnpc);
	retpc_fixup(regs, insn, (unsigned long) p->addr);

	regs->tstate = ((regs->tstate & ~TSTATE_PIL) |
			kcb->kprobe_orig_tstate_pil);
}

static int __kprobes post_kprobe_handler(struct pt_regs *regs)
{
	struct kprobe *cur = kprobe_running();
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

	if (!cur)
		return 0;

	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
		kcb->kprobe_status = KPROBE_HIT_SSDONE;
		cur->post_handler(cur, regs, 0);
	}

	resume_execution(cur, regs, kcb);

	/* Restore the original saved kprobe variables and continue. */
	if (kcb->kprobe_status == KPROBE_REENTER) {
		restore_previous_kprobe(kcb);
		goto out;
	}
	reset_current_kprobe();
out:
	preempt_enable_no_resched();

	return 1;
}

static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{
	struct kprobe *cur = kprobe_running();
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
	const struct exception_table_entry *entry;

	switch (kcb->kprobe_status) {
	case KPROBE_HIT_SS:
	case KPROBE_REENTER:
		/*
		 * We are here because the instruction being single-
		 * stepped caused a page fault.  We reset the current
		 * kprobe, point the tpc back to the probe address,
		 * and allow the page fault handler to continue as a
		 * normal page fault.
		 */
		regs->tpc = (unsigned long)cur->addr;
		regs->tnpc = kcb->kprobe_orig_tnpc;
		regs->tstate = ((regs->tstate & ~TSTATE_PIL) |
				kcb->kprobe_orig_tstate_pil);
		if (kcb->kprobe_status == KPROBE_REENTER)
			restore_previous_kprobe(kcb);
		else
			reset_current_kprobe();
		preempt_enable_no_resched();
		break;
	case KPROBE_HIT_ACTIVE:
	case KPROBE_HIT_SSDONE:
		/*
		 * We increment the nmissed count for accounting;
		 * we could also use the npre/npostfault counts to
		 * account for these specific fault cases.
		 */
		kprobes_inc_nmissed_count(cur);

		/*
		 * We come here because instructions in the pre/post
		 * handler caused the page fault.  This could happen
		 * if the handler tries to access user space, e.g. via
		 * copy_from_user() or get_user().  Let the
		 * user-specified handler try to fix it first.
		 */
		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
			return 1;

		/*
		 * In case the user-specified fault handler returned
		 * zero, try to fix up.
		 */
		entry = search_exception_tables(regs->tpc);
		if (entry) {
			regs->tpc = entry->fixup;
			regs->tnpc = regs->tpc + 4;
			return 1;
		}

		/*
		 * fixup_exception() could not handle it,
		 * so let do_page_fault() fix it.
		 */
		break;
	default:
		break;
	}

	return 0;
}

/*
 * Wrapper routine for handling exceptions.
 */
int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
				       unsigned long val, void *data)
{
	struct die_args *args = (struct die_args *)data;
	int ret = NOTIFY_DONE;

	if (args->regs && user_mode(args->regs))
		return ret;

	switch (val) {
	case DIE_DEBUG:
		if (kprobe_handler(args->regs))
			ret = NOTIFY_STOP;
		break;
	case DIE_DEBUG_2:
		if (post_kprobe_handler(args->regs))
			ret = NOTIFY_STOP;
		break;
	case DIE_GPF:
	case DIE_PAGE_FAULT:
		/* kprobe_running() needs smp_processor_id() */
		preempt_disable();
		if (kprobe_running() &&
		    kprobe_fault_handler(args->regs, args->trapnr))
			ret = NOTIFY_STOP;
		preempt_enable();
		break;
	default:
		break;
	}
	return ret;
}

asmlinkage void __kprobes kprobe_trap(unsigned long trap_level,
				      struct pt_regs *regs)
{
	BUG_ON(trap_level != 0x170 && trap_level != 0x171);

	if (user_mode(regs)) {
		local_irq_enable();
		bad_trap(regs, trap_level);
		return;
	}

	/* trap_level == 0x170 --> ta 0x70
	 * trap_level == 0x171 --> ta 0x71
	 */
	if (notify_die((trap_level == 0x170) ? DIE_DEBUG : DIE_DEBUG_2,
		       (trap_level == 0x170) ? "debug" : "debug_2",
		       regs, 0, trap_level, SIGTRAP) != NOTIFY_STOP)
		bad_trap(regs, trap_level);
}
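
/* The two software trap levels map back to the two breakpoint opcodes
 * used earlier: BREAKPOINT_INSTRUCTION traps with "ta 0x70" (reported
 * as DIE_DEBUG, the initial probe hit) and BREAKPOINT_INSTRUCTION_2
 * with "ta 0x71" (DIE_DEBUG_2, the end of the single step), matching
 * the notify_die() calls above.  The exact opcode encodings live in
 * the asm header, not in this file.
 */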

/* Jprobes support. */
int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct jprobe *jp = container_of(p, struct jprobe, kp);
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

	kcb->jprobe_saved_regs_location = regs;
	memcpy(&(kcb->jprobe_saved_regs), regs, sizeof(*regs));

	/* Save a whole stack frame; this captures arguments
	 * pushed onto the stack after all the argument
	 * registers have been used up.
	 */
	memcpy(&(kcb->jprobe_saved_stack),
	       (char *) (regs->u_regs[UREG_FP] + STACK_BIAS),
	       sizeof(kcb->jprobe_saved_stack));

	regs->tpc = (unsigned long) jp->entry;
	regs->tnpc = ((unsigned long) jp->entry) + 0x4UL;
	regs->tstate |= TSTATE_PIL;

	return 1;
}

void __kprobes jprobe_return(void)
{
	__asm__ __volatile__(
		".globl	jprobe_return_trap_instruction\n"
		"jprobe_return_trap_instruction:\n\t"
		"ta 0x70");
}
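
/* Flow of a jprobe, as implemented above: setjmp_pre_handler()
 * snapshots the registers and one stack frame, then redirects
 * tpc/tnpc into the user's jp->entry routine.  That routine sees the
 * original arguments and must finish with jprobe_return(), whose
 * "ta 0x70" breakpoint lands us in longjmp_break_handler() below,
 * which restores the saved state and resumes the probed function as
 * if nothing had happened.
 */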

extern void jprobe_return_trap_instruction(void);

extern void __show_regs(struct pt_regs *regs);

int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
{
	u32 *addr = (u32 *) regs->tpc;
	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();

	if (addr == (u32 *) jprobe_return_trap_instruction) {
		if (kcb->jprobe_saved_regs_location != regs) {
			printk("JPROBE: Current regs (%p) does not match "
			       "saved regs (%p).\n",
			       regs, kcb->jprobe_saved_regs_location);
			printk("JPROBE: Saved registers\n");
			__show_regs(kcb->jprobe_saved_regs_location);
			printk("JPROBE: Current registers\n");
			__show_regs(regs);
			BUG();
		}
		/* Restore the old register state.  Do pt_regs
		 * first so that UREG_FP is the original one for
		 * the stack frame restore.
		 */
		memcpy(regs, &(kcb->jprobe_saved_regs), sizeof(*regs));

		memcpy((char *) (regs->u_regs[UREG_FP] + STACK_BIAS),
		       &(kcb->jprobe_saved_stack),
		       sizeof(kcb->jprobe_saved_stack));

		preempt_enable_no_resched();
		return 1;
	}
	return 0;
}

/* architecture specific initialization */
int arch_init_kprobes(void)
{
	return 0;
}