ia64/linux-2.6.18-xen.hg

view arch/alpha/kernel/ptrace.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /* ptrace.c */
2 /* By Ross Biro 1/23/92 */
3 /* edited by Linus Torvalds */
4 /* mangled further by Bob Manson (manson@santafe.edu) */
5 /* more mutilation by David Mosberger (davidm@azstarnet.com) */
7 #include <linux/kernel.h>
8 #include <linux/sched.h>
9 #include <linux/mm.h>
10 #include <linux/smp.h>
11 #include <linux/smp_lock.h>
12 #include <linux/errno.h>
13 #include <linux/ptrace.h>
14 #include <linux/user.h>
15 #include <linux/slab.h>
16 #include <linux/security.h>
17 #include <linux/signal.h>
19 #include <asm/uaccess.h>
20 #include <asm/pgtable.h>
21 #include <asm/system.h>
22 #include <asm/fpu.h>
24 #include "proto.h"
/*
 * Debug output switch.  DEBUG is defined as the mask of facilities to
 * trace and then immediately undefined again, so tracing is compiled
 * out as written; drop the #undef to turn it on.
 */
#define DEBUG	DBG_MEM
#undef DEBUG

#ifdef DEBUG
/* Facility bits tested against the DEBUG mask by DBG(). */
enum {
	DBG_MEM		= (1<<0),
	DBG_BPT		= (1<<1),
	DBG_MEM_ALL	= (1<<2)
};
/*
 * DBG(fac, (fmt, ...)): printk the parenthesised argument list when
 * facility FAC is enabled in DEBUG.  Wrapped in do { } while (0) so the
 * macro behaves as a single statement and is safe as the body of an
 * unbraced if/else.
 */
#define DBG(fac,args)	do { if ((fac) & DEBUG) printk args; } while (0)
#else
/* Tracing disabled: expands to an empty statement, arguments are dropped. */
#define DBG(fac,args)	do { } while (0)
#endif

#define BREAKINST	0x00000080	/* call_pal bpt */
42 /*
43 * does not yet catch signals sent when the child dies.
44 * in exit.c or in signal.c.
45 */
47 /*
48 * Processes always block with the following stack-layout:
49 *
50 * +================================+ <---- task + 2*PAGE_SIZE
51 * | PALcode saved frame (ps, pc, | ^
52 * | gp, a0, a1, a2) | |
53 * +================================+ | struct pt_regs
54 * | | |
55 * | frame generated by SAVE_ALL | |
56 * | | v
57 * +================================+
58 * | | ^
59 * | frame saved by do_switch_stack | | struct switch_stack
60 * | | v
61 * +================================+
62 */
64 /*
65 * The following table maps a register index into the stack offset at
66 * which the register is saved. Register indices are 0-31 for integer
67 * regs, 32-63 for fp regs, and 64 for the pc. Notice that sp and
68 * zero have no stack-slot and need to be treated specially (see
69 * get_reg/put_reg below).
70 */
71 enum {
72 REG_R0 = 0, REG_F0 = 32, REG_FPCR = 63, REG_PC = 64
73 };
75 #define PT_REG(reg) \
76 (PAGE_SIZE*2 - sizeof(struct pt_regs) + offsetof(struct pt_regs, reg))
78 #define SW_REG(reg) \
79 (PAGE_SIZE*2 - sizeof(struct pt_regs) - sizeof(struct switch_stack) \
80 + offsetof(struct switch_stack, reg))
82 static int regoff[] = {
83 PT_REG( r0), PT_REG( r1), PT_REG( r2), PT_REG( r3),
84 PT_REG( r4), PT_REG( r5), PT_REG( r6), PT_REG( r7),
85 PT_REG( r8), SW_REG( r9), SW_REG( r10), SW_REG( r11),
86 SW_REG( r12), SW_REG( r13), SW_REG( r14), SW_REG( r15),
87 PT_REG( r16), PT_REG( r17), PT_REG( r18), PT_REG( r19),
88 PT_REG( r20), PT_REG( r21), PT_REG( r22), PT_REG( r23),
89 PT_REG( r24), PT_REG( r25), PT_REG( r26), PT_REG( r27),
90 PT_REG( r28), PT_REG( gp), -1, -1,
91 SW_REG(fp[ 0]), SW_REG(fp[ 1]), SW_REG(fp[ 2]), SW_REG(fp[ 3]),
92 SW_REG(fp[ 4]), SW_REG(fp[ 5]), SW_REG(fp[ 6]), SW_REG(fp[ 7]),
93 SW_REG(fp[ 8]), SW_REG(fp[ 9]), SW_REG(fp[10]), SW_REG(fp[11]),
94 SW_REG(fp[12]), SW_REG(fp[13]), SW_REG(fp[14]), SW_REG(fp[15]),
95 SW_REG(fp[16]), SW_REG(fp[17]), SW_REG(fp[18]), SW_REG(fp[19]),
96 SW_REG(fp[20]), SW_REG(fp[21]), SW_REG(fp[22]), SW_REG(fp[23]),
97 SW_REG(fp[24]), SW_REG(fp[25]), SW_REG(fp[26]), SW_REG(fp[27]),
98 SW_REG(fp[28]), SW_REG(fp[29]), SW_REG(fp[30]), SW_REG(fp[31]),
99 PT_REG( pc)
100 };
102 static unsigned long zero;
104 /*
105 * Get address of register REGNO in task TASK.
106 */
107 static unsigned long *
108 get_reg_addr(struct task_struct * task, unsigned long regno)
109 {
110 unsigned long *addr;
112 if (regno == 30) {
113 addr = &task_thread_info(task)->pcb.usp;
114 } else if (regno == 65) {
115 addr = &task_thread_info(task)->pcb.unique;
116 } else if (regno == 31 || regno > 65) {
117 zero = 0;
118 addr = &zero;
119 } else {
120 addr = task_stack_page(task) + regoff[regno];
121 }
122 return addr;
123 }
125 /*
126 * Get contents of register REGNO in task TASK.
127 */
128 static unsigned long
129 get_reg(struct task_struct * task, unsigned long regno)
130 {
131 /* Special hack for fpcr -- combine hardware and software bits. */
132 if (regno == 63) {
133 unsigned long fpcr = *get_reg_addr(task, regno);
134 unsigned long swcr
135 = task_thread_info(task)->ieee_state & IEEE_SW_MASK;
136 swcr = swcr_update_status(swcr, fpcr);
137 return fpcr | swcr;
138 }
139 return *get_reg_addr(task, regno);
140 }
142 /*
143 * Write contents of register REGNO in task TASK.
144 */
145 static int
146 put_reg(struct task_struct *task, unsigned long regno, unsigned long data)
147 {
148 if (regno == 63) {
149 task_thread_info(task)->ieee_state
150 = ((task_thread_info(task)->ieee_state & ~IEEE_SW_MASK)
151 | (data & IEEE_SW_MASK));
152 data = (data & FPCR_DYN_MASK) | ieee_swcr_to_fpcr(data);
153 }
154 *get_reg_addr(task, regno) = data;
155 return 0;
156 }
158 static inline int
159 read_int(struct task_struct *task, unsigned long addr, int * data)
160 {
161 int copied = access_process_vm(task, addr, data, sizeof(int), 0);
162 return (copied == sizeof(int)) ? 0 : -EIO;
163 }
165 static inline int
166 write_int(struct task_struct *task, unsigned long addr, int data)
167 {
168 int copied = access_process_vm(task, addr, &data, sizeof(int), 1);
169 return (copied == sizeof(int)) ? 0 : -EIO;
170 }
172 /*
173 * Set breakpoint.
174 */
175 int
176 ptrace_set_bpt(struct task_struct * child)
177 {
178 int displ, i, res, reg_b, nsaved = 0;
179 unsigned int insn, op_code;
180 unsigned long pc;
182 pc = get_reg(child, REG_PC);
183 res = read_int(child, pc, (int *) &insn);
184 if (res < 0)
185 return res;
187 op_code = insn >> 26;
188 if (op_code >= 0x30) {
189 /*
190 * It's a branch: instead of trying to figure out
191 * whether the branch will be taken or not, we'll put
192 * a breakpoint at either location. This is simpler,
193 * more reliable, and probably not a whole lot slower
194 * than the alternative approach of emulating the
195 * branch (emulation can be tricky for fp branches).
196 */
197 displ = ((s32)(insn << 11)) >> 9;
198 task_thread_info(child)->bpt_addr[nsaved++] = pc + 4;
199 if (displ) /* guard against unoptimized code */
200 task_thread_info(child)->bpt_addr[nsaved++]
201 = pc + 4 + displ;
202 DBG(DBG_BPT, ("execing branch\n"));
203 } else if (op_code == 0x1a) {
204 reg_b = (insn >> 16) & 0x1f;
205 task_thread_info(child)->bpt_addr[nsaved++] = get_reg(child, reg_b);
206 DBG(DBG_BPT, ("execing jump\n"));
207 } else {
208 task_thread_info(child)->bpt_addr[nsaved++] = pc + 4;
209 DBG(DBG_BPT, ("execing normal insn\n"));
210 }
212 /* install breakpoints: */
213 for (i = 0; i < nsaved; ++i) {
214 res = read_int(child, task_thread_info(child)->bpt_addr[i],
215 (int *) &insn);
216 if (res < 0)
217 return res;
218 task_thread_info(child)->bpt_insn[i] = insn;
219 DBG(DBG_BPT, (" -> next_pc=%lx\n",
220 task_thread_info(child)->bpt_addr[i]));
221 res = write_int(child, task_thread_info(child)->bpt_addr[i],
222 BREAKINST);
223 if (res < 0)
224 return res;
225 }
226 task_thread_info(child)->bpt_nsaved = nsaved;
227 return 0;
228 }
230 /*
231 * Ensure no single-step breakpoint is pending. Returns non-zero
232 * value if child was being single-stepped.
233 */
234 int
235 ptrace_cancel_bpt(struct task_struct * child)
236 {
237 int i, nsaved = task_thread_info(child)->bpt_nsaved;
239 task_thread_info(child)->bpt_nsaved = 0;
241 if (nsaved > 2) {
242 printk("ptrace_cancel_bpt: bogus nsaved: %d!\n", nsaved);
243 nsaved = 2;
244 }
246 for (i = 0; i < nsaved; ++i) {
247 write_int(child, task_thread_info(child)->bpt_addr[i],
248 task_thread_info(child)->bpt_insn[i]);
249 }
250 return (nsaved != 0);
251 }
/*
 * Detach hook, called by kernel/ptrace.c.
 *
 * Drops any single-step breakpoints still planted in CHILD; whether
 * the child had been single-stepping is of no interest here.
 */
void ptrace_disable(struct task_struct *child)
{
	(void) ptrace_cancel_bpt(child);
}
263 asmlinkage long
264 do_sys_ptrace(long request, long pid, long addr, long data,
265 struct pt_regs *regs)
266 {
267 struct task_struct *child;
268 unsigned long tmp;
269 size_t copied;
270 long ret;
272 lock_kernel();
273 DBG(DBG_MEM, ("request=%ld pid=%ld addr=0x%lx data=0x%lx\n",
274 request, pid, addr, data));
275 if (request == PTRACE_TRACEME) {
276 ret = ptrace_traceme();
277 goto out_notsk;
278 }
280 child = ptrace_get_task_struct(pid);
281 if (IS_ERR(child)) {
282 ret = PTR_ERR(child);
283 goto out_notsk;
284 }
286 if (request == PTRACE_ATTACH) {
287 ret = ptrace_attach(child);
288 goto out;
289 }
291 ret = ptrace_check_attach(child, request == PTRACE_KILL);
292 if (ret < 0)
293 goto out;
295 switch (request) {
296 /* When I and D space are separate, these will need to be fixed. */
297 case PTRACE_PEEKTEXT: /* read word at location addr. */
298 case PTRACE_PEEKDATA:
299 copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
300 ret = -EIO;
301 if (copied != sizeof(tmp))
302 break;
304 regs->r0 = 0; /* special return: no errors */
305 ret = tmp;
306 break;
308 /* Read register number ADDR. */
309 case PTRACE_PEEKUSR:
310 regs->r0 = 0; /* special return: no errors */
311 ret = get_reg(child, addr);
312 DBG(DBG_MEM, ("peek $%ld->%#lx\n", addr, ret));
313 break;
315 /* When I and D space are separate, this will have to be fixed. */
316 case PTRACE_POKETEXT: /* write the word at location addr. */
317 case PTRACE_POKEDATA:
318 tmp = data;
319 copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 1);
320 ret = (copied == sizeof(tmp)) ? 0 : -EIO;
321 break;
323 case PTRACE_POKEUSR: /* write the specified register */
324 DBG(DBG_MEM, ("poke $%ld<-%#lx\n", addr, data));
325 ret = put_reg(child, addr, data);
326 break;
328 case PTRACE_SYSCALL:
329 /* continue and stop at next (return from) syscall */
330 case PTRACE_CONT: /* restart after signal. */
331 ret = -EIO;
332 if (!valid_signal(data))
333 break;
334 if (request == PTRACE_SYSCALL)
335 set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
336 else
337 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
338 child->exit_code = data;
339 /* make sure single-step breakpoint is gone. */
340 ptrace_cancel_bpt(child);
341 wake_up_process(child);
342 ret = 0;
343 break;
345 /*
346 * Make the child exit. Best I can do is send it a sigkill.
347 * perhaps it should be put in the status that it wants to
348 * exit.
349 */
350 case PTRACE_KILL:
351 ret = 0;
352 if (child->exit_state == EXIT_ZOMBIE)
353 break;
354 child->exit_code = SIGKILL;
355 /* make sure single-step breakpoint is gone. */
356 ptrace_cancel_bpt(child);
357 wake_up_process(child);
358 goto out;
360 case PTRACE_SINGLESTEP: /* execute single instruction. */
361 ret = -EIO;
362 if (!valid_signal(data))
363 break;
364 /* Mark single stepping. */
365 task_thread_info(child)->bpt_nsaved = -1;
366 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
367 child->exit_code = data;
368 wake_up_process(child);
369 /* give it a chance to run. */
370 ret = 0;
371 goto out;
373 case PTRACE_DETACH: /* detach a process that was attached. */
374 ret = ptrace_detach(child, data);
375 goto out;
377 default:
378 ret = ptrace_request(child, request, addr, data);
379 goto out;
380 }
381 out:
382 put_task_struct(child);
383 out_notsk:
384 unlock_kernel();
385 return ret;
386 }
388 asmlinkage void
389 syscall_trace(void)
390 {
391 if (!test_thread_flag(TIF_SYSCALL_TRACE))
392 return;
393 if (!(current->ptrace & PT_PTRACED))
394 return;
395 /* The 0x80 provides a way for the tracing parent to distinguish
396 between a syscall stop and SIGTRAP delivery */
397 ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
398 ? 0x80 : 0));
400 /*
401 * This isn't the same as continuing with a signal, but it will do
402 * for normal use. strace only continues with a signal if the
403 * stopping signal is not SIGTRAP. -brl
404 */
405 if (current->exit_code) {
406 send_sig(current->exit_code, current, 1);
407 current->exit_code = 0;
408 }
409 }