ia64/linux-2.6.18-xen.hg

view arch/ia64/xen/xenminstate.h @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently, if the balloon driver is unable to increase the guest's
reservation, it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation, and records the
limit it reached as the "hard limit". The driver will not try again
until the target is set again (even to the same value).

However, it is possible that ballooning has in fact failed due to
memory pressure in the host, in which case it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up, creating temporary memory pressure while
things stabilise. You would not expect a well-behaved toolstack to
ask a domain to balloon to more than its allocation, nor would you
expect it to deliberately over-commit memory by setting balloon
targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer, in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), we may as well keep
those pages rather than returning them to Xen.
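
The resulting control flow looks roughly like the sketch below. It
follows the shape of the balloon worker in drivers/xen/balloon.c, but
the helper names (current_target, try_increase_reservation,
decrease_reservation) and the current_pages/balloon_timer variables
are illustrative stand-ins rather than the driver's exact code; both
helpers are assumed to update current_pages as they go.

#include <linux/timer.h>
#include <linux/jiffies.h>

/* Hypothetical helpers standing in for the real driver's internals;
 * both are assumed to update current_pages. */
static unsigned long current_pages;		/* pages currently backed by Xen */
static struct timer_list balloon_timer;	/* re-arms balloon_process() */
static unsigned long current_target(void);
static long try_increase_reservation(long nr_pages); /* pages actually granted */
static void decrease_reservation(long nr_pages);

static void balloon_process(void *unused)
{
	long credit;

	do {
		credit = (long)(current_target() - current_pages);
		if (credit > 0) {
			/* On partial success, keep the pages Xen did
			 * grant and fall out to the timer retry below. */
			if (try_increase_reservation(credit) < credit)
				break;
		} else if (credit < 0)
			decrease_reservation(-credit);
	} while (credit != 0);

	/* Still short of the target: retry on a timer, as the decrease
	 * path already does, instead of recording a "hard limit" and
	 * giving up. */
	if (current_target() != current_pages)
		mod_timer(&balloon_timer, jiffies + HZ);
}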

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents a533be77c572
line source
#include <asm/cache.h>

#ifdef CONFIG_XEN
#include "../kernel/entry.h"
#else
#include "entry.h"
#endif

/*
 * For ivt.s we want to access the stack virtually so we don't have to disable translation
 * on interrupts.
 *
 * On entry:
 *	r1:	pointer to current task (ar.k6)
 */
#define MINSTATE_START_SAVE_MIN_VIRT \
(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
	;; \
(pUStk) mov.m r24=ar.rnat; \
(pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \
(pKStk) mov r1=sp; /* get sp */ \
	;; \
(pUStk) lfetch.fault.excl.nt1 [r22]; \
(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
(pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \
	;; \
(pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \
(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \
	;; \
(pUStk) mov r18=ar.bsp; \
(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \

#define MINSTATE_END_SAVE_MIN_VIRT \
	bsw.1; /* switch back to bank 1 (must be last in insn group) */ \
	;;
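
/*
 * Illustrative C sketch (hypothetical helpers, not part of this file)
 * of the address arithmetic above: the task_struct sits at the base of
 * the combined task/kernel-stack allocation, the register backing
 * store grows upward from IA64_RBS_OFFSET within it, and pt_regs is
 * carved off the top of the memory stack, which grows downward from
 * IA64_STK_OFFSET.
 */
static inline void *kernel_rbs_base(void *task)
{
	return (char *)task + IA64_RBS_OFFSET; /* addl r22=IA64_RBS_OFFSET,r1 */
}

static inline void *kernel_pt_regs(void *task)
{
	/* addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1 */
	return (char *)task + IA64_STK_OFFSET - IA64_PT_REGS_SIZE;
}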
/*
 * For mca_asm.S we want to access the stack physically since the state is saved before we
 * go virtual and don't want to destroy the iip or ipsr.
 */
#define MINSTATE_START_SAVE_MIN_PHYS \
(pKStk) mov r3=IA64_KR(PER_CPU_DATA);; \
(pKStk) addl r3=THIS_CPU(ia64_mca_data),r3;; \
(pKStk) ld8 r3 = [r3];; \
(pKStk) addl r3=IA64_MCA_CPU_INIT_STACK_OFFSET,r3;; \
(pKStk) addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3; \
(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
(pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \
	;; \
(pUStk) mov r24=ar.rnat; \
(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
(pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \
(pUStk) dep r22=-1,r22,61,3; /* compute kernel virtual addr of RBS */ \
	;; \
(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \
(pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \
	;; \
(pUStk) mov r18=ar.bsp; \
(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \

#define MINSTATE_END_SAVE_MIN_PHYS \
	dep r12=-1,r12,61,3; /* make sp a kernel virtual address */ \
	;;
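
/*
 * Illustrative C equivalent (hypothetical helper) of the
 * `dep reg=-1,reg,61,3` instructions above: depositing all-ones into
 * bits 61..63 moves an address into ia64 region 7, the kernel's
 * identity mapping of physical memory, which is the same trick that
 * __va() relies on.
 */
static inline unsigned long phys_to_region7(unsigned long paddr)
{
	return paddr | (7UL << 61); /* set the three region-number bits */
}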
#ifdef MINSTATE_VIRT
# define MINSTATE_GET_CURRENT(reg)	mov reg=IA64_KR(CURRENT)
# define MINSTATE_START_SAVE_MIN	MINSTATE_START_SAVE_MIN_VIRT
# define MINSTATE_END_SAVE_MIN		MINSTATE_END_SAVE_MIN_VIRT
#endif

#ifdef MINSTATE_PHYS
# define MINSTATE_GET_CURRENT(reg)	mov reg=IA64_KR(CURRENT);; tpa reg=reg
# define MINSTATE_START_SAVE_MIN	MINSTATE_START_SAVE_MIN_PHYS
# define MINSTATE_END_SAVE_MIN		MINSTATE_END_SAVE_MIN_PHYS
#endif

/*
 * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
 * the minimum state necessary that allows us to turn psr.ic back
 * on.
 *
 * Assumed state upon entry:
 *	psr.ic: off
 *	r31:	contains saved predicates (pr)
 *
 * Upon exit, the state is as follows:
 *	psr.ic: off
 *	r2 = points to &pt_regs.r16
 *	r8 = contents of ar.ccv
 *	r9 = contents of ar.csd
 *	r10 = contents of ar.ssd
 *	r11 = FPSR_DEFAULT
 *	r12 = kernel sp (kernel virtual address)
 *	r13 = points to current task_struct (kernel virtual address)
 *	p15 = TRUE if psr.i is set in cr.ipsr
 *	predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
 *		preserved
 * CONFIG_XEN note: p6/p7 are not preserved
 *
 * Note that psr.ic is NOT turned on by this macro. This is so that
 * we can pass interruption state as arguments to a handler.
 */
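
/*
 * Illustrative C view (hypothetical helper, not the kernel's actual
 * definition) of the key difference in the CONFIG_XEN variant below:
 * privileged reads such as `mov r29=cr.ipsr` would fault in a
 * paravirtualized guest, so the interruption state is instead loaded
 * from fixed XSI_* virtual addresses that map a state area shared
 * with the hypervisor, e.g. `movl r29=XSI_IPSR;; ld8 r29=[r29]`.
 */
static inline unsigned long xen_read_ipsr(void)
{
	return *(volatile unsigned long *)XSI_IPSR;
}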
#ifdef CONFIG_XEN
#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \
	MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \
	mov r27=ar.rsc; /* M */ \
	mov r20=r1; /* A */ \
	mov r25=ar.unat; /* M */ \
	/* mov r29=cr.ipsr; /* M */ \
	movl r29=XSI_IPSR;; \
	ld8 r29=[r29];; \
	mov r26=ar.pfs; /* I */ \
	/* mov r28=cr.iip; /* M */ \
	movl r28=XSI_IIP;; \
	ld8 r28=[r28];; \
	mov r21=ar.fpsr; /* M */ \
	COVER; /* B;; (or nothing) */ \
	;; \
	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16; \
	;; \
	ld1 r17=[r16]; /* load current->thread.on_ustack flag */ \
	st1 [r16]=r0; /* clear current->thread.on_ustack flag */ \
	adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 \
	/* switch from user to kernel RBS: */ \
	;; \
	invala; /* M */ \
	/* SAVE_IFS; /* see xen special handling below */ \
	cmp.eq pKStk,pUStk=r0,r17; /* are we in kernel mode already? */ \
	;; \
	MINSTATE_START_SAVE_MIN \
	adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \
	adds r16=PT(CR_IPSR),r1; \
	;; \
	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \
	st8 [r16]=r29; /* save cr.ipsr */ \
	;; \
	lfetch.fault.excl.nt1 [r17]; \
	tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \
	mov r29=b0 \
	;; \
	adds r16=PT(R8),r1; /* initialize first base pointer */ \
	adds r17=PT(R9),r1; /* initialize second base pointer */ \
(pKStk) mov r18=r0; /* make sure r18 isn't NaT */ \
	;; \
.mem.offset 0,0; st8.spill [r16]=r8,16; \
.mem.offset 8,0; st8.spill [r17]=r9,16; \
	;; \
.mem.offset 0,0; st8.spill [r16]=r10,24; \
.mem.offset 8,0; st8.spill [r17]=r11,24; \
	;; \
	/* xen special handling for possibly lazy cover */ \
	movl r8=XSI_PRECOVER_IFS; \
	;; \
	ld8 r30=[r8]; \
	;; \
	st8 [r16]=r28,16; /* save cr.iip */ \
	st8 [r17]=r30,16; /* save cr.ifs */ \
(pUStk) sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \
	mov r8=ar.ccv; \
	mov r9=ar.csd; \
	mov r10=ar.ssd; \
	movl r11=FPSR_DEFAULT; /* L-unit */ \
	;; \
	st8 [r16]=r25,16; /* save ar.unat */ \
	st8 [r17]=r26,16; /* save ar.pfs */ \
	shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \
	;; \
	st8 [r16]=r27,16; /* save ar.rsc */ \
(pUStk) st8 [r17]=r24,16; /* save ar.rnat */ \
(pKStk) adds r17=16,r17; /* skip over ar_rnat field */ \
	;; /* avoid RAW on r16 & r17 */ \
(pUStk) st8 [r16]=r23,16; /* save ar.bspstore */ \
	st8 [r17]=r31,16; /* save predicates */ \
(pKStk) adds r16=16,r16; /* skip over ar_bspstore field */ \
	;; \
	st8 [r16]=r29,16; /* save b0 */ \
	st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \
	cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \
	;; \
.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \
.mem.offset 8,0; st8.spill [r17]=r12,16; \
	adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \
	;; \
.mem.offset 0,0; st8.spill [r16]=r13,16; \
.mem.offset 8,0; st8.spill [r17]=r21,16; /* save ar.fpsr */ \
	mov r13=IA64_KR(CURRENT); /* establish `current' */ \
	;; \
.mem.offset 0,0; st8.spill [r16]=r15,16; \
.mem.offset 8,0; st8.spill [r17]=r14,16; \
	;; \
.mem.offset 0,0; st8.spill [r16]=r2,16; \
.mem.offset 8,0; st8.spill [r17]=r3,16; \
	;; \
	EXTRA; \
	mov r2=b0; br.call.sptk b0=xen_bsw1;; mov b0=r2; \
	adds r2=IA64_PT_REGS_R16_OFFSET,r1; \
	;; \
	movl r1=__gp; /* establish kernel global pointer */ \
	;; \
	/* MINSTATE_END_SAVE_MIN */
#else
#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \
	MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \
	mov r27=ar.rsc; /* M */ \
	mov r20=r1; /* A */ \
	mov r25=ar.unat; /* M */ \
	mov r29=cr.ipsr; /* M */ \
	mov r26=ar.pfs; /* I */ \
	mov r28=cr.iip; /* M */ \
	mov r21=ar.fpsr; /* M */ \
	COVER; /* B;; (or nothing) */ \
	;; \
	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16; \
	;; \
	ld1 r17=[r16]; /* load current->thread.on_ustack flag */ \
	st1 [r16]=r0; /* clear current->thread.on_ustack flag */ \
	adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 \
	/* switch from user to kernel RBS: */ \
	;; \
	invala; /* M */ \
	SAVE_IFS; \
	cmp.eq pKStk,pUStk=r0,r17; /* are we in kernel mode already? */ \
	;; \
	MINSTATE_START_SAVE_MIN \
	adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \
	adds r16=PT(CR_IPSR),r1; \
	;; \
	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \
	st8 [r16]=r29; /* save cr.ipsr */ \
	;; \
	lfetch.fault.excl.nt1 [r17]; \
	tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \
	mov r29=b0 \
	;; \
	adds r16=PT(R8),r1; /* initialize first base pointer */ \
	adds r17=PT(R9),r1; /* initialize second base pointer */ \
(pKStk) mov r18=r0; /* make sure r18 isn't NaT */ \
	;; \
.mem.offset 0,0; st8.spill [r16]=r8,16; \
.mem.offset 8,0; st8.spill [r17]=r9,16; \
	;; \
.mem.offset 0,0; st8.spill [r16]=r10,24; \
.mem.offset 8,0; st8.spill [r17]=r11,24; \
	;; \
	st8 [r16]=r28,16; /* save cr.iip */ \
	st8 [r17]=r30,16; /* save cr.ifs */ \
(pUStk) sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \
	mov r8=ar.ccv; \
	mov r9=ar.csd; \
	mov r10=ar.ssd; \
	movl r11=FPSR_DEFAULT; /* L-unit */ \
	;; \
	st8 [r16]=r25,16; /* save ar.unat */ \
	st8 [r17]=r26,16; /* save ar.pfs */ \
	shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \
	;; \
	st8 [r16]=r27,16; /* save ar.rsc */ \
(pUStk) st8 [r17]=r24,16; /* save ar.rnat */ \
(pKStk) adds r17=16,r17; /* skip over ar_rnat field */ \
	;; /* avoid RAW on r16 & r17 */ \
(pUStk) st8 [r16]=r23,16; /* save ar.bspstore */ \
	st8 [r17]=r31,16; /* save predicates */ \
(pKStk) adds r16=16,r16; /* skip over ar_bspstore field */ \
	;; \
	st8 [r16]=r29,16; /* save b0 */ \
	st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \
	cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \
	;; \
.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \
.mem.offset 8,0; st8.spill [r17]=r12,16; \
	adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \
	;; \
.mem.offset 0,0; st8.spill [r16]=r13,16; \
.mem.offset 8,0; st8.spill [r17]=r21,16; /* save ar.fpsr */ \
	mov r13=IA64_KR(CURRENT); /* establish `current' */ \
	;; \
.mem.offset 0,0; st8.spill [r16]=r15,16; \
.mem.offset 8,0; st8.spill [r17]=r14,16; \
	;; \
.mem.offset 0,0; st8.spill [r16]=r2,16; \
.mem.offset 8,0; st8.spill [r17]=r3,16; \
	adds r2=IA64_PT_REGS_R16_OFFSET,r1; \
	;; \
	EXTRA; \
	movl r1=__gp; /* establish kernel global pointer */ \
	;; \
	MINSTATE_END_SAVE_MIN
#endif
/*
 * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
 *
 * Assumed state upon entry:
 *	psr.ic: on
 *	r2:	points to &pt_regs.r16
 *	r3:	points to &pt_regs.r17
 *	r8:	contents of ar.ccv
 *	r9:	contents of ar.csd
 *	r10:	contents of ar.ssd
 *	r11:	FPSR_DEFAULT
 *
 * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
 */
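
/*
 * The shape of the spill sequence below, as an illustrative C sketch
 * (hypothetical function, and simplified: the real macro bumps r3 by
 * 32 after r31 and then moves on to the floating-point and branch
 * registers). r2 and r3 act as two interleaved store pointers, each
 * post-incremented by 16 bytes, so the even/odd register streams land
 * in consecutive 8-byte pt_regs slots.
 */
static void save_rest_shape(unsigned long *pt_r16, const unsigned long gr[32])
{
	unsigned long *p2 = pt_r16;	/* r2: &pt_regs.r16 */
	unsigned long *p3 = pt_r16 + 1;	/* r3: &pt_regs.r17 */
	int i;

	for (i = 16; i <= 30; i += 2) {
		*p2 = gr[i];	 p2 += 2;	/* st8.spill [r2]=rN,16 */
		*p3 = gr[i + 1]; p3 += 2;	/* st8.spill [r3]=rN+1,16 */
	}
}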
#define SAVE_REST \
.mem.offset 0,0; st8.spill [r2]=r16,16; \
.mem.offset 8,0; st8.spill [r3]=r17,16; \
	;; \
.mem.offset 0,0; st8.spill [r2]=r18,16; \
.mem.offset 8,0; st8.spill [r3]=r19,16; \
	;; \
.mem.offset 0,0; st8.spill [r2]=r20,16; \
.mem.offset 8,0; st8.spill [r3]=r21,16; \
	mov r18=b6; \
	;; \
.mem.offset 0,0; st8.spill [r2]=r22,16; \
.mem.offset 8,0; st8.spill [r3]=r23,16; \
	mov r19=b7; \
	;; \
.mem.offset 0,0; st8.spill [r2]=r24,16; \
.mem.offset 8,0; st8.spill [r3]=r25,16; \
	;; \
.mem.offset 0,0; st8.spill [r2]=r26,16; \
.mem.offset 8,0; st8.spill [r3]=r27,16; \
	;; \
.mem.offset 0,0; st8.spill [r2]=r28,16; \
.mem.offset 8,0; st8.spill [r3]=r29,16; \
	;; \
.mem.offset 0,0; st8.spill [r2]=r30,16; \
.mem.offset 8,0; st8.spill [r3]=r31,32; \
	;; \
	mov ar.fpsr=r11; /* M-unit */ \
	st8 [r2]=r8,8; /* ar.ccv */ \
	adds r24=PT(B6)-PT(F7),r3; \
	;; \
	stf.spill [r2]=f6,32; \
	stf.spill [r3]=f7,32; \
	;; \
	stf.spill [r2]=f8,32; \
	stf.spill [r3]=f9,32; \
	;; \
	stf.spill [r2]=f10; \
	stf.spill [r3]=f11; \
	adds r25=PT(B7)-PT(F11),r3; \
	;; \
	st8 [r24]=r18,16; /* b6 */ \
	st8 [r25]=r19,16; /* b7 */ \
	;; \
	st8 [r24]=r9; /* ar.csd */ \
	st8 [r25]=r10; /* ar.ssd */ \
	;;

#define SAVE_MIN_WITH_COVER	DO_SAVE_MIN(cover, mov r30=cr.ifs,)
#define SAVE_MIN_WITH_COVER_R19	DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19)
#ifdef CONFIG_XEN
#define SAVE_MIN	break 0;; /* FIXME: non-cover version only for ia32 support? */
#else
#define SAVE_MIN	DO_SAVE_MIN( , mov r30=r0, )
#endif