ia64/linux-2.6.18-xen.hg

view arch/ia64/xen/xenentry.S @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well-behaved
toolstack to ask a domain to balloon to more than its allocation, nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents a533be77c572
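The retry policy described in the changeset above can be pictured with a small, hedged C sketch. This is not the actual drivers/xen/balloon.c code; the function names and the simulated host pressure are hypothetical, and it only models the policy change: keep whatever pages were granted and retry later, instead of recording a hard limit and giving up.

#include <stdio.h>

/* Hypothetical stand-in for the hypervisor's populate-physmap operation:
 * grants at most 'host_free' of the 'nr' requested pages. */
static long grant_pages(long nr, long host_free)
{
	return nr < host_free ? nr : host_free;
}

int main(void)
{
	long current_pages = 1000, target = 1400;
	long host_free = 50;	/* transient memory pressure in the host */

	/* Old policy: a short grant meant recording a "hard limit" and stopping
	 * until the target was rewritten.  New policy (sketched here): keep the
	 * partial grant and retry on a timer; each loop iteration stands in for
	 * a timer tick. */
	while (current_pages < target) {
		long want = target - current_pages;
		long got = grant_pages(want, host_free);

		current_pages += got;	/* keep partially granted pages */
		printf("wanted %ld, got %ld, now at %ld/%ld\n",
		       want, got, current_pages, target);

		host_free += 100;	/* pressure eases as other guests balloon down */
	}
	return 0;
}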
line source
/*
* ia64/xen/entry.S
*
* Alternate kernel routines for Xen. Heavily leveraged from
* ia64/kernel/entry.S
*
* Copyright (C) 2005 Hewlett-Packard Co
* Dan Magenheimer <dan.magenheimer@hp.com>
*/

#include <asm/asmmacro.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/kregs.h>
#include <asm/asm-offsets.h>
#include <asm/pgtable.h>
#include <asm/percpu.h>
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <asm/unistd.h>

#ifdef CONFIG_XEN
#include "xenminstate.h"
#else
#include "minstate.h"
#endif

/*
* prev_task <- ia64_switch_to(struct task_struct *next)
* With Ingo's new scheduler, interrupts are disabled when this routine gets
* called. The code starting at .map relies on this. The rest of the code
* doesn't care about the interrupt masking status.
*/
#ifdef CONFIG_XEN
GLOBAL_ENTRY(xen_switch_to)
.prologue
alloc r16=ar.pfs,1,0,0,0
movl r22=running_on_xen;;
ld4 r22=[r22];;
cmp.eq p7,p0=r22,r0
(p7) br.cond.sptk.many __ia64_switch_to;;
#else
GLOBAL_ENTRY(ia64_switch_to)
.prologue
alloc r16=ar.pfs,1,0,0,0
#endif
DO_SAVE_SWITCH_STACK
.body

adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13
movl r25=init_task
mov r27=IA64_KR(CURRENT_STACK)
adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0
dep r20=0,in0,61,3 // physical address of "next"
;;
st8 [r22]=sp // save kernel stack pointer of old task
shr.u r26=r20,IA64_GRANULE_SHIFT
cmp.eq p7,p6=r25,in0
;;
/*
* If we've already mapped this task's page, we can skip doing it again.
*/
(p6) cmp.eq p7,p6=r26,r27
(p6) br.cond.dpnt .map
;;
.done:
ld8 sp=[r21] // load kernel stack pointer of new task
#ifdef CONFIG_XEN
// update "current" application register
mov r8=IA64_KR_CURRENT
mov r9=in0;;
XEN_HYPER_SET_KR
#else
mov IA64_KR(CURRENT)=in0 // update "current" application register
#endif
mov r8=r13 // return pointer to previously running task
mov r13=in0 // set "current" pointer
;;
DO_LOAD_SWITCH_STACK

#ifdef CONFIG_SMP
sync.i // ensure "fc"s done by this CPU are visible on other CPUs
#endif
br.ret.sptk.many rp // boogie on out in new context

.map:
#ifdef CONFIG_XEN
movl r25=XSI_PSR_IC // clear psr.ic
;;
st4 [r25]=r0
;;
#else
rsm psr.ic // interrupts (psr.i) are already disabled here
#endif
movl r25=PAGE_KERNEL
;;
srlz.d
or r23=r25,r20 // construct PA | page properties
mov r25=IA64_GRANULE_SHIFT<<2
;;
#ifdef CONFIG_XEN
movl r8=XSI_ITIR
;;
st8 [r8]=r25
;;
movl r8=XSI_IFA
;;
st8 [r8]=in0 // VA of next task...
;;
mov r25=IA64_TR_CURRENT_STACK
// remember last page we mapped...
mov r8=IA64_KR_CURRENT_STACK
mov r9=r26;;
XEN_HYPER_SET_KR;;
#else
mov cr.itir=r25
mov cr.ifa=in0 // VA of next task...
;;
mov r25=IA64_TR_CURRENT_STACK
mov IA64_KR(CURRENT_STACK)=r26 // remember last page we mapped...
#endif
;;
itr.d dtr[r25]=r23 // wire in new mapping...
#ifdef CONFIG_XEN
;;
srlz.d
mov r9=1
movl r8=XSI_PSR_IC
;;
st4 [r8]=r9
;;
#else
ssm psr.ic // reenable the psr.ic bit
;;
srlz.d
#endif
br.cond.sptk .done
#ifdef CONFIG_XEN
END(xen_switch_to)
#else
END(ia64_switch_to)
#endif

/*
* Invoke a system call, but do some tracing before and after the call.
* We MUST preserve the current register frame throughout this routine
* because some system calls (such as ia64_execve) directly
* manipulate ar.pfs.
*/
#ifdef CONFIG_XEN
GLOBAL_ENTRY(xen_trace_syscall)
PT_REGS_UNWIND_INFO(0)
movl r16=running_on_xen;;
ld4 r16=[r16];;
cmp.eq p7,p0=r16,r0
(p7) br.cond.sptk.many __ia64_trace_syscall;;
#else
GLOBAL_ENTRY(ia64_trace_syscall)
PT_REGS_UNWIND_INFO(0)
#endif
/*
* We need to preserve the scratch registers f6-f11 in case the system
* call is sigreturn.
*/
adds r16=PT(F6)+16,sp
adds r17=PT(F7)+16,sp
;;
stf.spill [r16]=f6,32
stf.spill [r17]=f7,32
;;
stf.spill [r16]=f8,32
stf.spill [r17]=f9,32
;;
stf.spill [r16]=f10
stf.spill [r17]=f11
br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args
adds r16=PT(F6)+16,sp
adds r17=PT(F7)+16,sp
;;
ldf.fill f6=[r16],32
ldf.fill f7=[r17],32
;;
ldf.fill f8=[r16],32
ldf.fill f9=[r17],32
;;
ldf.fill f10=[r16]
ldf.fill f11=[r17]
// the syscall number may have changed, so re-load it and re-calculate the
// syscall entry-point:
adds r15=PT(R15)+16,sp // r15 = &pt_regs.r15 (syscall #)
;;
ld8 r15=[r15]
mov r3=NR_syscalls - 1
;;
adds r15=-1024,r15
movl r16=sys_call_table
;;
shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024)
cmp.leu p6,p7=r15,r3
;;
(p6) ld8 r20=[r20] // load address of syscall entry point
(p7) movl r20=sys_ni_syscall
;;
mov b6=r20
br.call.sptk.many rp=b6 // do the syscall
.strace_check_retval:
cmp.lt p6,p0=r8,r0 // syscall failed?
adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8
adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10
mov r10=0
(p6) br.cond.sptk strace_error // syscall failed ->
;; // avoid RAW on r10
.strace_save_retval:
.mem.offset 0,0; st8.spill [r2]=r8 // store return value in slot for r8
.mem.offset 8,0; st8.spill [r3]=r10 // clear error indication in slot for r10
br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
.ret3:
(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
br.cond.sptk .work_pending_syscall_end

strace_error:
ld8 r3=[r2] // load pt_regs.r8
sub r9=0,r8 // negate return value to get errno value
;;
cmp.ne p6,p0=r3,r0 // is pt_regs.r8!=0?
adds r3=16,r2 // r3=&pt_regs.r10
;;
(p6) mov r10=-1
(p6) mov r8=r9
br.cond.sptk .strace_save_retval
#ifdef CONFIG_XEN
END(xen_trace_syscall)
#else
END(ia64_trace_syscall)
#endif

#ifdef CONFIG_XEN
GLOBAL_ENTRY(xen_ret_from_clone)
PT_REGS_UNWIND_INFO(0)
movl r16=running_on_xen;;
ld4 r16=[r16];;
cmp.eq p7,p0=r16,r0
(p7) br.cond.sptk.many __ia64_ret_from_clone;;
#else
GLOBAL_ENTRY(ia64_ret_from_clone)
PT_REGS_UNWIND_INFO(0)
#endif
{ /*
* Some versions of gas generate bad unwind info if the first instruction of a
* procedure doesn't go into the first slot of a bundle. This is a workaround.
*/
nop.m 0
nop.i 0
/*
* We need to call schedule_tail() to complete the scheduling process.
* Called by ia64_switch_to() after do_fork()->copy_thread(). r8 contains the
* address of the previously executing task.
*/
br.call.sptk.many rp=ia64_invoke_schedule_tail
}
.ret8:
adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
;;
ld4 r2=[r2]
;;
mov r8=0
and r2=_TIF_SYSCALL_TRACEAUDIT,r2
;;
cmp.ne p6,p0=r2,r0
(p6) br.cond.spnt .strace_check_retval
;; // added stop bits to prevent r8 dependency
#ifdef CONFIG_XEN
br.cond.sptk ia64_ret_from_syscall
END(xen_ret_from_clone)
#else
END(ia64_ret_from_clone)
#endif
/*
* ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't
* need to switch to bank 0 and doesn't restore the scratch registers.
* To avoid leaking kernel bits, the scratch registers are set to
* the following known-to-be-safe values:
*
* r1: restored (global pointer)
* r2: cleared
* r3: 1 (when returning to user-level)
* r8-r11: restored (syscall return value(s))
* r12: restored (user-level stack pointer)
* r13: restored (user-level thread pointer)
* r14: set to __kernel_syscall_via_epc
* r15: restored (syscall #)
* r16-r17: cleared
* r18: user-level b6
* r19: cleared
* r20: user-level ar.fpsr
* r21: user-level b0
* r22: cleared
* r23: user-level ar.bspstore
* r24: user-level ar.rnat
* r25: user-level ar.unat
* r26: user-level ar.pfs
* r27: user-level ar.rsc
* r28: user-level ip
* r29: user-level psr
* r30: user-level cfm
* r31: user-level pr
* f6-f11: cleared
* pr: restored (user-level pr)
* b0: restored (user-level rp)
* b6: restored
* b7: set to __kernel_syscall_via_epc
* ar.unat: restored (user-level ar.unat)
* ar.pfs: restored (user-level ar.pfs)
* ar.rsc: restored (user-level ar.rsc)
* ar.rnat: restored (user-level ar.rnat)
* ar.bspstore: restored (user-level ar.bspstore)
* ar.fpsr: restored (user-level ar.fpsr)
* ar.ccv: cleared
* ar.csd: cleared
* ar.ssd: cleared
*/
#ifdef CONFIG_XEN
GLOBAL_ENTRY(xen_leave_syscall)
PT_REGS_UNWIND_INFO(0)
movl r22=running_on_xen;;
ld4 r22=[r22];;
cmp.eq p7,p0=r22,r0
(p7) br.cond.sptk.many __ia64_leave_syscall;;
#else
ENTRY(ia64_leave_syscall)
PT_REGS_UNWIND_INFO(0)
#endif
/*
* work.need_resched etc. mustn't get changed by this CPU before it returns to
* user- or fsys-mode, hence we disable interrupts early on.
*
* p6 controls whether current_thread_info()->flags needs to be checked for
* extra work. We always check for extra work when returning to user-level.
* With CONFIG_PREEMPT, we also check for extra work when the preempt_count
* is 0. After extra work processing has been completed, execution
* resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check
* needs to be redone.
*/
#ifdef CONFIG_PREEMPT
rsm psr.i // disable interrupts
cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall
(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
;;
.pred.rel.mutex pUStk,pKStk
(pKStk) ld4 r21=[r20] // r21 <- preempt_count
(pUStk) mov r21=0 // r21 <- 0
;;
cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0)
#else /* !CONFIG_PREEMPT */
#ifdef CONFIG_XEN
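// Xen paravirtualised path (cf. the native "(pUStk) rsm psr.i" below): mask
// vcpu event delivery by writing 1 to the byte that XSI_PSR_I_ADDR points at,
// rather than clearing psr.i directly.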
movl r2=XSI_PSR_I_ADDR
mov r18=1
;;
ld8 r2=[r2]
;;
(pUStk) st1 [r2]=r18
#else
(pUStk) rsm psr.i
#endif
cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall
(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
#endif
.work_processed_syscall:
adds r2=PT(LOADRS)+16,r12
adds r3=PT(AR_BSPSTORE)+16,r12
adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
;;
(p6) ld4 r31=[r18] // load current_thread_info()->flags
ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
nop.i 0
;;
mov r16=ar.bsp // M2 get existing backing store pointer
ld8 r18=[r2],PT(R9)-PT(B6) // load b6
(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
;;
ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage)
(p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending?
(p6) br.cond.spnt .work_pending_syscall
;;
// start restoring the state saved on the kernel stack (struct pt_regs):
ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
ld8 r11=[r3],PT(CR_IIP)-PT(R11)
(pNonSys) break 0 // bug check: we shouldn't be here if pNonSys is TRUE!
;;
invala // M0|1 invalidate ALAT
#ifdef CONFIG_XEN
movl r28=XSI_PSR_I_ADDR
movl r29=XSI_PSR_IC
;;
ld8 r28=[r28]
mov r30=1
;;
st1 [r28]=r30
st4 [r29]=r0 // note: clears both vpsr.i and vpsr.ic!
;;
#else
rsm psr.i | psr.ic // M2 turn off interrupts and interruption collection
#endif
cmp.eq p9,p0=r0,r0 // A set p9 to indicate that we should restore cr.ifs

ld8 r29=[r2],16 // M0|1 load cr.ipsr
ld8 r28=[r3],16 // M0|1 load cr.iip
mov r22=r0 // A clear r22
;;
ld8 r30=[r2],16 // M0|1 load cr.ifs
ld8 r25=[r3],16 // M0|1 load ar.unat
(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
;;
ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs
#ifdef CONFIG_XEN
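// Xen paravirtualised psr read: XEN_HYPER_GET_PSR returns the psr value in r8,
// so the syscall return value already in r8 is parked in r21 across the call
// (the native path below simply does "mov r22=psr").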
(pKStk) mov r21=r8
(pKStk) XEN_HYPER_GET_PSR
;;
(pKStk) mov r22=r8
(pKStk) mov r8=r21
;;
#else
(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
#endif
nop 0
;;
ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0
ld8 r27=[r3],PT(PR)-PT(AR_RSC) // M0|1 load ar.rsc
mov f6=f0 // F clear f6
;;
ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // M0|1 load ar.rnat (may be garbage)
ld8 r31=[r3],PT(R1)-PT(PR) // M0|1 load predicates
mov f7=f0 // F clear f7
;;
ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // M0|1 load ar.fpsr
ld8.fill r1=[r3],16 // M0|1 load r1
(pUStk) mov r17=1 // A
;;
(pUStk) st1 [r14]=r17 // M2|3
ld8.fill r13=[r3],16 // M0|1
mov f8=f0 // F clear f8
;;
ld8.fill r12=[r2] // M0|1 restore r12 (sp)
ld8.fill r15=[r3] // M0|1 restore r15
mov b6=r18 // I0 restore b6

addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
mov f9=f0 // F clear f9
(pKStk) br.cond.dpnt.many skip_rbs_switch // B

srlz.d // M0 ensure interruption collection is off (for cover)
shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
#ifdef CONFIG_XEN
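// Xen paravirtualised "cover": the hypervisor adds the current frame to the
// dirty partition and sets cr.ifs, as the native cover in the #else does.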
XEN_HYPER_COVER;
#else
cover // B add current frame into dirty partition & set cr.ifs
#endif
;;
(pUStk) ld4 r17=[r17] // M0|1 r17 = cpu_data->phys_stacked_size_p8
mov r19=ar.bsp // M2 get new backing store pointer
mov f10=f0 // F clear f10

nop.m 0
movl r14=__kernel_syscall_via_epc // X
;;
mov.m ar.csd=r0 // M2 clear ar.csd
mov.m ar.ccv=r0 // M2 clear ar.ccv
mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc)

mov.m ar.ssd=r0 // M2 clear ar.ssd
mov f11=f0 // F clear f11
br.cond.sptk.many rbs_switch // B
#ifdef CONFIG_XEN
END(xen_leave_syscall)
#else
END(ia64_leave_syscall)
#endif

#ifdef CONFIG_XEN
GLOBAL_ENTRY(xen_leave_kernel)
PT_REGS_UNWIND_INFO(0)
movl r22=running_on_xen;;
ld4 r22=[r22];;
cmp.eq p7,p0=r22,r0
(p7) br.cond.sptk.many __ia64_leave_kernel;;
#else
GLOBAL_ENTRY(ia64_leave_kernel)
PT_REGS_UNWIND_INFO(0)
#endif
/*
* work.need_resched etc. mustn't get changed by this CPU before it returns to
* user- or fsys-mode, hence we disable interrupts early on.
*
* p6 controls whether current_thread_info()->flags needs to be checked for
* extra work. We always check for extra work when returning to user-level.
* With CONFIG_PREEMPT, we also check for extra work when the preempt_count
* is 0. After extra work processing has been completed, execution
* resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check
* needs to be redone.
*/
#ifdef CONFIG_PREEMPT
rsm psr.i // disable interrupts
cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel
(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
;;
.pred.rel.mutex pUStk,pKStk
(pKStk) ld4 r21=[r20] // r21 <- preempt_count
(pUStk) mov r21=0 // r21 <- 0
;;
cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0)
#else
#ifdef CONFIG_XEN
(pUStk) movl r17=XSI_PSR_I_ADDR
(pUStk) mov r31=1
;;
(pUStk) ld8 r17=[r17]
;;
(pUStk) st1 [r17]=r31
;;
#else
(pUStk) rsm psr.i
#endif
cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel
(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
#endif
.work_processed_kernel:
adds r17=TI_FLAGS+IA64_TASK_SIZE,r13
;;
(p6) ld4 r31=[r17] // load current_thread_info()->flags
adds r21=PT(PR)+16,r12
;;

lfetch [r21],PT(CR_IPSR)-PT(PR)
adds r2=PT(B6)+16,r12
adds r3=PT(R16)+16,r12
;;
lfetch [r21]
ld8 r28=[r2],8 // load b6
adds r29=PT(R24)+16,r12

ld8.fill r16=[r3],PT(AR_CSD)-PT(R16)
adds r30=PT(AR_CCV)+16,r12
(p6) and r19=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
;;
ld8.fill r24=[r29]
ld8 r15=[r30] // load ar.ccv
(p6) cmp4.ne.unc p6,p0=r19, r0 // any special work pending?
;;
ld8 r29=[r2],16 // load b7
ld8 r30=[r3],16 // load ar.csd
(p6) br.cond.spnt .work_pending
;;
ld8 r31=[r2],16 // load ar.ssd
ld8.fill r8=[r3],16
;;
ld8.fill r9=[r2],16
ld8.fill r10=[r3],PT(R17)-PT(R10)
;;
ld8.fill r11=[r2],PT(R18)-PT(R11)
ld8.fill r17=[r3],16
;;
ld8.fill r18=[r2],16
ld8.fill r19=[r3],16
;;
ld8.fill r20=[r2],16
ld8.fill r21=[r3],16
mov ar.csd=r30
mov ar.ssd=r31
;;
#ifdef CONFIG_XEN
movl r23=XSI_PSR_I_ADDR
movl r22=XSI_PSR_IC
;;
ld8 r23=[r23]
mov r25=1
;;
st1 [r23]=r25
st4 [r22]=r0 // note: clears both vpsr.i and vpsr.ic!
;;
#else
rsm psr.i | psr.ic // initiate turning off of interrupt and interruption collection
#endif
invala // invalidate ALAT
;;
ld8.fill r22=[r2],24
ld8.fill r23=[r3],24
mov b6=r28
;;
ld8.fill r25=[r2],16
ld8.fill r26=[r3],16
mov b7=r29
;;
ld8.fill r27=[r2],16
ld8.fill r28=[r3],16
;;
ld8.fill r29=[r2],16
ld8.fill r30=[r3],24
;;
ld8.fill r31=[r2],PT(F9)-PT(R31)
adds r3=PT(F10)-PT(F6),r3
;;
ldf.fill f9=[r2],PT(F6)-PT(F9)
ldf.fill f10=[r3],PT(F8)-PT(F10)
;;
ldf.fill f6=[r2],PT(F7)-PT(F6)
;;
ldf.fill f7=[r2],PT(F11)-PT(F7)
ldf.fill f8=[r3],32
;;
srlz.d // ensure that inter. collection is off (VHPT is don't care, since text is pinned)
mov ar.ccv=r15
;;
ldf.fill f11=[r2]
#ifdef CONFIG_XEN
;;
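// Xen paravirtualised bank switch, replacing the native bsw.0 below: spill the
// bank 1 copies of r16-r31 (and their NaT bits, via XSI_B1NAT) into the
// XSI_BANK1_R16 save area, then select bank 0 by clearing XSI_BANKNUM.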
// r16-r31 all now hold bank1 values
mov r15=ar.unat
movl r2=XSI_BANK1_R16
movl r3=XSI_BANK1_R16+8
;;
.mem.offset 0,0; st8.spill [r2]=r16,16
.mem.offset 8,0; st8.spill [r3]=r17,16
;;
.mem.offset 0,0; st8.spill [r2]=r18,16
.mem.offset 8,0; st8.spill [r3]=r19,16
;;
.mem.offset 0,0; st8.spill [r2]=r20,16
.mem.offset 8,0; st8.spill [r3]=r21,16
;;
.mem.offset 0,0; st8.spill [r2]=r22,16
.mem.offset 8,0; st8.spill [r3]=r23,16
;;
.mem.offset 0,0; st8.spill [r2]=r24,16
.mem.offset 8,0; st8.spill [r3]=r25,16
;;
.mem.offset 0,0; st8.spill [r2]=r26,16
.mem.offset 8,0; st8.spill [r3]=r27,16
;;
.mem.offset 0,0; st8.spill [r2]=r28,16
.mem.offset 8,0; st8.spill [r3]=r29,16
;;
.mem.offset 0,0; st8.spill [r2]=r30,16
.mem.offset 8,0; st8.spill [r3]=r31,16
;;
mov r3=ar.unat
movl r2=XSI_B1NAT
;;
st8 [r2]=r3
mov ar.unat=r15
movl r2=XSI_BANKNUM;;
st4 [r2]=r0;
#else
bsw.0 // switch back to bank 0 (no stop bit required beforehand...)
#endif
;;
(pUStk) mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency)
adds r16=PT(CR_IPSR)+16,r12
adds r17=PT(CR_IIP)+16,r12

#ifdef CONFIG_XEN
(pKStk) mov r29=r8
(pKStk) XEN_HYPER_GET_PSR
;;
(pKStk) mov r22=r8
(pKStk) mov r8=r29
;;
#else
(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
#endif
nop.i 0
nop.i 0
;;
ld8 r29=[r16],16 // load cr.ipsr
ld8 r28=[r17],16 // load cr.iip
;;
ld8 r30=[r16],16 // load cr.ifs
ld8 r25=[r17],16 // load ar.unat
;;
ld8 r26=[r16],16 // load ar.pfs
ld8 r27=[r17],16 // load ar.rsc
cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs
;;
ld8 r24=[r16],16 // load ar.rnat (may be garbage)
ld8 r23=[r17],16 // load ar.bspstore (may be garbage)
;;
ld8 r31=[r16],16 // load predicates
ld8 r21=[r17],16 // load b0
;;
ld8 r19=[r16],16 // load ar.rsc value for "loadrs"
ld8.fill r1=[r17],16 // load r1
;;
ld8.fill r12=[r16],16
ld8.fill r13=[r17],16
(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
;;
ld8 r20=[r16],16 // ar.fpsr
ld8.fill r15=[r17],16
;;
ld8.fill r14=[r16],16
ld8.fill r2=[r17]
(pUStk) mov r17=1
;;
ld8.fill r3=[r16]
(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack
shr.u r18=r19,16 // get byte size of existing "dirty" partition
;;
mov r16=ar.bsp // get existing backing store pointer
addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
;;
ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8
(pKStk) br.cond.dpnt skip_rbs_switch

/*
* Restore user backing store.
*
* NOTE: alloc, loadrs, and cover can't be predicated.
*/
(pNonSys) br.cond.dpnt dont_preserve_current_frame

#ifdef CONFIG_XEN
XEN_HYPER_COVER;
#else
cover // add current frame into dirty partition and set cr.ifs
#endif
;;
mov r19=ar.bsp // get new backing store pointer
rbs_switch:
sub r16=r16,r18 // krbs = old bsp - size of dirty partition
cmp.ne p9,p0=r0,r0 // clear p9 to skip restore of cr.ifs
;;
sub r19=r19,r16 // calculate total byte size of dirty partition
add r18=64,r18 // don't force in0-in7 into memory...
;;
shl r19=r19,16 // shift size of dirty partition into loadrs position
;;
dont_preserve_current_frame:
/*
* To prevent leaking bits between the kernel and user-space,
* we must clear the stacked registers in the "invalid" partition here.
* Not pretty, but at least it's fast (3.34 registers/cycle on Itanium,
* 5 registers/cycle on McKinley).
*/
# define pRecurse p6
# define pReturn p7
#ifdef CONFIG_ITANIUM
# define Nregs 10
#else
# define Nregs 14
#endif
alloc loc0=ar.pfs,2,Nregs-2,2,0
shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8))
sub r17=r17,r18 // r17 = (physStackedSize + 8) - dirtySize
;;
mov ar.rsc=r19 // load ar.rsc to be used for "loadrs"
shladd in0=loc1,3,r17
mov in1=0
;;
TEXT_ALIGN(32)
rse_clear_invalid:
#ifdef CONFIG_ITANIUM
// cycle 0
{ .mii
alloc loc0=ar.pfs,2,Nregs-2,2,0
cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse
add out0=-Nregs*8,in0
}{ .mfb
add out1=1,in1 // increment recursion count
nop.f 0
nop.b 0 // can't do br.call here because of alloc (WAW on CFM)
;;
}{ .mfi // cycle 1
mov loc1=0
nop.f 0
mov loc2=0
}{ .mib
mov loc3=0
mov loc4=0
(pRecurse) br.call.sptk.many b0=rse_clear_invalid

}{ .mfi // cycle 2
mov loc5=0
nop.f 0
cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret
}{ .mib
mov loc6=0
mov loc7=0
(pReturn) br.ret.sptk.many b0
}
#else /* !CONFIG_ITANIUM */
alloc loc0=ar.pfs,2,Nregs-2,2,0
cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse
add out0=-Nregs*8,in0
add out1=1,in1 // increment recursion count
mov loc1=0
mov loc2=0
;;
mov loc3=0
mov loc4=0
mov loc5=0
mov loc6=0
mov loc7=0
(pRecurse) br.call.dptk.few b0=rse_clear_invalid
;;
mov loc8=0
mov loc9=0
cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret
mov loc10=0
mov loc11=0
(pReturn) br.ret.dptk.many b0
#endif /* !CONFIG_ITANIUM */
# undef pRecurse
# undef pReturn
;;
alloc r17=ar.pfs,0,0,0,0 // drop current register frame
;;
loadrs
;;
skip_rbs_switch:
mov ar.unat=r25 // M2
(pKStk) extr.u r22=r22,21,1 // I0 extract current value of psr.pp from r22
(pLvSys)mov r19=r0 // A clear r19 for leave_syscall, no-op otherwise
;;
(pUStk) mov ar.bspstore=r23 // M2
(pKStk) dep r29=r22,r29,21,1 // I0 update ipsr.pp with psr.pp
(pLvSys)mov r16=r0 // A clear r16 for leave_syscall, no-op otherwise
;;
#ifdef CONFIG_XEN
movl r25=XSI_IPSR
;;
st8[r25]=r29,XSI_IFS_OFS-XSI_IPSR_OFS
;;
#else
mov cr.ipsr=r29 // M2
#endif
mov ar.pfs=r26 // I0
(pLvSys)mov r17=r0 // A clear r17 for leave_syscall, no-op otherwise

#ifdef CONFIG_XEN
(p9) st8 [r25]=r30
;;
adds r25=XSI_IIP_OFS-XSI_IFS_OFS,r25
;;
#else
(p9) mov cr.ifs=r30 // M2
#endif
mov b0=r21 // I0
(pLvSys)mov r18=r0 // A clear r18 for leave_syscall, no-op otherwise

mov ar.fpsr=r20 // M2
#ifdef CONFIG_XEN
st8 [r25]=r28
#else
mov cr.iip=r28 // M2
#endif
nop 0
;;
(pUStk) mov ar.rnat=r24 // M2 must happen with RSE in lazy mode
nop 0
(pLvSys)mov r2=r0

mov ar.rsc=r27 // M2
mov pr=r31,-1 // I0
#ifdef CONFIG_XEN
;;
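// Xen paravirtualised return: XEN_HYPER_RFI has the hypervisor perform the
// "rfi" shown in the native #else path, resuming the interrupted context.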
XEN_HYPER_RFI;
#else
rfi // B
#endif

/*
* On entry:
* r20 = &current->thread_info->pre_count (if CONFIG_PREEMPT)
* r31 = current->thread_info->flags
* On exit:
* p6 = TRUE if work-pending-check needs to be redone
*/
.work_pending_syscall:
add r2=-8,r2
add r3=-8,r3
;;
st8 [r2]=r8
st8 [r3]=r10
.work_pending:
tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0?
(p6) br.cond.sptk.few .notify
#ifdef CONFIG_PREEMPT
(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1
;;
(pKStk) st4 [r20]=r21
ssm psr.i // enable interrupts
#endif
br.call.spnt.many rp=schedule
.ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1
#ifdef CONFIG_XEN
movl r2=XSI_PSR_I_ADDR
mov r20=1
;;
ld8 r2=[r2]
;;
st1 [r2]=r20
#else
rsm psr.i // disable interrupts
#endif
;;
#ifdef CONFIG_PREEMPT
(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
;;
(pKStk) st4 [r20]=r0 // preempt_count() <- 0
#endif
(pLvSys)br.cond.sptk.few .work_pending_syscall_end
br.cond.sptk.many .work_processed_kernel // re-check

.notify:
(pUStk) br.call.spnt.many rp=notify_resume_user
.ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0
(pLvSys)br.cond.sptk.few .work_pending_syscall_end
br.cond.sptk.many .work_processed_kernel // don't re-check

.work_pending_syscall_end:
adds r2=PT(R8)+16,r12
adds r3=PT(R10)+16,r12
;;
ld8 r8=[r2]
ld8 r10=[r3]
br.cond.sptk.many .work_processed_syscall // re-check

#ifdef CONFIG_XEN
END(xen_leave_kernel)
#else
END(ia64_leave_kernel)
#endif