ia64/xen-unstable

view linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/entry.S @ 5599:348b774a8f97

bitkeeper revision 1.1767 (42c18c1dNRHfk7ZFajLHHtesQcrKlg)

At the time of debugging the fs segment problem, I noticed this bug (and
was able to test it).

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
author kaf24@firebug.cl.cam.ac.uk
date Tue Jun 28 17:42:53 2005 +0000 (2005-06-28)
parents 25ceeee71ab6
children 2b6c1a809807
line source
1 /*
2 * linux/arch/x86_64/entry.S
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
7 *
8 * $Id$
9 *
10 * Jun Nakajima <jun.nakajima@intel.com>
11 * Asit Mallick <asit.k.mallick@intel.com>
12 * Modified for Xen
13 */
15 /*
16 * entry.S contains the system-call and fault low-level handling routines.
17 *
18 * NOTE: This code handles signal-recognition, which happens every time
19 * after an interrupt and after each system call.
20 *
21 * Normal syscalls and interrupts don't save a full stack frame, this is
22 * only done for syscall tracing, signals or fork/exec et al.
23 *
24 * A note on terminology:
25 * - top of stack: Architecture defined interrupt frame from SS to RIP
26 * at the top of the kernel process stack.
27 * - partial stack frame: partially saved registers up to R11.
28 * - full stack frame: Like partial stack frame, but all registers saved.
29 *
30 * TODO:
31 * - schedule it carefully for the final hardware.
32 */
34 #define ASSEMBLY 1
35 #include <linux/config.h>
36 #include <linux/linkage.h>
37 #include <asm/segment.h>
38 #include <asm/smp.h>
39 #include <asm/cache.h>
40 #include <asm/errno.h>
41 #include <asm/dwarf2.h>
42 #include <asm/calling.h>
43 #include <asm/asm_offset.h>
44 #include <asm/msr.h>
45 #include <asm/unistd.h>
46 #include <asm/thread_info.h>
47 #include <asm/hw_irq.h>
48 #include <asm/errno.h>
49 #include <asm-xen/xen-public/arch-x86_64.h>
51 #include "irq_vectors.h"
53 #include "xen_entry.S"
55 .code64
# Preemption helpers: without CONFIG_PREEMPT the kernel-return path
# collapses to a plain register restore (retint_restore_args).
57 #ifdef CONFIG_PREEMPT
58 #define preempt_stop XEN_BLOCK_EVENTS(%rsi)
59 #else
60 #define preempt_stop
61 #define retint_kernel retint_restore_args
62 #endif
64 /*
65 * C code is not supposed to know about undefined top of stack. Every time
66 * a C function with a pt_regs argument is called from the SYSCALL based
67 * fast path FIXUP_TOP_OF_STACK is needed.
68 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
69 * manipulation.
70 */
72 /* %rsp:at FRAMEEND */
73 .macro FIXUP_TOP_OF_STACK tmp
74 movq $__USER_CS,CS(%rsp) # fill in CS slot the fast path left undefined
75 movq $-1,RCX(%rsp) # sentinel in the RCX slot (not a real saved value)
76 .endm
# Intentionally a no-op in this port; \tmp and \offset are unused.
78 .macro RESTORE_TOP_OF_STACK tmp,offset=0
79 .endm
# Build a minimal fake interrupt frame (ss/rsp/eflags/cs/rip + orig_rax,
# six quadwords total) so a kernel thread can use the normal return paths.
81 .macro FAKE_STACK_FRAME child_rip
82 /* push in order ss, rsp, eflags, cs, rip */
83 xorq %rax, %rax
84 pushq %rax /* ss */
85 CFI_ADJUST_CFA_OFFSET 8
86 pushq %rax /* rsp */
87 CFI_ADJUST_CFA_OFFSET 8
88 CFI_OFFSET rip,0
89 pushq $(1<<9) /* eflags - interrupts on */
90 CFI_ADJUST_CFA_OFFSET 8
91 pushq $__KERNEL_CS /* cs */
92 CFI_ADJUST_CFA_OFFSET 8
93 pushq \child_rip /* rip */
94 CFI_ADJUST_CFA_OFFSET 8
95 CFI_OFFSET rip,0
96 pushq %rax /* orig rax */
97 CFI_ADJUST_CFA_OFFSET 8
98 .endm
# Discard the six quadwords pushed by FAKE_STACK_FRAME.
100 .macro UNFAKE_STACK_FRAME
101 addq $8*6, %rsp
102 CFI_ADJUST_CFA_OFFSET -(6*8)
103 .endm
# DWARF unwind annotations for a full pt_regs frame: each saved register
# is described at its offset relative to the SS slot.
105 .macro CFI_DEFAULT_STACK
106 CFI_ADJUST_CFA_OFFSET (SS)
107 CFI_OFFSET r15,R15-SS
108 CFI_OFFSET r14,R14-SS
109 CFI_OFFSET r13,R13-SS
110 CFI_OFFSET r12,R12-SS
111 CFI_OFFSET rbp,RBP-SS
112 CFI_OFFSET rbx,RBX-SS
113 CFI_OFFSET r11,R11-SS
114 CFI_OFFSET r10,R10-SS
115 CFI_OFFSET r9,R9-SS
116 CFI_OFFSET r8,R8-SS
117 CFI_OFFSET rax,RAX-SS
118 CFI_OFFSET rcx,RCX-SS
119 CFI_OFFSET rdx,RDX-SS
120 CFI_OFFSET rsi,RSI-SS
121 CFI_OFFSET rdi,RDI-SS
122 CFI_OFFSET rsp,RSP-SS
123 CFI_OFFSET rip,RIP-SS
124 .endm
126 /*
127 * Must be consistent with the definition in arch_x86_64.h:
128 * struct switch_to_user {
129 * u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
130 * } PACKED;
131 * #define VGCF_IN_SYSCALL (1<<8)
132 */
# Return to user space via the switch_to_user hypercall: build the
# struct switch_to_user on the stack (rax/r11/rcx/flags stored here;
# rip/cs/rflags/rsp/ss reused from the existing frame) and trap to Xen.
133 .macro SWITCH_TO_USER flag
134 movl $0,%gs:pda_kernel_mode # change to user mode
135 subq $8*4,%rsp # reuse rip, cs, rflags, rsp, ss in the stack
136 movq %rax,(%rsp)
137 movq %r11,1*8(%rsp)
138 movq %rcx,2*8(%rsp) # we saved %rcx upon exceptions
139 movq $\flag,3*8(%rsp)
140 movq $__HYPERVISOR_switch_to_user,%rax
141 syscall # hypercall into Xen
142 .endm
# Mark the transition to kernel mode. btsq leaves the previous flag
# value in CF; if we were already in kernel mode, skip the selector fixup.
144 .macro SWITCH_TO_KERNEL ssoff,adjust=0
145 btsq $0,%gs:pda_kernel_mode
146 jc 1f # already in kernel mode
147 orb $1,\ssoff-\adjust+4(%rsp)
148 1:
149 .endm
151 /*
152 * A newly forked process directly context switches into this.
153 */
154 /* rdi: prev */
155 ENTRY(ret_from_fork)
156 CFI_STARTPROC
157 CFI_DEFAULT_STACK
158 call schedule_tail
159 GET_THREAD_INFO(%rcx)
160 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
161 jnz rff_trace
162 rff_action:
163 RESTORE_REST
164 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
165 je int_ret_from_sys_call # kernel thread: take the IRET path
166 testl $_TIF_IA32,threadinfo_flags(%rcx)
167 jnz int_ret_from_sys_call # 32-bit task: take the IRET path
168 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
169 jmp ret_from_sys_call
# Traced child: report syscall exit first, then rejoin the normal path.
170 rff_trace:
171 movq %rsp,%rdi
172 call syscall_trace_leave
173 GET_THREAD_INFO(%rcx)
174 jmp rff_action
175 CFI_ENDPROC
177 /*
178 * System call entry. Up to 6 arguments in registers are supported.
179 *
180 * SYSCALL does not save anything on the stack and does not change the
181 * stack pointer.
182 */
184 /*
185 * Register setup:
186 * rax system call number
187 * rdi arg0
188 * rcx return address for syscall/sysret, C arg3
189 * rsi arg1
190 * rdx arg2
191 * r10 arg3 (--> moved to rcx for C)
192 * r8 arg4
193 * r9 arg5
194 * r11 eflags for syscall/sysret, temporary for C
195 * r12-r15,rbp,rbx saved by C code, not touched.
196 *
197 * Interrupts are off on entry.
198 * Only called from user space.
199 *
200 * XXX if we had a free scratch register we could save the RSP into the stack frame
201 * and report it properly in ps. Unfortunately we haven't.
202 */
# System call fast path: partial frame only (SAVE_ARGS); returns to user
# space via SWITCH_TO_USER rather than a native sysret.
204 ENTRY(system_call)
205 CFI_STARTPROC
206 SAVE_ARGS -8,0
207 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) # keep syscall nr for restart/tracing
208 XEN_UNBLOCK_EVENTS(%r11) # sti equivalent: reenable event delivery
209 GET_THREAD_INFO(%rcx)
210 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
211 jnz tracesys
212 cmpq $__NR_syscall_max,%rax
213 ja badsys
214 movq %r10,%rcx # syscall ABI passes arg3 in %r10, C ABI wants %rcx
215 call *sys_call_table(,%rax,8) # XXX: rip relative
216 movq %rax,RAX-ARGOFFSET(%rsp) # store return value into pt_regs
217 /*
218 * Syscall return path ending with SYSRET (fast path)
219 * Has incomplete stack frame and undefined top of stack.
220 */
221 .globl ret_from_sys_call
222 ret_from_sys_call:
223 movl $_TIF_WORK_MASK,%edi
224 /* edi: flagmask */
225 sysret_check:
226 GET_THREAD_INFO(%rcx)
227 XEN_BLOCK_EVENTS(%rsi) # block events while testing work flags
228 movl threadinfo_flags(%rcx),%edx
229 andl %edi,%edx
230 jnz sysret_careful
231 XEN_UNBLOCK_EVENTS(%rsi)
232 RESTORE_ARGS 0,8,0
233 SWITCH_TO_USER VGCF_IN_SYSCALL # return to user via hypercall
235 /* Handle reschedules */
236 /* edx: work, edi: workmask */
237 sysret_careful:
238 bt $TIF_NEED_RESCHED,%edx
239 jnc sysret_signal
240 XEN_BLOCK_EVENTS(%rsi)
241 pushq %rdi # preserve workmask across schedule()
242 call schedule
243 popq %rdi
244 jmp sysret_check
246 /* Handle a signal */
247 sysret_signal:
248 /* sti */
249 XEN_UNBLOCK_EVENTS(%rsi)
250 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
251 jz 1f
253 /* Really a signal */
254 /* edx: work flags (arg3) */
255 leaq do_notify_resume(%rip),%rax
256 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
257 xorl %esi,%esi # oldset -> arg2
258 call ptregscall_common
259 1: movl $_TIF_NEED_RESCHED,%edi # recheck only for reschedule
260 jmp sysret_check
262 /* Do syscall tracing */
263 tracesys:
264 SAVE_REST
265 movq $-ENOSYS,RAX(%rsp) # default result if nr is out of range
266 FIXUP_TOP_OF_STACK %rdi
267 movq %rsp,%rdi
268 call syscall_trace_enter
269 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
270 RESTORE_REST
271 cmpq $__NR_syscall_max,%rax
272 ja 1f
273 movq %r10,%rcx /* fixup for C */
274 call *sys_call_table(,%rax,8)
275 movq %rax,RAX-ARGOFFSET(%rsp)
276 1: SAVE_REST
277 movq %rsp,%rdi
278 call syscall_trace_leave
279 RESTORE_TOP_OF_STACK %rbx
280 RESTORE_REST
281 jmp ret_from_sys_call
283 badsys:
284 movq $-ENOSYS,RAX-ARGOFFSET(%rsp) # unknown syscall number
285 jmp ret_from_sys_call
287 /*
288 * Syscall return path ending with IRET.
289 * Has correct top of stack, but partial stack frame.
290 */
291 ENTRY(int_ret_from_sys_call)
292 XEN_BLOCK_EVENTS(%rsi)
293 testb $3,CS-ARGOFFSET(%rsp)
294 jnz 1f
295 /* Need to set the proper %ss (not NULL) for ring 3 iretq */
296 movl $__KERNEL_DS,SS-ARGOFFSET(%rsp)
297 jmp retint_restore_args # return from ring-3 kernel
298 1:
299 movl $_TIF_ALLWORK_MASK,%edi
300 /* edi: mask to check */
301 int_with_check:
302 GET_THREAD_INFO(%rcx)
303 movl threadinfo_flags(%rcx),%edx
304 andl %edi,%edx
305 jnz int_careful
306 jmp retint_restore_args
308 /* Either reschedule or signal or syscall exit tracking needed. */
309 /* First do a reschedule test. */
310 /* edx: work, edi: workmask */
311 int_careful:
312 bt $TIF_NEED_RESCHED,%edx
313 jnc int_very_careful
314 /* sti */
315 XEN_UNBLOCK_EVENTS(%rsi)
316 pushq %rdi # preserve workmask across schedule()
317 call schedule
318 popq %rdi
319 jmp int_with_check
321 /* handle signals and tracing -- both require a full stack frame */
322 int_very_careful:
323 /* sti */
324 XEN_UNBLOCK_EVENTS(%rsi)
325 SAVE_REST
326 /* Check for syscall exit trace */
327 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
328 jz int_signal
329 pushq %rdi
330 leaq 8(%rsp),%rdi # &ptregs -> arg1
331 call syscall_trace_leave
332 popq %rdi
333 btr $TIF_SYSCALL_TRACE,%edi # drop trace bits from the recheck mask
334 btr $TIF_SYSCALL_AUDIT,%edi
335 btr $TIF_SINGLESTEP,%edi
336 jmp int_restore_rest
338 int_signal:
339 testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
340 jz 1f
341 movq %rsp,%rdi # &ptregs -> arg1
342 xorl %esi,%esi # oldset -> arg2
343 call do_notify_resume
344 1: movl $_TIF_NEED_RESCHED,%edi
345 int_restore_rest:
346 RESTORE_REST
347 jmp int_with_check
348 CFI_ENDPROC
350 /*
351 * Certain special system calls that need to save a complete full stack frame.
352 */
# PTREGSCALL: emit a stub that loads the handler into %rax, points \arg
# at the pt_regs on the stack, and tails into ptregscall_common.
354 .macro PTREGSCALL label,func,arg
355 .globl \label
356 \label:
357 leaq \func(%rip),%rax
358 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
359 jmp ptregscall_common
360 .endm
362 PTREGSCALL stub_clone, sys_clone, %r8
363 PTREGSCALL stub_fork, sys_fork, %rdi
364 PTREGSCALL stub_vfork, sys_vfork, %rdi
365 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
366 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
367 PTREGSCALL stub_iopl, sys_iopl, %rsi
# Common body: complete the frame, call the handler in %rax, restore,
# and return through the original return address.
369 ENTRY(ptregscall_common)
370 CFI_STARTPROC
371 popq %r11 # pop return address so SAVE_REST frame lines up
372 CFI_ADJUST_CFA_OFFSET -8
373 SAVE_REST
374 movq %r11, %r15 # stash return address in callee-saved %r15
375 FIXUP_TOP_OF_STACK %r11
376 call *%rax
377 RESTORE_TOP_OF_STACK %r11
378 movq %r15, %r11
379 RESTORE_REST
380 pushq %r11 # put return address back
381 CFI_ADJUST_CFA_OFFSET 8
382 ret
383 CFI_ENDPROC
# execve stub: like ptregscall_common but checks afterwards whether the
# new image is 32-bit (TIF_IA32) and, if so, returns via the IRET path.
385 ENTRY(stub_execve)
386 CFI_STARTPROC
387 popq %r11 # pop return address so SAVE_REST frame lines up
388 CFI_ADJUST_CFA_OFFSET -8
389 SAVE_REST
390 movq %r11, %r15 # stash return address in callee-saved %r15
391 FIXUP_TOP_OF_STACK %r11
392 call sys_execve
393 GET_THREAD_INFO(%rcx)
394 bt $TIF_IA32,threadinfo_flags(%rcx)
395 jc exec_32bit
396 RESTORE_TOP_OF_STACK %r11
397 movq %r15, %r11
398 RESTORE_REST
399 push %r11 # put return address back
400 ret
402 exec_32bit:
403 CFI_ADJUST_CFA_OFFSET REST_SKIP
404 movq %rax,RAX(%rsp)
405 RESTORE_REST
406 jmp int_ret_from_sys_call
407 CFI_ENDPROC
409 /*
410 * sigreturn is special because it needs to restore all registers on return.
411 * This cannot be done with SYSRET, so use the IRET return path instead.
412 */
413 ENTRY(stub_rt_sigreturn)
414 CFI_STARTPROC
415 addq $8, %rsp # skip return address
416 SAVE_REST
417 movq %rsp,%rdi # &pt_regs -> arg1
418 FIXUP_TOP_OF_STACK %r11
419 call sys_rt_sigreturn
420 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
421 RESTORE_REST
422 jmp int_ret_from_sys_call
423 CFI_ENDPROC
426 /*
427 * Interrupt entry/exit.
428 *
429 * Interrupt entry points save only callee clobbered registers in fast path.
430 *
431 * Entry runs with interrupts off.
432 */
434 /* 0(%rsp): interrupt number */
# Common interrupt body: save registers, optionally switch to the
# per-CPU irq stack, then call the C handler \func with &pt_regs in %rdi.
435 .macro interrupt func
436 CFI_STARTPROC simple
437 CFI_DEF_CFA rsp,(SS-RDI)
438 CFI_REL_OFFSET rsp,(RSP-ORIG_RAX)
439 CFI_REL_OFFSET rip,(RIP-ORIG_RAX)
440 cld
441 #ifdef CONFIG_DEBUG_INFO
442 SAVE_ALL
443 movq %rsp,%rdi
444 /*
445 * Setup a stack frame pointer. This allows gdb to trace
446 * back to the original stack.
447 */
448 movq %rsp,%rbp
449 CFI_DEF_CFA_REGISTER rbp
450 #else
451 SAVE_ARGS
452 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
453 #endif
454 #if 0 /* For Xen we don't need to do this */
455 testl $3,CS(%rdi)
456 je 1f
457 swapgs
458 #endif
459 1: addl $1,%gs:pda_irqcount # RED-PEN should check preempt count
460 movq %gs:pda_irqstackptr,%rax
461 cmoveq %rax,%rsp # switch stacks only if count became 0 (ZF from addl)
462 pushq %rdi # save old stack
463 call \func
464 .endm
# Interrupt/exception return: recheck work flags, then either restore
# and iret (kernel) or return to user space via the hypercall.
466 retint_check:
467 movl threadinfo_flags(%rcx),%edx
468 andl %edi,%edx
469 jnz retint_careful
470 retint_restore_args:
471 movb EVENT_MASK-REST_SKIP(%rsp), %al
472 notb %al # %al == ~saved_mask
473 XEN_GET_VCPU_INFO(%rsi)
474 andb evtchn_upcall_mask(%rsi),%al
475 andb $1,%al # %al == mask & ~saved_mask
476 jnz restore_all_enable_events # != 0 => reenable event delivery
477 XEN_PUT_VCPU_INFO(%rsi)
479 RESTORE_ARGS 0,8,0
480 testb $3,8(%rsp) # check CS
481 jnz user_mode
482 kernel_mode:
483 orb $3,1*8(%rsp) # force RPL 3 on saved %cs for the ring-3 kernel
484 iretq
485 user_mode:
486 SWITCH_TO_USER 0
488 /* edi: workmask, edx: work */
489 retint_careful:
490 bt $TIF_NEED_RESCHED,%edx
491 jnc retint_signal
492 XEN_UNBLOCK_EVENTS(%rsi)
493 /* sti */
494 pushq %rdi # preserve workmask across schedule()
495 call schedule
496 popq %rdi
497 XEN_BLOCK_EVENTS(%rsi)
498 GET_THREAD_INFO(%rcx)
499 /* cli */
500 jmp retint_check
502 retint_signal:
503 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
504 jz retint_restore_args
505 XEN_UNBLOCK_EVENTS(%rsi)
506 SAVE_REST
507 movq $-1,ORIG_RAX(%rsp)
508 xorq %rsi,%rsi # oldset
509 movq %rsp,%rdi # &pt_regs
510 call do_notify_resume
511 RESTORE_REST
512 XEN_BLOCK_EVENTS(%rsi)
513 movl $_TIF_NEED_RESCHED,%edi
514 GET_THREAD_INFO(%rcx)
515 jmp retint_check
517 #ifdef CONFIG_PREEMPT
518 /* Returning to kernel space. Check if we need preemption */
519 /* rcx: threadinfo. interrupts off. */
520 .p2align
521 retint_kernel:
522 cmpl $0,threadinfo_preempt_count(%rcx)
523 jnz retint_restore_args # preemption disabled
524 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
525 jnc retint_restore_args
526 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
527 jc retint_restore_args
528 movl $PREEMPT_ACTIVE,threadinfo_preempt_count(%rcx)
529 /* sti */
530 XEN_UNBLOCK_EVENTS(%rsi)
531 call schedule
532 XEN_BLOCK_EVENTS(%rsi)
533 /* cli */
534 GET_THREAD_INFO(%rcx)
535 movl $0,threadinfo_preempt_count(%rcx)
536 jmp retint_kernel /* check again */
537 #endif
538 CFI_ENDPROC
540 /*
541 * APIC interrupts.
542 */
# Push the vector number biased by -256, then enter the common
# interrupt body.
543 .macro apicinterrupt num,func
544 pushq $\num-256
545 interrupt \func
546 jmp error_entry
547 CFI_ENDPROC
548 .endm
550 #ifdef CONFIG_SMP
551 ENTRY(reschedule_interrupt)
552 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
554 ENTRY(invalidate_interrupt)
555 apicinterrupt INVALIDATE_TLB_VECTOR,smp_invalidate_interrupt
557 ENTRY(call_function_interrupt)
558 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
559 #endif
561 #ifdef CONFIG_X86_LOCAL_APIC
562 ENTRY(apic_timer_interrupt)
563 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
565 ENTRY(error_interrupt)
566 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
568 ENTRY(spurious_interrupt)
569 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
570 #endif
572 /*
573 * Exception entry points.
574 */
# zeroentry: for exceptions without a CPU error code — push a 0 in its
# place. The saved %rcx/%r11 from the Xen frame are reloaded first.
575 .macro zeroentry sym
576 movq (%rsp),%rcx
577 movq 8(%rsp),%r11
578 addq $0x10,%rsp /* skip rcx and r11 */
579 pushq $0 /* push error code/oldrax */
580 pushq %rax /* push real oldrax to the rdi slot */
581 leaq \sym(%rip),%rax
582 jmp error_entry
583 .endm
# errorentry: same, but the CPU error code is already on the stack.
585 .macro errorentry sym
586 movq (%rsp),%rcx
587 movq 8(%rsp),%r11
588 addq $0x10,%rsp /* rsp points to the error code */
589 pushq %rax
590 leaq \sym(%rip),%rax
591 jmp error_entry
592 .endm
594 /* error code is on the stack already */
595 /* handle NMI like exceptions that can happen everywhere */
596 .macro paranoidentry sym
597 movq (%rsp),%rcx
598 movq 8(%rsp),%r11
599 addq $0x10,%rsp /* skip rcx and r11 */
600 SAVE_ALL
601 cld
602 movl $1,%ebx
603 movl $MSR_GS_BASE,%ecx
604 rdmsr
605 testl %edx,%edx # high half of GS base negative => kernel GS
606 js 1f
607 /* swapgs */
608 xorl %ebx,%ebx # %ebx = 0: GS was user's (native would swapgs)
609 1: movq %rsp,%rdi
610 movq ORIG_RAX(%rsp),%rsi
611 movq $-1,ORIG_RAX(%rsp)
612 call \sym
613 .endm
615 /*
616 * Exception entry point. This expects an error code/orig_rax on the stack
617 * and the exception handler in %rax.
618 */
619 ENTRY(error_entry)
620 CFI_STARTPROC simple
621 CFI_DEF_CFA rsp,(SS-RDI)
622 CFI_REL_OFFSET rsp,(RSP-RDI)
623 CFI_REL_OFFSET rip,(RIP-RDI)
624 /* rdi slot contains rax, oldrax contains error code */
625 cld
# Open-coded register save: allocate the rest of the pt_regs frame and
# fill each slot, emitting matching CFI annotations as we go.
626 subq $14*8,%rsp
627 CFI_ADJUST_CFA_OFFSET (14*8)
628 movq %rsi,13*8(%rsp)
629 CFI_REL_OFFSET rsi,RSI
630 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
631 movq %rdx,12*8(%rsp)
632 CFI_REL_OFFSET rdx,RDX
633 movq %rcx,11*8(%rsp)
634 CFI_REL_OFFSET rcx,RCX
635 movq %rsi,10*8(%rsp) /* store rax */
636 CFI_REL_OFFSET rax,RAX
637 movq %r8, 9*8(%rsp)
638 CFI_REL_OFFSET r8,R8
639 movq %r9, 8*8(%rsp)
640 CFI_REL_OFFSET r9,R9
641 movq %r10,7*8(%rsp)
642 CFI_REL_OFFSET r10,R10
643 movq %r11,6*8(%rsp)
644 CFI_REL_OFFSET r11,R11
645 movq %rbx,5*8(%rsp)
646 CFI_REL_OFFSET rbx,RBX
647 movq %rbp,4*8(%rsp)
648 CFI_REL_OFFSET rbp,RBP
649 movq %r12,3*8(%rsp)
650 CFI_REL_OFFSET r12,R12
651 movq %r13,2*8(%rsp)
652 CFI_REL_OFFSET r13,R13
653 movq %r14,1*8(%rsp)
654 CFI_REL_OFFSET r14,R14
655 movq %r15,(%rsp)
656 CFI_REL_OFFSET r15,R15
657 #if 0
658 cmpl $__KERNEL_CS,CS(%rsp)
659 je error_kernelspace
660 #endif
661 error_call_handler:
662 movq %rdi, RDI(%rsp)
663 movq %rsp,%rdi # &pt_regs -> arg1
664 movq ORIG_RAX(%rsp),%rsi # get error code
665 movq $-1,ORIG_RAX(%rsp)
666 call *%rax
667 error_exit:
668 RESTORE_REST
669 /* cli */
670 XEN_BLOCK_EVENTS(%rsi)
671 GET_THREAD_INFO(%rcx)
672 testb $3,CS-ARGOFFSET(%rsp)
673 jz retint_kernel
674 movl threadinfo_flags(%rcx),%edx
675 movl $_TIF_WORK_MASK,%edi
676 andl %edi,%edx
677 jnz retint_careful
678 jmp retint_restore_args
680 error_kernelspace:
681 /*
682 * We need to re-write the logic here because we don't do iretq to
683 * to return to user mode. It's still possible that we get trap/fault
684 * in the kernel (when accessing buffers pointed to by system calls,
685 * for example).
686 *
687 */
688 #if 0
689 incl %ebx
690 /* There are two places in the kernel that can potentially fault with
691 usergs. Handle them here. The exception handlers after
692 iret run with kernel gs again, so don't set the user space flag.
693 B stepping K8s sometimes report an truncated RIP for IRET
694 exceptions returning to compat mode. Check for these here too. */
695 leaq iret_label(%rip),%rbp
696 cmpq %rbp,RIP(%rsp)
697 je error_swapgs
698 movl %ebp,%ebp /* zero extend */
699 cmpq %rbp,RIP(%rsp)
700 je error_swapgs
701 cmpq $gs_change,RIP(%rsp)
702 je error_swapgs
703 jmp error_sti
704 #endif
706 ENTRY(hypervisor_callback)
707 zeroentry do_hypervisor_callback
709 /*
710 * Copied from arch/xen/i386/kernel/entry.S
711 */
712 # A note on the "critical region" in our callback handler.
713 # We want to avoid stacking callback handlers due to events occurring
714 # during handling of the last event. To do this, we keep events disabled
715 # until we've done all processing. HOWEVER, we must enable events before
716 # popping the stack frame (can't be done atomically) and so it would still
717 # be possible to get enough handler activations to overflow the stack.
718 # Although unlikely, bugs of that kind are hard to track down, so we'd
719 # like to avoid the possibility.
720 # So, on entry to the handler we detect whether we interrupted an
721 # existing activation in its critical region -- if so, we pop the current
722 # activation and restart the handler using the previous one.
723 ENTRY(do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
724 # Since we don't modify %rdi, evtchn_do_upcall(struct *pt_regs) will
725 # see the correct pointer to the pt_regs
726 addq $8, %rsp # we don't return, adjust the stack frame
727 11: movb $0, EVENT_MASK(%rsp) # clear saved event mask in the frame
728 call evtchn_do_upcall
729 jmp error_exit
731 ALIGN
732 restore_all_enable_events:
733 XEN_UNBLOCK_EVENTS(%rsi) # %rsi is already set up...
735 scrit: /**** START OF CRITICAL REGION ****/
736 XEN_TEST_PENDING(%rsi)
737 jnz 14f # process more events if necessary...
738 XEN_PUT_VCPU_INFO(%rsi)
739 RESTORE_ARGS 0,8,0
740 testb $3,8(%rsp) # check CS
741 jnz crit_user_mode
742 orb $3,1*8(%rsp) # force RPL 3 on saved %cs (ring-3 kernel)
743 iretq
744 crit_user_mode:
745 SWITCH_TO_USER 0
747 14: XEN_LOCKED_BLOCK_EVENTS(%rsi) # more events pending: block and loop
748 XEN_PUT_VCPU_INFO(%rsi)
749 SAVE_REST
750 movq %rsp,%rdi # set the argument again
751 jmp 11b
752 ecrit: /**** END OF CRITICAL REGION ****/
753 # At this point, unlike on x86-32, we don't do the fixup to simplify the
754 # code and the stack frame is more complex on x86-64.
755 # When the kernel is interrupted in the critical section, the kernel
756 # will do IRET in that case, and everything will be restored at that point,
757 # i.e. it just resumes from the next instruction interrupted with the same context.
759 # Hypervisor uses this for application faults while it executes.
# Reload the data segment selectors that Xen pushed; a faulting selector
# load is patched to 0 via the .fixup/__ex_table entries below.
760 ENTRY(failsafe_callback)
761 addq $0x10,%rsp /* skip rcx and r11 */
762 1: movl (%rsp),%ds
763 2: movl 8(%rsp),%es
764 3: movl 16(%rsp),%fs
765 4: movl 24(%rsp),%gs
766 addq $0x20,%rsp /* skip the above selectors */
767 SAVE_ALL
768 jmp error_exit
769 .section .fixup,"ax"; \
770 6: movq $0,(%rsp); \
771 jmp 1b; \
772 7: movq $0,8(%rsp); \
773 jmp 2b; \
774 8: movq $0,16(%rsp); \
775 jmp 3b; \
776 9: movq $0,24(%rsp); \
777 jmp 4b; \
778 .previous; \
779 .section __ex_table,"a";\
780 .align 16; \
781 .quad 1b,6b; \
782 .quad 2b,7b; \
783 .quad 3b,8b; \
784 .quad 4b,9b; \
785 .previous
787 #if 0
788 .section __ex_table,"a"
789 .align 8
790 .quad gs_change,bad_gs
791 .previous
792 .section .fixup,"ax"
793 /* running with kernelgs */
794 bad_gs:
795 /* swapgs */ /* switch back to user gs */
796 xorl %eax,%eax
797 movl %eax,%gs
798 jmp 2b
799 .previous
800 #endif
802 /*
803 * Create a kernel thread.
804 *
805 * C extern interface:
806 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
807 *
808 * asm input arguments:
809 * rdi: fn, rsi: arg, rdx: flags
810 */
811 ENTRY(kernel_thread)
812 CFI_STARTPROC
813 FAKE_STACK_FRAME $child_rip
814 SAVE_ALL
816 # rdi: flags, rsi: usp, rdx: will be &pt_regs
817 movq %rdx,%rdi
818 orq kernel_thread_flags(%rip),%rdi
819 movq $-1, %rsi
820 movq %rsp, %rdx
822 xorl %r8d,%r8d # parent_tidptr = NULL
823 xorl %r9d,%r9d # child_tidptr = NULL
825 # clone now
826 call do_fork
827 movq %rax,RAX(%rsp) # return pid (or error) to caller
828 xorl %edi,%edi
830 /*
831 * It isn't worth to check for reschedule here,
832 * so internally to the x86_64 port you can rely on kernel_thread()
833 * not to reschedule the child before returning, this avoids the need
834 * of hacks for example to fork off the per-CPU idle tasks.
835 * [Hopefully no generic code relies on the reschedule -AK]
836 */
837 RESTORE_ALL
838 UNFAKE_STACK_FRAME
839 ret
840 CFI_ENDPROC
843 child_rip:
844 /*
845 * Here we are in the child and the registers are set as they were
846 * at kernel_thread() invocation in the parent.
847 */
848 movq %rdi, %rax # %rax = fn
849 movq %rsi, %rdi # %rdi = arg
850 call *%rax
851 # exit
852 xorq %rdi, %rdi # exit code 0
853 call do_exit
855 /*
856 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
857 *
858 * C extern interface:
859 * extern long execve(char *name, char **argv, char **envp)
860 *
861 * asm input arguments:
862 * rdi: name, rsi: argv, rdx: envp
863 *
864 * We want to fallback into:
865 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
866 *
867 * do_sys_execve asm fallback arguments:
868 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
869 */
870 ENTRY(execve)
871 CFI_STARTPROC
872 FAKE_STACK_FRAME $0
873 SAVE_ALL
874 call sys_execve
875 movq %rax, RAX(%rsp)
876 RESTORE_REST
877 testq %rax,%rax
878 jne 1f # failure: plain return to caller
879 jmp int_ret_from_sys_call # success: enter new image via IRET path
880 1: RESTORE_ARGS
881 UNFAKE_STACK_FRAME
882 ret
883 CFI_ENDPROC
886 /*
887 * Copy error_entry because of the different stack frame
888 */
# Page fault entry: %cr2 is on the stack (pushed by Xen) and is passed
# as the 3rd argument to do_page_fault.
889 ENTRY(page_fault)
890 movq (%rsp),%rcx # reload %rcx saved in the Xen frame
891 movq 8(%rsp),%r11 # reload %r11 saved in the Xen frame
892 addq $0x10,%rsp # now %rsp points to %cr2
893 pushq %rax
894 leaq do_page_fault(%rip),%rax
895 cld
896 subq $13*8,%rsp
897 movq %rdx,12*8(%rsp) # save %rdx
898 movq 13*8(%rsp),%rdx # load rax
899 movq %rcx,11*8(%rsp)
900 movq %rdx,10*8(%rsp) # store rax
901 movq %rsi,13*8(%rsp) # now save %rsi
902 movq 14*8(%rsp),%rdx # load %cr2, 3rd argument
903 movq %r8, 9*8(%rsp)
904 movq %r9, 8*8(%rsp)
905 movq %r10,7*8(%rsp)
906 movq %r11,6*8(%rsp)
907 movq %rbx,5*8(%rsp)
908 movq %rbp,4*8(%rsp)
909 movq %r12,3*8(%rsp)
910 movq %r13,2*8(%rsp)
911 movq %r14,1*8(%rsp)
912 movq %r15,(%rsp)
913 #if 0
914 cmpl $__KERNEL_CS,CS(%rsp)
915 je error_kernelspace
916 #endif
917 /*
918 * 1st and 2nd arguments are set by error_call_handler
919 */
920 jmp error_call_handler
922 ENTRY(coprocessor_error)
923 zeroentry do_coprocessor_error
925 ENTRY(simd_coprocessor_error)
926 zeroentry do_simd_coprocessor_error
928 ENTRY(device_not_available)
929 zeroentry math_state_restore
931 /* runs on exception stack */
932 ENTRY(debug)
933 CFI_STARTPROC
934 pushq $0 # fake error code
935 CFI_ADJUST_CFA_OFFSET 8
936 paranoidentry do_debug
937 /* switch back to process stack to restore the state ptrace touched */
938 movq %rax,%rsp
939 jmp paranoid_exit
940 CFI_ENDPROC
942 #if 0
943 /* runs on exception stack */
944 ENTRY(nmi)
945 CFI_STARTPROC
946 pushq $-1
947 CFI_ADJUST_CFA_OFFSET 8
948 paranoidentry do_nmi
949 /* ebx: no swapgs flag */
950 #endif
# Exit path for paranoidentry handlers; %ebx was set by paranoidentry
# (native code would swapgs when %ebx == 0 — here it is commented out).
951 paranoid_exit:
952 testl %ebx,%ebx /* swapgs needed? */
953 jnz paranoid_restore
954 paranoid_swapgs:
955 /* cli
956 swapgs */
957 paranoid_restore:
958 RESTORE_ALL 8
959 /* iretq */
960 paranoid_userspace:
961 /* cli */
962 GET_THREAD_INFO(%rcx)
963 movl threadinfo_flags(%rcx),%edx
964 testl $_TIF_NEED_RESCHED,%edx
965 jnz paranoid_resched
966 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
967 jnz paranoid_signal
968 jmp paranoid_swapgs
969 paranoid_resched:
970 /* sti */
971 call schedule
972 jmp paranoid_exit
973 paranoid_signal:
974 /* sti */
975 xorl %esi,%esi /* oldset */
976 movq %rsp,%rdi /* &pt_regs */
977 call do_notify_resume
978 jmp paranoid_exit
979 CFI_ENDPROC
# Exception vector stubs: zeroentry pushes a 0 error code, errorentry
# keeps the CPU-supplied one, paranoidentry saves all registers.
981 ENTRY(int3)
982 zeroentry do_int3
984 ENTRY(overflow)
985 zeroentry do_overflow
987 ENTRY(bounds)
988 zeroentry do_bounds
990 ENTRY(invalid_op)
991 zeroentry do_invalid_op
993 ENTRY(coprocessor_segment_overrun)
994 zeroentry do_coprocessor_segment_overrun
996 ENTRY(reserved)
997 zeroentry do_reserved
999 /* runs on exception stack */
1000 ENTRY(double_fault)
1001 CFI_STARTPROC
1002 paranoidentry do_double_fault
1003 movq %rax,%rsp # switch back to process stack
1004 jmp paranoid_exit
1005 CFI_ENDPROC
1007 ENTRY(invalid_TSS)
1008 errorentry do_invalid_TSS
1010 ENTRY(segment_not_present)
1011 errorentry do_segment_not_present
1013 /* runs on exception stack */
1014 ENTRY(stack_segment)
1015 CFI_STARTPROC
1016 paranoidentry do_stack_segment
1017 movq %rax,%rsp # switch back to process stack
1018 jmp paranoid_exit
1019 CFI_ENDPROC
1021 ENTRY(general_protection)
1022 errorentry do_general_protection
1024 ENTRY(alignment_check)
1025 errorentry do_alignment_check
1027 ENTRY(divide_error)
1028 zeroentry do_divide_error
1030 ENTRY(spurious_interrupt_bug)
1031 zeroentry do_spurious_interrupt_bug
1033 #ifdef CONFIG_X86_MCE
1034 /* runs on exception stack */
1035 ENTRY(machine_check)
1036 CFI_STARTPROC
1037 pushq $0 # fake error code
1038 CFI_ADJUST_CFA_OFFSET 8
1039 paranoidentry do_machine_check
1040 jmp paranoid_exit
1041 CFI_ENDPROC
1042 #endif
1044 ENTRY(call_debug)
1045 zeroentry do_call_debug