
view linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S @ 9327:8cc027def1de

linux-x86_64: Sync with native.

From: Jan Beulich <JBeulich@novell.com>
Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
author cl349@firebug.cl.cam.ac.uk
date Sat Mar 18 16:30:20 2006 +0000 (2006-03-18)
parents c823e60207f0
children 47dda4fa5d11
/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 *
 * $Id$
 *
 * Jun Nakajima <jun.nakajima@intel.com>
 * Asit Mallick <asit.k.mallick@intel.com>
 *     Modified for Xen
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: like the partial stack frame, but with all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */

#define ASSEMBLY 1
#include <linux/config.h>
#ifdef CONFIG_DEBUG_INFO
#undef CONFIG_DEBUG_INFO
#endif
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/errno.h>
#include <xen/interface/arch-x86_64.h>
#include <xen/interface/features.h>

#include "irq_vectors.h"

#include "xen_entry.S"

.code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

NMI_MASK = 0x80000000

/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

/* %rsp:at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
movq $__USER_CS,CS(%rsp)
movq $-1,RCX(%rsp)
.endm

.macro RESTORE_TOP_OF_STACK tmp,offset=0
.endm

.macro FAKE_STACK_FRAME child_rip
/* push in order ss, rsp, eflags, cs, rip */
xorl %eax, %eax
pushq %rax /* ss */
CFI_ADJUST_CFA_OFFSET 8
/*CFI_REL_OFFSET ss,0*/
pushq %rax /* rsp */
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rsp,0
pushq $(1<<9) /* eflags - interrupts on */
CFI_ADJUST_CFA_OFFSET 8
/*CFI_REL_OFFSET rflags,0*/
pushq $__KERNEL_CS /* cs */
CFI_ADJUST_CFA_OFFSET 8
/*CFI_REL_OFFSET cs,0*/
pushq \child_rip /* rip */
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rip,0
pushq %rax /* orig rax */
CFI_ADJUST_CFA_OFFSET 8
.endm

.macro UNFAKE_STACK_FRAME
addq $8*6, %rsp
CFI_ADJUST_CFA_OFFSET -(6*8)
.endm
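
/*
 * For reference, FAKE_STACK_FRAME builds the following six-quadword frame
 * (highest address first, matching the push order above):
 *
 *	ss       = 0
 *	rsp      = 0
 *	eflags   = (1<<9)        interrupts enabled
 *	cs       = __KERNEL_CS
 *	rip      = \child_rip
 *	orig_rax = 0
 *
 * which is why UNFAKE_STACK_FRAME simply pops 6*8 bytes again.
 */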

.macro CFI_DEFAULT_STACK start=1
.if \start
CFI_STARTPROC simple
CFI_DEF_CFA rsp,SS+8
.else
CFI_DEF_CFA_OFFSET SS+8
.endif
CFI_REL_OFFSET r15,R15
CFI_REL_OFFSET r14,R14
CFI_REL_OFFSET r13,R13
CFI_REL_OFFSET r12,R12
CFI_REL_OFFSET rbp,RBP
CFI_REL_OFFSET rbx,RBX
CFI_REL_OFFSET r11,R11
CFI_REL_OFFSET r10,R10
CFI_REL_OFFSET r9,R9
CFI_REL_OFFSET r8,R8
CFI_REL_OFFSET rax,RAX
CFI_REL_OFFSET rcx,RCX
CFI_REL_OFFSET rdx,RDX
CFI_REL_OFFSET rsi,RSI
CFI_REL_OFFSET rdi,RDI
CFI_REL_OFFSET rip,RIP
/*CFI_REL_OFFSET cs,CS*/
/*CFI_REL_OFFSET rflags,EFLAGS*/
CFI_REL_OFFSET rsp,RSP
/*CFI_REL_OFFSET ss,SS*/
.endm

/*
 * Must be consistent with the definition in arch-x86_64.h:
 *	struct iret_context {
 *		u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
 *	};
 * #define VGCF_IN_SYSCALL (1<<8)
 */

.macro HYPERVISOR_IRET flag
testb $3,1*8(%rsp)
jnz 2f
testl $NMI_MASK,2*8(%rsp)
jnz 2f

testb $1,(xen_features+XENFEAT_supervisor_mode_kernel)
jnz 1f

/* Direct iret to kernel space. Correct CS and SS. */
orb $3,1*8(%rsp)
orb $3,4*8(%rsp)
1: iretq

2: /* Slow iret via hypervisor. */
andl $~NMI_MASK, 16(%rsp)
pushq $\flag
jmp hypercall_page + (__HYPERVISOR_iret * 32)
.endm
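
/*
 * For reference, each hypercall stub in the hypercall transfer page is
 * 32 bytes, so hypercall_page + __HYPERVISOR_iret*32 jumps to the stub
 * for HYPERVISOR_iret.  A rough C sketch of the test above (frame layout
 * as left by RESTORE_ARGS: rip, cs, rflags, rsp, ss from %rsp upwards):
 *
 *	if ((frame->cs & 3) == 0 && !(frame->rflags & NMI_MASK)) {
 *		if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
 *			frame->cs |= 3;		// direct iret to kernel space
 *			frame->ss |= 3;
 *		}
 *		iretq;
 *	} else {
 *		frame->rflags &= ~NMI_MASK;	// slow iret via hypervisor
 *		// push \flag and jump to the HYPERVISOR_iret stub
 *	}
 */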

.macro SWITCH_TO_KERNEL ssoff,adjust=0
jc 1f
orb $1,\ssoff-\adjust+4(%rsp)
1:
.endm

/*
 * A newly forked process directly context switches into this.
 */
/* rdi: prev */
ENTRY(ret_from_fork)
CFI_DEFAULT_STACK
call schedule_tail
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
jnz rff_trace
rff_action:
RESTORE_REST
testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
je int_ret_from_sys_call
testl $_TIF_IA32,threadinfo_flags(%rcx)
jnz int_ret_from_sys_call
RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
jmp ret_from_sys_call
rff_trace:
movq %rsp,%rdi
call syscall_trace_leave
GET_THREAD_INFO(%rcx)
jmp rff_action
CFI_ENDPROC

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3   (--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX  if we had a free scratch register we could save the RSP into the stack
 *      frame and report it properly in ps. Unfortunately we don't have one.
 */
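
/*
 * Example (sys_foo is just a placeholder): for a six-argument call
 *	long sys_foo(long a, long b, long c, long d, long e, long f);
 * user space sets up
 *	rax = __NR_foo, rdi = a, rsi = b, rdx = c, r10 = d, r8 = e, r9 = f
 * and only r10 has to be moved into rcx below before calling the C
 * handler, because rcx itself is clobbered by the syscall instruction
 * (it holds the return address).
 */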

ENTRY(system_call)
CFI_STARTPROC simple
CFI_DEF_CFA rsp,0
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
SAVE_ARGS -8,0
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
XEN_UNBLOCK_EVENTS(%r11)
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
CFI_REMEMBER_STATE
jnz tracesys
cmpq $__NR_syscall_max,%rax
ja badsys
movq %r10,%rcx
call *sys_call_table(,%rax,8) # XXX: rip relative
movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
.globl ret_from_sys_call
ret_from_sys_call:
movl $_TIF_ALLWORK_MASK,%edi
/* edi: flagmask */
sysret_check:
GET_THREAD_INFO(%rcx)
XEN_BLOCK_EVENTS(%rsi)
movl threadinfo_flags(%rcx),%edx
andl %edi,%edx
CFI_REMEMBER_STATE
jnz sysret_careful
XEN_UNBLOCK_EVENTS(%rsi)
CFI_REGISTER rip,rcx
RESTORE_ARGS 0,8,0
/*CFI_REGISTER rflags,r11*/
HYPERVISOR_IRET VGCF_IN_SYSCALL

/* Handle reschedules */
/* edx: work, edi: workmask */
sysret_careful:
CFI_RESTORE_STATE
bt $TIF_NEED_RESCHED,%edx
jnc sysret_signal
XEN_BLOCK_EVENTS(%rsi)
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
jmp sysret_check

/* Handle a signal */
sysret_signal:
/* sti */
XEN_UNBLOCK_EVENTS(%rsi)
testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
jz 1f

/* Really a signal */
/* edx: work flags (arg3) */
leaq do_notify_resume(%rip),%rax
leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
xorl %esi,%esi # oldset -> arg2
call ptregscall_common
1: movl $_TIF_NEED_RESCHED,%edi
jmp sysret_check

badsys:
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
jmp ret_from_sys_call

/* Do syscall tracing */
tracesys:
CFI_RESTORE_STATE
SAVE_REST
movq $-ENOSYS,RAX(%rsp)
FIXUP_TOP_OF_STACK %rdi
movq %rsp,%rdi
call syscall_trace_enter
LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed them */
RESTORE_REST
cmpq $__NR_syscall_max,%rax
ja 1f
movq %r10,%rcx /* fixup for C */
call *sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
1: SAVE_REST
movq %rsp,%rdi
call syscall_trace_leave
RESTORE_TOP_OF_STACK %rbx
RESTORE_REST
jmp ret_from_sys_call
CFI_ENDPROC

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
CFI_STARTPROC simple
CFI_DEF_CFA rsp,SS+8-ARGOFFSET
/*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
CFI_REL_OFFSET rsp,RSP-ARGOFFSET
/*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
/*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
CFI_REL_OFFSET rip,RIP-ARGOFFSET
CFI_REL_OFFSET rdx,RDX-ARGOFFSET
CFI_REL_OFFSET rcx,RCX-ARGOFFSET
CFI_REL_OFFSET rax,RAX-ARGOFFSET
CFI_REL_OFFSET rdi,RDI-ARGOFFSET
CFI_REL_OFFSET rsi,RSI-ARGOFFSET
CFI_REL_OFFSET r8,R8-ARGOFFSET
CFI_REL_OFFSET r9,R9-ARGOFFSET
CFI_REL_OFFSET r10,R10-ARGOFFSET
CFI_REL_OFFSET r11,R11-ARGOFFSET
XEN_BLOCK_EVENTS(%rsi)
testb $3,CS-ARGOFFSET(%rsp)
jnz 1f
/* Need to set the proper %ss (not NULL) for ring 3 iretq */
movl $__KERNEL_DS,SS-ARGOFFSET(%rsp)
jmp retint_restore_args # return from ring-3 kernel
1:
movl $_TIF_ALLWORK_MASK,%edi
/* edi: mask to check */
int_with_check:
GET_THREAD_INFO(%rcx)
movl threadinfo_flags(%rcx),%edx
andl %edi,%edx
jnz int_careful
andl $~TS_COMPAT,threadinfo_status(%rcx)
jmp retint_restore_args

/* Either reschedule or signal or syscall exit tracking needed. */
/* First do a reschedule test. */
/* edx: work, edi: workmask */
int_careful:
bt $TIF_NEED_RESCHED,%edx
jnc int_very_careful
/* sti */
XEN_UNBLOCK_EVENTS(%rsi)
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
cli
jmp int_with_check

/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
/* sti */
XEN_UNBLOCK_EVENTS(%rsi)
SAVE_REST
/* Check for syscall exit trace */
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
jz int_signal
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
leaq 8(%rsp),%rdi # &ptregs -> arg1
call syscall_trace_leave
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
cli
jmp int_restore_rest

int_signal:
testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
jz 1f
movq %rsp,%rdi # &ptregs -> arg1
xorl %esi,%esi # oldset -> arg2
call do_notify_resume
1: movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
RESTORE_REST
cli
jmp int_with_check
CFI_ENDPROC

/*
 * Certain special system calls need to save a complete full stack frame.
 */

.macro PTREGSCALL label,func,arg
.globl \label
\label:
leaq \func(%rip),%rax
leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
jmp ptregscall_common
.endm
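
/*
 * For example, "PTREGSCALL stub_fork, sys_fork, %rdi" expands to roughly:
 *
 *	.globl stub_fork
 *	stub_fork:
 *		leaq sys_fork(%rip),%rax
 *		leaq -ARGOFFSET+8(%rsp),%rdi	# 8 for return address
 *		jmp ptregscall_common
 *
 * i.e. the pt_regs pointer is passed in the designated argument register
 * and the real sys_* routine is reached through ptregscall_common.
 */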

CFI_STARTPROC

PTREGSCALL stub_clone, sys_clone, %r8
PTREGSCALL stub_fork, sys_fork, %rdi
PTREGSCALL stub_vfork, sys_vfork, %rdi
PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
popq %r11
CFI_ADJUST_CFA_OFFSET -8
CFI_REGISTER rip, r11
SAVE_REST
movq %r11, %r15
CFI_REGISTER rip, r15
FIXUP_TOP_OF_STACK %r11
call *%rax
RESTORE_TOP_OF_STACK %r11
movq %r15, %r11
CFI_REGISTER rip, r11
RESTORE_REST
pushq %r11
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rip, 0
ret
CFI_ENDPROC

ENTRY(stub_execve)
CFI_STARTPROC
popq %r11
CFI_ADJUST_CFA_OFFSET -8
CFI_REGISTER rip, r11
SAVE_REST
movq %r11, %r15
CFI_REGISTER rip, r15
FIXUP_TOP_OF_STACK %r11
call sys_execve
GET_THREAD_INFO(%rcx)
bt $TIF_IA32,threadinfo_flags(%rcx)
CFI_REMEMBER_STATE
jc exec_32bit
RESTORE_TOP_OF_STACK %r11
movq %r15, %r11
CFI_REGISTER rip, r11
RESTORE_REST
pushq %r11
CFI_ADJUST_CFA_OFFSET 8
CFI_REL_OFFSET rip, 0
ret

exec_32bit:
CFI_RESTORE_STATE
movq %rax,RAX(%rsp)
RESTORE_REST
jmp int_ret_from_sys_call
CFI_ENDPROC

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
CFI_STARTPROC
addq $8, %rsp
CFI_ADJUST_CFA_OFFSET -8
SAVE_REST
movq %rsp,%rdi
FIXUP_TOP_OF_STACK %r11
call sys_rt_sigreturn
movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
RESTORE_REST
jmp int_ret_from_sys_call
CFI_ENDPROC

/*
 * initial frame state for interrupts and exceptions
 */
.macro _frame ref
CFI_STARTPROC simple
CFI_DEF_CFA rsp,SS+8-\ref
/*CFI_REL_OFFSET ss,SS-\ref*/
CFI_REL_OFFSET rsp,RSP-\ref
/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
/*CFI_REL_OFFSET cs,CS-\ref*/
CFI_REL_OFFSET rip,RIP-\ref
.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX

/*
 * Interrupt exit.
 */
retint_check:
movl threadinfo_flags(%rcx),%edx
andl %edi,%edx
CFI_REMEMBER_STATE
jnz retint_careful
retint_restore_args:
movb EVENT_MASK-REST_SKIP(%rsp), %al
notb %al # %al == ~saved_mask
XEN_GET_VCPU_INFO(%rsi)
andb evtchn_upcall_mask(%rsi),%al
andb $1,%al # %al == mask & ~saved_mask
jnz restore_all_enable_events # != 0 => reenable event delivery
XEN_PUT_VCPU_INFO(%rsi)

RESTORE_ARGS 0,8,0
HYPERVISOR_IRET 0

/* edi: workmask, edx: work */
retint_careful:
CFI_RESTORE_STATE
bt $TIF_NEED_RESCHED,%edx
jnc retint_signal
XEN_UNBLOCK_EVENTS(%rsi)
/* sti */
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
XEN_BLOCK_EVENTS(%rsi)
GET_THREAD_INFO(%rcx)
/* cli */
jmp retint_check

retint_signal:
testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
jz retint_restore_args
XEN_UNBLOCK_EVENTS(%rsi)
SAVE_REST
movq $-1,ORIG_RAX(%rsp)
xorl %esi,%esi # oldset
movq %rsp,%rdi # &pt_regs
call do_notify_resume
RESTORE_REST
XEN_BLOCK_EVENTS(%rsi)
movl $_TIF_NEED_RESCHED,%edi
GET_THREAD_INFO(%rcx)
jmp retint_check

#ifdef CONFIG_PREEMPT
/* Returning to kernel space. Check if we need preemption */
/* rcx: threadinfo. interrupts off. */
.p2align
retint_kernel:
cmpl $0,threadinfo_preempt_count(%rcx)
jnz retint_restore_args
bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
jnc retint_restore_args
bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
jnc retint_restore_args
call preempt_schedule_irq
jmp retint_kernel /* check again */
#endif
CFI_ENDPROC

/*
 * APIC interrupts.
 */
.macro apicinterrupt num,func
INTR_FRAME
pushq $\num-256
CFI_ADJUST_CFA_OFFSET 8
interrupt \func
jmp error_entry
CFI_ENDPROC
.endm
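
/*
 * The vector is pushed as \num-256 so that ORIG_RAX ends up negative for
 * hardware interrupts, keeping them distinguishable from system call
 * numbers.  As a rough sketch, "apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt"
 * expands (ignoring the CFI annotations) to:
 *
 *	pushq $(THERMAL_APIC_VECTOR - 256)
 *	interrupt smp_thermal_interrupt
 *	jmp error_entry
 */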

#ifndef CONFIG_XEN
ENTRY(thermal_interrupt)
apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt

ENTRY(threshold_interrupt)
apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt

.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
.endm

INVALIDATE_ENTRY 0
INVALIDATE_ENTRY 1
INVALIDATE_ENTRY 2
INVALIDATE_ENTRY 3
INVALIDATE_ENTRY 4
INVALIDATE_ENTRY 5
INVALIDATE_ENTRY 6
INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt

ENTRY(error_interrupt)
apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt

ENTRY(spurious_interrupt)
apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
#endif
#endif /* !CONFIG_XEN */

/*
 * Exception entry points.
 */
.macro zeroentry sym
INTR_FRAME
movq (%rsp),%rcx
movq 8(%rsp),%r11
addq $0x10,%rsp /* skip rcx and r11 */
pushq $0 /* push error code/oldrax */
CFI_ADJUST_CFA_OFFSET 8
pushq %rax /* push real oldrax to the rdi slot */
CFI_ADJUST_CFA_OFFSET 8
leaq \sym(%rip),%rax
jmp error_entry
CFI_ENDPROC
.endm

.macro errorentry sym
XCPT_FRAME
movq (%rsp),%rcx
movq 8(%rsp),%r11
addq $0x10,%rsp /* rsp points to the error code */
pushq %rax
CFI_ADJUST_CFA_OFFSET 8
leaq \sym(%rip),%rax
jmp error_entry
CFI_ENDPROC
.endm
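
/*
 * Both macros arrive at error_entry with the handler address in %rax.
 * The handlers themselves are ordinary C functions along the lines of
 * (sketch; the real prototypes live in the C sources):
 *
 *	asmlinkage void do_page_fault(struct pt_regs *regs,
 *				      unsigned long error_code);
 *
 * zeroentry pushes a zero error code for exceptions that don't supply
 * one; errorentry keeps the hardware-provided error code in place.
 */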

#if 0 /* not XEN */
/* error code is on the stack already */
/* handle NMI like exceptions that can happen everywhere */
.macro paranoidentry sym, ist=0
movq (%rsp),%rcx
movq 8(%rsp),%r11
addq $0x10,%rsp /* skip rcx and r11 */
SAVE_ALL
cld
#if 0 /* not XEN */
movl $1,%ebx
movl $MSR_GS_BASE,%ecx
rdmsr
testl %edx,%edx
js 1f
swapgs
xorl %ebx,%ebx
1:
#endif
.if \ist
movq %gs:pda_data_offset, %rbp
.endif
movq %rsp,%rdi
movq ORIG_RAX(%rsp),%rsi
movq $-1,ORIG_RAX(%rsp)
.if \ist
subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
.endif
call \sym
.if \ist
addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
.endif
/* cli */
XEN_BLOCK_EVENTS(%rsi)
.endm
#endif

/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
ENTRY(error_entry)
_frame RDI
/* rdi slot contains rax, oldrax contains error code */
cld
subq $14*8,%rsp
CFI_ADJUST_CFA_OFFSET (14*8)
movq %rsi,13*8(%rsp)
CFI_REL_OFFSET rsi,RSI
movq 14*8(%rsp),%rsi /* load rax from rdi slot */
movq %rdx,12*8(%rsp)
CFI_REL_OFFSET rdx,RDX
movq %rcx,11*8(%rsp)
CFI_REL_OFFSET rcx,RCX
movq %rsi,10*8(%rsp) /* store rax */
CFI_REL_OFFSET rax,RAX
movq %r8, 9*8(%rsp)
CFI_REL_OFFSET r8,R8
movq %r9, 8*8(%rsp)
CFI_REL_OFFSET r9,R9
movq %r10,7*8(%rsp)
CFI_REL_OFFSET r10,R10
movq %r11,6*8(%rsp)
CFI_REL_OFFSET r11,R11
movq %rbx,5*8(%rsp)
CFI_REL_OFFSET rbx,RBX
movq %rbp,4*8(%rsp)
CFI_REL_OFFSET rbp,RBP
movq %r12,3*8(%rsp)
CFI_REL_OFFSET r12,R12
movq %r13,2*8(%rsp)
CFI_REL_OFFSET r13,R13
movq %r14,1*8(%rsp)
CFI_REL_OFFSET r14,R14
movq %r15,(%rsp)
CFI_REL_OFFSET r15,R15
#if 0
cmpl $__KERNEL_CS,CS(%rsp)
je error_kernelspace
#endif
error_call_handler:
movq %rdi, RDI(%rsp)
movq %rsp,%rdi
movq ORIG_RAX(%rsp),%rsi # get error code
movq $-1,ORIG_RAX(%rsp)
call *%rax
error_exit:
RESTORE_REST
/* cli */
XEN_BLOCK_EVENTS(%rsi)
GET_THREAD_INFO(%rcx)
testb $3,CS-ARGOFFSET(%rsp)
jz retint_kernel
movl threadinfo_flags(%rcx),%edx
movl $_TIF_WORK_MASK,%edi
andl %edi,%edx
jnz retint_careful
jmp retint_restore_args

error_kernelspace:
/*
 * We need to rewrite the logic here because we don't do iretq to
 * return to user mode. It's still possible that we get a trap/fault
 * in the kernel (when accessing buffers pointed to by system calls,
 * for example).
 */
#if 0
incl %ebx
/* There are two places in the kernel that can potentially fault with
   usergs. Handle them here. The exception handlers after
   iret run with kernel gs again, so don't set the user space flag.
   B stepping K8s sometimes report a truncated RIP for IRET
   exceptions returning to compat mode. Check for these here too. */
leaq iret_label(%rip),%rbp
cmpq %rbp,RIP(%rsp)
je error_swapgs
movl %ebp,%ebp /* zero extend */
cmpq %rbp,RIP(%rsp)
je error_swapgs
cmpq $gs_change,RIP(%rsp)
je error_swapgs
jmp error_sti
#endif

ENTRY(hypervisor_callback)
zeroentry do_hypervisor_callback

/*
 * Copied from arch/xen/i386/kernel/entry.S
 */
# A note on the "critical region" in our callback handler.
# We want to avoid stacking callback handlers due to events occurring
# during handling of the last event. To do this, we keep events disabled
# until we've done all processing. HOWEVER, we must enable events before
# popping the stack frame (can't be done atomically) and so it would still
# be possible to get enough handler activations to overflow the stack.
# Although unlikely, bugs of that kind are hard to track down, so we'd
# like to avoid the possibility.
# So, on entry to the handler we detect whether we interrupted an
# existing activation in its critical region -- if so, we pop the current
# activation and restart the handler using the previous one.
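#
# For reference, event delivery is controlled through the shared vcpu_info
# structure (see xen/interface/xen.h); the fields used by the EVENT_MASK /
# XEN_*_EVENTS code are roughly:
#	struct vcpu_info {
#		uint8_t evtchn_upcall_pending;
#		uint8_t evtchn_upcall_mask;
#		/* ... */
#	};
# Setting evtchn_upcall_mask blocks upcalls; evtchn_upcall_pending is what
# XEN_TEST_PENDING checks before re-enabling event delivery below.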
ENTRY(do_hypervisor_callback) # do_hypervisor_callback(struct pt_regs *)
# Since we don't modify %rdi, evtchn_do_upcall(struct pt_regs *) will
# see the correct pointer to the pt_regs
movq %rdi, %rsp # we don't return, adjust the stack frame
11: movb $0, EVENT_MASK(%rdi)
movq %gs:pda_irqstackptr,%rax
incl %gs:pda_irqcount
cmovzq %rax,%rsp
pushq %rdi
call evtchn_do_upcall
popq %rsp
decl %gs:pda_irqcount
jmp error_exit

#ifdef CONFIG_X86_LOCAL_APIC
KPROBE_ENTRY(nmi)
zeroentry do_nmi_callback
ENTRY(do_nmi_callback)
addq $8, %rsp
call do_nmi
orl $NMI_MASK,EFLAGS(%rsp)
RESTORE_REST
XEN_BLOCK_EVENTS(%rsi)
GET_THREAD_INFO(%rcx)
jmp retint_restore_args
.previous .text
#endif

ALIGN
restore_all_enable_events:
XEN_UNBLOCK_EVENTS(%rsi) # %rsi is already set up...

scrit: /**** START OF CRITICAL REGION ****/
XEN_TEST_PENDING(%rsi)
jnz 14f # process more events if necessary...
XEN_PUT_VCPU_INFO(%rsi)
RESTORE_ARGS 0,8,0
HYPERVISOR_IRET 0

14: XEN_LOCKED_BLOCK_EVENTS(%rsi)
XEN_PUT_VCPU_INFO(%rsi)
SAVE_REST
movq %rsp,%rdi # set the argument again
jmp 11b
ecrit: /**** END OF CRITICAL REGION ****/
# At this point, unlike on x86-32, we don't do the critical-region fixup;
# skipping it keeps the code simple, and the stack frame is more complex
# on x86-64 anyway.
# When the kernel is interrupted in the critical section, it will do an
# IRET, and everything will be restored at that point, i.e. it simply
# resumes at the interrupted instruction with the same context.

# Hypervisor uses this for application faults while it executes.
ENTRY(failsafe_callback)
addq $0x10,%rsp /* skip rcx and r11 */
1: mov (%rsp),%ds
2: mov 8(%rsp),%es
3: mov 16(%rsp),%fs
4: mov 24(%rsp),%gs
addq $0x20,%rsp /* skip the above selectors */
SAVE_ALL
jmp error_exit
.section .fixup,"ax"; \
6: movq $0,(%rsp); \
jmp 1b; \
7: movq $0,8(%rsp); \
jmp 2b; \
8: movq $0,16(%rsp); \
jmp 3b; \
9: movq $0,24(%rsp); \
jmp 4b; \
.previous; \
.section __ex_table,"a";\
.align 16; \
.quad 1b,6b; \
.quad 2b,7b; \
.quad 3b,8b; \
.quad 4b,9b; \
.previous

#if 0
.section __ex_table,"a"
.align 8
.quad gs_change,bad_gs
.previous
.section .fixup,"ax"
/* running with kernelgs */
bad_gs:
/* swapgs */ /* switch back to user gs */
xorl %eax,%eax
movl %eax,%gs
jmp 2b
.previous
#endif

/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
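
/*
 * A typical (hypothetical) caller would do something like
 *	kernel_thread(my_thread_fn, my_arg, CLONE_FS | CLONE_FILES | SIGCHLD);
 * the flags passed in are OR'ed with kernel_thread_flags below before
 * do_fork() is called.
 */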
ENTRY(kernel_thread)
CFI_STARTPROC
FAKE_STACK_FRAME $child_rip
SAVE_ALL

# rdi: flags, rsi: usp, rdx: will be &pt_regs
movq %rdx,%rdi
orq kernel_thread_flags(%rip),%rdi
movq $-1, %rsi
movq %rsp, %rdx

xorl %r8d,%r8d
xorl %r9d,%r9d

# clone now
call do_fork
movq %rax,RAX(%rsp)
xorl %edi,%edi

/*
 * It isn't worth it to check for reschedule here, so internally to the
 * x86_64 port you can rely on kernel_thread() not to reschedule the
 * child before returning; this avoids the need for hacks, for example
 * to fork off the per-CPU idle tasks.
 * [Hopefully no generic code relies on the reschedule -AK]
 */
RESTORE_ALL
UNFAKE_STACK_FRAME
ret
CFI_ENDPROC

child_rip:
/*
 * Here we are in the child and the registers are set as they were
 * at kernel_thread() invocation in the parent.
 */
movq %rdi, %rax
movq %rsi, %rdi
call *%rax
# exit
xorl %edi, %edi
call do_exit

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
ENTRY(execve)
CFI_STARTPROC
FAKE_STACK_FRAME $0
SAVE_ALL
call sys_execve
movq %rax, RAX(%rsp)
RESTORE_REST
testq %rax,%rax
jne 1f
jmp int_ret_from_sys_call
1: RESTORE_ARGS
UNFAKE_STACK_FRAME
ret
CFI_ENDPROC

KPROBE_ENTRY(page_fault)
errorentry do_page_fault
.previous .text

ENTRY(coprocessor_error)
zeroentry do_coprocessor_error

ENTRY(simd_coprocessor_error)
zeroentry do_simd_coprocessor_error

ENTRY(device_not_available)
zeroentry math_state_restore

/* runs on exception stack */
KPROBE_ENTRY(debug)
INTR_FRAME
/* pushq $0
   CFI_ADJUST_CFA_OFFSET 8 */
zeroentry do_debug
/* jmp paranoid_exit */
CFI_ENDPROC
.previous .text

#if 0
/* runs on exception stack */
KPROBE_ENTRY(nmi)
INTR_FRAME
pushq $-1
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_nmi
/*
 * "Paranoid" exit path from exception stack.
 * Paranoid because this is used by NMIs and cannot take
 * any kernel state for granted.
 * We don't do kernel preemption checks here, because only
 * NMI should be common and it does not enable IRQs and
 * cannot get reschedule ticks.
 */
/* ebx: no swapgs flag */
paranoid_exit:
testl %ebx,%ebx /* swapgs needed? */
jnz paranoid_restore
testl $3,CS(%rsp)
jnz paranoid_userspace
paranoid_swapgs:
swapgs
paranoid_restore:
RESTORE_ALL 8
iretq
paranoid_userspace:
GET_THREAD_INFO(%rcx)
movl threadinfo_flags(%rcx),%ebx
andl $_TIF_WORK_MASK,%ebx
jz paranoid_swapgs
movq %rsp,%rdi /* &pt_regs */
call sync_regs
movq %rax,%rsp /* switch stack for scheduling */
testl $_TIF_NEED_RESCHED,%ebx
jnz paranoid_schedule
movl %ebx,%edx /* arg3: thread flags */
sti
xorl %esi,%esi /* arg2: oldset */
movq %rsp,%rdi /* arg1: &pt_regs */
call do_notify_resume
cli
jmp paranoid_userspace
paranoid_schedule:
sti
call schedule
cli
jmp paranoid_userspace
CFI_ENDPROC
.previous .text
#endif

KPROBE_ENTRY(int3)
INTR_FRAME
/* pushq $0
   CFI_ADJUST_CFA_OFFSET 8 */
zeroentry do_int3
/* jmp paranoid_exit */
CFI_ENDPROC
.previous .text

ENTRY(overflow)
zeroentry do_overflow

ENTRY(bounds)
zeroentry do_bounds

ENTRY(invalid_op)
zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
zeroentry do_coprocessor_segment_overrun

ENTRY(reserved)
zeroentry do_reserved

#if 0
/* runs on exception stack */
ENTRY(double_fault)
XCPT_FRAME
paranoidentry do_double_fault
jmp paranoid_exit
CFI_ENDPROC
#endif

ENTRY(invalid_TSS)
errorentry do_invalid_TSS

ENTRY(segment_not_present)
errorentry do_segment_not_present

/* runs on exception stack */
ENTRY(stack_segment)
XCPT_FRAME
errorentry do_stack_segment
CFI_ENDPROC

KPROBE_ENTRY(general_protection)
errorentry do_general_protection
.previous .text

ENTRY(alignment_check)
errorentry do_alignment_check

ENTRY(divide_error)
zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
zeroentry do_spurious_interrupt_bug

#ifdef CONFIG_X86_MCE
/* runs on exception stack */
ENTRY(machine_check)
INTR_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_machine_check
jmp paranoid_exit
CFI_ENDPROC
#endif

ENTRY(call_softirq)
CFI_STARTPROC
movq %gs:pda_irqstackptr,%rax
movq %rsp,%rdx
CFI_DEF_CFA_REGISTER rdx
incl %gs:pda_irqcount
cmove %rax,%rsp
pushq %rdx
/*todo CFI_DEF_CFA_EXPRESSION ...*/
call __do_softirq
popq %rsp
CFI_DEF_CFA_REGISTER rsp
decl %gs:pda_irqcount
ret
CFI_ENDPROC