ia64/xen-unstable: linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S @ 13978:477a3bde3b61

linux: miscellaneous changes after upgrade through 2.6.17.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
author kfraser@localhost.localdomain
date Thu Feb 15 14:12:32 2007 +0000 (2007-02-15)
parents 3adf00179a63
children b1f33c3a97fa
1 /*
2 * linux/arch/x86_64/entry.S
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
7 *
8 * $Id$
9 *
10 * Jun Nakajima <jun.nakajima@intel.com>
11 * Asit Mallick <asit.k.mallick@intel.com>
12 * Modified for Xen
13 */
15 /*
16 * entry.S contains the system-call and fault low-level handling routines.
17 *
18 * NOTE: This code handles signal recognition, which happens every time
19 * after an interrupt and after each system call.
20 *
21 * Normal syscalls and interrupts don't save a full stack frame; this is
22 * only done for syscall tracing, signals or fork/exec et al.
23 *
24 * A note on terminology:
25 * - top of stack: Architecture-defined interrupt frame from SS to RIP
26 * at the top of the kernel process stack.
27 * - partial stack frame: partially saved registers up to R11.
28 * - full stack frame: like the partial stack frame, but with all registers saved.
29 *
30 * TODO:
31 * - schedule it carefully for the final hardware.
32 */
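
For orientation, here is a sketch of the saved-register layout these terms refer to, mirroring the R15..SS offsets used by CFI_DEFAULT_STACK later in this file (the authoritative definition is struct pt_regs in the kernel's ptrace headers; the struct name below is illustrative):

struct pt_regs_sketch {				/* illustrative mirror of struct pt_regs */
	/* callee-saved registers: only present in a full stack frame */
	unsigned long r15, r14, r13, r12, rbp, rbx;
	/* a partial stack frame saves from here (r11) down to orig_rax */
	unsigned long r11, r10, r9, r8;
	unsigned long rax, rcx, rdx, rsi, rdi;
	unsigned long orig_rax;			/* syscall number / error code slot */
	/* "top of stack": the architecture-defined interrupt frame, SS..RIP */
	unsigned long rip, cs, eflags, rsp, ss;
};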
34 #define ASSEMBLY 1
35 #include <linux/linkage.h>
36 #include <asm/segment.h>
37 #include <asm/smp.h>
38 #include <asm/cache.h>
39 #include <asm/errno.h>
40 #include <asm/dwarf2.h>
41 #include <asm/calling.h>
42 #include <asm/asm-offsets.h>
43 #include <asm/msr.h>
44 #include <asm/unistd.h>
45 #include <asm/thread_info.h>
46 #include <asm/hw_irq.h>
47 #include <asm/page.h>
48 #include <asm/irqflags.h>
49 #include <asm/errno.h>
50 #include <xen/interface/arch-x86_64.h>
51 #include <xen/interface/features.h>
53 #include "irq_vectors.h"
55 #include "xen_entry.S"
57 .code64
59 #ifndef CONFIG_PREEMPT
60 #define retint_kernel retint_restore_args
61 #endif
64 .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
65 #ifdef CONFIG_TRACE_IRQFLAGS
66 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
67 jnc 1f
68 TRACE_IRQS_ON
69 1:
70 #endif
71 .endm
73 NMI_MASK = 0x80000000
75 /*
76 * C code is not supposed to know about the undefined top of stack. Every time
77 * a C function with a pt_regs argument is called from the SYSCALL-based
78 * fast path, FIXUP_TOP_OF_STACK is needed.
79 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
80 * manipulation.
81 */
83 /* %rsp: at FRAMEEND */
84 .macro FIXUP_TOP_OF_STACK tmp
85 movq $__USER_CS,CS(%rsp)
86 movq $-1,RCX(%rsp)
87 .endm
89 .macro RESTORE_TOP_OF_STACK tmp,offset=0
90 .endm
92 .macro FAKE_STACK_FRAME child_rip
93 /* push in order ss, rsp, eflags, cs, rip */
94 xorl %eax, %eax
95 pushq %rax /* ss */
96 CFI_ADJUST_CFA_OFFSET 8
97 /*CFI_REL_OFFSET ss,0*/
98 pushq %rax /* rsp */
99 CFI_ADJUST_CFA_OFFSET 8
100 CFI_REL_OFFSET rsp,0
101 pushq $(1<<9) /* eflags - interrupts on */
102 CFI_ADJUST_CFA_OFFSET 8
103 /*CFI_REL_OFFSET rflags,0*/
104 pushq $__KERNEL_CS /* cs */
105 CFI_ADJUST_CFA_OFFSET 8
106 /*CFI_REL_OFFSET cs,0*/
107 pushq \child_rip /* rip */
108 CFI_ADJUST_CFA_OFFSET 8
109 CFI_REL_OFFSET rip,0
110 pushq %rax /* orig rax */
111 CFI_ADJUST_CFA_OFFSET 8
112 .endm
114 .macro UNFAKE_STACK_FRAME
115 addq $8*6, %rsp
116 CFI_ADJUST_CFA_OFFSET -(6*8)
117 .endm
119 .macro CFI_DEFAULT_STACK start=1
120 .if \start
121 CFI_STARTPROC simple
122 CFI_DEF_CFA rsp,SS+8
123 .else
124 CFI_DEF_CFA_OFFSET SS+8
125 .endif
126 CFI_REL_OFFSET r15,R15
127 CFI_REL_OFFSET r14,R14
128 CFI_REL_OFFSET r13,R13
129 CFI_REL_OFFSET r12,R12
130 CFI_REL_OFFSET rbp,RBP
131 CFI_REL_OFFSET rbx,RBX
132 CFI_REL_OFFSET r11,R11
133 CFI_REL_OFFSET r10,R10
134 CFI_REL_OFFSET r9,R9
135 CFI_REL_OFFSET r8,R8
136 CFI_REL_OFFSET rax,RAX
137 CFI_REL_OFFSET rcx,RCX
138 CFI_REL_OFFSET rdx,RDX
139 CFI_REL_OFFSET rsi,RSI
140 CFI_REL_OFFSET rdi,RDI
141 CFI_REL_OFFSET rip,RIP
142 /*CFI_REL_OFFSET cs,CS*/
143 /*CFI_REL_OFFSET rflags,EFLAGS*/
144 CFI_REL_OFFSET rsp,RSP
145 /*CFI_REL_OFFSET ss,SS*/
146 .endm
148 /*
149 * Must be consistent with the definition in arch-x86_64.h:
150 * struct iret_context {
151 * u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
152 * };
153 * #define VGCF_IN_SYSCALL (1<<8)
154 */
155 .macro HYPERVISOR_IRET flag
156 testb $3,1*8(%rsp)
157 jnz 2f
158 testl $NMI_MASK,2*8(%rsp)
159 jnz 2f
161 testb $1,(xen_features+XENFEAT_supervisor_mode_kernel)
162 jnz 1f
164 /* Direct iret to kernel space. Correct CS and SS. */
165 orb $3,1*8(%rsp)
166 orb $3,4*8(%rsp)
167 1: iretq
169 2: /* Slow iret via hypervisor. */
170 andl $~NMI_MASK, 16(%rsp)
171 pushq $\flag
172 jmp hypercall_page + (__HYPERVISOR_iret * 32)
173 .endm
175 .macro SWITCH_TO_KERNEL ssoff,adjust=0
176 jc 1f
177 orb $1,\ssoff-\adjust+4(%rsp)
178 1:
179 .endm
181 /*
182 * A newly forked process directly context switches into this.
183 */
184 /* rdi: prev */
185 ENTRY(ret_from_fork)
186 CFI_DEFAULT_STACK
187 call schedule_tail
188 GET_THREAD_INFO(%rcx)
189 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
190 jnz rff_trace
191 rff_action:
192 RESTORE_REST
193 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
194 je int_ret_from_sys_call
195 testl $_TIF_IA32,threadinfo_flags(%rcx)
196 jnz int_ret_from_sys_call
197 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
198 jmp ret_from_sys_call
199 rff_trace:
200 movq %rsp,%rdi
201 call syscall_trace_leave
202 GET_THREAD_INFO(%rcx)
203 jmp rff_action
204 CFI_ENDPROC
205 END(ret_from_fork)
207 /*
208 * System call entry. Up to 6 arguments in registers are supported.
209 *
210 * SYSCALL does not save anything on the stack and does not change the
211 * stack pointer.
212 */
214 /*
215 * Register setup:
216 * rax system call number
217 * rdi arg0
218 * rcx return address for syscall/sysret, C arg3
219 * rsi arg1
220 * rdx arg2
221 * r10 arg3 (--> moved to rcx for C)
222 * r8 arg4
223 * r9 arg5
224 * r11 eflags for syscall/sysret, temporary for C
225 * r12-r15,rbp,rbx saved by C code, not touched.
226 *
227 * Interrupts are off on entry.
228 * Only called from user space.
229 *
230 * XXX if we had a free scratch register we could save the RSP into the stack frame
231 * and report it properly in ps. Unfortunately we don't have one.
232 *
233 * When the user can change the frames, always force IRET. That is because
234 * IRET deals with non-canonical addresses better. SYSRET has trouble
235 * with them due to bugs in both AMD and Intel CPUs.
236 */
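
As a user-space illustration of this register convention, here is a minimal, hypothetical C wrapper (the function name is made up; the write(2) call is only an example) showing which registers carry the arguments and which ones the kernel clobbers:

static inline long xsys_call3(long nr, long a0, long a1, long a2)
{
	long ret;
	/* rax = syscall number; rdi/rsi/rdx = arg0..arg2.  arg3 would go in
	 * r10 (not rcx, since SYSCALL uses rcx for the return address).
	 * rcx and r11 are clobbered across the syscall, as the table above says. */
	__asm__ volatile ("syscall"
			  : "=a" (ret)
			  : "a" (nr), "D" (a0), "S" (a1), "d" (a2)
			  : "rcx", "r11", "memory");
	return ret;
}

int main(void)
{
	/* write(1, "hi\n", 3): __NR_write is 1 on x86-64 */
	return xsys_call3(1, 1, (long)"hi\n", 3) == 3 ? 0 : 1;
}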
238 ENTRY(system_call)
239 CFI_STARTPROC simple
240 CFI_DEF_CFA rsp,PDA_STACKOFFSET
241 CFI_REGISTER rip,rcx
242 /*CFI_REGISTER rflags,r11*/
243 SAVE_ARGS -8,0
244 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
245 /*
246 * No need to follow this irqs off/on section - it's straight
247 * and short:
248 */
249 XEN_UNBLOCK_EVENTS(%r11)
250 GET_THREAD_INFO(%rcx)
251 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
252 CFI_REMEMBER_STATE
253 jnz tracesys
254 cmpq $__NR_syscall_max,%rax
255 ja badsys
256 movq %r10,%rcx
257 call *sys_call_table(,%rax,8) # XXX: rip relative
258 movq %rax,RAX-ARGOFFSET(%rsp)
259 /*
260 * Syscall return path ending with SYSRET (fast path)
261 * Has incomplete stack frame and undefined top of stack.
262 */
263 .globl ret_from_sys_call
264 ret_from_sys_call:
265 movl $_TIF_ALLWORK_MASK,%edi
266 /* edi: flagmask */
267 sysret_check:
268 GET_THREAD_INFO(%rcx)
269 XEN_BLOCK_EVENTS(%rsi)
270 TRACE_IRQS_OFF
271 movl threadinfo_flags(%rcx),%edx
272 andl %edi,%edx
273 CFI_REMEMBER_STATE
274 jnz sysret_careful
275 /*
276 * sysretq will re-enable interrupts:
277 */
278 TRACE_IRQS_ON
279 XEN_UNBLOCK_EVENTS(%rsi)
280 CFI_REGISTER rip,rcx
281 RESTORE_ARGS 0,8,0
282 /*CFI_REGISTER rflags,r11*/
283 HYPERVISOR_IRET VGCF_IN_SYSCALL
285 /* Handle reschedules */
286 /* edx: work, edi: workmask */
287 sysret_careful:
288 CFI_RESTORE_STATE
289 bt $TIF_NEED_RESCHED,%edx
290 jnc sysret_signal
291 TRACE_IRQS_ON
292 XEN_UNBLOCK_EVENTS(%rsi)
293 pushq %rdi
294 CFI_ADJUST_CFA_OFFSET 8
295 call schedule
296 popq %rdi
297 CFI_ADJUST_CFA_OFFSET -8
298 jmp sysret_check
300 /* Handle a signal */
301 sysret_signal:
302 TRACE_IRQS_ON
303 /* sti */
304 XEN_UNBLOCK_EVENTS(%rsi)
305 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
306 jz 1f
308 /* Really a signal */
309 /* edx: work flags (arg3) */
310 leaq do_notify_resume(%rip),%rax
311 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
312 xorl %esi,%esi # oldset -> arg2
313 call ptregscall_common
314 1: movl $_TIF_NEED_RESCHED,%edi
315 /* Use IRET because the user could have changed the frame. This
316 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
317 XEN_BLOCK_EVENTS(%rsi)
318 TRACE_IRQS_OFF
319 jmp int_with_check
321 badsys:
322 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
323 jmp ret_from_sys_call
325 /* Do syscall tracing */
326 tracesys:
327 CFI_RESTORE_STATE
328 SAVE_REST
329 movq $-ENOSYS,RAX(%rsp)
330 FIXUP_TOP_OF_STACK %rdi
331 movq %rsp,%rdi
332 call syscall_trace_enter
333 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed them */
334 RESTORE_REST
335 cmpq $__NR_syscall_max,%rax
336 ja 1f
337 movq %r10,%rcx /* fixup for C */
338 call *sys_call_table(,%rax,8)
339 1: movq %rax,RAX-ARGOFFSET(%rsp)
340 /* Use IRET because the user could have changed the frame */
341 jmp int_ret_from_sys_call
342 CFI_ENDPROC
343 END(system_call)
345 /*
346 * Syscall return path ending with IRET.
347 * Has correct top of stack, but partial stack frame.
348 */
349 ENTRY(int_ret_from_sys_call)
350 CFI_STARTPROC simple
351 CFI_DEF_CFA rsp,SS+8-ARGOFFSET
352 /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
353 CFI_REL_OFFSET rsp,RSP-ARGOFFSET
354 /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
355 /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
356 CFI_REL_OFFSET rip,RIP-ARGOFFSET
357 CFI_REL_OFFSET rdx,RDX-ARGOFFSET
358 CFI_REL_OFFSET rcx,RCX-ARGOFFSET
359 CFI_REL_OFFSET rax,RAX-ARGOFFSET
360 CFI_REL_OFFSET rdi,RDI-ARGOFFSET
361 CFI_REL_OFFSET rsi,RSI-ARGOFFSET
362 CFI_REL_OFFSET r8,R8-ARGOFFSET
363 CFI_REL_OFFSET r9,R9-ARGOFFSET
364 CFI_REL_OFFSET r10,R10-ARGOFFSET
365 CFI_REL_OFFSET r11,R11-ARGOFFSET
366 TRACE_IRQS_OFF
367 XEN_BLOCK_EVENTS(%rsi)
368 testb $3,CS-ARGOFFSET(%rsp)
369 jnz 1f
370 /* Need to set the proper %ss (not NULL) for ring 3 iretq */
371 movl $__KERNEL_DS,SS-ARGOFFSET(%rsp)
372 jmp retint_restore_args # return from ring 3 kernel
373 1:
374 movl $_TIF_ALLWORK_MASK,%edi
375 /* edi: mask to check */
376 int_with_check:
377 GET_THREAD_INFO(%rcx)
378 movl threadinfo_flags(%rcx),%edx
379 andl %edi,%edx
380 jnz int_careful
381 andl $~TS_COMPAT,threadinfo_status(%rcx)
382 jmp retint_restore_args
384 /* Either reschedule or signal or syscall exit tracking needed. */
385 /* First do a reschedule test. */
386 /* edx: work, edi: workmask */
387 int_careful:
388 bt $TIF_NEED_RESCHED,%edx
389 jnc int_very_careful
390 TRACE_IRQS_ON
391 /* sti */
392 XEN_UNBLOCK_EVENTS(%rsi)
393 pushq %rdi
394 CFI_ADJUST_CFA_OFFSET 8
395 call schedule
396 popq %rdi
397 CFI_ADJUST_CFA_OFFSET -8
398 XEN_BLOCK_EVENTS(%rsi)
399 TRACE_IRQS_OFF
400 jmp int_with_check
402 /* handle signals and tracing -- both require a full stack frame */
403 int_very_careful:
404 TRACE_IRQS_ON
405 /* sti */
406 XEN_UNBLOCK_EVENTS(%rsi)
407 SAVE_REST
408 /* Check for syscall exit trace */
409 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
410 jz int_signal
411 pushq %rdi
412 CFI_ADJUST_CFA_OFFSET 8
413 leaq 8(%rsp),%rdi # &ptregs -> arg1
414 call syscall_trace_leave
415 popq %rdi
416 CFI_ADJUST_CFA_OFFSET -8
417 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
418 XEN_BLOCK_EVENTS(%rsi)
419 TRACE_IRQS_OFF
420 jmp int_restore_rest
422 int_signal:
423 testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
424 jz 1f
425 movq %rsp,%rdi # &ptregs -> arg1
426 xorl %esi,%esi # oldset -> arg2
427 call do_notify_resume
428 1: movl $_TIF_NEED_RESCHED,%edi
429 int_restore_rest:
430 RESTORE_REST
431 XEN_BLOCK_EVENTS(%rsi)
432 TRACE_IRQS_OFF
433 jmp int_with_check
434 CFI_ENDPROC
435 END(int_ret_from_sys_call)
437 /*
438 * Certain special system calls need to save a full stack frame.
439 */
441 .macro PTREGSCALL label,func,arg
442 .globl \label
443 \label:
444 leaq \func(%rip),%rax
445 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
446 jmp ptregscall_common
447 END(\label)
448 .endm
450 CFI_STARTPROC
452 PTREGSCALL stub_clone, sys_clone, %r8
453 PTREGSCALL stub_fork, sys_fork, %rdi
454 PTREGSCALL stub_vfork, sys_vfork, %rdi
455 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
456 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
457 PTREGSCALL stub_iopl, sys_iopl, %rsi
459 ENTRY(ptregscall_common)
460 popq %r11
461 CFI_ADJUST_CFA_OFFSET -8
462 CFI_REGISTER rip, r11
463 SAVE_REST
464 movq %r11, %r15
465 CFI_REGISTER rip, r15
466 FIXUP_TOP_OF_STACK %r11
467 call *%rax
468 RESTORE_TOP_OF_STACK %r11
469 movq %r15, %r11
470 CFI_REGISTER rip, r11
471 RESTORE_REST
472 pushq %r11
473 CFI_ADJUST_CFA_OFFSET 8
474 CFI_REL_OFFSET rip, 0
475 ret
476 CFI_ENDPROC
477 END(ptregscall_common)
479 ENTRY(stub_execve)
480 CFI_STARTPROC
481 popq %r11
482 CFI_ADJUST_CFA_OFFSET -8
483 CFI_REGISTER rip, r11
484 SAVE_REST
485 FIXUP_TOP_OF_STACK %r11
486 call sys_execve
487 RESTORE_TOP_OF_STACK %r11
488 movq %rax,RAX(%rsp)
489 RESTORE_REST
490 jmp int_ret_from_sys_call
491 CFI_ENDPROC
492 END(stub_execve)
494 /*
495 * sigreturn is special because it needs to restore all registers on return.
496 * This cannot be done with SYSRET, so use the IRET return path instead.
497 */
498 ENTRY(stub_rt_sigreturn)
499 CFI_STARTPROC
500 addq $8, %rsp
501 CFI_ADJUST_CFA_OFFSET -8
502 SAVE_REST
503 movq %rsp,%rdi
504 FIXUP_TOP_OF_STACK %r11
505 call sys_rt_sigreturn
506 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
507 RESTORE_REST
508 jmp int_ret_from_sys_call
509 CFI_ENDPROC
510 END(stub_rt_sigreturn)
512 /*
513 * initial frame state for interrupts and exceptions
514 */
515 .macro _frame ref
516 CFI_STARTPROC simple
517 CFI_DEF_CFA rsp,SS+8-\ref
518 /*CFI_REL_OFFSET ss,SS-\ref*/
519 CFI_REL_OFFSET rsp,RSP-\ref
520 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
521 /*CFI_REL_OFFSET cs,CS-\ref*/
522 CFI_REL_OFFSET rip,RIP-\ref
523 .endm
525 /* initial frame state for interrupts (and exceptions without error code) */
526 #define INTR_FRAME _frame RIP
527 /* initial frame state for exceptions with error code (and interrupts with
528 vector already pushed) */
529 #define XCPT_FRAME _frame ORIG_RAX
531 /*
532 * Interrupt exit.
533 *
534 */
536 retint_check:
537 CFI_DEFAULT_STACK
538 movl threadinfo_flags(%rcx),%edx
539 andl %edi,%edx
540 CFI_REMEMBER_STATE
541 jnz retint_careful
542 retint_restore_args:
543 movl EFLAGS-REST_SKIP(%rsp), %eax
544 shr $9, %eax # EAX[0] == IRET_EFLAGS.IF
545 XEN_GET_VCPU_INFO(%rsi)
546 andb evtchn_upcall_mask(%rsi),%al
547 andb $1,%al # EAX[0] == IRET_EFLAGS.IF & event_mask
548 jnz restore_all_enable_events # != 0 => enable event delivery
549 XEN_PUT_VCPU_INFO(%rsi)
551 RESTORE_ARGS 0,8,0
552 HYPERVISOR_IRET 0
554 /* edi: workmask, edx: work */
555 retint_careful:
556 CFI_RESTORE_STATE
557 bt $TIF_NEED_RESCHED,%edx
558 jnc retint_signal
559 TRACE_IRQS_ON
560 XEN_UNBLOCK_EVENTS(%rsi)
561 /* sti */
562 pushq %rdi
563 CFI_ADJUST_CFA_OFFSET 8
564 call schedule
565 popq %rdi
566 CFI_ADJUST_CFA_OFFSET -8
567 GET_THREAD_INFO(%rcx)
568 XEN_BLOCK_EVENTS(%rsi)
569 /* cli */
570 TRACE_IRQS_OFF
571 jmp retint_check
573 retint_signal:
574 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
575 jz retint_restore_args
576 TRACE_IRQS_ON
577 XEN_UNBLOCK_EVENTS(%rsi)
578 SAVE_REST
579 movq $-1,ORIG_RAX(%rsp)
580 xorl %esi,%esi # oldset
581 movq %rsp,%rdi # &pt_regs
582 call do_notify_resume
583 RESTORE_REST
584 XEN_BLOCK_EVENTS(%rsi)
585 TRACE_IRQS_OFF
586 movl $_TIF_NEED_RESCHED,%edi
587 GET_THREAD_INFO(%rcx)
588 jmp retint_check
590 #ifdef CONFIG_PREEMPT
591 /* Returning to kernel space. Check if we need preemption */
592 /* rcx: threadinfo. interrupts off. */
593 .p2align
594 retint_kernel:
595 cmpl $0,threadinfo_preempt_count(%rcx)
596 jnz retint_restore_args
597 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
598 jnc retint_restore_args
599 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
600 jnc retint_restore_args
601 call preempt_schedule_irq
602 jmp retint_kernel /* check again */
603 #endif
605 CFI_ENDPROC
606 END(common_interrupt)
608 /*
609 * APIC interrupts.
610 */
611 .macro apicinterrupt num,func
612 INTR_FRAME
613 pushq $~(\num)
614 CFI_ADJUST_CFA_OFFSET 8
615 interrupt \func
616 jmp error_entry
617 CFI_ENDPROC
618 .endm
620 #ifndef CONFIG_XEN
621 ENTRY(thermal_interrupt)
622 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
623 END(thermal_interrupt)
625 ENTRY(threshold_interrupt)
626 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
627 END(threshold_interrupt)
629 #ifdef CONFIG_SMP
630 ENTRY(reschedule_interrupt)
631 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
632 END(reschedule_interrupt)
634 .macro INVALIDATE_ENTRY num
635 ENTRY(invalidate_interrupt\num)
636 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
637 END(invalidate_interrupt\num)
638 .endm
640 INVALIDATE_ENTRY 0
641 INVALIDATE_ENTRY 1
642 INVALIDATE_ENTRY 2
643 INVALIDATE_ENTRY 3
644 INVALIDATE_ENTRY 4
645 INVALIDATE_ENTRY 5
646 INVALIDATE_ENTRY 6
647 INVALIDATE_ENTRY 7
649 ENTRY(call_function_interrupt)
650 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
651 END(call_function_interrupt)
652 #endif
654 #ifdef CONFIG_X86_LOCAL_APIC
655 ENTRY(apic_timer_interrupt)
656 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
657 END(apic_timer_interrupt)
659 ENTRY(error_interrupt)
660 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
661 END(error_interrupt)
663 ENTRY(spurious_interrupt)
664 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
665 END(spurious_interrupt)
666 #endif
667 #endif /* !CONFIG_XEN */
669 /*
670 * Exception entry points.
671 */
672 .macro zeroentry sym
673 INTR_FRAME
674 movq (%rsp),%rcx
675 movq 8(%rsp),%r11
676 addq $0x10,%rsp /* skip rcx and r11 */
677 pushq $0 /* push error code/oldrax */
678 CFI_ADJUST_CFA_OFFSET 8
679 pushq %rax /* push real oldrax to the rdi slot */
680 CFI_ADJUST_CFA_OFFSET 8
681 leaq \sym(%rip),%rax
682 jmp error_entry
683 CFI_ENDPROC
684 .endm
686 .macro errorentry sym
687 XCPT_FRAME
688 movq (%rsp),%rcx
689 movq 8(%rsp),%r11
690 addq $0x10,%rsp /* rsp points to the error code */
691 pushq %rax
692 CFI_ADJUST_CFA_OFFSET 8
693 leaq \sym(%rip),%rax
694 jmp error_entry
695 CFI_ENDPROC
696 .endm
698 #if 0 /* not XEN */
699 /* error code is on the stack already */
700 /* handle NMI-like exceptions that can happen everywhere */
701 .macro paranoidentry sym, ist=0, irqtrace=1
702 movq (%rsp),%rcx
703 movq 8(%rsp),%r11
704 addq $0x10,%rsp /* skip rcx and r11 */
705 SAVE_ALL
706 cld
707 #if 0 /* not XEN */
708 movl $1,%ebx
709 movl $MSR_GS_BASE,%ecx
710 rdmsr
711 testl %edx,%edx
712 js 1f
713 swapgs
714 xorl %ebx,%ebx
715 1:
716 #endif
717 .if \ist
718 movq %gs:pda_data_offset, %rbp
719 .endif
720 movq %rsp,%rdi
721 movq ORIG_RAX(%rsp),%rsi
722 movq $-1,ORIG_RAX(%rsp)
723 .if \ist
724 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
725 .endif
726 call \sym
727 .if \ist
728 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
729 .endif
730 /* cli */
731 XEN_BLOCK_EVENTS(%rsi)
732 .if \irqtrace
733 TRACE_IRQS_OFF
734 .endif
735 .endm
737 /*
738 * "Paranoid" exit path from exception stack.
739 * Paranoid because this is used by NMIs and cannot take
740 * any kernel state for granted.
741 * We don't do kernel preemption checks here, because only
742 * NMI should be common and it does not enable IRQs and
743 * cannot get reschedule ticks.
744 *
745 * "trace" is 0 for the NMI handler only, because irq-tracing
746 * is fundamentally NMI-unsafe. (we cannot change the soft and
747 * hard flags at once, atomically)
748 */
749 .macro paranoidexit trace=1
750 /* ebx: no swapgs flag */
751 paranoid_exit\trace:
752 testl %ebx,%ebx /* swapgs needed? */
753 jnz paranoid_restore\trace
754 testl $3,CS(%rsp)
755 jnz paranoid_userspace\trace
756 paranoid_swapgs\trace:
757 TRACE_IRQS_IRETQ 0
758 swapgs
759 paranoid_restore\trace:
760 RESTORE_ALL 8
761 iretq
762 paranoid_userspace\trace:
763 GET_THREAD_INFO(%rcx)
764 movl threadinfo_flags(%rcx),%ebx
765 andl $_TIF_WORK_MASK,%ebx
766 jz paranoid_swapgs\trace
767 movq %rsp,%rdi /* &pt_regs */
768 call sync_regs
769 movq %rax,%rsp /* switch stack for scheduling */
770 testl $_TIF_NEED_RESCHED,%ebx
771 jnz paranoid_schedule\trace
772 movl %ebx,%edx /* arg3: thread flags */
773 .if \trace
774 TRACE_IRQS_ON
775 .endif
776 sti
777 xorl %esi,%esi /* arg2: oldset */
778 movq %rsp,%rdi /* arg1: &pt_regs */
779 call do_notify_resume
780 cli
781 .if \trace
782 TRACE_IRQS_OFF
783 .endif
784 jmp paranoid_userspace\trace
785 paranoid_schedule\trace:
786 .if \trace
787 TRACE_IRQS_ON
788 .endif
789 sti
790 call schedule
791 cli
792 .if \trace
793 TRACE_IRQS_OFF
794 .endif
795 jmp paranoid_userspace\trace
796 CFI_ENDPROC
797 .endm
798 #endif
800 /*
801 * Exception entry point. This expects an error code/orig_rax on the stack
802 * and the exception handler in %rax.
803 */
804 ENTRY(error_entry)
805 _frame RDI
806 /* rdi slot contains rax, oldrax contains error code */
807 cld
808 subq $14*8,%rsp
809 CFI_ADJUST_CFA_OFFSET (14*8)
810 movq %rsi,13*8(%rsp)
811 CFI_REL_OFFSET rsi,RSI
812 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
813 movq %rdx,12*8(%rsp)
814 CFI_REL_OFFSET rdx,RDX
815 movq %rcx,11*8(%rsp)
816 CFI_REL_OFFSET rcx,RCX
817 movq %rsi,10*8(%rsp) /* store rax */
818 CFI_REL_OFFSET rax,RAX
819 movq %r8, 9*8(%rsp)
820 CFI_REL_OFFSET r8,R8
821 movq %r9, 8*8(%rsp)
822 CFI_REL_OFFSET r9,R9
823 movq %r10,7*8(%rsp)
824 CFI_REL_OFFSET r10,R10
825 movq %r11,6*8(%rsp)
826 CFI_REL_OFFSET r11,R11
827 movq %rbx,5*8(%rsp)
828 CFI_REL_OFFSET rbx,RBX
829 movq %rbp,4*8(%rsp)
830 CFI_REL_OFFSET rbp,RBP
831 movq %r12,3*8(%rsp)
832 CFI_REL_OFFSET r12,R12
833 movq %r13,2*8(%rsp)
834 CFI_REL_OFFSET r13,R13
835 movq %r14,1*8(%rsp)
836 CFI_REL_OFFSET r14,R14
837 movq %r15,(%rsp)
838 CFI_REL_OFFSET r15,R15
839 #if 0
840 cmpl $__KERNEL_CS,CS(%rsp)
841 je error_kernelspace
842 #endif
843 error_call_handler:
844 movq %rdi, RDI(%rsp)
845 movq %rsp,%rdi
846 movq ORIG_RAX(%rsp),%rsi # get error code
847 movq $-1,ORIG_RAX(%rsp)
848 call *%rax
849 error_exit:
850 RESTORE_REST
851 /* cli */
852 XEN_BLOCK_EVENTS(%rsi)
853 TRACE_IRQS_OFF
854 GET_THREAD_INFO(%rcx)
855 testb $3,CS-ARGOFFSET(%rsp)
856 jz retint_kernel
857 movl threadinfo_flags(%rcx),%edx
858 movl $_TIF_WORK_MASK,%edi
859 andl %edi,%edx
860 jnz retint_careful
861 /*
862 * The iret might restore flags:
863 */
864 TRACE_IRQS_IRETQ
865 jmp retint_restore_args
867 error_kernelspace:
868 /*
869 * We need to rewrite the logic here because we don't do iretq to
870 * return to user mode. It's still possible that we get a trap/fault
871 * in the kernel (when accessing buffers pointed to by system calls,
872 * for example).
873 *
874 */
875 #if 0
876 incl %ebx
877 /* There are two places in the kernel that can potentially fault with
878 usergs. Handle them here. The exception handlers after
879 iret run with kernel gs again, so don't set the user space flag.
880 B-stepping K8s sometimes report a truncated RIP for IRET
881 exceptions returning to compat mode. Check for these here too. */
882 leaq iret_label(%rip),%rbp
883 cmpq %rbp,RIP(%rsp)
884 je error_swapgs
885 movl %ebp,%ebp /* zero extend */
886 cmpq %rbp,RIP(%rsp)
887 je error_swapgs
888 cmpq $gs_change,RIP(%rsp)
889 je error_swapgs
890 jmp error_sti
891 #endif
892 END(error_entry)
894 ENTRY(hypervisor_callback)
895 zeroentry do_hypervisor_callback
897 /*
898 * Copied from arch/xen/i386/kernel/entry.S
899 */
900 # A note on the "critical region" in our callback handler.
901 # We want to avoid stacking callback handlers due to events occurring
902 # during handling of the last event. To do this, we keep events disabled
903 # until we've done all processing. HOWEVER, we must enable events before
904 # popping the stack frame (can't be done atomically) and so it would still
905 # be possible to get enough handler activations to overflow the stack.
906 # Although unlikely, bugs of that kind are hard to track down, so we'd
907 # like to avoid the possibility.
908 # So, on entry to the handler we detect whether we interrupted an
909 # existing activation in its critical region -- if so, we pop the current
910 # activation and restart the handler using the previous one.
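
A rough C pseudocode sketch of the masking/re-check idea described above (process_pending_events() is hypothetical; evtchn_upcall_mask and evtchn_upcall_pending are the vcpu_info fields this file tests through the XEN_* macros):

/* trimmed vcpu_info: only the two fields the exit path cares about */
struct vcpu_info { unsigned char evtchn_upcall_pending, evtchn_upcall_mask; };

static void process_pending_events(void) { /* hypothetical: drain event channels */ }

static void callback_exit_idea(struct vcpu_info *v)	/* sketch, not the real exit path */
{
	for (;;) {
		v->evtchn_upcall_mask = 1;	/* events stay blocked while we work */
		process_pending_events();
		v->evtchn_upcall_mask = 0;	/* unblock: start of the critical region */
		if (!v->evtchn_upcall_pending)	/* nothing arrived in the meantime? */
			break;			/* safe to pop the frame and iret */
		/* an event snuck in: loop and handle it now instead of taking
		 * (and stacking) another callback activation */
	}
}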
911 ENTRY(do_hypervisor_callback) # do_hypervisor_callback(struct pt_regs *)
912 # Since we don't modify %rdi, evtchn_do_upcall(struct pt_regs *) will
913 # see the correct pointer to the pt_regs
914 movq %rdi, %rsp # we don't return, adjust the stack frame
915 11: movq %gs:pda_irqstackptr,%rax
916 incl %gs:pda_irqcount
917 cmovzq %rax,%rsp
918 pushq %rdi
919 call evtchn_do_upcall
920 popq %rsp
921 decl %gs:pda_irqcount
922 jmp error_exit
924 #ifdef CONFIG_X86_LOCAL_APIC
925 KPROBE_ENTRY(nmi)
926 zeroentry do_nmi_callback
927 ENTRY(do_nmi_callback)
928 addq $8, %rsp
929 call do_nmi
930 orl $NMI_MASK,EFLAGS(%rsp)
931 RESTORE_REST
932 XEN_BLOCK_EVENTS(%rsi)
933 GET_THREAD_INFO(%rcx)
934 jmp retint_restore_args
935 .previous .text
936 #endif
938 ALIGN
939 restore_all_enable_events:
940 XEN_UNBLOCK_EVENTS(%rsi) # %rsi is already set up...
942 scrit: /**** START OF CRITICAL REGION ****/
943 XEN_TEST_PENDING(%rsi)
944 jnz 14f # process more events if necessary...
945 XEN_PUT_VCPU_INFO(%rsi)
946 RESTORE_ARGS 0,8,0
947 HYPERVISOR_IRET 0
949 14: XEN_LOCKED_BLOCK_EVENTS(%rsi)
950 XEN_PUT_VCPU_INFO(%rsi)
951 SAVE_REST
952 movq %rsp,%rdi # set the argument again
953 jmp 11b
954 ecrit: /**** END OF CRITICAL REGION ****/
955 # At this point, unlike on x86-32, we don't do the fixup, both to simplify the
956 # code and because the stack frame is more complex on x86-64.
957 # When the kernel is interrupted in the critical section, the kernel
958 # will simply do an IRET, and everything will be restored at that point,
959 # i.e. it just resumes from the instruction that was interrupted, with the same context.
961 # Hypervisor uses this for application faults while it executes.
962 # We get here for two reasons:
963 # 1. Fault while reloading DS, ES, FS or GS
964 # 2. Fault while executing IRET
965 # Category 1 we do not need to fix up as Xen has already reloaded all segment
966 # registers that could be reloaded and zeroed the others.
967 # Category 2 we fix up by killing the current process. We cannot use the
968 # normal Linux return path in this case because if we use the IRET hypercall
969 # to pop the stack frame we end up in an infinite loop of failsafe callbacks.
970 # We distinguish between categories by comparing each saved segment register
971 # with its current contents: any discrepancy means we are in category 1.
972 ENTRY(failsafe_callback)
973 movw %ds,%cx
974 cmpw %cx,0x10(%rsp)
975 jne 1f
976 movw %es,%cx
977 cmpw %cx,0x18(%rsp)
978 jne 1f
979 movw %fs,%cx
980 cmpw %cx,0x20(%rsp)
981 jne 1f
982 movw %gs,%cx
983 cmpw %cx,0x28(%rsp)
984 jne 1f
985 /* All segments match their saved values => Category 2 (Bad IRET). */
986 movq (%rsp),%rcx
987 movq 8(%rsp),%r11
988 addq $0x30,%rsp
989 movq $11,%rdi /* SIGSEGV */
990 jmp do_exit
991 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
992 movq (%rsp),%rcx
993 movq 8(%rsp),%r11
994 addq $0x30,%rsp
995 pushq $0
996 SAVE_ALL
997 jmp error_exit
998 #if 0
999 .section __ex_table,"a"
1000 .align 8
1001 .quad gs_change,bad_gs
1002 .previous
1003 .section .fixup,"ax"
1004 /* running with kernelgs */
1005 bad_gs:
1006 /* swapgs */ /* switch back to user gs */
1007 xorl %eax,%eax
1008 movl %eax,%gs
1009 jmp 2b
1010 .previous
1011 #endif
1013 /*
1014 * Create a kernel thread.
1016 * C extern interface:
1017 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
1019 * asm input arguments:
1020 * rdi: fn, rsi: arg, rdx: flags
1021 */
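
A minimal usage sketch of that C interface (the worker function and flag choice are illustrative, not taken from this file):

#include <linux/sched.h>	/* CLONE_* flags; kernel_thread itself is declared in the arch headers of this era */

static int my_worker(void *arg)
{
	/* the new thread reaches this function via child_rip below */
	return 0;
}

static void start_worker(void)
{
	/* flags as an in-kernel caller of this vintage might pass them;
	 * SIGCHLD lets the parent reap the thread when it exits */
	kernel_thread(my_worker, NULL, CLONE_FS | CLONE_FILES | SIGCHLD);
}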
1022 ENTRY(kernel_thread)
1023 CFI_STARTPROC
1024 FAKE_STACK_FRAME $child_rip
1025 SAVE_ALL
1027 # rdi: flags, rsi: usp, rdx: will be &pt_regs
1028 movq %rdx,%rdi
1029 orq kernel_thread_flags(%rip),%rdi
1030 movq $-1, %rsi
1031 movq %rsp, %rdx
1033 xorl %r8d,%r8d
1034 xorl %r9d,%r9d
1036 # clone now
1037 call do_fork
1038 movq %rax,RAX(%rsp)
1039 xorl %edi,%edi
1041 /*
1042 * It isn't worth checking for reschedule here,
1043 * so internally to the x86_64 port you can rely on kernel_thread()
1044 * not to reschedule the child before returning; this avoids the need
1045 * for hacks, for example to fork off the per-CPU idle tasks.
1046 * [Hopefully no generic code relies on the reschedule -AK]
1047 */
1048 RESTORE_ALL
1049 UNFAKE_STACK_FRAME
1050 ret
1051 CFI_ENDPROC
1052 ENDPROC(kernel_thread)
1054 child_rip:
1055 pushq $0 # fake return address
1056 CFI_STARTPROC
1057 /*
1058 * Here we are in the child and the registers are set as they were
1059 * at kernel_thread() invocation in the parent.
1060 */
1061 movq %rdi, %rax
1062 movq %rsi, %rdi
1063 call *%rax
1064 # exit
1065 xorl %edi, %edi
1066 call do_exit
1067 CFI_ENDPROC
1068 ENDPROC(child_rip)
1070 /*
1071 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
1073 * C extern interface:
1074 * extern long execve(char *name, char **argv, char **envp)
1076 * asm input arguments:
1077 * rdi: name, rsi: argv, rdx: envp
1079 * We want to fall back into:
1080 * extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
1082 * do_sys_execve asm fallback arguments:
1083 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
1084 */
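
A hedged sketch of how this in-kernel execve() gets used, modeled on the way init/main.c of this vintage starts /sbin/init (the variable and function names here are illustrative):

static char *argv_init[] = { "/sbin/init", NULL };
static char *envp_init[] = { "HOME=/", "TERM=linux", NULL };

static void start_init(char *init_filename)
{
	argv_init[0] = init_filename;
	/* returns only on failure; on success the new user-space image is
	 * entered through the IRET path set up by the fake stack frame above */
	execve(init_filename, argv_init, envp_init);
}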
1085 ENTRY(execve)
1086 CFI_STARTPROC
1087 FAKE_STACK_FRAME $0
1088 SAVE_ALL
1089 call sys_execve
1090 movq %rax, RAX(%rsp)
1091 RESTORE_REST
1092 testq %rax,%rax
1093 jne 1f
1094 jmp int_ret_from_sys_call
1095 1: RESTORE_ARGS
1096 UNFAKE_STACK_FRAME
1097 ret
1098 CFI_ENDPROC
1099 ENDPROC(execve)
1101 KPROBE_ENTRY(page_fault)
1102 errorentry do_page_fault
1103 END(page_fault)
1104 .previous .text
1106 ENTRY(coprocessor_error)
1107 zeroentry do_coprocessor_error
1108 END(coprocessor_error)
1110 ENTRY(simd_coprocessor_error)
1111 zeroentry do_simd_coprocessor_error
1112 END(simd_coprocessor_error)
1114 ENTRY(device_not_available)
1115 zeroentry math_state_restore
1116 END(device_not_available)
1118 /* runs on exception stack */
1119 KPROBE_ENTRY(debug)
1120 INTR_FRAME
1121 /* pushq $0
1122 CFI_ADJUST_CFA_OFFSET 8 */
1123 zeroentry do_debug
1124 /* paranoid_exit */
1125 CFI_ENDPROC
1126 END(debug)
1127 .previous .text
1129 #if 0
1130 /* runs on exception stack */
1131 KPROBE_ENTRY(nmi)
1132 INTR_FRAME
1133 pushq $-1
1134 CFI_ADJUST_CFA_OFFSET 8
1135 paranoidentry do_nmi, 0, 0
1136 #ifdef CONFIG_TRACE_IRQFLAGS
1137 paranoidexit 0
1138 #else
1139 jmp paranoid_exit1
1140 CFI_ENDPROC
1141 #endif
1142 END(nmi)
1143 .previous .text
1144 #endif
1146 KPROBE_ENTRY(int3)
1147 INTR_FRAME
1148 /* pushq $0
1149 CFI_ADJUST_CFA_OFFSET 8 */
1150 zeroentry do_int3
1151 /* jmp paranoid_exit1 */
1152 CFI_ENDPROC
1153 END(int3)
1154 .previous .text
1156 ENTRY(overflow)
1157 zeroentry do_overflow
1158 END(overflow)
1160 ENTRY(bounds)
1161 zeroentry do_bounds
1162 END(bounds)
1164 ENTRY(invalid_op)
1165 zeroentry do_invalid_op
1166 END(invalid_op)
1168 ENTRY(coprocessor_segment_overrun)
1169 zeroentry do_coprocessor_segment_overrun
1170 END(coprocessor_segment_overrun)
1172 ENTRY(reserved)
1173 zeroentry do_reserved
1174 END(reserved)
1176 #if 0
1177 /* runs on exception stack */
1178 ENTRY(double_fault)
1179 XCPT_FRAME
1180 paranoidentry do_double_fault
1181 jmp paranoid_exit1
1182 CFI_ENDPROC
1183 END(double_fault)
1184 #endif
1186 ENTRY(invalid_TSS)
1187 errorentry do_invalid_TSS
1188 END(invalid_TSS)
1190 ENTRY(segment_not_present)
1191 errorentry do_segment_not_present
1192 END(segment_not_present)
1194 /* runs on exception stack */
1195 ENTRY(stack_segment)
1196 XCPT_FRAME
1197 errorentry do_stack_segment
1198 CFI_ENDPROC
1199 END(stack_segment)
1201 KPROBE_ENTRY(general_protection)
1202 errorentry do_general_protection
1203 END(general_protection)
1204 .previous .text
1206 ENTRY(alignment_check)
1207 errorentry do_alignment_check
1208 END(alignment_check)
1210 ENTRY(divide_error)
1211 zeroentry do_divide_error
1212 END(divide_error)
1214 ENTRY(spurious_interrupt_bug)
1215 zeroentry do_spurious_interrupt_bug
1216 END(spurious_interrupt_bug)
1218 #ifdef CONFIG_X86_MCE
1219 /* runs on exception stack */
1220 ENTRY(machine_check)
1221 INTR_FRAME
1222 pushq $0
1223 CFI_ADJUST_CFA_OFFSET 8
1224 paranoidentry do_machine_check
1225 jmp paranoid_exit1
1226 CFI_ENDPROC
1227 END(machine_check)
1228 #endif
1230 /* Call softirq on interrupt stack. Interrupts are off. */
1231 ENTRY(call_softirq)
1232 CFI_STARTPROC
1233 push %rbp
1234 CFI_ADJUST_CFA_OFFSET 8
1235 CFI_REL_OFFSET rbp,0
1236 mov %rsp,%rbp
1237 CFI_DEF_CFA_REGISTER rbp
1238 incl %gs:pda_irqcount
1239 cmove %gs:pda_irqstackptr,%rsp
1240 push %rbp # backlink for old unwinder
1241 call __do_softirq
1242 leaveq
1243 CFI_DEF_CFA_REGISTER rsp
1244 CFI_ADJUST_CFA_OFFSET -8
1245 decl %gs:pda_irqcount
1246 ret
1247 CFI_ENDPROC
1248 ENDPROC(call_softirq)
1250 #ifdef CONFIG_STACK_UNWIND
1251 ENTRY(arch_unwind_init_running)
1252 CFI_STARTPROC
1253 movq %r15, R15(%rdi)
1254 movq %r14, R14(%rdi)
1255 xchgq %rsi, %rdx
1256 movq %r13, R13(%rdi)
1257 movq %r12, R12(%rdi)
1258 xorl %eax, %eax
1259 movq %rbp, RBP(%rdi)
1260 movq %rbx, RBX(%rdi)
1261 movq (%rsp), %rcx
1262 movq %rax, R11(%rdi)
1263 movq %rax, R10(%rdi)
1264 movq %rax, R9(%rdi)
1265 movq %rax, R8(%rdi)
1266 movq %rax, RAX(%rdi)
1267 movq %rax, RCX(%rdi)
1268 movq %rax, RDX(%rdi)
1269 movq %rax, RSI(%rdi)
1270 movq %rax, RDI(%rdi)
1271 movq %rax, ORIG_RAX(%rdi)
1272 movq %rcx, RIP(%rdi)
1273 leaq 8(%rsp), %rcx
1274 movq $__KERNEL_CS, CS(%rdi)
1275 movq %rax, EFLAGS(%rdi)
1276 movq %rcx, RSP(%rdi)
1277 movq $__KERNEL_DS, SS(%rdi)
1278 jmpq *%rdx
1279 CFI_ENDPROC
1280 ENDPROC(arch_unwind_init_running)
1281 #endif