ia64/xen-unstable

linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S @ 13647:057c4c2991fa

32-on-64: Fix an off-by-one error in compat_addr_ok().
Signed-off-by: Jan Beulich <jbeulich@novell.com>
author kaf24@localhost.localdomain
date Fri Jan 26 13:36:52 2007 +0000 (2007-01-26)
parents 3adf00179a63
children 477a3bde3b61
1 /*
2 * linux/arch/x86_64/entry.S
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
7 *
8 * $Id$
9 *
10 * Jun Nakajima <jun.nakajima@intel.com>
11 * Asit Mallick <asit.k.mallick@intel.com>
12 * Modified for Xen
13 */
15 /*
16 * entry.S contains the system-call and fault low-level handling routines.
17 *
18 * NOTE: This code handles signal-recognition, which happens every time
19 * after an interrupt and after each system call.
20 *
21 * Normal syscalls and interrupts don't save a full stack frame; this is
22 * only done for syscall tracing, signals or fork/exec et al.
23 *
24 * A note on terminology:
25 * - top of stack: Architecture defined interrupt frame from SS to RIP
26 * at the top of the kernel process stack.
27 * - partial stack frame: partially saved registers up to R11.
28 * - full stack frame: Like partial stack frame, but all registers saved.
29 *
30 * TODO:
31 * - schedule it carefully for the final hardware.
32 */
34 #define ASSEMBLY 1
35 #ifdef CONFIG_DEBUG_INFO
36 #undef CONFIG_DEBUG_INFO
37 #endif
38 #include <linux/linkage.h>
39 #include <asm/segment.h>
40 #include <asm/smp.h>
41 #include <asm/cache.h>
42 #include <asm/errno.h>
43 #include <asm/dwarf2.h>
44 #include <asm/calling.h>
45 #include <asm/asm-offsets.h>
46 #include <asm/msr.h>
47 #include <asm/unistd.h>
48 #include <asm/thread_info.h>
49 #include <asm/hw_irq.h>
50 #include <asm/page.h>
51 #include <asm/irqflags.h>
52 #include <asm/errno.h>
53 #include <xen/interface/arch-x86_64.h>
54 #include <xen/interface/features.h>
56 #include "irq_vectors.h"
58 #include "xen_entry.S"
60 .code64
62 #ifndef CONFIG_PREEMPT
63 #define retint_kernel retint_restore_args
64 #endif
67 .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
68 #ifdef CONFIG_TRACE_IRQFLAGS
69 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
70 jnc 1f
71 TRACE_IRQS_ON
72 1:
73 #endif
74 .endm
76 NMI_MASK = 0x80000000
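/*
 * NMI_MASK appears to be a software flag kept in the (architecturally unused)
 * top bit of the saved EFLAGS image: do_nmi_callback below sets it, and
 * HYPERVISOR_IRET tests it so that returns from the NMI callback always take
 * the slow path through the hypervisor instead of a direct iretq.
 */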
78 /*
79 * C code is not supposed to know about undefined top of stack. Every time
80 * a C function with a pt_regs argument is called from the SYSCALL-based
81 * fast path FIXUP_TOP_OF_STACK is needed.
82 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
83 * manipulation.
84 */
86 /* %rsp:at FRAMEEND */
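/*
 * Under Xen the syscall callback already arrives with a complete iret frame
 * saved by the hypervisor, which is presumably why - unlike in the native
 * kernel - FIXUP_TOP_OF_STACK below only fakes up CS and RCX, and
 * RESTORE_TOP_OF_STACK has nothing to undo.
 */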
87 .macro FIXUP_TOP_OF_STACK tmp
88 movq $__USER_CS,CS(%rsp)
89 movq $-1,RCX(%rsp)
90 .endm
92 .macro RESTORE_TOP_OF_STACK tmp,offset=0
93 .endm
95 .macro FAKE_STACK_FRAME child_rip
96 /* push in order ss, rsp, eflags, cs, rip */
97 xorl %eax, %eax
98 pushq %rax /* ss */
99 CFI_ADJUST_CFA_OFFSET 8
100 /*CFI_REL_OFFSET ss,0*/
101 pushq %rax /* rsp */
102 CFI_ADJUST_CFA_OFFSET 8
103 CFI_REL_OFFSET rsp,0
104 pushq $(1<<9) /* eflags - interrupts on */
105 CFI_ADJUST_CFA_OFFSET 8
106 /*CFI_REL_OFFSET rflags,0*/
107 pushq $__KERNEL_CS /* cs */
108 CFI_ADJUST_CFA_OFFSET 8
109 /*CFI_REL_OFFSET cs,0*/
110 pushq \child_rip /* rip */
111 CFI_ADJUST_CFA_OFFSET 8
112 CFI_REL_OFFSET rip,0
113 pushq %rax /* orig rax */
114 CFI_ADJUST_CFA_OFFSET 8
115 .endm
117 .macro UNFAKE_STACK_FRAME
118 addq $8*6, %rsp
119 CFI_ADJUST_CFA_OFFSET -(6*8)
120 .endm
122 .macro CFI_DEFAULT_STACK start=1
123 .if \start
124 CFI_STARTPROC simple
125 CFI_DEF_CFA rsp,SS+8
126 .else
127 CFI_DEF_CFA_OFFSET SS+8
128 .endif
129 CFI_REL_OFFSET r15,R15
130 CFI_REL_OFFSET r14,R14
131 CFI_REL_OFFSET r13,R13
132 CFI_REL_OFFSET r12,R12
133 CFI_REL_OFFSET rbp,RBP
134 CFI_REL_OFFSET rbx,RBX
135 CFI_REL_OFFSET r11,R11
136 CFI_REL_OFFSET r10,R10
137 CFI_REL_OFFSET r9,R9
138 CFI_REL_OFFSET r8,R8
139 CFI_REL_OFFSET rax,RAX
140 CFI_REL_OFFSET rcx,RCX
141 CFI_REL_OFFSET rdx,RDX
142 CFI_REL_OFFSET rsi,RSI
143 CFI_REL_OFFSET rdi,RDI
144 CFI_REL_OFFSET rip,RIP
145 /*CFI_REL_OFFSET cs,CS*/
146 /*CFI_REL_OFFSET rflags,EFLAGS*/
147 CFI_REL_OFFSET rsp,RSP
148 /*CFI_REL_OFFSET ss,SS*/
149 .endm
151 /*
152 * Must be consistent with the definition in arch-x86_64.h:
153 * struct iret_context {
154 * u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
155 * };
156 * #define VGCF_IN_SYSCALL (1<<8)
157 */
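/*
 * In other words: returns to guest-kernel context (saved CS has RPL 0 and the
 * NMI_MASK flag is clear) can use a direct iretq; because a 64-bit PV kernel
 * really runs in ring 3, the saved CS/SS first get their RPL forced to 3
 * unless XENFEAT_supervisor_mode_kernel is set. Returns to user space, or
 * with NMI_MASK set, go through the HYPERVISOR_iret hypercall instead, with
 * \flag (e.g. VGCF_IN_SYSCALL) pushed for the hypervisor.
 */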
158 .macro HYPERVISOR_IRET flag
159 testb $3,1*8(%rsp)
160 jnz 2f
161 testl $NMI_MASK,2*8(%rsp)
162 jnz 2f
164 testb $1,(xen_features+XENFEAT_supervisor_mode_kernel)
165 jnz 1f
167 /* Direct iret to kernel space. Correct CS and SS. */
168 orb $3,1*8(%rsp)
169 orb $3,4*8(%rsp)
170 1: iretq
172 2: /* Slow iret via hypervisor. */
173 andl $~NMI_MASK, 16(%rsp)
174 pushq $\flag
175 jmp hypercall_page + (__HYPERVISOR_iret * 32)
176 .endm
178 .macro SWITCH_TO_KERNEL ssoff,adjust=0
179 jc 1f
180 orb $1,\ssoff-\adjust+4(%rsp)
181 1:
182 .endm
184 /*
185 * A newly forked process directly context switches into this.
186 */
187 /* rdi: prev */
188 ENTRY(ret_from_fork)
189 CFI_DEFAULT_STACK
190 call schedule_tail
191 GET_THREAD_INFO(%rcx)
192 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
193 jnz rff_trace
194 rff_action:
195 RESTORE_REST
196 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
197 je int_ret_from_sys_call
198 testl $_TIF_IA32,threadinfo_flags(%rcx)
199 jnz int_ret_from_sys_call
200 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
201 jmp ret_from_sys_call
202 rff_trace:
203 movq %rsp,%rdi
204 call syscall_trace_leave
205 GET_THREAD_INFO(%rcx)
206 jmp rff_action
207 CFI_ENDPROC
208 END(ret_from_fork)
210 /*
211 * System call entry. Up to 6 arguments in registers are supported.
212 *
213 * SYSCALL does not save anything on the stack and does not change the
214 * stack pointer.
215 */
217 /*
218 * Register setup:
219 * rax system call number
220 * rdi arg0
221 * rcx return address for syscall/sysret, C arg3
222 * rsi arg1
223 * rdx arg2
224 * r10 arg3 (--> moved to rcx for C)
225 * r8 arg4
226 * r9 arg5
227 * r11 eflags for syscall/sysret, temporary for C
228 * r12-r15,rbp,rbx saved by C code, not touched.
229 *
230 * Interrupts are off on entry.
231 * Only called from user space.
232 *
233 * XXX if we had a free scratch register we could save the RSP into the stack frame
234 * and report it properly in ps. Unfortunately we don't have one.
235 *
236 * When the user can change the frame, always force IRET. That is because
237 * IRET deals with non-canonical addresses better; SYSRET has trouble
238 * with them due to bugs in both AMD and Intel CPUs.
239 */
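/*
 * Example: a write(fd, buf, count) call arrives here with rax = __NR_write,
 * rdi = fd, rsi = buf, rdx = count, rcx = the user return address and
 * r11 = the user rflags; the result goes back to the caller in rax.
 */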
241 ENTRY(system_call)
242 CFI_STARTPROC simple
243 CFI_DEF_CFA rsp,PDA_STACKOFFSET
244 CFI_REGISTER rip,rcx
245 /*CFI_REGISTER rflags,r11*/
246 SAVE_ARGS -8,0
247 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
248 /*
249 * No need to follow this irqs off/on section - it's straight
250 * and short:
251 */
252 XEN_UNBLOCK_EVENTS(%r11)
253 GET_THREAD_INFO(%rcx)
254 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
255 CFI_REMEMBER_STATE
256 jnz tracesys
257 cmpq $__NR_syscall_max,%rax
258 ja badsys
259 movq %r10,%rcx
260 call *sys_call_table(,%rax,8) # XXX: rip relative
261 movq %rax,RAX-ARGOFFSET(%rsp)
262 /*
263 * Syscall return path ending with SYSRET (fast path)
264 * Has incomplete stack frame and undefined top of stack.
265 */
266 .globl ret_from_sys_call
267 ret_from_sys_call:
268 movl $_TIF_ALLWORK_MASK,%edi
269 /* edi: flagmask */
270 sysret_check:
271 GET_THREAD_INFO(%rcx)
272 XEN_BLOCK_EVENTS(%rsi)
273 TRACE_IRQS_OFF
274 movl threadinfo_flags(%rcx),%edx
275 andl %edi,%edx
276 CFI_REMEMBER_STATE
277 jnz sysret_careful
278 /*
279 * sysretq will re-enable interrupts:
280 */
281 TRACE_IRQS_ON
282 XEN_UNBLOCK_EVENTS(%rsi)
283 CFI_REGISTER rip,rcx
284 RESTORE_ARGS 0,8,0
285 /*CFI_REGISTER rflags,r11*/
286 HYPERVISOR_IRET VGCF_IN_SYSCALL
288 /* Handle reschedules */
289 /* edx: work, edi: workmask */
290 sysret_careful:
291 CFI_RESTORE_STATE
292 bt $TIF_NEED_RESCHED,%edx
293 jnc sysret_signal
294 TRACE_IRQS_ON
295 XEN_UNBLOCK_EVENTS(%rsi)
296 pushq %rdi
297 CFI_ADJUST_CFA_OFFSET 8
298 call schedule
299 popq %rdi
300 CFI_ADJUST_CFA_OFFSET -8
301 jmp sysret_check
303 /* Handle a signal */
304 sysret_signal:
305 TRACE_IRQS_ON
306 /* sti */
307 XEN_UNBLOCK_EVENTS(%rsi)
308 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
309 jz 1f
311 /* Really a signal */
312 /* edx: work flags (arg3) */
313 leaq do_notify_resume(%rip),%rax
314 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
315 xorl %esi,%esi # oldset -> arg2
316 call ptregscall_common
317 1: movl $_TIF_NEED_RESCHED,%edi
318 /* Use IRET because user could have changed frame. This
319 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
320 XEN_BLOCK_EVENTS(%rsi)
321 TRACE_IRQS_OFF
322 jmp int_with_check
324 badsys:
325 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
326 jmp ret_from_sys_call
328 /* Do syscall tracing */
329 tracesys:
330 CFI_RESTORE_STATE
331 SAVE_REST
332 movq $-ENOSYS,RAX(%rsp)
333 FIXUP_TOP_OF_STACK %rdi
334 movq %rsp,%rdi
335 call syscall_trace_enter
336 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
337 RESTORE_REST
338 cmpq $__NR_syscall_max,%rax
339 ja 1f
340 movq %r10,%rcx /* fixup for C */
341 call *sys_call_table(,%rax,8)
342 1: movq %rax,RAX-ARGOFFSET(%rsp)
343 /* Use IRET because user could have changed frame */
344 jmp int_ret_from_sys_call
345 CFI_ENDPROC
346 END(system_call)
348 /*
349 * Syscall return path ending with IRET.
350 * Has correct top of stack, but partial stack frame.
351 */
352 ENTRY(int_ret_from_sys_call)
353 CFI_STARTPROC simple
354 CFI_DEF_CFA rsp,SS+8-ARGOFFSET
355 /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
356 CFI_REL_OFFSET rsp,RSP-ARGOFFSET
357 /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
358 /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
359 CFI_REL_OFFSET rip,RIP-ARGOFFSET
360 CFI_REL_OFFSET rdx,RDX-ARGOFFSET
361 CFI_REL_OFFSET rcx,RCX-ARGOFFSET
362 CFI_REL_OFFSET rax,RAX-ARGOFFSET
363 CFI_REL_OFFSET rdi,RDI-ARGOFFSET
364 CFI_REL_OFFSET rsi,RSI-ARGOFFSET
365 CFI_REL_OFFSET r8,R8-ARGOFFSET
366 CFI_REL_OFFSET r9,R9-ARGOFFSET
367 CFI_REL_OFFSET r10,R10-ARGOFFSET
368 CFI_REL_OFFSET r11,R11-ARGOFFSET
369 TRACE_IRQS_OFF
370 XEN_BLOCK_EVENTS(%rsi)
371 testb $3,CS-ARGOFFSET(%rsp)
372 jnz 1f
373 /* Need to set the proper %ss (not NULL) for ring 3 iretq */
374 movl $__KERNEL_DS,SS-ARGOFFSET(%rsp)
375 jmp retint_restore_args # return from ring-3 kernel
376 1:
377 movl $_TIF_ALLWORK_MASK,%edi
378 /* edi: mask to check */
379 int_with_check:
380 GET_THREAD_INFO(%rcx)
381 movl threadinfo_flags(%rcx),%edx
382 andl %edi,%edx
383 jnz int_careful
384 andl $~TS_COMPAT,threadinfo_status(%rcx)
385 jmp retint_restore_args
387 /* Either reschedule or signal or syscall exit tracking needed. */
388 /* First do a reschedule test. */
389 /* edx: work, edi: workmask */
390 int_careful:
391 bt $TIF_NEED_RESCHED,%edx
392 jnc int_very_careful
393 TRACE_IRQS_ON
394 /* sti */
395 XEN_UNBLOCK_EVENTS(%rsi)
396 pushq %rdi
397 CFI_ADJUST_CFA_OFFSET 8
398 call schedule
399 popq %rdi
400 CFI_ADJUST_CFA_OFFSET -8
401 XEN_BLOCK_EVENTS(%rsi)
402 TRACE_IRQS_OFF
403 jmp int_with_check
405 /* handle signals and tracing -- both require a full stack frame */
406 int_very_careful:
407 TRACE_IRQS_ON
408 /* sti */
409 XEN_UNBLOCK_EVENTS(%rsi)
410 SAVE_REST
411 /* Check for syscall exit trace */
412 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
413 jz int_signal
414 pushq %rdi
415 CFI_ADJUST_CFA_OFFSET 8
416 leaq 8(%rsp),%rdi # &ptregs -> arg1
417 call syscall_trace_leave
418 popq %rdi
419 CFI_ADJUST_CFA_OFFSET -8
420 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
421 XEN_BLOCK_EVENTS(%rsi)
422 TRACE_IRQS_OFF
423 jmp int_restore_rest
425 int_signal:
426 testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
427 jz 1f
428 movq %rsp,%rdi # &ptregs -> arg1
429 xorl %esi,%esi # oldset -> arg2
430 call do_notify_resume
431 1: movl $_TIF_NEED_RESCHED,%edi
432 int_restore_rest:
433 RESTORE_REST
434 XEN_BLOCK_EVENTS(%rsi)
435 TRACE_IRQS_OFF
436 jmp int_with_check
437 CFI_ENDPROC
438 END(int_ret_from_sys_call)
440 /*
441 * Certain special system calls need to save a complete stack frame.
442 */
444 .macro PTREGSCALL label,func,arg
445 .globl \label
446 \label:
447 leaq \func(%rip),%rax
448 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
449 jmp ptregscall_common
450 END(\label)
451 .endm
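/*
 * Each stub generated below loads its C handler into %rax and a pointer to
 * the saved pt_regs into whichever register that handler takes its
 * struct pt_regs * argument in (for sys_clone that is the fifth argument,
 * hence %r8), then jumps to ptregscall_common to build the full frame.
 */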
453 CFI_STARTPROC
455 PTREGSCALL stub_clone, sys_clone, %r8
456 PTREGSCALL stub_fork, sys_fork, %rdi
457 PTREGSCALL stub_vfork, sys_vfork, %rdi
458 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
459 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
460 PTREGSCALL stub_iopl, sys_iopl, %rsi
462 ENTRY(ptregscall_common)
463 popq %r11
464 CFI_ADJUST_CFA_OFFSET -8
465 CFI_REGISTER rip, r11
466 SAVE_REST
467 movq %r11, %r15
468 CFI_REGISTER rip, r15
469 FIXUP_TOP_OF_STACK %r11
470 call *%rax
471 RESTORE_TOP_OF_STACK %r11
472 movq %r15, %r11
473 CFI_REGISTER rip, r11
474 RESTORE_REST
475 pushq %r11
476 CFI_ADJUST_CFA_OFFSET 8
477 CFI_REL_OFFSET rip, 0
478 ret
479 CFI_ENDPROC
480 END(ptregscall_common)
482 ENTRY(stub_execve)
483 CFI_STARTPROC
484 popq %r11
485 CFI_ADJUST_CFA_OFFSET -8
486 CFI_REGISTER rip, r11
487 SAVE_REST
488 FIXUP_TOP_OF_STACK %r11
489 call sys_execve
490 RESTORE_TOP_OF_STACK %r11
491 movq %rax,RAX(%rsp)
492 RESTORE_REST
493 jmp int_ret_from_sys_call
494 CFI_ENDPROC
495 END(stub_execve)
497 /*
498 * sigreturn is special because it needs to restore all registers on return.
499 * This cannot be done with SYSRET, so use the IRET return path instead.
500 */
501 ENTRY(stub_rt_sigreturn)
502 CFI_STARTPROC
503 addq $8, %rsp
504 CFI_ADJUST_CFA_OFFSET -8
505 SAVE_REST
506 movq %rsp,%rdi
507 FIXUP_TOP_OF_STACK %r11
508 call sys_rt_sigreturn
509 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
510 RESTORE_REST
511 jmp int_ret_from_sys_call
512 CFI_ENDPROC
513 END(stub_rt_sigreturn)
515 /*
516 * initial frame state for interrupts and exceptions
517 */
518 .macro _frame ref
519 CFI_STARTPROC simple
520 CFI_DEF_CFA rsp,SS+8-\ref
521 /*CFI_REL_OFFSET ss,SS-\ref*/
522 CFI_REL_OFFSET rsp,RSP-\ref
523 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
524 /*CFI_REL_OFFSET cs,CS-\ref*/
525 CFI_REL_OFFSET rip,RIP-\ref
526 .endm
528 /* initial frame state for interrupts (and exceptions without error code) */
529 #define INTR_FRAME _frame RIP
530 /* initial frame state for exceptions with error code (and interrupts with
531 vector already pushed) */
532 #define XCPT_FRAME _frame ORIG_RAX
534 /*
535 * Interrupt exit.
536 *
537 */
539 retint_check:
540 movl threadinfo_flags(%rcx),%edx
541 andl %edi,%edx
542 CFI_REMEMBER_STATE
543 jnz retint_careful
544 retint_restore_args:
545 movl EFLAGS-REST_SKIP(%rsp), %eax
546 shr $9, %eax # EAX[0] == IRET_EFLAGS.IF
547 XEN_GET_VCPU_INFO(%rsi)
548 andb evtchn_upcall_mask(%rsi),%al
549 andb $1,%al # EAX[0] == IRET_EFLAGS.IF & event_mask
550 jnz restore_all_enable_events # != 0 => enable event delivery
551 XEN_PUT_VCPU_INFO(%rsi)
553 RESTORE_ARGS 0,8,0
554 HYPERVISOR_IRET 0
556 /* edi: workmask, edx: work */
557 retint_careful:
558 CFI_RESTORE_STATE
559 bt $TIF_NEED_RESCHED,%edx
560 jnc retint_signal
561 TRACE_IRQS_ON
562 XEN_UNBLOCK_EVENTS(%rsi)
563 /* sti */
564 pushq %rdi
565 CFI_ADJUST_CFA_OFFSET 8
566 call schedule
567 popq %rdi
568 CFI_ADJUST_CFA_OFFSET -8
569 GET_THREAD_INFO(%rcx)
570 XEN_BLOCK_EVENTS(%rsi)
571 /* cli */
572 TRACE_IRQS_OFF
573 jmp retint_check
575 retint_signal:
576 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
577 jz retint_restore_args
578 TRACE_IRQS_ON
579 XEN_UNBLOCK_EVENTS(%rsi)
580 SAVE_REST
581 movq $-1,ORIG_RAX(%rsp)
582 xorl %esi,%esi # oldset
583 movq %rsp,%rdi # &pt_regs
584 call do_notify_resume
585 RESTORE_REST
586 XEN_BLOCK_EVENTS(%rsi)
587 TRACE_IRQS_OFF
588 movl $_TIF_NEED_RESCHED,%edi
589 GET_THREAD_INFO(%rcx)
590 jmp retint_check
592 #ifdef CONFIG_PREEMPT
593 /* Returning to kernel space. Check if we need preemption */
594 /* rcx: threadinfo. interrupts off. */
595 .p2align
596 retint_kernel:
597 cmpl $0,threadinfo_preempt_count(%rcx)
598 jnz retint_restore_args
599 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
600 jnc retint_restore_args
601 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
602 jnc retint_restore_args
603 call preempt_schedule_irq
604 jmp retint_kernel /* check again */
605 #endif
607 CFI_ENDPROC
608 END(common_interrupt)
610 /*
611 * APIC interrupts.
612 */
613 .macro apicinterrupt num,func
614 INTR_FRAME
615 pushq $~(\num)
616 CFI_ADJUST_CFA_OFFSET 8
617 interrupt \func
618 jmp error_entry
619 CFI_ENDPROC
620 .endm
622 #ifndef CONFIG_XEN
623 ENTRY(thermal_interrupt)
624 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
625 END(thermal_interrupt)
627 ENTRY(threshold_interrupt)
628 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
629 END(threshold_interrupt)
631 #ifdef CONFIG_SMP
632 ENTRY(reschedule_interrupt)
633 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
634 END(reschedule_interrupt)
636 .macro INVALIDATE_ENTRY num
637 ENTRY(invalidate_interrupt\num)
638 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
639 END(invalidate_interrupt\num)
640 .endm
642 INVALIDATE_ENTRY 0
643 INVALIDATE_ENTRY 1
644 INVALIDATE_ENTRY 2
645 INVALIDATE_ENTRY 3
646 INVALIDATE_ENTRY 4
647 INVALIDATE_ENTRY 5
648 INVALIDATE_ENTRY 6
649 INVALIDATE_ENTRY 7
651 ENTRY(call_function_interrupt)
652 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
653 END(call_function_interrupt)
654 #endif
656 #ifdef CONFIG_X86_LOCAL_APIC
657 ENTRY(apic_timer_interrupt)
658 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
659 END(apic_timer_interrupt)
661 ENTRY(error_interrupt)
662 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
663 END(error_interrupt)
665 ENTRY(spurious_interrupt)
666 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
667 END(spurious_interrupt)
668 #endif
669 #endif /* !CONFIG_XEN */
671 /*
672 * Exception entry points.
673 */
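/*
 * Xen's trap bounce additionally saves %rcx and %r11 on the guest stack (they
 * are clobbered on the way into the guest), which appears to be why both
 * macros below pop two extra words before setting up the error-code/%rax
 * layout that error_entry expects.
 */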
674 .macro zeroentry sym
675 INTR_FRAME
676 movq (%rsp),%rcx
677 movq 8(%rsp),%r11
678 addq $0x10,%rsp /* skip rcx and r11 */
679 pushq $0 /* push error code/oldrax */
680 CFI_ADJUST_CFA_OFFSET 8
681 pushq %rax /* push real oldrax to the rdi slot */
682 CFI_ADJUST_CFA_OFFSET 8
683 leaq \sym(%rip),%rax
684 jmp error_entry
685 CFI_ENDPROC
686 .endm
688 .macro errorentry sym
689 XCPT_FRAME
690 movq (%rsp),%rcx
691 movq 8(%rsp),%r11
692 addq $0x10,%rsp /* rsp points to the error code */
693 pushq %rax
694 CFI_ADJUST_CFA_OFFSET 8
695 leaq \sym(%rip),%rax
696 jmp error_entry
697 CFI_ENDPROC
698 .endm
700 #if 0 /* not XEN */
701 /* error code is on the stack already */
702 /* handle NMI like exceptions that can happen everywhere */
703 .macro paranoidentry sym, ist=0, irqtrace=1
704 movq (%rsp),%rcx
705 movq 8(%rsp),%r11
706 addq $0x10,%rsp /* skip rcx and r11 */
707 SAVE_ALL
708 cld
709 #if 0 /* not XEN */
710 movl $1,%ebx
711 movl $MSR_GS_BASE,%ecx
712 rdmsr
713 testl %edx,%edx
714 js 1f
715 swapgs
716 xorl %ebx,%ebx
717 1:
718 #endif
719 .if \ist
720 movq %gs:pda_data_offset, %rbp
721 .endif
722 movq %rsp,%rdi
723 movq ORIG_RAX(%rsp),%rsi
724 movq $-1,ORIG_RAX(%rsp)
725 .if \ist
726 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
727 .endif
728 call \sym
729 .if \ist
730 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
731 .endif
732 /* cli */
733 XEN_BLOCK_EVENTS(%rsi)
734 .if \irqtrace
735 TRACE_IRQS_OFF
736 .endif
737 .endm
739 /*
740 * "Paranoid" exit path from exception stack.
741 * Paranoid because this is used by NMIs and cannot take
742 * any kernel state for granted.
743 * We don't do kernel preemption checks here, because only
744 * NMI should be common and it does not enable IRQs and
745 * cannot get reschedule ticks.
746 *
747 * "trace" is 0 for the NMI handler only, because irq-tracing
748 * is fundamentally NMI-unsafe. (we cannot change the soft and
749 * hard flags at once, atomically)
750 */
751 .macro paranoidexit trace=1
752 /* ebx: no swapgs flag */
753 paranoid_exit\trace:
754 testl %ebx,%ebx /* swapgs needed? */
755 jnz paranoid_restore\trace
756 testl $3,CS(%rsp)
757 jnz paranoid_userspace\trace
758 paranoid_swapgs\trace:
759 TRACE_IRQS_IRETQ 0
760 swapgs
761 paranoid_restore\trace:
762 RESTORE_ALL 8
763 iretq
764 paranoid_userspace\trace:
765 GET_THREAD_INFO(%rcx)
766 movl threadinfo_flags(%rcx),%ebx
767 andl $_TIF_WORK_MASK,%ebx
768 jz paranoid_swapgs\trace
769 movq %rsp,%rdi /* &pt_regs */
770 call sync_regs
771 movq %rax,%rsp /* switch stack for scheduling */
772 testl $_TIF_NEED_RESCHED,%ebx
773 jnz paranoid_schedule\trace
774 movl %ebx,%edx /* arg3: thread flags */
775 .if \trace
776 TRACE_IRQS_ON
777 .endif
778 sti
779 xorl %esi,%esi /* arg2: oldset */
780 movq %rsp,%rdi /* arg1: &pt_regs */
781 call do_notify_resume
782 cli
783 .if \trace
784 TRACE_IRQS_OFF
785 .endif
786 jmp paranoid_userspace\trace
787 paranoid_schedule\trace:
788 .if \trace
789 TRACE_IRQS_ON
790 .endif
791 sti
792 call schedule
793 cli
794 .if \trace
795 TRACE_IRQS_OFF
796 .endif
797 jmp paranoid_userspace\trace
798 CFI_ENDPROC
799 .endm
800 #endif
802 /*
803 * Exception entry point. This expects an error code/orig_rax on the stack
804 * and the exception handler in %rax.
805 */
806 ENTRY(error_entry)
807 _frame RDI
808 /* rdi slot contains rax, oldrax contains error code */
809 cld
810 subq $14*8,%rsp
811 CFI_ADJUST_CFA_OFFSET (14*8)
812 movq %rsi,13*8(%rsp)
813 CFI_REL_OFFSET rsi,RSI
814 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
815 movq %rdx,12*8(%rsp)
816 CFI_REL_OFFSET rdx,RDX
817 movq %rcx,11*8(%rsp)
818 CFI_REL_OFFSET rcx,RCX
819 movq %rsi,10*8(%rsp) /* store rax */
820 CFI_REL_OFFSET rax,RAX
821 movq %r8, 9*8(%rsp)
822 CFI_REL_OFFSET r8,R8
823 movq %r9, 8*8(%rsp)
824 CFI_REL_OFFSET r9,R9
825 movq %r10,7*8(%rsp)
826 CFI_REL_OFFSET r10,R10
827 movq %r11,6*8(%rsp)
828 CFI_REL_OFFSET r11,R11
829 movq %rbx,5*8(%rsp)
830 CFI_REL_OFFSET rbx,RBX
831 movq %rbp,4*8(%rsp)
832 CFI_REL_OFFSET rbp,RBP
833 movq %r12,3*8(%rsp)
834 CFI_REL_OFFSET r12,R12
835 movq %r13,2*8(%rsp)
836 CFI_REL_OFFSET r13,R13
837 movq %r14,1*8(%rsp)
838 CFI_REL_OFFSET r14,R14
839 movq %r15,(%rsp)
840 CFI_REL_OFFSET r15,R15
841 #if 0
842 cmpl $__KERNEL_CS,CS(%rsp)
843 je error_kernelspace
844 #endif
845 error_call_handler:
846 movq %rdi, RDI(%rsp)
847 movq %rsp,%rdi
848 movq ORIG_RAX(%rsp),%rsi # get error code
849 movq $-1,ORIG_RAX(%rsp)
850 call *%rax
851 error_exit:
852 RESTORE_REST
853 /* cli */
854 XEN_BLOCK_EVENTS(%rsi)
855 TRACE_IRQS_OFF
856 GET_THREAD_INFO(%rcx)
857 testb $3,CS-ARGOFFSET(%rsp)
858 jz retint_kernel
859 movl threadinfo_flags(%rcx),%edx
860 movl $_TIF_WORK_MASK,%edi
861 andl %edi,%edx
862 jnz retint_careful
863 /*
864 * The iret might restore flags:
865 */
866 TRACE_IRQS_IRETQ
867 jmp retint_restore_args
869 error_kernelspace:
870 /*
871 * We need to re-write the logic here because we don't do iretq to
872 * return to user mode. It's still possible that we get a trap/fault
873 * in the kernel (when accessing buffers pointed to by system calls,
874 * for example).
875 *
876 */
877 #if 0
878 incl %ebx
879 /* There are two places in the kernel that can potentially fault with
880 usergs. Handle them here. The exception handlers after
881 iret run with kernel gs again, so don't set the user space flag.
882 B stepping K8s sometimes report a truncated RIP for IRET
883 exceptions returning to compat mode. Check for these here too. */
884 leaq iret_label(%rip),%rbp
885 cmpq %rbp,RIP(%rsp)
886 je error_swapgs
887 movl %ebp,%ebp /* zero extend */
888 cmpq %rbp,RIP(%rsp)
889 je error_swapgs
890 cmpq $gs_change,RIP(%rsp)
891 je error_swapgs
892 jmp error_sti
893 #endif
894 END(error_entry)
896 ENTRY(hypervisor_callback)
897 zeroentry do_hypervisor_callback
899 /*
900 * Copied from arch/xen/i386/kernel/entry.S
901 */
902 # A note on the "critical region" in our callback handler.
903 # We want to avoid stacking callback handlers due to events occurring
904 # during handling of the last event. To do this, we keep events disabled
905 # until we've done all processing. HOWEVER, we must enable events before
906 # popping the stack frame (can't be done atomically) and so it would still
907 # be possible to get enough handler activations to overflow the stack.
908 # Although unlikely, bugs of that kind are hard to track down, so we'd
909 # like to avoid the possibility.
910 # So, on entry to the handler we detect whether we interrupted an
911 # existing activation in its critical region -- if so, we pop the current
912 # activation and restart the handler using the previous one.
913 ENTRY(do_hypervisor_callback) # do_hypervisor_callback(struct pt_regs *)
914 # Since we don't modify %rdi, evtchn_do_upcall(struct pt_regs *) will
915 # see the correct pointer to the pt_regs
916 movq %rdi, %rsp # we don't return, adjust the stack frame
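# pda_irqcount starts at -1, so the incl below leaves it at zero only at the
# first nesting level; cmovzq then switches %rsp to the per-CPU irq stack in
# exactly that case.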
917 11: movq %gs:pda_irqstackptr,%rax
918 incl %gs:pda_irqcount
919 cmovzq %rax,%rsp
920 pushq %rdi
921 call evtchn_do_upcall
922 popq %rsp
923 decl %gs:pda_irqcount
924 jmp error_exit
926 #ifdef CONFIG_X86_LOCAL_APIC
927 KPROBE_ENTRY(nmi)
928 zeroentry do_nmi_callback
929 ENTRY(do_nmi_callback)
930 addq $8, %rsp
931 call do_nmi
932 orl $NMI_MASK,EFLAGS(%rsp)
933 RESTORE_REST
934 XEN_BLOCK_EVENTS(%rsi)
935 GET_THREAD_INFO(%rcx)
936 jmp retint_restore_args
937 .previous .text
938 #endif
940 ALIGN
941 restore_all_enable_events:
942 XEN_UNBLOCK_EVENTS(%rsi) # %rsi is already set up...
944 scrit: /**** START OF CRITICAL REGION ****/
945 XEN_TEST_PENDING(%rsi)
946 jnz 14f # process more events if necessary...
947 XEN_PUT_VCPU_INFO(%rsi)
948 RESTORE_ARGS 0,8,0
949 HYPERVISOR_IRET 0
951 14: XEN_LOCKED_BLOCK_EVENTS(%rsi)
952 XEN_PUT_VCPU_INFO(%rsi)
953 SAVE_REST
954 movq %rsp,%rdi # set the argument again
955 jmp 11b
956 ecrit: /**** END OF CRITICAL REGION ****/
957 # At this point, unlike on x86-32, we don't do the fixup, in order to keep
958 # the code simple: the stack frame is more complex on x86-64.
959 # If the kernel is interrupted inside the critical section, it will simply
960 # do the IRET from that interrupted activation; everything is restored at
961 # that point, i.e. it resumes at the interrupted instruction with the same context.
963 # The hypervisor uses this for application faults while it executes.
964 # We get here for two reasons:
965 # 1. Fault while reloading DS, ES, FS or GS
966 # 2. Fault while executing IRET
967 # Category 1 we do not need to fix up as Xen has already reloaded all segment
968 # registers that could be reloaded and zeroed the others.
969 # Category 2 we fix up by killing the current process. We cannot use the
970 # normal Linux return path in this case because if we use the IRET hypercall
971 # to pop the stack frame we end up in an infinite loop of failsafe callbacks.
972 # We distinguish between categories by comparing each saved segment register
973 # with its current contents: any discrepancy means we are in category 1.
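# Stack layout on entry, as the checks below assume: saved %rcx at 0x00 and
# %r11 at 0x08, the old ds/es/fs/gs selectors at 0x10/0x18/0x20/0x28, then the
# usual iret frame -- hence the addq $0x30,%rsp before rejoining a normal path.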
974 ENTRY(failsafe_callback)
975 movw %ds,%cx
976 cmpw %cx,0x10(%rsp)
977 jne 1f
978 movw %es,%cx
979 cmpw %cx,0x18(%rsp)
980 jne 1f
981 movw %fs,%cx
982 cmpw %cx,0x20(%rsp)
983 jne 1f
984 movw %gs,%cx
985 cmpw %cx,0x28(%rsp)
986 jne 1f
987 /* All segments match their saved values => Category 2 (Bad IRET). */
988 movq (%rsp),%rcx
989 movq 8(%rsp),%r11
990 addq $0x30,%rsp
991 movq $11,%rdi /* SIGSEGV */
992 jmp do_exit
993 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
994 movq (%rsp),%rcx
995 movq 8(%rsp),%r11
996 addq $0x30,%rsp
997 pushq $0
998 SAVE_ALL
999 jmp error_exit
1000 #if 0
1001 .section __ex_table,"a"
1002 .align 8
1003 .quad gs_change,bad_gs
1004 .previous
1005 .section .fixup,"ax"
1006 /* running with kernelgs */
1007 bad_gs:
1008 /* swapgs */ /* switch back to user gs */
1009 xorl %eax,%eax
1010 movl %eax,%gs
1011 jmp 2b
1012 .previous
1013 #endif
1015 /*
1016 * Create a kernel thread.
1018 * C extern interface:
1019 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
1021 * asm input arguments:
1022 * rdi: fn, rsi: arg, rdx: flags
1023 */
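/*
 * A typical call (my_worker being a made-up example thread function):
 *     kernel_thread(my_worker, NULL, CLONE_FS | CLONE_FILES | SIGCHLD);
 * arrives here with rdi = my_worker, rsi = NULL and rdx = the clone flags.
 */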
1024 ENTRY(kernel_thread)
1025 CFI_STARTPROC
1026 FAKE_STACK_FRAME $child_rip
1027 SAVE_ALL
1029 # rdi: flags, rsi: usp, rdx: will be &pt_regs
1030 movq %rdx,%rdi
1031 orq kernel_thread_flags(%rip),%rdi
1032 movq $-1, %rsi
1033 movq %rsp, %rdx
1035 xorl %r8d,%r8d
1036 xorl %r9d,%r9d
1038 # clone now
1039 call do_fork
1040 movq %rax,RAX(%rsp)
1041 xorl %edi,%edi
1043 /*
1044 * It isn't worth checking for a reschedule here,
1045 * so internally to the x86_64 port you can rely on kernel_thread()
1046 * not rescheduling the child before returning; this avoids the need
1047 * for hacks, for example to fork off the per-CPU idle tasks.
1048 * [Hopefully no generic code relies on the reschedule -AK]
1049 */
1050 RESTORE_ALL
1051 UNFAKE_STACK_FRAME
1052 ret
1053 CFI_ENDPROC
1054 ENDPROC(kernel_thread)
1056 child_rip:
1057 pushq $0 # fake return address
1058 CFI_STARTPROC
1059 /*
1060 * Here we are in the child and the registers are set as they were
1061 * at kernel_thread() invocation in the parent.
1062 */
1063 movq %rdi, %rax
1064 movq %rsi, %rdi
1065 call *%rax
1066 # exit
1067 xorl %edi, %edi
1068 call do_exit
1069 CFI_ENDPROC
1070 ENDPROC(child_rip)
1072 /*
1073 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
1075 * C extern interface:
1076 * extern long execve(char *name, char **argv, char **envp)
1078 * asm input arguments:
1079 * rdi: name, rsi: argv, rdx: envp
1081 * We want to fall back into:
1082 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
1084 * do_sys_execve asm fallback arguments:
1085 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
1086 */
1087 ENTRY(execve)
1088 CFI_STARTPROC
1089 FAKE_STACK_FRAME $0
1090 SAVE_ALL
1091 call sys_execve
1092 movq %rax, RAX(%rsp)
1093 RESTORE_REST
1094 testq %rax,%rax
1095 jne 1f
1096 jmp int_ret_from_sys_call
1097 1: RESTORE_ARGS
1098 UNFAKE_STACK_FRAME
1099 ret
1100 CFI_ENDPROC
1101 ENDPROC(execve)
1103 KPROBE_ENTRY(page_fault)
1104 errorentry do_page_fault
1105 END(page_fault)
1106 .previous .text
1108 ENTRY(coprocessor_error)
1109 zeroentry do_coprocessor_error
1110 END(coprocessor_error)
1112 ENTRY(simd_coprocessor_error)
1113 zeroentry do_simd_coprocessor_error
1114 END(simd_coprocessor_error)
1116 ENTRY(device_not_available)
1117 zeroentry math_state_restore
1118 END(device_not_available)
1120 /* runs on exception stack */
1121 KPROBE_ENTRY(debug)
1122 INTR_FRAME
1123 /* pushq $0
1124 CFI_ADJUST_CFA_OFFSET 8 */
1125 zeroentry do_debug
1126 /* paranoid_exit */
1127 CFI_ENDPROC
1128 END(debug)
1129 .previous .text
1131 #if 0
1132 /* runs on exception stack */
1133 KPROBE_ENTRY(nmi)
1134 INTR_FRAME
1135 pushq $-1
1136 CFI_ADJUST_CFA_OFFSET 8
1137 paranoidentry do_nmi, 0, 0
1138 #ifdef CONFIG_TRACE_IRQFLAGS
1139 paranoidexit 0
1140 #else
1141 jmp paranoid_exit1
1142 CFI_ENDPROC
1143 #endif
1144 END(nmi)
1145 .previous .text
1146 #endif
1148 KPROBE_ENTRY(int3)
1149 INTR_FRAME
1150 /* pushq $0
1151 CFI_ADJUST_CFA_OFFSET 8 */
1152 zeroentry do_int3
1153 /* jmp paranoid_exit1 */
1154 CFI_ENDPROC
1155 END(int3)
1156 .previous .text
1158 ENTRY(overflow)
1159 zeroentry do_overflow
1160 END(overflow)
1162 ENTRY(bounds)
1163 zeroentry do_bounds
1164 END(bounds)
1166 ENTRY(invalid_op)
1167 zeroentry do_invalid_op
1168 END(invalid_op)
1170 ENTRY(coprocessor_segment_overrun)
1171 zeroentry do_coprocessor_segment_overrun
1172 END(coprocessor_segment_overrun)
1174 ENTRY(reserved)
1175 zeroentry do_reserved
1176 END(reserved)
1178 #if 0
1179 /* runs on exception stack */
1180 ENTRY(double_fault)
1181 XCPT_FRAME
1182 paranoidentry do_double_fault
1183 jmp paranoid_exit1
1184 CFI_ENDPROC
1185 END(double_fault)
1186 #endif
1188 ENTRY(invalid_TSS)
1189 errorentry do_invalid_TSS
1190 END(invalid_TSS)
1192 ENTRY(segment_not_present)
1193 errorentry do_segment_not_present
1194 END(segment_not_present)
1196 /* runs on exception stack */
1197 ENTRY(stack_segment)
1198 XCPT_FRAME
1199 errorentry do_stack_segment
1200 CFI_ENDPROC
1201 END(stack_segment)
1203 KPROBE_ENTRY(general_protection)
1204 errorentry do_general_protection
1205 END(general_protection)
1206 .previous .text
1208 ENTRY(alignment_check)
1209 errorentry do_alignment_check
1210 END(alignment_check)
1212 ENTRY(divide_error)
1213 zeroentry do_divide_error
1214 END(divide_error)
1216 ENTRY(spurious_interrupt_bug)
1217 zeroentry do_spurious_interrupt_bug
1218 END(spurious_interrupt_bug)
1220 #ifdef CONFIG_X86_MCE
1221 /* runs on exception stack */
1222 ENTRY(machine_check)
1223 INTR_FRAME
1224 pushq $0
1225 CFI_ADJUST_CFA_OFFSET 8
1226 paranoidentry do_machine_check
1227 jmp paranoid_exit1
1228 CFI_ENDPROC
1229 END(machine_check)
1230 #endif
1232 /* Call softirq on interrupt stack. Interrupts are off. */
1233 ENTRY(call_softirq)
1234 CFI_STARTPROC
1235 push %rbp
1236 CFI_ADJUST_CFA_OFFSET 8
1237 CFI_REL_OFFSET rbp,0
1238 mov %rsp,%rbp
1239 CFI_DEF_CFA_REGISTER rbp
1240 incl %gs:pda_irqcount
1241 cmove %gs:pda_irqstackptr,%rsp
1242 push %rbp # backlink for old unwinder
1243 call __do_softirq
1244 leaveq
1245 CFI_DEF_CFA_REGISTER rsp
1246 CFI_ADJUST_CFA_OFFSET -8
1247 decl %gs:pda_irqcount
1248 ret
1249 CFI_ENDPROC
1250 ENDPROC(call_softirq)
1252 #ifdef CONFIG_STACK_UNWIND
1253 ENTRY(arch_unwind_init_running)
1254 CFI_STARTPROC
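# %rdi: frame to fill (pt_regs layout), %rsi: callback, %rdx: its argument.
# The callee-saved registers and the return address are recorded in the
# frame, the caller-clobbered slots are zeroed, and the callback is then
# tail-called with the frame in %rdi and the original %rdx in %rsi.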
1255 movq %r15, R15(%rdi)
1256 movq %r14, R14(%rdi)
1257 xchgq %rsi, %rdx
1258 movq %r13, R13(%rdi)
1259 movq %r12, R12(%rdi)
1260 xorl %eax, %eax
1261 movq %rbp, RBP(%rdi)
1262 movq %rbx, RBX(%rdi)
1263 movq (%rsp), %rcx
1264 movq %rax, R11(%rdi)
1265 movq %rax, R10(%rdi)
1266 movq %rax, R9(%rdi)
1267 movq %rax, R8(%rdi)
1268 movq %rax, RAX(%rdi)
1269 movq %rax, RCX(%rdi)
1270 movq %rax, RDX(%rdi)
1271 movq %rax, RSI(%rdi)
1272 movq %rax, RDI(%rdi)
1273 movq %rax, ORIG_RAX(%rdi)
1274 movq %rcx, RIP(%rdi)
1275 leaq 8(%rsp), %rcx
1276 movq $__KERNEL_CS, CS(%rdi)
1277 movq %rax, EFLAGS(%rdi)
1278 movq %rcx, RSP(%rdi)
1279 movq $__KERNEL_DS, SS(%rdi)
1280 jmpq *%rdx
1281 CFI_ENDPROC
1282 ENDPROC(arch_unwind_init_running)
1283 #endif