ia64/xen-unstable

view linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S @ 8994:e0f563e8db9f

Implement direct iret to the guest kernel, where possible, in the HYPERVISOR_IRET macro.

Returns to guest user space, and returns from an NMI, must still go via the hypervisor.

Signed-off-by: Ian Campbell <ian.campbell@xensource.com>
author Ian.Campbell@xensource.com
date Fri Feb 24 11:05:52 2006 +0000 (2006-02-24)
parents c646586d1065
children c823e60207f0
1 /*
2 * linux/arch/x86_64/entry.S
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
7 *
8 * $Id$
9 *
10 * Jun Nakajima <jun.nakajima@intel.com>
11 * Asit Mallick <asit.k.mallick@intel.com>
12 * Modified for Xen
13 */
15 /*
16 * entry.S contains the system-call and fault low-level handling routines.
17 *
18 * NOTE: This code handles signal-recognition, which happens every time
19 * after an interrupt and after each system call.
20 *
21 * Normal syscalls and interrupts don't save a full stack frame; this is
22 * only done for syscall tracing, signals, or fork/exec et al.
23 *
24 * A note on terminology:
25 * - top of stack: Architecture defined interrupt frame from SS to RIP
26 * at the top of the kernel process stack.
27 * - partial stack frame: partially saved registers up to R11.
28 * - full stack frame: like the partial stack frame, but with all registers saved.
29 *
30 * TODO:
31 * - schedule it carefully for the final hardware.
32 */
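/*
 * A minimal sketch of how the terminology above maps onto the saved-register
 * layout (assumption: field order as in the classic x86_64 struct pt_regs;
 * check asm-x86_64/ptrace.h in this tree for the authoritative definition):
 *
 *	struct pt_regs {
 *		unsigned long r15, r14, r13, r12, rbp, rbx;	// full frame only (SAVE_REST)
 *		unsigned long r11, r10, r9, r8;			// partial frame (SAVE_ARGS)...
 *		unsigned long rax, rcx, rdx, rsi, rdi;		// ...argument/clobber registers
 *		unsigned long orig_rax;				// syscall number or error code
 *		unsigned long rip, cs, eflags, rsp, ss;		// "top of stack": the iret frame
 *	};
 */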
34 #define ASSEMBLY 1
35 #include <linux/config.h>
36 #ifdef CONFIG_DEBUG_INFO
37 #undef CONFIG_DEBUG_INFO
38 #endif
39 #include <linux/linkage.h>
40 #include <asm/segment.h>
41 #include <asm/smp.h>
42 #include <asm/cache.h>
43 #include <asm/errno.h>
44 #include <asm/dwarf2.h>
45 #include <asm/calling.h>
46 #include <asm/asm-offsets.h>
47 #include <asm/msr.h>
48 #include <asm/unistd.h>
49 #include <asm/thread_info.h>
50 #include <asm/hw_irq.h>
51 #include <asm/page.h>
52 #include <asm/errno.h>
53 #include <xen/interface/arch-x86_64.h>
55 #include "irq_vectors.h"
57 #include "xen_entry.S"
59 .code64
61 #ifndef CONFIG_PREEMPT
62 #define retint_kernel retint_restore_args
63 #endif
65 NMI_MASK = 0x80000000
67 /*
68 * C code is not supposed to know about the undefined top of stack. Every time
69 * a C function with a pt_regs argument is called from the SYSCALL-based
70 * fast path, FIXUP_TOP_OF_STACK is needed.
71 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
72 * manipulation.
73 */
75 /* %rsp:at FRAMEEND */
76 .macro FIXUP_TOP_OF_STACK tmp
77 movq $__USER_CS,CS(%rsp)
78 movq $-1,RCX(%rsp)
79 .endm
81 .macro RESTORE_TOP_OF_STACK tmp,offset=0
82 .endm
84 .macro FAKE_STACK_FRAME child_rip
85 /* push in order ss, rsp, eflags, cs, rip */
86 xorl %eax, %eax
87 pushq %rax /* ss */
88 CFI_ADJUST_CFA_OFFSET 8
89 /*CFI_REL_OFFSET ss,0*/
90 pushq %rax /* rsp */
91 CFI_ADJUST_CFA_OFFSET 8
92 CFI_REL_OFFSET rsp,0
93 pushq $(1<<9) /* eflags - interrupts on */
94 CFI_ADJUST_CFA_OFFSET 8
95 /*CFI_REL_OFFSET rflags,0*/
96 pushq $__KERNEL_CS /* cs */
97 CFI_ADJUST_CFA_OFFSET 8
98 /*CFI_REL_OFFSET cs,0*/
99 pushq \child_rip /* rip */
100 CFI_ADJUST_CFA_OFFSET 8
101 CFI_REL_OFFSET rip,0
102 pushq %rax /* orig rax */
103 CFI_ADJUST_CFA_OFFSET 8
104 .endm
106 .macro UNFAKE_STACK_FRAME
107 addq $8*6, %rsp
108 CFI_ADJUST_CFA_OFFSET -(6*8)
109 .endm
111 .macro CFI_DEFAULT_STACK start=1
112 .if \start
113 CFI_STARTPROC simple
114 CFI_DEF_CFA rsp,SS+8
115 .else
116 CFI_DEF_CFA_OFFSET SS+8
117 .endif
118 CFI_REL_OFFSET r15,R15
119 CFI_REL_OFFSET r14,R14
120 CFI_REL_OFFSET r13,R13
121 CFI_REL_OFFSET r12,R12
122 CFI_REL_OFFSET rbp,RBP
123 CFI_REL_OFFSET rbx,RBX
124 CFI_REL_OFFSET r11,R11
125 CFI_REL_OFFSET r10,R10
126 CFI_REL_OFFSET r9,R9
127 CFI_REL_OFFSET r8,R8
128 CFI_REL_OFFSET rax,RAX
129 CFI_REL_OFFSET rcx,RCX
130 CFI_REL_OFFSET rdx,RDX
131 CFI_REL_OFFSET rsi,RSI
132 CFI_REL_OFFSET rdi,RDI
133 CFI_REL_OFFSET rip,RIP
134 /*CFI_REL_OFFSET cs,CS*/
135 /*CFI_REL_OFFSET rflags,EFLAGS*/
136 CFI_REL_OFFSET rsp,RSP
137 /*CFI_REL_OFFSET ss,SS*/
138 .endm
140 /*
141 * Must be consistent with the definition in arch-x86_64.h:
142 * struct iret_context {
143 * u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
144 * };
145 * #define VGCF_IN_SYSCALL (1<<8)
146 */
147 .macro HYPERVISOR_IRET flag
148 testb $3,1*8(%rsp)
149 jnz 1f
150 testl $NMI_MASK,2*8(%rsp)
151 jnz 1f
153 /* Direct iret to kernel space. Correct CS and SS. */
154 orb $3,1*8(%rsp)
155 orb $3,4*8(%rsp)
156 iretq
158 1: /* Slow iret via hypervisor. */
159 andl $~NMI_MASK, 16(%rsp)
160 pushq $\flag
161 jmp hypercall_page + (__HYPERVISOR_iret * 32)
162 .endm
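/*
 * A minimal C sketch of the check HYPERVISOR_IRET performs (offsets assume
 * %rsp points at the saved rip, as it does after RESTORE_ARGS; the struct
 * mirrors the comment above -- see xen/interface/arch-x86_64.h for the
 * authoritative definition):
 *
 *	struct iret_context {
 *		uint64_t rax, r11, rcx;			// consumed by the hypercall
 *		uint64_t flags;				// 0 or VGCF_IN_SYSCALL (1<<8)
 *		uint64_t rip, cs, rflags, rsp, ss;	// frame used by iretq / HYPERVISOR_iret
 *	};
 *
 *	// rsp[0]=rip rsp[1]=cs rsp[2]=rflags rsp[3]=rsp rsp[4]=ss
 *	static int can_iret_directly(const uint64_t *rsp)
 *	{
 *		if (rsp[1] & 3)			// return to guest user space:
 *			return 0;		//   must go via HYPERVISOR_iret
 *		if (rsp[2] & NMI_MASK)		// return from an NMI:
 *			return 0;		//   must go via HYPERVISOR_iret
 *		return 1;			// guest kernel: or RPL 3 into cs/ss, then iretq
 *	}
 */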
164 .macro SWITCH_TO_KERNEL ssoff,adjust=0
165 jc 1f
166 orb $1,\ssoff-\adjust+4(%rsp)
167 1:
168 .endm
170 /*
171 * A newly forked process directly context switches into this.
172 */
173 /* rdi: prev */
174 ENTRY(ret_from_fork)
175 CFI_DEFAULT_STACK
176 call schedule_tail
177 GET_THREAD_INFO(%rcx)
178 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
179 jnz rff_trace
180 rff_action:
181 RESTORE_REST
182 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
183 je int_ret_from_sys_call
184 testl $_TIF_IA32,threadinfo_flags(%rcx)
185 jnz int_ret_from_sys_call
186 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
187 jmp ret_from_sys_call
188 rff_trace:
189 movq %rsp,%rdi
190 call syscall_trace_leave
191 GET_THREAD_INFO(%rcx)
192 jmp rff_action
193 CFI_ENDPROC
195 /*
196 * System call entry. Up to 6 arguments in registers are supported.
197 *
198 * SYSCALL does not save anything on the stack and does not change the
199 * stack pointer.
200 */
202 /*
203 * Register setup:
204 * rax system call number
205 * rdi arg0
206 * rcx return address for syscall/sysret, C arg3
207 * rsi arg1
208 * rdx arg2
209 * r10 arg3 (--> moved to rcx for C)
210 * r8 arg4
211 * r9 arg5
212 * r11 eflags for syscall/sysret, temporary for C
213 * r12-r15,rbp,rbx saved by C code, not touched.
214 *
215 * Interrupts are off on entry.
216 * Only called from user space.
217 *
218 * XXX if we had a free scratch register we could save the RSP into the stack frame
219 * and report it properly in ps. Unfortunately we don't have one.
220 */
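/*
 * Illustrative only (not part of this file): how a user-space caller lines
 * up with the register convention above.  rcx and r11 are clobbered by
 * syscall/sysret, so they appear in the clobber list.
 *
 *	static long syscall6(long nr, long a0, long a1, long a2,
 *			     long a3, long a4, long a5)
 *	{
 *		register long r10 asm("r10") = a3;
 *		register long r8  asm("r8")  = a4;
 *		register long r9  asm("r9")  = a5;
 *		long ret;
 *		asm volatile ("syscall"
 *			      : "=a" (ret)
 *			      : "a" (nr), "D" (a0), "S" (a1), "d" (a2),
 *			        "r" (r10), "r" (r8), "r" (r9)
 *			      : "rcx", "r11", "memory");
 *		return ret;
 *	}
 */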
222 ENTRY(system_call)
223 CFI_STARTPROC simple
224 CFI_DEF_CFA rsp,0
225 CFI_REGISTER rip,rcx
226 /*CFI_REGISTER rflags,r11*/
227 SAVE_ARGS -8,0
228 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
229 XEN_UNBLOCK_EVENTS(%r11)
230 GET_THREAD_INFO(%rcx)
231 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
232 CFI_REMEMBER_STATE
233 jnz tracesys
234 cmpq $__NR_syscall_max,%rax
235 ja badsys
236 movq %r10,%rcx
237 call *sys_call_table(,%rax,8) # XXX: rip relative
238 movq %rax,RAX-ARGOFFSET(%rsp)
239 /*
240 * Syscall return path ending with SYSRET (fast path)
241 * Has incomplete stack frame and undefined top of stack.
242 */
243 .globl ret_from_sys_call
244 ret_from_sys_call:
245 movl $_TIF_ALLWORK_MASK,%edi
246 /* edi: flagmask */
247 sysret_check:
248 GET_THREAD_INFO(%rcx)
249 XEN_BLOCK_EVENTS(%rsi)
250 movl threadinfo_flags(%rcx),%edx
251 andl %edi,%edx
252 CFI_REMEMBER_STATE
253 jnz sysret_careful
254 XEN_UNBLOCK_EVENTS(%rsi)
255 CFI_REGISTER rip,rcx
256 RESTORE_ARGS 0,8,0
257 /*CFI_REGISTER rflags,r11*/
258 HYPERVISOR_IRET VGCF_IN_SYSCALL
260 /* Handle reschedules */
261 /* edx: work, edi: workmask */
262 sysret_careful:
263 CFI_RESTORE_STATE
264 bt $TIF_NEED_RESCHED,%edx
265 jnc sysret_signal
266 XEN_BLOCK_EVENTS(%rsi)
267 pushq %rdi
268 CFI_ADJUST_CFA_OFFSET 8
269 call schedule
270 popq %rdi
271 CFI_ADJUST_CFA_OFFSET -8
272 jmp sysret_check
274 /* Handle a signal */
275 sysret_signal:
276 /* sti */
277 XEN_UNBLOCK_EVENTS(%rsi)
278 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
279 jz 1f
281 /* Really a signal */
282 /* edx: work flags (arg3) */
283 leaq do_notify_resume(%rip),%rax
284 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
285 xorl %esi,%esi # oldset -> arg2
286 call ptregscall_common
287 1: movl $_TIF_NEED_RESCHED,%edi
288 jmp sysret_check
290 badsys:
291 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
292 jmp ret_from_sys_call
294 /* Do syscall tracing */
295 tracesys:
296 CFI_RESTORE_STATE
297 SAVE_REST
298 movq $-ENOSYS,RAX(%rsp)
299 FIXUP_TOP_OF_STACK %rdi
300 movq %rsp,%rdi
301 call syscall_trace_enter
302 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
303 RESTORE_REST
304 cmpq $__NR_syscall_max,%rax
305 ja 1f
306 movq %r10,%rcx /* fixup for C */
307 call *sys_call_table(,%rax,8)
308 movq %rax,RAX-ARGOFFSET(%rsp)
309 1: SAVE_REST
310 movq %rsp,%rdi
311 call syscall_trace_leave
312 RESTORE_TOP_OF_STACK %rbx
313 RESTORE_REST
314 jmp ret_from_sys_call
315 CFI_ENDPROC
317 /*
318 * Syscall return path ending with IRET.
319 * Has correct top of stack, but partial stack frame.
320 */
321 ENTRY(int_ret_from_sys_call)
322 CFI_STARTPROC simple
323 CFI_DEF_CFA rsp,SS+8-ARGOFFSET
324 /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
325 CFI_REL_OFFSET rsp,RSP-ARGOFFSET
326 /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
327 /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
328 CFI_REL_OFFSET rip,RIP-ARGOFFSET
329 CFI_REL_OFFSET rdx,RDX-ARGOFFSET
330 CFI_REL_OFFSET rcx,RCX-ARGOFFSET
331 CFI_REL_OFFSET rax,RAX-ARGOFFSET
332 CFI_REL_OFFSET rdi,RDI-ARGOFFSET
333 CFI_REL_OFFSET rsi,RSI-ARGOFFSET
334 CFI_REL_OFFSET r8,R8-ARGOFFSET
335 CFI_REL_OFFSET r9,R9-ARGOFFSET
336 CFI_REL_OFFSET r10,R10-ARGOFFSET
337 CFI_REL_OFFSET r11,R11-ARGOFFSET
338 XEN_BLOCK_EVENTS(%rsi)
339 testb $3,CS-ARGOFFSET(%rsp)
340 jnz 1f
341 /* Need to set the proper %ss (not NULL) for ring 3 iretq */
342 movl $__KERNEL_DS,SS-ARGOFFSET(%rsp)
343 jmp retint_restore_args # return from ring3 kernel
344 1:
345 movl $_TIF_ALLWORK_MASK,%edi
346 /* edi: mask to check */
347 int_with_check:
348 GET_THREAD_INFO(%rcx)
349 movl threadinfo_flags(%rcx),%edx
350 andl %edi,%edx
351 jnz int_careful
352 andl $~TS_COMPAT,threadinfo_status(%rcx)
353 jmp retint_restore_args
355 /* Either reschedule or signal or syscall exit tracking needed. */
356 /* First do a reschedule test. */
357 /* edx: work, edi: workmask */
358 int_careful:
359 bt $TIF_NEED_RESCHED,%edx
360 jnc int_very_careful
361 /* sti */
362 XEN_UNBLOCK_EVENTS(%rsi)
363 pushq %rdi
364 CFI_ADJUST_CFA_OFFSET 8
365 call schedule
366 popq %rdi
367 CFI_ADJUST_CFA_OFFSET -8
368 cli
369 jmp int_with_check
371 /* handle signals and tracing -- both require a full stack frame */
372 int_very_careful:
373 /* sti */
374 XEN_UNBLOCK_EVENTS(%rsi)
375 SAVE_REST
376 /* Check for syscall exit trace */
377 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
378 jz int_signal
379 pushq %rdi
380 CFI_ADJUST_CFA_OFFSET 8
381 leaq 8(%rsp),%rdi # &ptregs -> arg1
382 call syscall_trace_leave
383 popq %rdi
384 CFI_ADJUST_CFA_OFFSET -8
385 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
386 cli
387 jmp int_restore_rest
389 int_signal:
390 testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
391 jz 1f
392 movq %rsp,%rdi # &ptregs -> arg1
393 xorl %esi,%esi # oldset -> arg2
394 call do_notify_resume
395 1: movl $_TIF_NEED_RESCHED,%edi
396 int_restore_rest:
397 RESTORE_REST
398 cli
399 jmp int_with_check
400 CFI_ENDPROC
402 /*
403 * Certain special system calls need to save a full stack frame.
404 */
406 .macro PTREGSCALL label,func,arg
407 .globl \label
408 \label:
409 leaq \func(%rip),%rax
410 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
411 jmp ptregscall_common
412 .endm
414 CFI_STARTPROC
416 PTREGSCALL stub_clone, sys_clone, %r8
417 PTREGSCALL stub_fork, sys_fork, %rdi
418 PTREGSCALL stub_vfork, sys_vfork, %rdi
419 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
420 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
421 PTREGSCALL stub_iopl, sys_iopl, %rsi
423 ENTRY(ptregscall_common)
424 popq %r11
425 CFI_ADJUST_CFA_OFFSET -8
426 CFI_REGISTER rip, r11
427 SAVE_REST
428 movq %r11, %r15
429 CFI_REGISTER rip, r15
430 FIXUP_TOP_OF_STACK %r11
431 call *%rax
432 RESTORE_TOP_OF_STACK %r11
433 movq %r15, %r11
434 CFI_REGISTER rip, r11
435 RESTORE_REST
436 pushq %r11
437 CFI_ADJUST_CFA_OFFSET 8
438 CFI_REL_OFFSET rip, 0
439 ret
440 CFI_ENDPROC
442 ENTRY(stub_execve)
443 CFI_STARTPROC
444 popq %r11
445 CFI_ADJUST_CFA_OFFSET -8
446 CFI_REGISTER rip, r11
447 SAVE_REST
448 movq %r11, %r15
449 CFI_REGISTER rip, r15
450 FIXUP_TOP_OF_STACK %r11
451 call sys_execve
452 GET_THREAD_INFO(%rcx)
453 bt $TIF_IA32,threadinfo_flags(%rcx)
454 CFI_REMEMBER_STATE
455 jc exec_32bit
456 RESTORE_TOP_OF_STACK %r11
457 movq %r15, %r11
458 CFI_REGISTER rip, r11
459 RESTORE_REST
460 pushq %r11
461 CFI_ADJUST_CFA_OFFSET 8
462 CFI_REL_OFFSET rip, 0
463 ret
465 exec_32bit:
466 CFI_RESTORE_STATE
467 movq %rax,RAX(%rsp)
468 RESTORE_REST
469 jmp int_ret_from_sys_call
470 CFI_ENDPROC
472 /*
473 * sigreturn is special because it needs to restore all registers on return.
474 * This cannot be done with SYSRET, so use the IRET return path instead.
475 */
476 ENTRY(stub_rt_sigreturn)
477 CFI_STARTPROC
478 addq $8, %rsp
479 CFI_ADJUST_CFA_OFFSET -8
480 SAVE_REST
481 movq %rsp,%rdi
482 FIXUP_TOP_OF_STACK %r11
483 call sys_rt_sigreturn
484 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
485 RESTORE_REST
486 jmp int_ret_from_sys_call
487 CFI_ENDPROC
489 /*
490 * initial frame state for interrupts and exceptions
491 */
492 .macro _frame ref
493 CFI_STARTPROC simple
494 CFI_DEF_CFA rsp,SS+8-\ref
495 /*CFI_REL_OFFSET ss,SS-\ref*/
496 CFI_REL_OFFSET rsp,RSP-\ref
497 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
498 /*CFI_REL_OFFSET cs,CS-\ref*/
499 CFI_REL_OFFSET rip,RIP-\ref
500 .endm
502 /* initial frame state for interrupts (and exceptions without error code) */
503 #define INTR_FRAME _frame RIP
504 /* initial frame state for exceptions with error code (and interrupts with
505 vector already pushed) */
506 #define XCPT_FRAME _frame ORIG_RAX
508 /*
509 * Interrupt exit.
510 *
511 */
513 retint_check:
514 movl threadinfo_flags(%rcx),%edx
515 andl %edi,%edx
516 CFI_REMEMBER_STATE
517 jnz retint_careful
518 retint_restore_args:
519 movb EVENT_MASK-REST_SKIP(%rsp), %al
520 notb %al # %al == ~saved_mask
521 XEN_GET_VCPU_INFO(%rsi)
522 andb evtchn_upcall_mask(%rsi),%al
523 andb $1,%al # %al == mask & ~saved_mask
524 jnz restore_all_enable_events # != 0 => reenable event delivery
525 XEN_PUT_VCPU_INFO(%rsi)
527 RESTORE_ARGS 0,8,0
528 HYPERVISOR_IRET 0
530 /* edi: workmask, edx: work */
531 retint_careful:
532 CFI_RESTORE_STATE
533 bt $TIF_NEED_RESCHED,%edx
534 jnc retint_signal
535 XEN_UNBLOCK_EVENTS(%rsi)
536 /* sti */
537 pushq %rdi
538 CFI_ADJUST_CFA_OFFSET 8
539 call schedule
540 popq %rdi
541 CFI_ADJUST_CFA_OFFSET -8
542 XEN_BLOCK_EVENTS(%rsi)
543 GET_THREAD_INFO(%rcx)
544 /* cli */
545 jmp retint_check
547 retint_signal:
548 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
549 jz retint_restore_args
550 XEN_UNBLOCK_EVENTS(%rsi)
551 SAVE_REST
552 movq $-1,ORIG_RAX(%rsp)
553 xorl %esi,%esi # oldset
554 movq %rsp,%rdi # &pt_regs
555 call do_notify_resume
556 RESTORE_REST
557 XEN_BLOCK_EVENTS(%rsi)
558 movl $_TIF_NEED_RESCHED,%edi
559 GET_THREAD_INFO(%rcx)
560 jmp retint_check
562 #ifdef CONFIG_PREEMPT
563 /* Returning to kernel space. Check if we need preemption */
564 /* rcx: threadinfo. interrupts off. */
565 .p2align
566 retint_kernel:
567 cmpl $0,threadinfo_preempt_count(%rcx)
568 jnz retint_restore_args
569 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
570 jnc retint_restore_args
571 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
572 jnc retint_restore_args
573 call preempt_schedule_irq
574 jmp retint_kernel /* check again */
575 #endif
576 CFI_ENDPROC
578 /*
579 * APIC interrupts.
580 */
581 .macro apicinterrupt num,func
582 INTR_FRAME
583 pushq $\num-256
584 CFI_ADJUST_CFA_OFFSET 8
585 interrupt \func
586 jmp error_entry
587 CFI_ENDPROC
588 .endm
590 #ifndef CONFIG_XEN
591 ENTRY(thermal_interrupt)
592 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
594 ENTRY(threshold_interrupt)
595 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
597 #ifdef CONFIG_SMP
598 ENTRY(reschedule_interrupt)
599 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
601 .macro INVALIDATE_ENTRY num
602 ENTRY(invalidate_interrupt\num)
603 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
604 .endm
606 INVALIDATE_ENTRY 0
607 INVALIDATE_ENTRY 1
608 INVALIDATE_ENTRY 2
609 INVALIDATE_ENTRY 3
610 INVALIDATE_ENTRY 4
611 INVALIDATE_ENTRY 5
612 INVALIDATE_ENTRY 6
613 INVALIDATE_ENTRY 7
615 ENTRY(call_function_interrupt)
616 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
617 #endif
619 #ifdef CONFIG_X86_LOCAL_APIC
620 ENTRY(apic_timer_interrupt)
621 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
623 ENTRY(error_interrupt)
624 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
626 ENTRY(spurious_interrupt)
627 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
628 #endif
629 #endif /* !CONFIG_XEN */
631 /*
632 * Exception entry points.
633 */
634 .macro zeroentry sym
635 INTR_FRAME
636 movq (%rsp),%rcx
637 movq 8(%rsp),%r11
638 addq $0x10,%rsp /* skip rcx and r11 */
639 pushq $0 /* push error code/oldrax */
640 CFI_ADJUST_CFA_OFFSET 8
641 pushq %rax /* push real oldrax to the rdi slot */
642 CFI_ADJUST_CFA_OFFSET 8
643 leaq \sym(%rip),%rax
644 jmp error_entry
645 CFI_ENDPROC
646 .endm
648 .macro errorentry sym
649 XCPT_FRAME
650 movq (%rsp),%rcx
651 movq 8(%rsp),%r11
652 addq $0x10,%rsp /* rsp points to the error code */
653 pushq %rax
654 CFI_ADJUST_CFA_OFFSET 8
655 leaq \sym(%rip),%rax
656 jmp error_entry
657 CFI_ENDPROC
658 .endm
660 #if 0 /* not XEN */
661 /* error code is on the stack already */
662 /* handle NMI-like exceptions that can happen everywhere */
663 .macro paranoidentry sym, ist=0
664 movq (%rsp),%rcx
665 movq 8(%rsp),%r11
666 addq $0x10,%rsp /* skip rcx and r11 */
667 SAVE_ALL
668 cld
669 #if 0 /* not XEN */
670 movl $1,%ebx
671 movl $MSR_GS_BASE,%ecx
672 rdmsr
673 testl %edx,%edx
674 js 1f
675 swapgs
676 xorl %ebx,%ebx
677 1:
678 #endif
679 .if \ist
680 movq %gs:pda_data_offset, %rbp
681 .endif
682 movq %rsp,%rdi
683 movq ORIG_RAX(%rsp),%rsi
684 movq $-1,ORIG_RAX(%rsp)
685 .if \ist
686 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
687 .endif
688 call \sym
689 .if \ist
690 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
691 .endif
692 /* cli */
693 XEN_BLOCK_EVENTS(%rsi)
694 .endm
695 #endif
697 /*
698 * Exception entry point. This expects an error code/orig_rax on the stack
699 * and the exception handler in %rax.
700 */
701 ENTRY(error_entry)
702 _frame RDI
703 /* rdi slot contains rax, oldrax contains error code */
704 cld
705 subq $14*8,%rsp
706 CFI_ADJUST_CFA_OFFSET (14*8)
707 movq %rsi,13*8(%rsp)
708 CFI_REL_OFFSET rsi,RSI
709 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
710 movq %rdx,12*8(%rsp)
711 CFI_REL_OFFSET rdx,RDX
712 movq %rcx,11*8(%rsp)
713 CFI_REL_OFFSET rcx,RCX
714 movq %rsi,10*8(%rsp) /* store rax */
715 CFI_REL_OFFSET rax,RAX
716 movq %r8, 9*8(%rsp)
717 CFI_REL_OFFSET r8,R8
718 movq %r9, 8*8(%rsp)
719 CFI_REL_OFFSET r9,R9
720 movq %r10,7*8(%rsp)
721 CFI_REL_OFFSET r10,R10
722 movq %r11,6*8(%rsp)
723 CFI_REL_OFFSET r11,R11
724 movq %rbx,5*8(%rsp)
725 CFI_REL_OFFSET rbx,RBX
726 movq %rbp,4*8(%rsp)
727 CFI_REL_OFFSET rbp,RBP
728 movq %r12,3*8(%rsp)
729 CFI_REL_OFFSET r12,R12
730 movq %r13,2*8(%rsp)
731 CFI_REL_OFFSET r13,R13
732 movq %r14,1*8(%rsp)
733 CFI_REL_OFFSET r14,R14
734 movq %r15,(%rsp)
735 CFI_REL_OFFSET r15,R15
736 #if 0
737 cmpl $__KERNEL_CS,CS(%rsp)
738 je error_kernelspace
739 #endif
740 error_call_handler:
741 movq %rdi, RDI(%rsp)
742 movq %rsp,%rdi
743 movq ORIG_RAX(%rsp),%rsi # get error code
744 movq $-1,ORIG_RAX(%rsp)
745 call *%rax
746 error_exit:
747 RESTORE_REST
748 /* cli */
749 XEN_BLOCK_EVENTS(%rsi)
750 GET_THREAD_INFO(%rcx)
751 testb $3,CS-ARGOFFSET(%rsp)
752 jz retint_kernel
753 movl threadinfo_flags(%rcx),%edx
754 movl $_TIF_WORK_MASK,%edi
755 andl %edi,%edx
756 jnz retint_careful
757 jmp retint_restore_args
759 error_kernelspace:
760 /*
761 * We need to rewrite the logic here because we don't do iretq to
762 * return to user mode. It's still possible that we get a trap/fault
763 * in the kernel (when accessing buffers pointed to by system calls,
764 * for example).
765 *
766 */
767 #if 0
768 incl %ebx
769 /* There are two places in the kernel that can potentially fault with
770 usergs. Handle them here. The exception handlers after
771 iret run with kernel gs again, so don't set the user space flag.
772 B stepping K8s sometimes report a truncated RIP for IRET
773 exceptions returning to compat mode. Check for these here too. */
774 leaq iret_label(%rip),%rbp
775 cmpq %rbp,RIP(%rsp)
776 je error_swapgs
777 movl %ebp,%ebp /* zero extend */
778 cmpq %rbp,RIP(%rsp)
779 je error_swapgs
780 cmpq $gs_change,RIP(%rsp)
781 je error_swapgs
782 jmp error_sti
783 #endif
785 ENTRY(hypervisor_callback)
786 zeroentry do_hypervisor_callback
788 /*
789 * Copied from arch/xen/i386/kernel/entry.S
790 */
791 # A note on the "critical region" in our callback handler.
792 # We want to avoid stacking callback handlers due to events occurring
793 # during handling of the last event. To do this, we keep events disabled
794 # until we've done all processing. HOWEVER, we must enable events before
795 # popping the stack frame (can't be done atomically) and so it would still
796 # be possible to get enough handler activations to overflow the stack.
797 # Although unlikely, bugs of that kind are hard to track down, so we'd
798 # like to avoid the possibility.
799 # So, on entry to the handler we detect whether we interrupted an
800 # existing activation in its critical region -- if so, we pop the current
801 # activation and restart the handler using the previous one.
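# Roughly, in C, the scheme described above (sketch only -- the real mask and
# pending flags live in the shared vcpu_info structure and are accessed via
# the XEN_*_EVENTS macros from xen_entry.S):
#
#	for (;;) {
#		vcpu->evtchn_upcall_mask = 1;		/* events blocked (11:)           */
#		evtchn_do_upcall(regs);			/* drain currently pending events */
#		vcpu->evtchn_upcall_mask = 0;		/* re-enable delivery             */
#		if (!vcpu->evtchn_upcall_pending)	/* scrit..ecrit race window       */
#			break;				/* nothing new arrived: iret      */
#	}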
802 ENTRY(do_hypervisor_callback) # do_hypervisor_callback(struct pt_regs *)
803 # Since we don't modify %rdi, evtchn_do_upcall(struct pt_regs *) will
804 # see the correct pointer to the pt_regs.
805 movq %rdi, %rsp # we don't return, adjust the stack frame
806 11: movb $0, EVENT_MASK(%rdi)
807 movq %gs:pda_irqstackptr,%rax
808 incl %gs:pda_irqcount
809 cmovzq %rax,%rsp
810 pushq %rdi
811 call evtchn_do_upcall
812 popq %rsp
813 decl %gs:pda_irqcount
814 jmp error_exit
816 #ifdef CONFIG_X86_LOCAL_APIC
817 ENTRY(nmi)
818 zeroentry do_nmi_callback
819 ENTRY(do_nmi_callback)
820 addq $8, %rsp
821 call do_nmi
822 orl $NMI_MASK,EFLAGS(%rsp)
823 RESTORE_REST
824 XEN_BLOCK_EVENTS(%rsi)
825 GET_THREAD_INFO(%rcx)
826 jmp retint_restore_args
827 #endif
829 ALIGN
830 restore_all_enable_events:
831 XEN_UNBLOCK_EVENTS(%rsi) # %rsi is already set up...
833 scrit: /**** START OF CRITICAL REGION ****/
834 XEN_TEST_PENDING(%rsi)
835 jnz 14f # process more events if necessary...
836 XEN_PUT_VCPU_INFO(%rsi)
837 RESTORE_ARGS 0,8,0
838 HYPERVISOR_IRET 0
840 14: XEN_LOCKED_BLOCK_EVENTS(%rsi)
841 XEN_PUT_VCPU_INFO(%rsi)
842 SAVE_REST
843 movq %rsp,%rdi # set the argument again
844 jmp 11b
845 ecrit: /**** END OF CRITICAL REGION ****/
846 # At this point, unlike on x86-32, we don't do the critical-region fixup,
847 # both to simplify the code and because the stack frame is more complex on x86-64.
848 # If the kernel is interrupted inside the critical section, it will simply
849 # do IRET from that interrupt, and everything will be restored at that point,
850 # i.e. it just resumes at the interrupted instruction with the same context.
852 # The hypervisor uses this for application faults while it executes.
853 ENTRY(failsafe_callback)
854 addq $0x10,%rsp /* skip rcx and r11 */
855 1: mov (%rsp),%ds
856 2: mov 8(%rsp),%es
857 3: mov 16(%rsp),%fs
858 4: mov 24(%rsp),%gs
859 addq $0x20,%rsp /* skip the above selectors */
860 SAVE_ALL
861 jmp error_exit
862 .section .fixup,"ax"; \
863 6: movq $0,(%rsp); \
864 jmp 1b; \
865 7: movq $0,8(%rsp); \
866 jmp 2b; \
867 8: movq $0,16(%rsp); \
868 jmp 3b; \
869 9: movq $0,24(%rsp); \
870 jmp 4b; \
871 .previous; \
872 .section __ex_table,"a";\
873 .align 16; \
874 .quad 1b,6b; \
875 .quad 2b,7b; \
876 .quad 3b,8b; \
877 .quad 4b,9b; \
878 .previous
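/*
 * The .fixup/__ex_table pairs above are the standard kernel exception-table
 * pattern: each .quad pair maps a potentially faulting instruction (1b-4b)
 * to its recovery code (6b-9b), which zeroes the offending saved selector
 * and retries.  A rough sketch of what the fault handler does with the
 * table (names here are illustrative, not the exact helpers in this tree):
 *
 *	struct exception_table_entry { unsigned long insn, fixup; };
 *
 *	static int try_fixup(struct pt_regs *regs,
 *			     const struct exception_table_entry *tbl, int n)
 *	{
 *		int i;
 *		for (i = 0; i < n; i++)
 *			if (tbl[i].insn == regs->rip) {
 *				regs->rip = tbl[i].fixup;	// e.g. 1b -> 6b
 *				return 1;			// fault recovered
 *			}
 *		return 0;					// genuinely fatal
 *	}
 */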
880 #if 0
881 .section __ex_table,"a"
882 .align 8
883 .quad gs_change,bad_gs
884 .previous
885 .section .fixup,"ax"
886 /* running with kernelgs */
887 bad_gs:
888 /* swapgs */ /* switch back to user gs */
889 xorl %eax,%eax
890 movl %eax,%gs
891 jmp 2b
892 .previous
893 #endif
895 /*
896 * Create a kernel thread.
897 *
898 * C extern interface:
899 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
900 *
901 * asm input arguments:
902 * rdi: fn, rsi: arg, rdx: flags
903 */
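/*
 * Typical usage of the C interface described above (sketch only; the thread
 * function and the clone flags are merely examples):
 *
 *	static int my_worker(void *arg)
 *	{
 *		// ... do the work ...
 *		return 0;		// becomes the exit code via do_exit()
 *	}
 *
 *	long pid = kernel_thread(my_worker, NULL, CLONE_FS | CLONE_FILES);
 *	if (pid < 0)
 *		printk("kernel_thread failed: %ld\n", pid);
 */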
904 ENTRY(kernel_thread)
905 CFI_STARTPROC
906 FAKE_STACK_FRAME $child_rip
907 SAVE_ALL
909 # rdi: flags, rsi: usp, rdx: will be &pt_regs
910 movq %rdx,%rdi
911 orq kernel_thread_flags(%rip),%rdi
912 movq $-1, %rsi
913 movq %rsp, %rdx
915 xorl %r8d,%r8d
916 xorl %r9d,%r9d
918 # clone now
919 call do_fork
920 movq %rax,RAX(%rsp)
921 xorl %edi,%edi
923 /*
924 * It isn't worth checking for a reschedule here, so internally to the
925 * x86_64 port you can rely on kernel_thread() not rescheduling the
926 * child before returning; this avoids the need for hacks, for example
927 * to fork off the per-CPU idle tasks.
928 * [Hopefully no generic code relies on the reschedule -AK]
929 */
930 RESTORE_ALL
931 UNFAKE_STACK_FRAME
932 ret
933 CFI_ENDPROC
936 child_rip:
937 /*
938 * Here we are in the child and the registers are set as they were
939 * at kernel_thread() invocation in the parent.
940 */
941 movq %rdi, %rax
942 movq %rsi, %rdi
943 call *%rax
944 # exit
945 xorl %edi, %edi
946 call do_exit
948 /*
949 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
950 *
951 * C extern interface:
952 * extern long execve(char *name, char **argv, char **envp)
953 *
954 * asm input arguments:
955 * rdi: name, rsi: argv, rdx: envp
956 *
957 * We want to fall back into:
958 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
959 *
960 * do_sys_execve asm fallback arguments:
961 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
962 */
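/*
 * Sketch of a caller of the in-kernel execve() above (argv/envp values are
 * only illustrative; on success the call does not return to the caller,
 * on failure the error code comes back):
 *
 *	static char *argv[] = { "/sbin/init", NULL };
 *	static char *envp[] = { "HOME=/", "TERM=linux", NULL };
 *
 *	long err = execve("/sbin/init", argv, envp);
 *	if (err)
 *		panic("execve of /sbin/init failed: %ld", err);	// example handling
 */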
963 ENTRY(execve)
964 CFI_STARTPROC
965 FAKE_STACK_FRAME $0
966 SAVE_ALL
967 call sys_execve
968 movq %rax, RAX(%rsp)
969 RESTORE_REST
970 testq %rax,%rax
971 jne 1f
972 jmp int_ret_from_sys_call
973 1: RESTORE_ARGS
974 UNFAKE_STACK_FRAME
975 ret
976 CFI_ENDPROC
978 KPROBE_ENTRY(page_fault)
979 errorentry do_page_fault
980 .previous .text
982 ENTRY(coprocessor_error)
983 zeroentry do_coprocessor_error
985 ENTRY(simd_coprocessor_error)
986 zeroentry do_simd_coprocessor_error
988 ENTRY(device_not_available)
989 zeroentry math_state_restore
991 /* runs on exception stack */
992 KPROBE_ENTRY(debug)
993 INTR_FRAME
994 /* pushq $0
995 CFI_ADJUST_CFA_OFFSET 8 */
996 zeroentry do_debug
997 /* jmp paranoid_exit */
998 CFI_ENDPROC
999 .previous .text
1001 #if 0
1002 /* runs on exception stack */
1003 KPROBE_ENTRY(nmi)
1004 INTR_FRAME
1005 pushq $-1
1006 CFI_ADJUST_CFA_OFFSET 8
1007 paranoidentry do_nmi
1008 /*
1009 * "Paranoid" exit path from exception stack.
1010 * Paranoid because this is used by NMIs and cannot take
1011 * any kernel state for granted.
1012 * We don't do kernel preemption checks here, because only
1013 * NMI should be common and it does not enable IRQs and
1014 * cannot get reschedule ticks.
1015 */
1016 /* ebx: no swapgs flag */
1017 paranoid_exit:
1018 testl %ebx,%ebx /* swapgs needed? */
1019 jnz paranoid_restore
1020 testl $3,CS(%rsp)
1021 jnz paranoid_userspace
1022 paranoid_swapgs:
1023 swapgs
1024 paranoid_restore:
1025 RESTORE_ALL 8
1026 iretq
1027 paranoid_userspace:
1028 GET_THREAD_INFO(%rcx)
1029 movl threadinfo_flags(%rcx),%ebx
1030 andl $_TIF_WORK_MASK,%ebx
1031 jz paranoid_swapgs
1032 movq %rsp,%rdi /* &pt_regs */
1033 call sync_regs
1034 movq %rax,%rsp /* switch stack for scheduling */
1035 testl $_TIF_NEED_RESCHED,%ebx
1036 jnz paranoid_schedule
1037 movl %ebx,%edx /* arg3: thread flags */
1038 sti
1039 xorl %esi,%esi /* arg2: oldset */
1040 movq %rsp,%rdi /* arg1: &pt_regs */
1041 call do_notify_resume
1042 cli
1043 jmp paranoid_userspace
1044 paranoid_schedule:
1045 sti
1046 call schedule
1047 cli
1048 jmp paranoid_userspace
1049 CFI_ENDPROC
1050 .previous .text
1051 #endif
1053 KPROBE_ENTRY(int3)
1054 INTR_FRAME
1055 /* pushq $0
1056 CFI_ADJUST_CFA_OFFSET 8 */
1057 zeroentry do_int3
1058 /* jmp paranoid_exit */
1059 CFI_ENDPROC
1060 .previous .text
1062 ENTRY(overflow)
1063 zeroentry do_overflow
1065 ENTRY(bounds)
1066 zeroentry do_bounds
1068 ENTRY(invalid_op)
1069 zeroentry do_invalid_op
1071 ENTRY(coprocessor_segment_overrun)
1072 zeroentry do_coprocessor_segment_overrun
1074 ENTRY(reserved)
1075 zeroentry do_reserved
1077 #if 0
1078 /* runs on exception stack */
1079 ENTRY(double_fault)
1080 XCPT_FRAME
1081 paranoidentry do_double_fault
1082 jmp paranoid_exit
1083 CFI_ENDPROC
1084 #endif
1086 ENTRY(invalid_TSS)
1087 errorentry do_invalid_TSS
1089 ENTRY(segment_not_present)
1090 errorentry do_segment_not_present
1092 /* runs on exception stack */
1093 ENTRY(stack_segment)
1094 XCPT_FRAME
1095 errorentry do_stack_segment
1096 CFI_ENDPROC
1098 KPROBE_ENTRY(general_protection)
1099 errorentry do_general_protection
1100 .previous .text
1102 ENTRY(alignment_check)
1103 errorentry do_alignment_check
1105 ENTRY(divide_error)
1106 zeroentry do_divide_error
1108 ENTRY(spurious_interrupt_bug)
1109 zeroentry do_spurious_interrupt_bug
1111 #ifdef CONFIG_X86_MCE
1112 /* runs on exception stack */
1113 ENTRY(machine_check)
1114 INTR_FRAME
1115 pushq $0
1116 CFI_ADJUST_CFA_OFFSET 8
1117 paranoidentry do_machine_check
1118 jmp paranoid_exit
1119 CFI_ENDPROC
1120 #endif
1122 ENTRY(call_softirq)
1123 CFI_STARTPROC
1124 movq %gs:pda_irqstackptr,%rax
1125 movq %rsp,%rdx
1126 CFI_DEF_CFA_REGISTER rdx
1127 incl %gs:pda_irqcount
1128 cmove %rax,%rsp
1129 pushq %rdx
1130 /*todo CFI_DEF_CFA_EXPRESSION ...*/
1131 call __do_softirq
1132 popq %rsp
1133 CFI_DEF_CFA_REGISTER rsp
1134 decl %gs:pda_irqcount
1135 ret
1136 CFI_ENDPROC