ia64/linux-2.6.18-xen.hg

view arch/i386/kernel/entry-xen.S @ 870:9b9454800544

xen/i386: hypervisor_callback adjustments

The missing check of the interrupted code's code selector in
hypervisor_callback() allowed a user mode application to oops (and
perhaps crash) the kernel.
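
The check added at the top of hypervisor_callback (quoted from the listing
below) routes straight to the upcall path when the saved CS shows a
user-mode frame, since a user-mode EIP can never lie inside the kernel's
critical regions:

    testb $2,CS(%esp)
    movl EIP(%esp),%eax
    jnz .Ldo_upcall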

Further adjustments:
- the 'main' critical region does not include the jmp following the
disabling of interrupts
- the sysexit_[se]crit range checks got broken at some point - the
sysexit critical region is always at higher addresses than the 'main'
one, rendering the check pointless (but still consuming execution time);
since the supervisor mode kernel isn't actively used as far as I can
tell, I moved that code into an #ifdef using a hypothetical config option
- the use of a numeric label across more than 300 lines of code always
seemed pretty fragile to me, so the patch replaces this with a local
named label
- streamlined the critical_region_fixup code to eliminate a branch

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu May 14 10:08:40 2009 +0100 (2009-05-14)
parents 271d9b9bee40
children
line source
1 /*
2 * linux/arch/i386/entry.S
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
7 /*
8 * entry.S contains the system-call and fault low-level handling routines.
9 * This also contains the timer-interrupt handler, as well as all interrupts
10 * and faults that can result in a task-switch.
11 *
12 * NOTE: This code handles signal-recognition, which happens every time
13 * after a timer-interrupt and after each system call.
14 *
15 * I changed all the .align's to 4 (16 byte alignment), as that's faster
16 * on a 486.
17 *
18 * Stack layout in 'ret_from_system_call':
19 * ptrace needs to have all regs on the stack.
20 * if the order here is changed, it needs to be
21 * updated in fork.c:copy_process, signal.c:do_signal,
22 * ptrace.c and ptrace.h
23 *
24 * 0(%esp) - %ebx
25 * 4(%esp) - %ecx
26 * 8(%esp) - %edx
27 * C(%esp) - %esi
28 * 10(%esp) - %edi
29 * 14(%esp) - %ebp
30 * 18(%esp) - %eax
31 * 1C(%esp) - %ds
32 * 20(%esp) - %es
33 * 24(%esp) - orig_eax
34 * 28(%esp) - %eip
35 * 2C(%esp) - %cs
36 * 30(%esp) - %eflags
37 * 34(%esp) - %oldesp
38 * 38(%esp) - %oldss
39 *
40 * "current" is in register %ebx during any slow entries.
41 */
43 #include <linux/linkage.h>
44 #include <asm/thread_info.h>
45 #include <asm/irqflags.h>
46 #include <asm/errno.h>
47 #include <asm/segment.h>
48 #include <asm/smp.h>
49 #include <asm/page.h>
50 #include <asm/desc.h>
51 #include <asm/dwarf2.h>
52 #include "irq_vectors.h"
53 #include <xen/interface/xen.h>
55 #define nr_syscalls ((syscall_table_size)/4)
57 EBX = 0x00
58 ECX = 0x04
59 EDX = 0x08
60 ESI = 0x0C
61 EDI = 0x10
62 EBP = 0x14
63 EAX = 0x18
64 DS = 0x1C
65 ES = 0x20
66 ORIG_EAX = 0x24
67 EIP = 0x28
68 CS = 0x2C
69 EFLAGS = 0x30
70 OLDESP = 0x34
71 OLDSS = 0x38
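# For reference: the offsets above follow the field order of struct pt_regs
# (include/asm-i386/ptrace.h) -- ebx, ecx, edx, esi, edi, ebp, eax, xds, xes,
# orig_eax, eip, xcs, eflags, esp, xss -- which is why the header comment
# warns that fork.c, signal.c, ptrace.c and ptrace.h must track any change.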
73 CF_MASK = 0x00000001
74 TF_MASK = 0x00000100
75 IF_MASK = 0x00000200
76 DF_MASK = 0x00000400
77 NT_MASK = 0x00004000
78 VM_MASK = 0x00020000
79 /* Pseudo-eflags. */
80 NMI_MASK = 0x80000000
82 #ifndef CONFIG_XEN
83 #define DISABLE_INTERRUPTS cli
84 #define ENABLE_INTERRUPTS sti
85 #else
86 /* Offsets into shared_info_t. */
87 #define evtchn_upcall_pending /* 0 */
88 #define evtchn_upcall_mask 1
90 #define sizeof_vcpu_shift 6
92 #ifdef CONFIG_SMP
93 #define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \
94 shl $sizeof_vcpu_shift,%esi ; \
95 addl HYPERVISOR_shared_info,%esi
96 #else
97 #define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi
98 #endif
100 #define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi)
101 #define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi)
102 #define DISABLE_INTERRUPTS GET_VCPU_INFO ; \
103 __DISABLE_INTERRUPTS
104 #define ENABLE_INTERRUPTS GET_VCPU_INFO ; \
105 __ENABLE_INTERRUPTS
106 #define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
107 #endif
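# Illustration: on a non-SMP Xen build, DISABLE_INTERRUPTS above expands to
#	movl HYPERVISOR_shared_info,%esi	# GET_VCPU_INFO
#	movb $1,evtchn_upcall_mask(%esi)	# __DISABLE_INTERRUPTS
# i.e. the "interrupt flag" is the per-vCPU event mask byte in the shared
# info page rather than the hardware EFLAGS.IF.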
109 #ifdef CONFIG_PREEMPT
110 #define preempt_stop cli; TRACE_IRQS_OFF
111 #else
112 #define preempt_stop
113 #define resume_kernel restore_nocheck
114 #endif
116 .macro TRACE_IRQS_IRET
117 #ifdef CONFIG_TRACE_IRQFLAGS
118 testl $IF_MASK,EFLAGS(%esp) # interrupts off?
119 jz 1f
120 TRACE_IRQS_ON
121 1:
122 #endif
123 .endm
125 #ifdef CONFIG_VM86
126 #define resume_userspace_sig check_userspace
127 #else
128 #define resume_userspace_sig resume_userspace
129 #endif
131 #define SAVE_ALL \
132 cld; \
133 pushl %es; \
134 CFI_ADJUST_CFA_OFFSET 4;\
135 /*CFI_REL_OFFSET es, 0;*/\
136 pushl %ds; \
137 CFI_ADJUST_CFA_OFFSET 4;\
138 /*CFI_REL_OFFSET ds, 0;*/\
139 pushl %eax; \
140 CFI_ADJUST_CFA_OFFSET 4;\
141 CFI_REL_OFFSET eax, 0;\
142 pushl %ebp; \
143 CFI_ADJUST_CFA_OFFSET 4;\
144 CFI_REL_OFFSET ebp, 0;\
145 pushl %edi; \
146 CFI_ADJUST_CFA_OFFSET 4;\
147 CFI_REL_OFFSET edi, 0;\
148 pushl %esi; \
149 CFI_ADJUST_CFA_OFFSET 4;\
150 CFI_REL_OFFSET esi, 0;\
151 pushl %edx; \
152 CFI_ADJUST_CFA_OFFSET 4;\
153 CFI_REL_OFFSET edx, 0;\
154 pushl %ecx; \
155 CFI_ADJUST_CFA_OFFSET 4;\
156 CFI_REL_OFFSET ecx, 0;\
157 pushl %ebx; \
158 CFI_ADJUST_CFA_OFFSET 4;\
159 CFI_REL_OFFSET ebx, 0;\
160 movl $(__USER_DS), %edx; \
161 movl %edx, %ds; \
162 movl %edx, %es;
164 #define RESTORE_INT_REGS \
165 popl %ebx; \
166 CFI_ADJUST_CFA_OFFSET -4;\
167 CFI_RESTORE ebx;\
168 popl %ecx; \
169 CFI_ADJUST_CFA_OFFSET -4;\
170 CFI_RESTORE ecx;\
171 popl %edx; \
172 CFI_ADJUST_CFA_OFFSET -4;\
173 CFI_RESTORE edx;\
174 popl %esi; \
175 CFI_ADJUST_CFA_OFFSET -4;\
176 CFI_RESTORE esi;\
177 popl %edi; \
178 CFI_ADJUST_CFA_OFFSET -4;\
179 CFI_RESTORE edi;\
180 popl %ebp; \
181 CFI_ADJUST_CFA_OFFSET -4;\
182 CFI_RESTORE ebp;\
183 popl %eax; \
184 CFI_ADJUST_CFA_OFFSET -4;\
185 CFI_RESTORE eax
187 #define RESTORE_REGS \
188 RESTORE_INT_REGS; \
189 1: popl %ds; \
190 CFI_ADJUST_CFA_OFFSET -4;\
191 /*CFI_RESTORE ds;*/\
192 2: popl %es; \
193 CFI_ADJUST_CFA_OFFSET -4;\
194 /*CFI_RESTORE es;*/\
195 .section .fixup,"ax"; \
196 3: movl $0,(%esp); \
197 jmp 1b; \
198 4: movl $0,(%esp); \
199 jmp 2b; \
200 .previous; \
201 .section __ex_table,"a";\
202 .align 4; \
203 .long 1b,3b; \
204 .long 2b,4b; \
205 .previous
207 #define RING0_INT_FRAME \
208 CFI_STARTPROC simple;\
209 CFI_DEF_CFA esp, 3*4;\
210 /*CFI_OFFSET cs, -2*4;*/\
211 CFI_OFFSET eip, -3*4
213 #define RING0_EC_FRAME \
214 CFI_STARTPROC simple;\
215 CFI_DEF_CFA esp, 4*4;\
216 /*CFI_OFFSET cs, -2*4;*/\
217 CFI_OFFSET eip, -3*4
219 #define RING0_PTREGS_FRAME \
220 CFI_STARTPROC simple;\
221 CFI_DEF_CFA esp, OLDESP-EBX;\
222 /*CFI_OFFSET cs, CS-OLDESP;*/\
223 CFI_OFFSET eip, EIP-OLDESP;\
224 /*CFI_OFFSET es, ES-OLDESP;*/\
225 /*CFI_OFFSET ds, DS-OLDESP;*/\
226 CFI_OFFSET eax, EAX-OLDESP;\
227 CFI_OFFSET ebp, EBP-OLDESP;\
228 CFI_OFFSET edi, EDI-OLDESP;\
229 CFI_OFFSET esi, ESI-OLDESP;\
230 CFI_OFFSET edx, EDX-OLDESP;\
231 CFI_OFFSET ecx, ECX-OLDESP;\
232 CFI_OFFSET ebx, EBX-OLDESP
234 ENTRY(ret_from_fork)
235 CFI_STARTPROC
236 pushl %eax
237 CFI_ADJUST_CFA_OFFSET 4
238 call schedule_tail
239 GET_THREAD_INFO(%ebp)
240 popl %eax
241 CFI_ADJUST_CFA_OFFSET -4
242 pushl $0x0202 # Reset kernel eflags
243 CFI_ADJUST_CFA_OFFSET 4
244 popfl
245 CFI_ADJUST_CFA_OFFSET -4
246 jmp syscall_exit
247 CFI_ENDPROC
249 /*
250 * Return to user mode is not as complex as all this looks,
251 * but we want the default path for a system call return to
252 * go as quickly as possible which is why some of this is
253 * less clear than it otherwise should be.
254 */
256 # userspace resumption stub bypassing syscall exit tracing
257 ALIGN
258 RING0_PTREGS_FRAME
259 ret_from_exception:
260 preempt_stop
261 ret_from_intr:
262 GET_THREAD_INFO(%ebp)
263 check_userspace:
264 movl EFLAGS(%esp), %eax # mix EFLAGS and CS
265 movb CS(%esp), %al
266 testl $(VM_MASK | 2), %eax
267 jz resume_kernel
268 ENTRY(resume_userspace)
269 DISABLE_INTERRUPTS # make sure we don't miss an interrupt
270 # setting need_resched or sigpending
271 # between sampling and the iret
272 movl TI_flags(%ebp), %ecx
273 andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
274 # int/exception return?
275 jne work_pending
276 jmp restore_all
278 #ifdef CONFIG_PREEMPT
279 ENTRY(resume_kernel)
280 cli
281 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
282 jnz restore_nocheck
283 need_resched:
284 movl TI_flags(%ebp), %ecx # need_resched set ?
285 testb $_TIF_NEED_RESCHED, %cl
286 jz restore_all
287 testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ?
288 jz restore_all
289 call preempt_schedule_irq
290 jmp need_resched
291 #endif
292 CFI_ENDPROC
294 /* SYSENTER_RETURN points to after the "sysenter" instruction in
295 the vsyscall page. See vsyscall-sysenter.S, which defines the symbol. */
297 # sysenter call handler stub
298 ENTRY(sysenter_entry)
299 CFI_STARTPROC simple
300 CFI_DEF_CFA esp, 0
301 CFI_REGISTER esp, ebp
302 movl SYSENTER_stack_esp0(%esp),%esp
303 sysenter_past_esp:
304 /*
305 * No need to follow this irqs on/off section: the syscall
306 * disabled irqs and here we enable them straight after entry:
307 */
308 sti
309 pushl $(__USER_DS)
310 CFI_ADJUST_CFA_OFFSET 4
311 /*CFI_REL_OFFSET ss, 0*/
312 pushl %ebp
313 CFI_ADJUST_CFA_OFFSET 4
314 CFI_REL_OFFSET esp, 0
315 pushfl
316 CFI_ADJUST_CFA_OFFSET 4
317 pushl $(__USER_CS)
318 CFI_ADJUST_CFA_OFFSET 4
319 /*CFI_REL_OFFSET cs, 0*/
320 /*
321 * Push current_thread_info()->sysenter_return to the stack.
322 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
323 * pushed above; +8 corresponds to copy_thread's esp0 setting.
324 */
325 pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
326 CFI_ADJUST_CFA_OFFSET 4
327 CFI_REL_OFFSET eip, 0
329 /*
330 * Load the potential sixth argument from user stack.
331 * Careful about security.
332 */
333 cmpl $__PAGE_OFFSET-3,%ebp
334 jae syscall_fault
335 1: movl (%ebp),%ebp
336 .section __ex_table,"a"
337 .align 4
338 .long 1b,syscall_fault
339 .previous
341 pushl %eax
342 CFI_ADJUST_CFA_OFFSET 4
343 SAVE_ALL
344 GET_THREAD_INFO(%ebp)
346 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
347 testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
348 jnz syscall_trace_entry
349 cmpl $(nr_syscalls), %eax
350 jae syscall_badsys
351 call *sys_call_table(,%eax,4)
352 movl %eax,EAX(%esp)
353 DISABLE_INTERRUPTS
354 TRACE_IRQS_OFF
355 movl TI_flags(%ebp), %ecx
356 testw $_TIF_ALLWORK_MASK, %cx
357 jne syscall_exit_work
358 /* if something modifies registers it must also disable sysexit */
359 movl EIP(%esp), %edx
360 movl OLDESP(%esp), %ecx
361 xorl %ebp,%ebp
362 #ifdef CONFIG_XEN
363 TRACE_IRQS_ON
364 __ENABLE_INTERRUPTS
365 sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/
366 __TEST_PENDING
367 jnz 14f # process more events if necessary...
368 movl ESI(%esp), %esi
369 sysexit
370 14: __DISABLE_INTERRUPTS
371 TRACE_IRQS_OFF
372 sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/
373 push %esp
374 call evtchn_do_upcall
375 add $4,%esp
376 jmp ret_from_intr
377 #else
378 TRACE_IRQS_ON
379 sti
380 sysexit
381 #endif /* !CONFIG_XEN */
382 CFI_ENDPROC
384 # pv sysenter call handler stub
385 ENTRY(sysenter_entry_pv)
386 RING0_INT_FRAME
387 movl $__USER_DS,16(%esp)
388 movl %ebp,12(%esp)
389 movl $__USER_CS,4(%esp)
390 addl $4,%esp
391 CFI_ADJUST_CFA_OFFSET -4
392 /* +5*4 is SS:ESP,EFLAGS,CS:EIP. +8 is esp0 setting. */
393 pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
394 CFI_ADJUST_CFA_OFFSET 4
395 /*
396 * Load the potential sixth argument from user stack.
397 * Careful about security.
398 */
399 cmpl $__PAGE_OFFSET-3,%ebp
400 jae syscall_fault
401 1: movl (%ebp),%ebp
402 .section __ex_table,"a"
403 .align 4
404 .long 1b,syscall_fault
405 .previous
406 /* fall through */
407 CFI_ENDPROC
408 ENDPROC(sysenter_entry_pv)
410 # system call handler stub
411 ENTRY(system_call)
412 RING0_INT_FRAME # can't unwind into user space anyway
413 pushl %eax # save orig_eax
414 CFI_ADJUST_CFA_OFFSET 4
415 SAVE_ALL
416 GET_THREAD_INFO(%ebp)
417 testl $TF_MASK,EFLAGS(%esp)
418 jz no_singlestep
419 orl $_TIF_SINGLESTEP,TI_flags(%ebp)
420 no_singlestep:
421 # system call tracing in operation / emulation
422 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
423 testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
424 jnz syscall_trace_entry
425 cmpl $(nr_syscalls), %eax
426 jae syscall_badsys
427 syscall_call:
428 call *sys_call_table(,%eax,4)
429 movl %eax,EAX(%esp) # store the return value
430 syscall_exit:
431 DISABLE_INTERRUPTS # make sure we don't miss an interrupt
432 # setting need_resched or sigpending
433 # between sampling and the iret
434 TRACE_IRQS_OFF
435 movl TI_flags(%ebp), %ecx
436 testw $_TIF_ALLWORK_MASK, %cx # current->work
437 jne syscall_exit_work
439 restore_all:
440 #ifndef CONFIG_XEN
441 movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
442 # Warning: OLDSS(%esp) contains the wrong/random values if we
443 # are returning to the kernel.
444 # See comments in process.c:copy_thread() for details.
445 movb OLDSS(%esp), %ah
446 movb CS(%esp), %al
447 andl $(VM_MASK | (4 << 8) | 3), %eax
448 cmpl $((4 << 8) | 3), %eax
449 CFI_REMEMBER_STATE
450 je ldt_ss # returning to user-space with LDT SS
451 restore_nocheck:
452 #else
453 restore_nocheck:
454 movl EFLAGS(%esp), %eax
455 testl $(VM_MASK|NMI_MASK), %eax
456 CFI_REMEMBER_STATE
457 jnz hypervisor_iret
458 shr $9, %eax # EAX[0] == IRET_EFLAGS.IF
459 GET_VCPU_INFO
460 andb evtchn_upcall_mask(%esi),%al
461 andb $1,%al # EAX[0] == IRET_EFLAGS.IF & event_mask
462 CFI_REMEMBER_STATE
463 jnz restore_all_enable_events # != 0 => enable event delivery
464 #endif
465 TRACE_IRQS_IRET
466 restore_nocheck_notrace:
467 RESTORE_REGS
468 addl $4, %esp
469 CFI_ADJUST_CFA_OFFSET -4
470 1: iret
471 .section .fixup,"ax"
472 iret_exc:
473 #ifndef CONFIG_XEN
474 TRACE_IRQS_ON
475 sti
476 #endif
477 pushl $0 # no error code
478 pushl $do_iret_error
479 jmp error_code
480 .previous
481 .section __ex_table,"a"
482 .align 4
483 .long 1b,iret_exc
484 .previous
486 CFI_RESTORE_STATE
487 #ifndef CONFIG_XEN
488 ldt_ss:
489 larl OLDSS(%esp), %eax
490 jnz restore_nocheck
491 testl $0x00400000, %eax # returning to 32bit stack?
492 jnz restore_nocheck # all right, normal return
493 /* If returning to userspace with 16bit stack,
494 * try to fix the higher word of ESP, as the CPU
495 * won't restore it.
496 * This is an "official" bug of all the x86-compatible
497 * CPUs, which we can try to work around to make
498 * dosemu and wine happy. */
499 subl $8, %esp # reserve space for switch16 pointer
500 CFI_ADJUST_CFA_OFFSET 8
501 cli
502 TRACE_IRQS_OFF
503 movl %esp, %eax
504 /* Set up the 16bit stack frame with switch32 pointer on top,
505 * and a switch16 pointer on top of the current frame. */
506 call setup_x86_bogus_stack
507 CFI_ADJUST_CFA_OFFSET -8 # frame has moved
508 TRACE_IRQS_IRET
509 RESTORE_REGS
510 lss 20+4(%esp), %esp # switch to 16bit stack
511 1: iret
512 .section __ex_table,"a"
513 .align 4
514 .long 1b,iret_exc
515 .previous
516 #else
517 ALIGN
518 restore_all_enable_events:
519 TRACE_IRQS_ON
520 __ENABLE_INTERRUPTS
521 scrit: /**** START OF CRITICAL REGION ****/
522 __TEST_PENDING
523 jnz 14f # process more events if necessary...
524 RESTORE_REGS
525 addl $4, %esp
526 CFI_ADJUST_CFA_OFFSET -4
527 1: iret
528 .section __ex_table,"a"
529 .align 4
530 .long 1b,iret_exc
531 .previous
532 14: __DISABLE_INTERRUPTS
533 TRACE_IRQS_OFF
534 ecrit: /**** END OF CRITICAL REGION ****/
535 jmp .Ldo_upcall
537 CFI_RESTORE_STATE
538 hypervisor_iret:
539 andl $~NMI_MASK, EFLAGS(%esp)
540 RESTORE_REGS
541 addl $4, %esp
542 CFI_ADJUST_CFA_OFFSET -4
543 jmp hypercall_page + (__HYPERVISOR_iret * 32)
544 #endif
545 CFI_ENDPROC
547 # perform work that needs to be done immediately before resumption
548 ALIGN
549 RING0_PTREGS_FRAME # can't unwind into user space anyway
550 work_pending:
551 testb $_TIF_NEED_RESCHED, %cl
552 jz work_notifysig
553 work_resched:
554 call schedule
555 DISABLE_INTERRUPTS # make sure we don't miss an interrupt
556 # setting need_resched or sigpending
557 # between sampling and the iret
558 TRACE_IRQS_OFF
559 movl TI_flags(%ebp), %ecx
560 andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
561 # than syscall tracing?
562 jz restore_all
563 testb $_TIF_NEED_RESCHED, %cl
564 jnz work_resched
566 work_notifysig: # deal with pending signals and
567 # notify-resume requests
568 testl $VM_MASK, EFLAGS(%esp)
569 movl %esp, %eax
570 jne work_notifysig_v86 # returning to kernel-space or
571 # vm86-space
572 xorl %edx, %edx
573 call do_notify_resume
574 jmp resume_userspace_sig
576 ALIGN
577 work_notifysig_v86:
578 #ifdef CONFIG_VM86
579 pushl %ecx # save ti_flags for do_notify_resume
580 CFI_ADJUST_CFA_OFFSET 4
581 call save_v86_state # %eax contains pt_regs pointer
582 popl %ecx
583 CFI_ADJUST_CFA_OFFSET -4
584 movl %eax, %esp
585 xorl %edx, %edx
586 call do_notify_resume
587 jmp resume_userspace_sig
588 #endif
590 # perform syscall entry tracing
591 ALIGN
592 syscall_trace_entry:
593 movl $-ENOSYS,EAX(%esp)
594 movl %esp, %eax
595 xorl %edx,%edx
596 call do_syscall_trace
597 cmpl $0, %eax
598 jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU,
599 # so must skip actual syscall
600 movl ORIG_EAX(%esp), %eax
601 cmpl $(nr_syscalls), %eax
602 jnae syscall_call
603 jmp syscall_exit
605 # perform syscall exit tracing
606 ALIGN
607 syscall_exit_work:
608 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
609 jz work_pending
610 TRACE_IRQS_ON
611 ENABLE_INTERRUPTS # could let do_syscall_trace() call
612 # schedule() instead
613 movl %esp, %eax
614 movl $1, %edx
615 call do_syscall_trace
616 jmp resume_userspace
617 CFI_ENDPROC
619 RING0_INT_FRAME # can't unwind into user space anyway
620 syscall_fault:
621 pushl %eax # save orig_eax
622 CFI_ADJUST_CFA_OFFSET 4
623 SAVE_ALL
624 GET_THREAD_INFO(%ebp)
625 movl $-EFAULT,EAX(%esp)
626 jmp resume_userspace
628 syscall_badsys:
629 movl $-ENOSYS,EAX(%esp)
630 jmp resume_userspace
631 CFI_ENDPROC
633 #ifndef CONFIG_XEN
634 #define FIXUP_ESPFIX_STACK \
635 movl %esp, %eax; \
636 /* switch to 32bit stack using the pointer on top of 16bit stack */ \
637 lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \
638 /* copy data from 16bit stack to 32bit stack */ \
639 call fixup_x86_bogus_stack; \
640 /* put ESP to the proper location */ \
641 movl %eax, %esp;
642 #define UNWIND_ESPFIX_STACK \
643 pushl %eax; \
644 CFI_ADJUST_CFA_OFFSET 4; \
645 movl %ss, %eax; \
646 /* see if on 16bit stack */ \
647 cmpw $__ESPFIX_SS, %ax; \
648 je 28f; \
649 27: popl %eax; \
650 CFI_ADJUST_CFA_OFFSET -4; \
651 .section .fixup,"ax"; \
652 28: movl $__KERNEL_DS, %eax; \
653 movl %eax, %ds; \
654 movl %eax, %es; \
655 /* switch to 32bit stack */ \
656 FIXUP_ESPFIX_STACK; \
657 jmp 27b; \
658 .previous
660 /*
661 * Build the entry stubs and pointer table with
662 * some assembler magic.
663 */
664 .data
665 ENTRY(interrupt)
666 .text
668 vector=0
669 ENTRY(irq_entries_start)
670 RING0_INT_FRAME
671 .rept NR_IRQS
672 ALIGN
673 .if vector
674 CFI_ADJUST_CFA_OFFSET -4
675 .endif
676 1: pushl $~(vector)
677 CFI_ADJUST_CFA_OFFSET 4
678 jmp common_interrupt
679 .data
680 .long 1b
681 .text
682 vector=vector+1
683 .endr
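# Illustration: each iteration of the .rept above emits a stub of the form
#	pushl $~(vector)
#	jmp common_interrupt
# in .text and appends the stub's address to the interrupt[] pointer table
# built in .data, so the handler sees ~vector in the orig_eax slot.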
685 /*
686 * the CPU automatically disables interrupts when executing an IRQ vector,
687 * so IRQ-flags tracing has to follow that:
688 */
689 ALIGN
690 common_interrupt:
691 SAVE_ALL
692 TRACE_IRQS_OFF
693 movl %esp,%eax
694 call do_IRQ
695 jmp ret_from_intr
696 CFI_ENDPROC
698 #define BUILD_INTERRUPT(name, nr) \
699 ENTRY(name) \
700 RING0_INT_FRAME; \
701 pushl $~(nr); \
702 CFI_ADJUST_CFA_OFFSET 4; \
703 SAVE_ALL; \
704 TRACE_IRQS_OFF \
705 movl %esp,%eax; \
706 call smp_/**/name; \
707 jmp ret_from_intr; \
708 CFI_ENDPROC
710 /* The include is where all of the SMP etc. interrupts come from */
711 #include "entry_arch.h"
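# Illustration: entry_arch.h instantiates BUILD_INTERRUPT for the SMP/APIC
# vectors; an invocation such as BUILD_INTERRUPT(reschedule_interrupt,
# RESCHEDULE_VECTOR) produces an entry point that pushes ~nr as orig_eax,
# does SAVE_ALL, and calls smp_reschedule_interrupt with the pt_regs
# pointer in %eax.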
712 #else
713 #define UNWIND_ESPFIX_STACK
714 #endif
716 ENTRY(divide_error)
717 RING0_INT_FRAME
718 pushl $0 # no error code
719 CFI_ADJUST_CFA_OFFSET 4
720 pushl $do_divide_error
721 CFI_ADJUST_CFA_OFFSET 4
722 ALIGN
723 error_code:
724 pushl %ds
725 CFI_ADJUST_CFA_OFFSET 4
726 /*CFI_REL_OFFSET ds, 0*/
727 pushl %eax
728 CFI_ADJUST_CFA_OFFSET 4
729 CFI_REL_OFFSET eax, 0
730 xorl %eax, %eax
731 pushl %ebp
732 CFI_ADJUST_CFA_OFFSET 4
733 CFI_REL_OFFSET ebp, 0
734 pushl %edi
735 CFI_ADJUST_CFA_OFFSET 4
736 CFI_REL_OFFSET edi, 0
737 pushl %esi
738 CFI_ADJUST_CFA_OFFSET 4
739 CFI_REL_OFFSET esi, 0
740 pushl %edx
741 CFI_ADJUST_CFA_OFFSET 4
742 CFI_REL_OFFSET edx, 0
743 decl %eax # eax = -1
744 pushl %ecx
745 CFI_ADJUST_CFA_OFFSET 4
746 CFI_REL_OFFSET ecx, 0
747 pushl %ebx
748 CFI_ADJUST_CFA_OFFSET 4
749 CFI_REL_OFFSET ebx, 0
750 cld
751 pushl %es
752 CFI_ADJUST_CFA_OFFSET 4
753 /*CFI_REL_OFFSET es, 0*/
754 UNWIND_ESPFIX_STACK
755 popl %ecx
756 CFI_ADJUST_CFA_OFFSET -4
757 /*CFI_REGISTER es, ecx*/
758 movl ES(%esp), %edi # get the function address
759 movl ORIG_EAX(%esp), %edx # get the error code
760 movl %eax, ORIG_EAX(%esp)
761 movl %ecx, ES(%esp)
762 /*CFI_REL_OFFSET es, ES*/
763 movl $(__USER_DS), %ecx
764 movl %ecx, %ds
765 movl %ecx, %es
766 movl %esp,%eax # pt_regs pointer
767 call *%edi
768 jmp ret_from_exception
769 CFI_ENDPROC
771 #ifdef CONFIG_XEN
772 # A note on the "critical region" in our callback handler.
773 # We want to avoid stacking callback handlers due to events occurring
774 # during handling of the last event. To do this, we keep events disabled
775 # until we've done all processing. HOWEVER, we must enable events before
776 # popping the stack frame (can't be done atomically) and so it would still
777 # be possible to get enough handler activations to overflow the stack.
778 # Although unlikely, bugs of that kind are hard to track down, so we'd
779 # like to avoid the possibility.
780 # So, on entry to the handler we detect whether we interrupted an
781 # existing activation in its critical region -- if so, we pop the current
782 # activation and restart the handler using the previous one.
783 #
784 # The sysexit critical region is slightly different. sysexit
785 # atomically removes the entire stack frame. If we interrupt in the
786 # critical region we know that the entire frame is present and correct
787 # so we can simply throw away the new one.
788 ENTRY(hypervisor_callback)
789 RING0_INT_FRAME
790 pushl %eax
791 CFI_ADJUST_CFA_OFFSET 4
792 SAVE_ALL
793 testb $2,CS(%esp)
794 movl EIP(%esp),%eax
795 jnz .Ldo_upcall
796 cmpl $scrit,%eax
797 jb 0f
798 cmpl $ecrit,%eax
799 jb critical_region_fixup
800 0:
801 #ifdef CONFIG_XEN_SUPERVISOR_MODE_KERNEL
802 cmpl $sysexit_scrit,%eax
803 jb .Ldo_upcall
804 cmpl $sysexit_ecrit,%eax
805 ja .Ldo_upcall
806 addl $OLDESP,%esp # Remove eflags...ebx from stack frame.
807 #endif
808 .Ldo_upcall:
809 push %esp
810 CFI_ADJUST_CFA_OFFSET 4
811 call evtchn_do_upcall
812 add $4,%esp
813 CFI_ADJUST_CFA_OFFSET -4
814 jmp ret_from_intr
815 CFI_ENDPROC
817 # [How we do the fixup]. We want to merge the current stack frame with the
818 # just-interrupted frame. How we do this depends on where in the critical
819 # region the interrupted handler was executing, and so how many saved
820 # registers are in each frame. We do this quickly using the lookup table
821 # 'critical_fixup_table'. For each byte offset in the critical region, it
822 provides the number of 32-bit stack slots which have already been popped
823 from the interrupted stack frame.
824 critical_region_fixup:
825 movsbl critical_fixup_table-scrit(%eax),%ecx # %ecx contains num slots popped
826 testl %ecx,%ecx
827 leal (%esp,%ecx,4),%esi # %esi points at end of src region
828 leal OLDESP(%esp),%edi # %edi points at end of dst region
829 jle 17f # skip loop if nothing to copy
830 16: subl $4,%esi # pre-decrementing copy loop
831 subl $4,%edi
832 movl (%esi),%eax
833 movl %eax,(%edi)
834 loop 16b
835 17: movl %edi,%esp # final %edi is top of merged stack
836 jmp .Ldo_upcall
838 .section .rodata,"a"
839 critical_fixup_table:
840 .byte -1,-1,-1 # testb $0xff,(%esi) = __TEST_PENDING
841 .byte -1,-1 # jnz 14f
842 .byte 0 # pop %ebx
843 .byte 1 # pop %ecx
844 .byte 2 # pop %edx
845 .byte 3 # pop %esi
846 .byte 4 # pop %edi
847 .byte 5 # pop %ebp
848 .byte 6 # pop %eax
849 .byte 7 # pop %ds
850 .byte 8 # pop %es
851 .byte 9,9,9 # add $4,%esp
852 .byte 10 # iret
853 .byte -1,-1,-1,-1 # movb $1,1(%esi) = __DISABLE_INTERRUPTS
854 .previous
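# Worked example: if the upcall interrupts with the saved EIP at the
# "pop %edx" above, the table entry for that offset is 2 -- %ebx and %ecx
# had already been popped -- so the loop merges two words back; entries of
# -1 (EIP still inside __TEST_PENDING / jnz) make the "jle 17f" skip the
# copy altogether.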
856 # Hypervisor uses this for application faults while it executes.
857 # We get here for two reasons:
858 # 1. Fault while reloading DS, ES, FS or GS
859 # 2. Fault while executing IRET
860 # Category 1 we fix up by reattempting the load, and zeroing the segment
861 # register if the load fails.
862 # Category 2 we fix up by jumping to do_iret_error. We cannot use the
863 # normal Linux return path in this case because if we use the IRET hypercall
864 # to pop the stack frame we end up in an infinite loop of failsafe callbacks.
865 # We distinguish between categories by maintaining a status value in EAX.
866 ENTRY(failsafe_callback)
867 pushl %eax
868 movl $1,%eax
869 1: mov 4(%esp),%ds
870 2: mov 8(%esp),%es
871 3: mov 12(%esp),%fs
872 4: mov 16(%esp),%gs
873 testl %eax,%eax
874 popl %eax
875 jz 5f
876 addl $16,%esp # EAX != 0 => Category 2 (Bad IRET)
877 jmp iret_exc
878 5: addl $16,%esp # EAX == 0 => Category 1 (Bad segment)
879 RING0_INT_FRAME
880 pushl $0
881 SAVE_ALL
882 jmp ret_from_exception
883 .section .fixup,"ax"; \
884 6: xorl %eax,%eax; \
885 movl %eax,4(%esp); \
886 jmp 1b; \
887 7: xorl %eax,%eax; \
888 movl %eax,8(%esp); \
889 jmp 2b; \
890 8: xorl %eax,%eax; \
891 movl %eax,12(%esp); \
892 jmp 3b; \
893 9: xorl %eax,%eax; \
894 movl %eax,16(%esp); \
895 jmp 4b; \
896 .previous; \
897 .section __ex_table,"a"; \
898 .align 4; \
899 .long 1b,6b; \
900 .long 2b,7b; \
901 .long 3b,8b; \
902 .long 4b,9b; \
903 .previous
904 #endif
905 CFI_ENDPROC
907 ENTRY(coprocessor_error)
908 RING0_INT_FRAME
909 pushl $0
910 CFI_ADJUST_CFA_OFFSET 4
911 pushl $do_coprocessor_error
912 CFI_ADJUST_CFA_OFFSET 4
913 jmp error_code
914 CFI_ENDPROC
916 ENTRY(simd_coprocessor_error)
917 RING0_INT_FRAME
918 pushl $0
919 CFI_ADJUST_CFA_OFFSET 4
920 pushl $do_simd_coprocessor_error
921 CFI_ADJUST_CFA_OFFSET 4
922 jmp error_code
923 CFI_ENDPROC
925 ENTRY(device_not_available)
926 RING0_INT_FRAME
927 pushl $-1 # mark this as an int
928 CFI_ADJUST_CFA_OFFSET 4
929 SAVE_ALL
930 #ifndef CONFIG_XEN
931 movl %cr0, %eax
932 testl $0x4, %eax # EM (math emulation bit)
933 je device_available_emulate
934 pushl $0 # temporary storage for ORIG_EIP
935 CFI_ADJUST_CFA_OFFSET 4
936 call math_emulate
937 addl $4, %esp
938 CFI_ADJUST_CFA_OFFSET -4
939 jmp ret_from_exception
940 device_available_emulate:
941 #endif
942 preempt_stop
943 call math_state_restore
944 jmp ret_from_exception
945 CFI_ENDPROC
947 #ifndef CONFIG_XEN
948 /*
949 * Debug traps and NMI can happen at the one SYSENTER instruction
950 * that sets up the real kernel stack. Check here, since we can't
951 * allow the wrong stack to be used.
952 *
953 * "SYSENTER_stack_esp0+12" is because the NMI/debug handler will have
954 * already pushed 3 words if it hits on the sysenter instruction:
955 * eflags, cs and eip.
956 *
957 * We just load the right stack, and push the three (known) values
958 * by hand onto the new stack - while updating the return eip past
959 * the instruction that would have done it for sysenter.
960 */
961 #define FIX_STACK(offset, ok, label) \
962 cmpw $__KERNEL_CS,4(%esp); \
963 jne ok; \
964 label: \
965 movl SYSENTER_stack_esp0+offset(%esp),%esp; \
966 pushfl; \
967 pushl $__KERNEL_CS; \
968 pushl $sysenter_past_esp
969 #endif /* CONFIG_XEN */
971 KPROBE_ENTRY(debug)
972 RING0_INT_FRAME
973 #ifndef CONFIG_XEN
974 cmpl $sysenter_entry,(%esp)
975 jne debug_stack_correct
976 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
977 debug_stack_correct:
978 #endif /* !CONFIG_XEN */
979 pushl $-1 # mark this as an int
980 CFI_ADJUST_CFA_OFFSET 4
981 SAVE_ALL
982 xorl %edx,%edx # error code 0
983 movl %esp,%eax # pt_regs pointer
984 call do_debug
985 jmp ret_from_exception
986 CFI_ENDPROC
987 .previous .text
988 #ifndef CONFIG_XEN
989 /*
990 * NMI is doubly nasty. It can happen _while_ we're handling
991 * a debug fault, and the debug fault hasn't yet been able to
992 * clear up the stack. So we first check whether we got an
993 * NMI on the sysenter entry path, but after that we need to
994 * check whether we got an NMI on the debug path where the debug
995 * fault happened on the sysenter path.
996 */
997 ENTRY(nmi)
998 RING0_INT_FRAME
999 pushl %eax
1000 CFI_ADJUST_CFA_OFFSET 4
1001 movl %ss, %eax
1002 cmpw $__ESPFIX_SS, %ax
1003 popl %eax
1004 CFI_ADJUST_CFA_OFFSET -4
1005 je nmi_16bit_stack
1006 cmpl $sysenter_entry,(%esp)
1007 je nmi_stack_fixup
1008 pushl %eax
1009 CFI_ADJUST_CFA_OFFSET 4
1010 movl %esp,%eax
1011 /* Do not access memory above the end of our stack page,
1012 * it might not exist.
1013 */
1014 andl $(THREAD_SIZE-1),%eax
1015 cmpl $(THREAD_SIZE-20),%eax
1016 popl %eax
1017 CFI_ADJUST_CFA_OFFSET -4
1018 jae nmi_stack_correct
1019 cmpl $sysenter_entry,12(%esp)
1020 je nmi_debug_stack_check
1021 nmi_stack_correct:
1022 pushl %eax
1023 CFI_ADJUST_CFA_OFFSET 4
1024 SAVE_ALL
1025 xorl %edx,%edx # zero error code
1026 movl %esp,%eax # pt_regs pointer
1027 call do_nmi
1028 jmp restore_nocheck_notrace
1029 CFI_ENDPROC
1031 nmi_stack_fixup:
1032 FIX_STACK(12,nmi_stack_correct, 1)
1033 jmp nmi_stack_correct
1034 nmi_debug_stack_check:
1035 cmpw $__KERNEL_CS,16(%esp)
1036 jne nmi_stack_correct
1037 cmpl $debug,(%esp)
1038 jb nmi_stack_correct
1039 cmpl $debug_esp_fix_insn,(%esp)
1040 ja nmi_stack_correct
1041 FIX_STACK(24,nmi_stack_correct, 1)
1042 jmp nmi_stack_correct
1044 nmi_16bit_stack:
1045 RING0_INT_FRAME
1046 /* create the pointer to lss back */
1047 pushl %ss
1048 CFI_ADJUST_CFA_OFFSET 4
1049 pushl %esp
1050 CFI_ADJUST_CFA_OFFSET 4
1051 movzwl %sp, %esp
1052 addw $4, (%esp)
1053 /* copy the iret frame of 12 bytes */
1054 .rept 3
1055 pushl 16(%esp)
1056 CFI_ADJUST_CFA_OFFSET 4
1057 .endr
1058 pushl %eax
1059 CFI_ADJUST_CFA_OFFSET 4
1060 SAVE_ALL
1061 FIXUP_ESPFIX_STACK # %eax == %esp
1062 CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved
1063 xorl %edx,%edx # zero error code
1064 call do_nmi
1065 RESTORE_REGS
1066 lss 12+4(%esp), %esp # back to 16bit stack
1067 1: iret
1068 CFI_ENDPROC
1069 .section __ex_table,"a"
1070 .align 4
1071 .long 1b,iret_exc
1072 .previous
1073 #else
1074 ENTRY(nmi)
1075 RING0_INT_FRAME
1076 pushl %eax
1077 CFI_ADJUST_CFA_OFFSET 4
1078 SAVE_ALL
1079 xorl %edx,%edx # zero error code
1080 movl %esp,%eax # pt_regs pointer
1081 call do_nmi
1082 orl $NMI_MASK, EFLAGS(%esp)
1083 jmp restore_all
1084 CFI_ENDPROC
1085 #endif
1087 KPROBE_ENTRY(int3)
1088 RING0_INT_FRAME
1089 pushl $-1 # mark this as an int
1090 CFI_ADJUST_CFA_OFFSET 4
1091 SAVE_ALL
1092 xorl %edx,%edx # zero error code
1093 movl %esp,%eax # pt_regs pointer
1094 call do_int3
1095 jmp ret_from_exception
1096 CFI_ENDPROC
1097 .previous .text
1099 ENTRY(overflow)
1100 RING0_INT_FRAME
1101 pushl $0
1102 CFI_ADJUST_CFA_OFFSET 4
1103 pushl $do_overflow
1104 CFI_ADJUST_CFA_OFFSET 4
1105 jmp error_code
1106 CFI_ENDPROC
1108 ENTRY(bounds)
1109 RING0_INT_FRAME
1110 pushl $0
1111 CFI_ADJUST_CFA_OFFSET 4
1112 pushl $do_bounds
1113 CFI_ADJUST_CFA_OFFSET 4
1114 jmp error_code
1115 CFI_ENDPROC
1117 ENTRY(invalid_op)
1118 RING0_INT_FRAME
1119 pushl $0
1120 CFI_ADJUST_CFA_OFFSET 4
1121 pushl $do_invalid_op
1122 CFI_ADJUST_CFA_OFFSET 4
1123 jmp error_code
1124 CFI_ENDPROC
1126 ENTRY(coprocessor_segment_overrun)
1127 RING0_INT_FRAME
1128 pushl $0
1129 CFI_ADJUST_CFA_OFFSET 4
1130 pushl $do_coprocessor_segment_overrun
1131 CFI_ADJUST_CFA_OFFSET 4
1132 jmp error_code
1133 CFI_ENDPROC
1135 ENTRY(invalid_TSS)
1136 RING0_EC_FRAME
1137 pushl $do_invalid_TSS
1138 CFI_ADJUST_CFA_OFFSET 4
1139 jmp error_code
1140 CFI_ENDPROC
1142 ENTRY(segment_not_present)
1143 RING0_EC_FRAME
1144 pushl $do_segment_not_present
1145 CFI_ADJUST_CFA_OFFSET 4
1146 jmp error_code
1147 CFI_ENDPROC
1149 ENTRY(stack_segment)
1150 RING0_EC_FRAME
1151 pushl $do_stack_segment
1152 CFI_ADJUST_CFA_OFFSET 4
1153 jmp error_code
1154 CFI_ENDPROC
1156 KPROBE_ENTRY(general_protection)
1157 RING0_EC_FRAME
1158 pushl $do_general_protection
1159 CFI_ADJUST_CFA_OFFSET 4
1160 jmp error_code
1161 CFI_ENDPROC
1162 .previous .text
1164 ENTRY(alignment_check)
1165 RING0_EC_FRAME
1166 pushl $do_alignment_check
1167 CFI_ADJUST_CFA_OFFSET 4
1168 jmp error_code
1169 CFI_ENDPROC
1171 KPROBE_ENTRY(page_fault)
1172 RING0_EC_FRAME
1173 pushl $do_page_fault
1174 CFI_ADJUST_CFA_OFFSET 4
1175 jmp error_code
1176 CFI_ENDPROC
1177 .previous .text
1179 #ifdef CONFIG_X86_MCE
1180 ENTRY(machine_check)
1181 RING0_INT_FRAME
1182 pushl $0
1183 CFI_ADJUST_CFA_OFFSET 4
1184 pushl machine_check_vector
1185 CFI_ADJUST_CFA_OFFSET 4
1186 jmp error_code
1187 CFI_ENDPROC
1188 #endif
1190 #ifndef CONFIG_XEN
1191 ENTRY(spurious_interrupt_bug)
1192 RING0_INT_FRAME
1193 pushl $0
1194 CFI_ADJUST_CFA_OFFSET 4
1195 pushl $do_spurious_interrupt_bug
1196 CFI_ADJUST_CFA_OFFSET 4
1197 jmp error_code
1198 CFI_ENDPROC
1199 #endif /* !CONFIG_XEN */
1201 #ifdef CONFIG_STACK_UNWIND
1202 ENTRY(arch_unwind_init_running)
1203 CFI_STARTPROC
1204 movl 4(%esp), %edx
1205 movl (%esp), %ecx
1206 leal 4(%esp), %eax
1207 movl %ebx, EBX(%edx)
1208 xorl %ebx, %ebx
1209 movl %ebx, ECX(%edx)
1210 movl %ebx, EDX(%edx)
1211 movl %esi, ESI(%edx)
1212 movl %edi, EDI(%edx)
1213 movl %ebp, EBP(%edx)
1214 movl %ebx, EAX(%edx)
1215 movl $__USER_DS, DS(%edx)
1216 movl $__USER_DS, ES(%edx)
1217 movl %ebx, ORIG_EAX(%edx)
1218 movl %ecx, EIP(%edx)
1219 movl 12(%esp), %ecx
1220 movl $__KERNEL_CS, CS(%edx)
1221 movl %ebx, EFLAGS(%edx)
1222 movl %eax, OLDESP(%edx)
1223 movl 8(%esp), %eax
1224 movl %ecx, 8(%esp)
1225 movl EBX(%edx), %ebx
1226 movl $__KERNEL_DS, OLDSS(%edx)
1227 jmpl *%eax
1228 CFI_ENDPROC
1229 ENDPROC(arch_unwind_init_running)
1230 #endif
1232 ENTRY(fixup_4gb_segment)
1233 RING0_EC_FRAME
1234 pushl $do_fixup_4gb_segment
1235 CFI_ADJUST_CFA_OFFSET 4
1236 jmp error_code
1237 CFI_ENDPROC
1239 .section .rodata,"a"
1240 #include "syscall_table.S"
1242 syscall_table_size=(.-sys_call_table)