ia64/xen-unstable
changeset 15436:acb7aa72fac7
i386: remove NMI deferral by instead making sure selector registers
are always stored/restored correctly despite the potential for an NMI
(and also MCE, with a subsequent patch) to kick in.

The idea is to always check values read from %ds and %es against
__HYPERVISOR_DS, and only store into the current frame (all normal
handlers) or the outer-most one (NMI and MCE) if the value read is
different. That way, any NMI or MCE occurring during frame setup will
store selectors not saved so far on behalf of the interrupted handler,
with that interrupted handler either having managed to read the guest
selector (in which case it can store it regardless of whether NMI/MCE
kicked in between the read and the store) or finding __HYPERVISOR_DS
already in the register, in which case it'll know not to store (as the
nested handler would have done the store).

For the restore portion this makes use of the fact that there's
exactly one such code sequence, and by moving the selector restore
part past all other restores (including all stack pointer adjustments)
the NMI/MCE handlers can safely detect whether any selector would have
been restored already (by range checking EIP) and move EIP back to the
beginning of the selector restore sequence without having to play with
the stack pointer itself or any other gpr.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
author | kfraser@localhost.localdomain |
---|---|
date | Thu Jun 21 12:13:06 2007 +0100 (2007-06-21) |
parents | 5ec34f7f31ab |
children | 899a44cb6ef6 |
files | xen/arch/x86/x86_32/entry.S xen/arch/x86/x86_32/supervisor_mode_kernel.S xen/arch/x86/x86_32/traps.c xen/include/asm-x86/processor.h xen/include/asm-x86/x86_32/asm_defns.h |
line diff
1.1 --- a/xen/arch/x86/x86_32/entry.S Thu Jun 21 12:10:01 2007 +0100 1.2 +++ b/xen/arch/x86/x86_32/entry.S Thu Jun 21 12:13:06 2007 +0100 1.3 @@ -72,35 +72,34 @@ 1.4 andl $~3,reg; \ 1.5 movl (reg),reg; 1.6 1.7 - 1.8 ALIGN 1.9 restore_all_guest: 1.10 ASSERT_INTERRUPTS_DISABLED 1.11 testl $X86_EFLAGS_VM,UREGS_eflags(%esp) 1.12 - jnz restore_all_vm86 1.13 + popl %ebx 1.14 + popl %ecx 1.15 + popl %edx 1.16 + popl %esi 1.17 + popl %edi 1.18 + popl %ebp 1.19 + popl %eax 1.20 + leal 4(%esp),%esp 1.21 + jnz .Lrestore_iret_guest 1.22 #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL 1.23 - testl $2,UREGS_cs(%esp) 1.24 - jnz 1f 1.25 + testb $2,UREGS_cs-UREGS_eip(%esp) 1.26 + jnz .Lrestore_sregs_guest 1.27 call restore_ring0_guest 1.28 - jmp restore_all_vm86 1.29 -1: 1.30 + jmp .Lrestore_iret_guest 1.31 #endif 1.32 -.Lft1: mov UREGS_ds(%esp),%ds 1.33 -.Lft2: mov UREGS_es(%esp),%es 1.34 -.Lft3: mov UREGS_fs(%esp),%fs 1.35 -.Lft4: mov UREGS_gs(%esp),%gs 1.36 -restore_all_vm86: 1.37 - popl %ebx 1.38 - popl %ecx 1.39 - popl %edx 1.40 - popl %esi 1.41 - popl %edi 1.42 - popl %ebp 1.43 - popl %eax 1.44 - addl $4,%esp 1.45 +.Lrestore_sregs_guest: 1.46 +.Lft1: mov UREGS_ds-UREGS_eip(%esp),%ds 1.47 +.Lft2: mov UREGS_es-UREGS_eip(%esp),%es 1.48 +.Lft3: mov UREGS_fs-UREGS_eip(%esp),%fs 1.49 +.Lft4: mov UREGS_gs-UREGS_eip(%esp),%gs 1.50 +.Lrestore_iret_guest: 1.51 .Lft5: iret 1.52 .section .fixup,"ax" 1.53 -.Lfx5: subl $28,%esp 1.54 +.Lfx1: subl $28,%esp 1.55 pushl 28(%esp) # error_code/entry_vector 1.56 movl %eax,UREGS_eax+4(%esp) 1.57 movl %ebp,UREGS_ebp+4(%esp) 1.58 @@ -109,9 +108,6 @@ restore_all_vm86: 1.59 movl %edx,UREGS_edx+4(%esp) 1.60 movl %ecx,UREGS_ecx+4(%esp) 1.61 movl %ebx,UREGS_ebx+4(%esp) 1.62 -.Lfx1: SET_XEN_SEGMENTS(a) 1.63 - movl %eax,%fs 1.64 - movl %eax,%gs 1.65 sti 1.66 popl %esi 1.67 pushfl # EFLAGS 1.68 @@ -147,7 +143,7 @@ 1: call create_bounce_frame 1.69 .long .Lft2,.Lfx1 1.70 .long .Lft3,.Lfx1 1.71 .long .Lft4,.Lfx1 1.72 - .long .Lft5,.Lfx5 1.73 + .long .Lft5,.Lfx1 1.74 
.previous 1.75 .section __ex_table,"a" 1.76 .long .Ldf1,failsafe_callback 1.77 @@ -169,8 +165,8 @@ restore_all_xen: 1.78 ENTRY(hypercall) 1.79 subl $4,%esp 1.80 FIXUP_RING0_GUEST_STACK 1.81 - SAVE_ALL(b) 1.82 - sti 1.83 + SAVE_ALL(1f,1f) 1.84 +1: sti 1.85 GET_CURRENT(%ebx) 1.86 cmpl $NR_hypercalls,%eax 1.87 jae bad_hypercall 1.88 @@ -420,9 +416,14 @@ ENTRY(divide_error) 1.89 ALIGN 1.90 handle_exception: 1.91 FIXUP_RING0_GUEST_STACK 1.92 - SAVE_ALL_NOSEGREGS(a) 1.93 - SET_XEN_SEGMENTS(a) 1.94 - testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%esp) 1.95 + SAVE_ALL(1f,2f) 1.96 + .text 1 1.97 + /* Exception within Xen: make sure we have valid %ds,%es. */ 1.98 +1: mov %ecx,%ds 1.99 + mov %ecx,%es 1.100 + jmp 2f 1.101 + .previous 1.102 +2: testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%esp) 1.103 jz exception_with_ints_disabled 1.104 sti # re-enable interrupts 1.105 1: xorl %eax,%eax 1.106 @@ -542,9 +543,9 @@ ENTRY(spurious_interrupt_bug) 1.107 jmp handle_exception 1.108 1.109 ENTRY(early_page_fault) 1.110 - SAVE_ALL_NOSEGREGS(a) 1.111 - movl %esp,%edx 1.112 - pushl %edx 1.113 + SAVE_ALL(1f,1f) 1.114 +1: movl %esp,%eax 1.115 + pushl %eax 1.116 call do_early_page_fault 1.117 addl $4,%esp 1.118 jmp restore_all_xen 1.119 @@ -555,49 +556,53 @@ ENTRY(nmi) 1.120 iret 1.121 #else 1.122 # Save state but do not trash the segment registers! 1.123 - # We may otherwise be unable to reload them or copy them to ring 1. 1.124 + pushl $TRAP_nmi<<16 1.125 + SAVE_ALL(.Lnmi_xen,.Lnmi_common) 1.126 +.Lnmi_common: 1.127 + movl %esp,%eax 1.128 pushl %eax 1.129 - SAVE_ALL_NOSEGREGS(a) 1.130 - 1.131 - # We can only process the NMI if: 1.132 - # A. We are the outermost Xen activation (in which case we have 1.133 - # the selectors safely saved on our stack) 1.134 - # B. DS and ES contain sane Xen values. 1.135 - # In all other cases we bail without touching DS-GS, as we have 1.136 - # interrupted an enclosing Xen activation in tricky prologue or 1.137 - # epilogue code. 
1.138 - movl UREGS_eflags(%esp),%eax 1.139 - movb UREGS_cs(%esp),%al 1.140 - testl $(3|X86_EFLAGS_VM),%eax 1.141 - jnz continue_nmi 1.142 - movl %ds,%eax 1.143 - cmpw $(__HYPERVISOR_DS),%ax 1.144 - jne defer_nmi 1.145 - movl %es,%eax 1.146 - cmpw $(__HYPERVISOR_DS),%ax 1.147 - jne defer_nmi 1.148 - 1.149 -continue_nmi: 1.150 - SET_XEN_SEGMENTS(d) 1.151 - movl %esp,%edx 1.152 - pushl %edx 1.153 call do_nmi 1.154 addl $4,%esp 1.155 + /* 1.156 + * NB. We may return to Xen context with polluted %ds/%es. But in such 1.157 + * cases we have put guest DS/ES on the guest stack frame, which will 1.158 + * be detected by SAVE_ALL(), or we have rolled back restore_guest. 1.159 + */ 1.160 jmp ret_from_intr 1.161 - 1.162 -defer_nmi: 1.163 - movl $FIXMAP_apic_base,%eax 1.164 - # apic_wait_icr_idle() 1.165 -1: movl %ss:APIC_ICR(%eax),%ebx 1.166 - testl $APIC_ICR_BUSY,%ebx 1.167 - jnz 1b 1.168 - # __send_IPI_shortcut(APIC_DEST_SELF, TRAP_deferred_nmi) 1.169 - movl $(APIC_DM_FIXED | APIC_DEST_SELF | APIC_DEST_PHYSICAL | \ 1.170 - TRAP_deferred_nmi),%ss:APIC_ICR(%eax) 1.171 - jmp restore_all_xen 1.172 +.Lnmi_xen: 1.173 + /* Check the outer (guest) context for %ds/%es state validity. */ 1.174 + GET_GUEST_REGS(%ebx) 1.175 + testl $X86_EFLAGS_VM,%ss:UREGS_eflags(%ebx) 1.176 + mov %ds,%eax 1.177 + mov %es,%edx 1.178 + jnz .Lnmi_vm86 1.179 + /* We may have interrupted Xen while messing with %ds/%es... 
*/ 1.180 + cmpw %ax,%cx 1.181 + mov %ecx,%ds /* Ensure %ds is valid */ 1.182 + cmove UREGS_ds(%ebx),%eax /* Grab guest DS if it wasn't in %ds */ 1.183 + cmpw %dx,%cx 1.184 + movl %eax,UREGS_ds(%ebx) /* Ensure guest frame contains guest DS */ 1.185 + cmove UREGS_es(%ebx),%edx /* Grab guest ES if it wasn't in %es */ 1.186 + mov %ecx,%es /* Ensure %es is valid */ 1.187 + movl $.Lrestore_sregs_guest,%ecx 1.188 + movl %edx,UREGS_es(%ebx) /* Ensure guest frame contains guest ES */ 1.189 + cmpl %ecx,UREGS_eip(%esp) 1.190 + jbe .Lnmi_common 1.191 + cmpl $.Lrestore_iret_guest,UREGS_eip(%esp) 1.192 + ja .Lnmi_common 1.193 + /* Roll outer context restore_guest back to restoring %ds/%es. */ 1.194 + movl %ecx,UREGS_eip(%esp) 1.195 + jmp .Lnmi_common 1.196 +.Lnmi_vm86: 1.197 + /* vm86 is easy: the CPU saved %ds/%es so we can safely stomp them. */ 1.198 + mov %ecx,%ds 1.199 + mov %ecx,%es 1.200 + jmp .Lnmi_common 1.201 #endif /* !CONFIG_X86_SUPERVISOR_MODE_KERNEL */ 1.202 1.203 ENTRY(setup_vm86_frame) 1.204 + mov %ecx,%ds 1.205 + mov %ecx,%es 1.206 # Copies the entire stack frame forwards by 16 bytes. 1.207 .macro copy_vm86_words count=18 1.208 .if \count
2.1 --- a/xen/arch/x86/x86_32/supervisor_mode_kernel.S Thu Jun 21 12:10:01 2007 +0100 2.2 +++ b/xen/arch/x86/x86_32/supervisor_mode_kernel.S Thu Jun 21 12:13:06 2007 +0100 2.3 @@ -20,40 +20,45 @@ 2.4 #include <asm/asm_defns.h> 2.5 #include <public/xen.h> 2.6 2.7 +#define guestreg(field) ((field)-UREGS_eip+36) 2.8 + 2.9 # Upon entry the stack should be the Xen stack and contain: 2.10 - # %ss, %esp, EFLAGS, %cs|1, %eip, ERROR, SAVE_ALL, RETURN 2.11 + # %ss, %esp, EFLAGS, %cs|1, %eip, RETURN 2.12 # On exit the stack should be %ss:%esp (i.e. the guest stack) 2.13 # and contain: 2.14 - # EFLAGS, %cs, %eip, ERROR, SAVE_ALL, RETURN 2.15 + # EFLAGS, %cs, %eip, RETURN 2.16 ALIGN 2.17 ENTRY(restore_ring0_guest) 2.18 + pusha 2.19 + 2.20 # Point %gs:%esi to guest stack. 2.21 -RRG0: movw UREGS_ss+4(%esp),%gs 2.22 - movl UREGS_esp+4(%esp),%esi 2.23 +RRG0: movw guestreg(UREGS_ss)(%esp),%gs 2.24 + movl guestreg(UREGS_esp)(%esp),%esi 2.25 2.26 - # Copy EFLAGS...EBX, RETURN from Xen stack to guest stack. 2.27 - movl $(UREGS_kernel_sizeof>>2)+1,%ecx 2.28 + # Copy EFLAGS, %cs, %eip, RETURN, PUSHA from Xen stack to guest stack. 2.29 + movl $12,%ecx /* 12 32-bit values */ 2.30 2.31 1: subl $4,%esi 2.32 movl -4(%esp,%ecx,4),%eax 2.33 RRG1: movl %eax,%gs:(%esi) 2.34 loop 1b 2.35 2.36 -RRG2: andl $~3,%gs:UREGS_cs+4(%esi) 2.37 +RRG2: andl $~3,%gs:guestreg(UREGS_cs)(%esi) 2.38 2.39 movl %gs,%eax 2.40 2.41 # We need to do this because these registers are not present 2.42 # on the guest stack so they cannot be restored by the code in 2.43 # restore_all_guest. 
2.44 -RRG3: mov UREGS_ds+4(%esp),%ds 2.45 -RRG4: mov UREGS_es+4(%esp),%es 2.46 -RRG5: mov UREGS_fs+4(%esp),%fs 2.47 -RRG6: mov UREGS_gs+4(%esp),%gs 2.48 +RRG3: mov guestreg(UREGS_ds)(%esp),%ds 2.49 +RRG4: mov guestreg(UREGS_es)(%esp),%es 2.50 +RRG5: mov guestreg(UREGS_fs)(%esp),%fs 2.51 +RRG6: mov guestreg(UREGS_gs)(%esp),%gs 2.52 2.53 RRG7: movl %eax,%ss 2.54 movl %esi,%esp 2.55 2.56 + popa 2.57 ret 2.58 .section __ex_table,"a" 2.59 .long RRG0,domain_crash_synchronous
3.1 --- a/xen/arch/x86/x86_32/traps.c Thu Jun 21 12:10:01 2007 +0100 3.2 +++ b/xen/arch/x86/x86_32/traps.c Thu Jun 21 12:13:06 2007 +0100 3.3 @@ -232,15 +232,6 @@ unsigned long do_iret(void) 3.4 return 0; 3.5 } 3.6 3.7 -#include <asm/asm_defns.h> 3.8 -BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi) 3.9 -fastcall void smp_deferred_nmi(struct cpu_user_regs *regs) 3.10 -{ 3.11 - asmlinkage void do_nmi(struct cpu_user_regs *); 3.12 - ack_APIC_irq(); 3.13 - do_nmi(regs); 3.14 -} 3.15 - 3.16 void __init percpu_traps_init(void) 3.17 { 3.18 struct tss_struct *tss = &doublefault_tss; 3.19 @@ -252,8 +243,6 @@ void __init percpu_traps_init(void) 3.20 /* The hypercall entry vector is only accessible from ring 1. */ 3.21 _set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall); 3.22 3.23 - set_intr_gate(TRAP_deferred_nmi, &deferred_nmi); 3.24 - 3.25 /* 3.26 * Make a separate task for double faults. This will get us debug output if 3.27 * we blow the kernel stack.
4.1 --- a/xen/include/asm-x86/processor.h Thu Jun 21 12:10:01 2007 +0100 4.2 +++ b/xen/include/asm-x86/processor.h Thu Jun 21 12:13:06 2007 +0100 4.3 @@ -104,7 +104,6 @@ 4.4 #define TRAP_alignment_check 17 4.5 #define TRAP_machine_check 18 4.6 #define TRAP_simd_error 19 4.7 -#define TRAP_deferred_nmi 31 4.8 4.9 /* Set for entry via SYSCALL. Informs return code to use SYSRETQ not IRETQ. */ 4.10 /* NB. Same as VGCF_in_syscall. No bits in common with any other TRAP_ defn. */
5.1 --- a/xen/include/asm-x86/x86_32/asm_defns.h Thu Jun 21 12:10:01 2007 +0100 5.2 +++ b/xen/include/asm-x86/x86_32/asm_defns.h Thu Jun 21 12:13:06 2007 +0100 5.3 @@ -26,7 +26,16 @@ 1: addl $4,%esp; 5.4 #define ASSERT_INTERRUPTS_ENABLED ASSERT_INTERRUPT_STATUS(nz) 5.5 #define ASSERT_INTERRUPTS_DISABLED ASSERT_INTERRUPT_STATUS(z) 5.6 5.7 -#define __SAVE_ALL_PRE \ 5.8 +/* 5.9 + * Saves all register state into an exception/interrupt stack frame. 5.10 + * Returns to the caller at <xen_lbl> if the interrupted context is within 5.11 + * Xen; at <vm86_lbl> if the interrupted context is vm86; or falls through 5.12 + * if the interrupted context is an ordinary guest protected-mode context. 5.13 + * In all cases %ecx contains __HYPERVISOR_DS. %ds/%es are guaranteed to 5.14 + * contain __HYPERVISOR_DS unless control passes to <xen_lbl>, in which case 5.15 + * the caller is reponsible for validity of %ds/%es. 5.16 + */ 5.17 +#define SAVE_ALL(xen_lbl, vm86_lbl) \ 5.18 cld; \ 5.19 pushl %eax; \ 5.20 pushl %ebp; \ 5.21 @@ -37,30 +46,34 @@ 1: addl $4,%esp; 5.22 pushl %ecx; \ 5.23 pushl %ebx; \ 5.24 testl $(X86_EFLAGS_VM),UREGS_eflags(%esp); \ 5.25 - jz 2f; \ 5.26 - call setup_vm86_frame; \ 5.27 - jmp 3f; \ 5.28 - 2:testb $3,UREGS_cs(%esp); \ 5.29 - jz 1f; \ 5.30 - mov %ds,UREGS_ds(%esp); \ 5.31 - mov %es,UREGS_es(%esp); \ 5.32 - mov %fs,UREGS_fs(%esp); \ 5.33 - mov %gs,UREGS_gs(%esp); \ 5.34 - 3: 5.35 - 5.36 -#define SAVE_ALL_NOSEGREGS(_reg) \ 5.37 - __SAVE_ALL_PRE \ 5.38 - 1: 5.39 - 5.40 -#define SET_XEN_SEGMENTS(_reg) \ 5.41 - movl $(__HYPERVISOR_DS),%e ## _reg ## x; \ 5.42 - mov %e ## _reg ## x,%ds; \ 5.43 - mov %e ## _reg ## x,%es; 5.44 - 5.45 -#define SAVE_ALL(_reg) \ 5.46 - __SAVE_ALL_PRE \ 5.47 - SET_XEN_SEGMENTS(_reg) \ 5.48 - 1: 5.49 + mov %ds,%edi; \ 5.50 + mov %es,%esi; \ 5.51 + mov $(__HYPERVISOR_DS),%ecx; \ 5.52 + jnz 86f; \ 5.53 + .text 1; \ 5.54 + 86: call setup_vm86_frame; \ 5.55 + jmp vm86_lbl; \ 5.56 + .previous; \ 5.57 + testb $3,UREGS_cs(%esp); \ 5.58 + jz 
xen_lbl; \ 5.59 + /* \ 5.60 + * We are the outermost Xen context, but our \ 5.61 + * life is complicated by NMIs and MCEs. These \ 5.62 + * could occur in our critical section and \ 5.63 + * pollute %ds and %es. We have to detect that \ 5.64 + * this has occurred and avoid saving Xen DS/ES \ 5.65 + * values to the guest stack frame. \ 5.66 + */ \ 5.67 + cmpw %cx,%di; \ 5.68 + mov %ecx,%ds; \ 5.69 + mov %fs,UREGS_fs(%esp); \ 5.70 + cmove UREGS_ds(%esp),%edi; \ 5.71 + cmpw %cx,%si; \ 5.72 + mov %edi,UREGS_ds(%esp); \ 5.73 + cmove UREGS_es(%esp),%esi; \ 5.74 + mov %ecx,%es; \ 5.75 + mov %gs,UREGS_gs(%esp); \ 5.76 + mov %esi,UREGS_es(%esp) 5.77 5.78 #ifdef PERF_COUNTERS 5.79 #define PERFC_INCR(_name,_idx,_cur) \ 5.80 @@ -97,8 +110,8 @@ asmlinkage void x(void); 5.81 STR(x) ":\n\t" \ 5.82 "pushl $"#v"<<16\n\t" \ 5.83 STR(FIXUP_RING0_GUEST_STACK) \ 5.84 - STR(SAVE_ALL(a)) \ 5.85 - "movl %esp,%eax\n\t" \ 5.86 + STR(SAVE_ALL(1f,1f)) "\n\t" \ 5.87 + "1:movl %esp,%eax\n\t" \ 5.88 "pushl %eax\n\t" \ 5.89 "call "STR(smp_##x)"\n\t" \ 5.90 "addl $4,%esp\n\t" \ 5.91 @@ -109,8 +122,8 @@ asmlinkage void x(void); 5.92 "\n" __ALIGN_STR"\n" \ 5.93 "common_interrupt:\n\t" \ 5.94 STR(FIXUP_RING0_GUEST_STACK) \ 5.95 - STR(SAVE_ALL(a)) \ 5.96 - "movl %esp,%eax\n\t" \ 5.97 + STR(SAVE_ALL(1f,1f)) "\n\t" \ 5.98 + "1:movl %esp,%eax\n\t" \ 5.99 "pushl %eax\n\t" \ 5.100 "call " STR(do_IRQ) "\n\t" \ 5.101 "addl $4,%esp\n\t" \