or $X86_EFLAGS_MBS,%rax
mov %rax,VMCB_rflags(%rcx)
+ mov VCPU_arch_msr(%rbx), %rax
+ mov VCPUMSR_spec_ctrl_raw(%rax), %eax
+
+ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+ SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
+
pop %r15
pop %r14
pop %r13
SAVE_ALL
GET_CURRENT(bx)
- mov VCPU_svm_vmcb(%rbx),%rcx
+ SPEC_CTRL_ENTRY_FROM_VMEXIT /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */
+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
+ mov VCPU_svm_vmcb(%rbx),%rcx
movb $0,VCPU_svm_vmcb_in_sync(%rbx)
mov VMCB_rax(%rcx),%rax
mov %rax,UREGS_rax(%rsp)
movb $1,VCPU_vmx_launched(%rbx)
mov %rax,VCPU_hvm_guest_cr2(%rbx)
+ SPEC_CTRL_ENTRY_FROM_VMEXIT /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */
+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
mov %rsp,%rdi
call vmx_vmexit_handler
call vmx_vmenter_helper
test %al, %al
jz .Lvmx_vmentry_restart
+
+ mov VCPU_arch_msr(%rbx), %rax
+ mov VCPUMSR_spec_ctrl_raw(%rax), %eax
+
+ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+ SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
+
mov VCPU_hvm_guest_cr2(%rbx),%rax
pop %r15
.Lvmx_vmentry_fail:
sti
SAVE_ALL
+
+ /*
+ * PV variant needed here as no guest code has executed (so
+ * MSR_SPEC_CTRL can't have changed value), and NMIs/MCEs are liable
+ * to hit (in which case the HVM variant might corrupt things).
+ */
+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo Clob: acd */
+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
call vmx_vmentry_failure
BUG /* vmx_vmentry_failure() shouldn't return. */
set_processor_id(0);
set_current(INVALID_VCPU); /* debug sanity. */
idle_vcpu[0] = current;
+ init_shadow_spec_ctrl_state();
percpu_init_areas();
#include <asm/guest.h>
#include <asm/msr.h>
#include <asm/mtrr.h>
+#include <asm/spec_ctrl.h>
#include <asm/time.h>
#include <asm/tboot.h>
#include <mach_apic.h>
set_current(idle_vcpu[cpu]);
this_cpu(curr_vcpu) = idle_vcpu[cpu];
rdmsrl(MSR_EFER, this_cpu(efer));
+ init_shadow_spec_ctrl_state();
/*
* Just as during early bootstrap, it is convenient here to disable
OFFSET(VCPU_kernel_ss, struct vcpu, arch.pv_vcpu.kernel_ss);
OFFSET(VCPU_iopl, struct vcpu, arch.pv_vcpu.iopl);
OFFSET(VCPU_guest_context_flags, struct vcpu, arch.vgc_flags);
+ OFFSET(VCPU_arch_msr, struct vcpu, arch.msr);
OFFSET(VCPU_nmi_pending, struct vcpu, nmi_pending);
OFFSET(VCPU_mce_pending, struct vcpu, mce_pending);
OFFSET(VCPU_nmi_old_mask, struct vcpu, nmi_state.old_mask);
OFFSET(CPUINFO_cr4, struct cpu_info, cr4);
OFFSET(CPUINFO_xen_cr3, struct cpu_info, xen_cr3);
OFFSET(CPUINFO_pv_cr3, struct cpu_info, pv_cr3);
+ OFFSET(CPUINFO_shadow_spec_ctrl, struct cpu_info, shadow_spec_ctrl);
+ OFFSET(CPUINFO_use_shadow_spec_ctrl, struct cpu_info, use_shadow_spec_ctrl);
DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
BLANK();
OFFSET(TRAPBOUNCE_eip, struct trap_bounce, eip);
BLANK();
+ OFFSET(VCPUMSR_spec_ctrl_raw, struct msr_vcpu_policy, spec_ctrl.raw);
+ BLANK();
+
#ifdef CONFIG_PERF_COUNTERS
DEFINE(ASM_PERFC_exceptions, PERFC_exceptions);
BLANK();
pushq $0
movl $HYPERCALL_VECTOR, 4(%rsp)
SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. */
+
+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */
+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
CR4_PV32_RESTORE
GET_CURRENT(bx)
.popsection
or $X86_EFLAGS_IF,%r11
mov %r11d,UREGS_eflags(%rsp)
+
+ mov VCPU_arch_msr(%rbx), %rax
+ mov VCPUMSR_spec_ctrl_raw(%rax), %eax
+
+ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+ SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
+
RESTORE_ALL adj=8 compat=1
.Lft0: iretq
_ASM_PRE_EXTABLE(.Lft0, handle_exception)
movl $TRAP_syscall, 4(%rsp)
SAVE_ALL
+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */
+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
GET_STACK_END(bx)
mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
neg %rcx
restore_all_guest:
ASSERT_INTERRUPTS_DISABLED
+ /* Stash guest SPEC_CTRL value while we can read struct vcpu. */
+ mov VCPU_arch_msr(%rbx), %rdx
+ mov VCPUMSR_spec_ctrl_raw(%rdx), %r15d
+
/* Copy guest mappings and switch to per-CPU root page table. */
mov %cr3, %r9
GET_STACK_END(dx)
write_cr3 rax, rdi, rsi
.Lrag_keep_cr3:
+ /* Restore stashed SPEC_CTRL value. */
+ mov %r15d, %eax
+
+ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+ SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
+
RESTORE_ALL
testw $TRAP_syscall,4(%rsp)
jz iret_exit_to_guest
* Check whether we need to switch to the per-CPU page tables, in
* case we return to late PV exit code (from an NMI or #MC).
*/
- GET_STACK_END(ax)
- mov STACK_CPUINFO_FIELD(xen_cr3)(%rax), %rdx
- mov STACK_CPUINFO_FIELD(pv_cr3)(%rax), %rax
+ GET_STACK_END(bx)
+ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rdx
+ mov STACK_CPUINFO_FIELD(pv_cr3)(%rbx), %rax
test %rdx, %rdx
/*
* Ideally the condition would be "nsz", but such doesn't exist,
write_cr3 rax, rdi, rsi
UNLIKELY_END(exit_cr3)
+ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+ SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */
+
RESTORE_ALL adj=8
iretq
movl $TRAP_syscall, 4(%rsp)
SAVE_ALL
+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */
+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
GET_STACK_END(bx)
mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
neg %rcx
movl $TRAP_syscall, 4(%rsp)
SAVE_ALL
+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */
+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
GET_STACK_END(bx)
mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
neg %rcx
movl $0x80, 4(%rsp)
SAVE_ALL
+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */
+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
GET_STACK_END(bx)
mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
neg %rcx
SAVE_ALL CLAC
GET_STACK_END(14)
+
+ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */
+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
mov %rcx, %r15
neg %rcx
SAVE_ALL CLAC
GET_STACK_END(14)
+
+ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */
+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
mov %rcx, %r15
neg %rcx
/* Set AC to reduce chance of further SMAP faults */
SAVE_ALL STAC
- GET_STACK_END(bx)
- mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rbx
+ GET_STACK_END(14)
+
+ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */
+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
+ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rbx
test %rbx, %rbx
jz .Ldblf_cr3_okay
jns .Ldblf_cr3_load
SAVE_ALL CLAC
GET_STACK_END(14)
+
+ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */
+ /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
mov %rcx, %r15
neg %rcx
#include <asm/asm-offsets.h>
#endif
#include <asm/bug.h>
+#include <asm/page.h>
#include <asm/processor.h>
#include <asm/percpu.h>
#include <xen/stringify.h>
4: .p2align 2 ; \
.popsection
+#include <asm/spec_ctrl_asm.h>
+
#endif /* __X86_ASM_DEFNS_H__ */
*/
unsigned long xen_cr3;
unsigned long pv_cr3;
+
+ /* See asm-x86/spec_ctrl_asm.h for usage. */
+ unsigned int shadow_spec_ctrl;
+ bool use_shadow_spec_ctrl;
+
+ unsigned long __pad;
/* get_stack_bottom() must be 16-byte aligned */
};
#define ASM_NOP8 _ASM_MK_NOP(P6_NOP8)
#define ASM_NOP9 _ASM_MK_NOP(P6_NOP9)
+#define ASM_NOP17 ASM_NOP8; ASM_NOP7; ASM_NOP2
+#define ASM_NOP21 ASM_NOP8; ASM_NOP8; ASM_NOP5
+#define ASM_NOP24 ASM_NOP8; ASM_NOP8; ASM_NOP8
+#define ASM_NOP29 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP5
+#define ASM_NOP32 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8
+
#define ASM_NOP_MAX 9
#endif /* __X86_ASM_NOPS_H__ */
#ifndef __X86_SPEC_CTRL_H__
#define __X86_SPEC_CTRL_H__
+#include <asm/current.h>
+
void init_speculation_mitigations(void);
+static inline void init_shadow_spec_ctrl_state(void)
+{
+ struct cpu_info *info = get_cpu_info();
+
+ info->shadow_spec_ctrl = info->use_shadow_spec_ctrl = 0;
+}
+
#endif /* !__X86_SPEC_CTRL_H__ */
/*
--- /dev/null
+/******************************************************************************
+ * include/asm-x86/spec_ctrl_asm.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright (c) 2017-2018 Citrix Systems Ltd.
+ */
+
+#ifndef __X86_SPEC_CTRL_ASM_H__
+#define __X86_SPEC_CTRL_ASM_H__
+
+#ifdef __ASSEMBLY__
+#include <asm/msr-index.h>
+
+/*
+ * Saving and restoring MSR_SPEC_CTRL state is a little tricky.
+ *
+ * We want the guest's choice of SPEC_CTRL while in guest context, and Xen's
+ * choice (set or clear, depending on the hardware) while running in Xen
+ * context. Therefore, a simplistic algorithm is:
+ *
+ * - Set/clear IBRS on entry to Xen
+ * - Set the guest's choice on exit to guest
+ * - Leave SPEC_CTRL unchanged on exit to Xen
+ *
+ * There are two complicating factors:
+ * 1) HVM guests can have direct access to the MSR, so it can change
+ * behind Xen's back.
+ * 2) An NMI or MCE can interrupt at any point, including early in the entry
+ * path, or late in the exit path after restoring the guest value. This
+ * will corrupt the guest value.
+ *
+ * Factor 1 is dealt with by relying on NMIs/MCEs being blocked immediately
+ * after VMEXIT. The VMEXIT-specific code reads MSR_SPEC_CTRL and updates
+ * current before loading Xen's MSR_SPEC_CTRL setting.
+ *
+ * Factor 2 is harder. We maintain a shadow_spec_ctrl value and a
+ * use_shadow_spec_ctrl boolean per CPU. The synchronous use is:
+ *
+ * 1) Store guest value in shadow_spec_ctrl
+ * 2) Set use_shadow_spec_ctrl boolean
+ * 3) Load guest value into MSR_SPEC_CTRL
+ * 4) Exit to guest
+ * 5) Entry from guest
+ * 6) Clear use_shadow_spec_ctrl boolean
+ * 7) Load Xen's value into MSR_SPEC_CTRL
+ *
+ * The asynchronous use for interrupts/exceptions is:
+ * - Set/clear IBRS on entry to Xen
+ * - On exit to Xen, check use_shadow_spec_ctrl
+ * - If set, load shadow_spec_ctrl
+ *
+ * Therefore, an interrupt/exception which hits the synchronous path between
+ * steps 2 and 6 will restore the shadow value rather than leaving Xen's value
+ * loaded and corrupting the value used in guest context.
+ *
+ * The following ASM fragments implement this algorithm. See their local
+ * comments for further details.
+ * - SPEC_CTRL_ENTRY_FROM_VMEXIT
+ * - SPEC_CTRL_ENTRY_FROM_PV
+ * - SPEC_CTRL_ENTRY_FROM_INTR
+ * - SPEC_CTRL_EXIT_TO_XEN
+ * - SPEC_CTRL_EXIT_TO_GUEST
+ */
+
+.macro DO_SPEC_CTRL_ENTRY_FROM_VMEXIT ibrs_val:req
+/*
+ * Requires %rbx=current, %rsp=regs/cpuinfo
+ * Clobbers %rax, %rcx, %rdx
+ *
+ * The common case is that a guest has direct access to MSR_SPEC_CTRL, at
+ * which point we need to save the guest value before setting IBRS for Xen.
+ * Unilaterally saving the guest value is shorter and faster than checking.
+ */
+ mov $MSR_SPEC_CTRL, %ecx
+ rdmsr
+
+ /* Stash the value from hardware. */
+ mov VCPU_arch_msr(%rbx), %rdx
+ mov %eax, VCPUMSR_spec_ctrl_raw(%rdx)
+ xor %edx, %edx
+
+ /* Clear SPEC_CTRL shadowing *before* loading Xen's value. */
+ movb %dl, CPUINFO_use_shadow_spec_ctrl(%rsp)
+
+ /* Load Xen's intended value. */
+ mov $\ibrs_val, %eax
+ wrmsr
+.endm
+
+.macro DO_SPEC_CTRL_ENTRY maybexen:req ibrs_val:req
+/*
+ * Requires %rsp=regs (also cpuinfo if !maybexen)
+ * Requires %r14=stack_end (if maybexen)
+ * Clobbers %rax, %rcx, %rdx
+ *
+ * PV guests can't update MSR_SPEC_CTRL behind Xen's back, so no need to read
+ * it back. Entries from guest context need to clear SPEC_CTRL shadowing,
+ * while entries from Xen must leave shadowing in its current state.
+ */
+ mov $MSR_SPEC_CTRL, %ecx
+ xor %edx, %edx
+
+ /*
+ * Clear SPEC_CTRL shadowing *before* loading Xen's value. If entering
+ * from a possibly-xen context, %rsp doesn't necessarily alias the cpuinfo
+ * block so calculate the position directly.
+ */
+ .if \maybexen
+ /* Branchless `if ( !xen ) clear_shadowing` */
+ testb $3, UREGS_cs(%rsp)
+ setz %al
+ and %al, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%r14)
+ .else
+ movb %dl, CPUINFO_use_shadow_spec_ctrl(%rsp)
+ .endif
+
+ /* Load Xen's intended value. */
+ mov $\ibrs_val, %eax
+ wrmsr
+.endm
+
+.macro DO_SPEC_CTRL_EXIT_TO_XEN
+/*
+ * Requires %rbx=stack_end
+ * Clobbers %rax, %rcx, %rdx
+ *
+ * When returning to Xen context, look to see whether SPEC_CTRL shadowing is
+ * in effect, and reload the shadow value. This covers race conditions which
+ * exist with an NMI/MCE/etc hitting late in the return-to-guest path.
+ */
+ xor %edx, %edx
+
+ cmpb %dl, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%rbx)
+ je .L\@_skip
+
+ mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax
+ mov $MSR_SPEC_CTRL, %ecx
+ wrmsr
+
+.L\@_skip:
+.endm
+
+.macro DO_SPEC_CTRL_EXIT_TO_GUEST
+/*
+ * Requires %eax=spec_ctrl, %rsp=regs/cpuinfo
+ * Clobbers %rcx, %rdx
+ *
+ * When returning to guest context, set up SPEC_CTRL shadowing and load the
+ * guest value.
+ */
+ /* Set up shadow value *before* enabling shadowing. */
+ mov %eax, CPUINFO_shadow_spec_ctrl(%rsp)
+
+ /* Set SPEC_CTRL shadowing *before* loading the guest value. */
+ movb $1, CPUINFO_use_shadow_spec_ctrl(%rsp)
+
+ mov $MSR_SPEC_CTRL, %ecx
+ xor %edx, %edx
+ wrmsr
+.endm
+
+/* Use after a VMEXIT from an HVM guest. */
+#define SPEC_CTRL_ENTRY_FROM_VMEXIT \
+ ALTERNATIVE_2 __stringify(ASM_NOP32), \
+ __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT \
+ ibrs_val=SPEC_CTRL_IBRS), \
+ X86_FEATURE_XEN_IBRS_SET, \
+ __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT \
+ ibrs_val=0), \
+ X86_FEATURE_XEN_IBRS_CLEAR
+
+/* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */
+#define SPEC_CTRL_ENTRY_FROM_PV \
+ ALTERNATIVE_2 __stringify(ASM_NOP21), \
+ __stringify(DO_SPEC_CTRL_ENTRY maybexen=0 \
+ ibrs_val=SPEC_CTRL_IBRS), \
+ X86_FEATURE_XEN_IBRS_SET, \
+ __stringify(DO_SPEC_CTRL_ENTRY maybexen=0 ibrs_val=0), \
+ X86_FEATURE_XEN_IBRS_CLEAR
+
+/* Use in interrupt/exception context. May interrupt Xen or PV context. */
+#define SPEC_CTRL_ENTRY_FROM_INTR \
+ ALTERNATIVE_2 __stringify(ASM_NOP29), \
+ __stringify(DO_SPEC_CTRL_ENTRY maybexen=1 \
+ ibrs_val=SPEC_CTRL_IBRS), \
+ X86_FEATURE_XEN_IBRS_SET, \
+ __stringify(DO_SPEC_CTRL_ENTRY maybexen=1 ibrs_val=0), \
+ X86_FEATURE_XEN_IBRS_CLEAR
+
+/* Use when exiting to Xen context. */
+#define SPEC_CTRL_EXIT_TO_XEN \
+ ALTERNATIVE_2 __stringify(ASM_NOP17), \
+ DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_XEN_IBRS_SET, \
+ DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_XEN_IBRS_CLEAR
+
+/* Use when exiting to guest context. */
+#define SPEC_CTRL_EXIT_TO_GUEST \
+ ALTERNATIVE_2 __stringify(ASM_NOP24), \
+ DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_SET, \
+ DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_CLEAR
+
+#endif /* __ASSEMBLY__ */
+#endif /* !__X86_SPEC_CTRL_ASM_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
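
For reference, below is a minimal C sketch of the shadowing protocol described in the spec_ctrl_asm.h comment block above. The names here (spec_ctrl_exit_to_guest(), msr_spec_ctrl, the per-CPU struct, etc.) are illustrative stand-ins for the SPEC_CTRL_* assembly macros and the real rdmsr/wrmsr accesses; they are not part of the patch, and the ordering comments mirror the "before" constraints in the macros above.

/*
 * Illustrative sketch only; the real implementation is the assembly above.
 * A fake MSR variable stands in for MSR_SPEC_CTRL so the race handling can
 * be demonstrated in plain C.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SPEC_CTRL_IBRS 0x1u                      /* IBRS bit of the MSR */

static uint32_t msr_spec_ctrl;                   /* stand-in for the hardware MSR */
static uint32_t xen_spec_ctrl = SPEC_CTRL_IBRS;  /* Xen's chosen value */

/* Per-CPU state, mirroring the new struct cpu_info fields. */
static struct {
    uint32_t shadow_spec_ctrl;
    bool use_shadow_spec_ctrl;
} cpu;

/* Steps 1-3: set up shadowing, then load the guest value (exit to guest). */
static void spec_ctrl_exit_to_guest(uint32_t guest_val)
{
    cpu.shadow_spec_ctrl = guest_val;    /* 1) stash the guest value */
    cpu.use_shadow_spec_ctrl = true;     /* 2) enable shadowing *before*... */
    msr_spec_ctrl = guest_val;           /* 3) ...loading it into the MSR */
}

/* Steps 6-7: disable shadowing, then load Xen's value (entry from guest). */
static void spec_ctrl_entry_from_guest(void)
{
    cpu.use_shadow_spec_ctrl = false;    /* 6) clear shadowing first */
    msr_spec_ctrl = xen_spec_ctrl;       /* 7) then load Xen's value */
}

/* Asynchronous path: an NMI/MCE returning to the interrupted Xen code. */
static void spec_ctrl_exit_to_xen(void)
{
    /*
     * If the interrupt landed between steps 2 and 6, its own entry path has
     * loaded Xen's value into the MSR; restore the guest value from the
     * shadow so the interrupted return-to-guest path stays correct.
     */
    if ( cpu.use_shadow_spec_ctrl )
        msr_spec_ctrl = cpu.shadow_spec_ctrl;
}

int main(void)
{
    spec_ctrl_exit_to_guest(0);          /* guest wants SPEC_CTRL = 0 */

    /* An NMI hits after step 3, before the actual return to guest: */
    msr_spec_ctrl = xen_spec_ctrl;       /* NMI entry loads Xen's value */
    spec_ctrl_exit_to_xen();             /* NMI exit restores the shadow */

    printf("MSR_SPEC_CTRL seen by guest: %#x\n", msr_spec_ctrl); /* 0 */

    spec_ctrl_entry_from_guest();        /* next entry reinstates Xen's value */
    return 0;
}

The sketch also shows why the ordering in the macros matters: the shadow value must be written and shadowing enabled before the guest value reaches the MSR, and shadowing must be cleared before Xen's value is loaded, so that an interrupt observing use_shadow_spec_ctrl always finds a consistent shadow to restore.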