x86/entry: Organise the use of MSR_SPEC_CTRL at each entry/exit point
author     Andrew Cooper <andrew.cooper3@citrix.com>
           Fri, 3 Nov 2017 16:17:00 +0000 (16:17 +0000)
committer  Andrew Cooper <andrew.cooper3@citrix.com>
           Fri, 26 Jan 2018 14:10:21 +0000 (14:10 +0000)
We need to be able to either set or clear IBRS in Xen context, as well as
restore the appropriate guest value in guest context.  See the documentation
in asm-x86/spec_ctrl_asm.h for details.

With contemporary microcode, writes to %cr3 are slower when SPEC_CTRL.IBRS
is set.  Therefore, the positioning of SPEC_CTRL_{ENTRY/EXIT}* is important.

Ideally, the IBRS_SET/IBRS_CLEAR hunks might be positioned on either side of
the %cr3 change, but that is rather more complicated to arrange, and could
still result in a guest-controlled value in SPEC_CTRL during the %cr3 change,
negating the saving if the guest chose to have IBRS set.

Therefore, we optimise for the pre-Skylake case (currently far more common in
the field than Skylake and later), where Xen's preferred value has IBRS clear
when switching %cr3.
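
As an illustration, a conceptual C sketch of the resulting exit-to-guest
ordering (switch_to_guest_cr3() is a hypothetical stand-in; the real
implementation is the asm below):

    /* Conceptual sketch only, not the literal asm. */
    void exit_to_guest_sketch(struct vcpu *curr)
    {
        /* Stash the guest's value while struct vcpu is still in reach. */
        uint32_t guest_val = curr->arch.msr->spec_ctrl.raw;

        /* The slow %cr3 write runs with Xen's value (IBRS clear here). */
        switch_to_guest_cr3();

        /* Only now load the guest-controlled value. */
        wrmsrl(MSR_SPEC_CTRL, guest_val);
    }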

There is a semi-unrelated bugfix, where various asm_defns.h macros have a
hidden dependency on PAGE_SIZE, which results in an assembler error if they
are used in a .macro definition.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
12 files changed:
xen/arch/x86/hvm/svm/entry.S
xen/arch/x86/hvm/vmx/entry.S
xen/arch/x86/setup.c
xen/arch/x86/smpboot.c
xen/arch/x86/x86_64/asm-offsets.c
xen/arch/x86/x86_64/compat/entry.S
xen/arch/x86/x86_64/entry.S
xen/include/asm-x86/asm_defns.h
xen/include/asm-x86/current.h
xen/include/asm-x86/nops.h
xen/include/asm-x86/spec_ctrl.h
xen/include/asm-x86/spec_ctrl_asm.h [new file with mode: 0644]

diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S
index df86da0a81fc07abc76cf6d75b30226e9faf008a..bf092fe0716be7293a7318bd326356961d894fba 100644
--- a/xen/arch/x86/hvm/svm/entry.S
+++ b/xen/arch/x86/hvm/svm/entry.S
@@ -79,6 +79,12 @@ UNLIKELY_END(svm_trace)
         or   $X86_EFLAGS_MBS,%rax
         mov  %rax,VMCB_rflags(%rcx)
 
+        mov VCPU_arch_msr(%rbx), %rax
+        mov VCPUMSR_spec_ctrl_raw(%rax), %eax
+
+        /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+        SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
+
         pop  %r15
         pop  %r14
         pop  %r13
@@ -101,8 +107,11 @@ UNLIKELY_END(svm_trace)
         SAVE_ALL
 
         GET_CURRENT(bx)
-        mov  VCPU_svm_vmcb(%rbx),%rcx
 
+        SPEC_CTRL_ENTRY_FROM_VMEXIT /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
+        mov  VCPU_svm_vmcb(%rbx),%rcx
         movb $0,VCPU_svm_vmcb_in_sync(%rbx)
         mov  VMCB_rax(%rcx),%rax
         mov  %rax,UREGS_rax(%rsp)
diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S
index b2f98be7f55100fc8746a85d02789de693f8de79..e750544b4bce36413b55e361ad85372e887a461d 100644
--- a/xen/arch/x86/hvm/vmx/entry.S
+++ b/xen/arch/x86/hvm/vmx/entry.S
@@ -38,6 +38,9 @@ ENTRY(vmx_asm_vmexit_handler)
         movb $1,VCPU_vmx_launched(%rbx)
         mov  %rax,VCPU_hvm_guest_cr2(%rbx)
 
+        SPEC_CTRL_ENTRY_FROM_VMEXIT /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
         mov  %rsp,%rdi
         call vmx_vmexit_handler
 
@@ -68,6 +71,13 @@ UNLIKELY_END(realmode)
         call vmx_vmenter_helper
         test %al, %al
         jz .Lvmx_vmentry_restart
+
+        mov VCPU_arch_msr(%rbx), %rax
+        mov VCPUMSR_spec_ctrl_raw(%rax), %eax
+
+        /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+        SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
+
         mov  VCPU_hvm_guest_cr2(%rbx),%rax
 
         pop  %r15
@@ -99,6 +109,15 @@ UNLIKELY_END(realmode)
 .Lvmx_vmentry_fail:
         sti
         SAVE_ALL
+
+        /*
+         * PV variant needed here as no guest code has executed (so
+         * MSR_SPEC_CTRL can't have changed value), and NMIs/MCEs are liable
+         * to hit (in which case the HVM variant might corrupt things).
+         */
+        SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
         call vmx_vmentry_failure
         BUG  /* vmx_vmentry_failure() shouldn't return. */
 
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 9407247955379d1feeded75bfd29065a31934d8a..ac530ece2c55e543d545501b31a72249e2328e14 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -678,6 +678,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
     set_processor_id(0);
     set_current(INVALID_VCPU); /* debug sanity. */
     idle_vcpu[0] = current;
+    init_shadow_spec_ctrl_state();
 
     percpu_init_areas();
 
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index fe637dae405470d48a227573dc14fd53539731f5..2ebef030274e2ff9339d4315f355f3b585ea6349 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -41,6 +41,7 @@
 #include <asm/guest.h>
 #include <asm/msr.h>
 #include <asm/mtrr.h>
+#include <asm/spec_ctrl.h>
 #include <asm/time.h>
 #include <asm/tboot.h>
 #include <mach_apic.h>
@@ -309,6 +310,7 @@ void start_secondary(void *unused)
     set_current(idle_vcpu[cpu]);
     this_cpu(curr_vcpu) = idle_vcpu[cpu];
     rdmsrl(MSR_EFER, this_cpu(efer));
+    init_shadow_spec_ctrl_state();
 
     /*
      * Just as during early bootstrap, it is convenient here to disable
diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
index b1a431097498a479e38f903c4b8207fb8121b5d6..17f1d77320d06210f7ccc0fdd5e92afe3e9557a7 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -88,6 +88,7 @@ void __dummy__(void)
     OFFSET(VCPU_kernel_ss, struct vcpu, arch.pv_vcpu.kernel_ss);
     OFFSET(VCPU_iopl, struct vcpu, arch.pv_vcpu.iopl);
     OFFSET(VCPU_guest_context_flags, struct vcpu, arch.vgc_flags);
+    OFFSET(VCPU_arch_msr, struct vcpu, arch.msr);
     OFFSET(VCPU_nmi_pending, struct vcpu, nmi_pending);
     OFFSET(VCPU_mce_pending, struct vcpu, mce_pending);
     OFFSET(VCPU_nmi_old_mask, struct vcpu, nmi_state.old_mask);
@@ -139,6 +140,8 @@ void __dummy__(void)
     OFFSET(CPUINFO_cr4, struct cpu_info, cr4);
     OFFSET(CPUINFO_xen_cr3, struct cpu_info, xen_cr3);
     OFFSET(CPUINFO_pv_cr3, struct cpu_info, pv_cr3);
+    OFFSET(CPUINFO_shadow_spec_ctrl, struct cpu_info, shadow_spec_ctrl);
+    OFFSET(CPUINFO_use_shadow_spec_ctrl, struct cpu_info, use_shadow_spec_ctrl);
     DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
     BLANK();
 
@@ -154,6 +157,9 @@ void __dummy__(void)
     OFFSET(TRAPBOUNCE_eip, struct trap_bounce, eip);
     BLANK();
 
+    OFFSET(VCPUMSR_spec_ctrl_raw, struct msr_vcpu_policy, spec_ctrl.raw);
+    BLANK();
+
 #ifdef CONFIG_PERF_COUNTERS
     DEFINE(ASM_PERFC_exceptions, PERFC_exceptions);
     BLANK();
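
As background for the asm changes: asm-offsets.c is compiled to assembly and
scraped into asm-offsets.h, turning each OFFSET() into a #define of a struct
member's byte offset for use from .S files.  A minimal sketch of the usual
mechanism (Xen's exact macros may differ):

    /* Sketch: emit each constant where the build can scrape it back out. */
    #define DEFINE(sym, val) \
        asm volatile ( ".ascii \"==>" #sym " %0 <==\"" :: "i" (val) )
    #define OFFSET(sym, str, mem) DEFINE(sym, offsetof(str, mem))
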
diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
index e668f00c364e5bd72a30741c0807affe86c071e2..4190c733a31656376ff451c485f2cad6eef2ffbe 100644
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -18,6 +18,10 @@ ENTRY(entry_int82)
         pushq $0
         movl  $HYPERCALL_VECTOR, 4(%rsp)
         SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. */
+
+        SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
         CR4_PV32_RESTORE
 
         GET_CURRENT(bx)
@@ -142,6 +146,13 @@ ENTRY(compat_restore_all_guest)
         .popsection
         or    $X86_EFLAGS_IF,%r11
         mov   %r11d,UREGS_eflags(%rsp)
+
+        mov VCPU_arch_msr(%rbx), %rax
+        mov VCPUMSR_spec_ctrl_raw(%rax), %eax
+
+        /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+        SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
+
         RESTORE_ALL adj=8 compat=1
 .Lft0:  iretq
         _ASM_PRE_EXTABLE(.Lft0, handle_exception)
@@ -200,6 +211,9 @@ ENTRY(cstar_enter)
         movl  $TRAP_syscall, 4(%rsp)
         SAVE_ALL
 
+        SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
         GET_STACK_END(bx)
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 710c0616ba1bd628c848bcbfbce3810ab3912523..73bd7ca2ad611b58438d69a24cb90f0f535437c8 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -38,6 +38,10 @@ ENTRY(switch_to_kernel)
 restore_all_guest:
         ASSERT_INTERRUPTS_DISABLED
 
+        /* Stash guest SPEC_CTRL value while we can read struct vcpu. */
+        mov VCPU_arch_msr(%rbx), %rdx
+        mov VCPUMSR_spec_ctrl_raw(%rdx), %r15d
+
         /* Copy guest mappings and switch to per-CPU root page table. */
         mov   %cr3, %r9
         GET_STACK_END(dx)
@@ -65,6 +69,12 @@ restore_all_guest:
         write_cr3 rax, rdi, rsi
 .Lrag_keep_cr3:
 
+        /* Restore stashed SPEC_CTRL value. */
+        mov   %r15d, %eax
+
+        /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+        SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
+
         RESTORE_ALL
         testw $TRAP_syscall,4(%rsp)
         jz    iret_exit_to_guest
@@ -103,9 +113,9 @@ restore_all_xen:
          * Check whether we need to switch to the per-CPU page tables, in
          * case we return to late PV exit code (from an NMI or #MC).
          */
-        GET_STACK_END(ax)
-        mov   STACK_CPUINFO_FIELD(xen_cr3)(%rax), %rdx
-        mov   STACK_CPUINFO_FIELD(pv_cr3)(%rax), %rax
+        GET_STACK_END(bx)
+        mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rdx
+        mov   STACK_CPUINFO_FIELD(pv_cr3)(%rbx), %rax
         test  %rdx, %rdx
         /*
          * Ideally the condition would be "nsz", but such doesn't exist,
@@ -115,6 +125,9 @@ UNLIKELY_START(g, exit_cr3)
         write_cr3 rax, rdi, rsi
 UNLIKELY_END(exit_cr3)
 
+        /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+        SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */
+
         RESTORE_ALL adj=8
         iretq
 
@@ -145,6 +158,9 @@ ENTRY(lstar_enter)
         movl  $TRAP_syscall, 4(%rsp)
         SAVE_ALL
 
+        SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
         GET_STACK_END(bx)
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
@@ -248,6 +264,9 @@ GLOBAL(sysenter_eflags_saved)
         movl  $TRAP_syscall, 4(%rsp)
         SAVE_ALL
 
+        SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
         GET_STACK_END(bx)
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
@@ -294,6 +313,9 @@ ENTRY(int80_direct_trap)
         movl  $0x80, 4(%rsp)
         SAVE_ALL
 
+        SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
         GET_STACK_END(bx)
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
@@ -469,6 +491,10 @@ ENTRY(common_interrupt)
         SAVE_ALL CLAC
 
         GET_STACK_END(14)
+
+        SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
         mov   %rcx, %r15
         neg   %rcx
@@ -507,6 +533,10 @@ GLOBAL(handle_exception)
         SAVE_ALL CLAC
 
         GET_STACK_END(14)
+
+        SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
         mov   %rcx, %r15
         neg   %rcx
@@ -700,8 +730,12 @@ ENTRY(double_fault)
         /* Set AC to reduce chance of further SMAP faults */
         SAVE_ALL STAC
 
-        GET_STACK_END(bx)
-        mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rbx
+        GET_STACK_END(14)
+
+        SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
+        mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rbx
         test  %rbx, %rbx
         jz    .Ldblf_cr3_okay
         jns   .Ldblf_cr3_load
@@ -730,6 +764,10 @@ handle_ist_exception:
         SAVE_ALL CLAC
 
         GET_STACK_END(14)
+
+        SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */
+        /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
         mov   %rcx, %r15
         neg   %rcx
diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h
index 6a2b833db621d31aedaa142f5b15a4da4d8df272..88b775b3b3aee38416ab8e60e0f158595b83b09f 100644
--- a/xen/include/asm-x86/asm_defns.h
+++ b/xen/include/asm-x86/asm_defns.h
@@ -7,6 +7,7 @@
 #include <asm/asm-offsets.h>
 #endif
 #include <asm/bug.h>
+#include <asm/page.h>
 #include <asm/processor.h>
 #include <asm/percpu.h>
 #include <xen/stringify.h>
@@ -386,4 +387,6 @@ static always_inline void stac(void)
 4:  .p2align 2                            ; \
     .popsection
 
+#include <asm/spec_ctrl_asm.h>
+
 #endif /* __X86_ASM_DEFNS_H__ */
diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h
index b929c48c855da3a3f3f1868b0a99334512d6e31b..1009d05e2404e2411136afcaf6dde269758a168c 100644
--- a/xen/include/asm-x86/current.h
+++ b/xen/include/asm-x86/current.h
@@ -53,6 +53,12 @@ struct cpu_info {
      */
     unsigned long xen_cr3;
     unsigned long pv_cr3;
+
+    /* See asm-x86/spec_ctrl_asm.h for usage. */
+    unsigned int shadow_spec_ctrl;
+    bool         use_shadow_spec_ctrl;
+
+    unsigned long __pad;
     /* get_stack_bottom() must be 16-byte aligned */
 };
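
Note the padding arithmetic: shadow_spec_ctrl (4 bytes) plus
use_shadow_spec_ctrl (1 byte) round up to 8 bytes of new data, and the
explicit __pad brings the total growth to 16 bytes, preserving the 16-byte
alignment that get_stack_bottom() relies on.  A hypothetical compile-time
check of that invariant (not part of the patch):

    /* Hypothetical check: cpu_info must stay a multiple of 16 bytes. */
    BUILD_BUG_ON(sizeof(struct cpu_info) & 0xf);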
 
diff --git a/xen/include/asm-x86/nops.h b/xen/include/asm-x86/nops.h
index 1a46b97aff3e225d902e87df978b535159688f7b..80126c1e679e300db6d86fcf3f1e7de3ddf8c371 100644
--- a/xen/include/asm-x86/nops.h
+++ b/xen/include/asm-x86/nops.h
 #define ASM_NOP8 _ASM_MK_NOP(P6_NOP8)
 #define ASM_NOP9 _ASM_MK_NOP(P6_NOP9)
 
+#define ASM_NOP17 ASM_NOP8; ASM_NOP7; ASM_NOP2
+#define ASM_NOP21 ASM_NOP8; ASM_NOP8; ASM_NOP5
+#define ASM_NOP24 ASM_NOP8; ASM_NOP8; ASM_NOP8
+#define ASM_NOP29 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP5
+#define ASM_NOP32 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8
+
 #define ASM_NOP_MAX 9
 
 #endif /* __X86_ASM_NOPS_H__ */
diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h
index e088a551dad6b4a0feb8312dbecaca53958d0cd6..b451250282c9b59d99367799132710f7ac43386e 100644
--- a/xen/include/asm-x86/spec_ctrl.h
+++ b/xen/include/asm-x86/spec_ctrl.h
 #ifndef __X86_SPEC_CTRL_H__
 #define __X86_SPEC_CTRL_H__
 
+#include <asm/current.h>
+
 void init_speculation_mitigations(void);
 
+static inline void init_shadow_spec_ctrl_state(void)
+{
+    struct cpu_info *info = get_cpu_info();
+
+    info->shadow_spec_ctrl = info->use_shadow_spec_ctrl = 0;
+}
+
 #endif /* !__X86_SPEC_CTRL_H__ */
 
 /*
diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h
new file mode 100644
index 0000000..ba55574
--- /dev/null
+++ b/xen/include/asm-x86/spec_ctrl_asm.h
@@ -0,0 +1,225 @@
+/******************************************************************************
+ * include/asm-x86/spec_ctrl_asm.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright (c) 2017-2018 Citrix Systems Ltd.
+ */
+
+#ifndef __X86_SPEC_CTRL_ASM_H__
+#define __X86_SPEC_CTRL_ASM_H__
+
+#ifdef __ASSEMBLY__
+#include <asm/msr-index.h>
+
+/*
+ * Saving and restoring MSR_SPEC_CTRL state is a little tricky.
+ *
+ * We want the guest's choice of SPEC_CTRL while in guest context, and Xen's
+ * choice (set or clear, depending on the hardware) while running in Xen
+ * context.  Therefore, a simplistic algorithm is:
+ *
+ *  - Set/clear IBRS on entry to Xen
+ *  - Set the guest's choice on exit to guest
+ *  - Leave SPEC_CTRL unchanged on exit to Xen
+ *
+ * There are two complicating factors:
+ *  1) HVM guests can have direct access to the MSR, so it can change
+ *     behind Xen's back.
+ *  2) An NMI or MCE can interrupt at any point, including early in the entry
+ *     path, or late in the exit path after restoring the guest value.  This
+ *     will corrupt the guest value.
+ *
+ * Factor 1 is dealt with by relying on NMIs/MCEs being blocked immediately
+ * after VMEXIT.  The VMEXIT-specific code reads MSR_SPEC_CTRL and updates
+ * current before loading Xen's MSR_SPEC_CTRL setting.
+ *
+ * Factor 2 is harder.  We maintain a shadow_spec_ctrl value and a
+ * use_shadow_spec_ctrl boolean per CPU.  The synchronous use is:
+ *
+ *  1) Store guest value in shadow_spec_ctrl
+ *  2) Set use_shadow_spec_ctrl boolean
+ *  3) Load guest value into MSR_SPEC_CTRL
+ *  4) Exit to guest
+ *  5) Entry from guest
+ *  6) Clear use_shadow_spec_ctrl boolean
+ *  7) Load Xen's value into MSR_SPEC_CTRL
+ *
+ * The asynchronous use for interrupts/exceptions is:
+ *  -  Set/clear IBRS on entry to Xen
+ *  -  On exit to Xen, check use_shadow_spec_ctrl
+ *  -  If set, load shadow_spec_ctrl
+ *
+ * Therefore, an interrupt/exception which hits the synchronous path between
+ * steps 2 and 6 will restore the shadow value rather than leaving Xen's value
+ * loaded and corrupting the value used in guest context.
+ *
+ * The following ASM fragments implement this algorithm.  See their local
+ * comments for further details.
+ *  - SPEC_CTRL_ENTRY_FROM_VMEXIT
+ *  - SPEC_CTRL_ENTRY_FROM_PV
+ *  - SPEC_CTRL_ENTRY_FROM_INTR
+ *  - SPEC_CTRL_EXIT_TO_XEN
+ *  - SPEC_CTRL_EXIT_TO_GUEST
+ */
+
+.macro DO_SPEC_CTRL_ENTRY_FROM_VMEXIT ibrs_val:req
+/*
+ * Requires %rbx=current, %rsp=regs/cpuinfo
+ * Clobbers %rax, %rcx, %rdx
+ *
+ * The common case is that a guest has direct access to MSR_SPEC_CTRL, in
+ * which case we need to save the guest value before setting IBRS for Xen.
+ * Unilaterally saving the guest value is shorter and faster than checking.
+ */
+    mov $MSR_SPEC_CTRL, %ecx
+    rdmsr
+
+    /* Stash the value from hardware. */
+    mov VCPU_arch_msr(%rbx), %rdx
+    mov %eax, VCPUMSR_spec_ctrl_raw(%rdx)
+    xor %edx, %edx
+
+    /* Clear SPEC_CTRL shadowing *before* loading Xen's value. */
+    movb %dl, CPUINFO_use_shadow_spec_ctrl(%rsp)
+
+    /* Load Xen's intended value. */
+    mov $\ibrs_val, %eax
+    wrmsr
+.endm
+
+.macro DO_SPEC_CTRL_ENTRY maybexen:req ibrs_val:req
+/*
+ * Requires %rsp=regs (also cpuinfo if !maybexen)
+ * Requires %r14=stack_end (if maybexen)
+ * Clobbers %rax, %rcx, %rdx
+ *
+ * PV guests can't update MSR_SPEC_CTRL behind Xen's back, so no need to read
+ * it back.  Entries from guest context need to clear SPEC_CTRL shadowing,
+ * while entries from Xen must leave shadowing in its current state.
+ */
+    mov $MSR_SPEC_CTRL, %ecx
+    xor %edx, %edx
+
+    /*
+     * Clear SPEC_CTRL shadowing *before* loading Xen's value.  If entering
+     * from a possibly-Xen context, %rsp doesn't necessarily alias the cpuinfo
+     * block so calculate the position directly.
+     */
+    .if \maybexen
+        /* Branchless `if ( !xen ) clear_shadowing` */
+        testb $3, UREGS_cs(%rsp)
+        setz %al
+        and %al, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%r14)
+    .else
+        movb %dl, CPUINFO_use_shadow_spec_ctrl(%rsp)
+    .endif
+
+    /* Load Xen's intended value. */
+    mov $\ibrs_val, %eax
+    wrmsr
+.endm
+
+.macro DO_SPEC_CTRL_EXIT_TO_XEN
+/*
+ * Requires %rbx=stack_end
+ * Clobbers %rax, %rcx, %rdx
+ *
+ * When returning to Xen context, look to see whether SPEC_CTRL shadowing is
+ * in effect, and reload the shadow value.  This covers race conditions which
+ * exist with an NMI/MCE/etc hitting late in the return-to-guest path.
+ */
+    xor %edx, %edx
+
+    cmpb %dl, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%rbx)
+    je .L\@_skip
+
+    mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax
+    mov $MSR_SPEC_CTRL, %ecx
+    wrmsr
+
+.L\@_skip:
+.endm
+
+.macro DO_SPEC_CTRL_EXIT_TO_GUEST
+/*
+ * Requires %eax=spec_ctrl, %rsp=regs/cpuinfo
+ * Clobbers %rcx, %rdx
+ *
+ * When returning to guest context, set up SPEC_CTRL shadowing and load the
+ * guest value.
+ */
+    /* Set up shadow value *before* enabling shadowing. */
+    mov %eax, CPUINFO_shadow_spec_ctrl(%rsp)
+
+    /* Set SPEC_CTRL shadowing *before* loading the guest value. */
+    movb $1, CPUINFO_use_shadow_spec_ctrl(%rsp)
+
+    mov $MSR_SPEC_CTRL, %ecx
+    xor %edx, %edx
+    wrmsr
+.endm
+
+/* Use after a VMEXIT from an HVM guest. */
+#define SPEC_CTRL_ENTRY_FROM_VMEXIT                                     \
+    ALTERNATIVE_2 __stringify(ASM_NOP32),                               \
+        __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT                      \
+                    ibrs_val=SPEC_CTRL_IBRS),                           \
+        X86_FEATURE_XEN_IBRS_SET,                                       \
+        __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT                      \
+                    ibrs_val=0),                                        \
+        X86_FEATURE_XEN_IBRS_CLEAR
+
+/* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */
+#define SPEC_CTRL_ENTRY_FROM_PV                                         \
+    ALTERNATIVE_2 __stringify(ASM_NOP21),                               \
+        __stringify(DO_SPEC_CTRL_ENTRY maybexen=0                       \
+                    ibrs_val=SPEC_CTRL_IBRS),                           \
+        X86_FEATURE_XEN_IBRS_SET,                                       \
+        __stringify(DO_SPEC_CTRL_ENTRY maybexen=0 ibrs_val=0),          \
+        X86_FEATURE_XEN_IBRS_CLEAR
+
+/* Use in interrupt/exception context.  May interrupt Xen or PV context. */
+#define SPEC_CTRL_ENTRY_FROM_INTR                                       \
+    ALTERNATIVE_2 __stringify(ASM_NOP29),                               \
+        __stringify(DO_SPEC_CTRL_ENTRY maybexen=1                       \
+                    ibrs_val=SPEC_CTRL_IBRS),                           \
+        X86_FEATURE_XEN_IBRS_SET,                                       \
+        __stringify(DO_SPEC_CTRL_ENTRY maybexen=1 ibrs_val=0),          \
+        X86_FEATURE_XEN_IBRS_CLEAR
+
+/* Use when exiting to Xen context. */
+#define SPEC_CTRL_EXIT_TO_XEN                                           \
+    ALTERNATIVE_2 __stringify(ASM_NOP17),                               \
+        DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_XEN_IBRS_SET,             \
+        DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_XEN_IBRS_CLEAR
+
+/* Use when exiting to guest context. */
+#define SPEC_CTRL_EXIT_TO_GUEST                                         \
+    ALTERNATIVE_2 __stringify(ASM_NOP24),                               \
+        DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_SET,           \
+        DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_CLEAR
+
+#endif /* __ASSEMBLY__ */
+#endif /* !__X86_SPEC_CTRL_ASM_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */