AFLAGS-y += -D__ASSEMBLY__
-# Clang's built-in assembler can't handle .code16/.code32/.code64 yet
-AFLAGS-$(clang) += -no-integrated-as
+# Clang's built-in assembler can't handle embedded .include's
+CFLAGS-$(clang) += -no-integrated-as
ALL_OBJS := $(ALL_OBJS-y)
CFLAGS += -DCONFIG_INDIRECT_THUNK
export CONFIG_INDIRECT_THUNK=y
endif
+
+# Set up the assembler include path properly for older GCC toolchains. Clang
+# objects to the argument being passed, however.
+ifneq ($(clang),y)
+CFLAGS += -Wa,-I$(BASEDIR)/include
+endif
.code64
start64:
/* Jump to high mappings. */
- movabs $__high_start,%rax
- jmpq *%rax
+ /* Stage the branch target in %rdi: the CPUID below clobbers %rax. */
+ movabs $__high_start, %rdi
+
+#ifdef CONFIG_INDIRECT_THUNK
+ /*
+ * If booting virtualised, or hot-onlining a CPU, sibling threads can
+ * attempt Branch Target Injection against this jmp.
+ *
+ * We've got no usable stack so can't use a RETPOLINE thunk, and are
+ * further than disp32 from the high mappings so couldn't use
+ * JUMP_THUNK even if it was a non-RETPOLINE thunk. Furthermore, an
+ * LFENCE isn't necessarily safe to use at this point.
+ *
+ * As this isn't a hotpath, use a fully serialising event to reduce
+ * the speculation window as much as possible. %ebx needs preserving
+ * for __high_start.
+ */
+ /* CPUID clobbers %eax, %ebx, %ecx and %edx, so stash %ebx in %esi. */
+ mov %ebx, %esi
+ cpuid
+ mov %esi, %ebx
+#endif
+
+ jmpq *%rdi
#include "wakeup.S"
memcpy(ptr, tests[i].opc, ARRAY_SIZE(tests[i].opc));
unmap_domain_page(ptr);
- asm volatile ( "call *%[stb]\n"
+ /*
+ * INDIRECT_CALL dispatches on the literal register name, so the stub
+ * address must be forced into a register: "r", not "rm".
+ */
+ asm volatile ( "INDIRECT_CALL %[stb]\n"
".Lret%=:\n"
".pushsection .fixup,\"ax\"\n"
".Lfix%=:\n\t"
".popsection\n\t"
_ASM_EXTABLE(.Lret%=, .Lfix%=)
: [exn] "+m" (res)
- : [stb] "rm" (addr), "a" (tests[i].rax));
+ : [stb] "r" (addr), "a" (tests[i].rax));
ASSERT(res == tests[i].res.raw);
}
typedef void io_emul_stub_t(struct cpu_user_regs *);
+/* Thunk declaration, for the direct call emitted into the stub below. */
+void __x86_indirect_thunk_rcx(void);
+
static io_emul_stub_t *io_emul_stub_setup(struct priv_op_ctxt *ctxt, u8 opcode,
unsigned int port, unsigned int bytes)
{
+ struct stubs *this_stubs = &this_cpu(stubs);
+ /* The IO emul stub occupies the second half of this CPU's stub buffer. */
+ unsigned long stub_va = this_stubs->addr + STUB_BUF_SIZE / 2;
bool use_quirk_stub = false;
if ( !ctxt->io_emul_stub )
- ctxt->io_emul_stub = map_domain_page(_mfn(this_cpu(stubs.mfn))) +
- (this_cpu(stubs.addr) &
- ~PAGE_MASK) +
- STUB_BUF_SIZE / 2;
+ ctxt->io_emul_stub =
+ map_domain_page(_mfn(this_stubs->mfn)) + (stub_va & ~PAGE_MASK);
/* movq $host_to_guest_gpr_switch,%rcx */
ctxt->io_emul_stub[0] = 0x48;
ctxt->io_emul_stub[1] = 0xb9;
*(void **)&ctxt->io_emul_stub[2] = (void *)host_to_guest_gpr_switch;
+
+#ifdef CONFIG_INDIRECT_THUNK
+ /* callq __x86_indirect_thunk_rcx */
+ ctxt->io_emul_stub[10] = 0xe8;
+ /* rel32 is relative to the next instruction, at stub offset 15 (11 + 4). */
+ *(int32_t *)&ctxt->io_emul_stub[11] =
+ (long)__x86_indirect_thunk_rcx - (stub_va + 11 + 4);
+#else
/* callq *%rcx */
ctxt->io_emul_stub[10] = 0xff;
ctxt->io_emul_stub[11] = 0xd1;
+ /* TODO: untangle ideal_nops from init/livepatch Kconfig options. */
+ /* Pad so the IO access bytes start at offset 15 in both variants. */
+ memcpy(&ctxt->io_emul_stub[12], "\x0f\x1f\x00", 3); /* P6_NOP3 */
+#endif
if ( unlikely(ioemul_handle_quirk) )
- use_quirk_stub = ioemul_handle_quirk(opcode, &ctxt->io_emul_stub[12],
+ use_quirk_stub = ioemul_handle_quirk(opcode, &ctxt->io_emul_stub[15],
ctxt->ctxt.regs);
if ( !use_quirk_stub )
{
/* data16 or nop */
- ctxt->io_emul_stub[12] = (bytes != 2) ? 0x90 : 0x66;
+ ctxt->io_emul_stub[15] = (bytes != 2) ? 0x90 : 0x66;
/* <io-access opcode> */
- ctxt->io_emul_stub[13] = opcode;
+ ctxt->io_emul_stub[16] = opcode;
/* imm8 or nop */
- ctxt->io_emul_stub[14] = !(opcode & 8) ? port : 0x90;
+ ctxt->io_emul_stub[17] = !(opcode & 8) ? port : 0x90;
/* ret (jumps to guest_to_host_gpr_switch) */
- ctxt->io_emul_stub[15] = 0xc3;
+ ctxt->io_emul_stub[18] = 0xc3;
}
- BUILD_BUG_ON(STUB_BUF_SIZE / 2 < MAX(16, /* Default emul stub */
- 12 + IOEMUL_QUIRK_STUB_BYTES));
+ BUILD_BUG_ON(STUB_BUF_SIZE / 2 < MAX(19, /* Default emul stub */
+ 15 + IOEMUL_QUIRK_STUB_BYTES));
/* Handy function-typed pointer to the stub. */
- return (void *)(this_cpu(stubs.addr) + STUB_BUF_SIZE / 2);
+ return (void *)stub_va;
}
movzbl UREGS_entry_vector(%rsp),%eax
leaq exception_table(%rip),%rdx
PERFC_INCR(exceptions, %rax, %rbx)
- callq *(%rdx,%rax,8)
+ /* Fetch the handler into %rdx first; INDIRECT_CALL only takes a register. */
+ mov (%rdx, %rax, 8), %rdx
+ INDIRECT_CALL %rdx
mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
testb $3,UREGS_cs(%rsp)
jz restore_all_xen
1: movq %rsp,%rdi
movzbl UREGS_entry_vector(%rsp),%eax
leaq exception_table(%rip),%rdx
- callq *(%rdx,%rax,8)
+ /* As above: fetch the handler into a register before branching. */
+ mov (%rdx, %rax, 8), %rdx
+ INDIRECT_CALL %rdx
mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
cmpb $TRAP_nmi,UREGS_entry_vector(%rsp)
jne ret_from_intr
#ifdef __XEN__
# define invoke_stub(pre, post, constraints...) do { \
union stub_exception_token res_ = { .raw = ~0 }; \
- asm volatile ( pre "\n\tcall *%[stub]\n\t" post "\n" \
+ /* INDIRECT_CALL needs a plain register, hence the "r" constraint below. */ \
+ asm volatile ( pre "\n\tINDIRECT_CALL %[stub]\n\t" post "\n" \
".Lret%=:\n\t" \
".pushsection .fixup,\"ax\"\n" \
".Lfix%=:\n\t" \
".popsection\n\t" \
_ASM_EXTABLE(.Lret%=, .Lfix%=) \
: [exn] "+g" (res_), constraints, \
- [stub] "rm" (stub.func), \
+ [stub] "r" (stub.func), \
"m" (*(uint8_t(*)[MAX_INST_LEN + 1])stub.ptr) ); \
if ( unlikely(~res_.raw) ) \
{ \
/*
* Hand-rolled longjmp(). Returns to the pointer on the top of
- * wqv->stack, and lands on a `rep movs` instruction.
+ * wqv->stack, and lands on a `rep movs` instruction. All other GPRs are
+ * restored from the stack, so are available for use here.
*/
asm volatile (
- "mov %1,%%"__OP"sp; jmp *(%0)"
+ /* The return address is dereferenced in C so INDIRECT_JMP gets a register. */
+ "mov %1,%%"__OP"sp; INDIRECT_JMP %[ip]"
: : "S" (wqv->stack), "D" (wqv->esp),
- "c" ((char *)get_cpu_info() - (char *)wqv->esp)
+ "c" ((char *)get_cpu_info() - (char *)wqv->esp),
+ [ip] "r" (*(unsigned long *)wqv->stack)
: "memory" );
unreachable();
}
#include <asm/cpufeature.h>
#include <asm/alternative.h>
+#ifdef __ASSEMBLY__
+# include <asm/indirect_thunk_asm.h>
+#else
+/*
+ * For C files, pull the macros in at the assembler level instead:
+ * CONFIG_INDIRECT_THUNK is re-expressed as an assembler symbol and the
+ * header .include'd, so inline asm can use INDIRECT_CALL/INDIRECT_JMP.
+ */
+asm ( "\t.equ CONFIG_INDIRECT_THUNK, "
+ __stringify(IS_ENABLED(CONFIG_INDIRECT_THUNK)) );
+asm ( "\t.include \"asm/indirect_thunk_asm.h\"" );
+#endif
+
#ifndef __ASSEMBLY__
void ret_from_intr(void);
#endif
--- /dev/null
+/*
+ * Warning! This file is included at an assembler level for .c files, causing
+ * usual #ifdef'ary to turn into comments.
+ */
+
+.macro INDIRECT_BRANCH insn:req arg:req
+/*
+ * Create an indirect branch. insn is one of call/jmp, arg is a single
+ * register.
+ *
+ * With no compiler support, this degrades into a plain indirect call/jmp.
+ * With compiler support, dispatch to the correct __x86_indirect_thunk_*
+ */
+ .if CONFIG_INDIRECT_THUNK == 1
+
+ /* Match \arg against each 64bit GPR name and branch to its thunk. */
+ $done = 0
+ .irp reg, ax, cx, dx, bx, bp, si, di, 8, 9, 10, 11, 12, 13, 14, 15
+ .ifeqs "\arg", "%r\reg"
+ \insn __x86_indirect_thunk_r\reg
+ $done = 1
+ .exitm
+ .endif
+ .endr
+
+ /* Reject anything else, e.g. memory operands or %rsp (not in the list). */
+ .if $done != 1
+ .error "Bad register arg \arg"
+ .endif
+
+ .else
+ \insn *\arg
+ .endif
+.endm
+
+/* Convenience wrappers. */
+.macro INDIRECT_CALL arg:req
+ INDIRECT_BRANCH call \arg
+.endm
+
+.macro INDIRECT_JMP arg:req
+ INDIRECT_BRANCH jmp \arg
+.endm