* self modifying code. This implies that asymmetric systems where
* APs have fewer capabilities than the boot processor are not handled.
* Tough. Make sure you disable such features by hand.
+ *
+ * The caller will set the "force" argument to true for the final
+ * invocation, such that no CALLs/JMPs to NULL pointers will be left
+ * around. See also the further comment below.
*/
-void init_or_livepatch apply_alternatives(struct alt_instr *start,
- struct alt_instr *end)
+static void init_or_livepatch _apply_alternatives(struct alt_instr *start,
+ struct alt_instr *end,
+ bool force)
{
struct alt_instr *a, *base;
/*
* Detect sequences of alt_instr's patching the same origin site, and
* keep base pointing at the first alt_instr entry. This is so we can
- * refer to a single ->priv field for patching decisions. We
- * deliberately use the alt_instr itself rather than a local variable
- * in case we end up making multiple passes.
+ * refer to a single ->priv field for some of our patching decisions,
+ * in particular the NOP optimisation. We deliberately use the alt_instr
+ * itself rather than a local variable in case we end up making multiple
+ * passes.
*
* ->priv being nonzero means that the origin site has already been
* modified, and we shouldn't try to optimise the nops again.
if ( ALT_ORIG_PTR(base) != orig )
base = a;
+ /* Skip patch sites already handled during the first pass. */
+ if ( a->priv )
+ {
+ ASSERT(force);
+ continue;
+ }
+
/* If there is no replacement to make, see about optimising the nops. */
if ( !boot_cpu_has(a->cpuid) )
{
if ( base->priv )
continue;
- base->priv = 1;
+ a->priv = 1;
/* Nothing useful to do? */
if ( toolchain_nops_are_ideal || a->pad_len <= 1 )
continue;
}
- base->priv = 1;
-
memcpy(buf, repl, a->repl_len);
/* 0xe8/0xe9 are relative branches; fix the offset. */
if ( a->repl_len >= 5 && (*buf & 0xfe) == 0xe8 )
- *(int32_t *)(buf + 1) += repl - orig;
+ {
+ /*
+ * Detect the special case of indirect-to-direct branch patching:
+ * - replacement is a direct CALL/JMP (opcodes 0xE8/0xE9; already
+ * checked above),
+ * - replacement's displacement is -5 (pointing back at the very
+ * insn, which makes no sense in a real replacement insn),
+ * - original is an indirect CALL/JMP (opcodes 0xFF/2 or 0xFF/4)
+ * using RIP-relative addressing.
+ * Some branch destinations may still be NULL when we come here
+ * the first time. Defer patching of those until the post-presmp-
+ * initcalls re-invocation (with force set to true). If at that
+ * point the branch destination is still NULL, insert "UD2; UD0"
+ * (for ease of recognition) instead of CALL/JMP.
+ */
+ if ( a->cpuid == X86_FEATURE_ALWAYS &&
+ *(int32_t *)(buf + 1) == -5 &&
+ a->orig_len >= 6 &&
+ orig[0] == 0xff &&
+ orig[1] == (*buf & 1 ? 0x25 : 0x15) )
+ {
+ long disp = *(int32_t *)(orig + 2);
+ const uint8_t *dest = *(void **)(orig + 6 + disp);
+
+ if ( dest )
+ {
+ disp = dest - (orig + 5);
+ ASSERT(disp == (int32_t)disp);
+ *(int32_t *)(buf + 1) = disp;
+ }
+ else if ( force )
+ {
+ buf[0] = 0x0f;
+ buf[1] = 0x0b;
+ buf[2] = 0x0f;
+ buf[3] = 0xff;
+ buf[4] = 0xff;
+ }
+ else
+ continue;
+ }
+ else if ( force && system_state < SYS_STATE_active )
+ ASSERT_UNREACHABLE();
+ else
+ *(int32_t *)(buf + 1) += repl - orig;
+ }
+ else if ( force && system_state < SYS_STATE_active )
+ ASSERT_UNREACHABLE();
+
+ a->priv = 1;
add_nops(buf + a->repl_len, total_len - a->repl_len);
text_poke(orig, buf, total_len);
}
}
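
As an illustration of the byte-level rewrite above (hypothetical origin site,
not taken from the patch), here is an indirect CALL through a RIP-relative
pointer together with the possible outcomes:

    origin site (>= 6 bytes):   ff 15 xx xx xx xx   call *ptr(%rip)
    replacement before fixup:   e8 fb ff ff ff      call .       (rel32 == -5)
    pointer non-NULL:           e8 yy yy yy yy      call dest    (rel32 = dest - (orig + 5))
    pointer NULL, force set:    0f 0b 0f ff ff      ud2; ud0
    pointer NULL, first pass:   site left untouched, revisited on the forced pass

Any remaining total_len - repl_len bytes are filled with NOPs by add_nops()
before text_poke() writes the buffer back over the origin site.
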
-static bool __initdata alt_done;
+void init_or_livepatch apply_alternatives(struct alt_instr *start,
+ struct alt_instr *end)
+{
+ _apply_alternatives(start, end, true);
+}
+
+static unsigned int __initdata alt_todo;
+static unsigned int __initdata alt_done;
/*
* At boot time, we patch alternatives in NMI context. This means that the
* More than one NMI may occur between the two set_nmi_callback() invocations below.
* We only need to apply alternatives once.
*/
- if ( !alt_done )
+ if ( !(alt_done & alt_todo) )
{
unsigned long cr0;
/* Disable WP to allow patching read-only pages. */
write_cr0(cr0 & ~X86_CR0_WP);
- apply_alternatives(__alt_instructions, __alt_instructions_end);
+ _apply_alternatives(__alt_instructions, __alt_instructions_end,
+ alt_done);
write_cr0(cr0);
- alt_done = true;
+ alt_done |= alt_todo;
}
return 1;
* This routine is called with local interrupt disabled and used during
* bootup.
*/
-void __init alternative_instructions(void)
+static void __init _alternative_instructions(bool force)
{
unsigned int i;
nmi_callback_t *saved_nmi_callback;
- arch_init_ideal_nops();
-
/*
* Don't stop machine check exceptions while patching.
* MCEs only happen when something got corrupted and in this
*/
ASSERT(!local_irq_is_enabled());
+ /* Set what operation to perform /before/ setting the callback. */
+ alt_todo = 1u << force;
+ barrier();
+
/*
* As soon as the callback is set up, the next NMI will trigger patching,
* even an NMI ahead of our explicit self-NMI.
* cover the (hopefully never) async case, poll alt_done for up to one
* second.
*/
- for ( i = 0; !ACCESS_ONCE(alt_done) && i < 1000; ++i )
+ for ( i = 0; !(ACCESS_ONCE(alt_done) & alt_todo) && i < 1000; ++i )
mdelay(1);
- if ( !ACCESS_ONCE(alt_done) )
+ if ( !(ACCESS_ONCE(alt_done) & alt_todo) )
panic("Timed out waiting for alternatives self-NMI to hit\n");
set_nmi_callback(saved_nmi_callback);
}
+
+void __init alternative_instructions(void)
+{
+ arch_init_ideal_nops();
+ _alternative_instructions(false);
+}
+
+void __init alternative_branches(void)
+{
+ local_irq_disable();
+ _alternative_instructions(true);
+ local_irq_enable();
+}
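
A minimal sketch of the intended boot-time sequencing (the call sites shown
are an assumption for illustration, not part of this hunk), showing how the
alt_todo/alt_done bit masks record the two passes:

    /* Illustration only; the real invocations live in the boot path. */
    alternative_instructions();  /* pass 1 (IRQs must already be off, see the
                                  * ASSERT above): alt_todo = 1u << false = 1;
                                  * sites whose branch target is still NULL
                                  * are skipped. */
    /* ... pre-SMP initcalls run and fill in the remaining hook pointers ... */
    alternative_branches();      /* pass 2 (disables IRQs itself):
                                  * alt_todo = 1u << true = 2; with force set,
                                  * still-NULL targets become "UD2; UD0". */
    /* alt_done accumulates both bits: first 1, then 3. */
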
#ifdef __ASSEMBLY__
#include <asm/alternative-asm.h>
#else
+#include <xen/lib.h>
#include <xen/stringify.h>
-#include <xen/types.h>
#include <asm/asm-macros.h>
struct __packed alt_instr {
/* Similar to alternative_instructions except it can be run with IRQs enabled. */
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
extern void alternative_instructions(void);
+extern void alternative_branches(void);
#define alt_orig_len "(.LXEN%=_orig_e - .LXEN%=_orig_s)"
#define alt_pad_len "(.LXEN%=_orig_p - .LXEN%=_orig_e)"
/* Use this macro if you need more than one output parameter. */
#define ASM_OUTPUT2(a...) a
+/*
+ * Machinery to allow converting indirect to direct calls, when the called
+ * function is determined once at boot and never changed afterwards.
+ */
+
+#define ALT_CALL_arg1 "rdi"
+#define ALT_CALL_arg2 "rsi"
+#define ALT_CALL_arg3 "rdx"
+#define ALT_CALL_arg4 "rcx"
+#define ALT_CALL_arg5 "r8"
+#define ALT_CALL_arg6 "r9"
+
+#define ALT_CALL_ARG(arg, n) \
+ register typeof((arg) ? (arg) : 0) a ## n ## _ \
+ asm ( ALT_CALL_arg ## n ) = (arg)
+#define ALT_CALL_NO_ARG(n) \
+ register unsigned long a ## n ## _ asm ( ALT_CALL_arg ## n )
+
+#define ALT_CALL_NO_ARG6 ALT_CALL_NO_ARG(6)
+#define ALT_CALL_NO_ARG5 ALT_CALL_NO_ARG(5); ALT_CALL_NO_ARG6
+#define ALT_CALL_NO_ARG4 ALT_CALL_NO_ARG(4); ALT_CALL_NO_ARG5
+#define ALT_CALL_NO_ARG3 ALT_CALL_NO_ARG(3); ALT_CALL_NO_ARG4
+#define ALT_CALL_NO_ARG2 ALT_CALL_NO_ARG(2); ALT_CALL_NO_ARG3
+#define ALT_CALL_NO_ARG1 ALT_CALL_NO_ARG(1); ALT_CALL_NO_ARG2
+
+/*
+ * Unfortunately ALT_CALL_NO_ARG() above can't use a fake initializer (to
+ * suppress "uninitialized variable" warnings), as various versions of gcc
+ * older than 8.1 fall on the nose in various ways with that (always because
+ * of some other construct elsewhere in the same function needing to use the
+ * same hard register). Otherwise the asm() below could uniformly use "+r"
+ * output constraints, making unnecessary all these ALT_CALL<n>_OUT macros.
+ */
+#define ALT_CALL0_OUT "=r" (a1_), "=r" (a2_), "=r" (a3_), \
+ "=r" (a4_), "=r" (a5_), "=r" (a6_)
+#define ALT_CALL1_OUT "+r" (a1_), "=r" (a2_), "=r" (a3_), \
+ "=r" (a4_), "=r" (a5_), "=r" (a6_)
+#define ALT_CALL2_OUT "+r" (a1_), "+r" (a2_), "=r" (a3_), \
+ "=r" (a4_), "=r" (a5_), "=r" (a6_)
+#define ALT_CALL3_OUT "+r" (a1_), "+r" (a2_), "+r" (a3_), \
+ "=r" (a4_), "=r" (a5_), "=r" (a6_)
+#define ALT_CALL4_OUT "+r" (a1_), "+r" (a2_), "+r" (a3_), \
+ "+r" (a4_), "=r" (a5_), "=r" (a6_)
+#define ALT_CALL5_OUT "+r" (a1_), "+r" (a2_), "+r" (a3_), \
+ "+r" (a4_), "+r" (a5_), "=r" (a6_)
+#define ALT_CALL6_OUT "+r" (a1_), "+r" (a2_), "+r" (a3_), \
+ "+r" (a4_), "+r" (a5_), "+r" (a6_)
+
+#define alternative_callN(n, rettype, func) ({ \
+ rettype ret_; \
+ register unsigned long r10_ asm("r10"); \
+ register unsigned long r11_ asm("r11"); \
+ asm volatile (__stringify(ALTERNATIVE "call *%c[addr](%%rip)", \
+ "call .", \
+ X86_FEATURE_ALWAYS) \
+ : ALT_CALL ## n ## _OUT, "=a" (ret_), \
+ "=r" (r10_), "=r" (r11_) ASM_CALL_CONSTRAINT \
+ : [addr] "i" (&(func)), "g" (func) \
+ : "memory" ); \
+ ret_; \
+})
+
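For orientation (an informal note, not patch text): the "call ." replacement
assembles to e8 fb ff ff ff, i.e. a direct CALL whose rel32 is -5, which is
exactly the signature _apply_alternatives() keys on, while the original
"call *%c[addr](%%rip)" assembles to ff 15 <disp32>, matching the
orig[0]/orig[1] check there. Tagging the site X86_FEATURE_ALWAYS makes every
expansion of this macro a candidate for the indirect-to-direct conversion.

    original:     call *%c[addr](%rip)  ->  ff 15 <disp32>
    replacement:  call .                ->  e8 fb ff ff ff  (rel32 == -5)
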
+#define alternative_vcall0(func) ({ \
+ ALT_CALL_NO_ARG1; \
+ ((void)alternative_callN(0, int, func)); \
+})
+
+#define alternative_call0(func) ({ \
+ ALT_CALL_NO_ARG1; \
+ alternative_callN(0, typeof(func()), func); \
+})
+
+#define alternative_vcall1(func, arg) ({ \
+ ALT_CALL_ARG(arg, 1); \
+ ALT_CALL_NO_ARG2; \
+ (void)sizeof(func(arg)); \
+ (void)alternative_callN(1, int, func); \
+})
+
+#define alternative_call1(func, arg) ({ \
+ ALT_CALL_ARG(arg, 1); \
+ ALT_CALL_NO_ARG2; \
+ alternative_callN(1, typeof(func(arg)), func); \
+})
+
+#define alternative_vcall2(func, arg1, arg2) ({ \
+ typeof(arg2) v2_ = (arg2); \
+ ALT_CALL_ARG(arg1, 1); \
+ ALT_CALL_ARG(v2_, 2); \
+ ALT_CALL_NO_ARG3; \
+ (void)sizeof(func(arg1, arg2)); \
+ (void)alternative_callN(2, int, func); \
+})
+
+#define alternative_call2(func, arg1, arg2) ({ \
+ typeof(arg2) v2_ = (arg2); \
+ ALT_CALL_ARG(arg1, 1); \
+ ALT_CALL_ARG(v2_, 2); \
+ ALT_CALL_NO_ARG3; \
+ alternative_callN(2, typeof(func(arg1, arg2)), func); \
+})
+
+#define alternative_vcall3(func, arg1, arg2, arg3) ({ \
+ typeof(arg2) v2_ = (arg2); \
+ typeof(arg3) v3_ = (arg3); \
+ ALT_CALL_ARG(arg1, 1); \
+ ALT_CALL_ARG(v2_, 2); \
+ ALT_CALL_ARG(v3_, 3); \
+ ALT_CALL_NO_ARG4; \
+ (void)sizeof(func(arg1, arg2, arg3)); \
+ (void)alternative_callN(3, int, func); \
+})
+
+#define alternative_call3(func, arg1, arg2, arg3) ({ \
+ typeof(arg2) v2_ = (arg2); \
+ typeof(arg3) v3_ = (arg3); \
+ ALT_CALL_ARG(arg1, 1); \
+ ALT_CALL_ARG(v2_, 2); \
+ ALT_CALL_ARG(v3_, 3); \
+ ALT_CALL_NO_ARG4; \
+ alternative_callN(3, typeof(func(arg1, arg2, arg3)), \
+ func); \
+})
+
+#define alternative_vcall4(func, arg1, arg2, arg3, arg4) ({ \
+ typeof(arg2) v2_ = (arg2); \
+ typeof(arg3) v3_ = (arg3); \
+ typeof(arg4) v4_ = (arg4); \
+ ALT_CALL_ARG(arg1, 1); \
+ ALT_CALL_ARG(v2_, 2); \
+ ALT_CALL_ARG(v3_, 3); \
+ ALT_CALL_ARG(v4_, 4); \
+ ALT_CALL_NO_ARG5; \
+ (void)sizeof(func(arg1, arg2, arg3, arg4)); \
+ (void)alternative_callN(4, int, func); \
+})
+
+#define alternative_call4(func, arg1, arg2, arg3, arg4) ({ \
+ typeof(arg2) v2_ = (arg2); \
+ typeof(arg3) v3_ = (arg3); \
+ typeof(arg4) v4_ = (arg4); \
+ ALT_CALL_ARG(arg1, 1); \
+ ALT_CALL_ARG(v2_, 2); \
+ ALT_CALL_ARG(v3_, 3); \
+ ALT_CALL_ARG(v4_, 4); \
+ ALT_CALL_NO_ARG5; \
+ alternative_callN(4, typeof(func(arg1, arg2, \
+ arg3, arg4)), \
+ func); \
+})
+
+#define alternative_vcall5(func, arg1, arg2, arg3, arg4, arg5) ({ \
+ typeof(arg2) v2_ = (arg2); \
+ typeof(arg3) v3_ = (arg3); \
+ typeof(arg4) v4_ = (arg4); \
+ typeof(arg5) v5_ = (arg5); \
+ ALT_CALL_ARG(arg1, 1); \
+ ALT_CALL_ARG(v2_, 2); \
+ ALT_CALL_ARG(v3_, 3); \
+ ALT_CALL_ARG(v4_, 4); \
+ ALT_CALL_ARG(v5_, 5); \
+ ALT_CALL_NO_ARG6; \
+ (void)sizeof(func(arg1, arg2, arg3, arg4, arg5)); \
+ (void)alternative_callN(5, int, func); \
+})
+
+#define alternative_call5(func, arg1, arg2, arg3, arg4, arg5) ({ \
+ typeof(arg2) v2_ = (arg2); \
+ typeof(arg3) v3_ = (arg3); \
+ typeof(arg4) v4_ = (arg4); \
+ typeof(arg5) v5_ = (arg5); \
+ ALT_CALL_ARG(arg1, 1); \
+ ALT_CALL_ARG(v2_, 2); \
+ ALT_CALL_ARG(v3_, 3); \
+ ALT_CALL_ARG(v4_, 4); \
+ ALT_CALL_ARG(v5_, 5); \
+ ALT_CALL_NO_ARG6; \
+ alternative_callN(5, typeof(func(arg1, arg2, arg3, \
+ arg4, arg5)), \
+ func); \
+})
+
+#define alternative_vcall6(func, arg1, arg2, arg3, arg4, arg5, arg6) ({ \
+ typeof(arg2) v2_ = (arg2); \
+ typeof(arg3) v3_ = (arg3); \
+ typeof(arg4) v4_ = (arg4); \
+ typeof(arg5) v5_ = (arg5); \
+ typeof(arg6) v6_ = (arg6); \
+ ALT_CALL_ARG(arg1, 1); \
+ ALT_CALL_ARG(v2_, 2); \
+ ALT_CALL_ARG(v3_, 3); \
+ ALT_CALL_ARG(v4_, 4); \
+ ALT_CALL_ARG(v5_, 5); \
+ ALT_CALL_ARG(v6_, 6); \
+ (void)sizeof(func(arg1, arg2, arg3, arg4, arg5, arg6)); \
+ (void)alternative_callN(6, int, func); \
+})
+
+#define alternative_call6(func, arg1, arg2, arg3, arg4, arg5, arg6) ({ \
+ typeof(arg2) v2_ = (arg2); \
+ typeof(arg3) v3_ = (arg3); \
+ typeof(arg4) v4_ = (arg4); \
+ typeof(arg5) v5_ = (arg5); \
+ typeof(arg6) v6_ = (arg6); \
+ ALT_CALL_ARG(arg1, 1); \
+ ALT_CALL_ARG(v2_, 2); \
+ ALT_CALL_ARG(v3_, 3); \
+ ALT_CALL_ARG(v4_, 4); \
+ ALT_CALL_ARG(v5_, 5); \
+ ALT_CALL_ARG(v6_, 6); \
+ alternative_callN(6, typeof(func(arg1, arg2, arg3, \
+ arg4, arg5, arg6)), \
+ func); \
+})
+
+#define alternative_vcall__(nr) alternative_vcall ## nr
+#define alternative_call__(nr) alternative_call ## nr
+
+#define alternative_vcall_(nr) alternative_vcall__(nr)
+#define alternative_call_(nr) alternative_call__(nr)
+
+#define alternative_vcall(func, args...) \
+ alternative_vcall_(count_args(args))(func, ## args)
+
+#define alternative_call(func, args...) \
+ alternative_call_(count_args(args))(func, ## args)
+
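A usage sketch with hypothetical names (struct hook_ops, hook_ops, flush and
query are made up for illustration and do not appear in this patch): a hook
table whose function pointers are assigned once during boot can have its call
sites converted simply by switching to alternative_vcall()/alternative_call():

    /* Hypothetical example only. */
    struct hook_ops {
        void (*flush)(const void *addr, unsigned int size);
        unsigned int (*query)(unsigned int leaf);
    };
    extern struct hook_ops hook_ops;  /* filled in before alternative_branches() */

    static void flush_line(const void *addr)
    {
        /* Emitted as "call *hook_ops.flush(%rip)", patched to a direct CALL. */
        alternative_vcall(hook_ops.flush, addr, 64u);
    }

    static unsigned int query_leaf(unsigned int leaf)
    {
        return alternative_call(hook_ops.query, leaf);
    }

Only up to six integer/pointer arguments are covered by these macros, and the
function pointer has to live at a RIP-relative addressable location in the
image; a hook whose pointer is still NULL when alternative_branches() runs
will have its call site turned into "UD2; UD0" instead.
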
#endif /* !__ASSEMBLY__ */
#endif /* __X86_ALTERNATIVE_H__ */