From: Roger Pau Monne
Date: Fri, 5 Jul 2024 10:08:06 +0000 (+0200)
Subject: x86/mm: zero stack on context switch
X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=99bd091659683c3e6936c1a467b84ca3737aecf6;p=people%2Froyger%2Fxen.git

x86/mm: zero stack on context switch

With the stack mapped on a per-CPU basis, there's no risk of other CPUs
being able to read the stack contents, but vCPUs running on the current
pCPU could read stack rubble from operations of previous vCPUs.

The #DF stack is not zeroed because handling of #DF results in a panic.

The contents of the shadow stack are not cleared as part of this change.
It's arguable that leaking internal Xen return addresses is not guest
confidential data.  At most, an attacker could use those addresses to
figure out which paths inside Xen previous execution flows have taken.

Signed-off-by: Roger Pau Monné
---
Changes since v1:
 - Zero the stack forward to use ERMS (see the sketch after the diff).
 - Only zero the IST stacks if they have been used.
 - Only zero the primary stack for full context switches.
---

diff --git a/xen/arch/x86/cpu/mcheck/mce.c b/xen/arch/x86/cpu/mcheck/mce.c
index 9028ccde54..eaaaefe7f8 100644
--- a/xen/arch/x86/cpu/mcheck/mce.c
+++ b/xen/arch/x86/cpu/mcheck/mce.c
@@ -92,10 +92,14 @@ struct mce_callbacks __ro_after_init mce_callbacks = {
 static const typeof(mce_callbacks.handler) __initconst_cf_clobber __used
     default_handler = unexpected_machine_check;
 
+DEFINE_PER_CPU(unsigned int, slice_mce_count);
+
 /* Call the installed machine check handler for this CPU setup. */
 
 void do_machine_check(const struct cpu_user_regs *regs)
 {
+    this_cpu(slice_mce_count)++;
+
     mce_enter();
     alternative_vcall(mce_callbacks.handler, regs);
     mce_exit();
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index fbb1b232d0..44878cf47f 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -2212,12 +2212,17 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
     /* Ensure that the vcpu has an up-to-date time base. */
     update_vcpu_system_time(next);
 
-    reset_stack_and_call_ind(nextd->arch.ctxt_switch->tail);
+    /*
+     * Context switches to the idle vCPU (either lazy or full) will never
+     * trigger zeroing of the stack, because the idle domain doesn't have ASI
+     * enabled.
+     */
+    reset_stack_and_call_ind(nextd->arch.ctxt_switch->tail, nextd->arch.asi);
 }
 
 void continue_running(struct vcpu *same)
 {
-    reset_stack_and_call_ind(same->domain->arch.ctxt_switch->tail);
+    reset_stack_and_call_ind(same->domain->arch.ctxt_switch->tail, false);
 }
 
 int __sync_local_execstate(void)
diff --git a/xen/arch/x86/include/asm/current.h b/xen/arch/x86/include/asm/current.h
index 4a9776f87a..9abb4e55ae 100644
--- a/xen/arch/x86/include/asm/current.h
+++ b/xen/arch/x86/include/asm/current.h
@@ -170,6 +170,12 @@ unsigned long get_stack_dump_bottom (unsigned long sp);
 # define SHADOW_STACK_WORK ""
 #endif
 
+#define ZERO_STACK                                                      \
+    "test %[stk_size], %[stk_size];"                                    \
+    "jz .L_skip_zeroing.%=;"                                            \
+    "rep stosb;"                                                        \
+    ".L_skip_zeroing.%=:"
+
 #if __GNUC__ >= 9
 # define ssaj_has_attr_noreturn(fn) __builtin_has_attribute(fn, __noreturn__)
 #else
@@ -177,13 +183,43 @@ unsigned long get_stack_dump_bottom (unsigned long sp);
 # define ssaj_has_attr_noreturn(fn) true
 #endif
 
-#define switch_stack_and_jump(fn, instr, constr)                        \
+DECLARE_PER_CPU(unsigned int, slice_mce_count);
+DECLARE_PER_CPU(unsigned int, slice_nmi_count);
+DECLARE_PER_CPU(unsigned int, slice_db_count);
+
+#define switch_stack_and_jump(fn, instr, constr, zero_stk)              \
 ({                                                                      \
     unsigned int tmp;                                                   \
+                                                                        \
     BUILD_BUG_ON(!ssaj_has_attr_noreturn(fn));                          \
+    ASSERT(IS_ALIGNED((unsigned long)guest_cpu_user_regs() -            \
+                      PRIMARY_STACK_SIZE +                              \
+                      sizeof(struct cpu_info), PAGE_SIZE));             \
+    if ( zero_stk )                                                     \
+    {                                                                   \
+        unsigned long stack_top = get_stack_bottom() &                  \
+                                  ~(STACK_SIZE - 1);                    \
+                                                                        \
+        if ( this_cpu(slice_mce_count) )                                \
+        {                                                               \
+            this_cpu(slice_mce_count) = 0;                              \
+            clear_page((void *)stack_top + IST_MCE * PAGE_SIZE);        \
+        }                                                               \
+        if ( this_cpu(slice_nmi_count) )                                \
+        {                                                               \
+            this_cpu(slice_nmi_count) = 0;                              \
+            clear_page((void *)stack_top + IST_NMI * PAGE_SIZE);        \
+        }                                                               \
+        if ( this_cpu(slice_db_count) )                                 \
+        {                                                               \
+            this_cpu(slice_db_count) = 0;                               \
+            clear_page((void *)stack_top + IST_DB * PAGE_SIZE);         \
+        }                                                               \
+    }                                                                   \
     __asm__ __volatile__ (                                              \
         SHADOW_STACK_WORK                                               \
         "mov %[stk], %%rsp;"                                            \
+        ZERO_STACK                                                      \
         CHECK_FOR_LIVEPATCH_WORK                                        \
         instr "[fun]"                                                   \
         : [val] "=&r" (tmp),                                            \
@@ -194,19 +230,26 @@ unsigned long get_stack_dump_bottom (unsigned long sp);
               ((PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8),               \
           [stack_mask] "i" (STACK_SIZE - 1),                            \
           _ASM_BUGFRAME_INFO(BUGFRAME_bug, __LINE__,                    \
-                             __FILE__, NULL)                            \
+                             __FILE__, NULL),                           \
+          /* For stack zeroing. */                                      \
+          "D" ((void *)guest_cpu_user_regs() -                          \
+               PRIMARY_STACK_SIZE + sizeof(struct cpu_info)),           \
+          [stk_size] "c"                                                \
+              ((zero_stk) ? PRIMARY_STACK_SIZE - sizeof(struct cpu_info)\
+                          : 0),                                         \
+          "a" (0)                                                       \
         : "memory" );                                                   \
     unreachable();                                                      \
 })
 
 #define reset_stack_and_jump(fn)                                        \
-    switch_stack_and_jump(fn, "jmp %c", "i")
+    switch_stack_and_jump(fn, "jmp %c", "i", false)
 
 /* The constraint may only specify non-call-clobbered registers. */
-#define reset_stack_and_call_ind(fn)                                    \
+#define reset_stack_and_call_ind(fn, zero_stk)                          \
 ({                                                                      \
     (void)((fn) == (void (*)(void))NULL);                               \
-    switch_stack_and_jump(fn, "INDIRECT_CALL %", "b");                  \
+    switch_stack_and_jump(fn, "INDIRECT_CALL %", "b", zero_stk);        \
 })
 
 /*
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 8231874fe2..c47ae9a3fc 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1790,6 +1790,7 @@ static void unknown_nmi_error(const struct cpu_user_regs *regs,
 static nmi_callback_t *__read_mostly nmi_callback;
 
 DEFINE_PER_CPU(unsigned int, nmi_count);
+DEFINE_PER_CPU(unsigned int, slice_nmi_count);
 
 void do_nmi(const struct cpu_user_regs *regs)
 {
@@ -1799,6 +1800,7 @@ void do_nmi(const struct cpu_user_regs *regs)
     bool handle_unknown = false;
 
     this_cpu(nmi_count)++;
+    this_cpu(slice_nmi_count)++;
     nmi_enter();
 
     /*
@@ -1917,6 +1919,8 @@ void asmlinkage do_device_not_available(struct cpu_user_regs *regs)
 
 void nocall sysenter_eflags_saved(void);
 
+DEFINE_PER_CPU(unsigned int, slice_db_count);
+
 void asmlinkage do_debug(struct cpu_user_regs *regs)
 {
     unsigned long dr6;
@@ -1925,6 +1929,7 @@ void asmlinkage do_debug(struct cpu_user_regs *regs)
     /* Stash dr6 as early as possible. */
     dr6 = read_debugreg(6);
+    this_cpu(slice_db_count)++;
 
     /*
      * At the time of writing (March 2018), on the subject of %dr6:
      *
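
For illustration, the conditional forward-zeroing pattern that ZERO_STACK
emits ("test; jz; rep stosb") can be tried outside Xen.  The sketch below is
not Xen code: it assumes x86-64 with GCC/Clang extended asm, and the function
name zero_forward, the buffer, and the sizes are made up for the example (a
numeric asm label stands in for the .L_skip_zeroing.%= label used above).  It
demonstrates the two properties the patch relies on: a byte count of zero
skips the zeroing outright, which is what keeps the zero_stk == false callers
(reset_stack_and_jump(), continue_running()) free of overhead, and the store
runs forward through the region, the direction that ERMS-capable CPUs
optimize for "rep stosb".

/*
 * Standalone sketch of the ZERO_STACK pattern: %rdi = base of the
 * region, %rcx = byte count, %al = 0.  Illustrative only; names and
 * sizes are not from the Xen tree.
 */
#include <assert.h>
#include <stddef.h>
#include <string.h>

static void zero_forward(void *base, size_t len)
{
    asm volatile ( "test %[len], %[len];"
                   "jz 1f;"                 /* count of 0: skip the store */
                   "rep stosb;"             /* forward, ERMS-friendly */
                   "1:"
                   : "+D" (base), [len] "+c" (len)
                   : "a" (0)
                   : "memory" );
}

int main(void)
{
    unsigned char buf[64];

    memset(buf, 0xaa, sizeof(buf));      /* stand-in for stack rubble */

    zero_forward(buf, 0);                /* nothing written */
    assert(buf[0] == 0xaa);

    zero_forward(buf, sizeof(buf));      /* whole region zeroed */
    assert(buf[0] == 0 && buf[sizeof(buf) - 1] == 0);

    return 0;
}

Byte stores are used rather than a word-sized loop because, with ERMS
(enhanced REP MOVSB/STOSB), a forward "rep stosb" reaches full cache-line
throughput regardless of element size, which is why v1's zeroing direction
was reversed in this revision.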