x86/mm: zero stack on context switch
author     Roger Pau Monne <roger.pau@citrix.com>
           Fri, 5 Jul 2024 10:08:06 +0000 (12:08 +0200)
committer  Roger Pau Monne <roger.pau@citrix.com>
           Wed, 11 Dec 2024 14:26:57 +0000 (15:26 +0100)
With the stack mapped on a per-CPU basis there's no risk of other CPUs being
able to read the stack contents, but vCPUs running on the current pCPU could
read stack rubble left behind by previously scheduled vCPUs.

The #DF stack is not zeroed because handling of #DF results in a panic.

The contents of the shadow stack are not cleared as part of this change.  It's
arguable that internal Xen return addresses are not guest confidential data;
at most an attacker could use them to figure out which code paths inside Xen
previous execution flows have taken.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Changes since v1:
 - Zero the stack in the forward (ascending) direction, so the fill can
   benefit from ERMS (see the sketch below).
 - Only zero the IST stacks if they have been used.
 - Only zero the primary stack for full context switches.
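
As a reference for the ERMS point above, a minimal stand-alone sketch of the
forward "rep stosb" idiom that ZERO_STACK below relies on.  This is
illustrative only, not Xen code; the function name is made up:

    #include <stddef.h>

    /*
     * Zero a buffer with an ascending "rep stosb", which CPUs
     * advertising ERMS (Enhanced REP MOVSB/STOSB) execute as a fast
     * string operation.  The zero-count test mirrors the
     * ".L_skip_zeroing" branch in ZERO_STACK; "rep" with %rcx == 0 is
     * architecturally a no-op, so the branch merely keeps the
     * non-zeroing path cheap and explicit.
     */
    static inline void zero_forward(void *dst, size_t len)
    {
        asm volatile ( "test %[len], %[len];"
                       "jz 1f;"
                       "rep stosb;"
                       "1:"
                       : "+D" (dst), [len] "+c" (len)
                       : "a" (0)
                       : "memory" );
    }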

xen/arch/x86/cpu/mcheck/mce.c
xen/arch/x86/domain.c
xen/arch/x86/include/asm/current.h
xen/arch/x86/traps.c
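
For orientation before reading the current.h hunk: Xen's per-CPU stacks are
8-page aligned blocks of 8 pages.  The layout below is reproduced from the
comment in asm/current.h (from memory, so verify against the tree); it is
what lets the IST zeroing below index individual pages from stack_top using
IST_MCE/IST_NMI/IST_DB as page offsets:

    /*
     *  7 - Primary stack (with a struct cpu_info at the top)
     *  6 - Primary stack
     *  5 - Primary Shadow Stack (read-only)
     *  4 - #DF IST stack
     *  3 - #DB IST stack
     *  2 - NMI IST stack
     *  1 - #MC IST stack
     *  0 - IST Shadow Stacks (4x 1k, read-only)
     */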

diff --git a/xen/arch/x86/cpu/mcheck/mce.c b/xen/arch/x86/cpu/mcheck/mce.c
index 9028ccde5477e5ef227be14a88bd691b163f9f25..eaaaefe7f8ba7a9f592d0a4f1b74b5fb965b15c0 100644
--- a/xen/arch/x86/cpu/mcheck/mce.c
+++ b/xen/arch/x86/cpu/mcheck/mce.c
@@ -92,10 +92,14 @@ struct mce_callbacks __ro_after_init mce_callbacks = {
 static const typeof(mce_callbacks.handler) __initconst_cf_clobber __used
     default_handler = unexpected_machine_check;
 
+DEFINE_PER_CPU(unsigned int, slice_mce_count);
+
 /* Call the installed machine check handler for this CPU setup. */
 
 void do_machine_check(const struct cpu_user_regs *regs)
 {
+    this_cpu(slice_mce_count)++;
+
     mce_enter();
     alternative_vcall(mce_callbacks.handler, regs);
     mce_exit();
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index fbb1b232d04b8188eb5a026a7ade532ed56cf8c1..44878cf47fd3193b3b69161932bf5702e624b93b 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -2212,12 +2212,17 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
     /* Ensure that the vcpu has an up-to-date time base. */
     update_vcpu_system_time(next);
 
-    reset_stack_and_call_ind(nextd->arch.ctxt_switch->tail);
+    /*
+     * Context switches to the idle vCPU (either lazy or full) will never
+     * trigger zeroing of the stack, because the idle domain doesn't have ASI
+     * enabled.
+     */
+    reset_stack_and_call_ind(nextd->arch.ctxt_switch->tail, nextd->arch.asi);
 }
 
 void continue_running(struct vcpu *same)
 {
-    reset_stack_and_call_ind(same->domain->arch.ctxt_switch->tail);
+    reset_stack_and_call_ind(same->domain->arch.ctxt_switch->tail, false);
 }
 
 int __sync_local_execstate(void)
diff --git a/xen/arch/x86/include/asm/current.h b/xen/arch/x86/include/asm/current.h
index 4a9776f87a7ac16eaf7a87582b00710b2401ed54..9abb4e55aeeaeb29f992e12c79a0dd13aa331157 100644
--- a/xen/arch/x86/include/asm/current.h
+++ b/xen/arch/x86/include/asm/current.h
@@ -170,6 +170,12 @@ unsigned long get_stack_dump_bottom (unsigned long sp);
 # define SHADOW_STACK_WORK ""
 #endif
 
+#define ZERO_STACK                                              \
+    "test %[stk_size], %[stk_size];"                            \
+    "jz .L_skip_zeroing.%=;"                                    \
+    "rep stosb;"                                                \
+    ".L_skip_zeroing.%=:"
+
 #if __GNUC__ >= 9
 # define ssaj_has_attr_noreturn(fn) __builtin_has_attribute(fn, __noreturn__)
 #else
@@ -177,13 +183,43 @@ unsigned long get_stack_dump_bottom (unsigned long sp);
 # define ssaj_has_attr_noreturn(fn) true
 #endif
 
-#define switch_stack_and_jump(fn, instr, constr)                        \
+DECLARE_PER_CPU(unsigned int, slice_mce_count);
+DECLARE_PER_CPU(unsigned int, slice_nmi_count);
+DECLARE_PER_CPU(unsigned int, slice_db_count);
+
+#define switch_stack_and_jump(fn, instr, constr, zero_stk)              \
     ({                                                                  \
         unsigned int tmp;                                               \
+                                                                        \
         BUILD_BUG_ON(!ssaj_has_attr_noreturn(fn));                      \
+        ASSERT(IS_ALIGNED((unsigned long)guest_cpu_user_regs() -        \
+                          PRIMARY_STACK_SIZE +                          \
+                          sizeof(struct cpu_info), PAGE_SIZE));         \
+        if ( zero_stk )                                                 \
+        {                                                               \
+            unsigned long stack_top = get_stack_bottom() &              \
+                                      ~(STACK_SIZE - 1);                \
+                                                                        \
+            if ( this_cpu(slice_mce_count) )                            \
+            {                                                           \
+                this_cpu(slice_mce_count) = 0;                          \
+                clear_page((void *)stack_top + IST_MCE * PAGE_SIZE);    \
+            }                                                           \
+            if ( this_cpu(slice_nmi_count) )                            \
+            {                                                           \
+                this_cpu(slice_nmi_count) = 0;                          \
+                clear_page((void *)stack_top + IST_NMI * PAGE_SIZE);    \
+            }                                                           \
+            if ( this_cpu(slice_db_count) )                             \
+            {                                                           \
+                this_cpu(slice_db_count) = 0;                           \
+                clear_page((void *)stack_top + IST_DB  * PAGE_SIZE);    \
+            }                                                           \
+        }                                                               \
         __asm__ __volatile__ (                                          \
             SHADOW_STACK_WORK                                           \
             "mov %[stk], %%rsp;"                                        \
+            ZERO_STACK                                                  \
             CHECK_FOR_LIVEPATCH_WORK                                    \
             instr "[fun]"                                               \
             : [val] "=&r" (tmp),                                        \
@@ -194,19 +230,26 @@ unsigned long get_stack_dump_bottom (unsigned long sp);
               ((PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8),               \
               [stack_mask] "i" (STACK_SIZE - 1),                        \
               _ASM_BUGFRAME_INFO(BUGFRAME_bug, __LINE__,                \
-                                 __FILE__, NULL)                        \
+                                 __FILE__, NULL),                       \
+              /* For stack zeroing. */                                  \
+              "D" ((void *)guest_cpu_user_regs() -                      \
+                   PRIMARY_STACK_SIZE + sizeof(struct cpu_info)),       \
+              [stk_size] "c"                                            \
+              ((zero_stk) ? PRIMARY_STACK_SIZE - sizeof(struct cpu_info)\
+                          : 0),                                         \
+              "a" (0)                                                   \
             : "memory" );                                               \
         unreachable();                                                  \
     })
 
 #define reset_stack_and_jump(fn)                                        \
-    switch_stack_and_jump(fn, "jmp %c", "i")
+    switch_stack_and_jump(fn, "jmp %c", "i", false)
 
 /* The constraint may only specify non-call-clobbered registers. */
-#define reset_stack_and_call_ind(fn)                                    \
+#define reset_stack_and_call_ind(fn, zero_stk)                          \
     ({                                                                  \
         (void)((fn) == (void (*)(void))NULL);                           \
-        switch_stack_and_jump(fn, "INDIRECT_CALL %", "b");              \
+        switch_stack_and_jump(fn, "INDIRECT_CALL %", "b", zero_stk);    \
     })
 
 /*
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 8231874fe23a740132735810050a9d32eaa2794b..c47ae9a3fcf9fe043780912404a63293fa041926 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1790,6 +1790,7 @@ static void unknown_nmi_error(const struct cpu_user_regs *regs,
 static nmi_callback_t *__read_mostly nmi_callback;
 
 DEFINE_PER_CPU(unsigned int, nmi_count);
+DEFINE_PER_CPU(unsigned int, slice_nmi_count);
 
 void do_nmi(const struct cpu_user_regs *regs)
 {
@@ -1799,6 +1800,7 @@ void do_nmi(const struct cpu_user_regs *regs)
     bool handle_unknown = false;
 
     this_cpu(nmi_count)++;
+    this_cpu(slice_nmi_count)++;
     nmi_enter();
 
     /*
@@ -1917,6 +1919,8 @@ void asmlinkage do_device_not_available(struct cpu_user_regs *regs)
 
 void nocall sysenter_eflags_saved(void);
 
+DEFINE_PER_CPU(unsigned int, slice_db_count);
+
 void asmlinkage do_debug(struct cpu_user_regs *regs)
 {
     unsigned long dr6;
@@ -1925,6 +1929,7 @@ void asmlinkage do_debug(struct cpu_user_regs *regs)
     /* Stash dr6 as early as possible. */
     dr6 = read_debugreg(6);
 
+    this_cpu(slice_db_count)++;
     /*
      * At the time of writing (March 2018), on the subject of %dr6:
      *