xenbits.xensource.com Git - people/royger/xen.git/commitdiff
(no commit message)    [branches: percpu-wip, gitlab/percpu-wip]
author    Roger Pau Monne <roger.pau@citrix.com>
          Wed, 26 Jun 2024 13:50:46 +0000 (15:50 +0200)
committer Roger Pau Monne <roger.pau@citrix.com>
          Sat, 29 Jun 2024 10:51:51 +0000 (12:51 +0200)
xen/arch/x86/include/asm/fixmap.h
xen/arch/x86/include/asm/mm.h
xen/arch/x86/mm.c
xen/arch/x86/setup.c
xen/arch/x86/smp.c
xen/arch/x86/smpboot.c
xen/common/efi/runtime.c
xen/common/smp.c
xen/include/xen/smp.h

diff --git a/xen/arch/x86/include/asm/fixmap.h b/xen/arch/x86/include/asm/fixmap.h
index e30a8112bf4777cf50b90bf9cab1b504af2d3160..fb171b9eb625c69094983ecb7a74b5fbd759484a 100644
@@ -120,20 +120,22 @@ extern void __set_fixmap_x(
 
 /* per-CPU fixmap area. */
 enum percpu_fixed_addresses {
-    /* Index 0 is reserved since fix_to_virt(0) == FIXADDR_TOP. */
-    PCPU_FIX_RESERVED,
+    /* For alignment reasons the per-CPU stacks must come first. */
+    PCPU_STACK_START,
+    PCPU_STACK_END = NR_CPUS * (PCPU_STACK_START + (1U << STACK_ORDER)) - 1,
+#define PERCPU_STACK_IDX(c) (PCPU_STACK_START + (c) * (1U << STACK_ORDER))
+#define PERCPU_STACK_ADDR(c) percpu_fix_to_virt(PERCPU_STACK_IDX(c))
     PCPU_FIX_PV_L4SHADOW,
     __end_of_percpu_fixed_addresses
 };
 
 #define PERCPU_FIXADDR_SIZE (__end_of_percpu_fixed_addresses << PAGE_SHIFT)
-#define PERCPU_FIXADDR_TOP (PERCPU_VIRT_SLOT(0) + PERCPU_FIXADDR_SIZE - \
-                            PAGE_SIZE)
+#define PERCPU_FIXADDR PERCPU_VIRT_SLOT(0)
 
 static inline void *percpu_fix_to_virt(enum percpu_fixed_addresses idx)
 {
-    BUG_ON(idx >=__end_of_percpu_fixed_addresses || idx <= PCPU_FIX_RESERVED);
-    return (void *)PERCPU_FIXADDR_TOP - (idx << PAGE_SHIFT);
+    BUG_ON(idx >= __end_of_percpu_fixed_addresses);
+    return (void *)PERCPU_FIXADDR + (idx << PAGE_SHIFT);
 }
 
 static inline void percpu_set_fixmap_remote(
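
The per-CPU stacks are forced to the start of the fixmap range so that each
stack keeps STACK_SIZE alignment, and indexing now grows upwards from
PERCPU_FIXADDR instead of downwards from a top address, which is what makes
the old reserved index 0 unnecessary. A minimal sketch of the resulting
arithmetic, assuming the usual x86 values (STACK_ORDER == 3, i.e. 8 pages of
4 KiB per stack):

    /* Sketch, not part of the patch: per-CPU stack slot arithmetic. */
    PERCPU_STACK_IDX(0) == 0                  /* PERCPU_FIXADDR + 0x0000 */
    PERCPU_STACK_IDX(1) == 1 << STACK_ORDER   /* PERCPU_FIXADDR + 0x8000 */
    PERCPU_STACK_IDX(c) == c << STACK_ORDER   /* STACK_SIZE-aligned as long
                                                 as PERCPU_FIXADDR is */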
diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h
index 8cd360e0b3b938fad69a651f22506763d3d1b5c3..d6f6bb3c5e7a455b2d7b3ec2da2e17db32e3267f 100644
@@ -521,7 +521,7 @@ extern struct rangeset *mmio_ro_ranges;
 #define compat_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
 #define compat_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
 
-void memguard_guard_stack(void *p);
+void memguard_guard_stack(void *p, unsigned int cpu);
 void memguard_unguard_stack(void *p);
 
 struct mmio_ro_emulate_ctxt {
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 3fe032b2c19f711a7c85919313eaed075e3f6c36..fa19572f03c8e6a228819f9f81c1324d7ade61f3 100644
@@ -6287,31 +6287,38 @@ void free_perdomain_mappings(struct domain *d)
     d->arch.perdomain_l3_pg = NULL;
 }
 
-static void write_sss_token(unsigned long *ptr)
+static void write_sss_token(unsigned long *ptr, unsigned long va)
 {
     /*
      * A supervisor shadow stack token is its own linear address, with the
      * busy bit (0) clear.
      */
-    *ptr = (unsigned long)ptr;
+    *ptr = va;
 }
 
-void memguard_guard_stack(void *p)
+void memguard_guard_stack(void *p, unsigned int cpu)
 {
+    unsigned long va = (unsigned long)PERCPU_STACK_ADDR(cpu);
+
     /* IST Shadow stacks.  4x 1k in stack page 0. */
     if ( IS_ENABLED(CONFIG_XEN_SHSTK) )
     {
-        write_sss_token(p + (IST_MCE * IST_SHSTK_SIZE) - 8);
-        write_sss_token(p + (IST_NMI * IST_SHSTK_SIZE) - 8);
-        write_sss_token(p + (IST_DB  * IST_SHSTK_SIZE) - 8);
-        write_sss_token(p + (IST_DF  * IST_SHSTK_SIZE) - 8);
+        write_sss_token(p + (IST_MCE * IST_SHSTK_SIZE) - 8,
+                        va + (IST_MCE * IST_SHSTK_SIZE) - 8);
+        write_sss_token(p + (IST_NMI * IST_SHSTK_SIZE) - 8,
+                        va + (IST_NMI * IST_SHSTK_SIZE) - 8);
+        write_sss_token(p + (IST_DB  * IST_SHSTK_SIZE) - 8,
+                        va + (IST_DB  * IST_SHSTK_SIZE) - 8);
+        write_sss_token(p + (IST_DF  * IST_SHSTK_SIZE) - 8,
+                        va + (IST_DF  * IST_SHSTK_SIZE) - 8);
     }
     map_pages_to_xen((unsigned long)p, virt_to_mfn(p), 1, PAGE_HYPERVISOR_SHSTK);
 
     /* Primary Shadow Stack.  1x 4k in stack page 5. */
     p += PRIMARY_SHSTK_SLOT * PAGE_SIZE;
+    va += PRIMARY_SHSTK_SLOT * PAGE_SIZE;
     if ( IS_ENABLED(CONFIG_XEN_SHSTK) )
-        write_sss_token(p + PAGE_SIZE - 8);
+        write_sss_token(p + PAGE_SIZE - 8, va + PAGE_SIZE - 8);
 
     map_pages_to_xen((unsigned long)p, virt_to_mfn(p), 1, PAGE_HYPERVISOR_SHSTK);
 }
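
memguard_guard_stack() gains a cpu parameter because the shadow stack tokens
are written through the xenheap mapping (p) but must encode the linear
address at which the stack will actually run, which is now the per-CPU
fixmap alias (va). A supervisor shadow stack token holds the linear address
of its own slot with the busy bit (0) clear, so, as a sketch of what the
primary token ends up containing:

    /* Sketch, not part of the patch: where the token lives vs. its value. */
    unsigned long *slot = p  + PAGE_SIZE - 8;  /* written via the xenheap   */
    unsigned long value = va + PAGE_SIZE - 8;  /* address the CPU validates */

    write_sss_token(slot, value);  /* a xenheap address here would make
                                      SETSSBSY fault once the CPU runs on
                                      the fixmap-mapped stack */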
@@ -6400,8 +6407,8 @@ int allocate_perdomain_local_l3(unsigned int cpu)
 {
     l3_pgentry_t *l3 = NULL;
     l2_pgentry_t *l2 = NULL;
-    l1_pgentry_t *l1 = NULL;
     root_pgentry_t *root_pgt = maddr_to_virt(idle_vcpu[cpu]->arch.cr3);
+    size_t i;
 
     ASSERT(!per_cpu(local_l3, cpu));
 
@@ -6410,29 +6417,40 @@ int allocate_perdomain_local_l3(unsigned int cpu)
 
     l3 = alloc_xenheap_page();
     l2 = alloc_xenheap_page();
-    l1 = alloc_xenheap_page();
-    if ( !l3 || !l2 || !l1 )
+    if ( !l3 || !l2 )
     {
         free_xenheap_page(l3);
         free_xenheap_page(l2);
-        free_xenheap_page(l1);
         return -ENOMEM;
     }
 
     clear_page(l3);
     clear_page(l2);
-    clear_page(l1);
-
-    /* Ensure one L1 table is enough to cover for the per-CPU fixmap. */
-    BUILD_BUG_ON(PERCPU_FIXADDR_SIZE > (1U << L2_PAGETABLE_SHIFT));
 
     l3[l3_table_offset(PERCPU_VIRT_START)] =
         l3e_from_mfn(virt_to_mfn(l2), __PAGE_HYPERVISOR_RW);
-    l2[l2_table_offset(PERCPU_VIRT_START)] =
-        l2e_from_mfn(virt_to_mfn(l1), __PAGE_HYPERVISOR_RW);
 
     per_cpu(local_l3, cpu) = l3;
 
+    /* Assume the per-cpu fixmap doesn't need more than an L2. */
+    BUILD_BUG_ON(PERCPU_FIXADDR_SIZE > (1U << L3_PAGETABLE_SHIFT));
+    for ( i = 0; i < PERCPU_FIXADDR_SIZE; i += (1U << L2_PAGETABLE_SHIFT) )
+    {
+        l1_pgentry_t *l1 = alloc_xenheap_page();
+
+        ASSERT(!l2[l2_table_offset(PERCPU_VIRT_START + i)].l2);
+
+        if ( !l1 )
+        {
+            free_perdomain_local_l3(cpu);
+            return -ENOMEM;
+        }
+
+        clear_page(l1);
+        l2[l2_table_offset(PERCPU_VIRT_START + i)] =
+            l2e_from_mfn(virt_to_mfn(l1), __PAGE_HYPERVISOR_RW);
+    }
+
     /* Setup the slot in the idle page table. */
     root_pgt[root_table_offset(PERDOMAIN_VIRT_START)] =
             l4e_from_mfn(virt_to_mfn(l3), __PAGE_HYPERVISOR_RW);
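
With NR_CPUS stacks included, the per-CPU fixmap no longer fits in a single
L1 table (2 MiB), so the one pre-allocated L1 is replaced by a loop that
allocates an L1 per 2 MiB of PERCPU_FIXADDR_SIZE, and the BUILD_BUG_ON() is
relaxed to only require the whole range to fit in one L2 table (1 GiB). A
rough size check, assuming 4 KiB pages, STACK_ORDER == 3 and NR_CPUS == 256:

    /* Sketch: per-CPU fixmap size with the stacks included.
     *
     *   stacks: (NR_CPUS << STACK_ORDER) pages = 256 * 8 * 4 KiB = 8 MiB
     *   rest:   one page per remaining enum entry (PCPU_FIX_PV_L4SHADOW)
     *
     * 8 MiB spans four L2 entries, hence four L1 allocations per CPU.
     */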
@@ -6443,8 +6461,8 @@ int allocate_perdomain_local_l3(unsigned int cpu)
 void free_perdomain_local_l3(unsigned int cpu)
 {
     l3_pgentry_t *l3 = per_cpu(local_l3, cpu);
-    l2_pgentry_t *l2 = NULL;
-    l1_pgentry_t *l1 = NULL;
+    l2_pgentry_t *l2;
+    size_t i;
 
     if ( !l3 )
         return;
@@ -6452,9 +6470,20 @@ void free_perdomain_local_l3(unsigned int cpu)
     per_cpu(local_l3, cpu) = NULL;
 
     l2 = maddr_to_virt(l3e_get_paddr(l3[l3_table_offset(PERCPU_VIRT_START)]));
-    l1 = maddr_to_virt(l2e_get_paddr(l2[l2_table_offset(PERCPU_VIRT_START)]));
 
-    free_xenheap_page(l1);
+    for ( i = 0; i < PERCPU_FIXADDR_SIZE; i += (1U << L2_PAGETABLE_SHIFT) )
+    {
+        unsigned long pfn =
+            l2e_get_pfn(l2[l2_table_offset(PERCPU_VIRT_START + i)]);
+        l1_pgentry_t *l1;
+
+        if ( !pfn )
+            break;
+
+        l1 = mfn_to_virt(pfn);
+        free_xenheap_page(l1);
+    }
+
     free_xenheap_page(l2);
     free_xenheap_page(l3);
 }
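
The teardown loop can stop at the first empty L2 entry because allocation
populates entries in ascending order, so nothing past a hole was ever
allocated; this also covers the unwind of a mid-loop -ENOMEM in
allocate_perdomain_local_l3(), which calls this function. The emptiness test
itself is worth spelling out:

    /* Sketch: an unpopulated entry reads back as pfn 0 because the L2 page
     * was clear_page()d, so the break relies on MFN 0 never backing a
     * legitimately allocated L1 table. */
    if ( !l2e_get_pfn(l2[l2_table_offset(PERCPU_VIRT_START + i)]) )
        break;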
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 94f73a352a2b0d8a6e236501f10980c8f9827e47..95c6f3fc556784fc1c35cc876a23b2580c3af40c 100644
@@ -808,14 +808,9 @@ static void __init noreturn reinit_bsp_stack(void)
     /* Update SYSCALL trampolines */
     percpu_traps_init();
 
-    stack_base[0] = stack;
-
     rc = setup_cpu_root_pgt(0);
     if ( rc )
         panic("Error %d setting up PV root page table\n", rc);
-    rc = allocate_perdomain_local_l3(0);
-    if ( rc )
-        panic("Error %d setting up local per-domain L3\n", rc);
 
     if ( cpu_has_xen_shstk )
     {
@@ -825,6 +820,7 @@ static void __init noreturn reinit_bsp_stack(void)
         asm volatile ("setssbsy" ::: "memory");
     }
 
+    printk("shstk enabled\n");
     reset_stack_and_jump(init_done);
 }
 
@@ -2077,8 +2073,32 @@ void asmlinkage __init noreturn __start_xen(unsigned long mbi_p)
         info->last_spec_ctrl = default_xen_spec_ctrl;
     }
 
+    ret = allocate_perdomain_local_l3(0);
+    if ( ret )
+        panic("Error %d setting up local per-domain L3\n", ret);
+
+    for ( i = 0; i < (1U << STACK_ORDER); i++ )
+    {
+        percpu_set_fixmap(PERCPU_STACK_IDX(0) + i,
+                          _mfn(virt_to_mfn(bsp_stack + i * PAGE_SIZE)),
+                          PAGE_HYPERVISOR_RW);
+        printk("CPU%u populating %p -> %lx\n", 0,
+               percpu_fix_to_virt(PERCPU_STACK_IDX(0) + i),
+               virt_to_mfn(bsp_stack + i * PAGE_SIZE));
+    }
+
+    percpu_set_fixmap(PERCPU_STACK_IDX(0),
+                      _mfn(virt_to_mfn(bsp_stack)),
+                      PAGE_HYPERVISOR_SHSTK);
+    percpu_set_fixmap(PERCPU_STACK_IDX(0) + PRIMARY_SHSTK_SLOT,
+                      _mfn(virt_to_mfn(bsp_stack +
+                                       PRIMARY_SHSTK_SLOT * PAGE_SIZE)),
+                      PAGE_HYPERVISOR_SHSTK);
+
+    stack_base[0] = bsp_stack;
+
     /* Copy the cpu info block, and move onto the BSP stack. */
-    bsp_info = get_cpu_info_from_stack((unsigned long)bsp_stack);
+    bsp_info = get_cpu_info_from_stack((unsigned long)PERCPU_STACK_ADDR(0));
     *bsp_info = *info;
 
     asm volatile ("mov %[stk], %%rsp; jmp %c[fn]" ::
diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c
index 04c6a0572319a661a45a669150df313695f75c7a..1eaf3a46dc6e9e8b1d62f72f79c14f10e9f7a639 100644
@@ -433,3 +433,29 @@ long cf_check cpu_down_helper(void *data)
         ret = cpu_down(cpu);
     return ret;
 }
+
+void arch_smp_pre_callfunc(unsigned int cpu)
+{
+    unsigned int i;
+
+    if ( cpu == smp_processor_id() )
+        return;
+
+    for ( i = 0; i < (1U << STACK_ORDER); i++ )
+        percpu_set_fixmap(PERCPU_STACK_IDX(cpu) + i,
+                          _mfn(virt_to_mfn(stack_base[cpu] + i * PAGE_SIZE)),
+                          __PAGE_HYPERVISOR_RW);
+}
+
+void arch_smp_post_callfunc(unsigned int cpu)
+{
+    unsigned int i;
+
+    if ( cpu == smp_processor_id() )
+        return;
+
+    for ( i = 0; i < (1U << STACK_ORDER); i++ )
+        percpu_clear_fixmap(PERCPU_STACK_IDX(cpu) + i);
+
+    flush_area_local(PERCPU_STACK_ADDR(cpu), FLUSH_ORDER(STACK_ORDER));
+}
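
These hooks exist because callers of on_selected_cpus() may pass pointers
into their own stack through the info argument, and with per-CPU stack
mappings that stack is not visible from other CPUs. Since every CPU owns a
distinct index range in the fixmap, mapping the caller's stack on the
receiving CPU reproduces exactly the linear addresses the caller used, so
such pointers remain valid:

    /* Sketch: caller on CPU 1, receiver on CPU 3.
     *
     * CPU 1:                            CPU 3:
     *   int x;  // on CPU 1's stack       arch_smp_pre_callfunc(1);
     *   on_selected_cpus(m, f, &x, 1);    //  maps CPU 1's stack pages at
     *                                     //  PERCPU_STACK_ADDR(1); &x now
     *                                     //  resolves to the same memory
     *                                     f(&x);
     *                                     arch_smp_post_callfunc(1);
     */

The post hook unmaps the slots again and flushes the covered range from the
local TLB, so stale translations cannot outlive the call.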
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 74a525a8d7aef400d2df61ea87efc6b875b8ba25..1ac607563abb525eb3854a24ccb67cea1339eb3a 100644
@@ -559,6 +559,8 @@ static int do_boot_cpu(int apicid, int cpu)
 {
     int timeout, boot_error = 0, rc = 0;
     unsigned long start_eip;
+    unsigned int i;
+    const unsigned int stack_pages = 1U << STACK_ORDER;
 
     /*
      * Save current MTRR state in case it was changed since early boot
@@ -579,7 +581,28 @@ static int do_boot_cpu(int apicid, int cpu)
         printk("Booting processor %d/%d eip %lx\n",
                cpu, apicid, start_eip);
 
-    stack_start = stack_base[cpu] + STACK_SIZE - sizeof(struct cpu_info);
+    for ( i = 0; i < stack_pages; i++ )
+    {
+        percpu_set_fixmap_remote(cpu, PERCPU_STACK_IDX(cpu) + i,
+                                 _mfn(virt_to_mfn(stack_base[cpu] +
+                                                  i * PAGE_SIZE)),
+                                 PAGE_HYPERVISOR_RW);
+        printk("CPU%u populating %p -> %lx\n", cpu,
+               percpu_fix_to_virt(PERCPU_STACK_IDX(cpu) + i),
+               virt_to_mfn(stack_base[cpu] + i * PAGE_SIZE));
+    }
+
+    percpu_set_fixmap_remote(cpu, PERCPU_STACK_IDX(cpu),
+                             _mfn(virt_to_mfn(stack_base[cpu])),
+                             PAGE_HYPERVISOR_SHSTK);
+    percpu_set_fixmap_remote(cpu, PERCPU_STACK_IDX(cpu) + PRIMARY_SHSTK_SLOT,
+                             _mfn(virt_to_mfn(stack_base[cpu] +
+                                              PRIMARY_SHSTK_SLOT * PAGE_SIZE)),
+                             PAGE_HYPERVISOR_SHSTK);
+
+    ASSERT(IS_ALIGNED((unsigned long)PERCPU_STACK_ADDR(cpu), STACK_SIZE));
+
+    stack_start = PERCPU_STACK_ADDR(cpu) + STACK_SIZE - sizeof(struct cpu_info);
 
     /*
      * If per-CPU idle root page table has been allocated, switch to it as
@@ -1044,7 +1067,7 @@ void *cpu_alloc_stack(unsigned int cpu)
     stack = alloc_xenheap_pages(STACK_ORDER, memflags);
 
     if ( stack )
-        memguard_guard_stack(stack);
+        memguard_guard_stack(stack, cpu);
 
     return stack;
 }
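
do_boot_cpu() performs the same RW-then-SHSTK population as the BSP path,
but through percpu_set_fixmap_remote() against the AP's not-yet-active page
tables, and points stack_start at the fixmap alias the AP will run on. The
ASSERT() matters because cpu_info is located by masking the stack pointer,
roughly (assuming Xen's usual stack-to-cpu_info conversion):

    /* Sketch: why PERCPU_STACK_ADDR() must be STACK_SIZE-aligned. */
    static inline struct cpu_info *get_cpu_info_from_stack(unsigned long sp)
    {
        return (struct cpu_info *)((sp | (STACK_SIZE - 1)) + 1) - 1;
    }

A misaligned alias would make every such lookup resolve to the wrong
cpu_info.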
diff --git a/xen/common/efi/runtime.c b/xen/common/efi/runtime.c
index d952c3ba785ef3801da216c59388bdb8df512d59..b519c91dd11aeec927f28528f1f6d3ade0c06055 100644
@@ -85,6 +85,7 @@ struct efi_rs_state efi_rs_enter(void)
     static const u16 fcw = FCW_DEFAULT;
     static const u32 mxcsr = MXCSR_DEFAULT;
     struct efi_rs_state state = { .cr3 = 0 };
+    root_pgentry_t *efi_pgt, *idle_pgt;
 
     if ( mfn_eq(efi_l4_mfn, INVALID_MFN) )
         return state;
@@ -98,6 +99,13 @@ struct efi_rs_state efi_rs_enter(void)
 
     efi_rs_on_cpu = smp_processor_id();
 
+    /* Insert the current per-domain slot for the stack mapping. */
+    efi_pgt = map_domain_page(efi_l4_mfn);
+    idle_pgt = maddr_to_virt(idle_vcpu[efi_rs_on_cpu]->arch.cr3);
+    efi_pgt[root_table_offset(PERDOMAIN_VIRT_START)].l4 =
+        idle_pgt[root_table_offset(PERDOMAIN_VIRT_START)].l4;
+    unmap_domain_page(efi_pgt);
+
     /* prevent fixup_page_fault() from doing anything */
     irq_enter();
 
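EFI runtime services run on a dedicated L4 (efi_l4_mfn) that predates the
per-CPU mappings, and efi_rs_enter() switches CR3 while still executing on
Xen's stack. Copying the current CPU's PERDOMAIN slot into the EFI page
table keeps the per-CPU fixmap, and with it the active stack, mapped across
the switch:

    /* Sketch of the failure mode this hunk prevents:
     *
     *   efi_rs_enter()
     *     switch to the EFI page tables;  // without the copied L4 slot the
     *                                     // stack VA disappears and the
     *                                     // next push or call faults
     */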
diff --git a/xen/common/smp.c b/xen/common/smp.c
index a011f541f1eae0ae8af327cb64bff336186cc0dc..ff2a0b92fb8643a8fd4a3436628702b34482bd83 100644
@@ -29,6 +29,7 @@ static struct call_data_struct {
     void (*func) (void *info);
     void *info;
     int wait;
+    unsigned int caller;
     cpumask_t selected;
 } call_data;
 
@@ -63,6 +64,7 @@ void on_selected_cpus(
     call_data.func = func;
     call_data.info = info;
     call_data.wait = wait;
+    call_data.caller = smp_processor_id();
 
     smp_send_call_function_mask(&call_data.selected);
 
@@ -82,6 +84,8 @@ void smp_call_function_interrupt(void)
     if ( !cpumask_test_cpu(cpu, &call_data.selected) )
         return;
 
+    arch_smp_pre_callfunc(call_data.caller);
+
     irq_enter();
 
     if ( unlikely(!func) )
@@ -102,6 +106,8 @@ void smp_call_function_interrupt(void)
     }
 
     irq_exit();
+
+    arch_smp_post_callfunc(call_data.caller);
 }
 
 /*
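
call_data gains a caller field so that receivers know whose stack may back
the info pointer before dereferencing it. The resulting flow:

    /* Sketch of on_selected_cpus(mask, fn, info, wait) with the hooks:
     *
     * caller CPU:                    each selected CPU:
     *   call_data.caller = me;         smp_call_function_interrupt();
     *   send IPI;                        arch_smp_pre_callfunc(caller);
     *   wait for acks;                   fn(info);  // info may point into
     *                                    //            the caller's stack
     *                                    arch_smp_post_callfunc(caller);
     */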
diff --git a/xen/include/xen/smp.h b/xen/include/xen/smp.h
index 2ca9ff1bfcc133c09d66fa5c45ddaba24367d887..73ad5b3646f3e203be620e10121e423df58bb7cb 100644
@@ -76,4 +76,7 @@ extern void *stack_base[NR_CPUS];
 void initialize_cpu_data(unsigned int cpu);
 int setup_cpu_root_pgt(unsigned int cpu);
 
+void arch_smp_pre_callfunc(unsigned int cpu);
+void arch_smp_post_callfunc(unsigned int cpu);
+
 #endif /* __XEN_SMP_H__ */
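
The prototypes live in a common header and the calls sit in common/smp.c,
while this diff only implements the hooks for x86, so other architectures
presumably need at least no-op definitions, e.g. (an assumption, not part of
this diff):

    /* Hypothetical stubs for architectures without per-CPU stack mappings. */
    void arch_smp_pre_callfunc(unsigned int cpu) {}
    void arch_smp_post_callfunc(unsigned int cpu) {}

Alternatively the prototypes could become static inline no-ops behind an
arch opt-in #define.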