/* per-CPU fixmap area. */
enum percpu_fixed_addresses {
- /* Index 0 is reserved since fix_to_virt(0) == FIXADDR_TOP. */
- PCPU_FIX_RESERVED,
+ /* For alignment reasons the per-CPU stacks must come first. */
+ PCPU_STACK_START,
+ PCPU_STACK_END = PCPU_STACK_START + NR_CPUS * (1U << STACK_ORDER) - 1,
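+ /* Each CPU uses (1U << STACK_ORDER) consecutive slots for its stack. */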
+#define PERCPU_STACK_IDX(c) (PCPU_STACK_START + (c) * (1U << STACK_ORDER))
+#define PERCPU_STACK_ADDR(c) percpu_fix_to_virt(PERCPU_STACK_IDX(c))
PCPU_FIX_PV_L4SHADOW,
__end_of_percpu_fixed_addresses
};
#define PERCPU_FIXADDR_SIZE (__end_of_percpu_fixed_addresses << PAGE_SHIFT)
-#define PERCPU_FIXADDR_TOP (PERCPU_VIRT_SLOT(0) + PERCPU_FIXADDR_SIZE - \
- PAGE_SIZE)
+#define PERCPU_FIXADDR PERCPU_VIRT_SLOT(0)
static inline void *percpu_fix_to_virt(enum percpu_fixed_addresses idx)
{
- BUG_ON(idx >= __end_of_percpu_fixed_addresses || idx <= PCPU_FIX_RESERVED);
- return (void *)PERCPU_FIXADDR_TOP - (idx << PAGE_SHIFT);
+ BUG_ON(idx >= __end_of_percpu_fixed_addresses);
+ return (void *)PERCPU_FIXADDR + (idx << PAGE_SHIFT);
}
static inline void percpu_set_fixmap_remote(
#define compat_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
#define compat_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
-void memguard_guard_stack(void *p);
+void memguard_guard_stack(void *p, unsigned int cpu);
void memguard_unguard_stack(void *p);
struct mmio_ro_emulate_ctxt {
d->arch.perdomain_l3_pg = NULL;
}
-static void write_sss_token(unsigned long *ptr)
+static void write_sss_token(unsigned long *ptr, unsigned long va)
{
/*
* A supervisor shadow stack token is its own linear address, with the
* busy bit (0) clear.
*/
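+ /* @va is the address the token will be consumed at, not necessarily @ptr. */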
- *ptr = (unsigned long)ptr;
+ *ptr = va;
}
-void memguard_guard_stack(void *p)
+void memguard_guard_stack(void *p, unsigned int cpu)
{
+ unsigned long va = (unsigned long)PERCPU_STACK_ADDR(cpu);
+
/* IST Shadow stacks. 4x 1k in stack page 0. */
if ( IS_ENABLED(CONFIG_XEN_SHSTK) )
{
- write_sss_token(p + (IST_MCE * IST_SHSTK_SIZE) - 8);
- write_sss_token(p + (IST_NMI * IST_SHSTK_SIZE) - 8);
- write_sss_token(p + (IST_DB * IST_SHSTK_SIZE) - 8);
- write_sss_token(p + (IST_DF * IST_SHSTK_SIZE) - 8);
+ write_sss_token(p + (IST_MCE * IST_SHSTK_SIZE) - 8,
+ va + (IST_MCE * IST_SHSTK_SIZE) - 8);
+ write_sss_token(p + (IST_NMI * IST_SHSTK_SIZE) - 8,
+ va + (IST_NMI * IST_SHSTK_SIZE) - 8);
+ write_sss_token(p + (IST_DB * IST_SHSTK_SIZE) - 8,
+ va + (IST_DB * IST_SHSTK_SIZE) - 8);
+ write_sss_token(p + (IST_DF * IST_SHSTK_SIZE) - 8,
+ va + (IST_DF * IST_SHSTK_SIZE) - 8);
}
map_pages_to_xen((unsigned long)p, virt_to_mfn(p), 1, PAGE_HYPERVISOR_SHSTK);
/* Primary Shadow Stack. 1x 4k in stack page 5. */
p += PRIMARY_SHSTK_SLOT * PAGE_SIZE;
+ va += PRIMARY_SHSTK_SLOT * PAGE_SIZE;
if ( IS_ENABLED(CONFIG_XEN_SHSTK) )
- write_sss_token(p + PAGE_SIZE - 8);
+ write_sss_token(p + PAGE_SIZE - 8, va + PAGE_SIZE - 8);
map_pages_to_xen((unsigned long)p, virt_to_mfn(p), 1, PAGE_HYPERVISOR_SHSTK);
}
{
l3_pgentry_t *l3 = NULL;
l2_pgentry_t *l2 = NULL;
- l1_pgentry_t *l1 = NULL;
root_pgentry_t *root_pgt = maddr_to_virt(idle_vcpu[cpu]->arch.cr3);
+ size_t i;
ASSERT(!per_cpu(local_l3, cpu));
l3 = alloc_xenheap_page();
l2 = alloc_xenheap_page();
- l1 = alloc_xenheap_page();
- if ( !l3 || !l2 || !l1 )
+ if ( !l3 || !l2 )
{
free_xenheap_page(l3);
free_xenheap_page(l2);
- free_xenheap_page(l1);
return -ENOMEM;
}
clear_page(l3);
clear_page(l2);
- clear_page(l1);
-
- /* Ensure one L1 table is enough to cover for the per-CPU fixmap. */
- BUILD_BUG_ON(PERCPU_FIXADDR_SIZE > (1U << L2_PAGETABLE_SHIFT));
l3[l3_table_offset(PERCPU_VIRT_START)] =
l3e_from_mfn(virt_to_mfn(l2), __PAGE_HYPERVISOR_RW);
- l2[l2_table_offset(PERCPU_VIRT_START)] =
- l2e_from_mfn(virt_to_mfn(l1), __PAGE_HYPERVISOR_RW);
per_cpu(local_l3, cpu) = l3;
+ /* Assume the per-CPU fixmap doesn't need more than a single L2 table. */
+ BUILD_BUG_ON(PERCPU_FIXADDR_SIZE > (1U << L3_PAGETABLE_SHIFT));
+ for ( i = 0; i < PERCPU_FIXADDR_SIZE; i += (1U << L2_PAGETABLE_SHIFT) )
+ {
+ l1_pgentry_t *l1 = alloc_xenheap_page();
+
+ ASSERT(!l2[l2_table_offset(PERCPU_VIRT_START + i)].l2);
+
+ if ( !l1 )
+ {
+ free_perdomain_local_l3(cpu);
+ return -ENOMEM;
+ }
+
+ clear_page(l1);
+ l2[l2_table_offset(PERCPU_VIRT_START + i)] =
+ l2e_from_mfn(virt_to_mfn(l1), __PAGE_HYPERVISOR_RW);
+ }
+
/* Setup the slot in the idle page table. */
root_pgt[root_table_offset(PERDOMAIN_VIRT_START)] =
l4e_from_mfn(virt_to_mfn(l3), __PAGE_HYPERVISOR_RW);
void free_perdomain_local_l3(unsigned int cpu)
{
l3_pgentry_t *l3 = per_cpu(local_l3, cpu);
- l2_pgentry_t *l2 = NULL;
- l1_pgentry_t *l1 = NULL;
+ l2_pgentry_t *l2;
+ size_t i;
if ( !l3 )
return;
per_cpu(local_l3, cpu) = NULL;
l2 = maddr_to_virt(l3e_get_paddr(l3[l3_table_offset(PERCPU_VIRT_START)]));
- l1 = maddr_to_virt(l2e_get_paddr(l2[l2_table_offset(PERCPU_VIRT_START)]));
- free_xenheap_page(l1);
+ for ( i = 0; i < PERCPU_FIXADDR_SIZE; i += (1U << L2_PAGETABLE_SHIFT) )
+ {
+ unsigned long pfn =
+ l2e_get_pfn(l2[l2_table_offset(PERCPU_VIRT_START + i)]);
+ l1_pgentry_t *l1;
+
+ if ( !pfn )
+ break;
+
+ l1 = mfn_to_virt(pfn);
+ free_xenheap_page(l1);
+ }
+
free_xenheap_page(l2);
free_xenheap_page(l3);
}
/* Update SYSCALL trampolines */
percpu_traps_init();
- stack_base[0] = stack;
-
rc = setup_cpu_root_pgt(0);
if ( rc )
panic("Error %d setting up PV root page table\n", rc);
- rc = allocate_perdomain_local_l3(0);
- if ( rc )
- panic("Error %d setting up local per-domain L3\n", rc);
if ( cpu_has_xen_shstk )
{
asm volatile ("setssbsy" ::: "memory");
}
+ printk("shstk enabled\n");
reset_stack_and_jump(init_done);
}
info->last_spec_ctrl = default_xen_spec_ctrl;
}
+ ret = allocate_perdomain_local_l3(0);
+ if ( ret )
+ panic("Error %d setting up local per-domain L3\n", ret);
+
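+ /*
+ * Map the BSP stack into the per-CPU fixmap area, as the BSP switches onto
+ * that mapping just below.
+ */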
+ for ( i = 0; i < (1U << STACK_ORDER); i++ )
+ {
+ percpu_set_fixmap(PERCPU_STACK_IDX(0) + i,
+ _mfn(virt_to_mfn(bsp_stack + i * PAGE_SIZE)),
+ PAGE_HYPERVISOR_RW);
+ printk("CPU%u populating %p -> %lx\n", 0,
+ percpu_fix_to_virt(PERCPU_STACK_IDX(0) + i),
+ virt_to_mfn(bsp_stack + i * PAGE_SIZE));
+ }
+
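+ /* Remap the IST and primary shadow stack pages with SHSTK permissions. */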
+ percpu_set_fixmap(PERCPU_STACK_IDX(0),
+ _mfn(virt_to_mfn(bsp_stack)),
+ PAGE_HYPERVISOR_SHSTK);
+ percpu_set_fixmap(PERCPU_STACK_IDX(0) + PRIMARY_SHSTK_SLOT,
+ _mfn(virt_to_mfn(bsp_stack +
+ PRIMARY_SHSTK_SLOT * PAGE_SIZE)),
+ PAGE_HYPERVISOR_SHSTK);
+
+ stack_base[0] = bsp_stack;
+
/* Copy the cpu info block, and move onto the BSP stack. */
- bsp_info = get_cpu_info_from_stack((unsigned long)bsp_stack);
+ bsp_info = get_cpu_info_from_stack((unsigned long)PERCPU_STACK_ADDR(0));
*bsp_info = *info;
asm volatile ("mov %[stk], %%rsp; jmp %c[fn]" ::
ret = cpu_down(cpu);
return ret;
}
+
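+/*
+ * Map the stack of the CPU that initiated the function call into the local
+ * per-CPU fixmap, so that pointers into it (e.g. the call argument) can be
+ * dereferenced on this CPU.
+ */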
+void arch_smp_pre_callfunc(unsigned int cpu)
+{
+ unsigned int i;
+
+ if ( cpu == smp_processor_id() )
+ return;
+
+ for ( i = 0; i < (1U << STACK_ORDER); i++ )
+ percpu_set_fixmap(PERCPU_STACK_IDX(cpu) + i,
+ _mfn(virt_to_mfn(stack_base[cpu] + i * PAGE_SIZE)),
+ __PAGE_HYPERVISOR_RW);
+}
+
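+/* Remove the temporary mapping of the caller stack and flush it locally. */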
+void arch_smp_post_callfunc(unsigned int cpu)
+{
+ unsigned int i;
+
+ if ( cpu == smp_processor_id() )
+ return;
+
+ for ( i = 0; i < (1U << STACK_ORDER); i++ )
+ percpu_clear_fixmap(PERCPU_STACK_IDX(cpu) + i);
+
+ flush_area_local(PERCPU_STACK_ADDR(cpu), FLUSH_TLB | FLUSH_ORDER(STACK_ORDER));
+}
{
int timeout, boot_error = 0, rc = 0;
unsigned long start_eip;
+ unsigned int i;
+ const unsigned int stack_pages = 1U << STACK_ORDER;
/*
* Save current MTRR state in case it was changed since early boot
printk("Booting processor %d/%d eip %lx\n",
cpu, apicid, start_eip);
- stack_start = stack_base[cpu] + STACK_SIZE - sizeof(struct cpu_info);
+ for ( i = 0; i < stack_pages; i++ )
+ {
+ percpu_set_fixmap_remote(cpu, PERCPU_STACK_IDX(cpu) + i,
+ _mfn(virt_to_mfn(stack_base[cpu] +
+ i * PAGE_SIZE)),
+ PAGE_HYPERVISOR_RW);
+ printk("CPU%u populating %p -> %lx\n", cpu,
+ percpu_fix_to_virt(PERCPU_STACK_IDX(cpu) + i),
+ virt_to_mfn(stack_base[cpu] + i * PAGE_SIZE));
+ }
+
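+ /* The IST and primary shadow stack pages need SHSTK permissions. */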
+ percpu_set_fixmap_remote(cpu, PERCPU_STACK_IDX(cpu),
+ _mfn(virt_to_mfn(stack_base[cpu])),
+ PAGE_HYPERVISOR_SHSTK);
+ percpu_set_fixmap_remote(cpu, PERCPU_STACK_IDX(cpu) + PRIMARY_SHSTK_SLOT,
+ _mfn(virt_to_mfn(stack_base[cpu] +
+ PRIMARY_SHSTK_SLOT * PAGE_SIZE)),
+ PAGE_HYPERVISOR_SHSTK);
+
+ ASSERT(IS_ALIGNED((unsigned long)PERCPU_STACK_ADDR(cpu), STACK_SIZE));
+
+ stack_start = PERCPU_STACK_ADDR(cpu) + STACK_SIZE - sizeof(struct cpu_info);
/*
* If per-CPU idle root page table has been allocated, switch to it as
stack = alloc_xenheap_pages(STACK_ORDER, memflags);
if ( stack )
- memguard_guard_stack(stack);
+ memguard_guard_stack(stack, cpu);
return stack;
}
static const u16 fcw = FCW_DEFAULT;
static const u32 mxcsr = MXCSR_DEFAULT;
struct efi_rs_state state = { .cr3 = 0 };
+ root_pgentry_t *efi_pgt, *idle_pgt;
if ( mfn_eq(efi_l4_mfn, INVALID_MFN) )
return state;
efi_rs_on_cpu = smp_processor_id();
+ /* Insert the current per-domain slot for the stack mapping. */
+ efi_pgt = map_domain_page(efi_l4_mfn);
+ idle_pgt = maddr_to_virt(idle_vcpu[efi_rs_on_cpu]->arch.cr3);
+ efi_pgt[root_table_offset(PERDOMAIN_VIRT_START)].l4 =
+ idle_pgt[root_table_offset(PERDOMAIN_VIRT_START)].l4;
+ unmap_domain_page(efi_pgt);
+
/* prevent fixup_page_fault() from doing anything */
irq_enter();
void (*func) (void *info);
void *info;
int wait;
+ unsigned int caller;
cpumask_t selected;
} call_data;
call_data.func = func;
call_data.info = info;
call_data.wait = wait;
+ call_data.caller = smp_processor_id();
smp_send_call_function_mask(&call_data.selected);
if ( !cpumask_test_cpu(cpu, &call_data.selected) )
return;
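+ /* @info may point into the caller stack, hence map it on this CPU. */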
+ arch_smp_pre_callfunc(call_data.caller);
+
irq_enter();
if ( unlikely(!func) )
}
irq_exit();
+
+ arch_smp_post_callfunc(call_data.caller);
}
/*
void initialize_cpu_data(unsigned int cpu);
int setup_cpu_root_pgt(unsigned int cpu);
+void arch_smp_pre_callfunc(unsigned int cpu);
+void arch_smp_post_callfunc(unsigned int cpu);
+
#endif /* __XEN_SMP_H__ */