]> xenbits.xensource.com Git - unikraft/unikraft.git/commitdiff
{lib,arch,plat}: Redo syscall ctx's and `swapgs` logic
authorSergiu Moga <sergiu@unikraft.io>
Sun, 3 Mar 2024 15:00:20 +0000 (17:00 +0200)
committerUnikraft Bot <monkey@unikraft.io>
Fri, 31 May 2024 13:45:34 +0000 (13:45 +0000)
To make git bisecting and rebasing significantly easier and avoid
builds breaking across commits, this whole set of changes shall be
introduced under one single all encompassing commit.

Following the introduction of the concept of auxiliary stack pointers,
swapgs, `struct uk_syscall_ctx` and `struct ukarch_sysregs`, a number
of things have emerged:
- the aforementioned structs are very generic so they should be moved
under libcontext (arch/)
- swapgs introduces a significant inconsistency between ARM64 and x86_64
as we never know during an exception the state of
MSR_GS_BASE/MSR_KERNEL_GS_BASE
- auxiliary stack pointers have increased flexibility as every thread
and LCPU can have one and have private data stored in there that may
be accessed anytime, dependency free

Thus, this commit does the following:
1. Move/rename aforementioned structures to libcontext and document them
- lib/syscall_shim/arch/x86_64/sysregs.c -> arch/x86/sysctx.c
- lib/syscall_shim/arch/x86_64/include/arch/sysregs.h -> arch/x86/x86_64/include/uk/asm/sysctx.h
- s/struct ukarch_sysregs/struct ukarch_sysctx/ (and all related defs)
- struct uk_syscall_ctx from lib/syscall_shim/include/uk/syscall.h to
include/uk/arch/ctx.h as struct ukarch_execenv
- s/struct uk_syscall_ctx/struct ukarch_execenv/ (and all related defs)
- actually comment these functions
- re-adjust all places that make use of such definitions

2. Get rid of the `swapgs`, architecture specific holdback by exploiting
the flexibility of auxiliary stacks through the introduction of a new
always existing control block at their top end:
- introduce `struct ukarch_auxspcb` under libcontext
- add Unikraft system context as field to it so that we always have and
know Unikraft TLS (and LCPU in case of x86_64) in a dependency free
and assumption free manner
- add a current frame pointer field: since the auxspcb will be part of
the auxiliary stack, we need to know the safe place where we can start
using the auxiliary stack area as a stack (this is also helpful in cases
where we need to nest on the auxstack)
- for the aforementioned fields/structs, init/getter/setter functions have
been added and documented
- now the `swapgs` pair will only be done very early during system call
entry (and only there, not on clone child exit anymore either) just
enough so that we, first things first, switch to auxstack and push auxsp
so that on entry to C handler we will know that we must do a call to
`ukarch_sysctx_load` on the Unikraft sysctx we can get from the pushed
auxsp (another benefit of this is we get rid of MSR read/writes)

IMPORTANT NOTE: Additionally, some minor fixes have been made:
- Do not switch stack pointer to execenv pointer (previously
known as uk_syscall_ctx) during execenv loading as this implies that
functions such as `ukarch_ectx_load` or `ukarch_sysctx_load` would reuse
the space after the execenv as stack. While this is safe if the
execenv was passed through the stack, it is definitely not safe if it was
passed through something like a heap buffer that may be bounded to the
execenv size by the caller. Instead, use one of the callee-saved
registers
- Set IRQ flag of the pushed flags of the caller during system call
early assembly entry (both native and binary for both architectures)
so that we don't have to explicitly set it during something like clone
child creation. This also reflects the reality better as no syscall
caller will have IRQ's disabled.
- Do not use spsr_el1, esr_el1 and elr_el1 during native system call
assembly prologue (UK_SYSCALL_EXECENV_PROLOGUE_DEFINE) on Arm, as they
are invalid because there is no actual SVC/exception happening. Instead,
try to emulate it by manually building sane values for them on the
created execenv to replicate an actual SVC while benefitting from not
dealing with the performance impacting flow of actually taking a SVC.

Signed-off-by: Sergiu Moga <sergiu@unikraft.io>
Approved-by: Michalis Pappas <michalis@unikraft.io>
Reviewed-by: Simon Kuenzer <simon.kuenzer@unikraft.io>
GitHub-Closes: #1346

49 files changed:
arch/arm/Makefile.uk
arch/arm/arm64/execenv.S [new file with mode: 0644]
arch/arm/arm64/include/uk/asm/sysctx.h [new file with mode: 0644]
arch/arm/ctx.c
arch/arm/sysctx.c [new file with mode: 0644]
arch/x86/Makefile.uk
arch/x86/ctx.c
arch/x86/sysctx.c [new file with mode: 0644]
arch/x86/x86_64/execenv.S [new file with mode: 0644]
arch/x86/x86_64/include/uk/asm/sysctx.h [new file with mode: 0644]
include/uk/arch/ctx.h
include/uk/plat/syscall.h
lib/posix-process/Makefile.uk
lib/posix-process/arch/arm64/clone.c
lib/posix-process/arch/arm64/include/arch/clone.h
lib/posix-process/arch/x86_64/clone.c
lib/posix-process/arch/x86_64/include/arch/clone.h
lib/posix-process/clone.c
lib/posix-process/exportsyms.uk
lib/syscall_shim/Makefile.uk
lib/syscall_shim/arch/arm64/include/arch/regmap_usc.h [deleted file]
lib/syscall_shim/arch/arm64/include/arch/syscall_prologue.h
lib/syscall_shim/arch/arm64/include/arch/sysregs.h [deleted file]
lib/syscall_shim/arch/arm64/syscall_ctx.S [deleted file]
lib/syscall_shim/arch/arm64/sysregs.c [deleted file]
lib/syscall_shim/arch/regmap_linuxabi.h
lib/syscall_shim/arch/x86_64/include/arch/regmap_usc.h [deleted file]
lib/syscall_shim/arch/x86_64/include/arch/syscall_prologue.h
lib/syscall_shim/arch/x86_64/include/arch/sysregs.h [deleted file]
lib/syscall_shim/arch/x86_64/syscall_ctx.S [deleted file]
lib/syscall_shim/arch/x86_64/sysregs.c [deleted file]
lib/syscall_shim/include/uk/syscall.h
lib/syscall_shim/syscall_provided.awk
lib/syscall_shim/syscall_r_static.awk
lib/syscall_shim/syscall_stubs.awk
lib/syscall_shim/uk_syscall6_r.awk
lib/syscall_shim/uk_syscall_binary.c
lib/ukboot/boot.c
lib/uksched/thread.c
plat/common/arm/traps_arm64.c
plat/common/include/uk/plat/common/lcpu.h
plat/common/lcpu.c
plat/common/x86/lcpu.c
plat/common/x86/syscall.S
plat/kvm/arm/exceptions.S
plat/kvm/arm/lcpu.c
plat/kvm/x86/lcpu.c
plat/xen/lcpu.c
support/scripts/checkpatch.pl

index 12388b36890946ef59eb375bcdc41043de35dd96..44986b7837d7fd5e23a8274fd2c5730002a8800e 100644 (file)
@@ -12,5 +12,7 @@ $(eval $(call addlib,libcontext))
 LIBCONTEXT_CINCLUDES-y += -I$(UK_PLAT_COMMON_BASE)/include
 LIBCONTEXT_SRCS-y += $(LIBCONTEXT_BASE)/ctx.c|isr
 LIBCONTEXT_SRCS-y += $(LIBCONTEXT_BASE)/ectx.c|isr
+LIBCONTEXT_SRCS-y += $(LIBCONTEXT_BASE)/sysctx.c|isr
+LIBCONTEXT_SRCS-$(CONFIG_ARCH_ARM_64) += $(LIBCONTEXT_BASE)/arm64/execenv.S|arm64
 LIBCONTEXT_SRCS-$(CONFIG_ARCH_ARM_64) += $(LIBCONTEXT_BASE)/arm64/ctx.S|arm64
 LIBCONTEXT_SRCS-$(CONFIG_ARCH_ARM_64) += $(LIBCONTEXT_BASE)/arm64/tls.c|arm64
diff --git a/arch/arm/arm64/execenv.S b/arch/arm/arm64/execenv.S
new file mode 100644 (file)
index 0000000..f73ee75
--- /dev/null
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright (c) 2023, Unikraft GmbH and The Unikraft Authors.
+ * Licensed under the BSD-3-Clause License (the "License").
+ * You may not use this file except in compliance with the License.
+ */
+
+#include <uk/arch/ctx.h>
+#include <uk/arch/lcpu.h>
+#include <uk/asm.h>
+
+/**
+ * Loads a given execution environment on the currently executing CPU.
+ *
+ * NOTE: This function cannot be returned from, it overwrites the entire current
+ *       context.
+ *
+ * @X0 execenv
+ *   Reference to execution environment to load
+ */
+ENTRY(ukarch_execenv_load)
+       /* Mask IRQ to make sure restore would not be interrupted by IRQ */
+       msr     daifset, #2
+
+       /**
+        * Assign pointer to execution environment to load (first argument).
+        * We do this because it will be easy to keep track of it as, unlike
+        * x0, we do not have to store/restore x19 across function calls.
+        * As per AAPCS64, x19-x28 are callee-saved.
+        */
+       mov     x19, x0
+
+       /**
+        * Load execenv's stored ECTX which resides at offset:
+        * sizeof(struct __regs) + sizeof(struct ukarch_sysctx) from beginning
+        * of execenv.
+        */
+       add     x0, x0, #(__REGS_SIZEOF + UKARCH_SYSCTX_SIZE)
+       bl      ukarch_ectx_load
+       /**
+        * As stated previously, after function calls, x19 preserved value of
+        * execenv pointer so restore that into x0.
+        */
+       mov     x0, x19
+
+       /**
+        * Load execenv's stored system context which resides at offset:
+        * sizeof(struct __regs) from beginning of execenv.
+        */
+       add     x0, x0, #(__REGS_SIZEOF)
+       bl      ukarch_sysctx_load
+
+       /**
+        * Load execenv's stored general purpose registers which reside at
+        * the beginning.
+        */
+       mov     sp, x19
+
+       /* Restore pstate and exception status register */
+       ldp     x22, x23, [sp, #16 * 16]
+       msr     spsr_el1, x22
+       msr     esr_el1, x23
+
+       /* Restore LR and exception PC */
+       ldp     x30, x21, [sp, #16 * 15]
+       msr     elr_el1, x21
+
+       /* Restore general purpose registers */
+       ldp     x28, x29, [sp, #16 * 14]
+       ldp     x26, x27, [sp, #16 * 13]
+       ldp     x24, x25, [sp, #16 * 12]
+       ldp     x22, x23, [sp, #16 * 11]
+       ldp     x20, x21, [sp, #16 * 10]
+       /* Skip x18, x19 */
+       ldp     x16, x17, [sp, #16 * 8]
+       ldp     x14, x15, [sp, #16 * 7]
+       ldp     x12, x13, [sp, #16 * 6]
+       ldp     x10, x11, [sp, #16 * 5]
+       ldp     x8, x9, [sp, #16 * 4]
+       ldp     x6, x7, [sp, #16 * 3]
+       ldp     x4, x5, [sp, #16 * 2]
+       ldp     x2, x3, [sp, #16 * 1]
+       ldp     x0, x1, [sp, #16 * 0]
+
+       /* Restore stack pointer */
+       ldr     x18, [sp, #__SP_OFFSET]
+       mov     x19, sp
+       mov     sp, x18
+
+       /* Restore x18, x19 */
+       ldp     x18, x19, [x19, #16 * 9]
+
+       eret
diff --git a/arch/arm/arm64/include/uk/asm/sysctx.h b/arch/arm/arm64/include/uk/asm/sysctx.h
new file mode 100644 (file)
index 0000000..43b9cbf
--- /dev/null
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright (c) 2023, Unikraft GmbH and The Unikraft Authors.
+ * Licensed under the BSD-3-Clause License (the "License").
+ * You may not use this file except in compliance with the License.
+ */
+
+#ifndef __UKARCH_CTX_H__
+#error Do not include this header directly
+#endif
+
+#define UKARCH_SYSCTX_SIZE                     16
+
+#if !__ASSEMBLY__
+
+#include <uk/essentials.h>
+
+/* Architecture specific system context */
+struct ukarch_sysctx {
+       /* AAPCS's TLS pointer register */
+       __uptr tpidr_el0;
+
+       __u8 pad[8];    /* Make sure we are a multiple of 16 bytes */
+};
+
+UK_CTASSERT(sizeof(struct ukarch_sysctx) == UKARCH_SYSCTX_SIZE);
+
+/**
+ * Get the TLS pointer from system register context given as argument
+ *
+ * @param sysctx
+ *   The system register context whose TLS pointer to get.
+ * @return
+ *   The TLS pointer stored in the system register context.
+ */
+__uptr ukarch_sysctx_get_tlsp(struct ukarch_sysctx *sysctx);
+
+/**
+ * Set the TLS pointer from system register context given as argument to the
+ * pointer given as argument.
+ *
+ * @param sysctx
+ *   The system register context whose TLS pointer to set.
+ * @param tlsp
+ *   The TLS pointer to store in the system register context.
+ */
+void ukarch_sysctx_set_tlsp(struct ukarch_sysctx *sysctx, __uptr tlsp);
+
+/**
+ * Store the current system context register state into the system context
+ * given as argument. Stores TLS pointer (TPIDR_EL0).
+ *
+ * @param sysctx
+ *   The system register context to store state in.
+ */
+void ukarch_sysctx_store(struct ukarch_sysctx *sysctx);
+
+/**
+ * Load the system context register state from the system context
+ * given as argument. Loads TLS pointer (TPIDR_EL0) into its respective
+ * system register.
+ *
+ * @param sysctx
+ *   The system register context to load state from.
+ */
+void ukarch_sysctx_load(struct ukarch_sysctx *sysctx);
+
+#endif /* !__ASSEMBLY__ */
index 9dc48fa98f865d4c228c721e29395ac40bd2d0c2..9ecddbe1bae447441961b89104220575dbfbbbfd 100644 (file)
@@ -34,6 +34,7 @@
 
 #include <uk/config.h>
 #include <uk/arch/ctx.h>
+#include <uk/plat/lcpu.h>
 #if CONFIG_LIBUKDEBUG
 #include <uk/assert.h>
 #include <uk/print.h>
diff --git a/arch/arm/sysctx.c b/arch/arm/sysctx.c
new file mode 100644 (file)
index 0000000..fd35fba
--- /dev/null
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright (c) 2023, Unikraft GmbH and The Unikraft Authors.
+ * Licensed under the BSD-3-Clause License (the "License").
+ * You may not use this file except in compliance with the License.
+ */
+#include <uk/arch/ctx.h>
+#include <uk/assert.h>
+
+__uptr ukarch_sysctx_get_tlsp(struct ukarch_sysctx *sysctx)
+{
+       UK_ASSERT(sysctx);
+
+       return sysctx->tpidr_el0;
+}
+
+void ukarch_sysctx_set_tlsp(struct ukarch_sysctx *sysctx, __uptr tlsp)
+{
+       UK_ASSERT(sysctx);
+
+       uk_pr_debug("Sysctx %p TLS pointer register updated to %p (before: %p)\n",
+                   sysctx, (void *)tlsp, (void *)sysctx->tpidr_el0);
+
+       sysctx->tpidr_el0 = tlsp;
+}
+
+void ukarch_sysctx_store(struct ukarch_sysctx *sysctx)
+{
+       UK_ASSERT(sysctx);
+
+       sysctx->tpidr_el0 = SYSREG_READ(TPIDR_EL0);
+}
+
+void ukarch_sysctx_load(struct ukarch_sysctx *sysctx)
+{
+       UK_ASSERT(sysctx);
+
+       SYSREG_WRITE(TPIDR_EL0, sysctx->tpidr_el0);
+}
index ffbc516f7d2274800879a9feeaed778144a1204f..4eed830137429b15ce365a37d5f0eba0e6fbdea3 100644 (file)
@@ -12,6 +12,8 @@ LIBCONTEXT_ASINCLUDES-y               += -I$(UK_PLAT_COMMON_BASE)/include
 
 LIBCONTEXT_SRCS-y += $(LIBCONTEXT_BASE)/ctx.c|isr
 LIBCONTEXT_SRCS-y += $(LIBCONTEXT_BASE)/ectx.c|isr
+LIBCONTEXT_SRCS-y += $(LIBCONTEXT_BASE)/sysctx.c|isr
+LIBCONTEXT_SRCS-$(CONFIG_ARCH_X86_64) += $(LIBCONTEXT_BASE)/x86_64/execenv.S|x86_64
 LIBCONTEXT_SRCS-$(CONFIG_ARCH_X86_64) += $(LIBCONTEXT_BASE)/x86_64/ctx.S|x86_64
 LIBCONTEXT_SRCS-$(CONFIG_ARCH_X86_64) += $(LIBCONTEXT_BASE)/x86_64/tls.c|x86_64
 LIBCONTEXT_SRCS-$(CONFIG_ARCH_X86_64) += $(LIBCONTEXT_BASE)/x86_64/fsgsbase.c|x86_64
index d850b2337b31466aba12fb687644137dc192795c..d0f609127b047a5e413914a4d70ca08fe4ff4793 100644 (file)
@@ -33,6 +33,7 @@
 
 #include <uk/config.h>
 #include <uk/arch/ctx.h>
+#include <uk/plat/lcpu.h>
 #if CONFIG_LIBUKDEBUG
 #include <uk/assert.h>
 #include <uk/print.h>
diff --git a/arch/x86/sysctx.c b/arch/x86/sysctx.c
new file mode 100644 (file)
index 0000000..ec86132
--- /dev/null
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright (c) 2023, Unikraft GmbH and The Unikraft Authors.
+ * Licensed under the BSD-3-Clause License (the "License").
+ * You may not use this file except in compliance with the License.
+ */
+#include <uk/assert.h>
+#include <uk/arch/ctx.h>
+
+__uptr ukarch_sysctx_get_tlsp(struct ukarch_sysctx *sysctx)
+{
+       UK_ASSERT(sysctx);
+
+       return sysctx->fsbase;
+}
+
+void ukarch_sysctx_set_tlsp(struct ukarch_sysctx *sysctx, __uptr tlsp)
+{
+       UK_ASSERT(sysctx);
+
+       uk_pr_debug("Sysctx %p TLS pointer register updated to %p (before: %p)\n",
+                   sysctx, (void *)tlsp, (void *)sysctx->fsbase);
+
+       sysctx->fsbase = tlsp;
+}
+
+__uptr ukarch_sysctx_get_gsbase(struct ukarch_sysctx *sysctx)
+{
+       UK_ASSERT(sysctx);
+
+       return sysctx->gsbase;
+}
+
+void ukarch_sysctx_set_gsbase(struct ukarch_sysctx *sysctx, __uptr gsbase)
+{
+       UK_ASSERT(sysctx);
+
+       uk_pr_debug("Sysctx %p GS_BASE register updated to %p (before: %p)\n",
+                   sysctx, (void *)gsbase, (void *)sysctx->gsbase);
+
+       sysctx->gsbase = gsbase;
+}
+
+void ukarch_sysctx_store(struct ukarch_sysctx *sysctx)
+{
+       UK_ASSERT(sysctx);
+
+       sysctx->gsbase = rdgsbase();
+       sysctx->fsbase = rdfsbase();
+}
+
+void ukarch_sysctx_load(struct ukarch_sysctx *sysctx)
+{
+       UK_ASSERT(sysctx);
+
+       wrgsbase(sysctx->gsbase);
+       wrfsbase(sysctx->fsbase);
+}
diff --git a/arch/x86/x86_64/execenv.S b/arch/x86/x86_64/execenv.S
new file mode 100644 (file)
index 0000000..f9c1274
--- /dev/null
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright (c) 2023, Unikraft GmbH and The Unikraft Authors.
+ * Licensed under the BSD-3-Clause License (the "License").
+ * You may not use this file except in compliance with the License.
+ */
+
+#include <uk/arch/ctx.h>
+#include <uk/arch/lcpu.h>
+#include <uk/asm.h>
+
+/**
+ * Loads a given execution environment on the currently executing CPU.
+ *
+ * NOTE: This function cannot be returned from, it overwrites the entire current
+ *       context.
+ *
+ * @RDI execenv
+ *   Reference to execution environment to load
+ */
+ENTRY(ukarch_execenv_load)
+       .cfi_startproc simple
+       .cfi_def_cfa rsp, 0
+
+       /**
+        * Do all this with IRQ's disabled, as the final iretq should pop off
+        * a proper rflags anyway.
+        */
+       cli
+
+       /**
+        * Assign pointer to execution environment to load (first argument).
+        * We do this because it will be easy to keep track of it as, unlike
+        * %rdi, we do not have to store/restore %r12 across function calls.
+        * As per AMD64 ABI, %r12-r15 are callee-saved.
+        */
+       movq    %rdi, %r12
+
+       /**
+        * Load execenv's stored ECTX which resides at offset:
+        * sizeof(struct __regs) + sizeof(struct ukarch_sysctx) from beginning
+        * of execenv.
+        */
+       addq    $(__REGS_SIZEOF + UKARCH_SYSCTX_SIZE), %rdi
+       call    ukarch_ectx_load
+       /**
+        * As stated previously, after function calls, %r12 preserved value of
+        * execenv pointer so restore that into %rdi.
+        */
+       movq    %r12, %rdi
+
+       /**
+        * Load execenv's stored system context which resides at offset:
+        * sizeof(struct __regs) from beginning of execenv.
+        */
+       addq    $(__REGS_SIZEOF), %rdi
+       call    ukarch_sysctx_load
+
+       /**
+        * Load execenv's stored general purpose registers which resides at
+        * the beginning.
+        */
+       movq    %r12, %rsp
+       .cfi_undefined rsp
+       addq    $(__REGS_PAD_SIZE), %rsp
+
+       popq    %r15
+       popq    %r14
+       popq    %r13
+       popq    %r12
+       popq    %rbp
+       popq    %rbx
+       popq    %r11
+       popq    %r10
+       popq    %r9
+       popq    %r8
+       popq    %rax
+       popq    %rcx
+       popq    %rdx
+       popq    %rsi
+       popq    %rdi
+
+       addq    $(__REGS_PAD_SIZE), %rsp
+
+       iretq
+       .cfi_endproc
diff --git a/arch/x86/x86_64/include/uk/asm/sysctx.h b/arch/x86/x86_64/include/uk/asm/sysctx.h
new file mode 100644 (file)
index 0000000..6956d25
--- /dev/null
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright (c) 2023, Unikraft GmbH and The Unikraft Authors.
+ * Licensed under the BSD-3-Clause License (the "License").
+ * You may not use this file except in compliance with the License.
+ */
+
+#ifndef __UKARCH_CTX_H__
+#error Do not include this header directly
+#endif
+
+#define UKARCH_SYSCTX_SIZE                     16
+
+#if !__ASSEMBLY__
+
+#include <uk/essentials.h>
+
+/* Architecture specific system context */
+struct ukarch_sysctx {
+       /**
+        * The current value of %gs's gsbase register of the application.
+        * On syscall entry, this will be updated to hold the value of
+        * GS_BASE set by the application.
+        */
+       __uptr gsbase;
+
+       /* AMD64 sysV ABI's TLS pointer */
+       __uptr fsbase;
+};
+
+UK_CTASSERT(sizeof(struct ukarch_sysctx) == UKARCH_SYSCTX_SIZE);
+
+/**
+ * Get the TLS pointer from system register context given as argument
+ *
+ * @param sysctx
+ *   The system register context whose TLS pointer to get.
+ * @return
+ *   The TLS pointer stored in the system register context.
+ */
+__uptr ukarch_sysctx_get_tlsp(struct ukarch_sysctx *sysctx);
+
+/**
+ * Set the TLS pointer from system register context given as argument to the
+ * pointer given as argument.
+ *
+ * @param sysctx
+ *   The system register context whose TLS pointer to set.
+ * @param tlsp
+ *   The TLS pointer to store in the system register context.
+ */
+void ukarch_sysctx_set_tlsp(struct ukarch_sysctx *sysctx, __uptr tlsp);
+
+/**
+ * NOTE: X86_64 SPECIFIC
+ * Get the MSR_GS_BASE from system register context given as argument. We
+ * use this on x86_64 to store current LCPU.
+ *
+ * @param sysctx
+ *   The system register context whose MSR_GS_BASE to get.
+ * @return
+ *   The MSR_GS_BASE stored in the system register context.
+ */
+__uptr ukarch_sysctx_get_gsbase(struct ukarch_sysctx *sysctx);
+
+/**
+ * NOTE: X86_64 SPECIFIC
+ * Set the MSR_GS_BASE of the system register context given as argument to
+ * the pointer given as argument. We use this on x86_64 to store current LCPU.
+ *
+ * @param sysctx
+ *   The system register context whose MSR_GS_BASE to set.
+ * @param gsbase
+ *   The MSR_GS_BASE to store in the system register context.
+ */
+void ukarch_sysctx_set_gsbase(struct ukarch_sysctx *sysctx, __uptr gsbase);
+
+/**
+ * Store the current system context register state into the system context
+ * given as argument. Stores TLS pointer (MSR_FS_BASE) and LCPU pointer
+ * (MSR_GS_BASE).
+ *
+ * @param sysctx
+ *   The system register context to store state in.
+ */
+void ukarch_sysctx_store(struct ukarch_sysctx *sysctx);
+
+/**
+ * Load the system context register state from the system context
+ * given as argument. Loads TLS pointer (MSR_FS_BASE) and LCPU pointer
+ * (MSR_GS_BASE) into their respective system registers.
+ *
+ * @param sysctx
+ *   The system register context to load state from.
+ */
+void ukarch_sysctx_load(struct ukarch_sysctx *sysctx);
+
+#endif /* !__ASSEMBLY__ */
index 9f45387c97a58136c55efd2a3f111ee1d32a1206..ddb5c40405cf3b8bc768babdc0af8fb2f04dc2bf 100644 (file)
@@ -1,9 +1,11 @@
 /* SPDX-License-Identifier: BSD-3-Clause */
 /*
  * Authors: Simon Kuenzer <simon.kuenzer@neclab.eu>
+ *          Sergiu Moga <sergiu@unikraft.io>
  *
  * Copyright (c) 2021, NEC Laboratories Europe GmbH, NEC Corporation.
  *                     All rights reserved.
+ * Copyright (c) 2024, Unikraft GmbH. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -36,6 +38,7 @@
 
 #include <uk/arch/types.h>
 #include <uk/asm/ctx.h>
+#include <uk/asm/sysctx.h>
 
 #ifndef __ASSEMBLY__
 #include <uk/config.h>
 #define UKARCH_CTX_OFFSETOF_SP 8
 #endif
 
-#ifndef __ASSEMBLY__
+/* We must make sure that ECTX is aligned, so we make use of some padding,
+ * whose size is equal to what we need to add to UKARCH_ECTX_SIZE
+ * to make it aligned with UKARCH_ECTX_ALIGN
+ */
+#define UKARCH_EXECENV_PAD_SIZE                                        \
+       (ALIGN_UP(UKARCH_ECTX_SIZE,                             \
+                UKARCH_ECTX_ALIGN) -                           \
+        UKARCH_ECTX_SIZE)
+
+/* If we make sure that the in-memory structure's end address is aligned to
+ * the ECTX alignment, then subtracting from that end address a value that is
+ * also a multiple of that alignment, guarantees that the resulted address
+ * is also ECTX aligned.
+ */
+#define UKARCH_EXECENV_END_ALIGN                               \
+       UKARCH_ECTX_ALIGN
+#define UKARCH_EXECENV_SIZE                                    \
+       (UKARCH_EXECENV_PAD_SIZE +                              \
+        UKARCH_ECTX_SIZE +                                     \
+        UKARCH_SYSCTX_SIZE +                                   \
+        __REGS_SIZEOF)
+
+#define UKARCH_EXECENV_OFFSETOF_REGS                           0x0
+#define UKARCH_EXECENV_OFFSETOF_SYSCTX                         \
+       (UKARCH_EXECENV_OFFSETOF_REGS + __REGS_SIZEOF)
+#define UKARCH_EXECENV_OFFSETOF_ECTX                           \
+       (UKARCH_EXECENV_OFFSETOF_SYSCTX + UKARCH_SYSCTX_SIZE)
+
+/**
+ * Size of the current frame pointer field of the Auxiliary Stack Pointer Control Block:
+ */
+#if (defined __PTR_IS_16)
+#define UKARCH_AUXSPCB_CURR_FP_SIZE                            2
+#elif (defined __PTR_IS_32)
+#define UKARCH_AUXSPCB_CURR_FP_SIZE                            4
+#elif (defined __PTR_IS_64)
+#define UKARCH_AUXSPCB_CURR_FP_SIZE                            8
+#endif
+
+/**
+ * Size of the Auxiliary Stack Pointer Control Block
+ * - sizeof(__uptr) for the frame pointer field
+ * - sizeof(struct ukarch_sysctx) for the field representing the current
+ * thread's Kernel system context
+ */
+#define UKARCH_AUXSPCB_SIZE                                    \
+       (ALIGN_UP(UKARCH_AUXSPCB_CURR_FP_SIZE +                 \
+                 UKARCH_SYSCTX_SIZE, UKARCH_AUXSP_ALIGN))
+
+/**
+ * Size of the padding required to ensure the size of the Auxiliary Stack
+ * Pointer Control Block is a multiple of the alignment required for the
+ * auxiliary stack pointer.
+ */
+#define UKARCH_AUXSPCB_PAD                                     \
+       (UKARCH_AUXSPCB_SIZE -                                  \
+        (UKARCH_AUXSPCB_CURR_FP_SIZE + UKARCH_SYSCTX_SIZE))
+
+/**
+ * Offset to current frame pointer field.
+ */
+#define UKARCH_AUXSPCB_OFFSETOF_CURR_FP                                0x0
+
+/**
+ * Offset to current Unikraft system context field.
+ */
+#define UKARCH_AUXSPCB_OFFSETOF_UKSYSCTX                       \
+       (UKARCH_AUXSPCB_OFFSETOF_CURR_FP +                      \
+        UKARCH_AUXSPCB_CURR_FP_SIZE)
+
+#if !__ASSEMBLY__
 struct ukarch_ctx {
        __uptr ip;      /**< instruction pointer */
        __uptr sp;      /**< stack pointer */
 } __packed;
 
+struct ukarch_execenv {
+       /* General purpose/flags registers */
+       struct __regs regs;
+       /* System registers (e.g. TLS pointer) */
+       struct ukarch_sysctx sysctx;
+       /* Extended context (e.g. SIMD etc.) */
+       __u8 ectx[UKARCH_ECTX_SIZE];
+       /* Padding for end alignment */
+       __u8 pad[UKARCH_EXECENV_PAD_SIZE];
+};
+
+UK_CTASSERT(sizeof(struct ukarch_execenv) == UKARCH_EXECENV_SIZE);
+UK_CTASSERT(IS_ALIGNED(UKARCH_EXECENV_PAD_SIZE + UKARCH_ECTX_SIZE,
+                      UKARCH_ECTX_ALIGN));
+UK_CTASSERT(__offsetof(struct ukarch_execenv, regs) ==
+           UKARCH_EXECENV_OFFSETOF_REGS);
+UK_CTASSERT(__offsetof(struct ukarch_execenv, sysctx) ==
+           UKARCH_EXECENV_OFFSETOF_SYSCTX);
+UK_CTASSERT(__offsetof(struct ukarch_execenv, ectx) ==
+           UKARCH_EXECENV_OFFSETOF_ECTX);
+
+/**
+ * Layout of the auxiliary stack and its embedded control block located at its
+ * end (towards higher address space).
+ *               ┌─────────────── auxsp
+ *               │
+ *    ┌──────────▼───────────┐  ▲ ▲                    │ auxsp
+ *    │ struct ukarch_auxspcb│  │ │                    │
+ *    │{                     │  │ │                    │
+ *┌───┼─────curr_fp          │  │ │                    │
+ *│   │     uksysctx         │  │ │UKARCH_AUXSPCB_SIZE │
+ *│   │[pad till auxsp align]│  │ │                    │
+ *│   │}                     │  │ │                    │
+ *│ ┌►│◄─────────────────────►  │ ▼                    │
+ *│ │ │   UKARCH_AUXSP_ALIGN │  │                      │
+ *│ │ │                      │  │                      │ STACK GROWTH
+ *│ │ │                      │  │ AUXSTACK_SIZE        │  DIRECTION
+ *│ │ │ curr_fp points       │  │                      │
+ *│ │ │ to a safe            │  │                      │
+ *└►│ │ usable frame in      │  │                      │
+ *  │ │ the auxstack         │  │                      │
+ *  │ │ (the area below the  │  │                      │
+ *  │ │  auxstack control    │  │                      │
+ *  │ │       block)         │  │                      │
+ *  │ │                      │  │                      │
+ *  │ │                      │  │                      │
+ *  │ │                      │  │                      │
+ *  │ │                      │  │                      │
+ *  └►└──────────────────────┘  ▼                      ▼ auxsp - AUXSTACK_SIZE
+ *    ◄─────────────────────►
+ *       UKARCH_AUXSP_ALIGN
+ */
+
+#define SP_IN_AUXSP(sp, auxsp)                                 \
+       (IN_RANGE((sp), (auxsp) - AUXSTACK_SIZE, AUXSTACK_SIZE))
+
+struct ukarch_auxspcb {
+       /* Current safe frame pointer inside the auxiliary stack area */
+       __uptr curr_fp;
+       /* Unikraft system registers (e.g. TLS pointer) */
+       struct ukarch_sysctx uksysctx;
+       /* Padding for end alignment, the auxiliary stack area begins after */
+       __u8 pad[UKARCH_AUXSPCB_PAD];
+};
+
+UK_CTASSERT(sizeof(struct ukarch_auxspcb) == UKARCH_AUXSPCB_SIZE);
+UK_CTASSERT(IS_ALIGNED(sizeof(struct ukarch_auxspcb), UKARCH_AUXSP_ALIGN));
+UK_CTASSERT(__offsetof(struct ukarch_auxspcb, curr_fp) ==
+           UKARCH_AUXSPCB_OFFSETOF_CURR_FP);
+UK_CTASSERT(__offsetof(struct ukarch_auxspcb, uksysctx) ==
+           UKARCH_AUXSPCB_OFFSETOF_UKSYSCTX);
+
 /*
  * Context functions are not allowed to return
  */
@@ -194,6 +339,127 @@ void ukarch_ctx_init_entry2(struct ukarch_ctx *ctx,
  */
 void ukarch_ctx_switch(struct ukarch_ctx *store, struct ukarch_ctx *load);
 
+/**
+ * Initialize an auxiliary stack pointer. This must be always called the
+ * first time you create an auxiliary stack pointer.
+ *
+ * @param auxsp
+ *   The auxiliary stack pointer to initialize. Must point to the high end of
+ *  the auxiliary stack.
+ *
+ * NOTE: Auxiliary stack pointer must have UKARCH_AUXSP_ALIGN alignment.
+ *
+ */
+static inline void ukarch_auxsp_init(__uptr auxsp)
+{
+       struct ukarch_auxspcb *auxspcb_ptr;
+
+       UK_ASSERT(auxsp);
+       UK_ASSERT(IS_ALIGNED(auxsp, UKARCH_AUXSP_ALIGN));
+
+       auxspcb_ptr = (struct ukarch_auxspcb *)(auxsp - sizeof(*auxspcb_ptr));
+       auxspcb_ptr->curr_fp = auxsp - sizeof(*auxspcb_ptr);
+       UK_ASSERT(IS_ALIGNED(auxspcb_ptr->curr_fp, UKARCH_AUXSP_ALIGN));
+}
+
+/**
+ * Get the control block of the auxiliary stack pointer.
+ *
+ * @param auxsp
+ *   The auxiliary stack pointer whose control block to get.
+ *  Must point to the high end of the auxiliary stack.
+ * @return
+ *   The control block of the auxiliary stack pointer
+ *
+ */
+static inline struct ukarch_auxspcb *ukarch_auxsp_get_cb(__uptr auxsp)
+{
+       struct ukarch_auxspcb *auxspcb_ptr;
+
+       UK_ASSERT(auxsp);
+       UK_ASSERT(IS_ALIGNED(auxsp, UKARCH_AUXSP_ALIGN));
+
+       auxspcb_ptr = (struct ukarch_auxspcb *)(auxsp - sizeof(*auxspcb_ptr));
+       UK_ASSERT(IS_ALIGNED(auxspcb_ptr->curr_fp, UKARCH_AUXSP_ALIGN));
+
+       return auxspcb_ptr;
+}
+
+/**
+ * Set the Unikraft TLS pointer of the control block of the auxiliary stack
+ * pointer.
+ *
+ * @param auxspcb
+ *   The auxiliary stack control block pointer whose Unikraft TLS pointer to
+ *  set.
+ * @param uktlsp
+ *   The TLS pointer to set in the control block of the auxstack pointer.
+ *
+ */
+static inline void ukarch_auxspcb_set_uktlsp(struct ukarch_auxspcb *auxspcb,
+                                            __uptr uktlsp)
+{
+       UK_ASSERT(auxspcb);
+       UK_ASSERT(IS_ALIGNED((__uptr)auxspcb, UKARCH_AUXSP_ALIGN));
+       ukarch_sysctx_set_tlsp(&auxspcb->uksysctx, uktlsp);
+}
+
+/**
+ * Get the Unikraft TLS pointer of the auxiliary stack pointer.
+ *
+ * @param auxspcb
+ *   The auxiliary stack control block pointer whose Unikraft TLS pointer to
+ *  get.
+ * @return
+ *   The Unikraft TLS pointer of the auxiliary stack pointer
+ *
+ */
+static inline __uptr ukarch_auxspcb_get_uktlsp(struct ukarch_auxspcb *auxspcb)
+{
+       UK_ASSERT(auxspcb);
+       UK_ASSERT(IS_ALIGNED((__uptr)auxspcb, UKARCH_AUXSP_ALIGN));
+       return ukarch_sysctx_get_tlsp(&auxspcb->uksysctx);
+}
+
+/**
+ * Set the current frame pointer of the control block of the auxiliary stack
+ * pointer.
+ *
+ * @param auxspcb
+ *   The auxiliary stack control block pointer whose current frame pointer to
+ *  set.
+ * @param curr_fp
+ *   The current frame pointer to set in the control block of the auxstack
+ *  pointer.
+ *
+ */
+static inline void ukarch_auxspcb_set_curr_fp(struct ukarch_auxspcb *auxspcb,
+                                             __uptr curr_fp)
+{
+       UK_ASSERT(auxspcb);
+       UK_ASSERT(IS_ALIGNED((__uptr)auxspcb, UKARCH_AUXSP_ALIGN));
+       UK_ASSERT(IS_ALIGNED(curr_fp, UKARCH_AUXSP_ALIGN));
+       auxspcb->curr_fp = curr_fp;
+}
+
+/**
+ * Get the current frame pointer of the control block of the auxiliary stack
+ * pointer.
+ *
+ * @param auxspcb
+ *   The auxiliary stack control block pointer whose current frame pointer to
+ *  get.
+ * @return
+ *   The current frame pointer of the auxiliary stack pointer
+ *
+ */
+static inline __uptr ukarch_auxspcb_get_curr_fp(struct ukarch_auxspcb *auxspcb)
+{
+       UK_ASSERT(auxspcb);
+       UK_ASSERT(IS_ALIGNED((__uptr)auxspcb, UKARCH_AUXSP_ALIGN));
+       return auxspcb->curr_fp;
+}
+
 /**
  * State of extended context, like additional CPU registers and units
  * (e.g., floating point, vector registers)
@@ -249,6 +515,17 @@ void ukarch_ectx_store(struct ukarch_ectx *state);
  */
 void ukarch_ectx_load(struct ukarch_ectx *state);
 
+/**
+ * Loads a given execution environment on the currently executing CPU.
+ *
+ * NOTE: This function does not return, it overwrites the entire current
+ *       context.
+ *
+ * @param state
+ *   Address of the execution environment (`struct ukarch_execenv`) to load
+ */
+void ukarch_execenv_load(long state) __noreturn;
+
 #ifdef CONFIG_ARCH_X86_64
 /**
  * Compare the given extended context with the state of the currently executing
index a4b5ca3fca00ea6f8405a6c9c2bc4e3174df7a78..a63b6be37634f0be806b82eb37fd59c076aa3f55 100644 (file)
@@ -43,6 +43,8 @@ extern "C" {
 
 #ifdef CONFIG_HAVE_SYSCALL
 
+struct uk_syscall_ctx;
+
 /**
  * Called by platform library when a binary system call was trapped.
  * This function has to be provided by a non-platform library for
index a96685a45f78dae5d8e94b9bb40f51d399c416c7..2ed42405f8a3114a38227b8762ef64c245e52f1b 100644 (file)
@@ -23,7 +23,7 @@ LIBPOSIX_PROCESS_CINCLUDES-y += -I$(UK_PLAT_COMMON_BASE)/include
 LIBPOSIX_PROCESS_ASINCLUDES-y += -I$(UK_PLAT_COMMON_BASE)/include
 LIBPOSIX_PROCESS_CXXINCLUDES-y += -I$(UK_PLAT_COMMON_BASE)/include
 
-UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS_CLONE) += clone-5u
+UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS_CLONE) += clone-5e
 UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS) += execve-3
 UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS) += wait4-4 waitid-4
 UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS) += getpgid-1
index db9413009c800934ab23f4d3198fd10ef9e0e0f0..877069063fe1002133924294f7dcb548d1988523 100644 (file)
@@ -7,51 +7,55 @@
 #include <arm/arm64/irq.h>
 #include <string.h>
 #include <uk/process.h>
+#include <uk/arch/ctx.h>
 
-void uk_syscall_ctx_popall(void);
-
-void clone_setup_child_ctx(struct uk_syscall_ctx *pusc,
-                                 struct uk_thread *child, __uptr sp)
+void clone_setup_child_ctx(struct ukarch_execenv *pexecenv,
+                          struct uk_thread *child, __uptr sp)
 {
-       __uptr auxsp_pos = child->auxsp;
-       struct uk_syscall_ctx *cusc;
+       struct ukarch_execenv *cexecenv;
+       struct ukarch_auxspcb *auxspcb;
+       __uptr auxsp_pos;
+
+       UK_ASSERT(pexecenv);
+       UK_ASSERT(child);
+       UK_ASSERT(sp);
+
+       auxspcb = ukarch_auxsp_get_cb(child->auxsp);
+       UK_ASSERT(auxspcb);
+
+       auxsp_pos = ukarch_auxspcb_get_curr_fp(auxspcb);
+       UK_ASSERT(auxsp_pos);
 
        /* Create a child context whose stack pointer is that of the auxiliary
-        * stack, minus the parent's `struct uk_syscall_ctx` saved on the
+        * stack, minus the parent's `struct ukarch_execenv` saved on the
         * auxiliary stack that we will have to first patch now and then pop off
         */
 
-       /* Make room for child's `struct uk_syscall_ctx` and copy them */
-       auxsp_pos = ALIGN_DOWN(auxsp_pos, UK_SYSCALL_CTX_END_ALIGN);
-       auxsp_pos -= UK_SYSCALL_CTX_SIZE;
-       memcpy((void *)auxsp_pos, (void *)pusc, UK_SYSCALL_CTX_SIZE);
+       /* Make room for child's copy of `struct ukarch_execenv` */
+       auxsp_pos = ALIGN_DOWN(auxsp_pos, UKARCH_EXECENV_END_ALIGN);
+       auxsp_pos -= UKARCH_EXECENV_SIZE;
 
        /* Now patch the child's return registers */
-       cusc = (struct uk_syscall_ctx *)auxsp_pos;
-
-       /* Child must see x0 as 0 */
-       cusc->regs.x[0] = 0x0;
+       cexecenv = (struct ukarch_execenv *)auxsp_pos;
+       *cexecenv = *pexecenv;
 
-       /* Make sure we have interrupts enabled, as this is supposedly a normal
-        * userspace thread - the other flags don't really matter since the
-        * first thing the child does is compare x0 to 0x0.
+       /* Child must see x0 as 0, as this is the register holding the return
+        * value of clone children.
         */
-       cusc->regs.spsr_el1 &= ~PSR_I;
+       cexecenv->regs.x[0] = 0x0;
 
-       /* Make sure we do return to what the child is expected to
-        * have as an instruction pointer as well as a stack pointer.
-        */
-       cusc->regs.elr_el1 = pusc->regs.lr;
-       cusc->regs.sp = sp;
+       /* Use new stack pointer */
+       cexecenv->regs.sp = sp;
 
        /* Use parent's user land TPIDR_EL0 if clone did not have SETTLS */
        if (!child->tlsp)
-               cusc->sysregs.tpidr_el0 = pusc->sysregs.tpidr_el0;
+               cexecenv->sysctx.tpidr_el0 = pexecenv->sysctx.tpidr_el0;
        else
-               cusc->sysregs.tpidr_el0 = child->tlsp;
+               cexecenv->sysctx.tpidr_el0 = child->tlsp;
 
-       ukarch_ctx_init(&child->ctx,
-                       auxsp_pos,
-                       0,
-                       (__uptr)&uk_syscall_ctx_popall);
+       ukarch_ctx_init_entry1(&child->ctx,
+                              auxsp_pos,
+                              1,
+                              (ukarch_ctx_entry1)&ukarch_execenv_load,
+                              auxsp_pos);
 }
index 2a65ef4504071092ad1848616f60b90410dc079c..7b45d2173254eee5b082ed77117c22ab854a19c5 100644 (file)
@@ -13,7 +13,7 @@
 
 #if !__ASSEMBLY__
 
-void clone_setup_child_ctx(struct uk_syscall_ctx *pusc,
+void clone_setup_child_ctx(struct ukarch_execenv *pexecenv,
                           struct uk_thread *child, __uptr sp);
 
 #endif /* !__ASSEMBLY__ */
index 6d5e3f188ce353b6b2b0847b7d9917918873c9f7..a8a856d640b650c5af5e8aecee72d869d2d3b355 100644 (file)
@@ -7,54 +7,56 @@
 #include <string.h>
 #include <uk/plat/common/cpu.h>
 #include <uk/process.h>
+#include <uk/arch/ctx.h>
 
-void uk_syscall_ctx_popall(void);
-
-void clone_setup_child_ctx(struct uk_syscall_ctx *pusc,
+void clone_setup_child_ctx(struct ukarch_execenv *pexecenv,
                           struct uk_thread *child, __uptr sp)
 {
-       __uptr auxsp_pos = child->auxsp;
-       struct uk_syscall_ctx *cusc;
+       struct ukarch_execenv *cexecenv;
+       struct ukarch_auxspcb *auxspcb;
+       __uptr auxsp_pos;
+
+       UK_ASSERT(pexecenv);
+       UK_ASSERT(child);
+       UK_ASSERT(sp);
+
+       auxspcb = ukarch_auxsp_get_cb(child->auxsp);
+       UK_ASSERT(auxspcb);
+
+       auxsp_pos = ukarch_auxspcb_get_curr_fp(auxspcb);
+       UK_ASSERT(auxsp_pos);
 
        /* Create a child context whose stack pointer is that of the auxiliary
-        * stack, minus the parent's `struct uk_syscall_ctx` saved on the
+        * stack, minus the parent's `struct ukarch_execenv` saved on the
         * auxiliary stack that we will have to first patch now and then pop off
         */
 
-       /* Make room for child's copy of `struct uk_syscall_ctx` */
-       auxsp_pos = ALIGN_DOWN(auxsp_pos, UK_SYSCALL_CTX_END_ALIGN);
-       auxsp_pos -= UK_SYSCALL_CTX_SIZE;
-       memcpy((void *)auxsp_pos, (void *)pusc, UK_SYSCALL_CTX_SIZE);
+       /* Make room for child's copy of `struct ukarch_execenv` */
+       auxsp_pos = ALIGN_DOWN(auxsp_pos, UKARCH_EXECENV_END_ALIGN);
+       auxsp_pos -= UKARCH_EXECENV_SIZE;
 
        /* Now patch the child's return registers */
-       cusc = (struct uk_syscall_ctx *)auxsp_pos;
+       cexecenv = (struct ukarch_execenv *)auxsp_pos;
+       *cexecenv = *pexecenv;
 
        /* Child must see %rax as 0 */
-       cusc->regs.rax = 0x0;
+       cexecenv->regs.rax = 0x0;
 
-       /* Make sure we have interrupts enabled, as this is supposedly a normal
-        * userspace thread - the other flags don't really matter since the
-        * first thing the child does is compare %rax to 0x0.
-        */
-       cusc->regs.eflags |= X86_EFLAGS_IF;
-
-       /* Finally, make sure we do return to what the child is expected to
-        * have as an instruction pointer as well as a stack pointer.
-        */
-       cusc->regs.rip = pusc->regs.rip;
-       cusc->regs.rsp = sp;
+       /* Use new stack pointer */
+       cexecenv->regs.rsp = sp;
 
-       /* Use parent's userland gs_base */
-       cusc->sysregs.gs_base = pusc->sysregs.gs_base;
+       /* Use parent's userland gsbase */
+       cexecenv->sysctx.gsbase = pexecenv->sysctx.gsbase;
 
-       /* Use parent's fs_base if clone did not have SETTLS */
+       /* Use parent's fsbase if clone did not have SETTLS */
        if (!child->tlsp)
-               cusc->sysregs.fs_base = pusc->sysregs.fs_base;
+               cexecenv->sysctx.fsbase = pexecenv->sysctx.fsbase;
        else
-               cusc->sysregs.fs_base = child->tlsp;
+               cexecenv->sysctx.fsbase = child->tlsp;
 
-       ukarch_ctx_init(&child->ctx,
-                       auxsp_pos,
-                       0,
-                       (__uptr)&uk_syscall_ctx_popall);
+       ukarch_ctx_init_entry1(&child->ctx,
+                              auxsp_pos,
+                              1,
+                              (ukarch_ctx_entry1)&ukarch_execenv_load,
+                              auxsp_pos);
 }
index 7cd367f895a1a3b461015fc4daa0bde47a809bca..0b425f31886ff39a541f2cfee78257d40715e088 100644 (file)
@@ -13,7 +13,7 @@
 
 #if !__ASSEMBLY__
 
-void clone_setup_child_ctx(struct uk_syscall_ctx *pusc,
+void clone_setup_child_ctx(struct ukarch_execenv *pexecenv,
                           struct uk_thread *child, __uptr sp);
 
 #endif /* !__ASSEMBLY__ */
index eefdcc7723acb08eb093be81bb276828a68587a1..ca8f4e368141f0a961f01035861264bcfb893799 100644 (file)
@@ -322,7 +322,7 @@ static void _clone_child_gc(struct uk_thread *t)
  *       to zero.
  */
 static int _clone(struct clone_args *cl_args, size_t cl_args_len,
-                 struct uk_syscall_ctx *usc)
+                 struct ukarch_execenv *execenv)
 {
        struct uk_thread *child = NULL;
        struct uk_thread *t;
@@ -384,13 +384,13 @@ static int _clone(struct clone_args *cl_args, size_t cl_args_len,
                uk_pr_debug(" child_tid: %p\n", (void *)cl_args->child_tid);
        uk_pr_debug(" stack: %p\n", (void *)cl_args->stack);
        uk_pr_debug(" tls: %p\n", (void *)cl_args->tls);
-       uk_pr_debug(" <return>: %p\n", (void *)usc->regs.rip);
+       uk_pr_debug(" <return>: %p\n", (void *)execenv->regs.rip);
        uk_pr_debug(")\n");
 #endif /* UK_DEBUG */
 
        if ((flags & CLONE_SETTLS)
 #if CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS
-           && (ukarch_sysregs_get_tlsp(&usc->sysregs) == 0x0)
+           && (ukarch_sysctx_get_tlsp(&execenv->sysctx) == 0x0)
 #endif /* CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS */
        ) {
                /* The caller already created a TLS for the child (for instance
@@ -417,7 +417,7 @@ static int _clone(struct clone_args *cl_args, size_t cl_args_len,
                 * places TLS variables and uses them effectively as TCB.
                 */
 #if CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS
-               if (ukarch_sysregs_get_tlsp(&usc->sysregs) != 0x0) {
+               if (ukarch_sysctx_get_tlsp(&execenv->sysctx) != 0x0) {
                        uk_pr_debug("Allocating an Unikraft TLS for the new child, parent called from context with custom TLS\n");
                } else
 #endif /* CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS */
@@ -466,7 +466,7 @@ static int _clone(struct clone_args *cl_args, size_t cl_args_len,
                    t, t->name ? child->name : "<unnamed>",
                    child, child->name ? child->name : "<unnamed>", ret);
 
-       clone_setup_child_ctx(usc, child, (__uptr)cl_args->stack);
+       clone_setup_child_ctx(execenv, child, (__uptr)cl_args->stack);
 
        uk_thread_set_runnable(child);
 
@@ -489,14 +489,14 @@ err_out:
 }
 
 #if CONFIG_ARCH_X86_64
-UK_LLSYSCALL_R_U_DEFINE(int, clone,
+UK_LLSYSCALL_R_E_DEFINE(int, clone,
                        unsigned long, flags,
                        void *, sp,
                        int *, parent_tid,
                        int *, child_tid,
                        unsigned long, tlsp)
 #else /* !CONFIG_ARCH_X86_64 */
-UK_LLSYSCALL_R_U_DEFINE(int, clone,
+UK_LLSYSCALL_R_E_DEFINE(int, clone,
                        unsigned long, flags,
                        void *, sp,
                        int *, parent_tid,
@@ -515,7 +515,7 @@ UK_LLSYSCALL_R_U_DEFINE(int, clone,
                .tls         = (__u64) tlsp
        };
 
-       return _clone(&cl_args, sizeof(cl_args), usc);
+       return _clone(&cl_args, sizeof(cl_args), execenv);
 }
 
 #if UK_LIBC_SYSCALLS
index a2775e0d776803b0fd1bcb54f1af8be7af520de8..5b15295fd1292e2971e430eef91d33b583d5c55d 100644 (file)
@@ -79,7 +79,7 @@ uk_posix_process_kill
 clone
 uk_syscall_r_clone
 uk_syscall_e_clone
-uk_syscall_r_u_clone
-uk_syscall_e_u_clone
+uk_syscall_r_e_clone
+uk_syscall_e_e_clone
 uk_syscall_r_clone3
 uk_syscall_e_clone3
index 9a6b2e54af0551ae77a4938b579ce8cf002d5492..e7af8229fa8c3043cbe2e6181e72b507daa3a57b 100644 (file)
@@ -13,9 +13,6 @@ ASINCLUDES-y                          += -I$(LIBSYSCALL_SHIM_BASE)/arch/$(CONFIG_UK_ARCH)/include
 CINCLUDES-y                            += -I$(LIBSYSCALL_SHIM_BASE)/arch/$(CONFIG_UK_ARCH)/include
 CXXINCLUDES-y                          += -I$(LIBSYSCALL_SHIM_BASE)/arch/$(CONFIG_UK_ARCH)/include
 
-LIBSYSCALL_SHIM_SRCS-y += $(LIBSYSCALL_SHIM_BASE)/arch/$(CONFIG_UK_ARCH)/sysregs.c
-LIBSYSCALL_SHIM_SRCS-y += $(LIBSYSCALL_SHIM_BASE)/arch/$(CONFIG_UK_ARCH)/syscall_ctx.S
-
 LIBSYSCALL_SHIM_CINCLUDES += -I$(LIBSYSCALL_SHIM_BASE)
 LIBSYSCALL_SHIM_COMPFLAGS-$(CONFIG_LIBSYSCALL_SHIM_DEBUG) += -DUK_DEBUG
 
diff --git a/lib/syscall_shim/arch/arm64/include/arch/regmap_usc.h b/lib/syscall_shim/arch/arm64/include/arch/regmap_usc.h
deleted file mode 100644 (file)
index a172978..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause */
-/* Copyright (c) 2023, Unikraft GmbH and The Unikraft Authors.
- * Licensed under the BSD-3-Clause License (the "License").
- * You may not use this file except in compliance with the License.
- */
-
-#ifndef __UK_SYSCALL_H__
-#error Do not include this header directly
-#endif
-
-/* Taken from regmap_linuxabi.h, but defined differently so as not to
- * needlessly contaminate the namespace of source files including this header
- */
-
-#define usc_arg0               x[0]
-#define usc_arg1               x[1]
-#define usc_arg2               x[2]
-#define usc_arg3               x[3]
-#define usc_arg4               x[4]
-#define usc_arg5               x[5]
index d54af63f7bedfb000b8c82d4f027a8a47bbf9fe4..2d4b026f45e9a7ccde9972ee9ea3eff07c75dcf1 100644 (file)
 
 #include <uk/essentials.h>
 
-#define UK_SYSCALL_USC_PROLOGUE_DEFINE(pname, fname, x, ...)           \
+/**
+ * Define a default value for SPSR_EL1, since we are not actually taking an
+ * exception. Most fields do not interest us yet and we ignore NZCV.
+ * Additionally, we leave IRQs unmasked since, normally, when an SVC call
+ * happens in userspace, IRQs are enabled.
+ */
+#define UK_SYSCALL_PROLOGUE_SPSR_EL1_SVC64_DEFAULT_VALUE               \
+       ((0b1 << 0) /* M[0] must be 1 for non-EL0 state */ |            \
+        (0b0 << 1) /* M[1] must be 0 for AArch64 state */ |            \
+        (0b01 << 2) /* M[3:2] (EL) must be 0b01 for EL1 */ |           \
+        (0b0 << 4) /* M[4] = 0 for AArch64 state */ |                  \
+        (0b0 << 5) /* T32, does not matter */ |                        \
+        (0b1101 << 6) /* D, A, I, F, only IRQ's unmasked */)
+
+#define UK_SYSCALL_EXECENV_PROLOGUE_DEFINE(pname, fname, x, ...)       \
        long __used __noreturn __attribute__((optimize("O3")))          \
        pname(UK_ARG_MAPx(x, UK_S_ARG_LONG_MAYBE_UNUSED, __VA_ARGS__))  \
        {                                                               \
                "/* Use `struct lcpu` pointer from TPIDR_EL1 */\n\t"    \
                "mrs    x0, tpidr_el1\n\t"                              \
                " /* Switch to per-CPU auxiliary stack */\n\t"          \
-               "str    x0, [x0, #"STRINGIFY(LCPU_AUXSP_OFFSET)"]\n\t"  \
+               "ldr    x0, [x0, #"STRINGIFY(LCPU_AUXSP_OFFSET)"]\n\t"  \
+               "sub    x0, x0, #"STRINGIFY(UKARCH_AUXSPCB_SIZE)"\n\t"  \
+               "ldr    x0, [x0, #"                                     \
+                       STRINGIFY(UKARCH_AUXSPCB_OFFSETOF_CURR_FP)"]\n\t"\
                "/* Auxiliary stack is already ECTX aligned */\n\t"     \
-               "/* Make room for `struct uk_syscall_ctx` */\n\t"       \
-               "sub    x0, x0, #"STRINGIFY(UK_SYSCALL_CTX_SIZE)"\n\t"  \
+               "/* Make room for `struct ukarch_execenv` */\n\t"       \
+               "sub    x0, x0, #"STRINGIFY(UKARCH_EXECENV_SIZE)"\n\t"  \
                "/* Swap x0 and (old) sp */\n\t"                        \
                "add    sp, sp, x0\n\t"                                 \
                "sub    x0, sp, x0\n\t"                                 \
                "stp    x24, x25, [sp, #16 * 12]\n\t"                   \
                "stp    x26, x27, [sp, #16 * 13]\n\t"                   \
                "stp    x28, x29, [sp, #16 * 14]\n\t"                   \
-               "mrs    x21, elr_el1\n\t"                               \
-               "stp    x30, x21, [sp, #16 * 15]\n\t"                   \
-               "mrs    x22, spsr_el1\n\t"                              \
-               "mrs    x23, esr_el1\n\t"                               \
+               "/* Here we should push lr and elr_el1, however\n\t"    \
+               " * we are not in an actual exception, but instead\n\t" \
+               " * we are trying to emulate a SVC with a function\n\t" \
+               " * call which makes elr_el1 invalid and we instead\n\t"\
+               " * double push lr (x30).\n\t"                          \
+               " */\n\t"                                               \
+               "stp    x30, x30, [sp, #16 * 15]\n\t"                   \
+               "/* Just like above for elr_el1, spsr_el1 is also\n\t"  \
+               " * invalid. Therefore we use a sane default value\n\t" \
+               " * which one would normally see in spsr_el1\n\t"       \
+               " * following a SVC.\n\t"                               \
+               " */\n\t"                                               \
+               "mov    x22, #"                                         \
+                       STRINGIFY(UK_SYSCALL_PROLOGUE_SPSR_EL1_SVC64_DEFAULT_VALUE)"\n\t"\
+               "/* Same for esr_el1, make it look like a SVC\n\t"      \
+               " * happened.\n\t"                                      \
+               " */\n\t"                                               \
+               "mov    x23, xzr\n\t"                                   \
+               "add    x23, x23, #"STRINGIFY(ESR_EL1_EC_SVC64)"\n\t"   \
+               "orr    x23, xzr, x23, lsl #"STRINGIFY(ESR_EC_SHIFT)"\n\t"\
+               "orr    x23, x23, #"STRINGIFY(ESR_IL)"\n\t"             \
                "stp    x22, x23, [sp, #16 * 16]\n\t"                   \
-               "/* ECTX at slot w.r.t. `struct uk_syscall_ctx` */\n\t"\
+               "/* ECTX at slot w.r.t. `struct ukarch_execenv` */\n\t" \
                "mov    x0, sp\n\t"                                     \
                "add    x0, x0, #("STRINGIFY(__REGS_SIZEOF +            \
-                                    UKARCH_SYSREGS_SIZE)")\n\t"                \
+                                    UKARCH_SYSCTX_SIZE)")\n\t"         \
                "bl     ukarch_ectx_store\n\t"                          \
-               "/* SYSREGS at slot w.r.t. `struct uk_syscall_ctx` */\n\t"\
+               "/* SYSCTX at slot w.r.t. `struct ukarch_execenv` */\n\t"\
                "mov    x0, sp\n\t"                                     \
                "add    x0, x0, #"STRINGIFY(__REGS_SIZEOF)"\n\t"        \
-               "bl     ukarch_sysregs_switch_uk\n\t"                   \
+               "bl     ukarch_sysctx_store\n\t"                        \
                "mov    x0, sp\n\t"                                     \
                "msr    daifclr, #2\n\t"                                \
                "bl     "STRINGIFY(fname)"\n\t"                         \
diff --git a/lib/syscall_shim/arch/arm64/include/arch/sysregs.h b/lib/syscall_shim/arch/arm64/include/arch/sysregs.h
deleted file mode 100644 (file)
index b23e34d..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause */
-/* Copyright (c) 2023, Unikraft GmbH and The Unikraft Authors.
- * Licensed under the BSD-3-Clause License (the "License").
- * You may not use this file except in compliance with the License.
- */
-
-#ifndef __UK_SYSCALL_H__
-#error Do not include this header directly
-#endif
-
-#define UKARCH_SYSREGS_SIZE                    16
-
-#if !__ASSEMBLY__
-
-#include <uk/essentials.h>
-
-/* Architecture specific userland context */
-struct ukarch_sysregs {
-       __uptr tpidr_el0;
-
-       __u8 pad[8];    /* Make sure we are a multiple of 16 bytes */
-};
-
-UK_CTASSERT(sizeof(struct ukarch_sysregs) == UKARCH_SYSREGS_SIZE);
-
-#if CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS
-__uptr ukarch_sysregs_get_tlsp(struct ukarch_sysregs *sysregs);
-
-void ukarch_sysregs_set_tlsp(struct ukarch_sysregs *sysregs, __uptr tlsp);
-
-void ukarch_sysregs_switch_uk_tls(struct ukarch_sysregs *sysregs);
-
-void ukarch_sysregs_switch_ul_tls(struct ukarch_sysregs *sysregs);
-#endif /* CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS */
-
-void ukarch_sysregs_switch_uk(struct ukarch_sysregs *sysregs);
-
-void ukarch_sysregs_switch_ul(struct ukarch_sysregs *sysregs);
-
-#endif /* !__ASSEMBLY__ */
diff --git a/lib/syscall_shim/arch/arm64/syscall_ctx.S b/lib/syscall_shim/arch/arm64/syscall_ctx.S
deleted file mode 100644 (file)
index 10b7be8..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause */
-/* Copyright (c) 2023, Unikraft GmbH and The Unikraft Authors.
- * Licensed under the BSD-3-Clause License (the "License").
- * You may not use this file except in compliance with the License.
- */
-
-#include <uk/arch/lcpu.h>
-#include <uk/asm.h>
-#include <uk/syscall.h>
-
-ENTRY(uk_syscall_ctx_popall)
-       /* Mask IRQ to make sure restore would not be interrupted by IRQ */
-       msr     daifset, #2
-
-       mov     x0, sp
-       add     x0, x0, #(__REGS_SIZEOF + UKARCH_SYSREGS_SIZE)
-       bl      ukarch_ectx_store
-
-       mov     x0, sp
-       add     x0, x0, #(__REGS_SIZEOF)
-       bl      ukarch_sysregs_switch_ul
-
-       /* Restore pstate and exception status register */
-       ldp x22, x23, [sp, #16 * 16]
-       msr spsr_el1, x22
-       msr esr_el1, x23
-
-       /* Restore LR and exception PC */
-       ldp     x30, x21, [sp, #16 * 15]
-       msr     elr_el1, x21
-
-       /* Restore general purpose registers */
-       ldp     x28, x29, [sp, #16 * 14]
-       ldp     x26, x27, [sp, #16 * 13]
-       ldp     x24, x25, [sp, #16 * 12]
-       ldp     x22, x23, [sp, #16 * 11]
-       ldp     x20, x21, [sp, #16 * 10]
-       /* Skip x18, x19 */
-       ldp     x16, x17, [sp, #16 * 8]
-       ldp     x14, x15, [sp, #16 * 7]
-       ldp     x12, x13, [sp, #16 * 6]
-       ldp     x10, x11, [sp, #16 * 5]
-       ldp     x8, x9, [sp, #16 * 4]
-       ldp     x6, x7, [sp, #16 * 3]
-       ldp     x4, x5, [sp, #16 * 2]
-       ldp     x2, x3, [sp, #16 * 1]
-       ldp     x0, x1, [sp, #16 * 0]
-
-       /* Restore stack pointer */
-       ldr     x18, [sp, #__SP_OFFSET]
-       mov     x19, sp
-       mov     sp, x18
-
-       /* Restore x18, x19 */
-       ldp     x18, x19, [x19, #16 * 9]
-
-       eret
diff --git a/lib/syscall_shim/arch/arm64/sysregs.c b/lib/syscall_shim/arch/arm64/sysregs.c
deleted file mode 100644 (file)
index b0d06bd..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause */
-/* Copyright (c) 2023, Unikraft GmbH and The Unikraft Authors.
- * Licensed under the BSD-3-Clause License (the "License").
- * You may not use this file except in compliance with the License.
- */
-#include <uk/assert.h>
-#include <uk/thread.h>
-#include <uk/syscall.h>
-
-void ukarch_sysregs_switch_uk(struct ukarch_sysregs *sysregs)
-{
-       UK_ASSERT(sysregs);
-
-#if CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS
-       ukarch_sysregs_switch_uk_tls(sysregs);
-#endif /* CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS */
-}
-
-void ukarch_sysregs_switch_ul(struct ukarch_sysregs *sysregs)
-{
-       UK_ASSERT(sysregs);
-
-#if CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS
-       ukarch_sysregs_switch_ul_tls(sysregs);
-#endif /* CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS */
-}
-
-#if CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS
-__uptr ukarch_sysregs_get_tlsp(struct ukarch_sysregs *sysregs)
-{
-       UK_ASSERT(sysregs);
-
-       return sysregs->tpidr_el0;
-}
-
-void ukarch_sysregs_set_tlsp(struct ukarch_sysregs *sysregs, __uptr tlsp)
-{
-       UK_ASSERT(sysregs);
-
-       uk_pr_debug("System call updated userland TLS pointer register to %p (before: %p)\n",
-                   (void *)sysregs->tpidr_el0, (void *)tlsp);
-
-       sysregs->tpidr_el0 = tlsp;
-}
-
-void ukarch_sysregs_switch_uk_tls(struct ukarch_sysregs *sysregs)
-{
-       struct uk_thread *t = uk_thread_current();
-
-       UK_ASSERT(sysregs);
-       UK_ASSERT(t);
-
-       sysregs->tpidr_el0 = ukplat_tlsp_get();
-       ukplat_tlsp_set(t->uktlsp);
-       t->tlsp = t->uktlsp;
-}
-
-void ukarch_sysregs_switch_ul_tls(struct ukarch_sysregs *sysregs)
-{
-       struct uk_thread *t = uk_thread_current();
-
-       UK_ASSERT(sysregs);
-       UK_ASSERT(t);
-
-       ukplat_tlsp_set(sysregs->tpidr_el0);
-       t->tlsp = sysregs->tpidr_el0;
-}
-#endif /* CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS */
index b1a7aec5a3ac13ddd73fab5acb4409c525e97ac6..8ec953ce00c3669d05cc6a5362cda6b84e0ee609 100644 (file)
  */
 
 #if (defined __X86_64__)
-#define rip            rcx
-#define rsyscall       orig_rax
-#define rarg0          rdi
-#define rarg1          rsi
-#define rarg2          rdx
-#define rarg3          r10
-#define rarg4          r8
-#define rarg5          r9
-
-#define rret0          rax
-#define rret1          rdx
+#define __syscall_rip          rcx
+#define __syscall_rsyscall     orig_rax
+#define __syscall_rarg0                rdi
+#define __syscall_rarg1                rsi
+#define __syscall_rarg2                rdx
+#define __syscall_rarg3                r10
+#define __syscall_rarg4                r8
+#define __syscall_rarg5                r9
+
+#define __syscall_rret0                rax
+#define __syscall_rret1                rdx
 
 #elif (defined __ARM_64__)
-#define rip            elr_el1
-#define rsyscall       x[8]
-#define rarg0          x[0]
-#define rarg1          x[1]
-#define rarg2          x[2]
-#define rarg3          x[3]
-#define rarg4          x[4]
-#define rarg5          x[5]
-
-#define rret0          x[0]
-#define rret1          x[1]
+#define __syscall_rip          elr_el1
+#define __syscall_rsyscall     x[8]
+#define __syscall_rarg0                x[0]
+#define __syscall_rarg1                x[1]
+#define __syscall_rarg2                x[2]
+#define __syscall_rarg3                x[3]
+#define __syscall_rarg4                x[4]
+#define __syscall_rarg5                x[5]
+
+#define __syscall_rret0                x[0]
+#define __syscall_rret1                x[1]
 
 #else
 #error "Missing register mappings for selected target architecture"
diff --git a/lib/syscall_shim/arch/x86_64/include/arch/regmap_usc.h b/lib/syscall_shim/arch/x86_64/include/arch/regmap_usc.h
deleted file mode 100644 (file)
index 4ea5966..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause */
-/* Copyright (c) 2023, Unikraft GmbH and The Unikraft Authors.
- * Licensed under the BSD-3-Clause License (the "License").
- * You may not use this file except in compliance with the License.
- */
-
-#ifndef __UK_SYSCALL_H__
-#error Do not include this header directly
-#endif
-
-/* Taken from regmap_linuxabi.h, but defined differently so as not to
- * needlessly contaminate the namespace of source files including this header
- */
-
-#define usc_arg0               rdi
-#define usc_arg1               rsi
-#define usc_arg2               rdx
-#define usc_arg3               r10
-#define usc_arg4               r8
-#define usc_arg5               r9
index 202683167c0e4ad2c913d42cc3ccefae7de88897..5482194f93ad0c80f8a9dd0f2bb31f98e5e39e23 100644 (file)
@@ -23,7 +23,7 @@
 
 #include <uk/essentials.h>
 
-#define UK_SYSCALL_USC_PROLOGUE_DEFINE(pname, fname, x, ...)           \
+#define UK_SYSCALL_EXECENV_PROLOGUE_DEFINE(pname, fname, x, ...)       \
        long __used __naked __noreturn                                  \
        pname(UK_ARG_MAPx(x, UK_S_ARG_LONG_MAYBE_UNUSED, __VA_ARGS__))  \
        {                                                               \
@@ -34,8 +34,8 @@
                "movq   %%rsp, %%r11\n\t"                               \
                "movq   %%gs:("STRINGIFY(LCPU_AUXSP_OFFSET)"), %%rsp\n\t"\
                "/* Auxiliary stack is already ECTX aligned */\n\t"     \
-               "/* Make room for `struct uk_syscall_ctx` */\n\t"       \
-               "subq   $("STRINGIFY(UK_SYSCALL_CTX_SIZE -              \
+               "/* Make room for `struct UKARCH_EXECENV` */\n\t"       \
+               "subq   $("STRINGIFY(UKARCH_EXECENV_SIZE -              \
                                     __REGS_SIZEOF)"), %%rsp\n\t"       \
                "/* Now build stack frame beginning with 5 pointers\n\t"\
                " * in the classical iretq/`struct __regs` format\n\t"  \
                "pushq  $(0x10)\n\t"                                    \
                "/* Push saving original rsp stored in r11 */\n\t"      \
                "pushq  %%r11\n\t"                                      \
-               "/* Push EFLAGS register */\n\t"                        \
+               "/* Push EFLAGS register. Additionally, since we\n\t"   \
+               " * pushed it with IRQs disabled, it won't have\n\t"    \
+               " * the corresponding bit flag set, making it look\n\t" \
+               " * like the caller of the syscall had IRQs off,\n\t"   \
+               " * which no sane application would do, therefore\n\t"  \
+               " * manually set the flag.\n\t"                         \
+               " */\n\t"                                               \
                "pushfq\n\t"                                            \
+               "orq    $("STRINGIFY(X86_EFLAGS_IF)"), 0(%%rsp)\n\t"    \
                "/* Push code segment, GDT code segment selector:\n\t"  \
                " * [15: 3]: Selector Index - first GDT entry\n\t"      \
                " * [ 2: 2]: Table Indicator - GDT, table 0\n\t"        \
                "pushq  %%r14\n\t"                                      \
                "pushq  %%r15\n\t"                                      \
                "subq   $("STRINGIFY(__REGS_PAD_SIZE)"), %%rsp\n\t"     \
-               "/* ECTX at slot w.r.t. `struct uk_syscall_ctx` */\n\t"\
+               "/* ECTX at slot w.r.t. `struct UKARCH_EXECENV` */\n\t"\
                "movq   %%rsp, %%rdi\n\t"                               \
                "addq   $("STRINGIFY(__REGS_SIZEOF +                    \
-                                    UKARCH_SYSREGS_SIZE)"), %%rdi\n\t" \
+                                    UKARCH_SYSCTX_SIZE)"), %%rdi\n\t"  \
                "call   ukarch_ectx_store\n\t"                          \
-               "/* SYSREGS at slot w.r.t. `struct uk_syscall_ctx` */\n\t"\
+               "/* SYSCTX at slot w.r.t. `struct UKARCH_EXECENV` */\n\t"\
                "movq   %%rsp, %%rdi\n\t"                               \
                "addq   $("STRINGIFY(__REGS_SIZEOF)"), %%rdi\n\t"       \
-               "call   ukarch_sysregs_switch_uk\n\t"                   \
+               "call   ukarch_sysctx_store\n\t"                        \
                "movq   %%rsp, %%rdi\n\t"                               \
                "sti\n\t"                                               \
                "call   "STRINGIFY(fname)"\n\t"                         \
diff --git a/lib/syscall_shim/arch/x86_64/include/arch/sysregs.h b/lib/syscall_shim/arch/x86_64/include/arch/sysregs.h
deleted file mode 100644 (file)
index dcc6527..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause */
-/* Copyright (c) 2023, Unikraft GmbH and The Unikraft Authors.
- * Licensed under the BSD-3-Clause License (the "License").
- * You may not use this file except in compliance with the License.
- */
-
-#ifndef __UK_SYSCALL_H__
-#error Do not include this header directly
-#endif
-
-#define UKARCH_SYSREGS_SIZE                    16
-
-#if !__ASSEMBLY__
-
-#include <uk/essentials.h>
-
-/* Architecture specific userland context */
-struct ukarch_sysregs {
-       /* The current value of %gs's gs_base register of the application.
-        * On syscall entry, this will be updated to hold the value of
-        * MSR_KERNEL_GS_BASE following a swapgs instruction.
-        */
-       __uptr gs_base;
-
-       __uptr fs_base;
-};
-
-UK_CTASSERT(sizeof(struct ukarch_sysregs) == UKARCH_SYSREGS_SIZE);
-
-#if CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS
-__uptr ukarch_sysregs_get_tlsp(struct ukarch_sysregs *sysregs);
-
-void ukarch_sysregs_set_tlsp(struct ukarch_sysregs *sysregs, __uptr tlsp);
-
-void ukarch_sysregs_switch_uk_tls(struct ukarch_sysregs *sysregs);
-
-void ukarch_sysregs_switch_ul_tls(struct ukarch_sysregs *sysregs);
-#endif /* CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS */
-
-void ukarch_sysregs_switch_uk(struct ukarch_sysregs *sysregs);
-
-void ukarch_sysregs_switch_ul(struct ukarch_sysregs *sysregs);
-
-__uptr ukarch_sysregs_get_gs_base(struct ukarch_sysregs *sysregs);
-
-void ukarch_sysregs_set_gs_base(struct ukarch_sysregs *sysregs, __uptr gs_base);
-
-#endif /* !__ASSEMBLY__ */
diff --git a/lib/syscall_shim/arch/x86_64/syscall_ctx.S b/lib/syscall_shim/arch/x86_64/syscall_ctx.S
deleted file mode 100644 (file)
index d5d31ad..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause */
-/* Copyright (c) 2023, Unikraft GmbH and The Unikraft Authors.
- * Licensed under the BSD-3-Clause License (the "License").
- * You may not use this file except in compliance with the License.
- */
-
-#include <uk/arch/lcpu.h>
-#include <uk/asm.h>
-#include <uk/syscall.h>
-
-/* Used to pop the contents of a `struct uk_syscall_ctx` off the stack */
-ENTRY(uk_syscall_ctx_popall)
-       /* Do all this with IRQ's disabled, as the final iretq should pop off
-        * a proper rflags anyway.
-        */
-       cli
-
-       movq    %rsp, %rdi
-       addq    $(__REGS_SIZEOF + UKARCH_SYSREGS_SIZE), %rdi
-       call    ukarch_ectx_load
-
-       movq    %rsp, %rdi
-       addq    $(__REGS_SIZEOF), %rdi
-       call    ukarch_sysregs_switch_ul
-
-       addq    $(__REGS_PAD_SIZE), %rsp
-
-       popq    %r15
-       popq    %r14
-       popq    %r13
-       popq    %r12
-       popq    %rbp
-       popq    %rbx
-       popq    %r11
-       popq    %r10
-       popq    %r9
-       popq    %r8
-       popq    %rax
-       popq    %rcx
-       popq    %rdx
-       popq    %rsi
-       popq    %rdi
-
-       addq    $(__REGS_PAD_SIZE), %rsp
-
-       swapgs
-
-       iretq
diff --git a/lib/syscall_shim/arch/x86_64/sysregs.c b/lib/syscall_shim/arch/x86_64/sysregs.c
deleted file mode 100644 (file)
index 061ef15..0000000
+++ /dev/null
@@ -1,102 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause */
-/* Copyright (c) 2023, Unikraft GmbH and The Unikraft Authors.
- * Licensed under the BSD-3-Clause License (the "License").
- * You may not use this file except in compliance with the License.
- */
-#include <uk/arch/lcpu.h>
-#include <uk/assert.h>
-#include <uk/plat/common/cpu.h>
-#include <uk/thread.h>
-#include <uk/syscall.h>
-
-void ukarch_sysregs_switch_uk(struct ukarch_sysregs *sysregs)
-{
-       UK_ASSERT(sysregs);
-       UK_ASSERT(lcpu_get_current());
-
-       /* This can only be called from Unikraft ctx in bincompat mode.
-        * Therefore, X86_MSR_GS_BASE holds the current `struct lcpu` and
-        * X86_MSR_KERNEL_GS_BASE contains the app-saved gs_base.
-        */
-       sysregs->gs_base = rdkgsbase();
-
-#if CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS
-       ukarch_sysregs_switch_uk_tls(sysregs);
-#endif /* CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS */
-}
-
-void ukarch_sysregs_switch_ul(struct ukarch_sysregs *sysregs)
-{
-       UK_ASSERT(sysregs);
-       UK_ASSERT(lcpu_get_current());
-
-#if CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS
-       ukarch_sysregs_switch_ul_tls(sysregs);
-#endif /* CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS */
-
-       /* This can only be called from Unikraft ctx in bincompat mode.
-        * Therefore, X86_MSR_GS_BASE holds the current `struct lcpu` and
-        * X86_MSR_KERNEL_GS_BASE contains the app-saved gs_base.
-        */
-       wrgsbase((__uptr)lcpu_get_current());
-       wrkgsbase(sysregs->gs_base);
-}
-
-#if CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS
-__uptr ukarch_sysregs_get_tlsp(struct ukarch_sysregs *sysregs)
-{
-       UK_ASSERT(sysregs);
-
-       return sysregs->fs_base;
-}
-
-void ukarch_sysregs_set_tlsp(struct ukarch_sysregs *sysregs, __uptr tlsp)
-{
-       UK_ASSERT(sysregs);
-
-       uk_pr_debug("System call updated userland TLS pointer register to %p (before: %p)\n",
-                   (void *)sysregs->fs_base, (void *)tlsp);
-
-       sysregs->fs_base = tlsp;
-}
-
-void ukarch_sysregs_switch_uk_tls(struct ukarch_sysregs *sysregs)
-{
-       struct uk_thread *t = uk_thread_current();
-
-       UK_ASSERT(sysregs);
-       UK_ASSERT(t);
-
-       sysregs->fs_base = ukplat_tlsp_get();
-       ukplat_tlsp_set(t->uktlsp);
-       t->tlsp = t->uktlsp;
-}
-
-void ukarch_sysregs_switch_ul_tls(struct ukarch_sysregs *sysregs)
-{
-       struct uk_thread *t = uk_thread_current();
-
-       UK_ASSERT(sysregs);
-       UK_ASSERT(t);
-
-       ukplat_tlsp_set(sysregs->fs_base);
-       t->tlsp = sysregs->fs_base;
-}
-#endif /* CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS */
-
-__uptr ukarch_sysregs_get_gs_base(struct ukarch_sysregs *sysregs)
-{
-       UK_ASSERT(sysregs);
-
-       return sysregs->gs_base;
-}
-
-void ukarch_sysregs_set_gs_base(struct ukarch_sysregs *sysregs, __uptr gs_base)
-{
-       UK_ASSERT(sysregs);
-
-       uk_pr_debug("System call updated userland GS_BASE pointer register to %p (before: %p)\n",
-                   (void *)sysregs->gs_base, (void *)gs_base);
-
-       sysregs->gs_base = gs_base;
-}
index c513ee3737706a809b03c27f2b6accb5205dfce3..e468ab75d9bb98dc233195ae3eb74670963cface 100644 (file)
 #define __UK_SYSCALL_H__
 
 #include <uk/arch/ctx.h>
-#include <arch/sysregs.h>
-#include <arch/regmap_usc.h>
 #include <arch/syscall_prologue.h>
 
-/* We must make sure that ECTX is aligned, so we make use of some padding,
- * whose size is equal to what we need to add to UKARCH_ECTX_SIZE
- * to make it aligned with UKARCH_ECTX_ALIGN
- */
-#define UK_SYSCALL_CTX_PAD_SIZE                                \
-       (ALIGN_UP(UKARCH_ECTX_SIZE,             \
-                UKARCH_ECTX_ALIGN) -           \
-        UKARCH_ECTX_SIZE)
-/* If we make sure that the in-memory structure's end address is aligned to
- * the ECTX alignment, then subtracting from that end address a value that is
- * also a multiple of that alignment, guarantees that the resulted address
- * is also ECTX aligned.
- */
-#define UK_SYSCALL_CTX_END_ALIGN                       \
-       UKARCH_ECTX_ALIGN
-#define UK_SYSCALL_CTX_SIZE                            \
-       (UK_SYSCALL_CTX_PAD_SIZE +                              \
-        UKARCH_ECTX_SIZE +                     \
-        UKARCH_SYSREGS_SIZE +                          \
-        __REGS_SIZEOF)
-
 #if !__ASSEMBLY__
 #include <uk/config.h>
 #include <uk/essentials.h>
 #include <stdarg.h>
 #include <uk/print.h>
 #include "legacy_syscall.h"
+#include "../../arch/regmap_linuxabi.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-struct uk_syscall_ctx {
-       struct __regs regs;
-       struct ukarch_sysregs sysregs;
-       __u8 ectx[UKARCH_ECTX_SIZE];
-       __u8 pad[UK_SYSCALL_CTX_PAD_SIZE];
-};
-
-UK_CTASSERT(sizeof(struct uk_syscall_ctx) == UK_SYSCALL_CTX_SIZE);
-UK_CTASSERT(IS_ALIGNED(UK_SYSCALL_CTX_PAD_SIZE + UKARCH_ECTX_SIZE,
-                      UKARCH_ECTX_ALIGN));
-
 /*
  * Whenever the hidden Config.uk option LIBSYSCALL_SHIM_NOWRAPPER
  * is set, the creation of libc-style wrappers are disable by the
@@ -115,96 +82,102 @@ typedef long uk_syscall_arg_t;
 #define UK_ARG_MAP14(m, type, arg, ...) m(type, arg), UK_ARG_MAP12(m, __VA_ARGS__)
 #define UK_ARG_MAPx(nr_args, ...) UK_CONCAT(UK_ARG_MAP, nr_args)(__VA_ARGS__)
 
-#define UK_USC_CALLMAP0_0(...)
-#define UK_USC_CALLMAP2_2(m, type, arg) , (type)usc->regs.usc_arg0
-
-#define UK_USC_CALLMAP2_4(m, type, arg) , (type)usc->regs.usc_arg1
-#define UK_USC_CALLMAP4_4(m, type, arg, ...)                           \
-       , (type)usc->regs.usc_arg0 UK_USC_CALLMAP2_4(m, __VA_ARGS__)
-
-#define UK_USC_CALLMAP2_6(m, type, arg) , (type)usc->regs.usc_arg2
-#define UK_USC_CALLMAP4_6(m, type, arg, ...)                           \
-       , (type)usc->regs.usc_arg1 UK_USC_CALLMAP2_6(m, __VA_ARGS__)
-#define UK_USC_CALLMAP6_6(m, type, arg, ...)                           \
-       , (type)usc->regs.usc_arg0 UK_USC_CALLMAP4_6(m, __VA_ARGS__)
-
-#define UK_USC_CALLMAP2_8(m, type, arg) , (type)usc->regs.usc_arg3
-#define UK_USC_CALLMAP4_8(m, type, arg, ...)                           \
-       , (type)usc->regs.usc_arg2 UK_USC_CALLMAP2_8(m, __VA_ARGS__)
-#define UK_USC_CALLMAP6_8(m, type, arg, ...)                           \
-       , (type)usc->regs.usc_arg1 UK_USC_CALLMAP4_8(m, __VA_ARGS__)
-#define UK_USC_CALLMAP8_8(m, type, arg, ...)                           \
-       , (type)usc->regs.usc_arg0 UK_USC_CALLMAP6_8(m, __VA_ARGS__)
-
-#define UK_USC_CALLMAP2_10(m, type, arg) , (type)usc->regs.usc_arg4
-#define UK_USC_CALLMAP4_10(m, type, arg, ...)                          \
-       , (type)usc->regs.usc_arg3 UK_USC_CALLMAP2_10(m, __VA_ARGS__)
-#define UK_USC_CALLMAP6_10(m, type, arg, ...)                          \
-       , (type)usc->regs.usc_arg2 UK_USC_CALLMAP4_10(m, __VA_ARGS__)
-#define UK_USC_CALLMAP8_10(m, type, arg, ...)                          \
-       , (type)usc->regs.usc_arg1 UK_USC_CALLMAP6_10(m, __VA_ARGS__)
-#define UK_USC_CALLMAP10_10(m, type, arg, ...)                         \
-       , (type)usc->regs.usc_arg0 UK_USC_CALLMAP8_10(m, __VA_ARGS__)
-
-#define UK_USC_CALLMAP2_12(m, type, arg) , (type)usc->regs.usc_arg5
-#define UK_USC_CALLMAP4_12(m, type, arg, ...)                          \
-       , (type)usc->regs.usc_arg4 UK_USC_CALLMAP2_12(m, __VA_ARGS__)
-#define UK_USC_CALLMAP6_12(m, type, arg, ...)                          \
-       , (type)usc->regs.usc_arg3 UK_USC_CALLMAP4_12(m, __VA_ARGS__)
-#define UK_USC_CALLMAP8_12(m, type, arg, ...)                          \
-       , (type)usc->regs.usc_arg2 UK_USC_CALLMAP6_12(m, __VA_ARGS__)
-#define UK_USC_CALLMAP10_12(m, type, arg, ...)                         \
-       , (type)usc->regs.usc_arg1 UK_USC_CALLMAP8_12(m, __VA_ARGS__)
-#define UK_USC_CALLMAP12_12(m, type, arg, ...)                         \
-       , (type)usc->regs.usc_arg0 UK_USC_CALLMAP10_12(m, __VA_ARGS__)
-#define UK_USC_CALLMAPx(nr_args, ...)                                  \
-       usc UK_CONCAT(UK_CONCAT(UK_USC_CALLMAP, nr_args),               \
-                     _##nr_args)(__VA_ARGS__)
-
-#define UK_USC_EMAP0_0(...)
-#define UK_USC_EMAP2_2(m, type, arg) , (long)usc->regs.usc_arg0
-
-#define UK_USC_EMAP2_4(m, type, arg) , (long)usc->regs.usc_arg1
-#define UK_USC_EMAP4_4(m, type, arg, ...)                              \
-       , (long)usc->regs.usc_arg0 UK_USC_EMAP2_4(m, __VA_ARGS__)
-
-#define UK_USC_EMAP2_6(m, type, arg) , (long)usc->regs.usc_arg2
-#define UK_USC_EMAP4_6(m, type, arg, ...)                              \
-       , (long)usc->regs.usc_arg1 UK_USC_EMAP2_6(m, __VA_ARGS__)
-#define UK_USC_EMAP6_6(m, type, arg, ...)                              \
-       , (long)usc->regs.usc_arg0 UK_USC_EMAP4_6(m, __VA_ARGS__)
-
-#define UK_USC_EMAP2_8(m, type, arg) , (long)usc->regs.usc_arg3
-#define UK_USC_EMAP4_8(m, type, arg, ...)                              \
-       , (long)usc->regs.usc_arg2 UK_USC_EMAP2_8(m, __VA_ARGS__)
-#define UK_USC_EMAP6_8(m, type, arg, ...)                              \
-       , (long)usc->regs.usc_arg1 UK_USC_EMAP4_8(m, __VA_ARGS__)
-#define UK_USC_EMAP8_8(m, type, arg, ...)                              \
-       , (long)usc->regs.usc_arg0 UK_USC_EMAP6_8(m, __VA_ARGS__)
-
-#define UK_USC_EMAP2_10(m, type, arg) , (long)usc->regs.usc_arg4
-#define UK_USC_EMAP4_10(m, type, arg, ...)                             \
-       , (long)usc->regs.usc_arg3 UK_USC_EMAP2_10(m, __VA_ARGS__)
-#define UK_USC_EMAP6_10(m, type, arg, ...)                             \
-       , (long)usc->regs.usc_arg2 UK_USC_EMAP4_10(m, __VA_ARGS__)
-#define UK_USC_EMAP8_10(m, type, arg, ...)                             \
-       , (long)usc->regs.usc_arg1 UK_USC_EMAP6_10(m, __VA_ARGS__)
-#define UK_USC_EMAP10_10(m, type, arg, ...)                            \
-       , (long)usc->regs.usc_arg0 UK_USC_EMAP8_10(m, __VA_ARGS__)
-
-#define UK_USC_EMAP2_12(m, type, arg) , (long)usc->regs.usc_arg5
-#define UK_USC_EMAP4_12(m, type, arg, ...)                             \
-       , (long)usc->regs.usc_arg4 UK_USC_EMAP2_12(m, __VA_ARGS__)
-#define UK_USC_EMAP6_12(m, type, arg, ...)                             \
-       , (long)usc->regs.usc_arg3 UK_USC_EMAP4_12(m, __VA_ARGS__)
-#define UK_USC_EMAP8_12(m, type, arg, ...)                             \
-       , (long)usc->regs.usc_arg2 UK_USC_EMAP6_12(m, __VA_ARGS__)
-#define UK_USC_EMAP10_12(m, type, arg, ...)                            \
-       , (long)usc->regs.usc_arg1 UK_USC_EMAP8_12(m, __VA_ARGS__)
-#define UK_USC_EMAP12_12(m, type, arg, ...)                            \
-       , (long)usc->regs.usc_arg0 UK_USC_EMAP10_12(m, __VA_ARGS__)
-#define UK_USC_EMAPx(nr_args, ...)                                     \
-       (long)usc UK_CONCAT(UK_CONCAT(UK_USC_EMAP, nr_args),            \
+#define UK_EXECENV_CALLMAP0_0(...)
+#define UK_EXECENV_CALLMAP2_2(m, type, arg)                            \
+       , (type)execenv->regs.__syscall_rarg0
+
+#define UK_EXECENV_CALLMAP2_4(m, type, arg)                            \
+       , (type)execenv->regs.__syscall_rarg1
+#define UK_EXECENV_CALLMAP4_4(m, type, arg, ...)                       \
+       , (type)execenv->regs.__syscall_rarg0 UK_EXECENV_CALLMAP2_4(m, __VA_ARGS__)
+
+#define UK_EXECENV_CALLMAP2_6(m, type, arg)                            \
+       , (type)execenv->regs.__syscall_rarg2
+#define UK_EXECENV_CALLMAP4_6(m, type, arg, ...)                       \
+       , (type)execenv->regs.__syscall_rarg1 UK_EXECENV_CALLMAP2_6(m, __VA_ARGS__)
+#define UK_EXECENV_CALLMAP6_6(m, type, arg, ...)                       \
+       , (type)execenv->regs.__syscall_rarg0 UK_EXECENV_CALLMAP4_6(m, __VA_ARGS__)
+
+#define UK_EXECENV_CALLMAP2_8(m, type, arg)                            \
+       , (type)execenv->regs.__syscall_rarg3
+#define UK_EXECENV_CALLMAP4_8(m, type, arg, ...)                       \
+       , (type)execenv->regs.__syscall_rarg2 UK_EXECENV_CALLMAP2_8(m, __VA_ARGS__)
+#define UK_EXECENV_CALLMAP6_8(m, type, arg, ...)                       \
+       , (type)execenv->regs.__syscall_rarg1 UK_EXECENV_CALLMAP4_8(m, __VA_ARGS__)
+#define UK_EXECENV_CALLMAP8_8(m, type, arg, ...)                       \
+       , (type)execenv->regs.__syscall_rarg0 UK_EXECENV_CALLMAP6_8(m, __VA_ARGS__)
+
+#define UK_EXECENV_CALLMAP2_10(m, type, arg)                           \
+       , (type)execenv->regs.__syscall_rarg4
+#define UK_EXECENV_CALLMAP4_10(m, type, arg, ...)                      \
+       , (type)execenv->regs.__syscall_rarg3 UK_EXECENV_CALLMAP2_10(m, __VA_ARGS__)
+#define UK_EXECENV_CALLMAP6_10(m, type, arg, ...)                      \
+       , (type)execenv->regs.__syscall_rarg2 UK_EXECENV_CALLMAP4_10(m, __VA_ARGS__)
+#define UK_EXECENV_CALLMAP8_10(m, type, arg, ...)                      \
+       , (type)execenv->regs.__syscall_rarg1 UK_EXECENV_CALLMAP6_10(m, __VA_ARGS__)
+#define UK_EXECENV_CALLMAP10_10(m, type, arg, ...)                     \
+       , (type)execenv->regs.__syscall_rarg0 UK_EXECENV_CALLMAP8_10(m, __VA_ARGS__)
+
+#define UK_EXECENV_CALLMAP2_12(m, type, arg)                           \
+       , (type)execenv->regs.__syscall_rarg5
+#define UK_EXECENV_CALLMAP4_12(m, type, arg, ...)                      \
+       , (type)execenv->regs.__syscall_rarg4 UK_EXECENV_CALLMAP2_12(m, __VA_ARGS__)
+#define UK_EXECENV_CALLMAP6_12(m, type, arg, ...)                      \
+       , (type)execenv->regs.__syscall_rarg3 UK_EXECENV_CALLMAP4_12(m, __VA_ARGS__)
+#define UK_EXECENV_CALLMAP8_12(m, type, arg, ...)                      \
+       , (type)execenv->regs.__syscall_rarg2 UK_EXECENV_CALLMAP6_12(m, __VA_ARGS__)
+#define UK_EXECENV_CALLMAP10_12(m, type, arg, ...)                     \
+       , (type)execenv->regs.__syscall_rarg1 UK_EXECENV_CALLMAP8_12(m, __VA_ARGS__)
+#define UK_EXECENV_CALLMAP12_12(m, type, arg, ...)                     \
+       , (type)execenv->regs.__syscall_rarg0 UK_EXECENV_CALLMAP10_12(m, __VA_ARGS__)
+#define UK_EXECENV_CALLMAPx(nr_args, ...)                              \
+       execenv UK_CONCAT(UK_CONCAT(UK_EXECENV_CALLMAP, nr_args),       \
+                         _##nr_args)(__VA_ARGS__)
+
+#define UK_EXECENV_EMAP0_0(...)
+#define UK_EXECENV_EMAP2_2(m, type, arg) , (long)execenv->regs.__syscall_rarg0
+
+#define UK_EXECENV_EMAP2_4(m, type, arg) , (long)execenv->regs.__syscall_rarg1
+#define UK_EXECENV_EMAP4_4(m, type, arg, ...)                          \
+       , (long)execenv->regs.__syscall_rarg0 UK_EXECENV_EMAP2_4(m, __VA_ARGS__)
+
+#define UK_EXECENV_EMAP2_6(m, type, arg) , (long)execenv->regs.__syscall_rarg2
+#define UK_EXECENV_EMAP4_6(m, type, arg, ...)                          \
+       , (long)execenv->regs.__syscall_rarg1 UK_EXECENV_EMAP2_6(m, __VA_ARGS__)
+#define UK_EXECENV_EMAP6_6(m, type, arg, ...)                          \
+       , (long)execenv->regs.__syscall_rarg0 UK_EXECENV_EMAP4_6(m, __VA_ARGS__)
+
+#define UK_EXECENV_EMAP2_8(m, type, arg) , (long)execenv->regs.__syscall_rarg3
+#define UK_EXECENV_EMAP4_8(m, type, arg, ...)                          \
+       , (long)execenv->regs.__syscall_rarg2 UK_EXECENV_EMAP2_8(m, __VA_ARGS__)
+#define UK_EXECENV_EMAP6_8(m, type, arg, ...)                          \
+       , (long)execenv->regs.__syscall_rarg1 UK_EXECENV_EMAP4_8(m, __VA_ARGS__)
+#define UK_EXECENV_EMAP8_8(m, type, arg, ...)                          \
+       , (long)execenv->regs.__syscall_rarg0 UK_EXECENV_EMAP6_8(m, __VA_ARGS__)
+
+#define UK_EXECENV_EMAP2_10(m, type, arg) , (long)execenv->regs.__syscall_rarg4
+#define UK_EXECENV_EMAP4_10(m, type, arg, ...)                         \
+       , (long)execenv->regs.__syscall_rarg3 UK_EXECENV_EMAP2_10(m, __VA_ARGS__)
+#define UK_EXECENV_EMAP6_10(m, type, arg, ...)                         \
+       , (long)execenv->regs.__syscall_rarg2 UK_EXECENV_EMAP4_10(m, __VA_ARGS__)
+#define UK_EXECENV_EMAP8_10(m, type, arg, ...)                         \
+       , (long)execenv->regs.__syscall_rarg1 UK_EXECENV_EMAP6_10(m, __VA_ARGS__)
+#define UK_EXECENV_EMAP10_10(m, type, arg, ...)                                \
+       , (long)execenv->regs.__syscall_rarg0 UK_EXECENV_EMAP8_10(m, __VA_ARGS__)
+
+#define UK_EXECENV_EMAP2_12(m, type, arg) , (long)execenv->regs.__syscall_rarg5
+#define UK_EXECENV_EMAP4_12(m, type, arg, ...)                         \
+       , (long)execenv->regs.__syscall_rarg4 UK_EXECENV_EMAP2_12(m, __VA_ARGS__)
+#define UK_EXECENV_EMAP6_12(m, type, arg, ...)                         \
+       , (long)execenv->regs.__syscall_rarg3 UK_EXECENV_EMAP4_12(m, __VA_ARGS__)
+#define UK_EXECENV_EMAP8_12(m, type, arg, ...)                         \
+       , (long)execenv->regs.__syscall_rarg2 UK_EXECENV_EMAP6_12(m, __VA_ARGS__)
+#define UK_EXECENV_EMAP10_12(m, type, arg, ...)                                \
+       , (long)execenv->regs.__syscall_rarg1 UK_EXECENV_EMAP8_12(m, __VA_ARGS__)
+#define UK_EXECENV_EMAP12_12(m, type, arg, ...)                                \
+       , (long)execenv->regs.__syscall_rarg0 UK_EXECENV_EMAP10_12(m, __VA_ARGS__)
+#define UK_EXECENV_EMAPx(nr_args, ...)                                 \
+       (long)execenv UK_CONCAT(UK_CONCAT(UK_EXECENV_EMAP, nr_args),            \
                      _##nr_args)(__VA_ARGS__)
 
 /* Variant of UK_ARG_MAPx() but prepends a comma if nr_args > 0 */
@@ -224,12 +197,12 @@ typedef long uk_syscall_arg_t;
 #define UK_S_ARG_ACTUAL_MAYBE_UNUSED(type, arg) type arg __maybe_unused
 #define UK_S_ARG_CAST_LONG(type, arg)   (long) arg
 #define UK_S_ARG_CAST_ACTUAL(type, arg) (type) arg
-#define UK_S_USC_ARG_ACTUAL    struct uk_syscall_ctx *usc
-#define UK_S_USC_ARG_ACTUAL_MAYBE_UNUSED                               \
-       struct uk_syscall_ctx *usc __maybe_unused
+#define UK_S_EXECENV_ARG_ACTUAL        struct ukarch_execenv *execenv
+#define UK_S_EXECENV_ARG_ACTUAL_MAYBE_UNUSED                           \
+       struct ukarch_execenv *execenv __maybe_unused
 
-#define UK_USC_DECLMAPx(usc_arg, nr_args, ...)                                 \
-       usc_arg UK_ARG_EMAPx(nr_args, __VA_ARGS__)
+#define UK_EXECENV_DECLMAPx(__syscall_rarg, nr_args, ...)                      \
+       __syscall_rarg UK_ARG_EMAPx(nr_args, __VA_ARGS__)
 
 #if CONFIG_LIBSYSCALL_SHIM_DEBUG_SYSCALLS || CONFIG_LIBUKDEBUG_PRINTD
 #define UK_ARG_FMT_MAP0(...)
@@ -251,16 +224,18 @@ typedef long uk_syscall_arg_t;
                   "(" UK_ARG_FMT_MAPx(x, UK_S_ARG_FMT_LONGX, __VA_ARGS__) ")\n" \
                   UK_ARG_EMAPx(x, UK_S_ARG_CAST_LONG, __VA_ARGS__) )
 
-#define __UK_SYSCALL_USC_PRINTD(x, rtype, fname, ...)                  \
+#define __UK_SYSCALL_EXECENV_PRINTD(x, rtype, fname, ...)                      \
        uk_printd("\nInvoking context saving %s system call.\n",        \
                  STRINGIFY(fname));                                    \
        _uk_printd(uk_libid_self(), __STR_BASENAME__, __LINE__,         \
                   "(" STRINGIFY(rtype) ") " STRINGIFY(fname)           \
-                  "( usc 0x%lx, " UK_ARG_FMT_MAPx(x, UK_S_ARG_FMT_LONGX, __VA_ARGS__) ")\n", \
-                  UK_USC_EMAPx(x, UK_S_ARG_CAST_LONG, __VA_ARGS__) )
+                  "( execenv 0x%lx, " UK_ARG_FMT_MAPx(x,               \
+                                                      UK_S_ARG_FMT_LONGX,\
+                                                      __VA_ARGS__) ")\n",\
+                  UK_EXECENV_EMAPx(x, UK_S_ARG_CAST_LONG, __VA_ARGS__))
 #else
 #define __UK_SYSCALL_PRINTD(...) do {} while(0)
-#define __UK_SYSCALL_USC_PRINTD(...) do {} while(0)
+#define __UK_SYSCALL_EXECENV_PRINTD(...) do {} while(0)
 #endif /* CONFIG_LIBSYSCALL_SHIM_DEBUG || CONFIG_LIBUKDEBUG_PRINTD */
 
 /* System call implementation that uses errno and returns -1 on errors */
@@ -389,54 +364,54 @@ typedef long uk_syscall_arg_t;
                               __UK_NAME2SCALLR_FN(name),               \
                               __VA_ARGS__)
 
-#define __UK_LLSYSCALL_R_U_DEFINE(x, rtype, name, ename, rname, ...)\
-       long rname(long _usc);                                          \
-       long __used ename(long _usc)                                            \
+#define __UK_LLSYSCALL_R_E_DEFINE(x, rtype, name, ename, rname, ...)   \
+       long rname(long _execenv);                                      \
+       long __used ename(long _execenv)                                \
        {                                                               \
                long ret;                                               \
                                                                        \
-               ret = rname(_usc);                                      \
+               ret = rname(_execenv);                                  \
                if (ret < 0 && PTRISERR(ret)) {                         \
                        errno = -(int) PTR2ERR(ret);                    \
                        return -1;                                      \
                }                                                       \
                return ret;                                             \
        }                                                               \
-       static inline rtype __##rname(UK_USC_DECLMAPx(UK_S_USC_ARG_ACTUAL,\
+       static inline rtype __##rname(UK_EXECENV_DECLMAPx(UK_S_EXECENV_ARG_ACTUAL,\
                                                      x, UK_S_ARG_ACTUAL,\
                                                      __VA_ARGS__));    \
-       long __used rname(long _usc)                                    \
+       long __used rname(long _execenv)                                        \
        {                                                               \
-               struct uk_syscall_ctx *usc;                             \
+               struct ukarch_execenv *execenv;                         \
                long ret;                                               \
                                                                        \
-               usc = (struct uk_syscall_ctx *)_usc;                    \
-               __UK_SYSCALL_USC_PRINTD(x, rtype, rname,                \
+               execenv = (struct ukarch_execenv *)_execenv;            \
+               __UK_SYSCALL_EXECENV_PRINTD(x, rtype, rname,            \
                                        __VA_ARGS__);                   \
-               ret = (long) __##rname(UK_USC_CALLMAPx(x,               \
+               ret = (long) __##rname(UK_EXECENV_CALLMAPx(x,           \
                                                   UK_S_ARG_ACTUAL,     \
                                                   __VA_ARGS__));       \
                return ret;                                             \
        }                                                               \
-       static inline rtype __used __##rname(UK_USC_DECLMAPx(           \
-                                            UK_S_USC_ARG_ACTUAL_MAYBE_UNUSED,\
+       static inline rtype __used __##rname(UK_EXECENV_DECLMAPx(               \
+                                            UK_S_EXECENV_ARG_ACTUAL_MAYBE_UNUSED,\
                                             x, UK_S_ARG_ACTUAL_MAYBE_UNUSED,\
                                             __VA_ARGS__))
-#define _UK_LLSYSCALL_R_U_DEFINE(...) __UK_LLSYSCALL_R_U_DEFINE(__VA_ARGS__)
-#define UK_LLSYSCALL_R_U_DEFINE(rtype, name, ...)                      \
-       UK_SYSCALL_USC_PROLOGUE_DEFINE(__UK_NAME2SCALLE_FN(name),       \
-                                      __UK_NAME2SCALLE_FN(u_##name),   \
+#define _UK_LLSYSCALL_R_E_DEFINE(...) __UK_LLSYSCALL_R_E_DEFINE(__VA_ARGS__)
+#define UK_LLSYSCALL_R_E_DEFINE(rtype, name, ...)                      \
+       UK_SYSCALL_EXECENV_PROLOGUE_DEFINE(__UK_NAME2SCALLE_FN(name),   \
+                                      __UK_NAME2SCALLE_FN(e_##name),   \
                                       UK_NARGS(__VA_ARGS__),           \
                                       __VA_ARGS__)                     \
-       UK_SYSCALL_USC_PROLOGUE_DEFINE(__UK_NAME2SCALLR_FN(name),       \
-                                      __UK_NAME2SCALLR_FN(u_##name),   \
+       UK_SYSCALL_EXECENV_PROLOGUE_DEFINE(__UK_NAME2SCALLR_FN(name),   \
+                                      __UK_NAME2SCALLR_FN(e_##name),   \
                                       UK_NARGS(__VA_ARGS__),           \
                                       __VA_ARGS__)                     \
-       _UK_LLSYSCALL_R_U_DEFINE(UK_NARGS(__VA_ARGS__),                 \
+       _UK_LLSYSCALL_R_E_DEFINE(UK_NARGS(__VA_ARGS__),                 \
                                 rtype,                                 \
                                 name,                                  \
-                                __UK_NAME2SCALLE_FN(u_##name),         \
-                                __UK_NAME2SCALLR_FN(u_##name),         \
+                                __UK_NAME2SCALLE_FN(e_##name),         \
+                                __UK_NAME2SCALLR_FN(e_##name),         \
                                 __VA_ARGS__)
 
 /*
@@ -547,7 +522,7 @@ long uk_syscall6(long nr, long arg1, long arg2, long arg3,
 /* Raw system call, returns negative codes on errors */
 long uk_syscall_r(long nr, ...);
 long uk_vsyscall_r(long nr, va_list arg);
-long uk_syscall6_r_u(struct uk_syscall_ctx *usc);
+long uk_syscall6_r_e(struct ukarch_execenv *execenv);
 long uk_syscall6_r(long nr, long arg1, long arg2, long arg3,
                   long arg4, long arg5, long arg6);
 
index bbc6ca5f95fa027edf82b7666e3dba989b40c077..9bb0ea4e3434d4983e093cdcae8de389e79a955e 100644 (file)
@@ -5,7 +5,7 @@ BEGIN {
        print "\n#include <uk/bits/syscall_nrs.h>"
 }
 
-/[a-zA-Z0-9]+-[0-9]+u?/{
+/[a-zA-Z0-9]+-[0-9]+e?/{
        # check if the syscall is not defined
        printf "\n#ifndef SYS_%s\n", $1;
        # if a LEGACY_<syscall_name> symbol is defined, the syscall is not required
@@ -20,10 +20,10 @@ BEGIN {
        printf "#error Failed to map system call '%s': No system call number available\n", $1
        printf "#endif  /* LEGACY_SYS_%s */\n", $1
        printf "#else\n";
-       if (substr($0, length($0)) == "u") {
-               printf "#define HAVE_uk_syscall_u_%s t\n", $1;
-               printf "UK_SYSCALL_E_PROTO(1, u_%s);\n", $1;
-               printf "UK_SYSCALL_R_PROTO(1, u_%s);\n", $1;
+       if (substr($0, length($0)) == "e") {
+               printf "#define HAVE_uk_syscall_e_%s t\n", $1;
+               printf "UK_SYSCALL_E_PROTO(1, e_%s);\n", $1;
+               printf "UK_SYSCALL_R_PROTO(1, e_%s);\n", $1;
                printf "#define HAVE_uk_syscall_%s t\n", $1;
                printf "UK_SYSCALL_E_PROTO(%s, %s);\n", $2 + 0, $1;
                printf "UK_SYSCALL_R_PROTO(%s, %s);\n", $2 + 0, $1;
index feaae35c1e8d40c338bda4f8a9293dde908267cd..518bde8b5cc28880f68ad6d4d1e0c2fe5d7791b5 100644 (file)
@@ -3,15 +3,15 @@ BEGIN {
        print "/* Auto generated file. DO NOT EDIT */\n\n"
 }
 
-/[a-zA-Z0-9]+-[0-9]+u?/ {
+/[a-zA-Z0-9]+-[0-9]+e?/ {
        name = $1
        args_nr = $2 + 0
 
        printf "\n/* SYS_%s: %d argument(s) */\n", name, args_nr;
        for (i = 0; i <= max_args; i++)
        {
-               if (substr($0, length($0)) == "u") {
-                       printf "#define uk_syscall_r%d_u_%s(", i, name;
+               if (substr($0, length($0)) == "e") {
+                       printf "#define uk_syscall_r%d_e_%s(", i, name;
                        for (j = 1; j <= i; j++)
                        {
                                if (j > 1)
@@ -20,7 +20,7 @@ BEGIN {
                        }
                        printf ") \\\n";
 
-                       printf "\tuk_syscall_r_u_%s(", name;
+                       printf "\tuk_syscall_r_e_%s(", name;
 
                        # hand-over given arguments
                        for (j = 1; j <= i && j <= args_nr; j++)
index a44793ba9eb9b48bb329d62d846c03b13c0c99f5..8ae5cf35db873d50318954bb72060450b7aec782 100644 (file)
@@ -4,9 +4,9 @@ BEGIN { print "/* Auto generated file. Do not edit */" }
        printf "\n#ifndef HAVE_uk_syscall_%s", name;
        printf "\n#define uk_syscall_e_%s(...) uk_syscall_e_stub(\"%s\")", name, name;
        printf "\n#define uk_syscall_r_%s(...) uk_syscall_r_stub(\"%s\")", name, name;
-       printf "\n#ifndef HAVE_uk_syscall_u_%s", name;
-       printf "\n#define uk_syscall_e_u_%s(...) uk_syscall_e_stub(\"%s\")", name, name;
-       printf "\n#define uk_syscall_r_u_%s(...) uk_syscall_r_stub(\"%s\")", name, name;
-       printf "\n#endif /* !HAVE_uk_syscall_u_%s */\n", name;
+       printf "\n#ifndef HAVE_uk_syscall_e_%s", name;
+       printf "\n#define uk_syscall_e_e_%s(...) uk_syscall_e_stub(\"%s\")", name, name;
+       printf "\n#define uk_syscall_r_e_%s(...) uk_syscall_r_stub(\"%s\")", name, name;
+       printf "\n#endif /* !HAVE_uk_syscall_e_%s */\n", name;
        printf "\n#endif /* !HAVE_uk_syscall_%s */\n", name;
 }
index 116a82c6cbc3a7d60e6a41654fdc6787bdf11deb..e969d5582a3e1604c99ca113cf5a903ce325f7cd 100644 (file)
@@ -6,39 +6,39 @@ BEGIN {
        print "#include <uk/syscall.h>"
        print "#include \"arch/regmap_linuxabi.h\"\n"
 
-       print "UK_SYSCALL_USC_PROLOGUE_DEFINE(uk_syscall6_r, uk_syscall6_r_u,"
+       print "UK_SYSCALL_EXECENV_PROLOGUE_DEFINE(uk_syscall6_r, uk_syscall6_r_e,"
        print "\t\t\t\t14, long, nr, long, arg1, long, arg2, long, arg3, long, arg4, long, arg5, long, arg6)";
-       print "\nlong __used uk_syscall6_r_u(struct uk_syscall_ctx *usc)"
+       print "\nlong __used uk_syscall6_r_e(struct ukarch_execenv *execenv)"
        print "{"
        print "\tlong ret;"
-       print "\n\tswitch (usc->regs.rsyscall) {"
+       print "\n\tswitch (execenv->regs.__syscall_rsyscall) {"
 }
 
 /[a-zA-Z0-9]+-[0-9]+/{
        name = $1
        sys_name = "SYS_" name
        uk_syscall_r = "uk_syscall_r_" name
-       uk_syscall_r_u = "uk_syscall_r_u_" name
+       uk_syscall_r_e = "uk_syscall_r_e_" name
        args_nr = $2 + 0
        printf "\n#ifdef HAVE_uk_syscall_%s\n", name;
        printf "\tcase %s:\n", sys_name;
-       printf "\n#ifdef HAVE_uk_syscall_u_%s\n", name;
-       printf "\t\tret = %s((long)usc);\n", uk_syscall_r_u;
-       printf "#else /* !HAVE_uk_syscall_u_%s */\n", name;
+       printf "\n#ifdef HAVE_uk_syscall_e_%s\n", name;
+       printf "\t\tret = %s((long)execenv);\n", uk_syscall_r_e;
+       printf "#else /* !HAVE_uk_syscall_e_%s */\n", name;
        printf "\t\tret = %s(\n\t\t\t\t\t", uk_syscall_r;
        for (i = 0; i < args_nr - 1; i++)
-               printf("usc->regs.rarg%d, ", i)
+               printf("execenv->regs.__syscall_rarg%d, ", i)
        if (args_nr > 0)
-               printf("usc->regs.rarg%d", args_nr - 1)
+               printf("execenv->regs.__syscall_rarg%d", args_nr - 1)
        printf(");\n")
-       printf "\n#endif /* !HAVE_uk_syscall_u_%s */\n\n", name;
+       printf "\n#endif /* !HAVE_uk_syscall_e_%s */\n\n", name;
        printf "\t\tbreak;\n"
        printf "\n#endif /* HAVE_uk_syscall_%s */\n", name;
 }
 
 END {
        printf "\tdefault:\n"
-       printf "\t\tuk_pr_debug(\"syscall \\\"%%s\\\" is not available\\n\", uk_syscall_name(usc->regs.rsyscall));\n"
+       printf "\t\tuk_pr_debug(\"syscall \\\"%%s\\\" is not available\\n\", uk_syscall_name(execenv->regs.__syscall_rsyscall));\n"
        printf "\t\tret = -ENOSYS;\n"
        printf "\t}\n"
        printf "\treturn ret;\n"
index 62133622963df18e3fefc69510f1074d4cc9502e..8685b1d10e61c269086f772a5cc6f23af42f65b3 100644 (file)
@@ -3,9 +3,11 @@
  * Binary system call handler (Linux ABI)
  *
  * Authors: Simon Kuenzer <simon.kuenzer@neclab.eu>
+ *          Sergiu Moga <sergiu@unikraft.io>
  *
  * Copyright (c) 2020, NEC Laboratories Europe GmbH, NEC Corporation.
  *                     All rights reserved.
+ * Copyright (c) 2024, Unikraft GmbH. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
 #include <uk/arch/ctx.h>
 #include <uk/assert.h>
 #include <uk/essentials.h>
+#include <uk/thread.h>
 #include "arch/regmap_linuxabi.h"
 #if CONFIG_LIBSYSCALL_SHIM_STRACE
 #include <uk/plat/console.h> /* ukplat_coutk */
 #endif /* CONFIG_LIBSYSCALL_SHIM_STRACE */
 
+/**
+ * This is a convenience structure. The earlier architecture specific system
+ * call entry call chain ensures that the execenv of the caller is fully
+ * stored, **BUT** it does not touch the current values so the C entry here
+ * receiving this structure should ensure that the system registers are those
+ * of Unikraft and not of the application.
+ * This structure also contains `auxsp` because the previous caller needed
+ * to know this value anyway in order to switch stacks so we can get that
+ * value from here instead of re-fetching it from a system register. This
+ * would have otherwise been costly, e.g. for x86 it could have meant a
+ * rdmsr/rdgsbase which is significantly slower than a memory read so avoid
+ * doing it again and choose the ugly design of adding this additional `auxsp`
+ * field in favor of fewer CPU cycles wasted on the syscall hotpath.
+ */
+struct uk_syscall_ctx {
+       struct ukarch_execenv execenv;
+       __uptr auxsp;
+};
+
 void ukplat_syscall_handler(struct uk_syscall_ctx *usc)
 {
 #if CONFIG_LIBSYSCALL_SHIM_STRACE
@@ -53,24 +75,45 @@ void ukplat_syscall_handler(struct uk_syscall_ctx *usc)
 #endif /* !CONFIG_LIBSYSCALL_SHIM_STRACE_ANSI_COLOR */
        int prsyscalllen;
 #endif /* CONFIG_LIBSYSCALL_SHIM_STRACE */
+       struct ukarch_auxspcb *auxspcb;
+       struct ukarch_execenv *execenv;
+#if CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS
+       struct uk_thread *t;
+#endif /* CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS */
 
        UK_ASSERT(usc);
 
-       /* Save extended register state */
-       ukarch_ectx_sanitize((struct ukarch_ectx *)&usc->ectx);
-       ukarch_ectx_store((struct ukarch_ectx *)&usc->ectx);
+       execenv = &usc->execenv;
+       UK_ASSERT(execenv);
 
-       ukarch_sysregs_switch_uk(&usc->sysregs);
+       /**
+        * The earlier architecture specific system call entry call chain
+        * ensures that the execenv of the caller is fully stored, **BUT** it
+        * does not touch the current values so the C entry here receiving this
+        * uk_syscall_ctx structure should ensure that the **ACTIVE** system
+        * registers are those of Unikraft and not of the application.
+        */
+       auxspcb = ukarch_auxsp_get_cb(usc->auxsp);
+       ukarch_sysctx_load(&auxspcb->uksysctx);
+
+#if CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS
+       t = uk_thread_current();
+       UK_ASSERT(t);
+       t->tlsp = t->uktlsp;
+       UK_ASSERT(t->uktlsp == ukarch_auxspcb_get_uktlsp(auxspcb));
+#endif /* CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS */
 
 #if CONFIG_LIBSYSCALL_SHIM_DEBUG_HANDLER
        _uk_printd(uk_libid_self(), __STR_BASENAME__, __LINE__,
                        "Binary system call request \"%s\" (%lu) at ip:%p (arg0=0x%lx, arg1=0x%lx, ...)\n",
-                   uk_syscall_name(usc->regs.rsyscall), usc->regs.rsyscall,
-                   (void *)usc->regs.rip, usc->regs.rarg0,
-                   usc->regs.rarg1);
+                   uk_syscall_name(execenv->regs.__syscall_rsyscall),
+                   execenv->regs.__syscall_rsyscall,
+                   (void *)execenv->regs.__syscall_rip,
+                   execenv->regs.__syscall_rarg0,
+                   execenv->regs.__syscall_rarg1);
 #endif /* CONFIG_LIBSYSCALL_SHIM_DEBUG_HANDLER */
 
-       usc->regs.rret0 = uk_syscall6_r_u(usc);
+       execenv->regs.__syscall_rret0 = uk_syscall6_r_e(execenv);
 
 #if CONFIG_LIBSYSCALL_SHIM_STRACE
        prsyscalllen = uk_snprsyscall(prsyscallbuf, ARRAY_SIZE(prsyscallbuf),
@@ -79,9 +122,14 @@ void ukplat_syscall_handler(struct uk_syscall_ctx *usc)
 #else /* !CONFIG_LIBSYSCALL_SHIM_STRACE_ANSI_COLOR */
                     UK_PRSYSCALL_FMTF_NEWLINE,
 #endif /* !CONFIG_LIBSYSCALL_SHIM_STRACE_ANSI_COLOR */
-                    usc->regs.rsyscall, usc->regs.rret0, usc->regs.rarg0,
-                    usc->regs.rarg1, usc->regs.rarg2, usc->regs.rarg3,
-                    usc->regs.rarg4, usc->regs.rarg5);
+                    execenv->regs.__syscall_rsyscall,
+                    execenv->regs.__syscall_rret0,
+                    execenv->regs.__syscall_rarg0,
+                    execenv->regs.__syscall_rarg1,
+                    execenv->regs.__syscall_rarg2,
+                    execenv->regs.__syscall_rarg3,
+                    execenv->regs.__syscall_rarg4,
+                    execenv->regs.__syscall_rarg5);
        /*
         * FIXME:
         * We directly use `ukplat_coutk()` until lib/ukdebug printing
@@ -90,8 +138,7 @@ void ukplat_syscall_handler(struct uk_syscall_ctx *usc)
        ukplat_coutk(prsyscallbuf, (__sz) prsyscalllen);
 #endif /* CONFIG_LIBSYSCALL_SHIM_STRACE */
 
-       ukarch_sysregs_switch_ul(&usc->sysregs);
-
-       /* Restore extended register state */
-       ukarch_ectx_load((struct ukarch_ectx *)&usc->ectx);
+#if CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS
+       t->tlsp = ukarch_sysctx_get_tlsp(&execenv->sysctx);
+#endif /* CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS */
 }
index b59ed1af1ccf85fca27b3173c2bd92fe3ca798ae..a1f8914c4de16872f687f4cb10279015377c1a18 100644 (file)
@@ -80,6 +80,7 @@
 #endif /* CONFIG_LIBUKBOOT_INITSCHEDCOOP */
 #include <uk/arch/lcpu.h>
 #include <uk/plat/bootstrap.h>
+#include <uk/plat/common/lcpu.h>
 #include <uk/plat/memory.h>
 #include <uk/plat/lcpu.h>
 #include <uk/plat/time.h>
@@ -267,7 +268,10 @@ void ukplat_entry(int argc, char *argv[])
        struct uk_alloc *a = NULL, *sa = NULL, *auxsa = NULL;
 #endif
 #if CONFIG_LIBUKBOOT_INITALLOC
+       struct ukarch_auxspcb *auxspcb;
+       __uptr auxsp, uktlsp;
        void *tls = NULL;
+       void *auxstack;
 #endif
 #if CONFIG_LIBUKSCHED
        struct uk_sched *s = NULL;
@@ -277,7 +281,6 @@ void ukplat_entry(int argc, char *argv[])
 #endif /* CONFIG_LIBUKBOOT_MAINTHREAD */
        uk_ctor_func_t *ctorfn;
        struct uk_inittab_entry *init_entry;
-       void *auxstack;
 
 #if CONFIG_LIBUKBOOT_MAINTHREAD
        /* Initialize shutdown control structure */
@@ -361,15 +364,22 @@ void ukplat_entry(int argc, char *argv[])
        /* Copy from TLS master template */
        ukarch_tls_area_init(tls);
        /* Activate TLS */
-       ukplat_tlsp_set(ukarch_tls_tlsp(tls));
+       uktlsp = ukarch_tls_tlsp(tls);
+       ukplat_tlsp_set(uktlsp);
 
        /* Allocate auxiliary stack for this execution context */
        auxstack = uk_memalign(auxsa,
                               UKARCH_AUXSP_ALIGN, AUXSTACK_SIZE);
        if (unlikely(!auxstack))
                UK_CRASH("Failed to allocate the auxiliary stack\n");
+
        /* Activate auxiliary stack */
-       ukplat_lcpu_set_auxsp(ukarch_gen_sp(auxstack, AUXSTACK_SIZE));
+       auxsp = ukarch_gen_sp(auxstack, AUXSTACK_SIZE);
+       ukarch_auxsp_init(auxsp);
+       auxspcb = ukarch_auxsp_get_cb(auxsp);
+       UK_ASSERT(auxspcb);
+       ukarch_auxspcb_set_uktlsp(auxspcb, uktlsp);
+       ukplat_lcpu_set_auxsp(auxsp);
 #endif /* CONFIG_LIBUKBOOT_INITALLOC */
 
 #if CONFIG_LIBUKINTCTLR
index f5bd8e7800a55d7a79be19957c320e058661768d..8b1614168cec89510bf3aa1ea2d6e5a140c0b8a6 100644 (file)
@@ -237,6 +237,8 @@ static void _uk_thread_struct_init(struct uk_thread *t,
                                   void *priv,
                                   uk_thread_dtor_t dtor)
 {
+       struct ukarch_auxspcb *auxspcb;
+
        /* TLS pointer required if is_uktls is set */
        UK_ASSERT(!is_uktls || tlsp);
 
@@ -252,6 +254,7 @@ static void _uk_thread_struct_init(struct uk_thread *t,
        if (auxsp) {
                t->flags |= UK_THREADF_AUXSP;
                t->auxsp = auxsp;
+               ukarch_auxsp_init(auxsp);
        }
        if (tlsp && is_uktls) {
                t->flags |= UK_THREADF_UKTLS;
@@ -262,6 +265,12 @@ static void _uk_thread_struct_init(struct uk_thread *t,
                t->flags |= UK_THREADF_ECTX;
        }
 
+       if (t->flags & UK_THREADF_UKTLS && t->flags & UK_THREADF_AUXSP) {
+               auxspcb = ukarch_auxsp_get_cb(auxsp);
+               UK_ASSERT(auxspcb);
+               ukarch_sysctx_set_tlsp(&auxspcb->uksysctx, t->uktlsp);
+       }
+
        uk_pr_debug("uk_thread %p (%s): ctx:%p, ectx:%p, tlsp:%p\n",
                    t, t->name ? t->name : "<unnamed>",
                    &t->ctx, t->ectx, (void *) t->tlsp);
index e05bf63a039f909c4135137c30d85b4ccd15ff39..01b70fa90bd508db1dac541f35463be1b3919f1e 100644 (file)
 
 #include <uk/arch/lcpu.h>
 #include <uk/arch/types.h>
+#include <uk/arch/ctx.h>
 #include <arm/cpu.h>
 #include <arm/traps.h>
 #include <uk/print.h>
 #include <uk/assert.h>
 #include <uk/intctlr/gic.h>
-#include <uk/syscall.h>
+#include <uk/plat/syscall.h>
 
 #ifdef CONFIG_ARM64_FEAT_MTE
 #include <arm/arm64/mte.h>
@@ -225,13 +226,28 @@ void trap_el1_irq(struct __regs *regs)
 
 #ifdef CONFIG_LIBSYSCALL_SHIM_HANDLER
 
-extern void ukplat_syscall_handler(struct uk_syscall_ctx *usr);
+extern void ukplat_syscall_handler(struct uk_syscall_ctx *usc);
 
 static int arm64_syscall_adapter(void *data)
 {
        struct ukarch_trap_ctx *ctx = (struct ukarch_trap_ctx *)data;
+       struct ukarch_execenv *execenv = (struct ukarch_execenv *)ctx->regs;
+
+       /* Save extended register state */
+       ukarch_ectx_sanitize((struct ukarch_ectx *)&execenv->ectx);
+       ukarch_ectx_store((struct ukarch_ectx *)&execenv->ectx);
+
+       /* Save system context state */
+       ukarch_sysctx_store(&execenv->sysctx);
+
+       ukplat_syscall_handler((struct uk_syscall_ctx *)execenv);
+
+       /* Restore system context state */
+       ukarch_sysctx_load(&execenv->sysctx);
+
+       /* Restore extended register state */
+       ukarch_ectx_load((struct ukarch_ectx *)&execenv->ectx);
 
-       ukplat_syscall_handler((struct uk_syscall_ctx *)ctx->regs);
        return 1; /* Success */
 }
 
index d78ae2d6d5b89d9482f6421b50117a51ae3706c6..47a7c4815b41b7375d47fccfa668c5fdc8e204cc 100644 (file)
@@ -39,6 +39,7 @@
 #define __PLAT_CMN_LCPU_H__
 
 #include <uk/config.h>
+#include <uk/plat/config.h>
 #ifndef __ASSEMBLY__
 #include <uk/essentials.h>
 #include <uk/arch/types.h>
@@ -62,10 +63,10 @@ struct lcpu_arch { };
 #endif /* !__ASSEMBLY__ */
 #endif /* !LCPU_ARCH_SIZE */
 
-#define IS_LCPU_PTR(ptr)                                               \
-       (IN_RANGE((__uptr)(ptr),                                        \
-                 (__uptr)lcpu_get(0),                                  \
-                 (__uptr)CONFIG_UKPLAT_LCPU_MAXCOUNT *                 \
+#define IS_LCPU_PTR(ptr)                                               \
+       (IN_RANGE((__uptr)(ptr),                                        \
+                 (__uptr)lcpu_get(0),                                  \
+                 (__uptr)CONFIG_UKPLAT_LCPU_MAXCOUNT *                 \
                  sizeof(struct lcpu)))
 
 /*
index a2c006916d00f347700671446748242741ce36c0..05de538623658a51f39609131f88cd471657350a 100644 (file)
@@ -196,20 +196,6 @@ void ukplat_lcpu_halt_irq_until(__nsec until)
        time_block_until(until);
 }
 
-__uptr ukplat_lcpu_get_auxsp(void)
-{
-       UK_ASSERT(IS_LCPU_PTR(lcpu_get_current()));
-
-       return lcpu_get_current()->auxsp;
-}
-
-void ukplat_lcpu_set_auxsp(__uptr auxsp)
-{
-       UK_ASSERT(IS_LCPU_PTR(lcpu_get_current()));
-
-       lcpu_get_current()->auxsp = auxsp;
-}
-
 #ifdef CONFIG_HAVE_SMP
 __lcpuid ukplat_lcpu_id(void)
 {
index 3e636d8745aabff697cd9e3710e416b90f015273..72ee4f34b28e243eed9bcb898d916671db9988a2 100644 (file)
@@ -34,6 +34,7 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <uk/arch/ctx.h>
 #include <uk/config.h>
 #include <uk/assert.h>
 #include <uk/print.h>
index e24a33dd39611c0885246b0a04d032b915abd6be..6621ab6f1f492b54ca4e1d32b5102a849bd8dbd5 100644 (file)
@@ -1,9 +1,11 @@
 /* SPDX-License-Identifier: BSD-3-Clause */
 /*
  * Authors: Simon Kuenzer <simon.kuenzer@neclab.eu>
+ *          Sergiu Moga <sergiu@unikraft.io>
  *
  * Copyright (c) 2019, NEC Laboratories Europe GmbH, NEC Corporation.
  *                     All rights reserved.
+ * Copyright (c) 2024, Unikraft GmbH. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -36,7 +38,7 @@
 #include <uk/asm.h>
 #include <uk/asm/cfi.h>
 #include <uk/plat/common/lcpu.h>
-#include <uk/syscall.h>
+#include <uk/arch/ctx.h>
 
 ENTRY(_ukplat_syscall)
        .cfi_startproc simple
@@ -65,11 +67,96 @@ ENTRY(_ukplat_syscall)
         * in the Red Zone.
         */
        movq    %gs:LCPU_AUXSP_OFFSET, %rsp
+
        /* Describing the rsp relative to GS would make it necessary to emit
         * raw CFI. Instead of doing so, mark rsp as undefined temporarily
         */
        .cfi_undefined rsp
 
+       subq    $(UKARCH_AUXSPCB_SIZE), %rsp
+       movq    UKARCH_AUXSPCB_OFFSETOF_CURR_FP(%rsp), %rsp
+
+       /* The stack pointer now points to the current frame within the
+        * auxiliary stack.
+        * We subtract UKARCH_EXECENV_END_ALIGN from it (8 bytes less at
+        * first, the remaining 8 bytes through the push of auxsp itself)
+        * to make room for the 8-byte auxsp pointer, since the layout is
+        * struct uk_syscall_ctx {
+        *      struct ukarch_execenv execenv;
+        *      __uptr auxsp;  <-- make room for this
+        *     ..... space left unused because of alignment subtraction ....
+        * };
+        * We cannot just simply push or subtract 8 bytes because we break
+        * the alignment required by EXECENV, so we must subtract more.
+        * This leads to some wasted bytes but it's fine because they are not
+        * permanent and it is mandatory that we maintain alignment. This
+        * is an optimization so that we do not have to fetch `auxsp` in the
+        * syscall C entry as well (which usually involves reading some
+        * system register). The final stack layout for entering the syscall
+        * C handler should look like the following:
+        *
+        *               lcpu->auxsp (AUXSP aligned)
+        *                  +-------------+ ^
+        *                  |             | | EXECENV_END_ALIGN
+        *                ^ |<----------->| |             ^
+        *         8 bytes| |pushed auxsp | |             |
+        *                v |-------------| v             |
+        *                ^ |  struct     | ^             |
+        *                | |  __regs     | |__REGS_SIZEOF|
+        *                | |-------------| v             |
+        *                | |  struct     | ^             |uk_syscall_ctx
+        *      struct    | |ukarch_sysctx| |SYSCTX_SIZE  |
+        *  ukarch_execenv| |-------------| v             |
+        *                | |             | ^             |
+        *                | |  struct     | |             |
+        *                | | ukarch_ectx | |ECTX_SIZE    |
+        *                | |             | |             |
+        *                | |             | |             |
+        *                | |             | |             |
+        *                v +-------------+ v             v
+        *                       stack
+        *                       pointer
+        *
+        * Where ukarch_sysctx/ukarch_ectx/__regs is filled in the following,
+        * after making room for it.
+        */
+
+       /* Create the room */
+       /* The subtraction we need to make so that we can push the 8-byte auxsp.
+        * This is an inconvenient operation as it ends up wasting a few bytes,
+        * the subtraction being bigger than 8 bytes to comply with required
+        * alignment. But it is a trade-off worth doing to avoid having to
+        * read the GS_BASE MSR again in the syscall entry, by storing what
+        * we need now, while we are still swapgs'd.
+        *
+        * NOTE: It is 8 bytes less than the actual required subtraction. This is
+        * so we can right afterwards push the auxsp's value, thus subtracting
+        * 8 bytes yet again from the stack pointer.
+        */
+       subq    $(UKARCH_EXECENV_END_ALIGN - 8), %rsp
+       .cfi_adjust_cfa_offset (UKARCH_EXECENV_END_ALIGN - 8)
+
+       /* Push out auxsp for faster access later. */
+       pushq   %gs:LCPU_AUXSP_OFFSET
+       .cfi_adjust_cfa_offset 8
+
+       /**
+        * We are done getting what we needed from KERNEL_GS_BASE, swap back.
+        * We do this immediately so that we avoid confusions like:
+        * - I am in an exception handler, was I in a syscall or application
+        * code when it happened? What is the value of KERNEL_GS_BASE/GS_BASE?
+        * - I am returning from clone(), should I do swapgs? Was this an
+        * internal clone() call?
+        * ...
+        *
+        * By doing this we will always know:
+        * - Were we interrupted/trapped from Unikraft/syscall code? Then
+        * GS_BASE == lcpu
+        * - Were we interrupted/trapped from application code? Then we don't
+        * know GS_BASE so we need to get lcpu from KERNEL_GS_BASE.
+        */
+       swapgs
+
        /* NOTE: We should normally align the stack before doing this
         * subtraction because we must ensure that the `ectx` field
         * is aligned to the corresponding ECTX alignment.
@@ -77,42 +164,28 @@ ENTRY(_ukplat_syscall)
         * auxiliary stack because it is allocated with this exact alignment
         * in mind.
         */
-       subq    $(UK_SYSCALL_CTX_SIZE - __REGS_SIZEOF), %rsp
-       .cfi_adjust_cfa_offset (UK_SYSCALL_CTX_SIZE - __REGS_SIZEOF)
+       subq    $(UKARCH_EXECENV_SIZE - __REGS_SIZEOF), %rsp
+       .cfi_adjust_cfa_offset (UKARCH_EXECENV_SIZE - __REGS_SIZEOF)
 
        pushq_cfi       $(GDT_DESC_OFFSET(GDT_DESC_DATA))
 
-       /* Store application's stack pointer at the top of current thread's
-        * auxiliary stack. We have to do this because we obviously can't
-        * rely on the scratch register being maintained between thread switches
-        */
        pushq_reg_cfi r11
        .cfi_rel_offset rsp, 0
 
-       /* We are now in a state where the stack looks like this:
-        *      --------------- <-- auxsp (i.e. lcpu_get_current()->auxsp,
-        *      | app's saved |            i.e. uk_thread_current()->auxsp)
-        *      |     %ss     |
-        *      ---------------
-        *      | app's saved |
-        *      |     %rsp    |
-        *      --------------- <-- (auxsp - 16) i.e. (**current %rsp**)
-        *      |             |
-        *      |             |
-        *            ...
-        *      |             |
-        *      --------------- <-- (auxsp - AUXSTACK_SIZE)
-        *        END OF AUXSP
-        */
-
        /*
         * Push arguments in the order of 'struct __regs' to the stack.
-        * We are going to handover a refernce to this stack area as
+        * We are going to handover a reference to this stack area as
         * `struct __regs *` argument to the system call handler.
         */
        /* We now have %ss and %rsp on the frame, finish classic trap frame */
+       /* Push EFLAGS register. Additionally, since we pushed it with IRQs
+        * disabled, it won't have the corresponding bit flag set, making it
+        * look like the caller of the syscall had IRQs off, which no sane
+        * application would do, therefore manually set the flag.
+        */
        pushfq                  /* eflags */
        .cfi_adjust_cfa_offset 8
+       orq     $X86_EFLAGS_IF, 0(%rsp)
 
        pushq_cfi       $(GDT_DESC_OFFSET(GDT_DESC_CODE))       /* cs */
        pushq_reg_cfi rcx       /* rcx contains the next rip on syscall exit */
@@ -143,11 +216,33 @@ ENTRY(_ukplat_syscall)
        /*
         * Handle call
         * NOTE: Handler function is going to modify saved registers state
-        * NOTE: Stack pointer as "struct __regs *" argument
+        * NOTE: Stack pointer as "struct uk_syscall_ctx *" argument
         *       (calling convention: 1st arg on %rdi)
         */
        movq %rsp, %rdi
 
+       /**
+        * Store the current ECTX into execenv's ectx field, which resides at
+        * offset sizeof(struct __regs) + sizeof(struct ukarch_sysctx) from
+        * the beginning of execenv.
+        */
+       addq    $(__REGS_SIZEOF + UKARCH_SYSCTX_SIZE), %rdi
+       call    ukarch_ectx_store
+
+       /**
+        * %rsp is preserved across function calls and still holds the
+        * execenv pointer, so restore %rdi from it.
+        */
+       movq    %rsp, %rdi
+
+       /**
+        * Store the current system context into execenv's sysctx field at
+        * offset sizeof(struct __regs) from the beginning of execenv.
+        */
+       addq    $(__REGS_SIZEOF), %rdi
+       call    ukarch_sysctx_store
+       movq    %rsp, %rdi
+
        /*
         * Make sure the stack is aligned to 16-bytes. We store the original
         * stack pointer in the frame pointer (callee saved)
@@ -163,6 +258,35 @@ ENTRY(_ukplat_syscall)
        .cfi_def_cfa_register rsp
 
        cli
+
+       /**
+        * Assign pointer to execution environment to load (first argument).
+        * We do this because it will be easy to keep track of it as, unlike
+        * %rdi, we do not have to store/restore %rsp across function calls.
+        */
+       movq    %rsp, %rdi
+
+       /**
+        * Load execenv's stored ECTX which resides at offset:
+        * sizeof(struct __regs) + sizeof(struct ukarch_sysctx) from beginning
+        * of execenv.
+        */
+       addq    $(__REGS_SIZEOF + UKARCH_SYSCTX_SIZE), %rdi
+       call    ukarch_ectx_load
+
+       /**
+        * %rsp is preserved across function calls and still holds the
+        * execenv pointer, so restore %rdi from it once more.
+        */
+       movq    %rsp, %rdi
+
+       /**
+        * Load execenv's stored system context which resides at offset:
+        * sizeof(struct __regs) from beginning of execenv.
+        */
+       addq    $(__REGS_SIZEOF), %rdi
+       call    ukarch_sysctx_load
+
        /* Load the updated state back to registers */
        addq $(__REGS_PAD_SIZE), %rsp
        .cfi_adjust_cfa_offset -__REGS_PAD_SIZE
@@ -187,9 +311,6 @@ ENTRY(_ukplat_syscall)
        .cfi_restore rsp
        .cfi_def_cfa rsp, 0
 
-       /* Restore application's gs_base register */
-       swapgs
-
        sti
 
        /*
index 388fa016f3927a5d3b0facf92451ab9a309380ef..9514e4005051748aa87f6dbd58766dc034b12519 100644 (file)
@@ -30,7 +30,7 @@
 #include <uk/asm.h>
 #include <uk/plat/common/lcpu.h>
 #include <uk/plat/config.h>
-#include <uk/syscall.h>
+#include <uk/arch/ctx.h>
 
 .macro EXCHANGE_SP_WITH_X0
        add sp, sp, x0  // new_sp = sp + x0
         */
        mrs     x0, tpidr_el1
        ldr     x0, [x0, #LCPU_AUXSP_OFFSET]
+       sub     x0, x0, #UKARCH_AUXSPCB_SIZE
+       ldr     x0, [x0, #UKARCH_AUXSPCB_OFFSETOF_CURR_FP]
+
+       /**
+        * x0 now holds the current frame pointer loaded from the auxiliary
+        * stack's control block. Subtract UKARCH_EXECENV_END_ALIGN
+        * to make room for the 8-byte auxsp pointer, since the layout is
+        * struct uk_syscall_ctx {
+        *      struct ukarch_execenv execenv;
+        *      __uptr auxsp;  <-- make room for this
+        *     ..... space left unused because of alignment subtraction ....
+        * };
+        * We cannot just simply push or subtract 8 bytes because we break
+        * the alignment required by EXECENV, so we must subtract more.
+        * This leads to some wasted bytes but it's fine because they are not
+        * permanent and it is mandatory that we maintain alignment. This
+        * is an optimization so that we do not have to fetch `auxsp` in the
+        * syscall C entry as well (which usually involves reading some
+        * system register). The final stack layout for entering the syscall
+        * C handler should look like the following:
+        *
+        *               lcpu->auxsp (AUXSP aligned)
+        *                  +-------------+ ^
+        *                  |             | | EXECENV_END_ALIGN
+        *                ^ |<----------->| |             ^
+        *         8 bytes| |pushed auxsp | |             |
+        *                v |-------------| v             |
+        *                ^ |  struct     | ^             |
+        *                | |  __regs     | |__REGS_SIZEOF|
+        *                | |-------------| v             |
+        *                | |  struct     | ^             |uk_syscall_ctx
+        *      struct    | |ukarch_sysctx| |SYSCTX_SIZE  |
+        *  ukarch_execenv| |-------------| v             |
+        *                | |             | ^             |
+        *                | |  struct     | |             |
+        *                | | ukarch_ectx | |ECTX_SIZE    |
+        *                | |             | |             |
+        *                | |             | |             |
+        *                | |             | |             |
+        *                v +-------------+ v             v
+        *                       stack
+        *                       pointer
+        *
+        * Where ukarch_sysctx/ukarch_ectx are to be filled in by the next C
+        * caller and __regs we fill in below.
+        */
+       sub     x0, x0, #UKARCH_EXECENV_END_ALIGN
+
        /* NOTE: We should normally align the stack before doing this
         * subtraction because we must ensure that the `ectx` field
         * is aligned to the corresponding ECTX alignment.
         * auxiliary stack because it is allocated with this exact alignment
         * in mind.
         */
-       sub     x0, x0, #UK_SYSCALL_CTX_SIZE
+       sub     x0, x0, #UKARCH_EXECENV_SIZE
 
        /* We now have in SP the trap stack and in x0 the auxiliary stack */
        EXCHANGE_SP_WITH_X0  /* Switch them */
+
        /* Restore old SP we stored before system call check */
        ldr     x0, [x0, #-16]
        str     x0, [sp, #__SP_OFFSET]  /* Store old SP in auxiliary stack */
+       mrs     x0, tpidr_el1
+       ldr     x0, [x0, #LCPU_AUXSP_OFFSET]
+       str     x0, [sp, #UKARCH_EXECENV_SIZE]
        b       1f
 0:
        sub     sp, sp, #__TRAP_STACK_SIZE
index a6dc262e69c74cbc85f974f81eed42b3ddb20f0b..b144ba959834e3d0214c7862b5e168f5875b9fc1 100644 (file)
@@ -79,3 +79,13 @@ void ukplat_lcpu_irqs_handle_pending(void)
 {
        // TODO
 }
+
+void ukplat_lcpu_set_auxsp(__uptr auxsp)
+{
+       lcpu_get_current()->auxsp = auxsp;
+}
+
+__uptr ukplat_lcpu_get_auxsp(void)
+{
+       return lcpu_get_current()->auxsp;
+}
index 4c41e72b437d0956ce37c8657dc65e0d5c180f7c..5a7167cdfefa33611ff8037ba9f6a78519337660 100644 (file)
@@ -32,7 +32,9 @@
  */
 
 #include <stdint.h>
+#include <uk/arch/ctx.h>
 #include <uk/assert.h>
+#include <uk/plat/common/lcpu.h>
 #include <uk/plat/lcpu.h>
 #include <x86/irq.h>
 
@@ -86,3 +88,21 @@ void ukplat_lcpu_irqs_handle_pending(void)
 {
 
 }
+
+void ukplat_lcpu_set_auxsp(__uptr auxsp)
+{
+       struct lcpu *lcpu = lcpu_get_current();
+       struct ukarch_auxspcb *auxspcb;
+
+       UK_ASSERT(IS_LCPU_PTR(rdgsbase()));
+
+       lcpu->auxsp = auxsp;
+       auxspcb = ukarch_auxsp_get_cb(auxsp);
+       ukarch_sysctx_set_gsbase(&auxspcb->uksysctx, (__uptr)lcpu);
+}
+
+__uptr ukplat_lcpu_get_auxsp(void)
+{
+       UK_ASSERT(IS_LCPU_PTR(lcpu_get_current()));
+       return lcpu_get_current()->auxsp;
+}
index 8bd7de1315c1cd7eb3ae8b53e36f4354d63de839..4a5c5dbfbe7d6d6368d164a4cac35c4e998612da 100644 (file)
@@ -81,3 +81,13 @@ int ukplat_lcpu_irqs_disabled(void)
 {
        return irqs_disabled();
 }
+
+void ukplat_lcpu_set_auxsp(__uptr auxsp)
+{
+       lcpu_get_current()->auxsp = auxsp;
+}
+
+__uptr ukplat_lcpu_get_auxsp(void)
+{
+       return lcpu_get_current()->auxsp;
+}
index 05ed937efae76f13511ebe2b288ba2212818bd26..fac1f8b7279aceeb49b69268d7ff69e056721bcb 100755 (executable)
@@ -7702,7 +7702,7 @@ sub process {
 # check for UK_(LL)SYSCALL_DEFINE(), raw implementation should be preferred
                if ($line =~ /\bUK_(LL)?SYSCALL_DEFINE\s*\(/) {
                        WARN("NON_RAW_SYSCALL",
-                            "Prefer using raw system call definitions: 'UK_SYSCALL_R_DEFINE', 'UK_LLSYSCALL_R_DEFINE', 'UK_LLSYSCALL_R_U_DEFINE'\n" . $herecurr);
+                            "Prefer using raw system call definitions: 'UK_SYSCALL_R_DEFINE', 'UK_LLSYSCALL_R_DEFINE', 'UK_LLSYSCALL_R_E_DEFINE'\n" . $herecurr);
                }
        }