]> xenbits.xensource.com Git - unikraft/unikraft.git/commitdiff
lib/syscall_shim/arch/x86_64: Save proper rsp in execenv prologue
authorSergiu Moga <sergiu@unikraft.io>
Wed, 19 Mar 2025 14:31:50 +0000 (16:31 +0200)
committerUnikraft Bot <monkey@unikraft.io>
Thu, 17 Apr 2025 12:33:46 +0000 (12:33 +0000)
In the execenv prologue meant for native builds we try to mimic the
context save/restore that would happen following a syscall instruction
but in the case of a direct call instruction. This means that the rsp
on entry is actually 8 bytes less than the rsp we are supposed to show
to actual users of this execenv. To cope with this, after pushing the
original rsp do an addition of 8 so that children (e.g. vfork) that may
inherit this context have the proper rsp. Lastly, because of this, upon
exiting the execenv assembly wrapper we must ensure that the context
whose execenv we store/restore is using the proper rsp as well by
undoing aforementioned addition, since it actually returns like a normal
function through ret.

This bug hasn't been caught before because we've only been using this
in the context of the clone syscall for native builds. Unlike vfork,
in the case of clone, the children typically begin execution with a
brand new stack instead of reusing and mimicking that of the parent.

Signed-off-by: Sergiu Moga <sergiu@unikraft.io>
Approved-by: Michalis Pappas <michalis@unikraft.io>
Reviewed-by: Michalis Pappas <michalis@unikraft.io>
Reviewed-by: Andrei Tatar <andrei@unikraft.io>
GitHub-Closes: #1618

lib/syscall_shim/arch/x86_64/include/arch/syscall_prologue.h

index 395fb66f26b5d9ee53fe5ff984dc45ad7f489c91..53e90da76941227150c58918ba38b1bd7988a08b 100644 (file)
                "cli\n\t"                                               \
                "/* Switch to the per-CPU auxiliary stack */\n\t"       \
                "/* AMD64 SysV ABI: r11 is scratch register */\n\t"     \
+               "/* Our stack top now contains a return address\n\t"    \
+               " * pushed by call; this must be ignored when\n\t"      \
+               " * saving the stack pointer to the interrupt\n\t"      \
+               " * return structure, but taken into account when\n\t"  \
+               " * we actually return execution\n\t"                   \
+               " */\n\t"                                               \
                "movq   %rsp, %r11\n\t"                                 \
                "movq   %gs:(" STRINGIFY(LCPU_AUXSP_OFFSET) "), %rsp\n\t"\
                "/* Auxiliary stack is already ECTX aligned */\n\t"     \
                " * [ 1: 0]: Requestor Privilege Level - ring 0\n\t"    \
                " */\n\t"                                               \
                "pushq  $(0x10)\n\t"                                    \
-               "/* Push saving original rsp stored in r11 */\n\t"      \
+               "/* Push saving original rsp - 8 stored in r11 */\n\t"  \
                "pushq  %r11\n\t"                                       \
+               "/* Above pushed rsp is actually the caller's\n\t"      \
+               " * rsp minus 8, because the call instruction\n\t"      \
+               " * pushes the address the ret instruction is\n\t"      \
+               " * supposed to return to. This means that to truly\n\t"\
+               " * mimic a trap/syscall we must store/restore\n\t"     \
+               " * the rsp we were given, plus 8.\n\t"                 \
+               " */\n\t"                                               \
+               "addq   $8, (%rsp)\n\t"                                 \
                "/* Push EFLAGS register. Additionally, since we\n\t"   \
                " * pushed it with IRQs disabled, it won't have\n\t"    \
                " * the corresponding bit flag set, making it look\n\t" \
                "popq   %rbx\n\t"                                       \
                "/* Restore rsp from where it was stored */\n\t"        \
                "movq   104(%rsp), %rsp\n\t"                            \
+               "/* Adjust saved stack to original value; ret\n\t"      \
+               " * expects return address on top of stack\n\t"         \
+               " */\n\t"                                               \
+               "subq   $8, %rsp\n\t"                                   \
                "ret\n\t"                                               \
        );