From: Sergiu Moga Date: Wed, 19 Mar 2025 14:31:50 +0000 (+0200) Subject: lib/syscall_shim/arch/x86_64: Save proper rsp in execenv prologue X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=75d2c304572fedbf97495439f6690704d20ea5fe;p=unikraft%2Funikraft.git lib/syscall_shim/arch/x86_64: Save proper rsp in execenv prologue In the execenv prologue meant for native builds we try to mimic the context save/restore that would happen following a syscall instruction but in the case of a direct call instruction. This means that the rsp on entry is actually 8 bytes less than the rsp we are supposed to show to actual users of this execenv. To cope with this, after pushing the original rsp do an addition of 8 so that children (e.g. vfork) that may inherit this context have the proper rsp. Lastly, because of this, upon exiting the execenv assembly wrapper we must ensure that the context whose execenv we store/restore is using the proper rsp as well by undoing aforementioned addition, since it actually returns like a normal function through ret. This bug hasn't been caught before because we've only been using this in the context of the clone syscall for native builds. Unlike vfork, in the case of clone, the children typically begin execution with a brand new stack instead of reusing and mimicking that of the parent. Signed-off-by: Sergiu Moga Approved-by: Michalis Pappas Reviewed-by: Michalis Pappas Reviewed-by: Andrei Tatar GitHub-Closes: #1618 --- diff --git a/lib/syscall_shim/arch/x86_64/include/arch/syscall_prologue.h b/lib/syscall_shim/arch/x86_64/include/arch/syscall_prologue.h index 395fb66f2..53e90da76 100644 --- a/lib/syscall_shim/arch/x86_64/include/arch/syscall_prologue.h +++ b/lib/syscall_shim/arch/x86_64/include/arch/syscall_prologue.h @@ -32,6 +32,12 @@ "cli\n\t" \ "/* Switch to the per-CPU auxiliary stack */\n\t" \ "/* AMD64 SysV ABI: r11 is scratch register */\n\t" \ + "/* Our stack top now contains a return address\n\t" \ + " * pushed by call; this must be ignored when\n\t" \ + " * saving the stack pointer to the interrupt\n\t" \ + " * return structure, but taken into account when\n\t" \ + " * we actually return execution\n\t" \ + " */\n\t" \ "movq %rsp, %r11\n\t" \ "movq %gs:(" STRINGIFY(LCPU_AUXSP_OFFSET) "), %rsp\n\t"\ "/* Auxiliary stack is already ECTX aligned */\n\t" \ @@ -47,8 +53,16 @@ " * [ 1: 0]: Requestor Privilege Level - ring 0\n\t" \ " */\n\t" \ "pushq $(0x10)\n\t" \ - "/* Push saving original rsp stored in r11 */\n\t" \ + "/* Push saving original rsp - 8 stored in r11 */\n\t" \ "pushq %r11\n\t" \ + "/* Above pushed rsp is actually the caller's\n\t" \ + " * rsp minus 8, because the call instruction\n\t" \ + " * pushes the address the ret instruction is\n\t" \ + " * supposed to return to. This means that to truly\n\t"\ + " * mimic a trap/syscall we must store/restore\n\t" \ + " * the rsp we were given, plus 8.\n\t" \ + " */\n\t" \ + "addq $8, (%rsp)\n\t" \ "/* Push EFLAGS register. Additionally, since we\n\t" \ " * pushed it with IRQs disabled, it won't have\n\t" \ " * the corresponding bit flag set, making it look\n\t" \ @@ -110,6 +124,10 @@ "popq %rbx\n\t" \ "/* Restore rsp from where it was stored */\n\t" \ "movq 104(%rsp), %rsp\n\t" \ + "/* Adjust saved stack to original value; ret\n\t" \ + " * expects return address on top of stack\n\t" \ + " */\n\t" \ + "subq $8, %rsp\n\t" \ "ret\n\t" \ );