xenbits.xensource.com Git - unikraft/unikraft.git/commitdiff
lib/posix-process: Add vfork()
author Michalis Pappas <michalis@unikraft.io>
Mon, 15 Apr 2024 09:48:52 +0000 (11:48 +0200)
committer Unikraft Bot <monkey@unikraft.io>
Fri, 17 Jan 2025 14:59:21 +0000 (14:59 +0000)
The vfork() syscall is equivalent to calling clone() with the flags
parameter set to CLONE_VM | CLONE_VFORK | SIGCHLD. Update clone() to
support CLONE_VFORK and CLONE_VM. Implement vfork() as a wrapper of
clone().

For more info see vfork(2).

Signed-off-by: Michalis Pappas <michalis@unikraft.io>
Approved-by: Andrei Tatar <andrei@unikraft.io>
Reviewed-by: Sergiu Moga <sergiu@unikraft.io>
Reviewed-by: Andrei Tatar <andrei@unikraft.io>
GitHub-Closes: #1386

lib/posix-process/Config.uk
lib/posix-process/Makefile.uk
lib/posix-process/clone.c
lib/posix-process/exportsyms.uk
lib/posix-process/process.h
lib/posix-process/vfork.c [new file with mode: 0644]
lib/posix-process/wait.c

index 90f2f13c45267d8435d28c274dd42f38b6d45fc7..49ea8218ac0aca1c16f2d522374bfcbe4fda616e 100644 (file)
@@ -39,7 +39,13 @@ config LIBPOSIX_PROCESS_EXECVE
        select LIBPOSIX_PROCESS_PIDS
        select LIBUKBINFMT
 
+config LIBPOSIX_PROCESS_VFORK
+       bool "vfork() system call"
+       select LIBPOSIX_PROCESS_PIDS
+       select LIBPOSIX_PROCESS_CLONE
+
 config LIBPOSIX_PROCESS_DEBUG
        bool "Enable debug messages"
        default n
+
 endif
index bbb34b7393cb0ced73364c5b4c9be725a1698277..30821a1c8e598eb8805b36c5c407e8dca8a4ba36 100644 (file)
@@ -12,6 +12,8 @@ LIBPOSIX_PROCESS_CFLAGS-$(CONFIG_LIBPOSIX_PROCESS_DEBUG) += -DUK_DEBUG
 LIBPOSIX_PROCESS_SRCS-$(CONFIG_LIBPOSIX_PROCESS_EXECVE) += $(LIBPOSIX_PROCESS_BASE)/execve.c
 LIBPOSIX_PROCESS_SRCS-$(CONFIG_LIBPOSIX_PROCESS_EXECVE) += $(LIBPOSIX_PROCESS_BASE)/arch/$(CONFIG_UK_ARCH)/execve.c|arch
 
+LIBPOSIX_PROCESS_SRCS-$(CONFIG_LIBPOSIX_PROCESS_VFORK) += $(LIBPOSIX_PROCESS_BASE)/vfork.c
+
 LIBPOSIX_PROCESS_SRCS-y += $(LIBPOSIX_PROCESS_BASE)/deprecated.c
 LIBPOSIX_PROCESS_SRCS-y += $(LIBPOSIX_PROCESS_BASE)/process.c
 LIBPOSIX_PROCESS_SRCS-y += $(LIBPOSIX_PROCESS_BASE)/wait.c
@@ -28,6 +30,7 @@ LIBPOSIX_PROCESS_CXXINCLUDES-y += -I$(UK_PLAT_COMMON_BASE)/include
 
 UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS_CLONE) += clone-5e
 UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS_EXECVE) += execve-3e
+UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS_VFORK) += vfork-2e
 UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS) += wait4-4 waitid-4
 UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS) += getpgid-1
 UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS) += setpgid-2
index ca8f4e368141f0a961f01035861264bcfb893799..55314ff9650745b08806b0a9c4b46d6c39ea97dc 100644 (file)
@@ -310,24 +310,28 @@ static void _clone_child_gc(struct uk_thread *t)
 /*
  * NOTE: The clone system call and the handling of the TLS
  *
- *       `_clone()` assumes that a passed TLS pointer is an Unikraft TLS.
- *       The only exception exists if `_clone()` is called from a context
+ *       `uk_clone()` assumes that a passed TLS pointer is an Unikraft TLS.
+ *       The only exception exists if `uk_clone()` is called from a context
  *       where a custom TLS is already active (depends on
  *       `CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS`). In such a case, an
  *       Unikraft TLS is allocated but the passed TLS pointer is activated.
  *       The reason is that Unikraft libraries place TLS variables and use
  *       the TLS effectively as TCB.
- *       In case no TLS is handed over (CLONE_SETTLS is not set), _clone will
+ *       In case no TLS is handed over (CLONE_SETTLS is not set), uk_clone will
  *       still allocate an Unikraft TLS but sets the TLS architecture pointer
  *       to zero.
  */
-static int _clone(struct clone_args *cl_args, size_t cl_args_len,
-                 struct ukarch_execenv *execenv)
+int uk_clone(struct clone_args *cl_args, size_t cl_args_len,
+            struct ukarch_execenv *execenv)
 {
+       struct posix_thread *pthread;
        struct uk_thread *child = NULL;
        struct uk_thread *t;
        struct uk_sched *s;
+       __u64 stack_size;
+       __u64 stack;
        __u64 flags;
+       __u64 tls;
        int ret;
 
        t = uk_thread_current();
@@ -345,7 +349,13 @@ static int _clone(struct clone_args *cl_args, size_t cl_args_len,
                goto err_out;
        }
 
+       /* Copy the cl_args fields that may be modified
+        * by this function into local variables
+        */
+       stack_size = cl_args->stack_size;
+       stack = cl_args->stack;
        flags = cl_args->flags;
+       tls = cl_args->tls;
 
 #if UK_DEBUG
        uk_pr_debug("uk_syscall_r_clone(\n");
@@ -382,12 +392,33 @@ static int _clone(struct clone_args *cl_args, size_t cl_args_len,
                uk_pr_debug(" parent_tid: %p\n", (void *)cl_args->parent_tid);
        if (flags & (CLONE_CHILD_CLEARTID | CLONE_CHILD_SETTID))
                uk_pr_debug(" child_tid: %p\n", (void *)cl_args->child_tid);
-       uk_pr_debug(" stack: %p\n", (void *)cl_args->stack);
-       uk_pr_debug(" tls: %p\n", (void *)cl_args->tls);
+       uk_pr_debug(" stack: %p\n", (void *)stack);
+       uk_pr_debug(" tls: %p\n", (void *)tls);
        uk_pr_debug(" <return>: %p\n", (void *)execenv->regs.rip);
        uk_pr_debug(")\n");
 #endif /* UK_DEBUG */
 
+       /* CLONE_VM requires that the child operates on the same memory
+        * space as the parent.
+        *
+        * Assign the parent's stack. We assign the user TLS to the
+        * parent's in clone_setup_child_ctx(). We copy the registers
+        * later below.
+        */
+       if (flags & CLONE_VM) {
+               if (!stack && !stack_size) {
+                       stack = ukarch_regs_get_sp(&execenv->regs);
+                       uk_pr_debug("Using parent's sp @ 0x%lx\n",
+                                   stack);
+               }
+
+               if (!tls) {
+                       tls = ukarch_sysctx_get_tlsp(&execenv->sysctx);
+                       uk_pr_debug("Using parent's tls @ 0x%lx\n",
+                                   tls);
+               }
+       }
+
        if ((flags & CLONE_SETTLS)
 #if CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS
            && (ukarch_sysctx_get_tlsp(&execenv->sysctx) == 0x0)
@@ -398,12 +429,12 @@ static int _clone(struct clone_args *cl_args, size_t cl_args_len,
                 * Unikraft TLS.
                 */
                uk_pr_debug("Using passed TLS pointer %p as an Unikraft TLS\n",
-                           (void *) cl_args->tls);
+                           (void *)tls);
                child = uk_thread_create_container2(s->a,
-                                                   (__uptr) cl_args->stack,
+                                                   (__uptr)stack,
                                                    s->a_auxstack,
                                                    AUXSTACK_SIZE,
-                                                   (__uptr) cl_args->tls,
+                                                   (__uptr)tls,
                                                    true, /* TLS is an UKTLS */
                                                    false, /* We want ECTX */
                                                    (t->name) ? strdup(t->name)
@@ -446,16 +477,46 @@ static int _clone(struct clone_args *cl_args, size_t cl_args_len,
                  == CL_UKTLS_SANITY_MAGIC);
 #endif /* CONFIG_LIBUKDEBUG_ENABLE_ASSERT */
 
-       /* CLONE_SETTLS: Instead of just activating the Unikraft TLS, we
-        * activate the passed TLS pointer as soon as the child wakes up.
-        * NOTE: If SETTLS is not set, we do not activate any TLS although
-        *       an Unikraft TLS was allocated.
-        */
-       child->tlsp = (flags & CLONE_SETTLS) ? cl_args->tls : 0x0;
+       if (flags & CLONE_VFORK) {
+               /* We will be blocking the parent and pass control to the child
+                * via the scheduler. Therefore we need to set the child's TLS
+                * pointer the Unikraft TLS.
+                */
+               child->tlsp = child->uktlsp;
+
+               /* Since we didn't specify a stack to
+                * uk_thread_create_container2() above,
+                * we need to assign the stack manually.
+                */
+               child->_mem.stack = (void *)stack;
+
+               /* Also inherit the parent's stack allocator */
+               child->_mem.stack_a = t->_mem.stack_a;
+       } else  {
+               /* CLONE_SETTLS or CLONE_VM: Instead of just activating the
+                * Unikraft TLS we activate the selected TLS pointer as soon
+                * as the child wakes up.
+                * NOTE: If neither flag is set, we do not activate any TLS
+                * although a Unikraft TLS was allocated.
+                */
+               if ((flags & CLONE_SETTLS) || (flags & CLONE_VM))
+                       child->tlsp = tls;
+               else
+                       child->tlsp = 0;
+       }
        uk_pr_debug("Child is going to wake up with TLS pointer set to: %p (%s TLS)\n",
                    (void *) child->tlsp,
                    (child->tlsp != child->uktlsp) ? "custom" : "Unikraft");
 
+       if (!(cl_args->flags & CLONE_THREAD)) {
+               uk_pr_debug("Creating new process for child\n");
+               ret = uk_posix_process_create(uk_alloc_get_default(), child, t);
+               if (unlikely(ret)) {
+                       uk_pr_err("Could not create child process\n");
+                       goto err_free_child;
+               }
+       }
+
        /* Call clone handler table but treat CLONE_SETTLS as handled */
        ret = _uk_posix_clonetab_init(cl_args, cl_args_len,
                                      CLONE_SETTLS,
@@ -466,7 +527,7 @@ static int _clone(struct clone_args *cl_args, size_t cl_args_len,
                    t, t->name ? child->name : "<unnamed>",
                    child, child->name ? child->name : "<unnamed>", ret);
 
-       clone_setup_child_ctx(execenv, child, (__uptr)cl_args->stack);
+       clone_setup_child_ctx(execenv, child, (__uptr)stack);
 
        uk_thread_set_runnable(child);
 
@@ -476,10 +537,21 @@ static int _clone(struct clone_args *cl_args, size_t cl_args_len,
        /* Assign the child to the scheduler */
        uk_sched_thread_add(s, child);
 
+       /* CLONE_VFORK: Block the parent until the child calls execve()
+        * or exit(). Yield to schedule the child.
+        */
+       if (flags & CLONE_VFORK) {
+               pthread = tid2pthread(ukthread2tid(t));
+               uk_thread_block(t);
+               pthread->state = POSIX_THREAD_BLOCKED_VFORK;
+               uk_sched_yield();
+               goto out;
+       }
+
 #ifdef CONFIG_LIBPOSIX_PROCESS_CLONE_PREFER_CHILD
        uk_sched_yield();
 #endif /* CONFIG_LIBPOSIX_PROCESS_CLONE_PREFER_CHILD */
-
+out:
        return ret;
 
 err_free_child:
@@ -515,7 +587,7 @@ UK_LLSYSCALL_R_E_DEFINE(int, clone,
                .tls         = (__u64) tlsp
        };
 
-       return _clone(&cl_args, sizeof(cl_args), execenv);
+       return uk_clone(&cl_args, sizeof(cl_args), execenv);
 }
 
 #if UK_LIBC_SYSCALLS
@@ -559,3 +631,13 @@ static int uk_posix_clone_detached(const struct clone_args *cl_args __unused,
        return 0;
 }
 UK_POSIX_CLONE_HANDLER(CLONE_DETACHED, false, uk_posix_clone_detached, 0x0);
+
+static int uk_posix_clone_vfork(const struct clone_args *cl_args __unused,
+                               size_t cl_args_len __unused,
+                               struct uk_thread *child __unused,
+                               struct uk_thread *parent __unused)
+{
+       return 0; /* no-op handler: uk_clone() acts on CLONE_VFORK itself */
+}
+
+UK_POSIX_CLONE_HANDLER(CLONE_VFORK, false, uk_posix_clone_vfork, 0x0);
index 8a7436f913e2bfc24deadd3acdae8986f277087b..8b053daa3a5fd69deae571a33e5db617febff762 100644 (file)
@@ -85,3 +85,8 @@ uk_syscall_r_e_clone
 uk_syscall_e_e_clone
 uk_syscall_r_clone3
 uk_syscall_e_clone3
+vfork
+uk_syscall_r_vfork
+uk_syscall_e_vfork
+uk_syscall_r_e_vfork
+uk_syscall_e_e_vfork
index 6d19f710c1d390b1b0ab590ad9f83ba1dbd149d9..5522907fa2d04cd578a7e90064a07d75073e5f1f 100644 (file)
 #ifndef __PROCESS_H_INTERNAL__
 #define __PROCESS_H_INTERNAL__
 
+#define _GNU_SOURCE /* struct clone_args */
+
 #include <uk/config.h>
 #include <sys/types.h>
+
+#if CONFIG_LIBPOSIX_PROCESS_CLONE
+#include <sched.h>
+#include <uk/arch/ctx.h>
+#endif /* CONFIG_LIBPOSIX_PROCESS_CLONE */
+
 #if CONFIG_LIBPOSIX_PROCESS_PIDS
 #include <uk/thread.h>
 #endif /* CONFIG_LIBPOSIX_PROCESS_PIDS */
@@ -89,4 +97,9 @@ pid_t ukthread2pid(struct uk_thread *thread);
 
 void pprocess_kill_siblings(struct uk_thread *thread);
 
+#if CONFIG_LIBPOSIX_PROCESS_CLONE
+int uk_clone(struct clone_args *cl_args, size_t cl_args_len,
+            struct ukarch_execenv *execenv);
+#endif /* CONFIG_LIBPOSIX_PROCESS_CLONE */
+
 #endif /* __PROCESS_H_INTERNAL__ */
diff --git a/lib/posix-process/vfork.c b/lib/posix-process/vfork.c
new file mode 100644 (file)
index 0000000..99bef4e
--- /dev/null
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright (c) 2024, Unikraft GmbH and The Unikraft Authors.
+ * Licensed under the BSD-3-Clause License (the "License").
+ * You may not use this file except in compliance with the License.
+ */
+
+#define _GNU_SOURCE /* struct clone_args */
+
+#include <sched.h>
+#include <signal.h>
+
+#include <uk/essentials.h>
+#include <uk/process.h>
+#include <uk/sched.h>
+#include <uk/syscall.h>
+
+#include "process.h"
+
+UK_LLSYSCALL_R_E_DEFINE(pid_t, vfork,
+                       unsigned long __unused, a0,
+                       unsigned long __unused, a1)
+{ /* vfork(): uk_clone() with CLONE_VM | CLONE_VFORK, returns the child's PID */
+       struct posix_process *child_proc;
+       struct clone_args cl_args = {0}; /* zeroed: stack and tls are left at 0 */
+       pid_t child_tid;
+
+       cl_args.flags       = CLONE_VM | CLONE_VFORK; /* no stack/tls given: uk_clone() uses the parent's sp and tls */
+       cl_args.exit_signal = SIGCHLD;
+
+       child_tid = uk_clone(&cl_args, sizeof(cl_args), execenv); /* CLONE_VFORK: blocks the calling thread and yields */
+       if (unlikely(child_tid < 0)) {
+               uk_pr_err("Could not clone thread\n");
+               return child_tid; /* hand uk_clone()'s negative result back unchanged */
+       }
+
+       child_proc = tid2pprocess(child_tid); /* presumably uk_clone() returns the child's TID - TODO confirm */
+       UK_ASSERT(child_proc); /* NOTE(review): assumes the child's process is still registered here - confirm */
+
+       return child_proc->pid; /* result is the child's process ID, not the thread ID */
+}
index 5ec5d72a5f5c492c73920269bf2cc649ad54c5cd..0a554f7d438221713dcba5a08fa700f60e5a4d1a 100644 (file)
@@ -37,7 +37,6 @@
 #include <uk/syscall.h>
 #include <errno.h>
 #include <stddef.h>
-#include "process.h"
 
 /* FIXME: Provide with sys/wait.h */
 struct rusage;