From: Michalis Pappas Date: Mon, 15 Apr 2024 09:48:52 +0000 (+0200) Subject: lib/posix-process: Add vfork() X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=865d7b00ee4b79ae505a5f36d4ac84cd50ade430;p=unikraft%2Funikraft.git lib/posix-process: Add vfork() The vfork() syscall is equivalent to calling clone() with the flags parameter set to CLONE_VM | CLONE_VFORK | SIGCHLD. Update clone() to support CLONE_VFORK and CLONE_VM. Implement vfork() as a wrapper of clone(). For more info see vfork(2). Signed-off-by: Michalis Pappas Approved-by: Andrei Tatar Reviewed-by: Sergiu Moga Reviewed-by: Andrei Tatar GitHub-Closes: #1386 --- diff --git a/lib/posix-process/Config.uk b/lib/posix-process/Config.uk index 90f2f13c4..49ea8218a 100644 --- a/lib/posix-process/Config.uk +++ b/lib/posix-process/Config.uk @@ -39,7 +39,13 @@ config LIBPOSIX_PROCESS_EXECVE select LIBPOSIX_PROCESS_PIDS select LIBUKBINFMT +config LIBPOSIX_PROCESS_VFORK + bool "vfork() system call" + select LIBPOSIX_PROCESS_PIDS + select LIBPOSIX_PROCESS_CLONE + config LIBPOSIX_PROCESS_DEBUG bool "Enable debug messages" default n + endif diff --git a/lib/posix-process/Makefile.uk b/lib/posix-process/Makefile.uk index bbb34b739..30821a1c8 100644 --- a/lib/posix-process/Makefile.uk +++ b/lib/posix-process/Makefile.uk @@ -12,6 +12,8 @@ LIBPOSIX_PROCESS_CFLAGS-$(CONFIG_LIBPOSIX_PROCESS_DEBUG) += -DUK_DEBUG LIBPOSIX_PROCESS_SRCS-$(CONFIG_LIBPOSIX_PROCESS_EXECVE) += $(LIBPOSIX_PROCESS_BASE)/execve.c LIBPOSIX_PROCESS_SRCS-$(CONFIG_LIBPOSIX_PROCESS_EXECVE) += $(LIBPOSIX_PROCESS_BASE)/arch/$(CONFIG_UK_ARCH)/execve.c|arch +LIBPOSIX_PROCESS_SRCS-$(CONFIG_LIBPOSIX_PROCESS_VFORK) += $(LIBPOSIX_PROCESS_BASE)/vfork.c + LIBPOSIX_PROCESS_SRCS-y += $(LIBPOSIX_PROCESS_BASE)/deprecated.c LIBPOSIX_PROCESS_SRCS-y += $(LIBPOSIX_PROCESS_BASE)/process.c LIBPOSIX_PROCESS_SRCS-y += $(LIBPOSIX_PROCESS_BASE)/wait.c @@ -28,6 +30,7 @@ LIBPOSIX_PROCESS_CXXINCLUDES-y += -I$(UK_PLAT_COMMON_BASE)/include 
UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS_CLONE) += clone-5e UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS_EXECVE) += execve-3e +UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS_VFORK) += vfork-2e UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS) += wait4-4 waitid-4 UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS) += getpgid-1 UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS) += setpgid-2 diff --git a/lib/posix-process/clone.c b/lib/posix-process/clone.c index ca8f4e368..55314ff96 100644 --- a/lib/posix-process/clone.c +++ b/lib/posix-process/clone.c @@ -310,24 +310,28 @@ static void _clone_child_gc(struct uk_thread *t) /* * NOTE: The clone system call and the handling of the TLS * - * `_clone()` assumes that a passed TLS pointer is an Unikraft TLS. - * The only exception exists if `_clone()` is called from a context + * `uk_clone()` assumes that a passed TLS pointer is an Unikraft TLS. + * The only exception exists if `uk_clone()` is called from a context * where a custom TLS is already active (depends on * `CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS`). In such a case, an * Unikraft TLS is allocated but the passed TLS pointer is activated. * The reason is that Unikraft libraries place TLS variables and use * the TLS effectively as TCB. - * In case no TLS is handed over (CLONE_SETTLS is not set), _clone will + * In case no TLS is handed over (CLONE_SETTLS is not set), uk_clone will * still allocate an Unikraft TLS but sets the TLS architecture pointer * to zero. 
*/ -static int _clone(struct clone_args *cl_args, size_t cl_args_len, - struct ukarch_execenv *execenv) +int uk_clone(struct clone_args *cl_args, size_t cl_args_len, + struct ukarch_execenv *execenv) { + struct posix_thread *pthread; struct uk_thread *child = NULL; struct uk_thread *t; struct uk_sched *s; + __u64 stack_size; + __u64 stack; __u64 flags; + __u64 tls; int ret; t = uk_thread_current(); @@ -345,7 +349,13 @@ static int _clone(struct clone_args *cl_args, size_t cl_args_len, goto err_out; } + /* shadow cl_args that may be modified + * by this function into locals + */ + stack_size = cl_args->stack_size; + stack = cl_args->stack; flags = cl_args->flags; + tls = cl_args->tls; #if UK_DEBUG uk_pr_debug("uk_syscall_r_clone(\n"); @@ -382,12 +392,33 @@ static int _clone(struct clone_args *cl_args, size_t cl_args_len, uk_pr_debug(" parent_tid: %p\n", (void *)cl_args->parent_tid); if (flags & (CLONE_CHILD_CLEARTID | CLONE_CHILD_SETTID)) uk_pr_debug(" child_tid: %p\n", (void *)cl_args->child_tid); - uk_pr_debug(" stack: %p\n", (void *)cl_args->stack); - uk_pr_debug(" tls: %p\n", (void *)cl_args->tls); + uk_pr_debug(" stack: %p\n", (void *)stack); + uk_pr_debug(" tls: %p\n", (void *)tls); uk_pr_debug(" : %p\n", (void *)execenv->regs.rip); uk_pr_debug(")\n"); #endif /* UK_DEBUG */ + /* CLONE_VM requires that the child operates on the same memory + * space as the parent. + * + * Assign the parent's stack. We assign the user TLS to the + * parent's in clone_setup_child_ctx(). We copy the registers + * later below. 
+ */ + if (flags & CLONE_VM) { + if (!stack && !stack_size) { + stack = ukarch_regs_get_sp(&execenv->regs); + uk_pr_debug("Using parent's sp @ 0x%lx\n", + stack); + } + + if (!tls) { + tls = ukarch_sysctx_get_tlsp(&execenv->sysctx); + uk_pr_debug("Using parent's tls @ 0x%lx\n", + tls); + } + } + if ((flags & CLONE_SETTLS) #if CONFIG_LIBSYSCALL_SHIM_HANDLER_ULTLS && (ukarch_sysctx_get_tlsp(&execenv->sysctx) == 0x0) @@ -398,12 +429,12 @@ static int _clone(struct clone_args *cl_args, size_t cl_args_len, * Unikraft TLS. */ uk_pr_debug("Using passed TLS pointer %p as an Unikraft TLS\n", - (void *) cl_args->tls); + (void *)tls); child = uk_thread_create_container2(s->a, - (__uptr) cl_args->stack, + (__uptr)stack, s->a_auxstack, AUXSTACK_SIZE, - (__uptr) cl_args->tls, + (__uptr)tls, true, /* TLS is an UKTLS */ false, /* We want ECTX */ (t->name) ? strdup(t->name) @@ -446,16 +477,46 @@ static int _clone(struct clone_args *cl_args, size_t cl_args_len, == CL_UKTLS_SANITY_MAGIC); #endif /* CONFIG_LIBUKDEBUG_ENABLE_ASSERT */ - /* CLONE_SETTLS: Instead of just activating the Unikraft TLS, we - * activate the passed TLS pointer as soon as the child wakes up. - * NOTE: If SETTLS is not set, we do not activate any TLS although - * an Unikraft TLS was allocated. - */ - child->tlsp = (flags & CLONE_SETTLS) ? cl_args->tls : 0x0; + if (flags & CLONE_VFORK) { + /* We will be blocking the parent and pass control to the child + * via the scheduler. Therefore we need to set the child's TLS + * pointer to the Unikraft TLS. + */ + child->tlsp = child->uktlsp; + + /* Since we didn't specify a stack to + * uk_thread_create_container2() above, + * we need to assign the stack manually. + */ + child->_mem.stack = (void *)stack; + + /* Also inherit the parent's stack allocator */ + child->_mem.stack_a = t->_mem.stack_a; + } else { + /* CLONE_SETTLS: Instead of just activating the Unikraft TLS + * we activate the passed TLS pointer as soon as the child + * wakes up. 
+ * NOTE: If SETTLS is not set, we do not activate any TLS + * although a Unikraft TLS was allocated. + */ + if ((flags & CLONE_SETTLS) || (flags & CLONE_VM)) + child->tlsp = tls; + else + child->tlsp = 0; + } uk_pr_debug("Child is going to wake up with TLS pointer set to: %p (%s TLS)\n", (void *) child->tlsp, (child->tlsp != child->uktlsp) ? "custom" : "Unikraft"); + if (!(cl_args->flags & CLONE_THREAD)) { + uk_pr_debug("Creating new process for child\n"); + ret = uk_posix_process_create(uk_alloc_get_default(), child, t); + if (unlikely(ret)) { + uk_pr_err("Could not create child process\n"); + goto err_free_child; + } + } + /* Call clone handler table but treat CLONE_SETTLS as handled */ ret = _uk_posix_clonetab_init(cl_args, cl_args_len, CLONE_SETTLS, @@ -466,7 +527,7 @@ static int _clone(struct clone_args *cl_args, size_t cl_args_len, t, t->name ? child->name : "", child, child->name ? child->name : "", ret); - clone_setup_child_ctx(execenv, child, (__uptr)cl_args->stack); + clone_setup_child_ctx(execenv, child, (__uptr)stack); uk_thread_set_runnable(child); @@ -476,10 +537,21 @@ static int _clone(struct clone_args *cl_args, size_t cl_args_len, /* Assign the child to the scheduler */ uk_sched_thread_add(s, child); + /* CLONE_VFORK: Block the parent until the child calls execve() + * or exit(). Yield to schedule the child. 
+ */ + if (flags & CLONE_VFORK) { + pthread = tid2pthread(ukthread2tid(t)); + uk_thread_block(t); + pthread->state = POSIX_THREAD_BLOCKED_VFORK; + uk_sched_yield(); + goto out; + } + #ifdef CONFIG_LIBPOSIX_PROCESS_CLONE_PREFER_CHILD uk_sched_yield(); #endif /* CONFIG_LIBPOSIX_PROCESS_CLONE_PREFER_CHILD */ - +out: return ret; err_free_child: @@ -515,7 +587,7 @@ UK_LLSYSCALL_R_E_DEFINE(int, clone, .tls = (__u64) tlsp }; - return _clone(&cl_args, sizeof(cl_args), execenv); + return uk_clone(&cl_args, sizeof(cl_args), execenv); } #if UK_LIBC_SYSCALLS @@ -559,3 +631,13 @@ static int uk_posix_clone_detached(const struct clone_args *cl_args __unused, return 0; } UK_POSIX_CLONE_HANDLER(CLONE_DETACHED, false, uk_posix_clone_detached, 0x0); + +static int uk_posix_clone_vfork(const struct clone_args *cl_args __unused, + size_t cl_args_len __unused, + struct uk_thread *child __unused, + struct uk_thread *parent __unused) +{ + return 0; +} + +UK_POSIX_CLONE_HANDLER(CLONE_VFORK, false, uk_posix_clone_vfork, 0x0); diff --git a/lib/posix-process/exportsyms.uk b/lib/posix-process/exportsyms.uk index 8a7436f91..8b053daa3 100644 --- a/lib/posix-process/exportsyms.uk +++ b/lib/posix-process/exportsyms.uk @@ -85,3 +85,8 @@ uk_syscall_r_e_clone uk_syscall_e_e_clone uk_syscall_r_clone3 uk_syscall_e_clone3 +vfork +uk_syscall_r_vfork +uk_syscall_e_vfork +uk_syscall_r_e_vfork +uk_syscall_e_e_vfork diff --git a/lib/posix-process/process.h b/lib/posix-process/process.h index 6d19f710c..5522907fa 100644 --- a/lib/posix-process/process.h +++ b/lib/posix-process/process.h @@ -33,8 +33,16 @@ #ifndef __PROCESS_H_INTERNAL__ #define __PROCESS_H_INTERNAL__ +#define _GNU_SOURCE /* struct clone_args */ + #include #include + +#if CONFIG_LIBPOSIX_PROCESS_CLONE +#include +#include +#endif /* CONFIG_LIBPOSIX_PROCESS_CLONE */ + #if CONFIG_LIBPOSIX_PROCESS_PIDS #include #endif /* CONFIG_LIBPOSIX_PROCESS_PIDS */ @@ -89,4 +97,9 @@ pid_t ukthread2pid(struct uk_thread *thread); void pprocess_kill_siblings(struct 
uk_thread *thread); +#if CONFIG_LIBPOSIX_PROCESS_CLONE +int uk_clone(struct clone_args *cl_args, size_t cl_args_len, + struct ukarch_execenv *execenv); +#endif /* CONFIG_LIBPOSIX_PROCESS_CLONE */ + #endif /* __PROCESS_H_INTERNAL__ */ diff --git a/lib/posix-process/vfork.c b/lib/posix-process/vfork.c new file mode 100644 index 000000000..99bef4e76 --- /dev/null +++ b/lib/posix-process/vfork.c @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright (c) 2024, Unikraft GmbH and The Unikraft Authors. + * Licensed under the BSD-3-Clause License (the "License"). + * You may not use this file except in compliance with the License. + */ + +#define _GNU_SOURCE /* struct clone_args */ + +#include +#include + +#include +#include +#include +#include + +#include "process.h" + +UK_LLSYSCALL_R_E_DEFINE(pid_t, vfork, + unsigned long __unused, a0, + unsigned long __unused, a1) +{ + struct posix_process *child_proc; + struct clone_args cl_args = {0}; + pid_t child_tid; + + cl_args.flags = CLONE_VM | CLONE_VFORK; + cl_args.exit_signal = SIGCHLD; + + child_tid = uk_clone(&cl_args, sizeof(cl_args), execenv); + if (unlikely(child_tid < 0)) { + uk_pr_err("Could not clone thread\n"); + return child_tid; + } + + child_proc = tid2pprocess(child_tid); + UK_ASSERT(child_proc); + + return child_proc->pid; +} diff --git a/lib/posix-process/wait.c b/lib/posix-process/wait.c index 5ec5d72a5..0a554f7d4 100644 --- a/lib/posix-process/wait.c +++ b/lib/posix-process/wait.c @@ -37,7 +37,6 @@ #include #include #include -#include "process.h" /* FIXME: Provide with sys/wait.h */ struct rusage;