From 3ee9afedec2d1bce9d80e353c81b62e8e5f3568a Mon Sep 17 00:00:00 2001 From: Michalis Pappas Date: Wed, 10 Apr 2024 12:15:03 +0200 Subject: [PATCH] lib/posix-process: Add execve() Add implementation for execve(). For more info see execve(2). Requires a binfmt ELF loader. The default loader is provided by app-bincompat. Checkpatch-Ignore: AVOID_EXTERNS Signed-off-by: Michalis Pappas Approved-by: Andrei Tatar Reviewed-by: Sergiu Moga Reviewed-by: Andrei Tatar GitHub-Closes: #1386 --- lib/posix-process/Config.uk | 13 +- lib/posix-process/Makefile.uk | 5 +- lib/posix-process/arch/arm64/execve.c | 39 +++++ lib/posix-process/arch/x86_64/execve.c | 38 +++++ lib/posix-process/deprecated.c | 9 -- lib/posix-process/execve.c | 201 +++++++++++++++++++++++++ lib/posix-process/exportsyms.uk | 2 + lib/posix-process/include/uk/process.h | 9 ++ lib/posix-process/process.c | 39 +++++ lib/posix-process/process.h | 2 + 10 files changed, 343 insertions(+), 14 deletions(-) create mode 100644 lib/posix-process/arch/arm64/execve.c create mode 100644 lib/posix-process/arch/x86_64/execve.c create mode 100644 lib/posix-process/execve.c diff --git a/lib/posix-process/Config.uk b/lib/posix-process/Config.uk index afa6ac4f2..90f2f13c4 100644 --- a/lib/posix-process/Config.uk +++ b/lib/posix-process/Config.uk @@ -20,12 +20,12 @@ if LIBPOSIX_PROCESS_PIDS default n endif - config LIBPOSIX_PROCESS_CLONE - bool "Clone system call" +config LIBPOSIX_PROCESS_CLONE + bool "clone() system call" default n select LIBPOSIX_PROCESS_PIDS - config LIBPOSIX_PROCESS_CLONE_PREFER_CHILD +config LIBPOSIX_PROCESS_CLONE_PREFER_CHILD bool "Prefer scheduling of child" depends on LIBPOSIX_PROCESS_CLONE help @@ -34,7 +34,12 @@ endif the new thread does progress while the clone caller is not context switching. 
- config LIBPOSIX_PROCESS_DEBUG +config LIBPOSIX_PROCESS_EXECVE + bool "execve() system call" + select LIBPOSIX_PROCESS_PIDS + select LIBUKBINFMT + +config LIBPOSIX_PROCESS_DEBUG bool "Enable debug messages" default n endif diff --git a/lib/posix-process/Makefile.uk b/lib/posix-process/Makefile.uk index 2ed42405f..bbb34b739 100644 --- a/lib/posix-process/Makefile.uk +++ b/lib/posix-process/Makefile.uk @@ -9,6 +9,9 @@ CXXINCLUDES-$(CONFIG_LIBPOSIX_PROCESS) += $(LIBPOSIX_PROCESS_COMMON_INCLUDES-y) LIBPOSIX_PROCESS_CFLAGS-$(CONFIG_LIBPOSIX_PROCESS_DEBUG) += -DUK_DEBUG +LIBPOSIX_PROCESS_SRCS-$(CONFIG_LIBPOSIX_PROCESS_EXECVE) += $(LIBPOSIX_PROCESS_BASE)/execve.c +LIBPOSIX_PROCESS_SRCS-$(CONFIG_LIBPOSIX_PROCESS_EXECVE) += $(LIBPOSIX_PROCESS_BASE)/arch/$(CONFIG_UK_ARCH)/execve.c|arch + LIBPOSIX_PROCESS_SRCS-y += $(LIBPOSIX_PROCESS_BASE)/deprecated.c LIBPOSIX_PROCESS_SRCS-y += $(LIBPOSIX_PROCESS_BASE)/process.c LIBPOSIX_PROCESS_SRCS-y += $(LIBPOSIX_PROCESS_BASE)/wait.c @@ -24,7 +27,7 @@ LIBPOSIX_PROCESS_ASINCLUDES-y += -I$(UK_PLAT_COMMON_BASE)/include LIBPOSIX_PROCESS_CXXINCLUDES-y += -I$(UK_PLAT_COMMON_BASE)/include UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS_CLONE) += clone-5e -UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS) += execve-3 +UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS_EXECVE) += execve-3e UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS) += wait4-4 waitid-4 UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS) += getpgid-1 UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS) += setpgid-2 diff --git a/lib/posix-process/arch/arm64/execve.c b/lib/posix-process/arch/arm64/execve.c new file mode 100644 index 000000000..e4ff9e4e2 --- /dev/null +++ b/lib/posix-process/arch/arm64/execve.c @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright (c) 2024, Unikraft GmbH and The Unikraft Authors. + * Licensed under the BSD-3-Clause License (the "License"). + * You may not use this file except in compliance with the License. 
+ */ + +#include +#include + +void execve_arch_execenv_init(struct ukarch_execenv *execenv_new, + struct ukarch_execenv *execenv, + __uptr ip, __uptr sp) +{ + UK_ASSERT(execenv_new); + UK_ASSERT(execenv); + UK_ASSERT(ip); + UK_ASSERT(sp); + UK_ASSERT(IS_ALIGNED(sp, UKARCH_SP_ALIGN)); + + execenv_new->regs.lr = ip; + execenv_new->regs.sp = sp; + + /* Copy SPSR to preserve the application's state at + * syscall time. + */ + execenv_new->regs.spsr_el1 = execenv->regs.spsr_el1; + + /* Copy ESR to make sure we restore a sane value */ + execenv_new->regs.esr_el1 = execenv->regs.esr_el1; + + /* Leave gpregs and ectx uninitialized for the new execution + * context. NOTE(review): x86_64 stores ectx here; confirm intent. + */ + + /* Also copy the current sysctx to avoid ending up with undefined + * values that trigger alignment errors. + */ + ukarch_sysctx_store((struct ukarch_sysctx *)&execenv_new->sysctx); +} diff --git a/lib/posix-process/arch/x86_64/execve.c b/lib/posix-process/arch/x86_64/execve.c new file mode 100644 index 000000000..aa2774b7f --- /dev/null +++ b/lib/posix-process/arch/x86_64/execve.c @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright (c) 2024, Unikraft GmbH and The Unikraft Authors. + * Licensed under the BSD-3-Clause License (the "License"). + * You may not use this file except in compliance with the License. 
+ */ + +#include +#include + +void execve_arch_execenv_init(struct ukarch_execenv *execenv_new, + struct ukarch_execenv *execenv, + __uptr ip, __uptr sp) +{ + UK_ASSERT(execenv_new); + UK_ASSERT(execenv); + UK_ASSERT(ip); + UK_ASSERT(sp); + UK_ASSERT(IS_ALIGNED(sp, UKARCH_SP_ALIGN)); + + execenv_new->regs.rip = ip; + execenv_new->regs.rsp = sp; + + /* Prepare for iretq + * FIXME re-arch: use GDT macros once moved out of plat/common + */ + execenv_new->regs.eflags = execenv->regs.eflags; + execenv_new->regs.cs = 8; /* GDT_DESC_OFFSET(GDT_DESC_CODE) */ + execenv_new->regs.ss = 16; /* GDT_DESC_OFFSET(GDT_DESC_DATA) */ + + /* Copy current ectx to inherit platform-initialized regs like mxcsr */ + ukarch_ectx_sanitize((struct ukarch_ectx *)&execenv_new->ectx); + ukarch_ectx_store((struct ukarch_ectx *)&execenv_new->ectx); + + /* Also copy the current sysctx to avoid ending up with undefined + * values that trigger alignment errors. + */ + ukarch_sysctx_store((struct ukarch_sysctx *)&execenv_new->sysctx); +} diff --git a/lib/posix-process/deprecated.c b/lib/posix-process/deprecated.c index 00098ed24..91ebe2244 100644 --- a/lib/posix-process/deprecated.c +++ b/lib/posix-process/deprecated.c @@ -120,15 +120,6 @@ int execle(const char *path, const char *arg, ... return -1; } -UK_SYSCALL_R_DEFINE(int, execve, const char *, path, - char *const *, argv, char *const *, envp) -{ - uk_pr_warn("%s(): path=%s\n", __func__, path); - exec_warn_argv(argv); - exec_warn_envp(envp); - return -ENOSYS; -} - int execv(const char *path, char *const argv[]) { uk_pr_warn("%s(): path=%s\n", __func__, path); diff --git a/lib/posix-process/execve.c b/lib/posix-process/execve.c new file mode 100644 index 000000000..6911b1dca --- /dev/null +++ b/lib/posix-process/execve.c @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright (c) 2024, Unikraft GmbH and The Unikraft Authors. + * Licensed under the BSD-3-Clause License (the "License"). 
+ * You may not use this file except in compliance with the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "process.h" + +void execve_arch_execenv_init(struct ukarch_execenv *execenv_new, + struct ukarch_execenv *execenv, + __uptr ip, __uptr sp); + +UK_EVENT(POSIX_PROCESS_EXECVE_EVENT); + +/* Trampoline run on the new stack. arg0: execenv_new, arg1: stack_old */ +static void __noreturn execve_ctx_switch(long arg0, long arg1) +{ + struct ukarch_execenv *execenv_new; + struct posix_thread *pthread_parent; + struct posix_thread *pthread; + struct uk_thread *this_thread; + void *stack_old; + + execenv_new = (struct ukarch_execenv *)arg0; + stack_old = (void *)arg1; + + this_thread = uk_thread_current(); + UK_ASSERT(this_thread); + + pthread = tid2pthread(ukthread2tid(this_thread)); /* FIXME */ + UK_ASSERT(pthread); + + pthread_parent = pthread->parent; + + /* If there is no parent, then we must be calling execve() + * from init without having called vfork(). + */ + if (!pthread_parent) { + UK_ASSERT(pthread->process); + UK_ASSERT(pthread->process->pid == 0); + goto switch_ctx; + } + + /* If we're coming from VFORK it's time to wake up the parent. + * Otherwise, free the old stack. + */ + if (pthread_parent->state == POSIX_THREAD_BLOCKED_VFORK) { + uk_pr_debug("Waking up parent (tid %d)\n", pthread_parent->tid); + uk_thread_wake(tid2ukthread(pthread_parent->tid)); + pthread_parent->state = POSIX_THREAD_RUNNING; + } else { + uk_free(this_thread->_mem.stack_a, stack_old); + } + + uk_pr_debug("Switching context\n"); +switch_ctx: + ukarch_execenv_load((long)execenv_new); +} + +/* Prepare process for executing new context. For a complete list see + * "Effect on process attributes" in execve(2). 
+ */ +static int pprocess_cleanup(struct uk_thread *thread __maybe_unused) +{ + /* Kill this thread's siblings */ + pprocess_kill_siblings(thread); + + return 0; +} + +UK_LLSYSCALL_R_E_DEFINE(int, execve, const char *, pathname, + char *const *, argv, + char *const *, envp) +{ + struct posix_process_execve_event_data event_data; + struct uk_binfmt_loader_args loader_args; + struct ukarch_execenv *execenv_new; + struct uk_thread *this_thread; + struct ukarch_ctx ctx_old; + void *stack_old; + void *stack_new; + int rc = 0; + + /* Linux deviates from POSIX by treating NULL pointers to + * argv / envp equivalently to passing a list with a single + * NULL element. Preserve this behavior for compatibility + * and treat NULL pointers as valid. + */ + UK_ASSERT(pathname); + + this_thread = uk_thread_current(); + UK_ASSERT(this_thread); + + loader_args.argv = (const char **)argv; + loader_args.envp = (const char **)envp; + loader_args.pathname = (char *)pathname; + loader_args.loader = NULL; + loader_args.user = NULL; + + /* Assume that if argv is set the caller follows the convention */ + loader_args.progname = (argv && argv[0]) ? argv[0] : ""; + + /* Assign the default allocator to the loader. This will be used + * to allocate memory for the executable image. + */ + loader_args.alloc = uk_alloc_get_default(); + + /* Allocate a new stack. Even if we don't come from vfork we + * can't operate on the current thread's stack without either + * corrupting it or wasting space. Use the thread's + * stack allocator for the new stack. 
+ */ + stack_new = uk_malloc(this_thread->_mem.stack_a, STACK_SIZE); + if (unlikely(!stack_new)) { + uk_pr_err("Could not allocate stack\n"); + return -ENOMEM; + } + + loader_args.stack_size = STACK_SIZE; + loader_args.ctx.sp = ukarch_gen_sp((__uptr)stack_new, + loader_args.stack_size); + + uk_pr_debug("%s: New stack at %p - %p, stack pointer: %p\n", + loader_args.progname, stack_new, + (void *)((uintptr_t)stack_new + loader_args.stack_size), + (void *)loader_args.ctx.sp); + + /* Make room for the new execenv (context) to restore. Normally we + * would use the aux stack for this, and since this function is a + * noreturn we could even overwrite the current auxsp frame. But + * just in case one day this is called outside the syscall context + * using the aux stack could corrupt the state of a syscall or the + * deferred exception handler. Avoid that by pushing instead into + * the newly allocated stack. + */ + loader_args.ctx.sp = ALIGN_DOWN(loader_args.ctx.sp, + UKARCH_EXECENV_END_ALIGN); + loader_args.ctx.sp -= UKARCH_EXECENV_SIZE; + execenv_new = (struct ukarch_execenv *)loader_args.ctx.sp; + + /* Load executable */ + rc = uk_binfmt_load(&loader_args); + if (unlikely(rc)) { + uk_pr_err("%s: Unable to load (%d)\n", pathname, rc); + goto err_free_stack_new; + } + + /* Do arch-specific context (execenv) initialization */ + execve_arch_execenv_init(execenv_new, execenv, + loader_args.ctx.ip, + loader_args.ctx.sp); + + /* Notify posix libraries registered for the event, then do the + * internal cleanup. NOTE(review): the error path below frees only + * the new stack; confirm the loaded image needs no unloading. + */ + event_data.thread = this_thread; + rc = uk_raise_event(POSIX_PROCESS_EXECVE_EVENT, &event_data); + if (unlikely(rc < 0)) { + uk_pr_err("execve event error (%d)\n", rc); + goto err_free_stack_new; + } + pprocess_cleanup(this_thread); + + /* Prepare switch to the new context. + * + * Update this thread's ctx and stack. We then need to free the old + * stack, and restore the new execenv (context). 
This needs to happen + * on the new stack, to avoid freeing the stack we are currently + * operating on, in case we are not on the auxstack. We do both via + * a context-switch trampoline that we execute on the new stack. + */ + stack_old = this_thread->_mem.stack; + uk_pr_debug("%s: Old stack at %p - %p\n", loader_args.progname, + stack_old, (void *)((uintptr_t)stack_old + STACK_SIZE)); + + this_thread->_mem.stack = stack_new; + this_thread->ctx = loader_args.ctx; + + ukarch_ctx_init_entry2(&loader_args.ctx, + loader_args.ctx.sp, + 1, /* keep regs */ + execve_ctx_switch, + (long)execenv_new, (long)stack_old); + ukarch_ctx_switch(&ctx_old, &loader_args.ctx); + UK_BUG(); /* noreturn */ + +err_free_stack_new: + uk_free(this_thread->_mem.stack_a, stack_new); + + return rc; +} diff --git a/lib/posix-process/exportsyms.uk b/lib/posix-process/exportsyms.uk index 5b15295fd..8a7436f91 100644 --- a/lib/posix-process/exportsyms.uk +++ b/lib/posix-process/exportsyms.uk @@ -4,7 +4,9 @@ execlp execv execve uk_syscall_r_execve +uk_syscall_r_e_execve uk_syscall_e_execve +uk_syscall_e_e_execve execvp execvpe getpgid diff --git a/lib/posix-process/include/uk/process.h b/lib/posix-process/include/uk/process.h index 2ba9f671b..dd2c1f93a 100644 --- a/lib/posix-process/include/uk/process.h +++ b/lib/posix-process/include/uk/process.h @@ -200,4 +200,13 @@ struct uk_posix_clonetab_entry { #endif /* CONFIG_LIBPOSIX_PROCESS_CLONE */ +#if CONFIG_LIBPOSIX_PROCESS_EXECVE + +/* Data delivered to the handlers of the POSIX_PROCESS_EXECVE_EVENT */ +struct posix_process_execve_event_data { + struct uk_thread *thread; +}; + +#endif /* CONFIG_LIBPOSIX_PROCESS_EXECVE */ + #endif /* __UK_PROCESS_H__ */ diff --git a/lib/posix-process/process.c b/lib/posix-process/process.c index f3aed6f40..a63cf3ec7 100644 --- a/lib/posix-process/process.c +++ b/lib/posix-process/process.c @@ -305,6 +305,45 @@ static void pprocess_release(struct posix_process *pprocess) uk_free(pprocess->_a, pprocess); } +void 
pprocess_kill_siblings(struct uk_thread *thread) +{ + struct posix_thread *pthread, *pthreadn; + struct posix_thread *this_thread; + struct posix_process *pprocess; + pid_t this_tid; + + this_tid = ukthread2tid(thread); + this_thread = tid2pthread(this_tid); /* NOTE(review): no NULL check */ + + pprocess = this_thread->process; + UK_ASSERT(pprocess); + + /* Kill all other live threads of the process */ + uk_list_for_each_entry_safe(pthread, pthreadn, + &pprocess->threads, thread_list_entry) { + if (pthread->tid == this_tid) + continue; + + /* If this thread has already exited it may + * be waiting to be garbage-collected. + */ + if (uk_thread_is_exited(pthread->thread)) + continue; + + uk_pr_debug("Terminating siblings of tid: %d (pid: %d): Killing TID %d: thread %p (%s)...\n", + this_thread->tid, pprocess->pid, + pthread->tid, pthread->thread, + pthread->thread->name); + + /* Terminating the thread will lead to calling + * `posix_thread_fini()` which will clean up the related + * pthread resources and pprocess resources on the last + * thread. + */ + uk_sched_thread_terminate(pthread->thread); + } +} + static void pprocess_kill(struct posix_process *pprocess) { struct posix_thread *pthread, *pthreadn, *pthread_self = NULL; diff --git a/lib/posix-process/process.h b/lib/posix-process/process.h index fb313e059..6d19f710c 100644 --- a/lib/posix-process/process.h +++ b/lib/posix-process/process.h @@ -87,4 +87,6 @@ pid_t ukthread2tid(struct uk_thread *thread); pid_t ukthread2pid(struct uk_thread *thread); #endif /* CONFIG_LIBPOSIX_PROCESS_PIDS */ +void pprocess_kill_siblings(struct uk_thread *thread); + #endif /* __PROCESS_H_INTERNAL__ */ -- 2.39.5