From: Michalis Pappas Date: Mon, 7 Apr 2025 16:35:28 +0000 (+0200) Subject: lib/ukboot: Implement /sbin/init logic X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=65d6cc10c91e004475261f90e0bf7232feeff559;p=unikraft%2Funikraft.git lib/ukboot: Implement /sbin/init logic Add /sbin/init logic for multiprocess configurations. This is controlled by a new option, CONFIG_LIBUKBOOT_INIT. The init logic is implemented as part of PID 1, and spawns the application into a new process. Besides fostering reparented orphans, and reaping children, it also manages system shutdown and returning the application's exit status back to ukboot. A second option, namely CONFIG_LIBUKBOOT_GRACEFUL_SHUTDOWN, provides the ability to signal children with a configurable signal, and block until they exit or a configurable timeout is reached. If the timeout is reached and children are still alive, init returns and libposix-process forcefully terminates all remaining processes. Checkpatch-Ignore: AVOID_EXTERNS Checkpatch-Ignore: LONG_LINE; Signed-off-by: Michalis Pappas Approved-by: Andrei Tatar Reviewed-by: Andrei Tatar GitHub-Closes: #1633 --- diff --git a/lib/ukboot/Config.uk b/lib/ukboot/Config.uk index af7bcbbec..b1e70e9c2 100644 --- a/lib/ukboot/Config.uk +++ b/lib/ukboot/Config.uk @@ -196,6 +196,46 @@ config LIBUKBOOT_SHUTDOWNREQ_HANDLER if it is requested by a driver on the shutdown_req event queue (e.g., ACPI power button event). +config LIBUKBOOT_INIT + bool "Provide /sbin/init logic" + depends on LIBPOSIX_PROCESS_MULTIPROCESS + select LIBPOSIX_FDTAB + select LIBPOSIX_PROCESS_SIGNAL + select LIBPOSIX_PROCESS_SIGNALFD + select LIBUKBOOT_MAINTHREAD + help + Provides /sbin/init logic to handle orphan processes + and manage shutdown. When this option is selected, the + application is executed in a new process. If you don't + select this in a multiprocess configuration, you need + to provide your own init. 
+ +menuconfig LIBUKBOOT_GRACEFUL_SHUTDOWN + bool "Graceful shutdown" + depends on LIBUKBOOT_INIT + select LIBPOSIX_POLL + help + Signal the application on system shutdown to give it + a chance to terminate gracefully. If the timeout is reached, + all processes are killed forcefully. + +if LIBUKBOOT_GRACEFUL_SHUTDOWN +config LIBUKBOOT_GRACEFUL_SHUTDOWN_SIGNAL + int "Termination signal" + default 15 if (ARCH_ARM_64 || ARCH_X86_64) + help + Signal to send the application to gracefully terminate + before shutting down. Defaults to SIGTERM. + +config LIBUKBOOT_GRACEFUL_SHUTDOWN_TIMEOUT + int "Termination timeout (msec)" + default 5000 + help + Time (msec) to wait for children to gracefully terminate + before shutting down. Setting a negative value causes an + indefinite wait. +endif + config LIBUKBOOT_ALLOCSTACK bool default y if LIBUKBOOT_INITALLOC diff --git a/lib/ukboot/Makefile.uk b/lib/ukboot/Makefile.uk index 8ff3ff990..f6a88bcfa 100644 --- a/lib/ukboot/Makefile.uk +++ b/lib/ukboot/Makefile.uk @@ -5,6 +5,8 @@ CXXINCLUDES-$(CONFIG_LIBUKBOOT) += -I$(LIBUKBOOT_BASE)/include LIBUKBOOT_ASINCLUDES-y += -I$(UK_PLAT_COMMON_BASE)/include LIBUKBOOT_CINCLUDES-y += -I$(UK_PLAT_COMMON_BASE)/include +LIBUKBOOT_SRCS-$(CONFIG_LIBUKBOOT_INIT) += $(LIBUKBOOT_BASE)/init.c + LIBUKBOOT_SRCS-y += $(LIBUKBOOT_BASE)/boot.c LIBUKBOOT_SRCS-y += $(LIBUKBOOT_BASE)/early_init.c LIBUKBOOT_SRCS-y += $(LIBUKBOOT_BASE)/earlytab.lds.S diff --git a/lib/ukboot/boot.c b/lib/ukboot/boot.c index c585fdd4e..962f49cc2 100644 --- a/lib/ukboot/boot.c +++ b/lib/ukboot/boot.c @@ -107,11 +107,12 @@ #include #endif /* CONFIG_LIBUKINTCTLR */ +#include "init.h" + extern char **boot_argv; extern int boot_argc; int main(int argc, char *argv[]) __weak; -static inline int do_main(int argc, char *argv[]); #if CONFIG_LIBUKBOOT_MAINTHREAD static __noreturn void main_thread(void *, void *); @@ -453,7 +454,7 @@ exit: ukplat_terminate(tctx.target); /* does not return */ } -static inline int do_main(int argc, char *argv[]) 
+int do_main(int argc, char *argv[]) { char **envp __maybe_unused; uk_ctor_func_t *ctorfn; @@ -528,8 +529,11 @@ static __noreturn void main_thread(void *a0, void *a1) /* block until we are allowed to execute main() */ uk_semaphore_down(&main_sema); - +#if CONFIG_LIBUKBOOT_INIT + tctx->exit_code = do_init(ictx->cmdline.argc, ictx->cmdline.argv); +#else /* !CONFIG_LIBUKBOOT_INIT */ tctx->exit_code = do_main(ictx->cmdline.argc, ictx->cmdline.argv); +#endif /* !CONFIG_LIBUKBOOT_INIT */ #if !CONFIG_LIBUKBOOT_MAINTHREAD_NOHALT /* NOTE: The scheduler's garbage collector would also initiate a diff --git a/lib/ukboot/init.c b/lib/ukboot/init.c new file mode 100644 index 000000000..cf1f8e268 --- /dev/null +++ b/lib/ukboot/init.c @@ -0,0 +1,248 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright (c) 2025, Unikraft GmbH and The Unikraft Authors. + * Licensed under the BSD-3-Clause License (the "License"). + * You may not use this file except in compliance with the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "init.h" + +#if CONFIG_LIBUKBOOT_GRACEFUL_SHUTDOWN +#define GRACEFUL_SHUTDOWN_SIGNAL \ + CONFIG_LIBUKBOOT_GRACEFUL_SHUTDOWN_SIGNAL + +#define GRACEFUL_SHUTDOWN_TIMEOUT \ + CONFIG_LIBUKBOOT_GRACEFUL_SHUTDOWN_TIMEOUT +#endif /* CONFIG_LIBUKBOOT_GRACEFUL_SHUTDOWN */ + +/* We use the shell convention to set the high bit + * when the process is terminated by signal. + */ +#define TERM_BY_SIGNAL_BIT 0x80 + +static pid_t application_pid; + +static int application_returned; +static int application_status; + +static inline pid_t wait_nonblocking(void) +{ + int wstatus; + pid_t pid; + + pid = waitpid(-1, &wstatus, WNOHANG); + + /* Save the application's return code. This will be + * the value Unikraft returns upon exit. If the + * application was killed we set the high bit as + * by convention. 
+ */ + if (pid == application_pid) { + application_returned = 1; + UK_ASSERT(WIFEXITED(wstatus) || WIFSIGNALED(wstatus)); + if (WIFEXITED(wstatus)) + application_status = WEXITSTATUS(wstatus); + else if (WIFSIGNALED(wstatus)) + application_status = TERM_BY_SIGNAL_BIT | WTERMSIG(wstatus); + } + + return pid; +} + +#if CONFIG_LIBUKBOOT_GRACEFUL_SHUTDOWN +/* Monotonic time in msec */ +static inline long now(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + + return ts.tv_sec * 1000UL + ts.tv_nsec / 1000000UL; +} + +/* Wait for processes to terminate gracefully. Although we return main()'s exit + * status back to libukboot, when it comes to reaping we don't differentiate + * between the application processes and reparented children, as these are + * likely the result of daemonization so we also need to treat as equal. + * + * Notice: We don't return on error as we don't have a way to handle these + * other than printing an error message (do_init() is expected to + * return the application's code, that we don't want to shadow if + * the application has already returned by this point). 
+ */ +static void graceful_shutdown(int sigfd) +{ + struct epoll_event ev; + struct signalfd_siginfo info; + long deadline; /* msec */ + long timeout; /* msec */ + ssize_t bytes; + int epollfd; + int nfds; + int ret; + + uk_pr_debug("Signalling children\n"); + + kill(0, GRACEFUL_SHUTDOWN_SIGNAL); + + /* Make signalfd non-blocking */ + ret = fcntl(sigfd, F_SETFL, fcntl(sigfd, F_GETFL) | O_NONBLOCK); + if (unlikely(ret < 0)) { + uk_pr_err("Could not set signalfd to non-blocking mode (%d)\n", + errno); + return; + } + + /* Create epoll instance */ + epollfd = epoll_create1(0); + if (unlikely(epollfd == -1)) { + uk_pr_err("epoll_create() failed (%d)\n", errno); + return; + } + + /* Add existing signalfd to epoll */ + ev.events = EPOLLIN; + ev.data.fd = sigfd; + ret = epoll_ctl(epollfd, EPOLL_CTL_ADD, sigfd, &ev); + if (unlikely(ret == -1)) { + uk_pr_err("epoll_ctl failed (%d)\n", errno); + goto err_close_epollfd; + } + + /* Reap terminated children */ + if (GRACEFUL_SHUTDOWN_TIMEOUT > 0) + deadline = now() + GRACEFUL_SHUTDOWN_TIMEOUT; + timeout = GRACEFUL_SHUTDOWN_TIMEOUT; + while (1) { + /* Wait for events or timeout */ + nfds = epoll_wait(epollfd, &ev, 1, timeout); + if (unlikely(nfds == -1)) { + uk_pr_err("epoll_wait failed (%d)\n", errno); + break; + } + + if (!nfds) { + uk_pr_debug("Reached timeout, terminating forcefully\n"); + break; + } + + /* Process SIGCHLD */ + bytes = read(sigfd, &info, sizeof(info)); + if (bytes == sizeof(info) && info.ssi_signo == SIGCHLD) { + /* Reap terminated children */ + do { + ret = wait_nonblocking(); + } while (ret > 0); + + UK_ASSERT(ret == 0 || (ret == -1 && errno == ECHILD)); + + if (ret == -1 && errno == ECHILD) { + uk_pr_debug("All children exited gracefully\n"); + break; + } + } + + if (timeout >= 0) { + if (!timeout) /* User set to zero */ + break; + timeout = deadline - now(); + if (timeout <= 0) + break; + } + } + +err_close_epollfd: + close(epollfd); +} +#endif /* CONFIG_LIBUKBOOT_GRACEFUL_SHUTDOWN */ + +int do_init(int 
argc, char *argv[]) +{ + struct signalfd_siginfo info; + ssize_t bytes; + sigset_t mask; + int sigfd; + int ret; + + /* Block all signals */ + sigfillset(&mask); + sigprocmask(SIG_BLOCK, &mask, NULL); + + /* Prepare mask for signalfd */ + sigemptyset(&mask); + sigaddset(&mask, SIGCHLD); + sigaddset(&mask, SIGTERM); + + /* Create signalfd for SIGCHLD and SIGTERM */ + sigfd = signalfd(-1, &mask, SFD_CLOEXEC); + if (sigfd < 0) { + uk_pr_err("signalfd error (%d)\n", errno); + return errno; + } + + /* Spawn application process */ + application_pid = uk_posix_process_run(do_main, argc, + (const char **)argv); + + /* Wait for application to exit and reap reparented children */ + while (1) { + bytes = read(sigfd, &info, sizeof(info)); + if (bytes != sizeof(info)) { + uk_pr_err("Read from signalfd failed\n"); + goto err_close_signalfd; + } + + if (info.ssi_signo == SIGCHLD) { + /* Reap children. If the last child terminated, + * initiate shutdown. + */ + do { + ret = wait_nonblocking(); + } while (ret > 0); + + UK_ASSERT(ret == 0 || (ret == -1 && errno == ECHILD)); + + if (ret == -1 && errno == ECHILD) { + uk_pr_info("All children terminated. Initiating shutdown...\n"); + break; + } + } else if (info.ssi_signo == SIGTERM) { + uk_pr_info("Received SIGTERM. Initiating shutdown...\n"); + break; + } + } + +#if CONFIG_LIBUKBOOT_GRACEFUL_SHUTDOWN + graceful_shutdown(sigfd); +#endif /* CONFIG_LIBUKBOOT_GRACEFUL_SHUTDOWN */ + +err_close_signalfd: + /* If the application is still running, set the error code to SIGKILL + * to signify it was (actually, will be) force-killed. Return back to + * Unikraft to terminate all remaining processes and shut down the + * system. 
+ */ + if (!application_returned) { + uk_pr_debug("The application did not exit gracefully\n"); + application_status = TERM_BY_SIGNAL_BIT | SIGKILL; + } + + close(sigfd); + + return application_status; +} diff --git a/lib/ukboot/init.h b/lib/ukboot/init.h new file mode 100644 index 000000000..0379b3c8a --- /dev/null +++ b/lib/ukboot/init.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright (c) 2024, Unikraft GmbH and The Unikraft Authors. + * Licensed under the BSD-3-Clause License (the "License"). + * You may not use this file except in compliance with the License. + */ + +#ifndef __UK_BOOT_INIT_PRIV_H__ +#define __UK_BOOT_INIT_PRIV_H__ + +/** + * INTERNAL. main() wrapper + * + * Calls application's constructors followed by main(). + * + * @param argc Arg count + * @param argv Arg vector + * @return The return value of main() + */ +int do_main(int argc, char *argv[]); + +#if CONFIG_LIBUKBOOT_INIT +/** + * INTERNAL. /sbin/init logic + * + * Spawns a new process that executes do_main(), fosters orphans, + * reaps children, and triggers (graceful) shutdown in response to + * SIGTERM. + * + * @param argc Arg count + * @param argv Arg vector + * @return The return value of main(), or SIGKILL | 0x80 if the + * application did not exit gracefully. + */ +int do_init(int argc, char *argv[]); +#endif /* CONFIG_LIBUKBOOT_INIT */ + +#endif /* __UK_BOOT_INIT_PRIV_H__ */