]> xenbits.xensource.com Git - unikraft/unikraft.git/commitdiff
lib/ukboot: Implement /sbin/init logic
author Michalis Pappas <michalis@unikraft.io>
Mon, 7 Apr 2025 16:35:28 +0000 (18:35 +0200)
committer Unikraft Bot <monkey@unikraft.io>
Wed, 30 Apr 2025 10:07:29 +0000 (10:07 +0000)
Add /sbin/init logic for multiprocess configurations. This is controlled
by a new option, CONFIG_LIBUKBOOT_INIT. The init logic is implemented as
part of PID 1, and spawns the application into a new process. Besides
fostering reparented orphans, and reaping children, it also manages
system shutdown and returning the application's exit status back to
ukboot.

A second option, namely CONFIG_LIBUKBOOT_GRACEFUL_SHUTDOWN, provides the
ability to signal children with a configurable signal, and to block until
they exit or a configurable timeout expires. If the timeout is reached
and children are still alive, init returns and libposix-process forcefully
terminates all remaining processes.

Checkpatch-Ignore: AVOID_EXTERNS
Checkpatch-Ignore: LONG_LINE;
Signed-off-by: Michalis Pappas <michalis@unikraft.io>
Approved-by: Andrei Tatar <andrei@unikraft.io>
Reviewed-by: Andrei Tatar <andrei@unikraft.io>
GitHub-Closes: #1633

lib/ukboot/Config.uk
lib/ukboot/Makefile.uk
lib/ukboot/boot.c
lib/ukboot/init.c [new file with mode: 0644]
lib/ukboot/init.h [new file with mode: 0644]

index af7bcbbece3d0c15878881c94bea02df61a47689..b1e70e9c264416a08978659d9c5302adee938fa4 100644 (file)
@@ -196,6 +196,46 @@ config LIBUKBOOT_SHUTDOWNREQ_HANDLER
          if it is requested by a driver on the shutdown_req event queue
          (e.g., ACPI power button event).
 
+config LIBUKBOOT_INIT
+       bool "Provide /sbin/init logic"
+       depends on LIBPOSIX_PROCESS_MULTIPROCESS
+       select LIBPOSIX_FDTAB
+       select LIBPOSIX_PROCESS_SIGNAL
+       select LIBPOSIX_PROCESS_SIGNALFD
+       select LIBUKBOOT_MAINTHREAD
+       help
+               Provides /sbin/init logic to handle orphan processes
+               and manage shutdown. When this option is selected, the
+               application is executed in a new process. If you don't
+               select this in a multiprocess configuration, you need
+               to provide your own init.
+
+menuconfig LIBUKBOOT_GRACEFUL_SHUTDOWN
+       bool "Graceful shutdown"
+       depends on LIBUKBOOT_INIT
+       select LIBPOSIX_POLL
+       help
+               Signal the application on system shutdown to give it
+               a chance to terminate gracefully. If the timeout is
+               reached, all processes are killed forcefully.
+
+if LIBUKBOOT_GRACEFUL_SHUTDOWN
+config LIBUKBOOT_GRACEFUL_SHUTDOWN_SIGNAL
+       int "Termination signal"
+       default 15 if (ARCH_ARM_64 || ARCH_X86_64)
+       help
+               Signal to send the application to gracefully terminate
+               before shutting down. Defaults to SIGTERM.
+
+config LIBUKBOOT_GRACEFUL_SHUTDOWN_TIMEOUT
+       int "Termination timeout (msec)"
+       default 5000
+       help
+               Time (msec) to wait for children to gracefully terminate
+               before shutting down. Setting a negative value causes an
+               indefinite wait.
+endif
+
 config LIBUKBOOT_ALLOCSTACK
        bool
        default y if LIBUKBOOT_INITALLOC
index 8ff3ff9908b4add5bd86821df98e81f3050e91c5..f6a88bcfa61037dbfd3b8c1590b6ff86477bd600 100644 (file)
@@ -5,6 +5,8 @@ CXXINCLUDES-$(CONFIG_LIBUKBOOT)         += -I$(LIBUKBOOT_BASE)/include
 LIBUKBOOT_ASINCLUDES-y                 += -I$(UK_PLAT_COMMON_BASE)/include
 LIBUKBOOT_CINCLUDES-y                  += -I$(UK_PLAT_COMMON_BASE)/include
 
+LIBUKBOOT_SRCS-$(CONFIG_LIBUKBOOT_INIT) += $(LIBUKBOOT_BASE)/init.c
+
 LIBUKBOOT_SRCS-y += $(LIBUKBOOT_BASE)/boot.c
 LIBUKBOOT_SRCS-y += $(LIBUKBOOT_BASE)/early_init.c
 LIBUKBOOT_SRCS-y += $(LIBUKBOOT_BASE)/earlytab.lds.S
index c585fdd4e693a7c986b517dde93d8831abc7c02d..962f49cc2861d4100dcf2f26d81353b145929656 100644 (file)
 #include <uk/intctlr.h>
 #endif /* CONFIG_LIBUKINTCTLR */
 
+#include "init.h"
+
 extern char **boot_argv;
 extern int boot_argc;
 
 int main(int argc, char *argv[]) __weak;
-static inline int do_main(int argc, char *argv[]);
 
 #if CONFIG_LIBUKBOOT_MAINTHREAD
 static __noreturn void main_thread(void *, void *);
@@ -453,7 +454,7 @@ exit:
        ukplat_terminate(tctx.target); /* does not return */
 }
 
-static inline int do_main(int argc, char *argv[])
+int do_main(int argc, char *argv[])
 {
        char **envp __maybe_unused;
        uk_ctor_func_t *ctorfn;
@@ -528,8 +529,11 @@ static __noreturn void main_thread(void *a0, void *a1)
 
        /* block until we are allowed to execute main() */
        uk_semaphore_down(&main_sema);
-
+#if CONFIG_LIBUKBOOT_INIT
+       tctx->exit_code = do_init(ictx->cmdline.argc, ictx->cmdline.argv);
+#else /* !CONFIG_LIBUKBOOT_INIT */
        tctx->exit_code = do_main(ictx->cmdline.argc, ictx->cmdline.argv);
+#endif /* !CONFIG_LIBUKBOOT_INIT */
 
 #if !CONFIG_LIBUKBOOT_MAINTHREAD_NOHALT
        /* NOTE: The scheduler's garbage collector would also initiate a
diff --git a/lib/ukboot/init.c b/lib/ukboot/init.c
new file mode 100644 (file)
index 0000000..cf1f8e2
--- /dev/null
@@ -0,0 +1,248 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright (c) 2025, Unikraft GmbH and The Unikraft Authors.
+ * Licensed under the BSD-3-Clause License (the "License").
+ * You may not use this file except in compliance with the License.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/signalfd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <uk/essentials.h>
+#include <uk/process.h>
+#include <uk/sched.h>
+
+#include "init.h"
+
+#if CONFIG_LIBUKBOOT_GRACEFUL_SHUTDOWN
+#define GRACEFUL_SHUTDOWN_SIGNAL                           \
+       CONFIG_LIBUKBOOT_GRACEFUL_SHUTDOWN_SIGNAL
+
+#define GRACEFUL_SHUTDOWN_TIMEOUT                          \
+       CONFIG_LIBUKBOOT_GRACEFUL_SHUTDOWN_TIMEOUT
+#endif /* CONFIG_LIBUKBOOT_GRACEFUL_SHUTDOWN */
+
+/* We use the shell convention to set the high bit
+ * when the process is terminated by signal.
+ */
+#define TERM_BY_SIGNAL_BIT  0x80
+
+static pid_t application_pid;
+
+static int application_returned;
+static int application_status;
+
+/**
+ * Reap at most one terminated child without blocking.
+ *
+ * If the reaped child is the application process, record that it has
+ * returned and save its status in application_status. This will be the
+ * value Unikraft returns upon exit. If the application was killed by a
+ * signal, the status is TERM_BY_SIGNAL_BIT | signo, as by shell
+ * convention.
+ *
+ * @return The value returned by waitpid(-1, ..., WNOHANG): the PID of the
+ *         reaped child, 0 if no child has changed state, or -1 on error
+ *         (errno is left as set by waitpid()).
+ */
+static inline pid_t wait_nonblocking(void)
+{
+       int wstatus = 0;
+       pid_t pid;
+
+       pid = waitpid(-1, &wstatus, WNOHANG);
+
+       /* waitpid() fills in wstatus only when it actually reaps a child.
+        * Bail out on 0 / -1 so that these values are never mistaken for
+        * the application (e.g. if application_pid holds an error value)
+        * and an unset wstatus is never decoded.
+        */
+       if (pid <= 0)
+               return pid;
+
+       if (pid == application_pid) {
+               application_returned = 1;
+               UK_ASSERT(WIFEXITED(wstatus) || WIFSIGNALED(wstatus));
+               if (WIFEXITED(wstatus))
+                       application_status = WEXITSTATUS(wstatus);
+               else if (WIFSIGNALED(wstatus))
+                       application_status = TERM_BY_SIGNAL_BIT | WTERMSIG(wstatus);
+       }
+
+       return pid;
+}
+
+#if CONFIG_LIBUKBOOT_GRACEFUL_SHUTDOWN
+/* Current CLOCK_MONOTONIC reading, expressed in milliseconds */
+static inline long now(void)
+{
+       struct timespec tp;
+       unsigned long msec;
+
+       clock_gettime(CLOCK_MONOTONIC, &tp);
+
+       /* Whole seconds first, then the sub-second remainder */
+       msec = tp.tv_sec * 1000UL;
+       msec += tp.tv_nsec / 1000000UL;
+
+       return msec;
+}
+
+/* Reap every child that has already terminated.
+ *
+ * @return Non-zero if init has no children left (waitpid() failed with
+ *         ECHILD), zero if some children have not terminated yet.
+ */
+static int reap_terminated_children(void)
+{
+       pid_t pid;
+
+       do {
+               pid = wait_nonblocking();
+       } while (pid > 0);
+
+       UK_ASSERT(pid == 0 || (pid == -1 && errno == ECHILD));
+
+       return pid == -1 && errno == ECHILD;
+}
+
+/* Wait for processes to terminate gracefully. Although we return main()'s exit
+ * status back to libukboot, when it comes to reaping we don't differentiate
+ * between the application processes and reparented children, as these are
+ * likely the result of daemonization so we also need to treat as equal.
+ *
+ * The children are signalled with GRACEFUL_SHUTDOWN_SIGNAL and then reaped
+ * as SIGCHLD notifications arrive on sigfd, until no child is left or
+ * GRACEFUL_SHUTDOWN_TIMEOUT (msec) expires. A negative timeout waits
+ * indefinitely, a timeout of zero polls once. If there are no children left
+ * on entry, the function returns immediately instead of waiting for a
+ * SIGCHLD that can never arrive.
+ *
+ * Notice: We don't return on error as we don't have a way to handle these
+ *         other than printing an error message (do_init() is expected to
+ *         return the application's code, that we don't want to shadow if
+ *         the application has already returned by this point).
+ *
+ * @param sigfd signalfd that receives SIGCHLD; it is switched to
+ *              non-blocking mode and left that way.
+ */
+static void graceful_shutdown(int sigfd)
+{
+       struct epoll_event ev;
+       struct signalfd_siginfo info;
+       long deadline = 0; /* msec, only meaningful for a positive timeout */
+       long timeout;      /* msec */
+       ssize_t bytes;
+       int epollfd;
+       int flags;
+       int nfds;
+       int ret;
+
+       /* Nothing to signal or wait for if every child is already gone */
+       if (reap_terminated_children()) {
+               uk_pr_debug("No children left, skipping graceful shutdown\n");
+               return;
+       }
+
+       uk_pr_debug("Signalling children\n");
+
+       if (kill(0, GRACEFUL_SHUTDOWN_SIGNAL) < 0)
+               uk_pr_err("Could not signal children (%d)\n", errno);
+
+       /* Make signalfd non-blocking */
+       flags = fcntl(sigfd, F_GETFL);
+       if (unlikely(flags < 0)) {
+               uk_pr_err("Could not get signalfd flags (%d)\n", errno);
+               return;
+       }
+
+       ret = fcntl(sigfd, F_SETFL, flags | O_NONBLOCK);
+       if (unlikely(ret < 0)) {
+               uk_pr_err("Could not set signalfd to non-blocking mode (%d)\n",
+                         errno);
+               return;
+       }
+
+       /* Create epoll instance */
+       epollfd = epoll_create1(0);
+       if (unlikely(epollfd == -1)) {
+               uk_pr_err("epoll_create() failed (%d)\n", errno);
+               return;
+       }
+
+       /* Add existing signalfd to epoll */
+       ev.events = EPOLLIN;
+       ev.data.fd = sigfd;
+       ret = epoll_ctl(epollfd, EPOLL_CTL_ADD, sigfd, &ev);
+       if (unlikely(ret == -1)) {
+               uk_pr_err("epoll_ctl failed (%d)\n", errno);
+               goto err_close_epollfd;
+       }
+
+       /* Reap terminated children */
+       if (GRACEFUL_SHUTDOWN_TIMEOUT > 0)
+               deadline = now() + GRACEFUL_SHUTDOWN_TIMEOUT;
+       timeout = GRACEFUL_SHUTDOWN_TIMEOUT;
+       while (1) {
+               /* Wait for events or timeout */
+               nfds = epoll_wait(epollfd, &ev, 1, timeout);
+               if (unlikely(nfds == -1)) {
+                       uk_pr_err("epoll_wait failed (%d)\n", errno);
+                       break;
+               }
+
+               if (!nfds) {
+                       uk_pr_debug("Reached timeout, terminating forcefully\n");
+                       break;
+               }
+
+               /* Process SIGCHLD. Other signals delivered through sigfd
+                * are consumed and ignored.
+                */
+               bytes = read(sigfd, &info, sizeof(info));
+               if (bytes == sizeof(info) && info.ssi_signo == SIGCHLD &&
+                   reap_terminated_children()) {
+                       uk_pr_debug("All children exited gracefully\n");
+                       break;
+               }
+
+               /* Recompute the remaining time; a negative timeout means
+                * wait indefinitely and is left untouched.
+                */
+               if (timeout >= 0) {
+                       if (!timeout) /* User set to zero */
+                               break;
+                       timeout = deadline - now();
+                       if (timeout <= 0)
+                               break;
+               }
+       }
+
+err_close_epollfd:
+       close(epollfd);
+}
+#endif /* CONFIG_LIBUKBOOT_GRACEFUL_SHUTDOWN */
+
+int do_init(int argc, char *argv[])
+{
+       struct signalfd_siginfo info;
+       ssize_t bytes;
+       sigset_t mask;
+       int sigfd;
+       int ret;
+
+       /* Block all signals */
+       sigfillset(&mask);
+       sigprocmask(SIG_BLOCK, &mask, NULL);
+
+       /* Prepare mask for signalfd */
+       sigemptyset(&mask);
+       sigaddset(&mask, SIGCHLD);
+       sigaddset(&mask, SIGTERM);
+
+       /* Create signalfd for SIGCHLD and SIGTERM */
+       sigfd = signalfd(-1, &mask, SFD_CLOEXEC);
+       if (sigfd < 0) {
+               uk_pr_err("signalfd error (%d)\n", errno);
+               return errno;
+       }
+
+       /* Spawn application process */
+       application_pid = uk_posix_process_run(do_main, argc,
+                                              (const char **)argv);
+
+       /* Wait for application to exit and reap reparented children */
+       while (1) {
+               bytes = read(sigfd, &info, sizeof(info));
+               if (bytes != sizeof(info)) {
+                       uk_pr_err("Read from signalfd failed\n");
+                       goto err_close_signalfd;
+               }
+
+               if (info.ssi_signo == SIGCHLD) {
+                       /* Reap children. If the last child terminated,
+                        * initiate shutdown.
+                        */
+                       do {
+                               ret = wait_nonblocking();
+                       } while (ret > 0);
+
+                       UK_ASSERT(ret == 0 || (ret == -1 && errno == ECHILD));
+
+                       if (ret == -1 && errno == ECHILD) {
+                               uk_pr_info("All children terminated. Initiating shutdown...\n");
+                               break;
+                       }
+               } else if (info.ssi_signo == SIGTERM) {
+                       uk_pr_info("Received SIGTERM. Initiating shutdown...\n");
+                       break;
+               }
+       }
+
+#if CONFIG_LIBUKBOOT_GRACEFUL_SHUTDOWN
+       graceful_shutdown(sigfd);
+#endif /* CONFIG_LIBUKBOOT_GRACEFUL_SHUTDOWN */
+
+err_close_signalfd:
+       /* If the application is still running, set the error code to SIGKILL
+        * to signify it was (actually, will be) force-killed. Return back to
+        * Unikraft to terminate all remaining processes and shut down the
+        * system.
+        */
+       if (!application_returned) {
+               uk_pr_debug("The application did not exit gracefully\n");
+               application_status = TERM_BY_SIGNAL_BIT | SIGKILL;
+       }
+
+       close(sigfd);
+
+       return application_status;
+}
diff --git a/lib/ukboot/init.h b/lib/ukboot/init.h
new file mode 100644 (file)
index 0000000..0379b3c
--- /dev/null
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright (c) 2024, Unikraft GmbH and The Unikraft Authors.
+ * Licensed under the BSD-3-Clause License (the "License").
+ * You may not use this file except in compliance with the License.
+ */
+
+#ifndef __UK_BOOT_INIT_PRIV_H__
+#define __UK_BOOT_INIT_PRIV_H__
+
+/**
+ * INTERNAL. main() wrapper
+ *
+ * Calls the application's constructors, followed by main().
+ *
+ * @param argc Arg count
+ * @param argv Arg vector
+ * @return The return value of main()
+ */
+int do_main(int argc, char *argv[]);
+
+#if CONFIG_LIBUKBOOT_INIT
+/**
+ * INTERNAL. /sbin/init logic
+ *
+ * Spawns a new process that executes do_main(), fosters orphans,
+ * reaps children, and triggers (graceful) shutdown in response to
+ * SIGTERM or once the last child has terminated.
+ *
+ * @param argc Arg count
+ * @param argv Arg vector
+ * @return The application's exit status, or 0x80 | signo if the
+ *         application was terminated by a signal. If the application
+ *         did not exit gracefully, 0x80 | SIGKILL. If the signalfd
+ *         used by init cannot be created, the errno of that failure.
+ */
+int do_init(int argc, char *argv[]);
+#endif /* CONFIG_LIBUKBOOT_INIT */
+
+#endif /* __UK_BOOT_INIT_PRIV_H__ */