From: Sergiu Moga
Date: Thu, 20 Mar 2025 17:54:14 +0000 (+0200)
Subject: lib/posix-process: Add `signalfd`/`signalfd4` support
X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=8e4c7d9c5ef6d0b63983cfbb48c5360b1aac2700;p=unikraft%2Funikraft.git

lib/posix-process: Add `signalfd`/`signalfd4` support

Add support for the `signalfd`/`signalfd4` syscalls, which allow callers to add monitored signal files to the current process so that they can be notified of pending signals.

Introduce the primitives required for supporting signalfd: signal files. These embed the actual base file structure, the mask that shows which signals are being monitored, as well as the other file data needed to properly register with libukfile.

Each process's signal descriptor owns the signal files context, which tracks the combined mask of all of the open signal files as well as a list of references to them. Having a combined mask makes checking whether a process has signal files monitoring a given signal faster than iterating through every signal file and checking its mask each time.

Thus, adding a new signal file entry means updating the combined mask. Deleting one would imply resetting the combined mask and reiterating through all remaining signal files to rebuild a proper combined mask. However, as an optimization, we don't do that. Instead, we defer mask recalculation until we need to iterate through the signal files again, which is when we have to notify the registered signal files of a signal number.
Signed-off-by: Sergiu Moga Approved-by: Michalis Pappas Reviewed-by: Michalis Pappas Reviewed-by: Andrei Tatar GitHub-Closes: #1619 --- diff --git a/lib/posix-process/Config.uk b/lib/posix-process/Config.uk index 1338177f8..511762211 100644 --- a/lib/posix-process/Config.uk +++ b/lib/posix-process/Config.uk @@ -30,6 +30,11 @@ config LIBPOSIX_PROCESS_CLONE bool "clone() system call" select LIBPOSIX_PROCESS_PIDS +config LIBPOSIX_PROCESS_SIGNALFD + bool "signalfd4() and signalfd() syscalls" + depends on LIBPOSIX_PROCESS_SIGNAL + select LIBUKFILE_POLLED + config LIBPOSIX_PROCESS_CLONE_PREFER_CHILD bool "Prefer scheduling of child" depends on LIBPOSIX_PROCESS_CLONE diff --git a/lib/posix-process/Makefile.uk b/lib/posix-process/Makefile.uk index 9e61777e7..0b7d0ca8d 100644 --- a/lib/posix-process/Makefile.uk +++ b/lib/posix-process/Makefile.uk @@ -24,6 +24,7 @@ LIBPOSIX_PROCESS_SRCS-$(CONFIG_LIBPOSIX_PROCESS_SIGNAL) += $(LIBPOSIX_PROCESS_BA LIBPOSIX_PROCESS_SRCS-$(CONFIG_LIBPOSIX_PROCESS_SIGNAL) += $(LIBPOSIX_PROCESS_BASE)/arch/$(CONFIG_UK_ARCH)/ucontext.c LIBPOSIX_PROCESS_SRCS-$(CONFIG_LIBPOSIX_PROCESS_SIGNAL) += $(LIBPOSIX_PROCESS_BASE)/arch/$(CONFIG_UK_ARCH)/signal.S|arch LIBPOSIX_PROCESS_SRCS-$(CONFIG_LIBPOSIX_PROCESS_SIGNAL) += $(LIBPOSIX_PROCESS_BASE)/signal/deliver.c +LIBPOSIX_PROCESS_SRCS-$(CONFIG_LIBPOSIX_PROCESS_SIGNALFD) += $(LIBPOSIX_PROCESS_BASE)/signal/signal_file.c LIBPOSIX_PROCESS_SRCS-y += $(LIBPOSIX_PROCESS_BASE)/signal/alarm.c LIBPOSIX_PROCESS_SRCS-y += $(LIBPOSIX_PROCESS_BASE)/signal/pause.c LIBPOSIX_PROCESS_SRCS-y += $(LIBPOSIX_PROCESS_BASE)/signal/sigaltstack.c @@ -62,6 +63,8 @@ UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS) += rt_sigaction-4 UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS) += rt_sigqueueinfo-3 UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS) += rt_tgsigqueueinfo-4 UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS) += rt_sigtimedwait-4 +UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS_SIGNALFD) += signalfd4-4 
+UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS_SIGNALFD) += signalfd-3 UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS) += kill-2 UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS) += tgkill-3 UK_PROVIDED_SYSCALLS-$(CONFIG_LIBPOSIX_PROCESS) += tkill-2 diff --git a/lib/posix-process/exportsyms.uk b/lib/posix-process/exportsyms.uk index 56ec39f69..f24e8b910 100644 --- a/lib/posix-process/exportsyms.uk +++ b/lib/posix-process/exportsyms.uk @@ -53,3 +53,4 @@ sigfillset sigismember sigprocmask tgkill +signalfd diff --git a/lib/posix-process/signal/signal.h b/lib/posix-process/signal/signal.h index c8dbb03bf..b69888450 100644 --- a/lib/posix-process/signal/signal.h +++ b/lib/posix-process/signal/signal.h @@ -20,6 +20,10 @@ #include "process.h" #include "sigset.h" +#if CONFIG_LIBPOSIX_PROCESS_SIGNALFD +#include "signal_file.h" +#endif /* CONFIG_LIBPOSIX_PROCESS_SIGNALFD */ + #define SIG_ARRAY_COUNT _NSIG /* SIGRTMAX + 1 */ /* Check if signal number is valid */ @@ -162,6 +166,9 @@ struct uk_signal_pdesc { * share the same set of handlers. */ struct uk_sigaction *sigaction; +#if CONFIG_LIBPOSIX_PROCESS_SIGNALFD + struct uk_signal_files_ctx sigfiles_ctx; +#endif /* CONFIG_LIBPOSIX_PROCESS_SIGNALFD */ }; /* Signal descriptor of a posix_thread. 
@@ -295,6 +302,24 @@ void pprocess_signal_arch_get_ucontext(ucontext_t *ucontext,
  */
 bool pprocess_signal_is_deliverable(struct posix_thread *pthread, int signum);
 
+#if CONFIG_LIBPOSIX_PROCESS_SIGNALFD
+/* Add a signal file to a process in order to track it */
+static inline
+void pprocess_signal_file_add(struct posix_process *pproc,
+			      struct uk_signal_file *sigf)
+{
+	uk_signal_files_ctx_add(&pproc->signal->sigfiles_ctx, sigf);
+}
+
+/* Remove a signal file from a process */
+static inline
+void pprocess_signal_file_del(struct posix_process *pproc,
+			      struct uk_signal_file *sigf)
+{
+	uk_signal_files_ctx_del(&pproc->signal->sigfiles_ctx, sigf);
+}
+#endif /* CONFIG_LIBPOSIX_PROCESS_SIGNALFD */
+
 #endif /* CONFIG_LIBPOSIX_PROCESS_PIDS */
 
 #endif /* __UK_PROCESS_SIGNAL_H__ */
diff --git a/lib/posix-process/signal/signal_file.c b/lib/posix-process/signal/signal_file.c
new file mode 100644
index 000000000..f05035b24
--- /dev/null
+++ b/lib/posix-process/signal/signal_file.c
@@ -0,0 +1,400 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright (c) 2025, Unikraft GmbH and The Unikraft Authors.
+ * Licensed under the BSD-3-Clause License (the "License").
+ * You may not use this file except in compliance with the License.
+ */
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#if CONFIG_LIBPOSIX_FDTAB
+#include
+#include
+#include
+#endif /* CONFIG_LIBPOSIX_FDTAB */
+
+#include
+
+#include "signal.h"
+
+/* Volume ID string used to identify signal files */
+static const char SIGNAL_VOLID[] = "signal_vol";
+/* Macro to check if a file is a signal file by comparing volume ID */
+#define FILE_IS_SIGNAL(f) \
+	((f)->vol == SIGNAL_VOLID)
+
+/* Retrieve the signal file stored as the node of a signal base file */
+static inline
+struct uk_signal_file *uk_file_to_signal_file(const struct uk_file *f)
+{
+	UK_ASSERT(f);
+	UK_ASSERT(f->node);
+	UK_ASSERT(FILE_IS_SIGNAL(f));
+	return (struct uk_signal_file *)f->node;
+}
+
+/*
+ * Poll callback: report UKFD_POLLIN when a signal monitored by the file is
+ * pending on the calling thread or its process; other events are never set.
+ */
+static uk_pollevent signal_file_poll(const struct uk_file *f, uk_pollevent mask)
+{
+	const struct uk_signal_file *sigf;
+	struct posix_thread *pthread;
+	struct posix_process *pproc;
+	__sz sn;
+
+	if (unlikely(!(mask & UKFD_POLLIN)))
+		return 0;
+
+	sigf = uk_file_to_signal_file(f);
+	pthread = uk_pthread_current();
+	pproc = uk_pprocess_current();
+
+	pprocess_signal_foreach(sn)
+		if (uk_sigismember(&sigf->mask, sn) &&
+		    (IS_PENDING(pthread->signal->sigqueue, sn) ||
+		     IS_PENDING(pproc->signal->sigqueue, sn)))
+			return UKFD_POLLIN;
+
+	return 0;
+}
+
+/* Convert internal siginfo_t to signalfd_siginfo structure */
+static void si_to_ssi(const siginfo_t *si, struct signalfd_siginfo *ssi)
+{
+	/* Zero out the structure first */
+	memset(ssi, 0, sizeof(*ssi));
+
+	/* Basic signal information */
+	ssi->ssi_signo = si->si_signo;
+	ssi->ssi_errno = si->si_errno;
+	ssi->ssi_code = si->si_code;
+
+	/* Sender identification */
+	ssi->ssi_pid = si->si_pid;
+	ssi->ssi_uid = si->si_uid;
+
+	/* Signal-specific fields */
+	switch (si->si_signo) {
+	case SIGCHLD:
+		ssi->ssi_status = si->si_status;
+		ssi->ssi_utime = si->si_utime;
+		ssi->ssi_stime = si->si_stime;
+		break;
+	case SIGILL:
+	case SIGFPE:
+	case SIGSEGV:
+	case SIGBUS:
+		ssi->ssi_addr = (uint64_t)si->si_addr;
+		ssi->ssi_addr_lsb = si->si_addr_lsb;
+		break;
+	case SIGIO:
+		ssi->ssi_band = si->si_band;
+		ssi->ssi_fd = si->si_fd;
+		break;
+	default:
+		/* For real-time signals which may contain sigqueue data */
+		if (si->si_signo >= SIGRTMIN && si->si_signo <= SIGRTMAX) {
+			ssi->ssi_int = si->si_value.sival_int;
+			ssi->ssi_ptr = (uint64_t)si->si_value.sival_ptr;
+			return;
+		}
+
+		break;
+	}
+
+	/* Handle signals sent with sigqueue regardless of signal number */
+	if (si->si_code == SI_QUEUE || si->si_code == SI_USER) {
+		ssi->ssi_int = si->si_value.sival_int;
+		ssi->ssi_ptr = (uint64_t)si->si_value.sival_ptr;
+	}
+}
+
+/*
+ * Read pending signals monitored by the file as `struct signalfd_siginfo`
+ * entries, dequeueing every signal that is reported.
+ *
+ * Returns the number of bytes stored, -EINVAL for a non-zero offset, an empty
+ * iovec or a buffer smaller than one entry, -EFAULT if the first buffer is
+ * NULL and -EAGAIN if none of the monitored signals is pending.
+ */
+static ssize_t signal_file_read(const struct uk_file *f,
+				const struct iovec *iov, size_t iovcnt,
+				size_t off, long flags __unused)
+{
+	size_t iovlen_total, iovlen_wr, iovi, cur;
+	struct posix_thread *pthread;
+	struct posix_process *pproc;
+	struct uk_signal_file *sigf;
+	struct signalfd_siginfo ssi;
+	struct uk_signal *sig;
+	__sz sn;
+
+	if (unlikely(off))
+		return -EINVAL;
+
+	if (unlikely(!iovcnt))
+		return -EINVAL;
+
+	if (unlikely(!iov[0].iov_base))
+		return -EFAULT;
+
+	iovlen_total = uk_iov_remaining(iov, iovcnt, 0, 0);
+	if (unlikely(iovlen_total < sizeof(ssi)))
+		return -EINVAL;
+
+	sigf = uk_file_to_signal_file(f);
+
+	pthread = uk_pthread_current();
+	pproc = uk_pprocess_current();
+
+	iovlen_wr = 0;
+	iovi = 0;
+	cur = 0;
+	pprocess_signal_foreach(sn) {
+		if (!uk_sigismember(&sigf->mask, sn))
+			continue;
+
+		/*
+		 * Proceed like Linux and always prioritize dequeueing
+		 * signal from the specific target thread and only fallback
+		 * to dequeueing from the containing process if it also happens
+		 * to have this signal queued.
+		 */
+		if (IS_PENDING(pthread->signal->sigqueue, sn))
+			sig = pprocess_signal_dequeue(pproc, pthread, sn);
+		else if (IS_PENDING(pproc->signal->sigqueue, sn))
+			sig = pprocess_signal_dequeue(pproc, NULL, sn);
+		else
+			continue;
+
+		if (!sig)
+			continue;
+
+		si_to_ssi(&sig->siginfo, &ssi);
+		uk_signal_free(pthread->_a, sig);
+		iovlen_wr += uk_iov_scatter(iov, iovcnt,
+					    (const char *)&ssi, sizeof(ssi),
+					    &iovi, &cur);
+
+		/* Have we reached the end? */
+		if (iovlen_total - iovlen_wr < sizeof(ssi))
+			break;
+	}
+
+	/* No pending signals were queued - try again */
+	if (!iovlen_wr)
+		return -EAGAIN;
+
+	return iovlen_wr;
+}
+
+/* Operations of signal files; everything except read uses the nop helpers */
+static const struct uk_file_ops signal_file_ops = {
+	.read = signal_file_read,
+	.write = uk_file_nop_write,
+	.getstat = uk_file_nop_getstat,
+	.setstat = uk_file_nop_setstat,
+	.ctl = uk_file_nop_ctl
+};
+
+/*
+ * Release callback: on UK_FILE_RELEASE_OBJ, stop tracking the signal file in
+ * the current process and free it with the allocator that created it.
+ */
+static void signal_file_release(const struct uk_file *f, int what)
+{
+	struct posix_process *pproc;
+	struct uk_signal_file *sigf;
+
+	pproc = uk_pprocess_current();
+
+	if (what & UK_FILE_RELEASE_OBJ) {
+		/* Free */
+		sigf = uk_file_to_signal_file(f);
+		pprocess_signal_file_del(pproc, sigf);
+		uk_free(sigf->a, sigf);
+	}
+}
+
+const struct uk_file *signal_file_create(struct uk_alloc *a,
+					 const uk_sigset_t *mask)
+{
+	struct uk_signal_file *sigf;
+	struct posix_process *pproc;
+
+	sigf = uk_malloc(a, sizeof(*sigf));
+	if (unlikely(!sigf))
+		return ERR2PTR(-ENOMEM);
+
+	sigf->a = a;
+	sigf->fstate = UK_FILE_POLLED_STATE_INIT_VALUE(sigf->fstate,
+						       &signal_file_poll);
+	sigf->frefcnt = UK_FILE_REFCNT_INIT_VALUE(sigf->frefcnt);
+
+	sigf->f = (struct uk_file){
+		.vol = SIGNAL_VOLID,
+		.node = sigf,
+		.refcnt = &sigf->frefcnt,
+		.state = &sigf->fstate,
+		.ops = &signal_file_ops,
+		._release = signal_file_release,
+	};
+
+	/*
+	 * It is not possible to receive SIGKILL or SIGSTOP signals via a
+	 * signal file descriptor; these signals are silently ignored if
+	 * specified in mask.
+	 */
+	uk_sigcopyset(&sigf->mask, mask);
+	uk_sigdelset(&sigf->mask, SIGKILL);
+	uk_sigdelset(&sigf->mask, SIGSTOP);
+
+	pproc = uk_pprocess_current();
+	pprocess_signal_file_add(pproc, sigf);
+
+	return &sigf->f;
+}
+
+#if CONFIG_LIBPOSIX_FDTAB
+/* Create a signal file monitoring `mask` and open a file descriptor on it */
+static int signalfd_create(const uk_sigset_t *mask, int flags)
+{
+	unsigned int mode = O_RDONLY | UKFD_O_NOSEEK;
+	const struct uk_file *sigf;
+	int fd;
+
+	sigf = signal_file_create(uk_alloc_get_default(), mask);
+	if (unlikely(PTRISERR(sigf)))
+		return PTR2ERR(sigf);
+
+	/* Register fd */
+	if (flags & SFD_NONBLOCK)
+		mode |= O_NONBLOCK;
+	if (flags & SFD_CLOEXEC)
+		mode |= O_CLOEXEC;
+
+	fd = uk_fdtab_open(sigf, mode);
+	uk_file_release(sigf);
+
+	return fd;
+}
+
+/*
+ * Replace the mask of the signal file referred to by `fd`.
+ *
+ * Returns 0 on success, -EBADF if no open file is found for `fd` and
+ * -EINVAL if `fd` does not refer to a signal file.
+ */
+static int signalfd_set_mask(int fd, const uk_sigset_t *new_mask)
+{
+	struct uk_signal_file *sigf;
+	struct posix_process *pproc;
+	struct uk_ofile *of;
+
+	pproc = uk_pprocess_current();
+	UK_ASSERT(pproc);
+
+	of = uk_fdtab_get(fd);
+	if (unlikely(!of))
+		return -EBADF;
+
+	/*
+	 * Any open file descriptor can be passed in here. Reject those that
+	 * are not signal files instead of relying only on the assertions in
+	 * uk_file_to_signal_file() before treating the node as a signal file.
+	 */
+	if (unlikely(!FILE_IS_SIGNAL(of->file))) {
+		uk_ofile_release(of);
+		return -EINVAL;
+	}
+
+	sigf = uk_file_to_signal_file(of->file);
+
+	/*
+	 * It is not possible to receive SIGKILL or SIGSTOP signals via a
+	 * signal file descriptor; these signals are silently ignored if
+	 * specified in mask.
+	 */
+	uk_sigcopyset(&sigf->mask, new_mask);
+	uk_sigdelset(&sigf->mask, SIGKILL);
+	uk_sigdelset(&sigf->mask, SIGSTOP);
+
+	/* Make sure we don't miss any newly added signals */
+	uk_sigorset(&pproc->signal->sigfiles_ctx.allmask, &sigf->mask);
+
+	uk_ofile_release(of);
+
+	return 0;
+}
+
+/*
+ * Common implementation of signalfd() and signalfd4(): reject unknown flags,
+ * create a signal file descriptor if `fd` is -1 or update an existing one.
+ */
+static inline int uk_sys_signalfd(int fd, const uk_sigset_t *mask,
+				  size_t masksz, int flags)
+{
+	if (unlikely(masksz != sizeof(uk_sigset_t)))
+		return -EINVAL;
+
+	if (unlikely(!mask))
+		return -EFAULT;
+
+	if (unlikely(flags & ~(SFD_NONBLOCK | SFD_CLOEXEC)))
+		return -EINVAL;
+
+	if (fd == -1)
+		return signalfd_create(mask, flags);
+
+	return signalfd_set_mask(fd, mask);
+}
+
+UK_LLSYSCALL_R_DEFINE(int, signalfd4,
+		      int, fd,
+		      const uk_sigset_t *, mask,
+		      size_t, masksz,
+		      int, flags)
+{
+	return uk_sys_signalfd(fd, mask, masksz, flags);
+}
+
+#if UK_LIBC_SYSCALLS
+int signalfd(int fd, const sigset_t *mask, int flags)
+{
+	/*
+	 * Libc implementations define a larger, extensible representation
+	 * (usually 128 bytes) of `sigset_t` to comply with POSIX standards
+	 * and accommodate potential future expansions. However,
+	 * kernels like Linux typically have a smaller defined `sigset_t` of
+	 * a fixed size of 8 bytes as that is what it uses at the time of
+	 * writing this. When a libc makes syscalls that use signal masks it
+	 * truncates or extracts only the relevant portion of its internal
+	 * representation to pass to the kernel. This helps ensure compatibility
+	 * between userspace applications and the kernel.
+	 *
+	 * Thus, mimic a typical libc here and pass a hardcoded value: that
+	 * of the number of signals supported divided by the number of bits in
+	 * a byte.
+	 */
+	return (int)uk_syscall_e_signalfd4((long)fd, (long)mask,
+					   NSIG / 8, (long)flags);
+}
+#endif /* UK_LIBC_SYSCALLS */
+
+UK_LLSYSCALL_R_DEFINE(int, signalfd,
+		      int, fd,
+		      const uk_sigset_t *, mask,
+		      size_t, masksz)
+{
+	return uk_sys_signalfd(fd, mask, masksz, 0);
+}
+#endif /* CONFIG_LIBPOSIX_FDTAB */
diff --git a/lib/posix-process/signal/signal_file.h b/lib/posix-process/signal/signal_file.h
new file mode 100644
index 000000000..072556086
--- /dev/null
+++ b/lib/posix-process/signal/signal_file.h
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright (c) 2025, Unikraft GmbH and The Unikraft Authors.
+ * Licensed under the BSD-3-Clause License (the "License").
+ * You may not use this file except in compliance with the License.
+ */
+
+#ifndef __UK_PROCESS_SIGNAL_H__
+#error Do not include this header directly
+#endif
+
+#include
+#include
+#include
+
+/* Signal file structure combining file operations with signal handling */
+struct uk_signal_file {
+	/* Allocator used to create this signal file */
+	struct uk_alloc *a;
+	/* Signal file refcount */
+	uk_file_refcnt frefcnt;
+	/* Signal file state */
+	struct uk_file_state fstate;
+	/* Base file structure */
+	struct uk_file f;
+	/* Signal mask for this file */
+	uk_sigset_t mask;
+	/* List head for tracking in signal context */
+	struct uk_list_head sigf_head;
+};
+
+/* Context for managing multiple signal files */
+struct uk_signal_files_ctx {
+	/* Combined signal mask of all tracked files */
+	uk_sigset_t allmask;
+	/* List head for tracking signal files */
+	struct uk_list_head sigfiles;
+};
+
+/*
+ * Initialize signal files context
+ *
+ * param ctx
+ *   Context structure to initialize
+ */
+static inline
+void uk_signal_files_ctx_init(struct uk_signal_files_ctx *ctx)
+{
+	UK_INIT_LIST_HEAD(&ctx->sigfiles);
+	uk_sigemptyset(&ctx->allmask);
+}
+
+/*
+ * Add signal file to context and update combined mask
+ *
+ * param ctx
+ *   Context to modify
+ * param sigf
+ *   Signal file to add
+ */
+static inline
+void uk_signal_files_ctx_add(struct uk_signal_files_ctx *ctx,
+			     struct uk_signal_file *sigf)
+{
+	uk_sigorset(&ctx->allmask, &sigf->mask);
+	uk_list_add_tail(&sigf->sigf_head, &ctx->sigfiles);
+}
+
+/*
+ * Remove signal file from context. Mask is not recalculated but instead the
+ * recalculation is deferred to the next time the signal files list is
+ * iterated upon.
+ *
+ * param ctx
+ *   Context to modify
+ * param sigf
+ *   Signal file to remove
+ */
+static inline
+void uk_signal_files_ctx_del(struct uk_signal_files_ctx *ctx __unused,
+			     struct uk_signal_file *sigf)
+{
+	/*
+	 * NOTE: We do not recalculate the mask here. Instead, as an
+	 * optimization, we defer this operation until we need to iterate
+	 * through the registered signal files which, at this moment,
+	 * is when we want to notify them against a given signal number.
+	 */
+	uk_list_del_init(&sigf->sigf_head);
+}
+
+/*
+ * Check if a signal is present in the combined mask
+ *
+ * param ctx
+ *   Context to check
+ * param signum
+ *   Signal number to check
+ *
+ * @return
+ *   True if signal is in mask, false otherwise
+ */
+static inline
+bool uk_signal_files_ctx_is_set(struct uk_signal_files_ctx *ctx, int signum)
+{
+	return (bool)uk_sigismember(&ctx->allmask, signum);
+}
+
+/*
+ * Notify all signal files that monitor a given signal number. At the same time,
+ * as an optimization, also recalculate the mask since we are iterating over
+ * all signal files anyway.
+ *
+ * param ctx
+ *   Context whose signal files to notify
+ * param signum
+ *   Signal number to check against for monitoring signal files
+ */
+static inline
+void uk_signal_files_ctx_notify(struct uk_signal_files_ctx *ctx,
+				int signum)
+{
+	struct uk_signal_file *it;
+	uk_sigset_t allmask;
+
+	uk_sigemptyset(&allmask);
+
+	uk_list_for_each_entry(it, &ctx->sigfiles, sigf_head) {
+		uk_sigorset(&allmask, &it->mask);
+
+		if (!uk_sigismember(&it->mask, signum))
+			continue;
+
+		uk_file_event_set(&it->f, UKFD_POLLIN);
+	}
+
+	uk_sigcopyset(&ctx->allmask, &allmask);
+}
+
+/*
+ * Create a signal file with a given mask
+ *
+ * param a
+ *   Allocator to use for signal file creation
+ * param mask
+ *   Signal mask to be monitored by the signal file
+ *
+ * @return
+ *   Pointer to the created signal file or ERR2PTR on creation failure
+ */
+const struct uk_file *signal_file_create(struct uk_alloc *a,
+					 const uk_sigset_t *mask);