From: Andrei Tatar Date: Fri, 24 Nov 2023 15:44:20 +0000 (+0100) Subject: lib/posix-timerfd: Introduce posix-timerfd library X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=017ed08186872887a3ce587b92002f69711ef799;p=unikraft%2Funikraft.git lib/posix-timerfd: Introduce posix-timerfd library This change introduces posix-timerfd, a library providing timerfd files. Initial implementation consists of Unikraft-internal API. Signed-off-by: Andrei Tatar --- diff --git a/lib/Makefile.uk b/lib/Makefile.uk index 319a572a2..8dae7ed20 100644 --- a/lib/Makefile.uk +++ b/lib/Makefile.uk @@ -17,6 +17,7 @@ $(eval $(call import_lib,$(CONFIG_UK_BASE)/lib/posix-process)) $(eval $(call import_lib,$(CONFIG_UK_BASE)/lib/posix-socket)) $(eval $(call import_lib,$(CONFIG_UK_BASE)/lib/posix-sysinfo)) $(eval $(call import_lib,$(CONFIG_UK_BASE)/lib/posix-futex)) +$(eval $(call import_lib,$(CONFIG_UK_BASE)/lib/posix-timerfd)) $(eval $(call import_lib,$(CONFIG_UK_BASE)/lib/posix-user)) $(eval $(call import_lib,$(CONFIG_UK_BASE)/lib/ramfs)) $(eval $(call import_lib,$(CONFIG_UK_BASE)/lib/syscall_shim)) diff --git a/lib/posix-timerfd/Config.uk b/lib/posix-timerfd/Config.uk new file mode 100644 index 000000000..eaa74385f --- /dev/null +++ b/lib/posix-timerfd/Config.uk @@ -0,0 +1,7 @@ +config LIBPOSIX_TIMERFD + bool "posix-timerfd: Support for timerfd files" + select LIBPOSIX_FDIO + select LIBUKLOCK + select LIBUKLOCK_MUTEX + select LIBUKTIMECONV + select LIBUKSCHED diff --git a/lib/posix-timerfd/Makefile.uk b/lib/posix-timerfd/Makefile.uk new file mode 100644 index 000000000..ea68dc070 --- /dev/null +++ b/lib/posix-timerfd/Makefile.uk @@ -0,0 +1,6 @@ +$(eval $(call addlib_s,libposix_timerfd,$(CONFIG_LIBPOSIX_TIMERFD))) + +CINCLUDES-$(CONFIG_LIBPOSIX_TIMERFD) += -I$(LIBPOSIX_TIMERFD_BASE)/include +CXXINCLUDES-$(CONFIG_LIBPOSIX_TIMERFD) += -I$(LIBPOSIX_TIMERFD_BASE)/include + +LIBPOSIX_TIMERFD_SRCS-y += $(LIBPOSIX_TIMERFD_BASE)/timerfd.c diff --git 
a/lib/posix-timerfd/include/uk/posix-timerfd.h b/lib/posix-timerfd/include/uk/posix-timerfd.h
new file mode 100644
index 000000000..9f9ff657a
--- /dev/null
+++ b/lib/posix-timerfd/include/uk/posix-timerfd.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright (c) 2023, Unikraft GmbH and The Unikraft Authors.
+ * Licensed under the BSD-3-Clause License (the "License").
+ * You may not use this file except in compliance with the License.
+ */
+
+#ifndef __UK_POSIX_TIMERFD_H__
+#define __UK_POSIX_TIMERFD_H__
+
+#include
+
+/* File creation */
+
+/**
+ * Create a timerfd file that measures time on clock `id`.
+ *
+ * @param id Clock to use; it is checked with clock_getres before use.
+ * @return The new file on success. On failure an ERR2PTR()-encoded negative
+ *   errno: -EINVAL if the clock id is rejected, -ENOMEM if allocation fails,
+ *   -ENODEV if the update thread cannot be created.
+ */
+struct uk_file *uk_timerfile_create(clockid_t id);
+
+/* Internal Syscalls */
+
+/**
+ * Create a timerfd file on clock `id` and open it in the fd table.
+ *
+ * @param id Clock to use, as for uk_timerfile_create().
+ * @param flags TFD_NONBLOCK and TFD_CLOEXEC are translated to O_NONBLOCK and
+ *   O_CLOEXEC on the opened descriptor.
+ * @return The result of uk_fdtab_open() (presumably the new fd -- confirm
+ *   against posix-fdtab), or the negative errno from file creation.
+ */
+int uk_sys_timerfd_create(clockid_t id, int flags);
+
+/**
+ * Arm or disarm the timer behind `f`.
+ *
+ * @param f Timerfd file; any other kind of file yields -EINVAL.
+ * @param flags 0 or TFD_TIMER_ABSTIME; any other bit yields -EINVAL. Without
+ *   TFD_TIMER_ABSTIME a non-zero it_value is taken relative to the current
+ *   time of the timer's clock.
+ * @param new_value New setting; an all-zero it_value disarms the timer.
+ * @param old_value If non-NULL, receives the previous timer setting.
+ * @return 0 on success, negative errno on failure.
+ */
+int uk_sys_timerfd_settime(const struct uk_file *f, int flags,
+			   const struct itimerspec *new_value,
+			   struct itimerspec *old_value);
+
+/**
+ * Query the current setting of the timer behind `f`.
+ *
+ * @param f Timerfd file; any other kind of file yields -EINVAL.
+ * @param curr_value Receives the interval and the time remaining until the
+ *   next expiration.
+ * @return 0 on success, negative errno on failure.
+ */
+int uk_sys_timerfd_gettime(const struct uk_file *f,
+			   struct itimerspec *curr_value);
+
+
+#endif /* __UK_POSIX_TIMERFD_H__ */
diff --git a/lib/posix-timerfd/timerfd.c b/lib/posix-timerfd/timerfd.c
new file mode 100644
index 000000000..ec690a0cb
--- /dev/null
+++ b/lib/posix-timerfd/timerfd.c
@@ -0,0 +1,332 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright (c) 2023, Unikraft GmbH and The Unikraft Authors.
+ * Licensed under the BSD-3-Clause License (the "License").
+ * You may not use this file except in compliance with the License.
+ */
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+/* Address of this string tags files created here; ops compare f->vol to it */
+static const char TIMERFD_VOLID[] = "timerfd_vol";
+
+
+/* Per-file timer state, reachable through uk_file.node */
+struct timerfd_node {
+	/* Current setting; it_value is an absolute time on clkid, or all-zero
+	 * when the timer is disarmed
+	 */
+	struct itimerspec set;
+	/* Expiration count handed out by read; reset to 0 by every read */
+	__u64 val;
+	/* Clock the timer is measured against */
+	clockid_t clkid;
+	/* Thread running timerfd_updatefn for this file */
+	struct uk_thread *upthread;
+};
+
+/* Single allocation backing a timerfd file and everything it points to */
+struct timerfd_alloc {
+	struct uk_alloc *alloc; /* Allocator to return this object to */
+	struct uk_file f;
+	uk_file_refcnt frefcnt;
+	struct uk_file_state fstate;
+	struct timerfd_node node;
+};
+
+/* Result of evaluating a timer setting at a given point in time */
+struct timerfd_status {
+	__u64 exp;   /* Expirations since it_value (0 if not yet reached) */
+	__nsec next; /* Nanoseconds until the next expiration; 0 if none */
+};
+
+/* Internal */
+
+/**
+ * Evaluate the armed setting `set` at time `now`.
+ *
+ * The caller must make sure `set` is armed; an all-zero it_value is treated
+ * like any other point in the past.
+ *
+ * @param set Timer setting with an absolute it_value.
+ * @param now Current time on the timer's clock.
+ * @return Expiration count and time until the next expiration. For a
+ *   one-shot timer (zero interval) that has expired, `next` is 0.
+ */
+static inline
+struct timerfd_status _timerfd_valnext(const struct itimerspec *set,
+				       const struct timespec *now)
+{
+	struct timerfd_status ret;
+	__snsec passed = uk_time_spec_nsecdiff(&set->it_value, now);
+
+	if (passed >= 0) {
+		/* Now is after set */
+		__nsec period = uk_time_spec_to_nsec(&set->it_interval);
+
+		ret.exp = 1;
+		if (period) {
+			ret.exp += passed/period;
+			ret.next = ret.exp * period - passed;
+		} else {
+			ret.next = 0;
+		}
+	} else {
+		/* Now is before set */
+		ret.exp = 0;
+		ret.next = -passed;
+	}
+	return ret;
+}
+
+/**
+ * Report the setting of `d` in the format handed to users: the interval as
+ * stored, and the time remaining until the next expiration instead of the
+ * absolute expiry time.
+ *
+ * A disarmed timer reports an all-zero it_value; it must not go through
+ * _timerfd_valnext(), which would treat the zero it_value as a time in the
+ * distant past.
+ *
+ * @param d Timer state; the caller holds the file lock.
+ * @param now Current time on d->clkid.
+ * @param out Receives the user-visible setting.
+ */
+static void _timerfd_getset(const struct timerfd_node *d,
+			    const struct timespec *now,
+			    struct itimerspec *out)
+{
+	out->it_interval = d->set.it_interval;
+	if (!d->set.it_value.tv_sec && !d->set.it_value.tv_nsec) {
+		/* Disarmed */
+		out->it_value = (struct timespec){0, 0};
+	} else {
+		struct timerfd_status st = _timerfd_valnext(&d->set, now);
+
+		out->it_value = uk_time_spec_from_nsec(st.next);
+	}
+}
+
+/**
+ * Bring the expiration count and the POLLIN event of `f` up to date.
+ *
+ * Called with the file write-locked, both from the update thread and from
+ * uk_sys_timerfd_settime().
+ *
+ * @param f Timerfd file.
+ * @return Monotonic-clock deadline of the next expiration, or 0 if there is
+ *   nothing left to wait for (timer disarmed, or one-shot timer expired).
+ */
+static __nsec _timerfd_update(const struct uk_file *f)
+{
+	__nsec deadline;
+	struct timespec t;
+	__nsec now;
+	struct timerfd_status st;
+	struct timerfd_node *d = (struct timerfd_node *)f->node;
+	struct itimerspec set = d->set;
+
+	/* Clear & sleep if timer disarmed */
+	if (!set.it_value.tv_sec && !set.it_value.tv_nsec) {
+		if (d->val) {
+			uk_file_event_clear(f, UKFD_POLLIN);
+			d->val = 0;
+		}
+		return 0;
+	}
+
+	/* Get time & decide value & deadline */
+	uk_syscall_r_clock_gettime(d->clkid, (uintptr_t)&t);
+	now = ukplat_monotonic_clock();
+	st = _timerfd_valnext(&set, &t);
+	/* st.next == 0 means an expired one-shot timer: no further expiration
+	 * will happen, so sleep like the disarmed case rather than returning a
+	 * deadline of `now`, which would make the update thread wake up
+	 * immediately in a loop and re-raise val after every read.
+	 */
+	deadline = st.next ? now + st.next : 0;
+
+	/* Update val & events */
+	/* NOTE(review): st.exp counts all expirations since it_value, so
+	 * after timerfd_read() zeroes val the next update of a periodic timer
+	 * stores the cumulative total again rather than the count since the
+	 * last read -- confirm whether this is intended.
+	 */
+	if (st.exp != d->val) {
+		d->val = st.exp;
+		if (st.exp)
+			uk_file_event_set(f, UKFD_POLLIN);
+		else
+			uk_file_event_clear(f, UKFD_POLLIN);
+	}
+	return deadline;
+}
+
+/**
+ * Store a new setting in `d` and wake the update thread.
+ *
+ * Disarming only clears it_value (the stored interval is kept) and only
+ * wakes the thread if the timer was armed before.
+ *
+ * @param d Timer state; the caller holds the file write lock.
+ * @param set New setting with an absolute (or all-zero) it_value.
+ */
+static void _timerfd_set(struct timerfd_node *d, const struct itimerspec *set)
+{
+	if (!set->it_value.tv_sec && !set->it_value.tv_nsec) {
+		/* Disarm */
+		if (d->set.it_value.tv_sec || d->set.it_value.tv_nsec) {
+			d->set.it_value = set->it_value;
+			uk_thread_wake(d->upthread);
+		}
+	} else {
+		/* Arm */
+		d->set.it_value = set->it_value;
+		d->set.it_interval = set->it_interval;
+		uk_thread_wake(d->upthread);
+	}
+}
+
+/* Ops */
+
+/**
+ * Read op: hand out the pending expiration count and reset it to zero.
+ *
+ * @param f Timerfd file.
+ * @param iov Only iov[0] is used; it must hold at least sizeof(__u64) bytes.
+ * @param iovcnt Number of entries in `iov`; must be non-zero.
+ * @param off Must be 0.
+ * @param flags Unused.
+ * @return sizeof(__u64) on success; -EINVAL on a foreign file, non-zero
+ *   offset or too small a buffer; -EFAULT on a NULL buffer; -EAGAIN if no
+ *   expiration is pending.
+ */
+static ssize_t timerfd_read(const struct uk_file *f,
+			    const struct iovec *iov, int iovcnt,
+			    off_t off, long flags __unused)
+{
+	struct timerfd_node *d;
+	__u64 v;
+
+	if (unlikely(f->vol != TIMERFD_VOLID))
+		return -EINVAL;
+	if (unlikely(off != 0))
+		return -EINVAL;
+	if (unlikely(!iovcnt || iov[0].iov_len < sizeof(__u64)))
+		return -EINVAL;
+	if (unlikely(!iov[0].iov_base))
+		return -EFAULT;
+
+	d = (struct timerfd_node *)f->node;
+	uk_file_event_clear(f, UKFD_POLLIN);
+	v = ukarch_exchange_n(&d->val, 0);
+	if (!v)
+		return -EAGAIN;
+	*(__u64 *)(iov[0].iov_base) = v;
+	return sizeof(v);
+}
+
+/**
+ * Body of the per-file update thread: refresh the timer state, then sleep
+ * until the next expiration or until woken by _timerfd_set().
+ *
+ * @param arg The timerfd file (struct uk_file *).
+ */
+static __noreturn void timerfd_updatefn(void *arg)
+{
+	__nsec deadline;
+	const struct uk_file *f = (struct uk_file *)arg;
+
+	UK_ASSERT(f->vol == TIMERFD_VOLID);
+	for (;;) {
+		uk_file_wlock(f);
+		deadline = _timerfd_update(f);
+		/* Mark this thread blocked while still holding the lock, then
+		 * unlock and yield; presumably this ordering keeps a wake-up
+		 * issued by a concurrent settime from being lost.
+		 */
+		uk_thread_block_until(uk_thread_current(), deadline);
+		uk_file_wunlock(f);
+
+		uk_sched_yield();
+	}
+}
+
+/**
+ * Release callback of a timerfd file.
+ *
+ * @param f Timerfd file.
+ * @param what Bitmask: UK_FILE_RELEASE_RES stops and collects the update
+ *   thread, UK_FILE_RELEASE_OBJ frees the backing allocation.
+ */
+static void timerfd_release(const struct uk_file *f, int what)
+{
+	UK_ASSERT(f->vol == TIMERFD_VOLID);
+	if (what & UK_FILE_RELEASE_RES) {
+		struct timerfd_node *d = (struct timerfd_node *)f->node;
+
+		/* Terminate the update thread while holding the file lock */
+		uk_file_rlock(f);
+		uk_thread_terminate(d->upthread);
+		uk_file_runlock(f);
+		/* Collect thread */
+		uk_thread_release(d->upthread);
+	}
+	if (what & UK_FILE_RELEASE_OBJ) {
+		struct timerfd_alloc *al;
+
+		al = __containerof(f, struct timerfd_alloc, f);
+		uk_free(al->alloc, al);
+	}
+}
+
+/* Only read is implemented; every other op is the uk_file nop variant */
+static const struct uk_file_ops timerfd_ops = {
+	.read = timerfd_read,
+	.write = uk_file_nop_write,
+	.getstat = uk_file_nop_getstat,
+	.setstat = uk_file_nop_setstat,
+	.ctl = uk_file_nop_ctl
+};
+
+/* File creation */
+
+/**
+ * Create a disarmed timerfd file on clock `id` together with its update
+ * thread.
+ *
+ * @param id Clock to use; checked with clock_getres.
+ * @return The new file, or an ERR2PTR()-encoded negative errno: -EINVAL for
+ *   a rejected clock id, -ENOMEM if allocation fails, -ENODEV if the update
+ *   thread cannot be created.
+ */
+struct uk_file *uk_timerfile_create(clockid_t id)
+{
+	struct uk_alloc *a;
+	struct timerfd_alloc *al;
+	struct uk_thread *ut;
+
+	/* Check clock id */
+	if (unlikely(uk_syscall_r_clock_getres(id, (uintptr_t)NULL)))
+		return ERR2PTR(-EINVAL);
+
+	/* Alloc stuff */
+	a = uk_alloc_get_default();
+	al = uk_malloc(a, sizeof(*al));
+	if (unlikely(!al))
+		return ERR2PTR(-ENOMEM);
+
+	/* Fill in fields */
+	al->alloc = a;
+	al->node = (struct timerfd_node){
+		.set = {
+			.it_interval = {0, 0},
+			.it_value = {0, 0},
+		},
+		.val = 0,
+		.clkid = id,
+		.upthread = NULL
+	};
+	al->fstate = UK_FILE_STATE_INITIALIZER(al->fstate);
+	al->frefcnt = UK_FILE_REFCNT_INITIALIZER;
+	al->f = (struct uk_file){
+		.vol = TIMERFD_VOLID,
+		.node = &al->node,
+		.refcnt = &al->frefcnt,
+		.state = &al->fstate,
+		.ops = &timerfd_ops,
+		._release = timerfd_release
+	};
+
+	/* Create update thread */
+	ut = uk_sched_thread_create(
+		uk_sched_current(),
+		timerfd_updatefn, &al->f,
+		"timerfd_update_thread"
+	);
+	if (unlikely(!ut)) {
+		uk_free(a, al);
+		return ERR2PTR(-ENODEV);
+	}
+	al->node.upthread = ut;
+
+	return &al->f;
+}
+
+/* Internal API */
+
+/**
+ * Create a timerfd file on clock `id` and open it in the fd table.
+ *
+ * @param id Clock to use, as for uk_timerfile_create().
+ * @param flags Bitwise OR of TFD_NONBLOCK and TFD_CLOEXEC; any other bit
+ *   yields -EINVAL.
+ * @return The result of uk_fdtab_open() (presumably the new fd -- confirm
+ *   against posix-fdtab), or a negative errno.
+ */
+int uk_sys_timerfd_create(clockid_t id, int flags)
+{
+	int ret;
+	struct uk_file *timerf;
+	unsigned int mode = O_RDONLY|UKFD_O_NOSEEK;
+
+	/* Reject unknown flags instead of silently ignoring them */
+	if (unlikely(flags & ~(TFD_NONBLOCK | TFD_CLOEXEC)))
+		return -EINVAL;
+
+	/* Get file */
+	timerf = uk_timerfile_create(id);
+	if (unlikely(PTRISERR(timerf)))
+		return PTR2ERR(timerf);
+
+	/* Register fd */
+	if (flags & TFD_NONBLOCK)
+		mode |= O_NONBLOCK;
+	if (flags & TFD_CLOEXEC)
+		mode |= O_CLOEXEC;
+	ret = uk_fdtab_open(timerf, mode);
+	/* Drop the reference obtained at creation, whether or not open
+	 * succeeded
+	 */
+	uk_file_release(timerf);
+	return ret;
+}
+
+
+/**
+ * Arm or disarm the timer behind `f`.
+ *
+ * @param f Timerfd file.
+ * @param flags 0 or TFD_TIMER_ABSTIME. Without TFD_TIMER_ABSTIME a non-zero
+ *   it_value is relative to the current time of the timer's clock.
+ * @param new_value New setting; an all-zero it_value disarms the timer.
+ * @param old_value If non-NULL, receives the previous setting in the same
+ *   format as uk_sys_timerfd_gettime() (relative time to next expiration).
+ * @return 0 on success; -EFAULT if `new_value` is NULL; -EINVAL on invalid
+ *   flags or if `f` is not a timerfd file.
+ */
+int uk_sys_timerfd_settime(const struct uk_file *f, int flags,
+			   const struct itimerspec *new_value,
+			   struct itimerspec *old_value)
+{
+	struct timerfd_node *d;
+	const struct itimerspec *set;
+	struct itimerspec absset;
+	struct timespec t;
+	int disarm;
+
+	/* Must come before the first access to new_value */
+	if (unlikely(!new_value))
+		return -EFAULT;
+	if (unlikely(flags & ~TFD_TIMER_ABSTIME))
+		return -EINVAL;
+	if (unlikely(f->vol != TIMERFD_VOLID))
+		return -EINVAL;
+
+	disarm = !new_value->it_value.tv_sec && !new_value->it_value.tv_nsec;
+
+	d = f->node;
+	uk_file_wlock(f);
+	uk_syscall_r_clock_gettime(d->clkid, (uintptr_t)&t);
+	if (disarm || (flags & TFD_TIMER_ABSTIME)) {
+		set = new_value;
+	} else {
+		/* Timers are stored with an absolute expiry time */
+		absset.it_interval = new_value->it_interval;
+		absset.it_value = uk_time_spec_sum(&new_value->it_value, &t);
+		set = &absset;
+	}
+	if (old_value)
+		_timerfd_getset(d, &t, old_value);
+	_timerfd_set(d, set);
+	(void)_timerfd_update(f);
+	uk_file_wunlock(f);
+	return 0;
+}
+
+/**
+ * Query the current setting of the timer behind `f`.
+ *
+ * @param f Timerfd file.
+ * @param curr_value Receives the interval and the time remaining until the
+ *   next expiration; it_value is all-zero if the timer is disarmed or a
+ *   one-shot timer has already expired.
+ * @return 0 on success; -EFAULT if `curr_value` is NULL; -EINVAL if `f` is
+ *   not a timerfd file.
+ */
+int uk_sys_timerfd_gettime(const struct uk_file *f,
+			   struct itimerspec *curr_value)
+{
+	struct timerfd_node *d;
+	struct timespec t;
+	struct itimerspec cur;
+
+	if (unlikely(!curr_value))
+		return -EFAULT;
+	if (unlikely(f->vol != TIMERFD_VOLID))
+		return -EINVAL;
+
+	d = f->node;
+	uk_file_rlock(f);
+	uk_syscall_r_clock_gettime(d->clkid, (uintptr_t)&t);
+	_timerfd_getset(d, &t, &cur);
+	uk_file_runlock(f);
+
+	*curr_value = cur;
+	return 0;
+}