From ca72f2c19dc28fb2e40801083a41fe1d74a7fb4d Mon Sep 17 00:00:00 2001 From: Marc Rittinghaus Date: Mon, 20 Jun 2022 18:54:05 +0200 Subject: [PATCH] Switch to posix-socket This commit replaces the old socket interface provided by lwip with a driver that plugs into posix-socket. This removes the system call definitions from lwip and separates system calls from the network stack. Signed-off-by: Marc Rittinghaus Reviewed-by: Cezar Craciunoiu Reviewed-by: Razvan Deaconescu Approved-by: Razvan Deaconescu Tested-by: Unikraft CI GitHub-Pull-Request: #17 --- Config.uk | 2 +- sockets.c | 1239 ++++++++++++++++++++--------------------------------- 2 files changed, 455 insertions(+), 786 deletions(-) diff --git a/Config.uk b/Config.uk index be7e74c..d878460 100644 --- a/Config.uk +++ b/Config.uk @@ -283,7 +283,7 @@ endif menuconfig LWIP_SOCKET depends on LWIP_THREADS && (LWIP_UDP || LWIP_TCP) bool "Socket API" - select LIBVFSCORE + select LIBPOSIX_SOCKET default y if LWIP_SOCKET diff --git a/sockets.c b/sockets.c index 8f25e3f..7a26eb6 100644 --- a/sockets.c +++ b/sockets.c @@ -1,9 +1,11 @@ /* SPDX-License-Identifier: BSD-3-Clause */ /* - * Authors: Sharan Santhanam + * Authors: Alexander Jung + * Marc Rittinghaus * - * Copyright (c) 2019, NEC Laboratories Europe GmbH, NEC Corporation. + * Copyright (c) 2020, NEC Laboratories Europe GmbH, NEC Corporation. * All rights reserved. + * Copyright (c) 2021, Karlsruhe Institute of Technology (KIT). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -31,933 +33,600 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -/* network stub calls */ #include -#include -#if CONFIG_LWIP_SOCKET_PPOLL -#include -#endif -#include -#include -#include -#include -#include -#include +#include +#include #include -#include -#include -#include -#include -#include +#include +#include -#define SOCK_NET_SET_ERRNO(errcode) \ - (errno = -(errcode)) - -static int sock_net_close(struct vnode *s_vnode, - struct vfscore_file *vfscore_file); -static int sock_net_write(struct vnode *s_vnode, - struct uio *buf, int ioflag __unused); -static int sock_net_read(struct vnode *s_vnode, - struct vfscore_file *vfscore_file __unused, - struct uio *buf, int ioflag __unused); -static int sock_net_ioctl(struct vnode *s_vnode, - struct vfscore_file *vfscore_file __unused, - unsigned long request, - void *buf); - -#define sock_net_getattr ((vnop_getattr_t) vfscore_vop_einval) -#define sock_net_inactive ((vnop_inactive_t) vfscore_vop_nullop) - -static struct vnops sock_net_vnops = { - .vop_close = sock_net_close, - .vop_write = sock_net_write, - .vop_read = sock_net_read, - .vop_ioctl = sock_net_ioctl, - .vop_getattr = sock_net_getattr, - .vop_inactive = sock_net_inactive +#include +#include +#include +#include + +struct lwip_socket_data { + /* fd of the corresponding lwip socket */ + int lwip_fd; + + /* List of registered eventpolls. The list is synchronized with + * lwip SYS_ARCH_PROTECT, as this lock is held anyways during the event + * callback and needed during poll to receive a current event state + * from the lwip socket. 
+ */ + struct uk_list_head evp_list; }; -#define sock_net_vget ((vfsop_vget_t) vfscore_vop_nullop) +static struct lwip_socket_data * +lwip_socket_data_alloc(struct uk_alloc *a) +{ + struct lwip_socket_data *sock_data; -static struct vfsops sock_net_vfsops = { - .vfs_vget = sock_net_vget, - .vfs_vnops = &sock_net_vnops -}; + sock_data = uk_malloc(a, sizeof(struct lwip_socket_data)); + if (unlikely(!sock_data)) + return NULL; + sock_data->lwip_fd = -1; -static uint64_t s_inode = 0; -/* - * Bogus mount point used by all sockets - */ -static struct mount s_mount = { - .m_op = &sock_net_vfsops -}; + UK_INIT_LIST_HEAD(&sock_data->evp_list); -struct sock_net_file { - struct vfscore_file *vfscore_file; - int sock_fd; -}; + return sock_data; +} -static inline struct sock_net_file *sock_net_file_get(int fd) +static void +lwip_socket_data_free(struct uk_alloc *a, struct lwip_socket_data *sock_data) { - struct sock_net_file *file = NULL; - struct vfscore_file *fos; - - fos = vfscore_get_file(fd); - if (!fos) { - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed with invalid descriptor\n")); - file = ERR2PTR(-EINVAL); - goto EXIT; - } - if (fos->f_dentry->d_vnode->v_type != VSOCK) { - LWIP_DEBUGF(SOCKETS_DEBUG, - ("file descriptor is not a socket\n")); - file = ERR2PTR(-EBADF); - goto EXIT; - } - file = fos->f_data; -EXIT: - return file; + UK_ASSERT(sock_data); + + uk_free(a, sock_data); } -static int sock_fd_alloc(int sock_fd) +static void * +lwip_posix_socket_create(struct posix_socket_driver *d, int family, int type, + int protocol) { - int ret = 0; - int vfs_fd; - struct sock_net_file *file = NULL; - struct vfscore_file *vfs_file = NULL; - struct dentry *s_dentry; - struct vnode *s_vnode; - - /* Reserve file descriptor number */ - vfs_fd = vfscore_alloc_fd(); - if (vfs_fd < 0) { - ret = -ENFILE; - LWIP_DEBUGF(SOCKETS_DEBUG, - ("Failed to allocate file descriptor number\n")); - goto ERR_EXIT; - } + struct lwip_socket_data *sock_data; + void *ret = NULL; - /* Allocate file, dentry, and 
vnode */ - file = uk_calloc(uk_alloc_get_default(), 1, sizeof(*file)); - if (!file) { - ret = -ENOMEM; - LWIP_DEBUGF(SOCKETS_DEBUG, - ("Failed to allocate socket file: Out of memory\n")); - goto ERR_MALLOC_FILE; + sock_data = lwip_socket_data_alloc(d->allocator); + if (unlikely(!sock_data)) { + ret = ERR2PTR(-ENOMEM); + goto EXIT; } - vfs_file = uk_calloc(uk_alloc_get_default(), 1, sizeof(*vfs_file)); - if (!vfs_file) { - ret = -ENOMEM; - LWIP_DEBUGF(SOCKETS_DEBUG, - ("Failed to allocate socket vfs_file: Out of memory\n")); - goto ERR_MALLOC_VFS_FILE; + + sock_data->lwip_fd = lwip_socket(family, type, protocol); + if (unlikely(sock_data->lwip_fd < 0)) { + ret = ERR2PTR(-errno); + goto LWIP_SOCKET_CLEANUP; } - ret = vfscore_vget(&s_mount, s_inode++, &s_vnode); - UK_ASSERT(ret == 0); /* we should not find it in cache */ + ret = sock_data; - if (!s_vnode) { - ret = -ENOMEM; - LWIP_DEBUGF(SOCKETS_DEBUG, - ("Failed to allocate socket vnode: Out of memory\n")); - goto ERR_ALLOC_VNODE; - } +EXIT: + return ret; - uk_mutex_unlock(&s_vnode->v_lock); +LWIP_SOCKET_CLEANUP: + lwip_socket_data_free(d->allocator, sock_data); + return ret; +} - /* - * it doesn't matter that all the dentries have the - * same path since we never lookup for them - */ - s_dentry = dentry_alloc(NULL, s_vnode, "/"); +static void * +lwip_posix_socket_accept4(struct posix_socket_file *file, + struct sockaddr *restrict addr, + socklen_t *restrict addr_len, int flags __unused) +{ + struct lwip_socket_data *sock_data, *new_sock_data; + void *ret = NULL; + + UK_ASSERT(file->sock_data); + + sock_data = (struct lwip_socket_data *)file->sock_data; + UK_ASSERT(sock_data->lwip_fd >= 0); - if (!s_dentry) { - ret = -ENOMEM; - LWIP_DEBUGF(SOCKETS_DEBUG, - ("Failed to allocate socket dentry: Out of memory\n")); - goto ERR_ALLOC_DENTRY; + /* We allocate the socket data prior to accepting the connection so + * that we do not have to close it if the allocation fails */ + new_sock_data = lwip_socket_data_alloc(file->driver->allocator); + if
(unlikely(!new_sock_data)) { + ret = ERR2PTR(-ENOMEM); + goto EXIT; } - /* Put things together, and fill out necessary fields */ - vfs_file->fd = vfs_fd; - vfs_file->f_flags = UK_FWRITE | UK_FREAD; - vfs_file->f_count = 1; - vfs_file->f_data = file; - vfs_file->f_dentry = s_dentry; - vfs_file->f_vfs_flags = UK_VFSCORE_NOPOS; - - s_vnode->v_data = file; - s_vnode->v_type = VSOCK; - - file->vfscore_file = vfs_file; - file->sock_fd = sock_fd; - LWIP_DEBUGF(SOCKETS_DEBUG, ("Allocated socket %d (%x)\n", - file->vfscore_file->fd, - file->sock_fd)); - - /* Storing the information within the vfs structure */ - ret = vfscore_install_fd(vfs_fd, file->vfscore_file); - if (ret) { - LWIP_DEBUGF(SOCKETS_DEBUG, - ("Failed to install socket fd\n")); - goto ERR_VFS_INSTALL; + new_sock_data->lwip_fd = lwip_accept(sock_data->lwip_fd, + addr, addr_len); + if (unlikely(new_sock_data->lwip_fd < 0)) { + ret = ERR2PTR(-errno); + goto LWIP_SOCKET_CLEANUP; } - /* Only the dentry should hold a reference; release ours */ - vrele(s_vnode); - - /* Return file descriptor of our socket */ - return vfs_fd; - -ERR_VFS_INSTALL: - drele(s_dentry); -ERR_ALLOC_DENTRY: - vrele(s_vnode); -ERR_ALLOC_VNODE: - uk_free(uk_alloc_get_default(), vfs_file); -ERR_MALLOC_VFS_FILE: - uk_free(uk_alloc_get_default(), file); -ERR_MALLOC_FILE: - vfscore_put_fd(vfs_fd); -ERR_EXIT: - UK_ASSERT(ret < 0); + /* TODO: set the provided flags */ + + ret = new_sock_data; + +EXIT: return ret; +LWIP_SOCKET_CLEANUP: + lwip_socket_data_free(file->driver->allocator, new_sock_data); + goto EXIT; } -static int sock_net_close(struct vnode *s_vnode, - struct vfscore_file *vfscore_file) +static int +lwip_posix_socket_bind(struct posix_socket_file *file, + const struct sockaddr *addr, + socklen_t addr_len) { + struct lwip_socket_data *sock_data; int ret; - struct sock_net_file *file = NULL; - - file = s_vnode->v_data; - LWIP_DEBUGF(SOCKETS_DEBUG, ("%s fd:%d lwip_fd:%d\n", - __func__, - file->vfscore_file->fd, - file->sock_fd)); - - 
UK_ASSERT(vfscore_file->f_dentry->d_vnode == s_vnode); - UK_ASSERT(s_vnode->v_refcnt == 1); - /* Close and release the lwip socket */ - ret = lwip_close(file->sock_fd); + UK_ASSERT(file->sock_data); - /* - * Free socket file - * The rest of the resources will be freed by vfs - * - * TODO: vfs ignores close errors right now, so free our file - */ - uk_free(uk_alloc_get_default(), file); + sock_data = (struct lwip_socket_data *)file->sock_data; + UK_ASSERT(sock_data->lwip_fd >= 0); - /* - * lwip sets errno and returns -1 in case of error, but - * vfs expects us to return a positive errno - */ - if (ret < 0) - return errno; + ret = lwip_bind(sock_data->lwip_fd, addr, addr_len); + if (unlikely(ret < 0)) + ret = -errno; return ret; } -static int sock_net_write(struct vnode *s_vnode, - struct uio *buf, int ioflag __unused) +static int +lwip_posix_socket_shutdown(struct posix_socket_file *file, int how) { - int ret = 0; - struct sock_net_file *file = NULL; - - file = s_vnode->v_data; - LWIP_DEBUGF(SOCKETS_DEBUG, ("%s fd:%d lwip_fd:%d\n", - __func__, - file->vfscore_file->fd, - file->sock_fd)); - ret = lwip_writev(file->sock_fd, buf->uio_iov, buf->uio_iovcnt); - /* - * lwip sets errno and returns -1 in case of error, but - * vfs expects us to return a positive errno - */ - if (ret < 0) - return errno; + struct lwip_socket_data *sock_data; + int ret; - buf->uio_resid -= ret; - return 0; -} + UK_ASSERT(file->sock_data); -static int sock_net_read(struct vnode *s_vnode, - struct vfscore_file *vfscore_file __unused, - struct uio *buf, int ioflag __unused) -{ - int ret = 0; - struct sock_net_file *file = NULL; - - file = s_vnode->v_data; - LWIP_DEBUGF(SOCKETS_DEBUG, ("%s fd:%d lwip_fd:%d\n", - __func__, - file->vfscore_file->fd, - file->sock_fd)); - ret = lwip_readv(file->sock_fd, buf->uio_iov, buf->uio_iovcnt); - /* - * lwip sets errno and returns -1 in case of error, but - * vfs expects us to return a positive errno - */ - if (ret < 0) - return errno; + sock_data = (struct 
lwip_socket_data *)file->sock_data; + UK_ASSERT(sock_data->lwip_fd >= 0); - buf->uio_resid -= ret; - return 0; -} + ret = lwip_shutdown(sock_data->lwip_fd, how); + if (unlikely(ret < 0)) + ret = -errno; -static int sock_net_ioctl(struct vnode *s_vnode, - struct vfscore_file *vfscore_file __unused, - unsigned long request, - void *buf) -{ - struct sock_net_file *file = NULL; - - file = s_vnode->v_data; - LWIP_DEBUGF(SOCKETS_DEBUG, ("%s fd:%d lwip_fd:%d\n", - __func__, - file->vfscore_file->fd, - file->sock_fd)); - return lwip_ioctl(file->sock_fd, request, buf); + return ret; } -int socket(int domain, int type, int protocol) +static int +lwip_posix_socket_getpeername(struct posix_socket_file *file, + struct sockaddr *restrict addr, + socklen_t *restrict addr_len) { - int ret = 0; - int vfs_fd = 0xff; - int sock_fd = 0; - - /* Create lwip_socket */ - sock_fd = lwip_socket(domain, type, protocol); - if (sock_fd < 0) { - LWIP_DEBUGF(SOCKETS_DEBUG, ("failed to create socket %d\n", - errno)); - ret = -1; - goto EXIT; - } + struct lwip_socket_data *sock_data; + int ret; - /* Allocate the file descriptor */ - vfs_fd = sock_fd_alloc(sock_fd); - if (vfs_fd < 0) { - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed to allocate descriptor %d\n", - errno)); - ret = -1; - /* Setting the errno */ - SOCK_NET_SET_ERRNO(vfs_fd); - goto LWIP_SOCKET_CLEANUP; - } + UK_ASSERT(file->sock_data); + + sock_data = (struct lwip_socket_data *)file->sock_data; + UK_ASSERT(sock_data->lwip_fd >= 0); + + ret = lwip_getpeername(sock_data->lwip_fd, addr, addr_len); + if (unlikely(ret < 0)) + ret = -errno; - /* Returning the file descriptor to the user */ - ret = vfs_fd; -EXIT: return ret; -LWIP_SOCKET_CLEANUP: - /* Cleanup the lwip socket */ - lwip_close(sock_fd); - goto EXIT; } -int accept(int s, struct sockaddr *addr, socklen_t *addrlen) +static int +lwip_posix_socket_getsockname(struct posix_socket_file *file, + struct sockaddr *restrict addr, + socklen_t *restrict addr_len) { - int ret = 0; - struct 
sock_net_file *file; - int sock_fd, vfs_fd; - - file = sock_net_file_get(s); - if (PTRISERR(file)) { - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed to accept incoming connection\n")); - ret = -1; - /* Setting the errno */ - SOCK_NET_SET_ERRNO(PTR2ERR(file)); - goto EXIT; - } + struct lwip_socket_data *sock_data; + int ret; - /* Accept an incoming connection */ - sock_fd = lwip_accept(file->sock_fd, addr, addrlen); - if (sock_fd < 0) { - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed to accept incoming connection\n")); - ret = -1; - goto EXIT_FDROP; - } + UK_ASSERT(file->sock_data); - /* Allocate the file descriptor for the accepted connection */ - vfs_fd = sock_fd_alloc(sock_fd); - if (vfs_fd < 0) { - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed to allocate descriptor for accepted connection\n")); - ret = -1; - /* Setting the errno */ - SOCK_NET_SET_ERRNO(vfs_fd); - goto LWIP_SOCKET_CLEANUP; - } - ret = vfs_fd; -EXIT_FDROP: - vfscore_put_file(file->vfscore_file); /* release refcount */ -EXIT: - return ret; + sock_data = (struct lwip_socket_data *)file->sock_data; + UK_ASSERT(sock_data->lwip_fd >= 0); -LWIP_SOCKET_CLEANUP: - lwip_close(sock_fd); - goto EXIT_FDROP; -} + ret = lwip_getsockname(sock_data->lwip_fd, addr, addr_len); + if (unlikely(ret < 0)) + ret = -errno; -int bind(int s, const struct sockaddr *name, socklen_t namelen) -{ - int ret = 0; - struct sock_net_file *file = NULL; - - file = sock_net_file_get(s); - if (PTRISERR(file)) { - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed to identify socket descriptor\n")); - ret = -1; - /* Setting the errno */ - SOCK_NET_SET_ERRNO(PTR2ERR(file)); - goto EXIT; - } - /* Bind an incoming connection */ - ret = lwip_bind(file->sock_fd, name, namelen); - if (ret < 0) { - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed to bind with socket\n")); - ret = -1; - goto EXIT_FDROP; - } -EXIT_FDROP: - vfscore_put_file(file->vfscore_file); /* release refcount */ -EXIT: return ret; } -int poll(struct pollfd fds[], nfds_t nfds, int timeout) +static int 
+lwip_posix_socket_getsockopt(struct posix_socket_file *file, int level, + int optname, void *restrict optval, + socklen_t *restrict optlen) { + struct lwip_socket_data *sock_data; int ret; - unsigned int i; - struct sock_net_file *file; - struct pollfd lwip_fds[nfds]; - - for (i = 0; i < nfds; i++) { - if (fds[i].fd < 0) - lwip_fds[i].fd = fds[i].fd; - else { - file = sock_net_file_get(fds[i].fd); - if (PTRISERR(file)) { - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed to identify socket descriptor\n")); - ret = -1; - /* Setting the errno */ - SOCK_NET_SET_ERRNO(PTR2ERR(file)); - goto EXIT; - } - lwip_fds[i].fd = file->sock_fd; - lwip_fds[i].events = fds[i].events; - vfscore_put_file(file->vfscore_file); /* release refcount */ - } - } - ret = lwip_poll(lwip_fds, nfds, timeout); - if (ret < 0) - goto EXIT; + UK_ASSERT(file->sock_data); - for (i = 0; i < nfds; i++) { - if (fds[i].fd < 0) - fds[i].revents = 0; - else - fds[i].revents = lwip_fds[i].revents; - } + sock_data = (struct lwip_socket_data *)file->sock_data; + UK_ASSERT(sock_data->lwip_fd >= 0); + + ret = lwip_getsockopt(sock_data->lwip_fd, level, optname, + optval, optlen); + if (unlikely(ret < 0)) + ret = -errno; -EXIT: return ret; } -#if CONFIG_LWIP_SOCKET_PPOLL -#if CONFIG_LIBPTHREAD_EMBEDDED -#define __sigmask pthread_sigmask -#else -#define __sigmask sigprocmask -#endif -int ppoll(struct pollfd *fds, nfds_t nfds, const struct timespec *tmo_p, - const sigset_t *sigmask) +static int +lwip_posix_socket_setsockopt(struct posix_socket_file *file, int level, + int optname, const void *optval, socklen_t optlen) { - sigset_t origmask; - int timeout, rc, _rc; + struct lwip_socket_data *sock_data; + int ret; - if (!fds) { - errno = EFAULT; - rc = -1; - goto out; - } + UK_ASSERT(file->sock_data); + + sock_data = (struct lwip_socket_data *)file->sock_data; + UK_ASSERT(sock_data->lwip_fd >= 0); + + ret = lwip_setsockopt(sock_data->lwip_fd, level, optname, + optval, optlen); + if (unlikely(ret < 0)) + ret = -errno; - timeout 
= (tmo_p == NULL) ? -1 : - (tmo_p->tv_sec * 1000 + tmo_p->tv_nsec / 1000000); - rc = __sigmask(SIG_SETMASK, sigmask, &origmask); - if (rc) - goto out; - rc = poll(fds, nfds, timeout); - _rc = __sigmask(SIG_SETMASK, &origmask, NULL); - if (rc == 0 && _rc != 0) - rc = _rc; -out: - return rc; + return ret; } -#endif /* CONFIG_LWIP_SOCKET_PPOLL */ -int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, - struct timeval *timeout) +static int +lwip_posix_socket_connect(struct posix_socket_file *file, + const struct sockaddr *addr, socklen_t addr_len) { - uint64_t nsecs; - fd_set rd, wr, xc; - int i, ret, maxfd; - struct sock_net_file *file; - - if (nfds == 0 && timeout != NULL) { - nsecs = timeout->tv_sec * 1000000000; - nsecs += timeout->tv_usec * 1000; - uk_sched_thread_sleep(nsecs); - return 0; - } + struct lwip_socket_data *sock_data; + int ret; - /* translate the public (vfscore) fds into lwIP socket fds */ - FD_ZERO(&rd); - FD_ZERO(&wr); - FD_ZERO(&xc); - maxfd = 0; - for (i = 0; i < nfds; i++) { - if (readfds && FD_ISSET(i, readfds)) { - file = sock_net_file_get(i); - if (PTRISERR(file)) { -#if CONFIG_LWIP_SOCKET_SELECT_GENERIC_FDS - /* We allow other fd types, but we don't support them */ - if (PTR2ERR(file) == -EBADF) { - FD_CLR(i, readfds); - continue; - } -#endif - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed to identify socket descriptor\n")); - ret = -1; - /* Setting the errno */ - SOCK_NET_SET_ERRNO(PTR2ERR(file)); - goto EXIT; - } - if (maxfd < file->sock_fd) - maxfd = file->sock_fd; - FD_SET(file->sock_fd, &rd); - vfscore_put_file(file->vfscore_file); /* release refcount */ - } - if (writefds && FD_ISSET(i, writefds)) { - file = sock_net_file_get(i); - if (PTRISERR(file)) { -#if CONFIG_LWIP_SOCKET_SELECT_GENERIC_FDS - /* We allow other fd types, but we don't support them */ - if (PTR2ERR(file) == -EBADF) { - FD_CLR(i, writefds); - continue; - } -#endif - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed to identify socket descriptor\n")); - ret = -1; - 
/* Setting the errno */ - SOCK_NET_SET_ERRNO(PTR2ERR(file)); - goto EXIT; - } - if (maxfd < file->sock_fd) - maxfd = file->sock_fd; - FD_SET(file->sock_fd, &wr); - vfscore_put_file(file->vfscore_file); /* release refcount */ - } - if (exceptfds && FD_ISSET(i, exceptfds)) { - file = sock_net_file_get(i); - if (PTRISERR(file)) { -#if CONFIG_LWIP_SOCKET_SELECT_GENERIC_FDS - /* We allow other fd types, but we don't support them */ - if (PTR2ERR(file) == -EBADF) { - FD_CLR(i, exceptfds); - continue; - } -#endif - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed to identify socket descriptor\n")); - ret = -1; - /* Setting the errno */ - SOCK_NET_SET_ERRNO(PTR2ERR(file)); - goto EXIT; - } - if (maxfd < file->sock_fd) - maxfd = file->sock_fd; - FD_SET(file->sock_fd, &xc); - vfscore_put_file(file->vfscore_file); /* release refcount */ - } - } + UK_ASSERT(file->sock_data); - ret = lwip_select(maxfd+1, &rd, &wr, &xc, timeout); - if (ret < 0) - goto EXIT; + sock_data = (struct lwip_socket_data *)file->sock_data; + UK_ASSERT(sock_data->lwip_fd >= 0); - /* translate back from lwIP socket fds to public (vfscore) fds. - * But there's no way to go from lwIP to vfscore, so iterate over - * everything again. Check which ones were set originally, and if - * they aren't also set in lwip_select()'s return, clear them. - */ - for (i = 0; i < nfds; i++) { - if (readfds && FD_ISSET(i, readfds)) { - /* This lookup can't fail, or it would already have - * failed during the translation above. - */ - file = sock_net_file_get(i); - if (!FD_ISSET(file->sock_fd, &rd)) - FD_CLR(i, readfds); - vfscore_put_file(file->vfscore_file); /* release refcount */ - } - if (writefds && FD_ISSET(i, writefds)) { - /* This lookup can't fail, or it would already have - * failed during the translation above. 
- */ - file = sock_net_file_get(i); - if (!FD_ISSET(file->sock_fd, &wr)) - FD_CLR(i, writefds); - vfscore_put_file(file->vfscore_file); /* release refcount */ - } - if (exceptfds && FD_ISSET(i, exceptfds)) { - /* This lookup can't fail, or it would already have - * failed during the translation above. - */ - file = sock_net_file_get(i); - if (!FD_ISSET(file->sock_fd, &xc)) - FD_CLR(i, exceptfds); - vfscore_put_file(file->vfscore_file); /* release refcount */ - } - } + ret = lwip_connect(sock_data->lwip_fd, addr, addr_len); + if (unlikely(ret < 0)) + ret = -errno; -EXIT: return ret; } -int shutdown(int s, int how) +static int +lwip_posix_socket_listen(struct posix_socket_file *file, int backlog) { - int ret = 0; - struct sock_net_file *file = NULL; - - file = sock_net_file_get(s); - if (PTRISERR(file)) { - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed to identify socket descriptor\n")); - ret = -1; - /* Setting the errno */ - SOCK_NET_SET_ERRNO(PTR2ERR(file)); - goto EXIT; - } - /* Shutdown of the descriptor */ - ret = lwip_shutdown(file->sock_fd, how); - vfscore_put_file(file->vfscore_file); /* release refcount */ -EXIT: - return ret; -} + struct lwip_socket_data *sock_data; + int ret; -int getpeername(int s, struct sockaddr *name, socklen_t *namelen) -{ - int ret = 0; - struct sock_net_file *file = NULL; - - file = sock_net_file_get(s); - if (PTRISERR(file)) { - LWIP_DEBUGF(SOCKETS_DEBUG, ("failed to identify socket\n")); - ret = -1; - SOCK_NET_SET_ERRNO(PTR2ERR(file)); - goto EXIT; - } - ret = lwip_getpeername(file->sock_fd, name, namelen); - vfscore_put_file(file->vfscore_file); /* release refcount */ -EXIT: - return ret; -} + UK_ASSERT(file->sock_data); -int getsockname(int s, struct sockaddr *name, socklen_t *namelen) -{ - int ret = 0; - struct sock_net_file *file = NULL; - - file = sock_net_file_get(s); - if (PTRISERR(file)) { - LWIP_DEBUGF(SOCKETS_DEBUG, ("failed to identify socket\n")); - ret = -1; - SOCK_NET_SET_ERRNO(PTR2ERR(file)); - goto EXIT; - } - ret = 
lwip_getsockname(file->sock_fd, name, namelen); - vfscore_put_file(file->vfscore_file); /* release refcount */ -EXIT: - return ret; -} + sock_data = (struct lwip_socket_data *)file->sock_data; + UK_ASSERT(sock_data->lwip_fd >= 0); + + ret = lwip_listen(sock_data->lwip_fd, backlog); + if (unlikely(ret < 0)) + ret = -errno; -int getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen) -{ - int ret = 0; - struct sock_net_file *file = NULL; - - file = sock_net_file_get(s); - if (PTRISERR(file)) { - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed to identify socket descriptor\n")); - ret = -1; - SOCK_NET_SET_ERRNO(PTR2ERR(file)); - goto EXIT; - } - ret = lwip_getsockopt(file->sock_fd, level, optname, optval, optlen); - vfscore_put_file(file->vfscore_file); /* release refcount */ -EXIT: return ret; } -int setsockopt(int s, int level, int optname, const void *optval, - socklen_t optlen) +static ssize_t +lwip_posix_socket_recvfrom(struct posix_socket_file *file, void *restrict buf, + size_t len, int flags, struct sockaddr *from, + socklen_t *restrict fromlen) { - int ret = 0; - struct sock_net_file *file = NULL; - - file = sock_net_file_get(s); - if (PTRISERR(file)) { - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed to identify socket descriptor\n")); - ret = -1; - SOCK_NET_SET_ERRNO(PTR2ERR(file)); - goto EXIT; - } - ret = lwip_setsockopt(file->sock_fd, level, optname, optval, optlen); - vfscore_put_file(file->vfscore_file); /* release refcount */ -EXIT: + struct lwip_socket_data *sock_data; + ssize_t ret; + + UK_ASSERT(file->sock_data); + + sock_data = (struct lwip_socket_data *)file->sock_data; + UK_ASSERT(sock_data->lwip_fd >= 0); + + ret = lwip_recvfrom(sock_data->lwip_fd, buf, len, flags, from, fromlen); + if (unlikely(ret < 0)) + ret = -errno; + return ret; } -int connect(int s, const struct sockaddr *name, socklen_t namelen) +static ssize_t +lwip_posix_socket_recvmsg(struct posix_socket_file *file, struct msghdr *msg, + int flags) { - int ret = 0; - struct 
sock_net_file *file = NULL; - - file = sock_net_file_get(s); - if (PTRISERR(file)) { - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed to identify socket descriptor\n")); - ret = -1; - SOCK_NET_SET_ERRNO(PTR2ERR(file)); - goto EXIT; - } - ret = lwip_connect(file->sock_fd, name, namelen); - vfscore_put_file(file->vfscore_file); /* release refcount */ -EXIT: + struct lwip_socket_data *sock_data; + ssize_t ret; + + UK_ASSERT(file->sock_data); + + sock_data = (struct lwip_socket_data *)file->sock_data; + UK_ASSERT(sock_data->lwip_fd >= 0); + + ret = lwip_recvmsg(sock_data->lwip_fd, msg, flags); + if (unlikely(ret < 0)) + ret = -errno; + return ret; } -int listen(int s, int backlog) +static ssize_t +lwip_posix_socket_sendmsg(struct posix_socket_file *file, + const struct msghdr *msg, int flags) { - int ret = 0; - struct sock_net_file *file = NULL; - - file = sock_net_file_get(s); - if (PTRISERR(file)) { - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed to identify socket descriptor\n")); - ret = -1; - SOCK_NET_SET_ERRNO(PTR2ERR(file)); - goto EXIT; - } - ret = lwip_listen(file->sock_fd, backlog); - vfscore_put_file(file->vfscore_file); /* release refcount */ -EXIT: + struct lwip_socket_data *sock_data; + ssize_t ret; + + UK_ASSERT(file->sock_data); + + sock_data = (struct lwip_socket_data *)file->sock_data; + UK_ASSERT(sock_data->lwip_fd >= 0); + + ret = lwip_sendmsg(sock_data->lwip_fd, msg, flags); + if (unlikely(ret < 0)) + ret = -errno; + return ret; } -int recv(int s, void *mem, size_t len, int flags) +static ssize_t +lwip_posix_socket_sendto(struct posix_socket_file *file, const void *buf, + size_t len, int flags, + const struct sockaddr *dest_addr, + socklen_t addrlen) { - int ret = 0; - struct sock_net_file *file = NULL; - - file = sock_net_file_get(s); - if (PTRISERR(file)) { - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed to identify socket descriptor\n")); - ret = -1; - SOCK_NET_SET_ERRNO(PTR2ERR(file)); - goto EXIT; - } - ret = lwip_recv(file->sock_fd, mem, len, flags); - 
vfscore_put_file(file->vfscore_file); /* release refcount */ -EXIT: + struct lwip_socket_data *sock_data; + ssize_t ret; + + UK_ASSERT(file->sock_data); + + sock_data = (struct lwip_socket_data *)file->sock_data; + UK_ASSERT(sock_data->lwip_fd >= 0); + + ret = lwip_sendto(sock_data->lwip_fd, buf, len, flags, + dest_addr, addrlen); + if (unlikely(ret < 0)) + ret = -errno; + return ret; } -int recvfrom(int s, void *mem, size_t len, int flags, - struct sockaddr *from, socklen_t *fromlen) +static ssize_t +lwip_posix_socket_read(struct posix_socket_file *file, const struct iovec *iov, + int iovcnt) { - int ret = 0; - struct sock_net_file *file = NULL; - - file = sock_net_file_get(s); - if (PTRISERR(file)) { - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed to identify socket descriptor\n")); - ret = -1; - SOCK_NET_SET_ERRNO(PTR2ERR(file)); - goto EXIT; - } - ret = lwip_recvfrom(file->sock_fd, mem, len, flags, from, fromlen); - vfscore_put_file(file->vfscore_file); /* release refcount */ -EXIT: + struct lwip_socket_data *sock_data; + ssize_t ret; + + UK_ASSERT(file->sock_data); + + sock_data = (struct lwip_socket_data *)file->sock_data; + UK_ASSERT(sock_data->lwip_fd >= 0); + + ret = lwip_readv(sock_data->lwip_fd, iov, iovcnt); + if (unlikely(ret < 0)) + ret = -errno; + return ret; } -int recvmsg(int s, struct msghdr *msg, int flags) +static ssize_t +lwip_posix_socket_write(struct posix_socket_file *file, const struct iovec *iov, + int iovcnt) { - int ret = 0; - struct sock_net_file *file = NULL; - - file = sock_net_file_get(s); - if (PTRISERR(file)) { - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed to identify socket descriptor\n")); - ret = -1; - SOCK_NET_SET_ERRNO(PTR2ERR(file)); - goto EXIT; - } - ret = lwip_recvmsg(file->sock_fd, msg, flags); - vfscore_put_file(file->vfscore_file); /* release refcount */ -EXIT: + struct lwip_socket_data *sock_data; + ssize_t ret; + + UK_ASSERT(file->sock_data); + + sock_data = (struct lwip_socket_data *)file->sock_data; + UK_ASSERT(sock_data->lwip_fd 
>= 0); + + ret = lwip_writev(sock_data->lwip_fd, iov, iovcnt); + if (unlikely(ret < 0)) + ret = -errno; + return ret; } -int send(int s, const void *dataptr, size_t size, int flags) +static int +lwip_posix_socket_close(struct posix_socket_file *file) { - int ret = 0; - struct sock_net_file *file = NULL; - - file = sock_net_file_get(s); - if (PTRISERR(file)) { - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed to identify socket descriptor\n")); - ret = -1; - SOCK_NET_SET_ERRNO(PTR2ERR(file)); - goto EXIT; - } - ret = lwip_send(file->sock_fd, dataptr, size, flags); - vfscore_put_file(file->vfscore_file); /* release refcount */ -EXIT: + struct lwip_socket_data *sock_data; + int ret; + + UK_ASSERT(file->sock_data); + + sock_data = (struct lwip_socket_data *)file->sock_data; + UK_ASSERT(sock_data->lwip_fd >= 0); + + ret = lwip_close(sock_data->lwip_fd); + if (unlikely(ret < 0)) + ret = -errno; + + lwip_socket_data_free(file->driver->allocator, sock_data); + return ret; } -int sendmsg(int s, const struct msghdr *message, int flags) +static int +lwip_posix_socket_ioctl(struct posix_socket_file *file, int request, void *argp) { - int ret = 0; - struct sock_net_file *file = NULL; - - file = sock_net_file_get(s); - if (PTRISERR(file)) { - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed to identify socket descriptor\n")); - ret = -1; - SOCK_NET_SET_ERRNO(PTR2ERR(file)); - goto EXIT; - } - ret = lwip_sendmsg(file->sock_fd, message, flags); - vfscore_put_file(file->vfscore_file); /* release refcount */ -EXIT: + struct lwip_socket_data *sock_data; + int ret; + + UK_ASSERT(file->sock_data); + + sock_data = (struct lwip_socket_data *)file->sock_data; + UK_ASSERT(sock_data->lwip_fd >= 0); + + ret = lwip_ioctl(sock_data->lwip_fd, request, argp); + if (unlikely(ret < 0)) + ret = -errno; + return ret; } -int sendto(int s, const void *dataptr, size_t size, int flags, - const struct sockaddr *to, socklen_t tolen) +#if LWIP_NETCONN_FULLDUPLEX +#define NETCONN_RECVMBOX_WAITABLE(conn) \ + 
(sys_mbox_valid(&(conn)->recvmbox) && \ + (((conn)->flags & NETCONN_FLAG_MBOXINVALID) == 0)) +#else /* LWIP_NETCONN_FULLDUPLEX */ +#define NETCONN_RECVMBOX_WAITABLE(conn) \ + sys_mbox_valid(&(conn)->recvmbox) +#endif /* LWIP_NETCONN_FULLDUPLEX */ + +static unsigned int +get_lwip_socket_events(struct lwip_sock *sock) { - int ret = 0; - struct sock_net_file *file = NULL; - - file = sock_net_file_get(s); - if (PTRISERR(file)) { - LWIP_DEBUGF(SOCKETS_DEBUG, - ("failed to identify socket descriptor\n")); - ret = -1; - SOCK_NET_SET_ERRNO(PTR2ERR(file)); - goto EXIT; + unsigned int events = 0; + + UK_ASSERT(sock); + + /* A TCP connection may be in not-connected state. Don't report it as + * readable or writeable. + */ + if ((NETCONNTYPE_GROUP(sock->conn->type) == NETCONN_TCP) && + (sock->conn->state == NETCONN_NONE) && + (!NETCONN_RECVMBOX_WAITABLE(sock->conn))) { + if (sock->errevent != 0) + events |= EPOLLERR; + + return events; } - ret = lwip_sendto(file->sock_fd, dataptr, size, flags, to, tolen); - vfscore_put_file(file->vfscore_file); /* release refcount */ -EXIT: - return ret; + + if (sock->lastdata.pbuf || sock->rcvevent > 0) + events |= EPOLLIN | EPOLLRDNORM; + if (sock->sendevent != 0) + events |= EPOLLOUT | EPOLLWRNORM; + if (sock->errevent != 0) + events |= EPOLLERR; + + return events; } -int socketpair(int domain, int type, int protocol, int socks[2]) +void +lwip_posix_socket_event_callback(struct lwip_sock *sock, + enum netconn_evt evt __unused, + u16_t len __unused) { - int listener; - int reuse = 1; - union { - struct sockaddr_in inaddr; - struct sockaddr addr; - } a; + struct lwip_socket_data *sock_data; + struct eventpoll_cb *ecb; + struct uk_list_head *itr; + unsigned int events; - socklen_t addrlen = sizeof(a.inaddr); + UK_ASSERT(sock); - listener = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (unlikely(!sock->sock_data)) + return; - memset(&a, 0, sizeof(a)); - a.inaddr.sin_family = AF_INET; - a.inaddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - 
a.inaddr.sin_port = 0; + sock_data = (struct lwip_socket_data *)sock->sock_data; + UK_ASSERT(sock_data->lwip_fd == sock->conn->socket); - if (setsockopt(listener, SOL_SOCKET, SO_REUSEADDR, - (char*) &reuse, (socklen_t) sizeof(reuse)) == -1) - goto error; + events = get_lwip_socket_events(sock); + if (!events) + return; /* empty event mask, nothing to signal */ - if (bind(listener, &a.addr, sizeof(a.inaddr)) < 0) - goto error; + uk_list_for_each(itr, &sock_data->evp_list) { + ecb = uk_list_entry(itr, struct eventpoll_cb, cb_link); - if (getsockname(listener, &a.addr, &addrlen) < 0) - goto error; + UK_ASSERT(ecb->unregister); - if (listen(listener, 1) < 0) - goto error; + eventpoll_signal(ecb, events); + } +} - socks[0] = socket(AF_INET, SOCK_STREAM, 0); +static void +lwip_socket_unregister_eventpoll(struct eventpoll_cb *ecb) /* Unlinks ecb from the list it is queued on and clears its data and unregister fields, all between SYS_ARCH_PROTECT and SYS_ARCH_UNPROTECT. */ +{ + SYS_ARCH_DECL_PROTECT(lev); - if (socks[0] < 0) - goto error; + UK_ASSERT(ecb); - if (connect(socks[0], &a.addr, sizeof(a.inaddr)) < 0) - goto error; + SYS_ARCH_PROTECT(lev); + UK_ASSERT(!uk_list_empty(&ecb->cb_link)); + uk_list_del(&ecb->cb_link); - socks[1] = accept(listener, NULL, NULL); + ecb->data = NULL; + ecb->unregister = NULL; + SYS_ARCH_UNPROTECT(lev); +} - if (socks[1] < 0) - goto error; +static int +lwip_posix_socket_poll(struct posix_socket_file *file, unsigned int *revents, + struct eventpoll_cb *ecb) /* Stores the current event mask of the socket in *revents and, if ecb has no unregister callback yet, queues ecb on the evp_list of the socket data. */ +{ + struct lwip_socket_data *sock_data; + struct lwip_sock *sock; + SYS_ARCH_DECL_PROTECT(lev); + + UK_ASSERT(file->sock_data); + UK_ASSERT(revents); + + sock_data = (struct lwip_socket_data *)file->sock_data; + + SYS_ARCH_PROTECT(lev); + /* This is a bit hacky but lwip does not provide a different public + * interface to get a reference to the socket. Furthermore, this + * function does not increase the reference count which is good + * as we do not hold the reference longer than the lock anyways. Since + * we need to hold the lock for evaluating the socket state this fits + * in well. 
+ */ + sock = lwip_socket_dbg_get_socket(sock_data->lwip_fd); + *revents = get_lwip_socket_events(sock); - close(listener); - return 0; + if (!ecb->unregister) { + UK_ASSERT(uk_list_empty(&ecb->cb_link)); + UK_ASSERT(!ecb->data); + + /* This is the first time we see this cb. Add it to the + * eventpoll list and set the unregister callback so + * we remove it when the eventpoll is freed. + */ + uk_list_add_tail(&ecb->cb_link, &sock_data->evp_list); + + ecb->data = sock_data; + ecb->unregister = lwip_socket_unregister_eventpoll; + + sock->sock_data = sock_data; + } + SYS_ARCH_UNPROTECT(lev); -error: - errno = ENOTSUP; - return -1; + return 0; } -#ifdef LWIP_SOCKET -unsigned int if_nametoindex(const char *ifname) -{ - int ret; +static struct posix_socket_ops lwip_posix_socket_ops = { + /* POSIX interfaces */ + .create = lwip_posix_socket_create, + .accept4 = lwip_posix_socket_accept4, + .bind = lwip_posix_socket_bind, + .shutdown = lwip_posix_socket_shutdown, + .getpeername = lwip_posix_socket_getpeername, + .getsockname = lwip_posix_socket_getsockname, + .getsockopt = lwip_posix_socket_getsockopt, + .setsockopt = lwip_posix_socket_setsockopt, + .connect = lwip_posix_socket_connect, + .listen = lwip_posix_socket_listen, + .recvfrom = lwip_posix_socket_recvfrom, + .recvmsg = lwip_posix_socket_recvmsg, + .sendmsg = lwip_posix_socket_sendmsg, + .sendto = lwip_posix_socket_sendto, + /* vfscore ops */ + .read = lwip_posix_socket_read, + .write = lwip_posix_socket_write, + .close = lwip_posix_socket_close, + .ioctl = lwip_posix_socket_ioctl, + .poll = lwip_posix_socket_poll, +}; - ret = lwip_if_nametoindex(ifname); +POSIX_SOCKET_FAMILY_REGISTER(AF_INET, &lwip_posix_socket_ops); - return ret; +#ifdef CONFIG_LWIP_IPV6 +POSIX_SOCKET_FAMILY_REGISTER(AF_INET6, &lwip_posix_socket_ops, NULL); +#endif /* CONFIG_LWIP_IPV6 */ + +#include +unsigned int if_nametoindex(const char *ifname) +{ + return lwip_if_nametoindex(ifname); } char *if_indextoname(unsigned int ifindex, char *ifname) 
{ - char *ret; - - ret = lwip_if_indextoname(ifindex, ifname); - - return ret; + return lwip_if_indextoname(ifindex, ifname); } -#endif -- 2.39.5