From: Dragos Iulian Argint Date: Sat, 7 May 2022 08:54:09 +0000 (+0300) Subject: Add patches for system call handling X-Git-Tag: RELEASE-0.11.0~34 X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=f24bbab96811e92410b706cb2d6f82428d7dac6d;p=unikraft%2Flibs%2Fmusl.git Add patches for system call handling Musl wrappers use a binary system call, e.g. for x86_64 the `syscall` instruction is used, which is a trap. In a single-address-space OS such as a Unikraft unikernel, this is just overhead. Hence, we changed the way musl deals with syscall handling by using some syscall shim layer macros. Those will replace binary system calls with function calls. Signed-off-by: Dragos Iulian Argint Reviewed-by: Sergiu Moga Reviewed-by: Simon Kuenzer Approved-by: Simon Kuenzer Tested-by: Unikraft CI GitHub-Closes: #5 --- diff --git a/patches/0005-Use-syscall-shim-layer.patch b/patches/0005-Use-syscall-shim-layer.patch deleted file mode 100644 index 7e628e5..0000000 --- a/patches/0005-Use-syscall-shim-layer.patch +++ /dev/null @@ -1,151 +0,0 @@ -From 3a8c96c79ce087a8edb8e3f4c6f02615da5bba4c Mon Sep 17 00:00:00 2001 -From: Gaulthier Gain -Date: Sat, 25 Jan 2020 15:37:55 +0100 -Subject: [PATCH] Use syscall_shim layer instead of musl internal syscalls - -Signed-off-by: Gaulthier Gain ---- - src/internal/syscall.h | 98 +++++++++++------------------------------- - 1 file changed, 25 insertions(+), 73 deletions(-) - -diff --git a/src/internal/syscall.h b/src/internal/syscall.h -index 6d378a8..655e99a 100644 ---- a/src/internal/syscall.h -+++ b/src/internal/syscall.h -@@ -1,8 +1,8 @@ --#ifndef _INTERNAL_SYSCALL_H --#define _INTERNAL_SYSCALL_H -+#ifndef UK_MUSL_SYSCALL -+#define UK_MUSL_SYSCALL - - #include --#include "syscall_arch.h" -+#include - - #ifndef SYSCALL_RLIM_INFINITY - #define SYSCALL_RLIM_INFINITY (~0ULL) -@@ -12,69 +12,40 @@ - #define SYSCALL_MMAP2_UNIT 4096ULL - #endif - --#ifndef __SYSCALL_LL_PRW --#define __SYSCALL_LL_PRW(x) __SYSCALL_LL_O(x) --#endif -- --#ifndef 
__scc --#define __scc(X) ((long) (X)) --typedef long syscall_arg_t; --#endif -+typedef uk_syscall_arg_t syscall_arg_t; - - __attribute__((visibility("hidden"))) --long __syscall_ret(unsigned long), __syscall(syscall_arg_t, ...), -- __syscall_cp(syscall_arg_t, syscall_arg_t, syscall_arg_t, syscall_arg_t, -- syscall_arg_t, syscall_arg_t, syscall_arg_t); -- --#ifdef SYSCALL_NO_INLINE --#define __syscall0(n) (__syscall)(n) --#define __syscall1(n,a) (__syscall)(n,__scc(a)) --#define __syscall2(n,a,b) (__syscall)(n,__scc(a),__scc(b)) --#define __syscall3(n,a,b,c) (__syscall)(n,__scc(a),__scc(b),__scc(c)) --#define __syscall4(n,a,b,c,d) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d)) --#define __syscall5(n,a,b,c,d,e) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e)) --#define __syscall6(n,a,b,c,d,e,f) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f)) --#else --#define __syscall1(n,a) __syscall1(n,__scc(a)) --#define __syscall2(n,a,b) __syscall2(n,__scc(a),__scc(b)) --#define __syscall3(n,a,b,c) __syscall3(n,__scc(a),__scc(b),__scc(c)) --#define __syscall4(n,a,b,c,d) __syscall4(n,__scc(a),__scc(b),__scc(c),__scc(d)) --#define __syscall5(n,a,b,c,d,e) __syscall5(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e)) --#define __syscall6(n,a,b,c,d,e,f) __syscall6(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f)) --#endif --#define __syscall7(n,a,b,c,d,e,f,g) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f),__scc(g)) -+long __syscall_ret(unsigned long), __syscall(uk_syscall_arg_t, ...), -+ __syscall_cp(uk_syscall_arg_t, uk_syscall_arg_t, uk_syscall_arg_t, uk_syscall_arg_t, -+ uk_syscall_arg_t, uk_syscall_arg_t, uk_syscall_arg_t); - --#define __SYSCALL_NARGS_X(a,b,c,d,e,f,g,h,n,...) n --#define __SYSCALL_NARGS(...) __SYSCALL_NARGS_X(__VA_ARGS__,7,6,5,4,3,2,1,0,) --#define __SYSCALL_CONCAT_X(a,b) a##b --#define __SYSCALL_CONCAT(a,b) __SYSCALL_CONCAT_X(a,b) --#define __SYSCALL_DISP(b,...) 
__SYSCALL_CONCAT(b,__SYSCALL_NARGS(__VA_ARGS__))(__VA_ARGS__) -+#define __syscall(...) uk_syscall(__VA_ARGS__) -+#define syscall(...) __syscall_ret(uk_syscall(__VA_ARGS__)) - --#define __syscall(...) __SYSCALL_DISP(__syscall,__VA_ARGS__) --#define syscall(...) __syscall_ret(__syscall(__VA_ARGS__)) -+#define __syscall_cp(...) uk_syscall(__VA_ARGS__) -+#define syscall_cp(...) __syscall_ret(uk_syscall(__VA_ARGS__)) - --#define socketcall __socketcall --#define socketcall_cp __socketcall_cp -- --#define __syscall_cp0(n) (__syscall_cp)(n,0,0,0,0,0,0) --#define __syscall_cp1(n,a) (__syscall_cp)(n,__scc(a),0,0,0,0,0) --#define __syscall_cp2(n,a,b) (__syscall_cp)(n,__scc(a),__scc(b),0,0,0,0) --#define __syscall_cp3(n,a,b,c) (__syscall_cp)(n,__scc(a),__scc(b),__scc(c),0,0,0) --#define __syscall_cp4(n,a,b,c,d) (__syscall_cp)(n,__scc(a),__scc(b),__scc(c),__scc(d),0,0) --#define __syscall_cp5(n,a,b,c,d,e) (__syscall_cp)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),0) --#define __syscall_cp6(n,a,b,c,d,e,f) (__syscall_cp)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f)) -- --#define __syscall_cp(...) __SYSCALL_DISP(__syscall_cp,__VA_ARGS__) --#define syscall_cp(...) __syscall_ret(__syscall_cp(__VA_ARGS__)) -+#define __sys_open(...) open(__VA_ARGS__) -+#define sys_open(...) 
open(__VA_ARGS__) - - #ifndef SYSCALL_USE_SOCKETCALL - #define __socketcall(nm,a,b,c,d,e,f) syscall(SYS_##nm, a, b, c, d, e, f) - #define __socketcall_cp(nm,a,b,c,d,e,f) syscall_cp(SYS_##nm, a, b, c, d, e, f) - #else - #define __socketcall(nm,a,b,c,d,e,f) syscall(SYS_socketcall, __SC_##nm, \ -- ((long [6]){ (long)a, (long)b, (long)c, (long)d, (long)e, (long)f })) -+ ((long [6]){ (long)a, (long)b, (long)c, (long)d, (long)e, (long)f })) - #define __socketcall_cp(nm,a,b,c,d,e,f) syscall_cp(SYS_socketcall, __SC_##nm, \ -- ((long [6]){ (long)a, (long)b, (long)c, (long)d, (long)e, (long)f })) -+ ((long [6]){ (long)a, (long)b, (long)c, (long)d, (long)e, (long)f })) -+#endif -+ -+#define socketcall __socketcall -+#define socketcall_cp __socketcall_cp -+ -+#if (defined CONFIG_ARCH_X86_64) -+#define __SYSCALL_LL_E(x) (x) -+#define __SYSCALL_LL_O(x) (x) -+#else -+#error "Missing macro for architecture" - #endif - - /* fixup legacy 16-bit junk */ -@@ -120,7 +91,6 @@ long __syscall_ret(unsigned long), __syscall(syscall_arg_t, ...), - #define SYS_setfsgid SYS_setfsgid32 - #endif - -- - /* fixup legacy 32-bit-vs-lfs64 junk */ - - #ifdef SYS_fcntl64 -@@ -226,22 +196,4 @@ long __syscall_ret(unsigned long), __syscall(syscall_arg_t, ...), - #define __SC_recvmmsg 19 - #define __SC_sendmmsg 20 - --#ifdef SYS_open --#define __sys_open2(x,pn,fl) __syscall2(SYS_open, pn, (fl)|O_LARGEFILE) --#define __sys_open3(x,pn,fl,mo) __syscall3(SYS_open, pn, (fl)|O_LARGEFILE, mo) --#define __sys_open_cp2(x,pn,fl) __syscall_cp2(SYS_open, pn, (fl)|O_LARGEFILE) --#define __sys_open_cp3(x,pn,fl,mo) __syscall_cp3(SYS_open, pn, (fl)|O_LARGEFILE, mo) --#else --#define __sys_open2(x,pn,fl) __syscall3(SYS_openat, AT_FDCWD, pn, (fl)|O_LARGEFILE) --#define __sys_open3(x,pn,fl,mo) __syscall4(SYS_openat, AT_FDCWD, pn, (fl)|O_LARGEFILE, mo) --#define __sys_open_cp2(x,pn,fl) __syscall_cp3(SYS_openat, AT_FDCWD, pn, (fl)|O_LARGEFILE) --#define __sys_open_cp3(x,pn,fl,mo) __syscall_cp4(SYS_openat, AT_FDCWD, pn, 
(fl)|O_LARGEFILE, mo) --#endif -- --#define __sys_open(...) __SYSCALL_DISP(__sys_open,,__VA_ARGS__) --#define sys_open(...) __syscall_ret(__sys_open(__VA_ARGS__)) -- --#define __sys_open_cp(...) __SYSCALL_DISP(__sys_open_cp,,__VA_ARGS__) --#define sys_open_cp(...) __syscall_ret(__sys_open_cp(__VA_ARGS__)) -- - #endif --- -2.20.1 diff --git a/patches/0005-lib-musl-Use-macros-defined-in-syscall-shim-layer.patch b/patches/0005-lib-musl-Use-macros-defined-in-syscall-shim-layer.patch new file mode 100644 index 0000000..4dbe01f --- /dev/null +++ b/patches/0005-lib-musl-Use-macros-defined-in-syscall-shim-layer.patch @@ -0,0 +1,128 @@ +From 0c7691509eaf13a2b06bf201e5259233b3ca9e20 Mon Sep 17 00:00:00 2001 +From: Dragos Iulian Argint +Date: Sat, 7 May 2022 10:52:39 +0300 +Subject: [PATCH] lib-musl: Use macros defined in syscall shim layer. + +This patch allows you to use function calls instead of binary +system calls (ie `syscall`). Also, using the +`uk_syscall_r_static (SYS_nr)` macro will determine, at compile time, +the function corresponding to the system call given by SYS_nr +(only possible when SYS_nr is known at compile time). 
+ +Signed-off-by: Dragos Iulian Argint +--- + src/internal/syscall.h | 74 ++++++++++++------------------------------ + 1 file changed, 21 insertions(+), 53 deletions(-) + +diff --git a/src/internal/syscall.h b/src/internal/syscall.h +index 6d378a8..5bdb9ea 100644 +--- a/src/internal/syscall.h ++++ b/src/internal/syscall.h +@@ -3,6 +3,8 @@ + + #include + #include "syscall_arch.h" ++#include ++ + + #ifndef SYSCALL_RLIM_INFINITY + #define SYSCALL_RLIM_INFINITY (~0ULL) +@@ -18,53 +20,26 @@ + + #ifndef __scc + #define __scc(X) ((long) (X)) +-typedef long syscall_arg_t; ++typedef uk_syscall_arg_t syscall_arg_t; + #endif + + __attribute__((visibility("hidden"))) +-long __syscall_ret(unsigned long), __syscall(syscall_arg_t, ...), +- __syscall_cp(syscall_arg_t, syscall_arg_t, syscall_arg_t, syscall_arg_t, +- syscall_arg_t, syscall_arg_t, syscall_arg_t); +- +-#ifdef SYSCALL_NO_INLINE +-#define __syscall0(n) (__syscall)(n) +-#define __syscall1(n,a) (__syscall)(n,__scc(a)) +-#define __syscall2(n,a,b) (__syscall)(n,__scc(a),__scc(b)) +-#define __syscall3(n,a,b,c) (__syscall)(n,__scc(a),__scc(b),__scc(c)) +-#define __syscall4(n,a,b,c,d) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d)) +-#define __syscall5(n,a,b,c,d,e) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e)) +-#define __syscall6(n,a,b,c,d,e,f) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f)) +-#else +-#define __syscall1(n,a) __syscall1(n,__scc(a)) +-#define __syscall2(n,a,b) __syscall2(n,__scc(a),__scc(b)) +-#define __syscall3(n,a,b,c) __syscall3(n,__scc(a),__scc(b),__scc(c)) +-#define __syscall4(n,a,b,c,d) __syscall4(n,__scc(a),__scc(b),__scc(c),__scc(d)) +-#define __syscall5(n,a,b,c,d,e) __syscall5(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e)) +-#define __syscall6(n,a,b,c,d,e,f) __syscall6(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f)) +-#endif +-#define __syscall7(n,a,b,c,d,e,f,g) (__syscall)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f),__scc(g)) +- +-#define 
__SYSCALL_NARGS_X(a,b,c,d,e,f,g,h,n,...) n +-#define __SYSCALL_NARGS(...) __SYSCALL_NARGS_X(__VA_ARGS__,7,6,5,4,3,2,1,0,) +-#define __SYSCALL_CONCAT_X(a,b) a##b +-#define __SYSCALL_CONCAT(a,b) __SYSCALL_CONCAT_X(a,b) +-#define __SYSCALL_DISP(b,...) __SYSCALL_CONCAT(b,__SYSCALL_NARGS(__VA_ARGS__))(__VA_ARGS__) +- +-#define __syscall(...) __SYSCALL_DISP(__syscall,__VA_ARGS__) ++long __syscall_ret(unsigned long), __syscall(uk_syscall_arg_t, ...), ++ __syscall_cp(uk_syscall_arg_t, uk_syscall_arg_t, uk_syscall_arg_t, uk_syscall_arg_t, ++ uk_syscall_arg_t, uk_syscall_arg_t, uk_syscall_arg_t); ++ ++#define __syscall(...) uk_syscall_r_static(__VA_ARGS__) + #define syscall(...) __syscall_ret(__syscall(__VA_ARGS__)) + + #define socketcall __socketcall + #define socketcall_cp __socketcall_cp + +-#define __syscall_cp0(n) (__syscall_cp)(n,0,0,0,0,0,0) +-#define __syscall_cp1(n,a) (__syscall_cp)(n,__scc(a),0,0,0,0,0) +-#define __syscall_cp2(n,a,b) (__syscall_cp)(n,__scc(a),__scc(b),0,0,0,0) +-#define __syscall_cp3(n,a,b,c) (__syscall_cp)(n,__scc(a),__scc(b),__scc(c),0,0,0) +-#define __syscall_cp4(n,a,b,c,d) (__syscall_cp)(n,__scc(a),__scc(b),__scc(c),__scc(d),0,0) +-#define __syscall_cp5(n,a,b,c,d,e) (__syscall_cp)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),0) +-#define __syscall_cp6(n,a,b,c,d,e,f) (__syscall_cp)(n,__scc(a),__scc(b),__scc(c),__scc(d),__scc(e),__scc(f)) +- +-#define __syscall_cp(...) __SYSCALL_DISP(__syscall_cp,__VA_ARGS__) ++/** ++ * This should allow cancellation points inside a system call. ++ * However, we currently don't support this operation. ++ * Hence, we fall back to the plain uk_syscall_r_static(). ++ */ ++#define __syscall_cp(...) uk_syscall_r_static(__VA_ARGS__) + #define syscall_cp(...) 
__syscall_ret(__syscall_cp(__VA_ARGS__)) + + #ifndef SYSCALL_USE_SOCKETCALL +@@ -226,22 +201,15 @@ long __syscall_ret(unsigned long), __syscall(syscall_arg_t, ...), + #define __SC_recvmmsg 19 + #define __SC_sendmmsg 20 + +-#ifdef SYS_open +-#define __sys_open2(x,pn,fl) __syscall2(SYS_open, pn, (fl)|O_LARGEFILE) +-#define __sys_open3(x,pn,fl,mo) __syscall3(SYS_open, pn, (fl)|O_LARGEFILE, mo) +-#define __sys_open_cp2(x,pn,fl) __syscall_cp2(SYS_open, pn, (fl)|O_LARGEFILE) +-#define __sys_open_cp3(x,pn,fl,mo) __syscall_cp3(SYS_open, pn, (fl)|O_LARGEFILE, mo) +-#else +-#define __sys_open2(x,pn,fl) __syscall3(SYS_openat, AT_FDCWD, pn, (fl)|O_LARGEFILE) +-#define __sys_open3(x,pn,fl,mo) __syscall4(SYS_openat, AT_FDCWD, pn, (fl)|O_LARGEFILE, mo) +-#define __sys_open_cp2(x,pn,fl) __syscall_cp3(SYS_openat, AT_FDCWD, pn, (fl)|O_LARGEFILE) +-#define __sys_open_cp3(x,pn,fl,mo) __syscall_cp4(SYS_openat, AT_FDCWD, pn, (fl)|O_LARGEFILE, mo) +-#endif +- +-#define __sys_open(...) __SYSCALL_DISP(__sys_open,,__VA_ARGS__) ++#define __sys_open(...) uk_syscall_r_static(SYS_open, __VA_ARGS__) + #define sys_open(...) __syscall_ret(__sys_open(__VA_ARGS__)) + +-#define __sys_open_cp(...) __SYSCALL_DISP(__sys_open_cp,,__VA_ARGS__) ++/** ++ * This should allow cancellation points inside a system call. ++ * However, we currently don't support this operation. ++ * Hence, we fall back to the plain uk_syscall_r_static(). ++ */ ++#define __sys_open_cp(...) uk_syscall_r_static(SYS_open, __VA_ARGS__) + #define sys_open_cp(...) 
__syscall_ret(__sys_open_cp(__VA_ARGS__)) + + #endif +-- +2.17.1 + diff --git a/patches/0005-lib-musl-Use-uk_syscall_r-for-__syscall_cp_c.patch b/patches/0005-lib-musl-Use-uk_syscall_r-for-__syscall_cp_c.patch new file mode 100644 index 0000000..18e087b --- /dev/null +++ b/patches/0005-lib-musl-Use-uk_syscall_r-for-__syscall_cp_c.patch @@ -0,0 +1,31 @@ +From b97146b7c5633fc561692c21546f52b3c7164dbb Mon Sep 17 00:00:00 2001 +From: Dragos Iulian Argint +Date: Sat, 7 May 2022 11:23:38 +0300 +Subject: [PATCH] lib-musl: Use `uk_syscall_r()` for `__syscall_cp_c()` + +If we have a system call number known only at runtime then +we will use `uk_syscall_r()` from syscall shim. This +makes a function call (not a binary system call), but knows +how to demultiplex depending on the number given as a parameter. + +Signed-off-by: Dragos Iulian Argint +--- + src/thread/pthread_cancel.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/thread/pthread_cancel.c b/src/thread/pthread_cancel.c +index 3d22922..8321734 100644 +--- a/src/thread/pthread_cancel.c ++++ b/src/thread/pthread_cancel.c +@@ -30,7 +30,7 @@ long __syscall_cp_c(syscall_arg_t nr, + + if ((st=(self=__pthread_self())->canceldisable) + && (st==PTHREAD_CANCEL_DISABLE || nr==SYS_close)) +- return __syscall(nr, u, v, w, x, y, z); ++ return uk_syscall6_r(nr, u, v, w, x, y, z); + + r = __syscall_cp_asm(&self->cancel, nr, u, v, w, x, y, z); + if (r==-EINTR && nr!=SYS_close && self->cancel && +-- +2.17.1 + diff --git a/patches/0005-lib-musl-Use-uk_syscall_r-for-do_setxid.patch b/patches/0005-lib-musl-Use-uk_syscall_r-for-do_setxid.patch new file mode 100644 index 0000000..3f59e41 --- /dev/null +++ b/patches/0005-lib-musl-Use-uk_syscall_r-for-do_setxid.patch @@ -0,0 +1,39 @@ +From db5c67091a7a9817668aaa61a782eeb5ee83847b Mon Sep 17 00:00:00 2001 +From: Dragos Iulian Argint +Date: Sat, 7 May 2022 11:10:07 +0300 +Subject: [PATCH] lib-musl: Use `uk_syscall_r()` for `do_setxid()` + +If we have a system call 
number known only at runtime, then +we will use `uk_syscall_r()` from the syscall shim. This +makes a function call (not a binary system call), but knows +how to demultiplex depending on the number given as a parameter. + +Signed-off-by: Dragos Iulian Argint +--- + src/unistd/setxid.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/src/unistd/setxid.c b/src/unistd/setxid.c +index 0239f8a..d994de9 100644 +--- a/src/unistd/setxid.c ++++ b/src/unistd/setxid.c +@@ -3,6 +3,7 @@ + #include "syscall.h" + #include "libc.h" + #include "pthread_impl.h" ++#include <uk/syscall.h> + + struct ctx { + int id, eid, sid; +@@ -13,7 +14,7 @@ static void do_setxid(void *p) + { + struct ctx *c = p; + if (c->err>0) return; +- int ret = -__syscall(c->nr, c->id, c->eid, c->sid); ++ int ret = -uk_syscall_r(c->nr, c->id, c->eid, c->sid); + if (ret && !c->err) { + /* If one thread fails to set ids after another has already + * succeeded, forcibly killing the process is the only safe +-- +2.17.1 +