]> xenbits.xensource.com Git - people/julieng/freebsd.git/commitdiff
wordexp: Rewrite to make WRDE_NOCMD reliable.
authorjilles <jilles@FreeBSD.org>
Wed, 30 Sep 2015 21:32:29 +0000 (21:32 +0000)
committerjilles <jilles@FreeBSD.org>
Wed, 30 Sep 2015 21:32:29 +0000 (21:32 +0000)
Shell syntax is too complicated to detect command substitution and unquoted
operators reliably without implementing much of sh's parser. Therefore, have
sh do this detection.

While changing sh's support anyway, also read input from a pipe instead of
arguments to avoid {ARG_MAX} limits and improve privacy, and output count
and length using 16 instead of 8 digits.

The basic concept is:
execl("/bin/sh", "sh", "-c", "freebsd_wordexp ${1:+\"$1\"} -f "$2",
    "", flags & WRDE_NOCMD ? "-p" : "", <pipe with words>);

The WRDE_BADCHAR error is still implemented in libc. POSIX requires us to
fail strings containing unquoted braces with code WRDE_BADCHAR. Since this
is normally not a syntax error in sh, there is still a need for checking
code in libc, we_check().

The new we_check() is an optimistic check that all the characters
  <newline> | & ; < > ( ) { }
are quoted. To avoid duplicating too much sh logic, such characters are
permitted when quoting characters are seen, even if the quoting characters
may themselves be quoted. This code reports all WRDE_BADCHAR errors; bad
characters that get past it and are a syntax error in sh return WRDE_SYNTAX.

Although many implementations of WRDE_NOCMD erroneously allow some command
substitutions (and ours even documented this), there appears to be code that
relies on its security (codesearch.debian.net shows quite a few uses).
Passing untrusted data to wordexp() still exposes a denial of service
possibility and a fairly large attack surface.

Reviewed by: wblock (man page only)
MFC after: 2 weeks
Relnotes: yes
Security: fixes command execution with wordexp(untrusted, WRDE_NOCMD)

bin/sh/builtins.def
bin/sh/expand.c
bin/sh/parser.c
bin/sh/parser.h
lib/libc/gen/wordexp.3
lib/libc/gen/wordexp.c

index 1cbeea913a92733480b858a9eb1f480d308c5550..8807347f4f19ea9f2996e16207532aaec8e247ba 100644 (file)
@@ -65,6 +65,7 @@ exportcmd     -s export -s readonly
 #exprcmd               expr
 falsecmd       false
 fgcmd -j       fg
+freebsd_wordexpcmd     freebsd_wordexp
 getoptscmd     getopts
 hashcmd                hash
 histcmd -h     fc
index 7411f1cc97f5f2a3de88c9f63c93b4b0fc1b89c1..88d80ea4bd5949dadf4eadb69621f6620b9b598d 100644 (file)
@@ -1656,3 +1656,57 @@ wordexpcmd(int argc, char **argv)
                outbin(argv[i], strlen(argv[i]) + 1, out1);
         return (0);
 }
+
+/*
+ * Do most of the work for wordexp(3), new version.
+ */
+
+int
+freebsd_wordexpcmd(int argc __unused, char **argv __unused)
+{
+       struct arglist arglist;
+       union node *args, *n;
+       struct strlist *sp;
+       size_t count, len;
+       int ch;
+       int protected = 0;
+       int fd = -1;
+
+       while ((ch = nextopt("f:p")) != '\0') {
+               switch (ch) {
+               case 'f':
+                       fd = number(shoptarg);
+                       break;
+               case 'p':
+                       protected = 1;
+                       break;
+               }
+       }
+       if (*argptr != NULL)
+               error("wrong number of arguments");
+       if (fd < 0)
+               error("missing fd");
+       INTOFF;
+       setinputfd(fd, 1);
+       INTON;
+       args = parsewordexp();
+       popfile(); /* will also close fd */
+       if (protected)
+               for (n = args; n != NULL; n = n->narg.next) {
+                       if (n->narg.backquote != NULL) {
+                               outcslow('C', out1);
+                               error("command substitution disabled");
+                       }
+               }
+       outcslow(' ', out1);
+       arglist.lastp = &arglist.list;
+       for (n = args; n != NULL; n = n->narg.next)
+               expandarg(n, &arglist, EXP_FULL | EXP_TILDE);
+       *arglist.lastp = NULL;
+       for (sp = arglist.list, count = len = 0; sp; sp = sp->next)
+               count++, len += strlen(sp->text);
+       out1fmt("%016zx %016zx", count, len);
+       for (sp = arglist.list; sp; sp = sp->next)
+               outbin(sp->text, strlen(sp->text) + 1, out1);
+       return (0);
+}
index 302d1797c19453412c133e85b7b51bd3cc680b99..53d7923f8755e996b6d54debe7a78e073c7bf2a2 100644 (file)
@@ -231,6 +231,39 @@ parsecmd(int interact)
 }
 
 
+/*
+ * Read and parse words for wordexp.
+ * Returns a list of NARG nodes; NULL if there are no words.
+ */
+union node *
+parsewordexp(void)
+{
+       union node *n, *first = NULL, **pnext;
+       int t;
+
+       /* This assumes the parser is not re-entered,
+        * which could happen if we add command substitution on PS1/PS2.
+        */
+       parser_temp_free_all();
+       heredoclist = NULL;
+
+       tokpushback = 0;
+       checkkwd = 0;
+       doprompt = 0;
+       setprompt(0);
+       needprompt = 0;
+       pnext = &first;
+       while ((t = readtoken()) != TEOF) {
+               if (t != TWORD)
+                       synexpect(TWORD);
+               n = makename();
+               *pnext = n;
+               pnext = &n->narg.next;
+       }
+       return first;
+}
+
+
 static union node *
 list(int nlflag)
 {
index 5982594391244e4777229a7e4db3ffd6b9d7e28c..0c3cd88601ab4352bb2fd67e0055a5a096df8618 100644 (file)
@@ -76,6 +76,7 @@ extern const char *const parsekwd[];
 
 
 union node *parsecmd(int);
+union node *parsewordexp(void);
 void forcealias(void);
 void fixredir(union node *, const char *, int);
 int goodname(const char *);
index 2fc20ef81f2bbb3937d48e9992d5dd3e05135f0d..dd4605ffa6fd58f432e2f0fb88c635c7524743d5 100644 (file)
@@ -25,7 +25,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd August 18, 2015
+.Dd September 30, 2015
 .Dt WORDEXP 3
 .Os
 .Sh NAME
@@ -108,8 +108,9 @@ function frees the memory allocated by
 .Sh IMPLEMENTATION NOTES
 The
 .Fn wordexp
-function is implemented by executing
-.Xr sh 1 .
+function is implemented using the undocumented
+.Ic freebsd_wordexp
+shell built-in command.
 .Sh RETURN VALUES
 The
 .Fn wordexp
@@ -191,18 +192,19 @@ and
 functions conform to
 .St -p1003.1-2001 .
 .Sh BUGS
-Do not pass untrusted user data to
-.Fn wordexp ,
-regardless of whether the
-.Dv WRDE_NOCMD
-flag is set.
-The
-.Fn wordexp
-function attempts to detect input that would cause commands to be
-executed before passing it to the shell
-but it does not use the same parser so it may be fooled.
-.Pp
 The current
 .Fn wordexp
 implementation does not recognize multibyte characters other than UTF-8, since
 the shell (which it invokes to perform expansions) does not.
+.Sh SECURITY CONSIDERATIONS
+Pathname generation may create output that is exponentially larger than the
+input size.
+.Pp
+Although this implementation detects command substitution reliably for
+.Dv WRDE_NOCMD ,
+the attack surface remains fairly large.
+Also, some other implementations
+(such as older versions of this one)
+may execute command substitutions even if
+.Dv WRDE_NOCMD
+is set.
index 859ca50ea6aa09d83e15b2499b19a3ebd5d0ed79..3791e1e05b1957818cad72e9ef5aab51054b64da 100644 (file)
@@ -32,6 +32,7 @@
 #include <fcntl.h>
 #include <paths.h>
 #include <signal.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -43,7 +44,7 @@
 __FBSDID("$FreeBSD$");
 
 static int     we_askshell(const char *, wordexp_t *, int);
-static int     we_check(const char *, int);
+static int     we_check(const char *);
 
 /*
  * wordexp --
@@ -65,7 +66,7 @@ wordexp(const char * __restrict words, wordexp_t * __restrict we, int flags)
                we->we_strings = NULL;
                we->we_nbytes = 0;
        }
-       if ((error = we_check(words, flags)) != 0) {
+       if ((error = we_check(words)) != 0) {
                wordfree(we);
                return (error);
        }
@@ -94,17 +95,37 @@ we_read_fully(int fd, char *buffer, size_t len)
        return done;
 }
 
+static bool
+we_write_fully(int fd, const char *buffer, size_t len)
+{
+       size_t done;
+       ssize_t nwritten;
+
+       done = 0;
+       do {
+               nwritten = _write(fd, buffer + done, len - done);
+               if (nwritten == -1 && errno == EINTR)
+                       continue;
+               if (nwritten <= 0)
+                       return (false);
+               done += nwritten;
+       } while (done != len);
+       return (true);
+}
+
 /*
  * we_askshell --
- *     Use the `wordexp' /bin/sh builtin function to do most of the work
- *     in expanding the word string. This function is complicated by
+ *     Use the `freebsd_wordexp' /bin/sh builtin function to do most of the
+ *     work in expanding the word string. This function is complicated by
  *     memory management.
  */
 static int
 we_askshell(const char *words, wordexp_t *we, int flags)
 {
-       int pdes[2];                    /* Pipe to child */
-       char buf[18];                   /* Buffer for byte and word count */
+       int pdesw[2];                   /* Pipe for writing words */
+       int pdes[2];                    /* Pipe for reading output */
+       char wfdstr[sizeof(int) * 3 + 1];
+       char buf[35];                   /* Buffer for byte and word count */
        long nwords, nbytes;            /* Number of words, bytes from child */
        long i;                         /* Handy integer */
        size_t sofs;                    /* Offset into we->we_strings */
@@ -119,18 +140,25 @@ we_askshell(const char *words, wordexp_t *we, int flags)
        char **nwv;                     /* Temporary for realloc() */
        sigset_t newsigblock, oldsigblock;
        const char *ifs;
-       char save;
 
        serrno = errno;
        ifs = getenv("IFS");
 
-       if (pipe2(pdes, O_CLOEXEC) < 0)
+       if (pipe2(pdesw, O_CLOEXEC) < 0)
+               return (WRDE_NOSPACE);  /* XXX */
+       snprintf(wfdstr, sizeof(wfdstr), "%d", pdesw[0]);
+       if (pipe2(pdes, O_CLOEXEC) < 0) {
+               _close(pdesw[0]);
+               _close(pdesw[1]);
                return (WRDE_NOSPACE);  /* XXX */
+       }
        (void)sigemptyset(&newsigblock);
        (void)sigaddset(&newsigblock, SIGCHLD);
        (void)__libc_sigprocmask(SIG_BLOCK, &newsigblock, &oldsigblock);
        if ((pid = fork()) < 0) {
                serrno = errno;
+               _close(pdesw[0]);
+               _close(pdesw[1]);
                _close(pdes[0]);
                _close(pdes[1]);
                (void)__libc_sigprocmask(SIG_SETMASK, &oldsigblock, NULL);
@@ -146,43 +174,54 @@ we_askshell(const char *words, wordexp_t *we, int flags)
                    _dup2(pdes[1], STDOUT_FILENO) :
                    _fcntl(pdes[1], F_SETFD, 0)) < 0)
                        _exit(1);
+               if (_fcntl(pdesw[0], F_SETFD, 0) < 0)
+                       _exit(1);
                execl(_PATH_BSHELL, "sh", flags & WRDE_UNDEF ? "-u" : "+u",
-                   "-c", "IFS=$1;eval \"$2\";eval \"echo;set -- $3\";"
-                   "IFS=;a=\"$*\";printf '%08x' \"$#\" \"${#a}\";"
-                   "printf '%s\\0' \"$@\"",
+                   "-c", "IFS=$1;eval \"$2\";"
+                   "freebsd_wordexp -f \"$3\" ${4:+\"$4\"}",
                    "",
                    ifs != NULL ? ifs : " \t\n",
-                   flags & WRDE_SHOWERR ? "" : "exec 2>/dev/null", words,
+                   flags & WRDE_SHOWERR ? "" : "exec 2>/dev/null",
+                   wfdstr,
+                   flags & WRDE_NOCMD ? "-p" : "",
                    (char *)NULL);
                _exit(1);
        }
 
        /*
-        * We are the parent; read the output of the shell wordexp function,
-        * which is a byte indicating that the words were parsed successfully,
-        * a 32-bit hexadecimal word count, a 32-bit hexadecimal byte count
-        * (not including terminating null bytes), followed by the expanded
-        * words separated by nulls.
+        * We are the parent; write the words.
         */
        _close(pdes[1]);
-       switch (we_read_fully(pdes[0], buf, 17)) {
+       _close(pdesw[0]);
+       if (!we_write_fully(pdesw[1], words, strlen(words))) {
+               _close(pdesw[1]);
+               error = WRDE_SYNTAX;
+               goto cleanup;
+       }
+       _close(pdesw[1]);
+       /*
+        * Read the output of the shell wordexp function,
+        * which is a byte indicating that the words were parsed successfully,
+        * a 64-bit hexadecimal word count, a dummy byte, a 64-bit hexadecimal
+        * byte count (not including terminating null bytes), followed by the
+        * expanded words separated by nulls.
+        */
+       switch (we_read_fully(pdes[0], buf, 34)) {
        case 1:
-               error = WRDE_BADVAL;
+               error = buf[0] == 'C' ? WRDE_CMDSUB : WRDE_BADVAL;
                serrno = errno;
                goto cleanup;
-       case 17:
+       case 34:
                break;
        default:
                error = WRDE_SYNTAX;
                serrno = errno;
                goto cleanup;
        }
-       save = buf[9];
-       buf[9] = '\0';
-       nwords = strtol(buf + 1, NULL, 16);
-       buf[9] = save;
        buf[17] = '\0';
-       nbytes = strtol(buf + 9, NULL, 16) + nwords;
+       nwords = strtol(buf + 1, NULL, 16);
+       buf[34] = '\0';
+       nbytes = strtol(buf + 18, NULL, 16) + nwords;
 
        /*
         * Allocate or reallocate (when flags & WRDE_APPEND) the word vector
@@ -255,83 +294,96 @@ cleanup:
  * we_check --
  *     Check that the string contains none of the following unquoted
  *     special characters: <newline> |&;<>(){}
- *     or command substitutions when WRDE_NOCMD is set in flags.
+ *     This mainly serves for {} which are normally legal in sh.
+ *     It deliberately does not attempt to model full sh syntax.
  */
 static int
-we_check(const char *words, int flags)
+we_check(const char *words)
 {
        char c;
-       int dquote, level, quote, squote;
+       /* Saw \ or $, possibly not special: */
+       bool quote = false, dollar = false;
+       /* Saw ', ", ${, ` or $(, possibly not special: */
+       bool have_sq = false, have_dq = false, have_par_begin = false;
+       bool have_cmd = false;
+       /* Definitely saw a ', ", ${, ` or $(, need a closing character: */
+       bool need_sq = false, need_dq = false, need_par_end = false;
+       bool need_cmd_old = false, need_cmd_new = false;
 
-       quote = squote = dquote = 0;
        while ((c = *words++) != '\0') {
                switch (c) {
                case '\\':
-                       if (squote == 0)
-                               quote ^= 1;
+                       quote = !quote;
+                       continue;
+               case '$':
+                       if (quote)
+                               quote = false;
+                       else
+                               dollar = !dollar;
                        continue;
                case '\'':
-                       if (quote + dquote == 0)
-                               squote ^= 1;
+                       if (!quote && !have_sq && !have_dq)
+                               need_sq = true;
+                       else
+                               need_sq = false;
+                       have_sq = true;
                        break;
                case '"':
-                       if (quote + squote == 0)
-                               dquote ^= 1;
+                       if (!quote && !have_sq && !have_dq)
+                               need_dq = true;
+                       else
+                               need_dq = false;
+                       have_dq = true;
                        break;
                case '`':
-                       if (quote + squote == 0 && flags & WRDE_NOCMD)
-                               return (WRDE_CMDSUB);
-                       while ((c = *words++) != '\0' && c != '`')
-                               if (c == '\\' && (c = *words++) == '\0')
-                                       break;
-                       if (c == '\0')
-                               return (WRDE_SYNTAX);
+                       if (!quote && !have_sq && !have_cmd)
+                               need_cmd_old = true;
+                       else
+                               need_cmd_old = false;
+                       have_cmd = true;
                        break;
-               case '|': case '&': case ';': case '<': case '>':
-               case '{': case '}': case '(': case ')': case '\n':
-                       if (quote + squote + dquote == 0)
+               case '{':
+                       if (!quote && !dollar && !have_sq && !have_dq &&
+                           !have_cmd)
                                return (WRDE_BADCHAR);
+                       if (dollar) {
+                               if (!quote && !have_sq)
+                                       need_par_end = true;
+                               have_par_begin = true;
+                       }
                        break;
-               case '$':
-                       if ((c = *words++) == '\0')
-                               break;
-                       else if (quote + squote == 0 && c == '(') {
-                               if (flags & WRDE_NOCMD && *words != '(')
-                                       return (WRDE_CMDSUB);
-                               level = 1;
-                               while ((c = *words++) != '\0') {
-                                       if (c == '\\') {
-                                               if ((c = *words++) == '\0')
-                                                       break;
-                                       } else if (c == '(')
-                                               level++;
-                                       else if (c == ')' && --level == 0)
-                                               break;
-                               }
-                               if (c == '\0' || level != 0)
-                                       return (WRDE_SYNTAX);
-                       } else if (quote + squote == 0 && c == '{') {
-                               level = 1;
-                               while ((c = *words++) != '\0') {
-                                       if (c == '\\') {
-                                               if ((c = *words++) == '\0')
-                                                       break;
-                                       } else if (c == '{')
-                                               level++;
-                                       else if (c == '}' && --level == 0)
-                                               break;
-                               }
-                               if (c == '\0' || level != 0)
-                                       return (WRDE_SYNTAX);
-                       } else
-                               --words;
+               case '}':
+                       if (!quote && !have_sq && !have_dq && !have_par_begin &&
+                           !have_cmd)
+                               return (WRDE_BADCHAR);
+                       need_par_end = false;
+                       break;
+               case '(':
+                       if (!quote && !dollar && !have_sq && !have_dq &&
+                           !have_cmd)
+                               return (WRDE_BADCHAR);
+                       if (dollar) {
+                               if (!quote && !have_sq)
+                                       need_cmd_new = true;
+                               have_cmd = true;
+                       }
+                       break;
+               case ')':
+                       if (!quote && !have_sq && !have_dq && !have_cmd)
+                               return (WRDE_BADCHAR);
+                       need_cmd_new = false;
+                       break;
+               case '|': case '&': case ';': case '<': case '>': case '\n':
+                       if (!quote && !have_sq && !have_dq && !have_cmd)
+                               return (WRDE_BADCHAR);
                        break;
                default:
                        break;
                }
-               quote = 0;
+               quote = dollar = false;
        }
-       if (quote + squote + dquote != 0)
+       if (quote || dollar || need_sq || need_dq || need_par_end ||
+           need_cmd_old || need_cmd_new)
                return (WRDE_SYNTAX);
 
        return (0);