xenbits.xensource.com Git - unikraft/unikraft.git/commitdiff
lib/ukargparse: Support escaping of quotes
author Simon Kuenzer <simon@unikraft.io>
Fri, 24 Mar 2023 16:33:17 +0000 (17:33 +0100)
committer Unikraft <monkey@unikraft.io>
Mon, 15 May 2023 20:57:55 +0000 (20:57 +0000)
This commit introduces the support of escaping single (`'`) and double
quotes (`"`) with the escape character `\`. The behaviour is similar to
what one expects from Unix shells, like `sh` or `bash`.

Single-quoted characters and character sequences preserve each character.
Double-quoted characters and character sequences preserve each character
except the backslash `\`. The backslash can be used to switch off the
special meaning of a character, like the double quote (`\"`) or a
backslash (`\\`).

Signed-off-by: Simon Kuenzer <simon@unikraft.io>
Reviewed-by: Razvan Deaconescu <razvand@unikraft.io>
Approved-by: Razvan Deaconescu <razvand@unikraft.io>
Tested-by: Unikraft CI <monkey@unikraft.io>
GitHub-Closes: #893

lib/ukargparse/argparse.c
lib/ukargparse/tests/test_ukargparse_parse.c

index 124dd63171449be64f64b6d9a24af0597c47718f..baa628ce24722906f40ff19ee386d2b39f6ffc69 100644 (file)
@@ -1,9 +1,10 @@
 /* SPDX-License-Identifier: BSD-3-Clause */
 /*
- * Authors: Simon Kuenzer <simon.kuenzer@neclab.eu>
+ * Authors: Simon Kuenzer <simon@unikraft.io>
  *
  *
  * Copyright (c) 2017, NEC Europe Ltd., NEC Corporation. All rights reserved.
+ * Copyright (c) 2023, Unikraft GmbH. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -47,6 +48,7 @@ int uk_argnparse(char *argb, __sz maxlen, char *argv[], int maxcount)
 {
        int argc = 0;
        int prev_wspace = 1;
+       int prev_escape = 0;
        char in_quote = '\0';
        __sz i;
 
@@ -66,7 +68,7 @@ int uk_argnparse(char *argb, __sz maxlen, char *argv[], int maxcount)
                case '\n':
                case '\t':
                case '\v':
-                       if (!in_quote) {
+                       if (!in_quote && !prev_escape) {
                                argb[i] = '\0';
                                prev_wspace = 1;
                                break;
@@ -76,6 +78,18 @@ int uk_argnparse(char *argb, __sz maxlen, char *argv[], int maxcount)
                /* quotes */
                case '\'':
                case '"':
+                       if (prev_escape) {
+                               if (!in_quote) {
+                                       /* escaping removes special meaning */
+                                       goto regularchar;
+                               } else if (argb[i] == '"') {
+                                       /* \" -> " */
+                                       left_shift(argb, i - 1, maxlen + 1);
+                                       --i;
+                                       prev_escape = 0;
+                                       goto regularchar;
+                               }
+                       }
                        if (!in_quote) {
                                in_quote = argb[i];
                                left_shift(argb, i, maxlen);
@@ -96,6 +110,27 @@ int uk_argnparse(char *argb, __sz maxlen, char *argv[], int maxcount)
                        if (prev_wspace) {
                                argv[argc++] = &argb[i];
                                prev_wspace = 0;
+                               prev_escape = 0;
+                       }
+
+                       /* escape character handling */
+                       if (argb[i] == '\\' && in_quote != '\'') {
+                               if (prev_escape) {
+                                       /* double escape: \\ -> \ */
+                                       left_shift(argb, i, maxlen);
+                                       --i;
+                                       prev_escape = 0;
+                               } else {
+                                       prev_escape = 1;
+                               }
+                       } else if (prev_escape) {
+                               /* any character after escape symbol */
+                               if (!in_quote) {
+                                       /* remove escape symbol */
+                                       left_shift(argb, i - 1, maxlen + 1);
+                                       --i;
+                               }
+                               prev_escape = 0;
                        }
                        break;
                }
index 9be6b9e56b1df64461297de096bf38c56222afa7..b44cd684c8da2c903a7c8063dc7ba80e68b8da13 100644 (file)
@@ -103,4 +103,43 @@ UK_TESTCASE(ukargparse, parse_quotes)
                UK_TEST_EXPECT_SNUM_EQ(strcmp(arg_ex[i], arg_out[i]), 0);
 }
 
+UK_TESTCASE(ukargparse, parse_quotes_escaped)
+{
+       int argc, i;
+       char *arg_vec[0x10] = { NULL };
+       char arg_str[] =
+               "\\'"
+               " \\\""
+               " \"arg0\\\"\""
+               " \"\\\"arg1 '-'\\\"\"-\" \\\\ arg2\\\"\""
+               " '\" \\\\ \\\" \\''\"'"
+               " \\a\\b\\\"\\c\\\""
+               " \"\\a\\b\\\"\\c\\\"\""
+               " '\\a\\b\\\"\\c\\\"'"
+               " '\\'a\\'"
+               " a\\ b"
+               " \\";
+       static const char * const arg_exp[] = {
+               "'",
+               "\"",
+               "arg0\"",
+               "\"arg1 '-'\"- \\ arg2\"",
+               "\" \\\\ \\\" \\\"",
+               "ab\"c\"",
+               "\\a\\b\"\\c\"",
+               "\\a\\b\\\"\\c\\\"",
+               "\\a'",
+               "a b",
+               "\\"
+       };
+
+       argc = uk_argparse(arg_str, arg_vec, ARRAY_SIZE(arg_vec) - 1);
+       UK_TEST_EXPECT_SNUM_EQ(argc, ARRAY_SIZE(arg_exp));
+       if (argc != ARRAY_SIZE(arg_exp))
+               return;
+
+       for (i = 0; i < (int) ARRAY_SIZE(arg_exp); ++i)
+               UK_TEST_EXPECT_SNUM_EQ(strcmp(arg_exp[i], arg_vec[i]), 0);
+}
+
 uk_testsuite_register(ukargparse, NULL);