diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2023-11-25 21:09:44 -0800 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2023-11-25 21:09:44 -0800 |
commit | 1cb01c571adc42ae269d4361df44f4bf6ab9d3ec (patch) | |
tree | a20dce2852c81309e5d9693e88755d33c17b993d | |
parent | 81fa0c0f30bb6bcd2a4d287748db6d090b0d04ea (diff) | |
download | txr-1cb01c571adc42ae269d4361df44f4bf6ab9d3ec.tar.gz txr-1cb01c571adc42ae269d4361df44f4bf6ab9d3ec.tar.bz2 txr-1cb01c571adc42ae269d4361df44f4bf6ab9d3ec.zip |
sh-esc: clean up mess I made.
Not all special characters can just be backslash escaped.
Spaces and newlines must be quoted.
* stream.c (sh_esc_common): New function. Handles both
sh-esc and sh-esc-all logic, distinguished by a flag.
Quoting is used, rather than backslash escaping.
If the string contains no special characters, it is just
returned. If it can be double quoted, it is double quoted.
Otherwise it is single quoted and any contained single
quotes are replaced by '\''.
(sh_esc, sh_esc_all): Now just wrap sh_esc_common.
(sh_esc_dq): Remove the newline from the set of escaped
characters. Escaping a newline generates a continuation
sequence which eats the newline.
* tests/018/sh-esc.tl: Most test cases deleted; many new test
cases added.
* txr.1: Documentation revised.
-rw-r--r-- | stream.c | 47 | ||||
-rw-r--r-- | tests/018/sh-esc.tl | 63 | ||||
-rw-r--r-- | txr.1 | 34 |
3 files changed, 113 insertions, 31 deletions
@@ -4851,24 +4851,55 @@ static val sh(val command) #endif -static val sh_esc(val string) +static val sh_esc_dq(val string) { - return str_esc(lit("|&;<>()$`\\\"' \t\n*?[#~"), chr('\\'), string); + return str_esc(lit("$`\\\""), chr('\\'), string); } -static val sh_esc_all(val string) +static val sh_esc_sq(val string) { - return str_esc(lit("|&;<>()$`\\\"' \t\n*?[#~=%"), chr('\\'), string); + return str_esc(lit("'"), lit("'\\'"), string); } -static val sh_esc_dq(val string) +static val sh_esc_common(val string, int all, val self) +{ + const wchar_t *s, *str = c_str(string, self); + int sq = 0, dq = 0, es = 0; + + for (s = str; *s; s++) + { + wchar_t ch = *s; + + if (ch == '\'') + es = sq = 1; + else if (wcschr(L"$`\\\"", ch)) + es = dq = 1; + else if (wcschr(L"|&;<>() \t\n*?[#~", ch)) + es = 1; + else if (all && wcschr(L"=%", ch)) + es = 1; + } + + if (!es) + return string; + + if (!dq) + return scat3(chr('"'), string, chr('"')); + + if (!sq) + return scat3(chr('\''), string, chr('\'')); + + return scat3(chr('\''), sh_esc_sq(string), chr('\'')); +} + +static val sh_esc(val string) { - return str_esc(lit("$`\\\"\n"), chr('\\'), string); + return sh_esc_common(string, 0, lit("sh-esc")); } -static val sh_esc_sq(val string) +static val sh_esc_all(val string) { - return str_esc(lit("'"), lit("'\\'"), string); + return sh_esc_common(string, 1, lit("sh-esc-all")); } val remove_path(val path, val throw_on_error) diff --git a/tests/018/sh-esc.tl b/tests/018/sh-esc.tl index 3466ee76..f508475e 100644 --- a/tests/018/sh-esc.tl +++ b/tests/018/sh-esc.tl @@ -1,11 +1,58 @@ (load "../common") (mtest - (sh-esc "|&;<>()$`\\\"' \t\n*?[#~=%abc") - "\\|\\&\\;\\<\\>\\(\\)\\$\\`\\\\\\\"\\'\\ \\\t\\\n\\*\\?\\[\\#\\~=%abc" - (sh-esc-all "|&;<>()$`\\\"' \t\n*?[#~=%abc") - "\\|\\&\\;\\<\\>\\(\\)\\$\\`\\\\\\\"\\'\\ \\\t\\\n\\*\\?\\[\\#\\~\\=\\%abc" - (sh-esc-dq "$`\\\"\n'abc()*~") - "\\$\\`\\\\\\\"\\\n'abc()*~" - (sh-esc-sq "$`\\\"\n'abc()*~") - "$`\\\"\n'\\''abc()*~") + (sh-esc "") "" + 
(sh-esc "a") "a") + +(mtest + (sh-esc "|") "\"|\"" + (sh-esc "&") "\"&\"" + (sh-esc ";") "\";\"" + (sh-esc "<") "\"<\"" + (sh-esc ">") "\">\"" + (sh-esc "(") "\"(\"" + (sh-esc ")") "\")\"" + (sh-esc " ") "\" \"" + (sh-esc "\t") "\"\t\"" + (sh-esc "\n") "\"\n\"" + (sh-esc "*") "\"*\"" + (sh-esc "?") "\"?\"" + (sh-esc "[") "\"[\"" + (sh-esc "#") "\"#\"" + (sh-esc "~") "\"~\"") + +(mtest + (sh-esc "'") "\"'\"") + +(mtest + (sh-esc "\"") "'\"'" + (sh-esc "$") "'$'" + (sh-esc "`") "'`'" + (sh-esc "\\") "'\\'") + +(mtest + (sh-esc "=") "=" + (sh-esc "%") "%" + (sh-esc-all "=") "\"=\"" + (sh-esc-all "%") "\"%\"") + +(test + (sh-esc "a\"b'c") "'a\"b'\\''c'") + +(mtest + (sh-esc "|'") "\"|'\"" + (sh-esc "|\"") "'|\"'" + (sh-esc "'$") "''\\''$'") + +(mtest + (sh-esc-all "|=") "\"|=\"" + (sh-esc-all "'=") "\"'=\"" + (sh-esc-all "\"=") "'\"='") + +(mtest + (sh-esc "|&;<>() \t\n*?[#~") "\"|&;<>() \t\n*?[#~\"" + (sh-esc "\"$`\\") "'\"$`\\'") + +(mtest + (sh-esc-dq "$`\\\"\n'abc()*~") "\\$\\`\\\\\\\"\n'abc()*~" + (sh-esc-sq "$`\\\"\n'abc()*~") "$`\\\"\n'\\''abc()*~") @@ -73373,20 +73373,20 @@ and related functions is the POSIX Shell Command Language. The .code sh-esc -function escapes its argument in such a way that the resulting -string may be inserted as an argument into a command. +function adds quoting and escaping into its argument in such a way that the +resulting string may be inserted as an argument into a command. The .code sh-esc-all -function performs a stricter escaping, such that the transformed -string may be inserted into any syntactic context where a -textual operand is required for any reason, such as the +function performs a stricter escaping and quoting, such that the transformed +string may be inserted into any syntactic context where a textual operand is +required for any reason, such as the .meta pattern in the .mono .meti <2> ${ var % pattern } .onom -construct, or +construct. 
The .code sh-esc-dq @@ -73398,17 +73398,24 @@ The function escapes its argument for insertion into a single-quoted field in a shell command line. It does not add the single quotes themselves. -The precise set of characters which are escaped by a backslash by the +The precise set of characters which, according to the .code sh-esc -function is the following: +function, require escaping or quoting, is the following: .verb | & ; < > ( ) $ ` \e " ' tab newline space * ? [ # ~ .brev +If none of these characters occur in +.metn str , +then +.code sh-esc +returns +.metn str . + The .code sh-esc-all -function escapes all the above characters, and also these: +function considers all the above characters, and also these: .verb = % @@ -73416,16 +73423,13 @@ function escapes all the above characters, and also these: The .code sh-esc-dq -function escapes the following characters: +function escapes the following characters by preceding them with +the \e (backslash) character: .verb - $ ` \e " newline + $ ` \e " .brev -To escape with a backslash means to precede every occurrence of the -specified characters with the character -.codn \e . - The .code sh-esc-sq function replaces every occurrence of the |