summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Kaz Kylheku <kaz@kylheku.com> 2023-11-25 21:09:44 -0800
committer Kaz Kylheku <kaz@kylheku.com> 2023-11-25 21:09:44 -0800
commit 1cb01c571adc42ae269d4361df44f4bf6ab9d3ec (patch)
tree a20dce2852c81309e5d9693e88755d33c17b993d
parent 81fa0c0f30bb6bcd2a4d287748db6d090b0d04ea (diff)
download txr-1cb01c571adc42ae269d4361df44f4bf6ab9d3ec.tar.gz
txr-1cb01c571adc42ae269d4361df44f4bf6ab9d3ec.tar.bz2
txr-1cb01c571adc42ae269d4361df44f4bf6ab9d3ec.zip
sh-esc: clean up mess I made.
Not all special characters can just be backslash escaped. Spaces and newlines must be quoted. * stream.c (sh_esc_common): New function. Handles both sh-esc and sh-esc-all logic, distinguished by a flag. Quoting is used, rather than backslash escaping. If the string contains no special characters, it is just returned. If it can be double quoted, it is double quoted. Otherwise it is single quoted and any contained single quotes are replaced by '\''. (sh_esc, sh_esc_all): Now just wrap sh_esc_common. (sh_esc_dq): Remove the newline from the set of escaped characters. Escaping a newline generates a continuation sequence which eats the newline. * tests/018/sh-esc.tl: Most test cases deleted; many new test cases added. * txr.1: Documentation revised.
-rw-r--r-- stream.c 47
-rw-r--r-- tests/018/sh-esc.tl 63
-rw-r--r-- txr.1 34
3 files changed, 113 insertions, 31 deletions
diff --git a/stream.c b/stream.c
index 5ea9330f..d1d615e9 100644
--- a/stream.c
+++ b/stream.c
@@ -4851,24 +4851,55 @@ static val sh(val command)
#endif
-static val sh_esc(val string)
+static val sh_esc_dq(val string)
{
- return str_esc(lit("|&;<>()$`\\\"' \t\n*?[#~"), chr('\\'), string);
+ return str_esc(lit("$`\\\""), chr('\\'), string);
}
-static val sh_esc_all(val string)
+static val sh_esc_sq(val string)
{
- return str_esc(lit("|&;<>()$`\\\"' \t\n*?[#~=%"), chr('\\'), string);
+ return str_esc(lit("'"), lit("'\\'"), string);
}
-static val sh_esc_dq(val string)
+static val sh_esc_common(val string, int all, val self)
+{
+ const wchar_t *s, *str = c_str(string, self);
+ int sq = 0, dq = 0, es = 0;
+
+ for (s = str; *s; s++)
+ {
+ wchar_t ch = *s;
+
+ if (ch == '\'')
+ es = sq = 1;
+ else if (wcschr(L"$`\\\"", ch))
+ es = dq = 1;
+ else if (wcschr(L"|&;<>() \t\n*?[#~", ch))
+ es = 1;
+ else if (all && wcschr(L"=%", ch))
+ es = 1;
+ }
+
+ if (!es)
+ return string;
+
+ if (!dq)
+ return scat3(chr('"'), string, chr('"'));
+
+ if (!sq)
+ return scat3(chr('\''), string, chr('\''));
+
+ return scat3(chr('\''), sh_esc_sq(string), chr('\''));
+}
+
+static val sh_esc(val string)
{
- return str_esc(lit("$`\\\"\n"), chr('\\'), string);
+ return sh_esc_common(string, 0, lit("sh-esc"));
}
-static val sh_esc_sq(val string)
+static val sh_esc_all(val string)
{
- return str_esc(lit("'"), lit("'\\'"), string);
+ return sh_esc_common(string, 1, lit("sh-esc-all"));
}
val remove_path(val path, val throw_on_error)
diff --git a/tests/018/sh-esc.tl b/tests/018/sh-esc.tl
index 3466ee76..f508475e 100644
--- a/tests/018/sh-esc.tl
+++ b/tests/018/sh-esc.tl
@@ -1,11 +1,58 @@
(load "../common")
(mtest
- (sh-esc "|&;<>()$`\\\"' \t\n*?[#~=%abc")
- "\\|\\&\\;\\<\\>\\(\\)\\$\\`\\\\\\\"\\'\\ \\\t\\\n\\*\\?\\[\\#\\~=%abc"
- (sh-esc-all "|&;<>()$`\\\"' \t\n*?[#~=%abc")
- "\\|\\&\\;\\<\\>\\(\\)\\$\\`\\\\\\\"\\'\\ \\\t\\\n\\*\\?\\[\\#\\~\\=\\%abc"
- (sh-esc-dq "$`\\\"\n'abc()*~")
- "\\$\\`\\\\\\\"\\\n'abc()*~"
- (sh-esc-sq "$`\\\"\n'abc()*~")
- "$`\\\"\n'\\''abc()*~")
+ (sh-esc "") ""
+ (sh-esc "a") "a")
+
+(mtest
+ (sh-esc "|") "\"|\""
+ (sh-esc "&") "\"&\""
+ (sh-esc ";") "\";\""
+ (sh-esc "<") "\"<\""
+ (sh-esc ">") "\">\""
+ (sh-esc "(") "\"(\""
+ (sh-esc ")") "\")\""
+ (sh-esc " ") "\" \""
+ (sh-esc "\t") "\"\t\""
+ (sh-esc "\n") "\"\n\""
+ (sh-esc "*") "\"*\""
+ (sh-esc "?") "\"?\""
+ (sh-esc "[") "\"[\""
+ (sh-esc "#") "\"#\""
+ (sh-esc "~") "\"~\"")
+
+(mtest
+ (sh-esc "'") "\"'\"")
+
+(mtest
+ (sh-esc "\"") "'\"'"
+ (sh-esc "$") "'$'"
+ (sh-esc "`") "'`'"
+ (sh-esc "\\") "'\\'")
+
+(mtest
+ (sh-esc "=") "="
+ (sh-esc "%") "%"
+ (sh-esc-all "=") "\"=\""
+ (sh-esc-all "%") "\"%\"")
+
+(test
+ (sh-esc "a\"b'c") "'a\"b'\\''c'")
+
+(mtest
+ (sh-esc "|'") "\"|'\""
+ (sh-esc "|\"") "'|\"'"
+ (sh-esc "'$") "''\\''$'")
+
+(mtest
+ (sh-esc-all "|=") "\"|=\""
+ (sh-esc-all "'=") "\"'=\""
+ (sh-esc-all "\"=") "'\"='")
+
+(mtest
+ (sh-esc "|&;<>() \t\n*?[#~") "\"|&;<>() \t\n*?[#~\""
+ (sh-esc "\"$`\\") "'\"$`\\'")
+
+(mtest
+ (sh-esc-dq "$`\\\"\n'abc()*~") "\\$\\`\\\\\\\"\n'abc()*~"
+ (sh-esc-sq "$`\\\"\n'abc()*~") "$`\\\"\n'\\''abc()*~")
diff --git a/txr.1 b/txr.1
index 585d052c..edc7bf36 100644
--- a/txr.1
+++ b/txr.1
@@ -73373,20 +73373,20 @@ and related functions is the POSIX Shell Command Language.
The
.code sh-esc
-function escapes its argument in such a way that the resulting
-string may be inserted as an argument into a command.
+function adds quoting and escaping into its argument in such a way that the
+resulting string may be inserted as an argument into a command.
The
.code sh-esc-all
-function performs a stricter escaping, such that the transformed
-string may be inserted into any syntactic context where a
-textual operand is required for any reason, such as the
+function performs a stricter escaping and quoting, such that the transformed
+string may be inserted into any syntactic context where a textual operand is
+required for any reason, such as the
.meta pattern
in the
.mono
.meti <2> ${ var % pattern }
.onom
-construct, or
+construct.
The
.code sh-esc-dq
@@ -73398,17 +73398,24 @@ The
function escapes its argument for insertion into a single-quoted field in a
shell command line. It does not add the single quotes themselves.
-The precise set of characters which are escaped by a backslash by the
+The precise set of characters which, according to the
.code sh-esc
-function is the following:
+function, require escaping or quoting, is the following:
.verb
| & ; < > ( ) $ ` \e " ' tab newline space * ? [ # ~
.brev
+If none of these characters occur in
+.metn str ,
+then
+.code sh-esc
+returns
+.metn str .
+
The
.code sh-esc-all
-function escapes all the above characters, and also these:
+function considers all the above characters, and also these:
.verb
= %
@@ -73416,16 +73423,13 @@ function escapes all the above characters, and also these:
The
.code sh-esc-dq
-function escapes the following characters:
+function escapes the following characters by preceding them with
+the \e (backslash) character:
.verb
- $ ` \e " newline
+ $ ` \e "
.brev
-To escape with a backslash means to precede every occurrence of the
-specified characters with the character
-.codn \e .
-
The
.code sh-esc-sq
function replaces every occurrence of the