summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Kaz Kylheku <kaz@kylheku.com> 2015-07-02 19:56:58 -0700
committer Kaz Kylheku <kaz@kylheku.com> 2015-07-02 19:56:58 -0700
commit 9aaeb2fc604feee26a72a672fd846b4e70c6c1aa (patch)
tree 04b94e178dbd26e177524894bef9a24d0dda2176
parent 7e13dd271df070671f9ef59969307d1bfb045ffb (diff)
download txr-9aaeb2fc604feee26a72a672fd846b4e70c6c1aa.tar.gz
txr-9aaeb2fc604feee26a72a672fd846b4e70c6c1aa.tar.bz2
txr-9aaeb2fc604feee26a72a672fd846b4e70c6c1aa.zip
Support trailing semicolon after hex/octal characters.
* parser.l (%option): Remove nounput option since we need yyunput.
(grammar): Rule for matching hex and octal escape in SPECIAL state recognizes optional semicolon. In 109 compatibility, this is pushed back into the stream, otherwise consumed.
* txr.1: Updated documentation, including compat notes.
* genvim.txr (txr_char): Include optional semicolon in match. Corrected some errors where 8 and 9 were being included as matches for octal digits.
(txr_error): Default match for \x or \o not followed by digits.
-rw-r--r-- ChangeLog 18
-rw-r--r-- genvim.txr 7
-rw-r--r-- parser.l 11
-rw-r--r-- txr.1 17
4 files changed, 47 insertions, 6 deletions
diff --git a/ChangeLog b/ChangeLog
index fbb802ff..998d3dab 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,23 @@
2015-07-02 Kaz Kylheku <kaz@kylheku.com>
+ Support trailing semicolon after hex/octal characters.
+
+ * parser.l (%option): Remove nounput option since we need
+ yyunput.
+ (grammar): Rule for matching hex and octal escape in SPECIAL
+ state recognizes optional semicolon. In 109 compatibility,
+ this is pushed back into the stream, otherwise consumed.
+
+ * txr.1: Updated documentation, including compat notes.
+
+ * genvim.txr (txr_char): Include optional semicolon in
+ match. Corrected some errors where 8 and 9 were being
+ included as matches for octal digits.
+ (txr_error): Default match for \x or \o not followed
+ by digits.
+
+2015-07-02 Kaz Kylheku <kaz@kylheku.com>
+
Hash-bang support for .tl files.
* parser.c (read_eval_stream): New boolean argument
diff --git a/genvim.txr b/genvim.txr
index e74ed9c2..5aab351f 100644
--- a/genvim.txr
+++ b/genvim.txr
@@ -78,8 +78,9 @@ syn match txr_atat "@@[ \t]*@@"
syn match txr_comment "@@[ \t]*[#;].*"
syn match txr_contin "@@[ \t]*\\$"
syn match txr_char "@@[ \t]*\\."
-syn match txr_char "@@[ \t]*\\x[0-9A-Fa-f]\+"
-syn match txr_char "@@[ \t]*\\[0-9]\+"
+syn match txr_error "@@[ \t]*\\[xo]"
+syn match txr_char "@@[ \t]*\\x[0-9A-Fa-f]\+;\?"
+syn match txr_char "@@[ \t]*\\[0-7]\+;\?"
syn match txr_variable "@@[ \t]*[*]\?[ \t]*[A-Za-z_][A-Za-z0-9_]*"
syn match txr_splicevar "@@[ \t,*]*[A-Za-z_][A-Za-z0-9_]*"
syn match txr_regdir "@@[ \t]*/\(\\/\|[^/]\|\\\n\)*/"
@@ -97,7 +98,7 @@ syn match txr_regesc "\\[abtnvfre\\ \n/sSdDwW()\|.*?+~&%\[\]\-]" contained
syn match txr_nested_error "[^\t `]\+" contained
syn match txr_chr "#\\x[A-Fa-f0-9]\+"@(if txr-p " contained")
-syn match txr_chr "#\\o[0-9]\+"@(if txr-p " contained")
+syn match txr_chr "#\\o[0-7]\+"@(if txr-p " contained")
syn match txr_chr "#\\[^ \t\nA-Za-z0-9_]"@(if txr-p " contained")
syn match txr_chr "#\\[A-Za-z0-9_]\+"@(if txr-p " contained")
syn match txr_ncomment ";.*"@(if txr-p " contained")
diff --git a/parser.l b/parser.l
index 37bbdc70..30cb3034 100644
--- a/parser.l
+++ b/parser.l
@@ -166,7 +166,7 @@ static wchar_t num_esc(scanner_t *scn, char *num)
%}
-%option stack nounput noinput reentrant bison-bridge extra-type="parser_t *"
+%option stack noinput reentrant bison-bridge extra-type="parser_t *"
SYM [a-zA-Z0-9_]+
SGN [+\-]
@@ -685,11 +685,18 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return TEXT;
}
-<SPECIAL>[\\](x{HEX}+|{OCT}+) {
+<SPECIAL>[\\](x{HEX}+|{OCT}+);? {
wchar_t lexeme[2];
lexeme[0] = num_esc(yyg, yytext + 1);
lexeme[1] = 0;
yylval->lexeme = chk_strdup(lexeme);
+
+ {
+ char lastchar = yytext[yyleng-1];
+ if (lastchar == ';' && opt_compat && opt_compat <= 109)
+ unput(lastchar);
+ }
+
yy_pop_state(yyscanner);
return TEXT;
}
diff --git a/txr.1 b/txr.1
index 3bbdabef..894143db 100644
--- a/txr.1
+++ b/txr.1
@@ -1155,7 +1155,9 @@ A
immediately followed by a sequence of hex digits is interpreted as a hexadecimal
numeric character code. For instance
.code @\ex41
-is the ASCII character A.
+is the ASCII character A. If a semicolon character immediately follows the
+hex digits, it is consumed, and characters which follow are not considered
+part of the hex escape even if they are hex digits.
.meIP @\e < octal-digits
A
@@ -1165,6 +1167,9 @@ as an octal character code. For instance
.code @\e010
is character 8, same as
.codn @\eb .
+If a semicolon character immediately follows the octal digits, it is consumed,
+and subsequent characters are not treated as part of the octal escape,
+even if they are octal digits.
.PP
Note that if a newline is embedded into a query line with
@@ -30636,6 +30641,16 @@ is given an argument which is equal or lower. For instance
.code -C 103
selects the behaviors described below for version 105, but not those for 102.
+.IP 109
+The optional trailing semicolon on hex and octal codes in the \*(TX
+pattern language was introduced in 110. The feature is disabled
+with 109 or lower compatibility, so that
+.code @\ex21;a
+encodes
+.code !;a
+rather than the current behavior of encoding
+.codn !a .
+
.IP 107
Up through \*(TX 107, by accident, there was a function called
.code flip