summaryrefslogtreecommitdiffstats
path: root/parser.l
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2016-04-21 06:57:42 -0700
committerKaz Kylheku <kaz@kylheku.com>2016-04-21 06:57:42 -0700
commitef8fe557841c440bf9e3e13ee0801bc127091b7e (patch)
tree5f351a20c31d7a9e8d4c832cd2a536b6cc9aa89f /parser.l
parent96ca2e8a7799a2b820e6e3e6727f3fe15d56c061 (diff)
downloadtxr-ef8fe557841c440bf9e3e13ee0801bc127091b7e.tar.gz
txr-ef8fe557841c440bf9e3e13ee0801bc127091b7e.tar.bz2
txr-ef8fe557841c440bf9e3e13ee0801bc127091b7e.zip
Handle non-UTF-8 byte in regex scanned from string.
Currently there is no lex rule matching such a byte in the SREGEX state, so it falls through to the scanner's default action and gets echoed. parser.l (grammar): Add a fallback rule that matches one byte in the SREGEX state and turns it into a 0xDCxx character.
Diffstat (limited to 'parser.l')
-rw-r--r--parser.l6
1 files changed, 6 insertions, 0 deletions
diff --git a/parser.l b/parser.l
index d87e03eb..9ac79228 100644
--- a/parser.l
+++ b/parser.l
@@ -872,6 +872,12 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return REGCHAR;
}
+<SREGEX>. {
+ /* Fallback: allow a non-UTF-8 byte in a regex scanned from a string; it is returned as a REGCHAR whose value is 0xDC00 plus the unsigned byte value. */
+ yylval->chr = (unsigned char) yytext[0] + 0xDC00;
+ return REGCHAR;
+}
+
<REGEX>. {
yyerrprepf(yyg, lit("non-UTF-8 byte in regex: '\\x~02x'"),
num(convert(unsigned char, yytext[0])), nao);