diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2015-04-28 09:28:04 +0300 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2015-04-28 09:28:04 +0300 |
commit | 7bda05c66848de97a7b43aa3e37ff4336f1b3220 (patch) | |
tree | 2721099b731ca1503f690a101caf592ccda3d4f3 | |
parent | 454ae7c0f350842ab40a30ff4a2643cd76e8e277 (diff) | |
download | egawk-7bda05c66848de97a7b43aa3e37ff4336f1b3220.tar.gz egawk-7bda05c66848de97a7b43aa3e37ff4336f1b3220.tar.bz2 egawk-7bda05c66848de97a7b43aa3e37ff4336f1b3220.zip |
Fix bracket handling. "This time for sure."
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | awkgram.c | 49 | ||||
-rw-r--r-- | awkgram.y | 49 |
3 files changed, 61 insertions, 42 deletions
```diff
@@ -1,3 +1,8 @@
+2015-04-28 Arnold D. Robbins <arnold@skeeve.com>
+
+	* awkgram.y (yylex): Rework the bracket handling from zero.
+	Thanks to Michal Jaegermann for yet another test case.
+
 2015-04-27 Arnold D. Robbins <arnold@skeeve.com>
 
 	* awkgram.y (yylex): Make change of Jan 7 for parsing regexps
@@ -5358,21 +5358,24 @@ yylex(void)
 	thisline = NULL;
 	if (want_regexp) {
 		int in_brack = 0; /* count brackets, [[:alnum:]] allowed */
+		int b_index = -1;
+		int cur_index = 0;
+
 		/*
-		 * Counting brackets is non-trivial. [[] is ok,
-		 * and so is [\]], with a point being that /[/]/ as a regexp
-		 * constant has to work.
+		 * Here is what's ok with brackets:
+		 *
+		 * [[] [^[] []] [^]] [.../...]
+		 * [...\[...] [...\]...] [...\/...]
+		 *
+		 * (Remember that all of the above are inside /.../)
+		 *
+		 * The code for \ handles \[, \] and \/.
 		 *
-		 * Do not count [ or ] if either one is preceded by a \.
-		 * A `[' should be counted if
-		 * a) it is the first one so far (in_brack == 0)
-		 * b) it is the `[' in `[:'
-		 * A ']' should be counted if not preceded by a \, since
-		 * it is either closing `:]' or just a plain list.
-		 * According to POSIX, []] is how you put a ] into a set.
-		 * Try to handle that too.
+		 * Otherwise, track the first open [ position, and if
+		 * an embedded [ or ] occurs, allow it to pass through
+		 * if it's right after the first [ or after [^.
 		 *
-		 * The code for \ handles \[ and \].
+		 * Whew!
 		 */
 
 		want_regexp = false;
@@ -5382,17 +5385,21 @@ yylex(void)
 
 			if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
 			case '[':
-				/* one day check for `.' and `=' too */
-				if (nextc(false) == ':' || in_brack == 0)
-					in_brack++;
-				pushback();
-				break;
 			case ']':
-				if ((tok[-1] == '[' && tok[-2] != '\\')
-				    || (tok[-2] == '[' && tok[-3] != '\\' && tok[-1] == '^'))
-					/* do nothing */;
-				else
+				cur_index = tok - tokstart;
+				if (in_brack > 0
+				    && (cur_index == b_index + 1
+					|| (cur_index == b_index + 2 && tok[-1] == '^')))
+					; /* do nothing */
+				else if (c == '[') {
+					in_brack++;
+					if (in_brack == 1)
+						b_index = tok - tokstart;
+				} else {
 					in_brack--;
+					if (in_brack == 0)
+						b_index = -1;
+				}
 				break;
 			case '\\':
 				if ((c = nextc(false)) == END_FILE) {
@@ -3019,21 +3019,24 @@ yylex(void)
 	thisline = NULL;
 	if (want_regexp) {
 		int in_brack = 0; /* count brackets, [[:alnum:]] allowed */
+		int b_index = -1;
+		int cur_index = 0;
+
 		/*
-		 * Counting brackets is non-trivial. [[] is ok,
-		 * and so is [\]], with a point being that /[/]/ as a regexp
-		 * constant has to work.
+		 * Here is what's ok with brackets:
+		 *
+		 * [[] [^[] []] [^]] [.../...]
+		 * [...\[...] [...\]...] [...\/...]
+		 *
+		 * (Remember that all of the above are inside /.../)
+		 *
+		 * The code for \ handles \[, \] and \/.
 		 *
-		 * Do not count [ or ] if either one is preceded by a \.
-		 * A `[' should be counted if
-		 * a) it is the first one so far (in_brack == 0)
-		 * b) it is the `[' in `[:'
-		 * A ']' should be counted if not preceded by a \, since
-		 * it is either closing `:]' or just a plain list.
-		 * According to POSIX, []] is how you put a ] into a set.
-		 * Try to handle that too.
+		 * Otherwise, track the first open [ position, and if
+		 * an embedded [ or ] occurs, allow it to pass through
+		 * if it's right after the first [ or after [^.
 		 *
-		 * The code for \ handles \[ and \].
+		 * Whew!
 		 */
 
 		want_regexp = false;
@@ -3043,17 +3046,21 @@ yylex(void)
 
 			if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
 			case '[':
-				/* one day check for `.' and `=' too */
-				if (nextc(false) == ':' || in_brack == 0)
-					in_brack++;
-				pushback();
-				break;
 			case ']':
-				if ((tok[-1] == '[' && tok[-2] != '\\')
-				    || (tok[-2] == '[' && tok[-3] != '\\' && tok[-1] == '^'))
-					/* do nothing */;
-				else
+				cur_index = tok - tokstart;
+				if (in_brack > 0
+				    && (cur_index == b_index + 1
+					|| (cur_index == b_index + 2 && tok[-1] == '^')))
+					; /* do nothing */
+				else if (c == '[') {
+					in_brack++;
+					if (in_brack == 1)
+						b_index = tok - tokstart;
+				} else {
 					in_brack--;
+					if (in_brack == 0)
+						b_index = -1;
+				}
 				break;
 			case '\\':
 				if ((c = nextc(false)) == END_FILE) {
```