aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Arnold D. Robbins <arnold@skeeve.com> 2015-04-28 09:28:04 +0300
committer Arnold D. Robbins <arnold@skeeve.com> 2015-04-28 09:28:04 +0300
commit 7bda05c66848de97a7b43aa3e37ff4336f1b3220 (patch)
tree 2721099b731ca1503f690a101caf592ccda3d4f3
parent 454ae7c0f350842ab40a30ff4a2643cd76e8e277 (diff)
download egawk-7bda05c66848de97a7b43aa3e37ff4336f1b3220.tar.gz
egawk-7bda05c66848de97a7b43aa3e37ff4336f1b3220.tar.bz2
egawk-7bda05c66848de97a7b43aa3e37ff4336f1b3220.zip
Fix bracket handling. "This time for sure."
-rw-r--r-- ChangeLog 5
-rw-r--r-- awkgram.c 49
-rw-r--r-- awkgram.y 49
3 files changed, 61 insertions, 42 deletions
diff --git a/ChangeLog b/ChangeLog
index e5d473c0..3e9764a2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2015-04-28 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awkgram.y (yylex): Rework the bracket handling from zero.
+ Thanks to Michal Jaegermann for yet another test case.
+
2015-04-27 Arnold D. Robbins <arnold@skeeve.com>
* awkgram.y (yylex): Make change of Jan 7 for parsing regexps
diff --git a/awkgram.c b/awkgram.c
index 530aa27c..14e29d98 100644
--- a/awkgram.c
+++ b/awkgram.c
@@ -5358,21 +5358,24 @@ yylex(void)
thisline = NULL;
if (want_regexp) {
int in_brack = 0; /* count brackets, [[:alnum:]] allowed */
+ int b_index = -1;
+ int cur_index = 0;
+
/*
- * Counting brackets is non-trivial. [[] is ok,
- * and so is [\]], with a point being that /[/]/ as a regexp
- * constant has to work.
+ * Here is what's ok with brackets:
+ *
+ * [[] [^[] []] [^]] [.../...]
+ * [...\[...] [...\]...] [...\/...]
+ *
+ * (Remember that all of the above are inside /.../)
+ *
+ * The code for \ handles \[, \] and \/.
*
- * Do not count [ or ] if either one is preceded by a \.
- * A `[' should be counted if
- * a) it is the first one so far (in_brack == 0)
- * b) it is the `[' in `[:'
- * A ']' should be counted if not preceded by a \, since
- * it is either closing `:]' or just a plain list.
- * According to POSIX, []] is how you put a ] into a set.
- * Try to handle that too.
+ * Otherwise, track the first open [ position, and if
+ * an embedded [ or ] occurs, allow it to pass through
+ * if it's right after the first [ or after [^.
*
- * The code for \ handles \[ and \].
+ * Whew!
*/
want_regexp = false;
@@ -5382,17 +5385,21 @@ yylex(void)
if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
case '[':
- /* one day check for `.' and `=' too */
- if (nextc(false) == ':' || in_brack == 0)
- in_brack++;
- pushback();
- break;
case ']':
- if ((tok[-1] == '[' && tok[-2] != '\\')
- || (tok[-2] == '[' && tok[-3] != '\\' && tok[-1] == '^'))
- /* do nothing */;
- else
+ cur_index = tok - tokstart;
+ if (in_brack > 0
+ && (cur_index == b_index + 1
+ || (cur_index == b_index + 2 && tok[-1] == '^')))
+ ; /* do nothing */
+ else if (c == '[') {
+ in_brack++;
+ if (in_brack == 1)
+ b_index = tok - tokstart;
+ } else {
in_brack--;
+ if (in_brack == 0)
+ b_index = -1;
+ }
break;
case '\\':
if ((c = nextc(false)) == END_FILE) {
diff --git a/awkgram.y b/awkgram.y
index 31751e8e..beb85d5a 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -3019,21 +3019,24 @@ yylex(void)
thisline = NULL;
if (want_regexp) {
int in_brack = 0; /* count brackets, [[:alnum:]] allowed */
+ int b_index = -1;
+ int cur_index = 0;
+
/*
- * Counting brackets is non-trivial. [[] is ok,
- * and so is [\]], with a point being that /[/]/ as a regexp
- * constant has to work.
+ * Here is what's ok with brackets:
+ *
+ * [[] [^[] []] [^]] [.../...]
+ * [...\[...] [...\]...] [...\/...]
+ *
+ * (Remember that all of the above are inside /.../)
+ *
+ * The code for \ handles \[, \] and \/.
*
- * Do not count [ or ] if either one is preceded by a \.
- * A `[' should be counted if
- * a) it is the first one so far (in_brack == 0)
- * b) it is the `[' in `[:'
- * A ']' should be counted if not preceded by a \, since
- * it is either closing `:]' or just a plain list.
- * According to POSIX, []] is how you put a ] into a set.
- * Try to handle that too.
+ * Otherwise, track the first open [ position, and if
+ * an embedded [ or ] occurs, allow it to pass through
+ * if it's right after the first [ or after [^.
*
- * The code for \ handles \[ and \].
+ * Whew!
*/
want_regexp = false;
@@ -3043,17 +3046,21 @@ yylex(void)
if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
case '[':
- /* one day check for `.' and `=' too */
- if (nextc(false) == ':' || in_brack == 0)
- in_brack++;
- pushback();
- break;
case ']':
- if ((tok[-1] == '[' && tok[-2] != '\\')
- || (tok[-2] == '[' && tok[-3] != '\\' && tok[-1] == '^'))
- /* do nothing */;
- else
+ cur_index = tok - tokstart;
+ if (in_brack > 0
+ && (cur_index == b_index + 1
+ || (cur_index == b_index + 2 && tok[-1] == '^')))
+ ; /* do nothing */
+ else if (c == '[') {
+ in_brack++;
+ if (in_brack == 1)
+ b_index = tok - tokstart;
+ } else {
in_brack--;
+ if (in_brack == 0)
+ b_index = -1;
+ }
break;
case '\\':
if ((c = nextc(false)) == END_FILE) {