diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2014-03-17 11:48:29 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2014-03-17 11:48:29 -0700 |
commit | 6ac24f8203cd10d5442a02c220a1229b2b7d5513 (patch) | |
tree | 46186b93b835f7027470440b8eae00f091bb0182 /awkreg.awk | |
parent | b9f7be76a0d7fd986603ae24ff820547d0f78716 (diff) | |
download | unix-cruft-6ac24f8203cd10d5442a02c220a1229b2b7d5513.tar.gz unix-cruft-6ac24f8203cd10d5442a02c220a1229b2b7d5513.tar.bz2 unix-cruft-6ac24f8203cd10d5442a02c220a1229b2b7d5513.zip |
Fix handling of broken {m,n} syntax. Improved driver code.
The issue is that the parser partially consumes broken {m,n}
syntax. The eat_rep_notation function must backtrack fully,
returning its input unchanged when the notation is malformed,
and its caller must detect that it has done so.
Diffstat (limited to 'awkreg.awk')
-rw-r--r-- | awkreg.awk | 47 |
1 file changed, 36 insertions, 11 deletions
@@ -1,3 +1,9 @@ +function dbg(x) +{ + printf("dbg: <%s>\n", x) + return x +} + function empty(s) { return s == "" @@ -25,6 +31,13 @@ function match_and_eat(s, pfx) return s } +function match_and_eat_else(s, pfx, e) +{ + if (matches(s, pfx)) + return eat_chars(s, length(pfx)) + return e +} + function eat_rchar(c) { if (c ~ /^\\./) @@ -102,12 +115,15 @@ function eat_bracket_exp(e, } } -function eat_rep_notation(n) +function eat_rep_notation(n, + # local + o) { + o = n n = eat_char(n) if (n !~ /^[0-9]/) - return n + return o while (n ~ /^[0-9]/) n = eat_char(n) @@ -116,7 +132,7 @@ function eat_rep_notation(n) return eat_char(n) if (!matches(n, ",")) - return n + return o n = eat_char(n) @@ -124,18 +140,18 @@ function eat_rep_notation(n) return eat_char(n) if (n !~ /^[0-9]/) - return n + return o while (n ~ /^[0-9]/) n = eat_char(n) - return match_and_eat(n, "}") + return match_and_eat_else(n, "}", o) } function eat_factor(f) { if (matches(f, "(")) - return match_and_eat(eat_regex(eat_char(f)), ")") + return match_and_eat_else(eat_regex(eat_char(f)), ")", f) if (matches(f, "[")) return eat_bracket_exp(f) @@ -183,23 +199,32 @@ function eat_regex(r, return eat_regex(r) } - -function is_regex(r) +function regex_check(r) { if (matches(r, "^")) r = eat_char(r) if (empty(r)) - return 1 + return r r = eat_regex(r) if (r == "$") r = "" - return empty(r); + return r +} + +function is_regex(r) +{ + return empty(regex_check(r)) } { - printf("is_regex(%s)\n", is_regex($0) ? "yes" : "no") + ok = is_regex($0) + + printf("is_regex(\"%s\") = %d", $0, ir) + if (!ok) + printf(", junk = \"%s\"", regex_check($0)) + printf("\n") } |