summary refs log tree commit diff stats
path: root/awkreg.awk
diff options
context:
space:
mode:
author Kaz Kylheku <kaz@kylheku.com> 2014-03-17 11:48:29 -0700
committer Kaz Kylheku <kaz@kylheku.com> 2014-03-17 11:48:29 -0700
commit 6ac24f8203cd10d5442a02c220a1229b2b7d5513 (patch)
tree 46186b93b835f7027470440b8eae00f091bb0182 /awkreg.awk
parent b9f7be76a0d7fd986603ae24ff820547d0f78716 (diff)
download unix-cruft-6ac24f8203cd10d5442a02c220a1229b2b7d5513.tar.gz
unix-cruft-6ac24f8203cd10d5442a02c220a1229b2b7d5513.tar.bz2
unix-cruft-6ac24f8203cd10d5442a02c220a1229b2b7d5513.zip
Fix in {m,n} syntax. Improved driver code.
The issue is that the parser partially consumes broken {m,n} syntax. The eat_rep_notation function must backtrack fully, and its caller must detect it has done so.
Diffstat (limited to 'awkreg.awk')
-rw-r--r-- awkreg.awk 47
1 files changed, 36 insertions, 11 deletions
diff --git a/awkreg.awk b/awkreg.awk
index a0ea898..f966168 100644
--- a/awkreg.awk
+++ b/awkreg.awk
@@ -1,3 +1,9 @@
+function dbg(x) # debugging aid: prints x between angle brackets, then returns x unchanged
+{
+ printf("dbg: <%s>\n", x)
+ return x # handing x back lets a dbg call wrap an expression in place
+}
+
function empty(s)
{
return s == ""
@@ -25,6 +31,13 @@ function match_and_eat(s, pfx)
return s
}
+function match_and_eat_else(s, pfx, e) # returns eat_chars(s, length(pfx)) when matches(s, pfx) is true, otherwise e
+{
+ if (matches(s, pfx))
+ return eat_chars(s, length(pfx))
+ return e # no match: hand back the caller-supplied fallback value
+}
+
function eat_rchar(c)
{
if (c ~ /^\\./)
@@ -102,12 +115,15 @@ function eat_bracket_exp(e,
}
}
-function eat_rep_notation(n)
+function eat_rep_notation(n,
+ # local
+ o)
{
+ o = n
n = eat_char(n)
if (n !~ /^[0-9]/)
- return n
+ return o
while (n ~ /^[0-9]/)
n = eat_char(n)
@@ -116,7 +132,7 @@ function eat_rep_notation(n)
return eat_char(n)
if (!matches(n, ","))
- return n
+ return o
n = eat_char(n)
@@ -124,18 +140,18 @@ function eat_rep_notation(n)
return eat_char(n)
if (n !~ /^[0-9]/)
- return n
+ return o
while (n ~ /^[0-9]/)
n = eat_char(n)
- return match_and_eat(n, "}")
+ return match_and_eat_else(n, "}", o)
}
function eat_factor(f)
{
if (matches(f, "("))
- return match_and_eat(eat_regex(eat_char(f)), ")")
+ return match_and_eat_else(eat_regex(eat_char(f)), ")", f)
if (matches(f, "["))
return eat_bracket_exp(f)
@@ -183,23 +199,32 @@ function eat_regex(r,
return eat_regex(r)
}
-
-function is_regex(r)
+function regex_check(r) # returns the text left over after eat_regex; empty string when nothing is left
{
if (matches(r, "^"))
r = eat_char(r)
if (empty(r))
- return 1
+ return r # r is empty at this point, so this returns the empty string
r = eat_regex(r)
if (r == "$")
r = ""
- return empty(r);
+ return r # leftover text; a lone trailing $ was cleared just above
+}
+
+function is_regex(r) # true when regex_check(r) leaves no unconsumed text
+{
+ return empty(regex_check(r))
 }
{
- printf("is_regex(%s)\n", is_regex($0) ? "yes" : "no")
+ ok = is_regex($0) # 1 when the whole input line parses as a regex, else 0
+
+ printf("is_regex(\"%s\") = %d", $0, ok) # report ok; the earlier name ir was never assigned and always printed 0
+ if (!ok)
+ printf(", junk = \"%s\"", regex_check($0)) # show the text the parser could not consume
+ printf("\n")
}