summary | refs | log | tree | commit | diff | stats
path: root/awkreg.awk
diff options
context:
space:
mode:
author: Kaz Kylheku <kaz@kylheku.com> 2014-03-17 11:00:28 -0700
committer: Kaz Kylheku <kaz@kylheku.com> 2014-03-17 11:00:28 -0700
commit: b9f7be76a0d7fd986603ae24ff820547d0f78716 (patch)
tree: 3f4adf9114b004bfb31ffb0b6ab1208e8bcc58f3 /awkreg.awk
download: unix-cruft-b9f7be76a0d7fd986603ae24ff820547d0f78716.tar.gz
unix-cruft-b9f7be76a0d7fd986603ae24ff820547d0f78716.tar.bz2
unix-cruft-b9f7be76a0d7fd986603ae24ff820547d0f78716.zip
Initial version.
Diffstat (limited to 'awkreg.awk')
-rw-r--r-- awkreg.awk 205
1 files changed, 205 insertions, 0 deletions
diff --git a/awkreg.awk b/awkreg.awk
new file mode 100644
index 0000000..a0ea898
--- /dev/null
+++ b/awkreg.awk
@@ -0,0 +1,205 @@
+# Return 1 when s is the empty string, 0 otherwise.
+function empty(s)
+{
+    return length(s) == 0
+}
+
+# Drop the first character of s and return what is left.
+# An empty s yields an empty result.
+function eat_char(s)
+{
+    return substr(s, 2, length(s))
+}
+
+# Drop the first n characters of s and return what is left.
+function eat_chars(s, n)
+{
+    return substr(s, 1 + n)
+}
+
+# Return 1 when s begins with the literal text pfx, 0 otherwise.
+# pfx is compared as plain text, not as a pattern.
+function matches(s, pfx)
+{
+    return pfx == substr(s, 1, length(pfx))
+}
+
+# If s begins with the literal text pfx, return s with that prefix removed;
+# otherwise return s unchanged.
+function match_and_eat(s, pfx)
+{
+    return matches(s, pfx) ? eat_chars(s, length(pfx)) : s
+}
+
+# Consume one ordinary regex character from the front of c and return the
+# remainder.  c is returned unchanged (meaning "nothing consumed") when:
+#   - it starts with one of the operators  [ * + ? { } ( ) |
+#   - it is exactly "$" (left in place so is_regex can treat it as the
+#     end anchor)
+#   - it starts with a backslash that has nothing after it (an incomplete
+#     escape)
+function eat_rchar(c)
+{
+    # A backslash escape is two characters: the backslash and what follows.
+    if (c ~ /^\\./)
+        return eat_chars(c, 2)
+
+    # Any backslash that reaches this point has no character after it.
+    # Refuse it instead of eating it as an ordinary character, so that a
+    # pattern ending in a dangling backslash is not accepted.
+    if (c ~ /^\\/)
+        return c
+
+    if (c == "$")
+        return c
+
+    if (c !~ /^[\[\*\+\?{}\(\)|]/)
+        return eat_char(c)
+
+    return c
+}
+
+# Consume one character of a bracket expression from the front of c and
+# return the remainder.  The escapes \] \- and \\ count as one character
+# (two input characters).  c is returned unchanged when it starts with
+# "-" or "[", which the caller handles itself.
+function eat_bchar(c)
+{
+    # The group keeps all three alternatives under the ^ anchor.  Without
+    # it, only the first alternative is anchored and a \- or \\ occurring
+    # anywhere later in c would trigger a two-character eat here.
+    if (c ~ /^(\\]|\\-|\\\\)/)
+        return eat_chars(c, 2)
+
+    if (c !~ /^[\-\[]/)
+        return eat_char(c)
+
+    return c
+}
+
+# Strip a leading POSIX character-class token such as "[:alpha:]" from c.
+# Each class name is tried once, in the order listed below, against the
+# text left over by the previous attempt.  When no class name matches,
+# c is returned unchanged.
+function eat_class(c)
+{
+    if (matches(c, "[:alnum:]"))
+        c = eat_chars(c, length("[:alnum:]"))
+    if (matches(c, "[:alpha:]"))
+        c = eat_chars(c, length("[:alpha:]"))
+    if (matches(c, "[:blank:]"))
+        c = eat_chars(c, length("[:blank:]"))
+    if (matches(c, "[:cntrl:]"))
+        c = eat_chars(c, length("[:cntrl:]"))
+    if (matches(c, "[:digit:]"))
+        c = eat_chars(c, length("[:digit:]"))
+    if (matches(c, "[:graph:]"))
+        c = eat_chars(c, length("[:graph:]"))
+    if (matches(c, "[:lower:]"))
+        c = eat_chars(c, length("[:lower:]"))
+    if (matches(c, "[:print:]"))
+        c = eat_chars(c, length("[:print:]"))
+    if (matches(c, "[:punct:]"))
+        c = eat_chars(c, length("[:punct:]"))
+    if (matches(c, "[:space:]"))
+        c = eat_chars(c, length("[:space:]"))
+    if (matches(c, "[:upper:]"))
+        c = eat_chars(c, length("[:upper:]"))
+    if (matches(c, "[:xdigit:]"))
+        c = eat_chars(c, length("[:xdigit:]"))
+
+    return c
+}
+
+# Consume a bracket expression from the front of e and return the text
+# after its closing "]".  The first character of e is skipped without
+# being inspected.  If the contents cannot be parsed, the original e is
+# returned unchanged so the caller can see that nothing was consumed.
+function eat_bracket_exp(e,
+    #local
+    f, o)
+{
+    o = e               # kept so failure can hand back the untouched input
+    e = eat_char(e)     # step over the opening character
+
+    while (!matches(e, "]")) {
+        # A nested "[" must introduce a character class such as [:digit:].
+        if (matches(e, "[")) {
+            f = eat_class(e)
+            if (f == e)
+                return o
+            e = f
+            continue
+        }
+
+        # Otherwise expect one bracket character ...
+        f = eat_bchar(e)
+        if (f == e)
+            return o
+        e = f
+
+        # ... optionally followed by "-" and a second one, forming a range.
+        if (matches(e, "-")) {
+            e = eat_char(e)
+            f = eat_bchar(e)
+            if (f == e)
+                return o
+            e = f
+        }
+    }
+
+    # e now starts with the closing "]"; step over it.
+    return eat_char(e)
+}
+
+# Consume a brace repetition from the front of n and return the remainder.
+# Accepted forms are {N}, {N,} and {N,M}, where N and M are one or more
+# decimal digits.  If n does not start with a complete, well-formed
+# repetition, n is returned unchanged so the caller can detect the failure.
+# Returning a partly consumed remainder instead would let input such as
+# "a{1" or "a{1,x" pass as valid.
+function eat_rep_notation(n)
+{
+    # n is a by-value parameter, so sub() edits only the local copy, and
+    # it leaves that copy untouched when the pattern does not match.
+    # The braces are written as [{] and [}] so they are always literal.
+    sub(/^[{][0-9]+(,[0-9]*)?[}]/, "", n)
+    return n
+}
+
+# Consume one factor from the front of f and return the remainder.
+# A factor is a parenthesised sub-regex, a bracket expression, or a single
+# regex character.  f is returned unchanged when no factor can be consumed.
+function eat_factor(f,
+    #local
+    rest)
+{
+    if (matches(f, "(")) {
+        rest = eat_regex(eat_char(f))
+
+        # The group must be closed.  Without this check an unbalanced
+        # "(a" would be accepted, because the missing ")" would simply
+        # not be eaten.
+        if (!matches(rest, ")"))
+            return f
+
+        return eat_char(rest)
+    }
+
+    if (matches(f, "["))
+        return eat_bracket_exp(f)
+
+    return eat_rchar(f)
+}
+
+# Consume one term from the front of t: a factor plus an optional
+# quantifier (?, + or *, or a brace repetition).  Returns the remainder.
+# When the factor consumes nothing, t is returned unchanged.
+function eat_term(t,
+    #local
+    s)
+{
+    s = eat_factor(t)
+
+    # Either no factor was consumed or nothing is left to quantify.
+    if (s == t || empty(s))
+        return s
+
+    # Single-character quantifier.
+    if (s ~ /^[?+*]/)
+        return eat_char(s)
+
+    # Brace repetition, or no quantifier at all.
+    return matches(s, "{") ? eat_rep_notation(s) : s
+}
+
+# Consume as many terms as possible from the front of r, stepping over a
+# "|" that directly follows a term.  Returns whatever could not be
+# consumed; an empty result means all of r was recognised.
+function eat_regex(r,
+    #locals
+    s)
+{
+    while (!empty(r)) {
+        s = eat_term(r)
+
+        # Stop when the input is exhausted or the term made no progress.
+        if (empty(s) || s == r)
+            return s
+
+        r = s
+
+        if (matches(r, "|"))
+            r = eat_char(r)
+    }
+
+    return r
+}
+
+
+# Return 1 when r is recognised in full as a regex by this parser, else 0.
+# One leading "^" and one trailing "$" are accepted as anchors.
+function is_regex(r)
+{
+    # Strip an optional leading anchor.
+    r = match_and_eat(r, "^")
+
+    if (empty(r))
+        return 1
+
+    r = eat_regex(r)
+
+    # Valid if everything was consumed, or only the end anchor is left.
+    return empty(r) || r == "$"
+}
+
+# For every input record, print whether the whole record is a regex.
+{
+    if (is_regex($0))
+        printf("is_regex(%s)\n", "yes")
+    else
+        printf("is_regex(%s)\n", "no")
+}