summaryrefslogtreecommitdiffstats
path: root/regex.c
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2016-10-10 09:48:47 -0700
committerKaz Kylheku <kaz@kylheku.com>2016-10-10 09:48:47 -0700
commit0e07d523d298ab156bea4fb7570a6cc4b41fb79d (patch)
treeaa634cf2d98c8cfb32d0469439459cec694a6580 /regex.c
parent0c09759df87cdbbd33296a086debe58e58587f7e (diff)
downloadtxr-0e07d523d298ab156bea4fb7570a6cc4b41fb79d.tar.gz
txr-0e07d523d298ab156bea4fb7570a6cc4b41fb79d.tar.bz2
txr-0e07d523d298ab156bea4fb7570a6cc4b41fb79d.zip
Support n-ary "and" and "or" operators in regex.
Since much regex code assumes these are binary, the easiest and briefest approach is to implement a code transformation pass which rewrites n-ary forms into binary. * regex.c (reg_nary_unfold, reg_nary_to_bin): new functions. (regex_compile): Put raw sexp through reg_nary_to_bin to expand the n-ary syntax.
Diffstat (limited to 'regex.c')
-rw-r--r--regex.c64
1 files changed, 63 insertions, 1 deletions
diff --git a/regex.c b/regex.c
index a3338d22..14f89fcc 100644
--- a/regex.c
+++ b/regex.c
@@ -1434,6 +1434,68 @@ static val reg_expand_nongreedy(val exp)
uw_throwf(error_s, lit("bad object in regex syntax: ~s"), exp, nao);
}
}
+
+static val reg_nary_to_bin(val regex);
+
+/*
+ * Turn the operand list of an n-ary regex operator into nested binary
+ * applications of that operator, nesting to the right:
+ *
+ *   (sym a b c d)  ->  (sym a' (sym b' (sym c' d')))
+ *
+ * where each x' is the result of passing x through reg_nary_to_bin.
+ *
+ * sym  - the operator symbol placed at the head of every generated form.
+ * args - the operands of the operator (the form without its head).
+ * orig - the complete original form, or nil. When there are exactly two
+ *        operands and neither was altered by the rewrite, orig is returned
+ *        as-is instead of building an equivalent new list. Passing nil
+ *        forces a new list in that case.
+ *
+ * With no operands the result is t; a single operand yields just that
+ * operand, rewritten.
+ */
+static val reg_nary_unfold(val sym, val args, val orig)
+{
+  val head, tail;
+
+  /* Nullary intersection and union are both empty. */
+  if (atom(args))
+    return t;
+
+  head = car(args);
+  tail = cdr(args);
+
+  /* One operand: the operator wrapper disappears. */
+  if (!tail)
+    return reg_nary_to_bin(head);
+
+  /* Three or more operands: peel off the first one and fold the remainder
+     into a nested form. There is no original form for that sub-list, hence
+     the nil. */
+  if (cdr(tail)) {
+    val left = reg_nary_to_bin(head);
+    val right = reg_nary_unfold(sym, tail, nil);
+    return list(sym, left, right, nao);
+  }
+
+  /* Exactly two operands: already binary; only the operands may change. */
+  {
+    val second_arg = car(tail);
+    val new_first = reg_nary_to_bin(head);
+    val new_second = reg_nary_to_bin(second_arg);
+
+    if (orig && head == new_first && second_arg == new_second)
+      return orig;
+
+    return list(sym, new_first, new_second, nao);
+  }
+}
+
+/*
+ * Walk a regex expression tree and rewrite every n-ary "and"/"or" form
+ * into nested binary forms.
+ *
+ * Atoms are returned unchanged. Forms headed by or_s or and_s are handed to
+ * reg_nary_unfold. Forms headed by compound_s, zeroplus_s, oneplus_s,
+ * optional_s, compl_s or nongreedy_s have each operand rewritten
+ * recursively. Any other form is returned untouched, without descending
+ * into it.
+ *
+ * The input is never modified. If no operand of a form changes, the very
+ * same object is returned, so callers can detect "no change" by identity.
+ */
+static val reg_nary_to_bin(val regex)
+{
+  val op, operands;
+
+  if (atom(regex))
+    return regex;
+
+  op = first(regex);
+  operands = rest(regex);
+
+  if (op == or_s || op == and_s)
+    return reg_nary_unfold(op, operands, regex);
+
+  /* Only these operators carry sub-expressions that need visiting. */
+  if (op != compound_s && op != zeroplus_s && op != oneplus_s &&
+      op != optional_s && op != compl_s && op != nongreedy_s)
+    return regex;
+
+  {
+    list_collect_decl (result, tail);
+    val changed = nil;
+    val iter;
+
+    for (iter = operands; iter; iter = cdr(iter)) {
+      val item = car(iter);
+      val conv = reg_nary_to_bin(item);
+
+      /* First operand that differs: start the output list with a copy of
+         all the operands that came before it. */
+      if (!changed && item != conv) {
+        tail = list_collect_nconc(tail, ldiff(operands, iter));
+        changed = t;
+      }
+
+      /* From that point on, every operand goes to the output list. */
+      if (changed)
+        tail = list_collect(tail, conv);
+    }
+
+    if (changed)
+      return cons(op, result);
+  }
+
+  return regex;
+}
+
static val reg_compile_csets(val exp)
{
if (exp == space_k) {
@@ -2044,7 +2106,7 @@ val regex_compile(val regex_sexp, val error_stream)
return if2(regex_sexp, regex_compile(regex_sexp, error_stream));
}
- regex_sexp = reg_optimize(reg_expand_nongreedy(regex_sexp));
+ regex_sexp = reg_optimize(reg_expand_nongreedy(reg_nary_to_bin(regex_sexp)));
if (opt_derivative_regex || regex_requires_dv(regex_sexp)) {
regex_t *regex = coerce(regex_t *, chk_malloc(sizeof *regex));