summaryrefslogtreecommitdiffstats
path: root/regex.c
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2015-09-24 21:16:56 -0700
committerKaz Kylheku <kaz@kylheku.com>2015-09-24 21:16:56 -0700
commit977d61d2229d3517226edfb3a32b929c6d95d19b (patch)
tree97490af644fca30b67e839e04dd74cb9215352f3 /regex.c
parent1b5b19c0d1e1399f1276cdcad14b3f30bfca9001 (diff)
downloadtxr-977d61d2229d3517226edfb3a32b929c6d95d19b.tar.gz
txr-977d61d2229d3517226edfb3a32b929c6d95d19b.tar.bz2
txr-977d61d2229d3517226edfb3a32b929c6d95d19b.zip
regex: major optimization for complement operator.
This change a huge improvement for expressions that use complement, directly or via the non-greedy % operator. * regex.c (reg_matches_all): New static function. (reg_derivative): When the dervative is applied to a complement expression, identify situations when the remaining expression cannot possibly match anything, and convert them to the t expression.
Diffstat (limited to 'regex.c')
-rw-r--r--regex.c47
1 files changed, 46 insertions, 1 deletions
diff --git a/regex.c b/regex.c
index 9cfd60d8..cb490d40 100644
--- a/regex.c
+++ b/regex.c
@@ -1450,6 +1450,46 @@ static val reg_nullable(val exp)
}
}
+static val reg_matches_all(val exp)
+{
+ if (atom(exp)) {
+ return nil;
+ } else {
+ val sym = first(exp), args = rest(exp);
+
+ if (sym == set_s || sym == cset_s) {
+ return nil;
+ } else if (sym == compound_s) {
+ val am = nil;
+ for (; args; args = cdr(args)) {
+ val arg = car(args);
+ if (!reg_nullable(arg))
+ return nil;
+ if (!am && reg_matches_all(arg))
+ am = t;
+ }
+ return am;
+ } else if (sym == oneplus_s) {
+ return reg_matches_all(car(args));
+ } else if (sym == zeroplus_s) {
+ val arg = car(args);
+ if (arg == wild_s || reg_matches_all(arg))
+ return t;
+ return nil;
+ } else if (sym == optional_s) {
+ return reg_matches_all(car(args));
+ } else if (sym == compl_s) {
+ return tnil(car(args));
+ } else if (sym == or_s) {
+ return tnil(reg_matches_all(pop(&args)) || reg_matches_all(pop(&args)));
+ } else if (sym == and_s) {
+ return tnil(reg_matches_all(pop(&args)) && reg_matches_all(pop(&args)));
+ } else {
+ internal_error("bad operator in regex");
+ }
+ }
+}
+
static val flatten_or(val or_expr)
{
if (atom(or_expr) || car(or_expr) != or_s) {
@@ -1588,7 +1628,12 @@ static val reg_derivative(val exp, val ch)
return exp;
return cons(compound_s, cons(d_arg, cons(exp, nil)));
} else if (sym == compl_s) {
- return cons(sym, cons(reg_derivative(first(args), ch), nil));
+ val d_arg = reg_derivative(first(args), ch);
+ if (reg_matches_all(d_arg))
+ return t;
+ if (d_arg == t)
+ return nil;
+ return cons(sym, cons(d_arg, nil));
} else if (sym == or_s) {
val d_arg1 = reg_derivative(first(args), ch);
val d_arg2 = reg_derivative(second(args), ch);