summaryrefslogtreecommitdiffstats
path: root/regex.c
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2010-01-18 13:42:09 -0800
committerKaz Kylheku <kaz@kylheku.com>2010-01-18 13:42:09 -0800
commit8ad6bb9114f0f8ce2d86e4290f8ac86ed108dc16 (patch)
tree14ae09bb9257512704d78036bd42cf53b3c3d6b6 /regex.c
parentcf64082d6fe266a75fa65194c0a11eba37ca458b (diff)
downloadtxr-8ad6bb9114f0f8ce2d86e4290f8ac86ed108dc16.tar.gz
txr-8ad6bb9114f0f8ce2d86e4290f8ac86ed108dc16.tar.bz2
txr-8ad6bb9114f0f8ce2d86e4290f8ac86ed108dc16.zip
Adjust semantics of non-greedy operator R%S, to avoid the broken
case whereby R%S matches nothing at all when S is not empty but equivalent to empty, or more generally when S is nullable. A much nicer definition is ``the intersection of R* and the set of all strings that do not contain a non-empty substring that matches S, followed by S''.
Diffstat (limited to 'regex.c')
-rw-r--r--regex.c12
1 files changed, 9 insertions, 3 deletions
diff --git a/regex.c b/regex.c
index 5107c5ec..174fb0d9 100644
--- a/regex.c
+++ b/regex.c
@@ -1126,6 +1126,8 @@ static struct cobj_ops regex_obj_ops = {
cobj_hash_op
};
+static val reg_nullable(val);
+
/*
* ``Compile'' raw regular expression to a form that is
* easier to simulate by the derivative method.
@@ -1165,12 +1167,18 @@ static val dv_compile_regex(val exp)
return zplus;
} else {
val any = list(zeroplus_s, wild_s, nao);
+ val notempty = list(oneplus_s, wild_s, nao);
return list(compound_s,
list(and_s,
zplus,
list(compl_s,
- list(compound_s, any, xsecond, any, nao),
+ list(compound_s,
+ any,
+ if3(reg_nullable(xsecond),
+ list(and_s, xsecond, notempty, nao),
+ xsecond),
+ any, nao),
nao),
nao),
xsecond, nao);
@@ -1181,8 +1189,6 @@ static val dv_compile_regex(val exp)
}
}
-static val reg_nullable(val);
-
/*
* Helper to reg_nullable for recursing over
* contents of a compound expression.