Remove avoid_dfa. Simplify dfa usage and rearrange callers in re.c.

author: Arnold D. Robbins <arnold@skeeve.com> 2016-08-25 21:40:11 +0300
committer: Arnold D. Robbins <arnold@skeeve.com> 2016-08-25 21:40:11 +0300
commit: 7453c813457583197fcf0fe1c7d2301d6013bfea (patch)
tree: ecaa73572d5f486bf64c1fdf000d7b3944277a82 /re.c
parent: d60bf1935df309eea0bcc87ec542030a5b022f35 (diff)
download: egawk-7453c813457583197fcf0fe1c7d2301d6013bfea.tar.gz
egawk-7453c813457583197fcf0fe1c7d2301d6013bfea.tar.bz2
egawk-7453c813457583197fcf0fe1c7d2301d6013bfea.zip
1 file changed, 13 insertions, 42 deletions
diff --git a/re.c b/re.c
index c7899694..b11a6984 100644
--- a/re.c
+++ b/re.c
@@ -170,7 +170,6 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
 
 	emalloc(rp, Regexp *, sizeof(*rp), "make_regexp");
 	memset((char *) rp, 0, sizeof(*rp));
-	rp->dfareg = NULL;
 	rp->pat.allocated = 0;	/* regex will allocate the buffer */
 	emalloc(rp->pat.fastmap, char *, 256, "make_regexp");
 
@@ -223,12 +222,11 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
 	/* gack. this must be done *after* re_compile_pattern */
 	rp->pat.newline_anchor = false; /* don't get \n in middle of string */
 	if (dfa && ! no_dfa) {
-		rp->dfa = true;
 		rp->dfareg = dfaalloc();
 		dfasyntax(rp->dfareg, dfa_syn, ignorecase, '\n');
 		dfacomp(buf, len, rp->dfareg, true);
 	} else
-		rp->dfa = false;
+		rp->dfareg = NULL;
 	rp->has_anchor = has_anchor;
 
 	/* Additional flags that help with RS as regexp. */
@@ -278,26 +276,25 @@ research(Regexp *rp, char *str, int start,
 	 * starts in the middle of a string, so don't bother trying it
 	 * in that case.
 	 */
-	if (rp->dfa && ! no_bol && start == 0) {
-		char save;
-		size_t count = 0;
+	if (rp->dfareg != NULL && ! no_bol && start == 0) {
 		struct dfa *superset = dfasuperset(rp->dfareg);
-		/*
-		 * dfa likes to stick a '\n' right after the matched
-		 * text.  So we just save and restore the character.
-		 */
-		save = str[start+len];
 		if (superset)
 			ret = dfaexec(superset, str+start, str+start+len,
 							true, NULL, NULL);
-		if (ret)
+
+		if (ret && ((! need_start && ! rp->has_anchor)
+				|| (! superset && dfaisfast(rp->dfareg))))
 			ret = dfaexec(rp->dfareg, str+start, str+start+len,
-						true, &count, &try_backref);
-		str[start+len] = save;
+						true, NULL, &try_backref);
 	}
 
 	if (ret) {
-		if (need_start || rp->dfa == false || try_backref) {
+		if (   rp->dfareg == NULL
+			|| start != 0
+			|| no_bol
+			|| need_start
+			|| rp->has_anchor
+			|| try_backref) {
 			/*
 			 * Passing NULL as last arg speeds up search for cases
 			 * where we don't need the start/end info.
@@ -326,7 +323,7 @@ refree(Regexp *rp)
 		free(rp->regs.start);
 	if (rp->regs.end)
 		free(rp->regs.end);
-	if (rp->dfa) {
+	if (rp->dfareg != NULL) {
 		dfafree(rp->dfareg);
 		free(rp->dfareg);
 	}
@@ -425,32 +422,6 @@ resetup()
 	dfa_init();
 }
 
-/* avoid_dfa --- return true if we should not use the DFA matcher */
-
-int
-avoid_dfa(NODE *re, char *str, size_t len)
-{
-	char *end;
-
-	/*
-	 * f = @/.../
-	 * if ("foo" ~ f) ...
-	 *
-	 * This creates a Node_dynregex with NULL re_reg.
-	 */
-	if (re->re_reg == NULL)
-		return false;
-
-	if (! re->re_reg->has_anchor)
-		return false;
-
-	for (end = str + len; str < end; str++)
-		if (*str == '\n')
-			return true;
-
-	return false;
-}
-
 /* reisstring --- return true if the RE match is a simple string match */
 
 int
author	Arnold D. Robbins <arnold@skeeve.com>	2016-08-25 21:40:11 +0300
committer	Arnold D. Robbins <arnold@skeeve.com>	2016-08-25 21:40:11 +0300
commit	7453c813457583197fcf0fe1c7d2301d6013bfea (patch)
tree	ecaa73572d5f486bf64c1fdf000d7b3944277a82 /re.c
parent	d60bf1935df309eea0bcc87ec542030a5b022f35 (diff)
download	egawk-7453c813457583197fcf0fe1c7d2301d6013bfea.tar.gz egawk-7453c813457583197fcf0fe1c7d2301d6013bfea.tar.bz2 egawk-7453c813457583197fcf0fe1c7d2301d6013bfea.zip