about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 5
-rw-r--r-- awk.h 1
-rw-r--r-- re.c 10
-rw-r--r-- test/ChangeLog 5
-rw-r--r-- test/Makefile.am 5
-rw-r--r-- test/Makefile.in 10
-rw-r--r-- test/Maketests 5
-rw-r--r-- test/anchor.awk 33
-rw-r--r-- test/anchor.in 3
-rw-r--r-- test/anchor.ok 6
10 files changed, 72 insertions, 11 deletions
diff --git a/ChangeLog b/ChangeLog
index 09d2a22f..755be27b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2016-09-09 Norihiro Tanaka <noritnk@kcn.ne.jp>
+
+ * awk.h (struct Regexp): Remove member has_anchor. All uses removed.
+ * re.c (make_regexp, research): Use dfa matcher for regex with anchor.
+
2016-09-09 Arnold D. Robbins <arnold@skeeve.com>
* dfa.c: Sync with grep.
diff --git a/awk.h b/awk.h
index 2c401637..2353700e 100644
--- a/awk.h
+++ b/awk.h
@@ -210,7 +210,6 @@ typedef struct Regexp {
struct re_pattern_buffer pat;
struct re_registers regs;
struct dfa *dfareg;
- bool has_anchor; /* re has anchors which dfa avoids */
bool non_empty; /* for use in fpat_parse_field */
bool has_meta; /* re has meta chars so (probably) isn't simple string */
bool maybe_long; /* re has meta chars that can match long text */
diff --git a/re.c b/re.c
index 69cc50e1..167a265d 100644
--- a/re.c
+++ b/re.c
@@ -49,7 +49,6 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
int c, c2;
static bool first = true;
static bool no_dfa = false;
- bool has_anchor = false;
reg_syntax_t dfa_syn;
int i;
@@ -160,9 +159,6 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
} /* switch */
} else {
c = *src;
- if (c == '^' || c == '$')
- has_anchor = true;
-
*dest++ = *src++; /* not '\\' */
}
if (gawk_mb_cur_max > 1 && is_multibyte)
@@ -228,11 +224,10 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
if (dfa && ! no_dfa) {
rp->dfareg = dfaalloc();
dfasyntax(rp->dfareg, & localeinfo, dfa_syn,
- ignorecase ? DFA_CASE_FOLD : 0);
+ (ignorecase ? DFA_CASE_FOLD : 0) | DFA_ANCHOR);
dfacomp(buf, len, rp->dfareg, true);
} else
rp->dfareg = NULL;
- rp->has_anchor = has_anchor;
/* Additional flags that help with RS as regexp. */
for (i = 0; i < len; i++) {
@@ -287,7 +282,7 @@ research(Regexp *rp, char *str, int start,
ret = dfaexec(superset, str+start, str+start+len,
true, NULL, NULL);
- if (ret && ((! need_start && ! rp->has_anchor)
+ if (ret && (! need_start
|| (! superset && dfaisfast(rp->dfareg))))
ret = dfaexec(rp->dfareg, str+start, str+start+len,
true, NULL, &try_backref);
@@ -298,7 +293,6 @@ research(Regexp *rp, char *str, int start,
|| start != 0
|| no_bol
|| need_start
- || rp->has_anchor
|| try_backref) {
/*
* Passing NULL as last arg speeds up search for cases
diff --git a/test/ChangeLog b/test/ChangeLog
index 9ac7111e..a4d5baae 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,8 @@
+2016-09-09 Norihiro Tanaka <noritnk@kcn.ne.jp>
+
+ * Makefile.am (anchor): New test.
+ * anchor.awk, anchor.in, anchor.ok: New files.
+
2016-08-25 Arnold D. Robbins <arnold@skeeve.com>
* 4.1.4: Release tar ball made.
diff --git a/test/Makefile.am b/test/Makefile.am
index a0b4f525..9dbedb35 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -47,6 +47,9 @@ EXTRA_DIST = \
anchgsub.awk \
anchgsub.in \
anchgsub.ok \
+ anchor.awk \
+ anchor.in \
+ anchor.ok \
argarray.awk \
argarray.in \
argarray.ok \
@@ -1147,7 +1150,7 @@ CLEANFILES = core core.* fmtspcl.ok
# try to keep these sorted. each letter starts a new line
BASIC_TESTS = \
- addcomma anchgsub argarray arrayind1 arrayind2 arrayparm arrayprm2 arrayprm3 \
+ addcomma anchgsub anchor argarray arrayind1 arrayind2 arrayparm arrayprm2 arrayprm3 \
arrayref arrymem1 arryref2 arryref3 arryref4 arryref5 arynasty \
arynocls aryprm1 aryprm2 aryprm3 aryprm4 aryprm5 aryprm6 aryprm7 \
aryprm8 aryprm9 arysubnm asgext awkpath \
diff --git a/test/Makefile.in b/test/Makefile.in
index 5731487f..f1ffc0db 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -304,6 +304,9 @@ EXTRA_DIST = \
anchgsub.awk \
anchgsub.in \
anchgsub.ok \
+ anchor.awk \
+ anchor.in \
+ anchor.ok \
argarray.awk \
argarray.in \
argarray.ok \
@@ -1403,7 +1406,7 @@ CLEANFILES = core core.* fmtspcl.ok
# try to keep these sorted. each letter starts a new line
BASIC_TESTS = \
- addcomma anchgsub argarray arrayind1 arrayind2 arrayparm arrayprm2 arrayprm3 \
+ addcomma anchgsub anchor argarray arrayind1 arrayind2 arrayparm arrayprm2 arrayprm3 \
arrayref arrymem1 arryref2 arryref3 arryref4 arryref5 arynasty \
arynocls aryprm1 aryprm2 aryprm3 aryprm4 aryprm5 aryprm6 aryprm7 \
aryprm8 aryprm9 arysubnm asgext awkpath \
@@ -2800,6 +2803,11 @@ anchgsub:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+anchor:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
arrayind1:
@echo $@
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/Maketests b/test/Maketests
index b28a32ab..a13ed244 100644
--- a/test/Maketests
+++ b/test/Maketests
@@ -10,6 +10,11 @@ anchgsub:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+anchor:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
arrayind1:
@echo $@
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/anchor.awk b/test/anchor.awk
new file mode 100644
index 00000000..56f47569
--- /dev/null
+++ b/test/anchor.awk
@@ -0,0 +1,33 @@
+BEGIN { RS = "" }
+
+{
+ if (/^A/)
+ print "ok"
+ else
+ print "not ok"
+
+ if (/B$/)
+ print "not ok"
+ else
+ print "ok"
+
+ if (/^C/)
+ print "not ok"
+ else
+ print "ok"
+
+ if (/D$/)
+ print "not ok"
+ else
+ print "ok"
+
+ if (/^E/)
+ print "not ok"
+ else
+ print "ok"
+
+ if (/F$/)
+ print "ok"
+ else
+ print "not ok"
+}
diff --git a/test/anchor.in b/test/anchor.in
new file mode 100644
index 00000000..a97e9859
--- /dev/null
+++ b/test/anchor.in
@@ -0,0 +1,3 @@
+A line1 B
+C line2 D
+E line3 F
diff --git a/test/anchor.ok b/test/anchor.ok
new file mode 100644
index 00000000..7780b88b
--- /dev/null
+++ b/test/anchor.ok
@@ -0,0 +1,6 @@
+ok
+ok
+ok
+ok
+ok
+ok