diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2016-09-20 21:31:45 +0300 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2016-09-20 21:31:45 +0300 |
commit | 491c127c5c78f0729f3e75bc0d07d49285a2041b (patch) | |
tree | b65badb595578c62bf906f90949e0a4b5f05667c | |
parent | 2679ddd1324765b37ef69933b8ad4cf3f80c2795 (diff) | |
download | egawk-491c127c5c78f0729f3e75bc0d07d49285a2041b.tar.gz egawk-491c127c5c78f0729f3e75bc0d07d49285a2041b.tar.bz2 egawk-491c127c5c78f0729f3e75bc0d07d49285a2041b.zip |
Use dfa's new ability to handle anchors.
-rw-r--r-- | ChangeLog | 5 |
-rw-r--r-- | awk.h | 1 |
-rw-r--r-- | re.c | 10 |
-rw-r--r-- | test/ChangeLog | 5 |
-rw-r--r-- | test/Makefile.am | 5 |
-rw-r--r-- | test/Makefile.in | 10 |
-rw-r--r-- | test/Maketests | 5 |
-rw-r--r-- | test/anchor.awk | 33 |
-rw-r--r-- | test/anchor.in | 3 |
-rw-r--r-- | test/anchor.ok | 6 |
10 files changed, 72 insertions, 11 deletions
@@ -1,3 +1,8 @@ +2016-09-09 Norihiro Tanaka <noritnk@kcn.ne.jp> + + * awk.h (struct Regexp): Remove member has_anchor. All uses removed. + * re.c (make_regexp, research): Use dfa matcher for regex with anchor. + 2016-09-09 Arnold D. Robbins <arnold@skeeve.com> * dfa.c: Sync with grep. @@ -210,7 +210,6 @@ typedef struct Regexp { struct re_pattern_buffer pat; struct re_registers regs; struct dfa *dfareg; - bool has_anchor; /* re has anchors which dfa avoids */ bool non_empty; /* for use in fpat_parse_field */ bool has_meta; /* re has meta chars so (probably) isn't simple string */ bool maybe_long; /* re has meta chars that can match long text */ @@ -49,7 +49,6 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal) int c, c2; static bool first = true; static bool no_dfa = false; - bool has_anchor = false; reg_syntax_t dfa_syn; int i; @@ -160,9 +159,6 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal) } /* switch */ } else { c = *src; - if (c == '^' || c == '$') - has_anchor = true; - *dest++ = *src++; /* not '\\' */ } if (gawk_mb_cur_max > 1 && is_multibyte) @@ -228,11 +224,10 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal) if (dfa && ! no_dfa) { rp->dfareg = dfaalloc(); dfasyntax(rp->dfareg, & localeinfo, dfa_syn, - ignorecase ? DFA_CASE_FOLD : 0); + (ignorecase ? DFA_CASE_FOLD : 0) | DFA_ANCHOR); dfacomp(buf, len, rp->dfareg, true); } else rp->dfareg = NULL; - rp->has_anchor = has_anchor; /* Additional flags that help with RS as regexp. */ for (i = 0; i < len; i++) { @@ -287,7 +282,7 @@ research(Regexp *rp, char *str, int start, ret = dfaexec(superset, str+start, str+start+len, true, NULL, NULL); - if (ret && ((! need_start && ! rp->has_anchor) + if (ret && (! need_start || (! 
superset && dfaisfast(rp->dfareg)))) ret = dfaexec(rp->dfareg, str+start, str+start+len, true, NULL, &try_backref); @@ -298,7 +293,6 @@ research(Regexp *rp, char *str, int start, || start != 0 || no_bol || need_start - || rp->has_anchor || try_backref) { /* * Passing NULL as last arg speeds up search for cases diff --git a/test/ChangeLog b/test/ChangeLog index 9ac7111e..a4d5baae 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,8 @@ +2016-09-09 Norihiro Tanaka <noritnk@kcn.ne.jp> + + * Makefile.am (anchor): New test. + * anchor.awk, anchor.in, anchor.ok: New files. + 2016-08-25 Arnold D. Robbins <arnold@skeeve.com> * 4.1.4: Release tar ball made. diff --git a/test/Makefile.am b/test/Makefile.am index a0b4f525..9dbedb35 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -47,6 +47,9 @@ EXTRA_DIST = \ anchgsub.awk \ anchgsub.in \ anchgsub.ok \ + anchor.awk \ + anchor.in \ + anchor.ok \ argarray.awk \ argarray.in \ argarray.ok \ @@ -1147,7 +1150,7 @@ CLEANFILES = core core.* fmtspcl.ok # try to keep these sorted. each letter starts a new line BASIC_TESTS = \ - addcomma anchgsub argarray arrayind1 arrayind2 arrayparm arrayprm2 arrayprm3 \ + addcomma anchgsub anchor argarray arrayind1 arrayind2 arrayparm arrayprm2 arrayprm3 \ arrayref arrymem1 arryref2 arryref3 arryref4 arryref5 arynasty \ arynocls aryprm1 aryprm2 aryprm3 aryprm4 aryprm5 aryprm6 aryprm7 \ aryprm8 aryprm9 arysubnm asgext awkpath \ diff --git a/test/Makefile.in b/test/Makefile.in index 5731487f..f1ffc0db 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -304,6 +304,9 @@ EXTRA_DIST = \ anchgsub.awk \ anchgsub.in \ anchgsub.ok \ + anchor.awk \ + anchor.in \ + anchor.ok \ argarray.awk \ argarray.in \ argarray.ok \ @@ -1403,7 +1406,7 @@ CLEANFILES = core core.* fmtspcl.ok # try to keep these sorted. 
each letter starts a new line BASIC_TESTS = \ - addcomma anchgsub argarray arrayind1 arrayind2 arrayparm arrayprm2 arrayprm3 \ + addcomma anchgsub anchor argarray arrayind1 arrayind2 arrayparm arrayprm2 arrayprm3 \ arrayref arrymem1 arryref2 arryref3 arryref4 arryref5 arynasty \ arynocls aryprm1 aryprm2 aryprm3 aryprm4 aryprm5 aryprm6 aryprm7 \ aryprm8 aryprm9 arysubnm asgext awkpath \ @@ -2800,6 +2803,11 @@ anchgsub: @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +anchor: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + arrayind1: @echo $@ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/Maketests b/test/Maketests index b28a32ab..a13ed244 100644 --- a/test/Maketests +++ b/test/Maketests @@ -10,6 +10,11 @@ anchgsub: @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +anchor: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + arrayind1: @echo $@ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? 
>>_$@ diff --git a/test/anchor.awk b/test/anchor.awk new file mode 100644 index 00000000..56f47569 --- /dev/null +++ b/test/anchor.awk @@ -0,0 +1,33 @@ +BEGIN { RS = "" } + +{ + if (/^A/) + print "ok" + else + print "not ok" + + if (/B$/) + print "not ok" + else + print "ok" + + if (/^C/) + print "not ok" + else + print "ok" + + if (/D$/) + print "not ok" + else + print "ok" + + if (/^E/) + print "not ok" + else + print "ok" + + if (/F$/) + print "ok" + else + print "not ok" +} diff --git a/test/anchor.in b/test/anchor.in new file mode 100644 index 00000000..a97e9859 --- /dev/null +++ b/test/anchor.in @@ -0,0 +1,3 @@ +A line1 B +C line2 D +E line3 F diff --git a/test/anchor.ok b/test/anchor.ok new file mode 100644 index 00000000..7780b88b --- /dev/null +++ b/test/anchor.ok @@ -0,0 +1,6 @@ +ok +ok +ok +ok +ok +ok |