diff options
-rw-r--r-- | ChangeLog | 13 |
-rw-r--r-- | dfa.c | 24 |
-rw-r--r-- | dfa.h | 5 |
-rw-r--r-- | re.c | 17 |
4 files changed, 52 insertions, 7 deletions
@@ -1,9 +1,22 @@ 2016-11-29 Arnold D. Robbins <arnold@skeeve.com> + Remove redundant flag from dfa: + * dfa.c (dfasyntax): Use RE_ICASE instead of DFA_CASE_FOLD. * dfa.h (DFA_CASE_FOLD): Removed. * re.c (make_regexp): Use RE_ICASE for regex and dfa. Yay! + Unrelated: Don't have to recompute syntax stuff every time + we compile a regexp. + + * dfa.c (dfacopysyntax): New function. + (dfaalloc): Zero out the newly allocated memory. + * dfa.h (dfacopysyntax): Declare it. + * re.c (make_regexp): Declare two static dfaregs, one for + with and without ignorecase. Compute the syntax once for each, + then use dfacopysyntax to copy the settings when compiling + a regexp. + 2016-11-28 Arnold D. Robbins <arnold@skeeve.com> Make gawk compile on HP-UX 11.33. @@ -805,6 +805,23 @@ char_context (struct dfa const *dfa, unsigned char c) return CTX_NONE; } +/* Copy the syntax settings from one dfa instance to another. + Saves considerable computation time if compiling many regular expressions + based on the same setting. */ +void +dfacopysyntax (struct dfa *to, const struct dfa *from) +{ + to->dfaexec = from->dfaexec; + to->simple_locale = from->simple_locale; + to->localeinfo = from->localeinfo; + + to->fast = from->fast; + + to->canychar = from->canychar; + to->lex.cur_mb_len = from->lex.cur_mb_len; + to->syntax = from->syntax; +} + /* Set a bit in the charclass for the given wchar_t. Do nothing if WC is represented by a multi-byte sequence. Even for MB_CUR_MAX == 1, this may happen when folding case in weird Turkish locales where @@ -3999,7 +4016,12 @@ dfamustfree (struct dfamust *dm) struct dfa * dfaalloc (void) { - return xmalloc (sizeof (struct dfa)); + void *p = xmalloc (sizeof (struct dfa)); + if (p) + { + memset (p, 0, sizeof (struct dfa)); + } + return p; } /* Initialize DFA. */ @@ -110,6 +110,11 @@ extern struct dfa *dfasuperset (struct dfa const *d) _GL_ATTRIBUTE_PURE; /* The DFA is likely to be fast. 
*/ extern bool dfaisfast (struct dfa const *) _GL_ATTRIBUTE_PURE; +/* Copy the syntax settings from one dfa instance to another. + Saves considerable computation time if compiling many regular expressions + based on the same setting. */ +extern void dfacopysyntax (struct dfa *to, const struct dfa *from); + /* Free the storage held by the components of a struct dfa. */ extern void dfafree (struct dfa *); @@ -49,8 +49,8 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal) int c, c2; static bool first = true; static bool no_dfa = false; - reg_syntax_t dfa_syn; int i; + static struct dfa* dfaregs[2] = { NULL, NULL }; /* * The number of bytes in the current multibyte character. @@ -62,9 +62,9 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal) memset(&mbs, 0, sizeof(mbstate_t)); /* Initialize. */ if (first) { - first = false; /* for debugging and testing */ no_dfa = (getenv("GAWK_NO_DFA") != NULL); + /* don't set first to false here, we do it below */ } /* always check */ @@ -202,9 +202,14 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal) syn &= ~RE_ICASE; } - dfa_syn = syn; - if (ignorecase) - dfa_syn |= RE_ICASE; + /* initialize dfas to hold syntax */ + if (first) { + first = false; + dfaregs[0] = dfaalloc(); + dfaregs[1] = dfaalloc(); + dfasyntax(dfaregs[0], & localeinfo, syn, DFA_ANCHOR); + dfasyntax(dfaregs[1], & localeinfo, syn | RE_ICASE, DFA_ANCHOR); + } re_set_syntax(syn); @@ -222,7 +227,7 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal) rp->pat.newline_anchor = false; /* don't get \n in middle of string */ if (dfa && ! no_dfa) { rp->dfareg = dfaalloc(); - dfasyntax(rp->dfareg, & localeinfo, dfa_syn, DFA_ANCHOR); + dfacopysyntax(rp->dfareg, dfaregs[ignorecase]); dfacomp(buf, len, rp->dfareg, true); } else rp->dfareg = NULL; |