aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog13
-rw-r--r--dfa.c24
-rw-r--r--dfa.h5
-rw-r--r--re.c17
4 files changed, 52 insertions, 7 deletions
diff --git a/ChangeLog b/ChangeLog
index 051e83ec..b2f0e8cb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,9 +1,22 @@
2016-11-29 Arnold D. Robbins <arnold@skeeve.com>
+ Remove redundant flag from dfa:
+
* dfa.c (dfasyntax): Use RE_ICASE instead of DFA_CASE_FOLD.
* dfa.h (DFA_CASE_FOLD): Removed.
* re.c (make_regexp): Use RE_ICASE for regex and dfa. Yay!
+ Unrelated: Don't have to recompute syntax stuff every time
+ we compile a regexp.
+
+ * dfa.c (dfacopysyntax): New function.
+ (dfaalloc): Zero out the newly allocated memory.
+ * dfa.h (dfacopysyntax): Declare it.
+ * re.c (make_regexp): Declare two static dfaregs, one with
+ and one without ignorecase. Compute the syntax once for each,
+ then use dfacopysyntax to copy the settings when compiling
+ a regexp.
+
2016-11-28 Arnold D. Robbins <arnold@skeeve.com>
Make gawk compile on HP-UX 11.33.
diff --git a/dfa.c b/dfa.c
index cd7dce65..0a231050 100644
--- a/dfa.c
+++ b/dfa.c
@@ -805,6 +805,23 @@ char_context (struct dfa const *dfa, unsigned char c)
return CTX_NONE;
}
+/* Copy the syntax settings from the dfa instance FROM into TO, so that
+ many regular expressions based on the same setting need not recompute
+ them. Only the members assigned below are copied; FROM is only read. */
+void
+dfacopysyntax (struct dfa *to, const struct dfa *from)
+{
+ to->dfaexec = from->dfaexec;
+ to->simple_locale = from->simple_locale;
+ to->localeinfo = from->localeinfo;
+
+ to->fast = from->fast;
+
+ to->canychar = from->canychar;
+ to->lex.cur_mb_len = from->lex.cur_mb_len;
+ to->syntax = from->syntax;
+}
+
/* Set a bit in the charclass for the given wchar_t. Do nothing if WC
is represented by a multi-byte sequence. Even for MB_CUR_MAX == 1,
this may happen when folding case in weird Turkish locales where
@@ -3999,7 +4016,12 @@ dfamustfree (struct dfamust *dm)
struct dfa *
dfaalloc (void)
{
- return xmalloc (sizeof (struct dfa));
+ void *p = xmalloc (sizeof (struct dfa)); /* storage for one struct dfa */
+ if (p) /* NOTE(review): xmalloc presumably never returns NULL; confirm whether this guard is needed */
+ {
+ memset (p, 0, sizeof (struct dfa)); /* start with every byte of the struct cleared */
+ }
+ return p; /* the zero-filled struct dfa */
}
/* Initialize DFA. */
diff --git a/dfa.h b/dfa.h
index 0fd9b2c9..c68b4df7 100644
--- a/dfa.h
+++ b/dfa.h
@@ -110,6 +110,11 @@ extern struct dfa *dfasuperset (struct dfa const *d) _GL_ATTRIBUTE_PURE;
/* The DFA is likely to be fast. */
extern bool dfaisfast (struct dfa const *) _GL_ATTRIBUTE_PURE;
+/* Copy the syntax settings from one dfa instance to another.
+ Saves considerable computation time if compiling many regular expressions
+ based on the same setting. */
+extern void dfacopysyntax (struct dfa *to, const struct dfa *from);
+
/* Free the storage held by the components of a struct dfa. */
extern void dfafree (struct dfa *);
diff --git a/re.c b/re.c
index 6c1e360c..5be3d178 100644
--- a/re.c
+++ b/re.c
@@ -49,8 +49,8 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
int c, c2;
static bool first = true;
static bool no_dfa = false;
- reg_syntax_t dfa_syn;
int i;
+ static struct dfa* dfaregs[2] = { NULL, NULL };
/*
* The number of bytes in the current multibyte character.
@@ -62,9 +62,9 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
memset(&mbs, 0, sizeof(mbstate_t)); /* Initialize. */
if (first) {
- first = false;
/* for debugging and testing */
no_dfa = (getenv("GAWK_NO_DFA") != NULL);
+ /* don't set first to false here, we do it below */
}
/* always check */
@@ -202,9 +202,14 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
syn &= ~RE_ICASE;
}
- dfa_syn = syn;
- if (ignorecase)
- dfa_syn |= RE_ICASE;
+ /* initialize dfas to hold syntax */
+ if (first) {
+ first = false;
+ dfaregs[0] = dfaalloc();
+ dfaregs[1] = dfaalloc();
+ dfasyntax(dfaregs[0], & localeinfo, syn, DFA_ANCHOR);
+ dfasyntax(dfaregs[1], & localeinfo, syn | RE_ICASE, DFA_ANCHOR);
+ }
re_set_syntax(syn);
@@ -222,7 +227,7 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
rp->pat.newline_anchor = false; /* don't get \n in middle of string */
if (dfa && ! no_dfa) {
rp->dfareg = dfaalloc();
- dfasyntax(rp->dfareg, & localeinfo, dfa_syn, DFA_ANCHOR);
+ dfacopysyntax(rp->dfareg, dfaregs[ignorecase]);
dfacomp(buf, len, rp->dfareg, true);
} else
rp->dfareg = NULL;