2 files changed, 228 insertions, 289 deletions
diff --git a/ChangeLog b/ChangeLog
index 064e375c..2d88b917 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -5,6 +5,10 @@
 	* debug.c (serialize_list): Renamed from `serialize'.
 	(unserialize_list): Renamed from `unserialize', for consistency.
 
+	Unrelated:
+
+	* dfa.c: Sync with GNULIB.
+
 2016-11-21         Arnold D. Robbins     <arnold@skeeve.com>
 
 	* dfa.c: Sync with GNULIB.
diff --git a/dfa.c b/dfa.c
index 02673781..f3939189 100644
--- a/dfa.c
+++ b/dfa.c
@@ -340,7 +340,8 @@ typedef struct
                                    ANYCHAR.  */
 } dfa_state;
 
-/* Maximum for any transition table count that exceeds min_trcount.  */
+/* Maximum for any transition table count.  This should be at least 3,
+   for the initial state setup.  */
 enum { MAX_TRCOUNT = 1024 };
 
 /* A bracket operator.
@@ -503,22 +504,26 @@ struct dfa
 
   /* Fields filled by dfaexec.  */
   state_num tralloc;            /* Number of transition tables that have
-                                   slots so far, not counting trans[-1].  */
+                                   slots so far, not counting trans[-1] and
+                                   trans[-2].  */
   int trcount;                  /* Number of transition tables that have
-                                   actually been built.  */
-  int min_trcount;              /* Minimum of number of transition tables.
-                                   Always keep the number, even after freeing
-                                   the transition tables.  It is also the
-                                   number of initial states.  */
+                                   been built, other than for initial
+                                   states.  */
+  int min_trcount;              /* Number of initial states.  Equivalently,
+                                   the minimum state number for which trcount
+                                   counts transitions.  */
   state_num **trans;            /* Transition tables for states that can
                                    never accept.  If the transitions for a
                                    state have not yet been computed, or the
                                    state could possibly accept, its entry in
                                    this table is NULL.  This points to one
                                    past the start of the allocated array,
-                                   and trans[-1] is always NULL.  */
+                                   and trans[-1] and trans[-2] are always
+                                   NULL.  */
   state_num **fails;            /* Transition tables after failing to accept
-                                   on a state that potentially could do so.  */
+                                   on a state that potentially could do so.
+                                   If trans[i] is non-null, fails[i] must
+                                   be null.  */
   int *success;                 /* Table of acceptance conditions used in
                                    dfaexec and computed in build_state.  */
   state_num *newlines;          /* Transitions on newlines.  The entry for a
@@ -533,7 +538,8 @@ struct dfa
                                    do not distinguish between their contexts,
                                    as not supported word.  */
   position_set mb_follows;      /* Follow set added by ANYCHAR on demand.  */
-  state_num **mb_trans;      /* Transition tables for states with ANYCHAR.  */
+  state_num **mb_trans;         /* Transition tables for states with
+                                   ANYCHAR.  */
   state_num mb_trcount;         /* Number of transition tables for states with
                                    ANYCHAR that have actually been built.  */
 
@@ -715,10 +721,17 @@ zeroset (charclass s)
 }
 
 static void
-notset (charclass s)
+fillset (charclass s)
 {
   int i;
+  for (i = 0; i < CHARCLASS_WORDS; i++)
+    s[i] = CHARCLASS_WORD_MASK;
+}
 
+static void
+notset (charclass s)
+{
+  int i;
   for (i = 0; i < CHARCLASS_WORDS; ++i)
     s[i] = CHARCLASS_WORD_MASK & ~s[i];
 }
@@ -1429,8 +1442,7 @@ lex (struct dfa *dfa)
             goto normal_char;
           if (dfa->canychar == (size_t) -1)
             {
-              zeroset (ccl);
-              notset (ccl);
+              fillset (ccl);
               if (!(dfa->syntax.syntax_bits & RE_DOT_NEWLINE))
                 clrbit ('\n', ccl);
               if (dfa->syntax.syntax_bits & RE_DOT_NOT_NULL)
@@ -2498,6 +2510,7 @@ dfaanalyze (struct dfa *d, bool searchflag)
   if (separate_contexts & CTX_LETTER)
     d->min_trcount = state_index (d, &merged, CTX_LETTER);
   d->min_trcount++;
+  d->trcount = 0;
 
   free (posalloc);
   free (stkalloc);
@@ -2506,75 +2519,80 @@ dfaanalyze (struct dfa *d, bool searchflag)
 }
 
 
-/* Find, for each character, the transition out of state s of d, and store
-   it in the appropriate slot of trans.
+/* Return the transition out of state s of d for the input character uc,
+   updating the slots in trans accordingly.
 
-   We divide the positions of s into groups (positions can appear in more
-   than one group).  Each group is labeled with a set of characters that
+   Do not worry about all possible input characters; calculate just the group
+   of positions that match uc.  Label it with the set of characters that
    every position in the group matches (taking into account, if necessary,
-   preceding context information of s).  For each group, find the union
-   of the its elements' follows.  This set is the set of positions of the
+   preceding context information of s).  Then find the union
+   of these positions' follows, i.e., the set of positions of the
    new state.  For each character in the group's label, set the transition
    on this character to be to a state corresponding to the set's positions,
    and its associated backward context information, if necessary.
 
-   If we are building a searching matcher, we include the positions of state
+   When building a searching matcher, include the positions of state
    0 in every state.
 
-   The collection of groups is constructed by building an equivalence-class
+   The group is constructed by building an equivalence-class
    partition of the positions of s.
 
    For each position, find the set of characters C that it matches.  Eliminate
    any characters from C that fail on grounds of backward context.
 
-   Search through the groups, looking for a group whose label L has nonempty
+   Check whether the group's label L has nonempty
    intersection with C.  If L - C is nonempty, create a new group labeled
    L - C and having the same positions as the current group, and set L to
-   the intersection of L and C.  Insert the position in this group, set
+   the intersection of L and C.  Insert the position in the group, set
    C = C - L, and resume scanning.
 
    If after comparing with every group there are characters remaining in C,
    create a new group labeled with the characters of C and insert this
    position in that group.  */
-static void
-dfastate (state_num s, struct dfa *d, state_num trans[])
+static state_num
+dfastate (state_num s, struct dfa *d, unsigned char uc, state_num trans[])
 {
-  leaf_set grps[NOTCHAR];       /* As many as will ever be needed.  */
-  charclass labels[NOTCHAR];    /* Labels corresponding to the groups.  */
-  size_t ngrps = 0;             /* Number of groups actually used.  */
-  position pos;                 /* Current position being considered.  */
-  charclass matches;            /* Set of matching characters.  */
-  charclass_word matchesf;	/* Nonzero if matches is nonempty.  */
-  charclass intersect;          /* Intersection with some label set.  */
-  charclass_word intersectf;	/* Nonzero if intersect is nonempty.  */
-  charclass leftovers;          /* Stuff in the label that didn't match.  */
-  charclass_word leftoversf;	/* Nonzero if leftovers is nonempty.  */
-  position_set follows;         /* Union of the follows of some group.  */
+  leaf_set group;               /* Positions that match the input char.  */
+  charclass label;              /* The group's label.  */
+  position_set follows;         /* Union of the follows of the group.  */
   position_set tmp;             /* Temporary space for merging sets.  */
-  int possible_contexts;        /* Contexts that this group can match.  */
-  int separate_contexts;        /* Context that new state wants to know.  */
   state_num state;              /* New state.  */
   state_num state_newline;      /* New state on a newline transition.  */
   state_num state_letter;       /* New state on a letter transition.  */
-  bool next_isnt_1st_byte = false; /* We can't add state0.  */
   size_t i, j, k;
 
 #ifdef DEBUG
   fprintf (stderr, "build state %td\n", s);
 #endif
 
-  zeroset (matches);
+  group.elems = xnmalloc (d->nleaves, sizeof *group.elems);
+  group.nelem = 0;
+
+  fillset (label);
 
   for (i = 0; i < d->states[s].elems.nelem; ++i)
     {
-      pos = d->states[s].elems.elems[i];
+      charclass matches;            /* Set of matching characters.  */
+      position pos = d->states[s].elems.elems[i];
+      bool matched = false;
       if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR)
-        setbit (d->tokens[pos.index], matches);
+        {
+          zeroset (matches);
+          setbit (d->tokens[pos.index], matches);
+          if (d->tokens[pos.index] == uc)
+            matched = true;
+        }
       else if (d->tokens[pos.index] >= CSET)
-        copyset (d->charclasses[d->tokens[pos.index] - CSET], matches);
-      else if (d->tokens[pos.index] == ANYCHAR)
         {
+          copyset (d->charclasses[d->tokens[pos.index] - CSET], matches);
+          if (tstbit (uc, d->charclasses[d->tokens[pos.index] - CSET]))
+            matched = true;
+        }
+       else if (d->tokens[pos.index] == ANYCHAR)
+         {
           copyset (d->charclasses[d->canychar], matches);
+          if (tstbit (uc, d->charclasses[d->canychar]))
+            matched = true;
 
           /* ANYCHAR must match with a single character, so we must put
              it to D->states[s].mbps which contains the positions which
@@ -2629,155 +2647,69 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
       fprintf (stderr, "\n");
 #endif
 
-      for (j = 0; j < ngrps; ++j)
+      if (matched)
         {
-          /* If matches contains a single character only, and the current
-             group's label doesn't contain that character, go on to the
-             next group.  */
-          if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR
-              && !tstbit (d->tokens[pos.index], labels[j]))
-            continue;
-
-          /* Check if this group's label has a nonempty intersection with
-             matches.  */
-          intersectf = 0;
           for (k = 0; k < CHARCLASS_WORDS; ++k)
-            intersectf |= intersect[k] = matches[k] & labels[j][k];
-          if (!intersectf)
-            continue;
-
-          /* It does; now find the set differences both ways.  */
-          leftoversf = matchesf = 0;
-          for (k = 0; k < CHARCLASS_WORDS; ++k)
-            {
-              /* Even an optimizing compiler can't know this for sure.  */
-              charclass_word match = matches[k], label = labels[j][k];
-
-              leftoversf |= leftovers[k] = label & ~match;
-              matchesf |= matches[k] = match & ~label;
-            }
-
-          /* If there were leftovers, create a new group labeled with them.  */
-          if (leftoversf)
-            {
-              copyset (leftovers, labels[ngrps]);
-              copyset (intersect, labels[j]);
-              grps[ngrps].elems = xnmalloc (d->nleaves,
-                                            sizeof *grps[ngrps].elems);
-              memcpy (grps[ngrps].elems, grps[j].elems,
-                      sizeof (grps[j].elems[0]) * grps[j].nelem);
-              grps[ngrps].nelem = grps[j].nelem;
-              ++ngrps;
-            }
-
-          /* Put the position in the current group.  The constraint is
-             irrelevant here.  */
-          grps[j].elems[grps[j].nelem++] = pos.index;
-
-          /* If every character matching the current position has been
-             accounted for, we're done.  */
-          if (!matchesf)
-            break;
+            label[k] &= matches[k];
+          group.elems[group.nelem++] = pos.index;
         }
-
-      /* If we've passed the last group, and there are still characters
-         unaccounted for, then we'll have to create a new group.  */
-      if (j == ngrps)
+      else
         {
-          copyset (matches, labels[ngrps]);
-          zeroset (matches);
-          grps[ngrps].elems = xnmalloc (d->nleaves, sizeof *grps[ngrps].elems);
-          grps[ngrps].nelem = 1;
-          grps[ngrps].elems[0] = pos.index;
-          ++ngrps;
+          for (k = 0; k < CHARCLASS_WORDS; ++k)
+            label[k] &= ~matches[k];
         }
     }
 
   alloc_position_set (&follows, d->nleaves);
   alloc_position_set (&tmp, d->nleaves);
 
-  /* If we are a searching matcher, the default transition is to a state
-     containing the positions of state 0, otherwise the default transition
-     is to fail miserably.  */
-  if (d->searchflag)
+  if (group.nelem > 0)
     {
-      int c;
+      int possible_contexts;    /* Contexts that the group can match.  */
+      int separate_contexts;    /* Context that new state wants to know.  */
 
-      state_newline = 0;
-      state_letter = d->min_trcount - 1;
-      state = d->initstate_notbol;
-
-      for (c = 0; c < NOTCHAR; ++c)
-        {
-          switch (d->syntax.sbit[c])
-            {
-            case CTX_NEWLINE:
-              trans[c] = state_newline;
-              break;
-            case CTX_LETTER:
-              trans[c] = state_letter;
-              break;
-            default:
-              trans[c] = state;
-              break;
-            }
-        }
-    }
-  else
-    for (i = 0; i < NOTCHAR; ++i)
-      trans[i] = -1;
-
-  for (i = 0; i < ngrps; ++i)
-    {
       follows.nelem = 0;
 
       /* Find the union of the follows of the positions of the group.
          This is a hideously inefficient loop.  Fix it someday.  */
-      for (j = 0; j < grps[i].nelem; ++j)
-        for (k = 0; k < d->follows[grps[i].elems[j]].nelem; ++k)
-          insert (d->follows[grps[i].elems[j]].elems[k], &follows);
+      for (j = 0; j < group.nelem; ++j)
+        for (k = 0; k < d->follows[group.elems[j]].nelem; ++k)
+          insert (d->follows[group.elems[j]].elems[k], &follows);
 
-      if (d->localeinfo.multibyte)
+      /* If we are building a searching matcher, throw in the positions
+         of state 0 as well, if possible.  */
+      if (d->searchflag)
         {
           /* If a token in follows.elems is not 1st byte of a multibyte
              character, or the states of follows must accept the bytes
              which are not 1st byte of the multibyte character.
-             Then, if a state of follows encounter a byte, it must not be
-             a 1st byte of a multibyte character nor single byte character.
-             We cansel to add state[0].follows to next state, because
-             state[0] must accept 1st-byte
-
-             For example, we assume <sb a> is a certain single byte
-             character, <mb A> is a certain multibyte character, and the
-             codepoint of <sb a> equals the 2nd byte of the codepoint of
-             <mb A>.
-             When state[0] accepts <sb a>, state[i] transit to state[i+1]
-             by accepting accepts 1st byte of <mb A>, and state[i+1]
-             accepts 2nd byte of <mb A>, if state[i+1] encounter the
-             codepoint of <sb a>, it must not be <sb a> but 2nd byte of
-             <mb A>, so we cannot add state[0].  */
-
-          next_isnt_1st_byte = false;
-          for (j = 0; j < follows.nelem; ++j)
+             Then, if a state of follows encounters a byte, it must not be
+             a 1st byte of a multibyte character nor a single byte character.
+             In this case, do not add state[0].follows to next state, because
+             state[0] must accept 1st-byte.
+
+             For example, suppose <sb a> is a certain single byte character,
+             <mb A> is a certain multibyte character, and the codepoint of
+             <sb a> equals the 2nd byte of the codepoint of <mb A>.  When
+             state[0] accepts <sb a>, state[i] transits to state[i+1] by
+             accepting the 1st byte of <mb A>, and state[i+1] accepts the
+             2nd byte of <mb A>, if state[i+1] encounters the codepoint of
+             <sb a>, it must not be <sb a> but the 2nd byte of <mb A>, so do
+             not add state[0].  */
+
+          bool mergeit = !d->localeinfo.multibyte;
+          if (!mergeit)
+            for (mergeit = true, j = 0; mergeit && j < follows.nelem; j++)
+              mergeit &= d->multibyte_prop[follows.elems[j].index];
+          if (mergeit)
             {
-              if (!(d->multibyte_prop[follows.elems[j].index] & 1))
-                {
-                  next_isnt_1st_byte = true;
-                  break;
-                }
+              merge (&d->states[0].elems, &follows, &tmp);
+              copy (&tmp, &follows);
             }
         }
 
-      /* If we are building a searching matcher, throw in the positions
-         of state 0 as well.  */
-      if (d->searchflag && (!d->localeinfo.multibyte || !next_isnt_1st_byte))
-        {
-          merge (&d->states[0].elems, &follows, &tmp);
-          copy (&tmp, &follows);
-        }
-
       /* Find out if the new state will want any context information.  */
-      possible_contexts = charclass_context (d, labels[i]);
+      possible_contexts = charclass_context (d, label);
       separate_contexts = state_separate_contexts (&follows);
 
       /* Find the state(s) corresponding to the union of the follows.  */
@@ -2793,51 +2725,39 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
         state_letter = state_index (d, &follows, CTX_LETTER);
       else
         state_letter = state;
+    }
 
-#ifdef DEBUG
-      fprintf (stderr, "group %zu\n nextpos:", i);
-      for (j = 0; j < grps[i].nelem; ++j)
-        {
-          fprintf (stderr, " %zu:", grps[i].elems[j]);
-          prtok (d->tokens[grps[i].elems[j]]);
-        }
-      fprintf (stderr, "\n follows:");
-      for (j = 0; j < follows.nelem; ++j)
+  /* If we are a searching matcher, the default transition is to a state
+     containing the positions of state 0, otherwise the default transition
+     is to fail miserably.  */
+  else if (d->searchflag)
+    {
+      state_newline = 0;
+      state_letter = d->min_trcount - 1;
+      state = d->initstate_notbol;
+    }
+  else
+    {
+      state_newline = -1;
+      state_letter = -1;
+      state = -1;
+    }
+
+  /* Set the transitions for each character in the label.  */
+  for (i = 0; i < NOTCHAR; i++)
+    if (tstbit (i, label))
+      switch (d->syntax.sbit[i])
         {
-          fprintf (stderr, " %zu:", follows.elems[j].index);
-          prtok (d->tokens[follows.elems[j].index]);
+        case CTX_NEWLINE:
+          trans[i] = state_newline;
+          break;
+        case CTX_LETTER:
+          trans[i] = state_letter;
+          break;
+        default:
+          trans[i] = state;
+          break;
         }
-      fprintf (stderr, "\n states:");
-      if (possible_contexts & CTX_NEWLINE)
-        fprintf (stderr, " CTX_NEWLINE:%td", state_newline);
-      if (possible_contexts & CTX_LETTER)
-        fprintf (stderr, " CTX_LETTER:%td", state_letter);
-      if (possible_contexts & CTX_NONE)
-        fprintf (stderr, " CTX_NONE:%td", state);
-      fprintf (stderr, "\n");
-#endif
-
-      /* Set the transitions for each character in the current label.  */
-      for (j = 0; j < CHARCLASS_WORDS; ++j)
-        for (k = 0; k < CHARCLASS_WORD_BITS; ++k)
-          if (labels[i][j] >> k & 1)
-            {
-              int c = j * CHARCLASS_WORD_BITS + k;
-
-              switch (d->syntax.sbit[c])
-                {
-                case CTX_NEWLINE:
-                  trans[c] = state_newline;
-                  break;
-                case CTX_LETTER:
-                  trans[c] = state_letter;
-                  break;
-                default:
-                  trans[c] = state;
-                  break;
-                }
-            }
-    }
 
 #ifdef DEBUG
   fprintf (stderr, "trans table %td", s);
@@ -2850,10 +2770,19 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
   fprintf (stderr, "\n");
 #endif
 
-  for (i = 0; i < ngrps; ++i)
-    free (grps[i].elems);
+  free (group.elems);
   free (follows.elems);
   free (tmp.elems);
+
+  /* Keep the newline transition in a special place so we can use it as
+     a sentinel.  */
+  if (tstbit (d->syntax.eolbyte, label))
+    {
+      d->newlines[s] = trans[d->syntax.eolbyte];
+      trans[d->syntax.eolbyte] = -1;
+    }
+
+  return trans[uc];
 }
 
 /* Make sure D's state arrays are large enough to hold NEW_STATE.  */
@@ -2863,23 +2792,23 @@ realloc_trans_if_necessary (struct dfa *d, state_num new_state)
   state_num oldalloc = d->tralloc;
   if (oldalloc <= new_state)
     {
-      state_num **realtrans = d->trans ? d->trans - 1 : NULL;
+      state_num **realtrans = d->trans ? d->trans - 2 : NULL;
       size_t newalloc, newalloc1;
-      newalloc1 = new_state + 1;
+      newalloc1 = realtrans ? new_state + 2 : 0;
       realtrans = x2nrealloc (realtrans, &newalloc1, sizeof *realtrans);
-      realtrans[0] = NULL;
-      d->trans = realtrans + 1;
-      d->tralloc = newalloc = newalloc1 - 1;
+      realtrans[0] = realtrans[1] = NULL;
+      d->trans = realtrans + 2;
+      d->tralloc = newalloc = newalloc1 - 2;
       d->fails = xnrealloc (d->fails, newalloc, sizeof *d->fails);
       d->success = xnrealloc (d->success, newalloc, sizeof *d->success);
       d->newlines = xnrealloc (d->newlines, newalloc, sizeof *d->newlines);
       if (d->localeinfo.multibyte)
         {
-          realtrans = d->mb_trans ? d->mb_trans - 1 : NULL;
+          realtrans = d->mb_trans ? d->mb_trans - 2 : NULL;
           realtrans = xnrealloc (realtrans, newalloc1, sizeof *realtrans);
           if (oldalloc == 0)
-            realtrans[0] = NULL;
-          d->mb_trans = realtrans + 1;
+            realtrans[0] = realtrans[1] = NULL;
+          d->mb_trans = realtrans + 2;
         }
       for (; oldalloc < newalloc; oldalloc++)
         {
@@ -2891,47 +2820,48 @@ realloc_trans_if_necessary (struct dfa *d, state_num new_state)
     }
 }
 
-/* Some routines for manipulating a compiled dfa's transition tables.
-   Each state may or may not have a transition table; if it does, and it
-   is a non-accepting state, then d->trans[state] points to its table.
-   If it is an accepting state then d->fails[state] points to its table.
-   If it has no table at all, then d->trans[state] is NULL.
-   TODO: Improve this comment, get rid of the unnecessary redundancy.  */
+/* Calculate the transition table for a new state derived from state s
+   for a compiled dfa d after input character uc, and return the new
+   state number.  */
 
-static void
-build_state (state_num s, struct dfa *d)
+static state_num
+build_state (state_num s, struct dfa *d, unsigned char uc)
 {
   state_num *trans;             /* The new transition table.  */
   state_num i, maxstate;
 
-  /* Set an upper limit on the number of transition tables that will ever
-     exist at once.  MAX_TRCOUNT is arbitrary.  The idea is that the frequently
-     used transition tables will be quickly rebuilt, whereas the ones that
-     were only needed once or twice will be cleared away.  However, do not
-     clear the initial D->min_trcount states, since they are always used.  */
-  if (MAX_TRCOUNT <= d->trcount)
+  if (d->fails[s] != NULL)
+    trans = d->fails[s];
+  else
     {
-      for (i = d->min_trcount; i < d->tralloc; ++i)
+      state_num **ptrans = (ACCEPTING (s, *d) ? d->fails : d->trans) + s;
+      if (!*ptrans)
         {
-          free (d->trans[i]);
-          free (d->fails[i]);
-          d->trans[i] = d->fails[i] = NULL;
-        }
-      d->trcount = d->min_trcount;
-
-      if (d->localeinfo.multibyte)
-        {
-          for (i = d->min_trcount; i < d->tralloc; i++)
+          /* MAX_TRCOUNT is an arbitrary upper limit on the number of
+             transition tables that can exist at once, other than for
+             initial states.  Often-used transition tables are quickly
+             rebuilt, whereas rarely-used ones are cleared away.  */
+          if (MAX_TRCOUNT <= d->trcount)
             {
-              free (d->mb_trans[i]);
-              d->mb_trans[i] = NULL;
+              for (i = d->min_trcount; i < d->tralloc; i++)
+                {
+                  free (d->trans[i]);
+                  free (d->fails[i]);
+                  d->trans[i] = d->fails[i] = NULL;
+                }
+              d->trcount = 0;
             }
-          free (d->mb_trans[-1]);
-          d->mb_trans[-1] = NULL;
+
+          d->trcount++;
+          *ptrans = xmalloc (NOTCHAR * sizeof *trans);
         }
-    }
+      trans = *ptrans;
 
-  ++d->trcount;
+      /* Fill transition table with a default value which means that the
+         transited state has not been calculated yet.  */
+      for (i = 0; i < NOTCHAR; i++)
+        trans[i] = -2;
+    }
 
   /* Set up the success bits for this state.  */
   d->success[s] = 0;
@@ -2942,8 +2872,7 @@ build_state (state_num s, struct dfa *d)
   if (ACCEPTS_IN_CONTEXT (d->states[s].context, CTX_NONE, s, *d))
     d->success[s] |= CTX_NONE;
 
-  trans = xmalloc (NOTCHAR * sizeof *trans);
-  dfastate (s, d, trans);
+  s = dfastate (s, d, uc, trans);
 
   /* Now go through the new transition table, and make sure that the trans
      and fail arrays are allocated large enough to hold a pointer for the
@@ -2954,15 +2883,7 @@ build_state (state_num s, struct dfa *d)
       maxstate = trans[i];
   realloc_trans_if_necessary (d, maxstate);
 
-  /* Keep the newline transition in a special place so we can use it as
-     a sentinel.  */
-  d->newlines[s] = trans[d->syntax.eolbyte];
-  trans[d->syntax.eolbyte] = -1;
-
-  if (ACCEPTING (s, *d))
-    d->fails[s] = trans;
-  else
-    d->trans[s] = trans;
+  return s;
 }
 
 /* Multibyte character handling sub-routines for dfaexec.  */
@@ -2982,7 +2903,7 @@ transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const **pp)
     t = d->fails[s];
   else
     {
-      build_state (s, d);
+      build_state (s, d, **pp);
       if (d->trans[s])
         t = d->trans[s];
       else
@@ -2992,6 +2913,9 @@ transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const **pp)
         }
     }
 
+  if (t[**pp] == -2)
+    build_state (s, d, **pp);
+
   return t[*(*pp)++];
 }
 
@@ -3057,7 +2981,7 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp,
   else if (d->mb_trans[s][d->states[s1].mb_trindex] >= 0)
     return d->mb_trans[s][d->states[s1].mb_trindex];
 
-  if (s < 0)
+  if (s == -1)
     copy (&d->states[s1].mbps, &d->mb_follows);
   else
     merge (&d->states[s1].mbps, &d->states[s].elems, &d->mb_follows);
@@ -3165,10 +3089,7 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
     }
 
   if (!d->tralloc)
-    {
-      realloc_trans_if_necessary (d, 1);
-      build_state (0, d);
-    }
+    realloc_trans_if_necessary (d, 0);
 
   s = s1 = 0;
   p = mbp = (unsigned char const *) begin;
@@ -3238,21 +3159,28 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
 
       if (s < 0)
         {
-          if ((char *) p > end || p[-1] != eol || d->newlines[s1] < 0)
+          if (s == -2)
+            {
+              s = build_state (s1, d, p[-1]);
+              trans = d->trans;
+            }
+          else if ((char *) p <= end && p[-1] == eol && 0 <= d->newlines[s1])
+            {
+              /* The previous character was a newline.  Count it, and skip
+                 checking of multibyte character boundary until here.  */
+              nlcount++;
+              mbp = p;
+
+              s = (allow_nl ? d->newlines[s1]
+                   : d->syntax.sbit[eol] == CTX_NEWLINE ? 0
+                   : d->syntax.sbit[eol] == CTX_LETTER ? d->min_trcount - 1
+                   : d->initstate_notbol);
+            }
+          else
             {
               p = NULL;
               goto done;
             }
-
-          /* The previous character was a newline, count it, and skip
-             checking of multibyte character boundary until here.  */
-          nlcount++;
-          mbp = p;
-
-          s = (allow_nl ? d->newlines[s1]
-               : d->syntax.sbit[eol] == CTX_NEWLINE ? 0
-               : d->syntax.sbit[eol] == CTX_LETTER ? d->min_trcount - 1
-               : d->initstate_notbol);
         }
       else if (d->fails[s])
         {
@@ -3282,7 +3210,7 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
         }
       else
         {
-          build_state (s, d);
+          build_state (s, d, p[0]);
           trans = d->trans;
         }
     }
@@ -3362,7 +3290,7 @@ free_mbdata (struct dfa *d)
       state_num s;
       for (s = -1; s < d->tralloc; s++)
         free (d->mb_trans[s]);
-      free (d->mb_trans - 1);
+      free (d->mb_trans - 2);
     }
 }
 
@@ -3370,6 +3298,12 @@ free_mbdata (struct dfa *d)
 static bool _GL_ATTRIBUTE_PURE
 dfa_supported (struct dfa const *d)
 {
+  /* Declare any non-UTF8 multibyte locale "not supported."  Otherwise, a
+     regexp like ".*7" would mistakenly match \uC9, e.g., via this command:
+     (export LC_ALL=zh_CN.gb18030; printf '\uC9\n' | grep '.*7')  */
+  if (d->localeinfo.multibyte && !d->localeinfo.using_utf8)
+    return false;
+
   size_t i;
   for (i = 0; i < d->tindex; i++)
     {
@@ -3436,7 +3370,6 @@ static void
 dfassbuild (struct dfa *d)
 {
   size_t i, j;
-  charclass ccl;
   bool have_achar = false;
   bool have_nchar = false;
   struct dfa *sup = dfaalloc ();
@@ -3473,14 +3406,16 @@ dfassbuild (struct dfa *d)
         case ANYCHAR:
         case MBCSET:
         case BACKREF:
-          zeroset (ccl);
-          notset (ccl);
-          sup->tokens[j++] = CSET + charclass_index (sup, ccl);
-          sup->tokens[j++] = STAR;
-          if (d->tokens[i + 1] == QMARK || d->tokens[i + 1] == STAR
-              || d->tokens[i + 1] == PLUS)
-            i++;
-          have_achar = true;
+          {
+            charclass ccl;
+            fillset (ccl);
+            sup->tokens[j++] = CSET + charclass_index (sup, ccl);
+            sup->tokens[j++] = STAR;
+            if (d->tokens[i + 1] == QMARK || d->tokens[i + 1] == STAR
+                || d->tokens[i + 1] == PLUS)
+              i++;
+            have_achar = true;
+          }
           break;
         case BEGWORD:
         case ENDWORD:
@@ -3571,7 +3506,7 @@ dfafree (struct dfa *d)
           free (d->fails[i]);
         }
 
-      free (d->trans - 1);
+      free (d->trans - 2);
       free (d->fails);
       free (d->newlines);
       free (d->success);