1 files changed, 677 insertions, 748 deletions
diff --git a/dfa.c b/dfa.c
index 378305df..d306d5c9 100644
--- a/dfa.c
+++ b/dfa.c
@@ -37,21 +37,11 @@
 #if HAVE_SETLOCALE
 #include <locale.h>
 #endif
-#ifdef HAVE_STDBOOL_H
-#include <stdbool.h>
-#else
-#include "missing_d/gawkbool.h"
-#endif /* HAVE_STDBOOL_H */
 
-/* Gawk doesn't use Gnulib, so don't assume that setlocale and
-   static_assert are present.  */
+/* Gawk doesn't use Gnulib, so don't assume that setlocale is present.  */
 #ifndef LC_ALL
 # define setlocale(category, locale) NULL
 #endif
-#ifndef static_assert
-# define static_assert(cond, diagnostic) \
-    extern int (*foo (void)) [!!sizeof (struct { int foo: (cond) ? 8 : -1; })]
-#endif
 
 #define STREQ(a, b) (strcmp (a, b) == 0)
 
@@ -85,10 +75,6 @@
 #endif
 #endif /* GAWK */
 
-#if HAVE_LANGINFO_CODESET
-# include <langinfo.h>
-#endif
-
 #include "xalloc.h"
 
 #include "dfa.h"
@@ -338,7 +324,8 @@ typedef struct
   size_t hash;                  /* Hash of the positions of this state.  */
   position_set elems;           /* Positions this state could match.  */
   unsigned char context;        /* Context from previous state.  */
-  char backref;                 /* True if this state matches a \<digit>.  */
+  bool has_backref;             /* True if this state matches a \<digit>.  */
+  bool has_mbcset;              /* True if this state matches a MBCSET.  */
   unsigned short constraint;    /* Constraint for this state to accept.  */
   token first_end;              /* Token value of the first END in elems.  */
   position_set mbps;            /* Positions which can match multibyte
@@ -355,13 +342,16 @@ typedef ptrdiff_t state_num;
 struct mb_char_classes
 {
   ptrdiff_t cset;
-  int invert;
+  bool invert;
   wchar_t *chars;               /* Normal characters.  */
   size_t nchars;
   wctype_t *ch_classes;         /* Character classes.  */
   size_t nch_classes;
-  wchar_t *range_sts;           /* Range characters (start of the range).  */
-  wchar_t *range_ends;          /* Range characters (end of the range).  */
+  struct			/* Range characters.  */
+  {
+    wchar_t beg;		/* Range start.  */
+    wchar_t end;		/* Range end.  */
+  } *ranges;
   size_t nranges;
   char **equivs;                /* Equivalence classes.  */
   size_t nequivs;
@@ -387,10 +377,11 @@ struct dfa
   size_t nleaves;               /* Number of leaves on the parse tree.  */
   size_t nregexps;              /* Count of parallel regexps being built
                                    with dfaparse.  */
-  unsigned int mb_cur_max;      /* Cached value of MB_CUR_MAX.  */
+  bool multibyte;		/* True iff MB_CUR_MAX > 1.  */
   token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales.  */
+  mbstate_t mbs;		/* Multibyte conversion state.  */
 
-  /* The following are used only if MB_CUR_MAX > 1.  */
+  /* The following are valid only if MB_CUR_MAX > 1.  */
 
   /* The value of multibyte_prop[i] is defined by following rule.
      if tokens[i] < NOTCHAR
@@ -409,7 +400,6 @@ struct dfa
      multibyte_prop
      = 3     , 1               ,  0              ,  2              , 3
    */
-  size_t nmultibyte_prop;
   int *multibyte_prop;
 
 #if MBS_SUPPORT
@@ -425,10 +415,13 @@ struct dfa
   size_t nmbcsets;
   size_t mbcsets_alloc;
 
+  /* Fields filled by the superset.  */
+  struct dfa *superset;             /* Hint of the dfa.  */
+
   /* Fields filled by the state builder.  */
   dfa_state *states;            /* States of the dfa.  */
   state_num sindex;             /* Index for adding new states.  */
-  state_num salloc;             /* Number of states currently allocated.  */
+  size_t salloc;		/* Number of states currently allocated.  */
 
   /* Fields filled by the parse tree->NFA conversion.  */
   position_set *follows;        /* Array of follow sets, indexed by position
@@ -438,7 +431,7 @@ struct dfa
                                    matching the given position in a string
                                    matching the regexp.  Allocated to the
                                    maximum possible position index.  */
-  int searchflag;               /* True if we are supposed to build a searching
+  bool searchflag;              /* True if we are supposed to build a searching
                                    as opposed to an exact matcher.  A searching
                                    matcher finds the first and shortest string
                                    matching a regexp anywhere in the buffer,
@@ -448,16 +441,16 @@ struct dfa
 
   /* Fields filled by dfaexec.  */
   state_num tralloc;            /* Number of transition tables that have
-                                   slots so far.  */
+                                   slots so far, not counting trans[-1].  */
   int trcount;                  /* Number of transition tables that have
                                    actually been built.  */
   state_num **trans;            /* Transition tables for states that can
                                    never accept.  If the transitions for a
                                    state have not yet been computed, or the
                                    state could possibly accept, its entry in
-                                   this table is NULL.  */
-  state_num **realtrans;        /* Trans always points to realtrans + 1; this
-                                   is so trans[-1] can contain NULL.  */
+                                   this table is NULL.  This points to one
+                                   past the start of the allocated array,
+                                   and trans[-1] is always NULL.  */
   state_num **fails;            /* Transition tables after failing to accept
                                    on a state that potentially could do so.  */
   int *success;                 /* Table of acceptance conditions used in
@@ -472,6 +465,11 @@ struct dfa
   struct dfamust *musts;        /* List of strings, at least one of which
                                    is known to appear in any r.e. matching
                                    the dfa.  */
+  position_set mb_follows;	/* Follow set added by ANYCHAR and/or MBCSET
+                                   on demand.  */
+  int *mb_match_lens;           /* Array of length reduced by ANYCHAR and/or
+                                   MBCSET.  Null if mb_follows.elems has not
+                                   been allocated.  */
 };
 
 /* Some macros for user access to dfa internals.  */
@@ -487,41 +485,6 @@ struct dfa
 static void dfamust (struct dfa *dfa);
 static void regexp (void);
 
-/* These two macros are identical to the ones in gnulib's xalloc.h,
-   except that they do not cast the result to "(t *)", and thus may
-   be used via type-free CALLOC and MALLOC macros.  */
-#undef XNMALLOC
-#undef XCALLOC
-
-/* Allocate memory for N elements of type T, with error checking.  */
-/* extern t *XNMALLOC (size_t n, typename t); */
-# define XNMALLOC(n, t) \
-    (sizeof (t) == 1 ? xmalloc (n) : xnmalloc (n, sizeof (t)))
-
-/* Allocate memory for N elements of type T, with error checking,
-   and zero it.  */
-/* extern t *XCALLOC (size_t n, typename t); */
-# define XCALLOC(n, t) \
-    (sizeof (t) == 1 ? xzalloc (n) : xcalloc (n, sizeof (t)))
-
-#define CALLOC(p, n) do { (p) = XCALLOC (n, *(p)); } while (0)
-#undef MALLOC	/* Irix defines this */
-#define MALLOC(p, n) do { (p) = XNMALLOC (n, *(p)); } while (0)
-#define REALLOC(p, n) do {(p) = xnrealloc (p, n, sizeof (*(p))); } while (0)
-
-/* Reallocate an array of type *P if N_ALLOC is <= N_REQUIRED.  */
-#define REALLOC_IF_NECESSARY(p, n_alloc, n_required)		\
-  do								\
-    {								\
-      if ((n_alloc) <= (n_required))				\
-        {							\
-          size_t new_n_alloc = (n_required) + !(p);		\
-          (p) = x2nrealloc (p, &new_n_alloc, sizeof (*(p)));	\
-          (n_alloc) = new_n_alloc;				\
-        }							\
-    }								\
-  while (false)
-
 static void
 dfambcache (struct dfa *d)
 {
@@ -546,33 +509,34 @@ dfambcache (struct dfa *d)
 }
 
 #if MBS_SUPPORT
-/* Given the dfa D, store into *PWC the result of converting the
-   leading bytes of the multibyte buffer S of length N bytes, updating
-   the conversion state in *MBS.  On conversion error, convert just a
-   single byte as-is.  Return the number of bytes converted.
+/* Store into *PWC the result of converting the leading bytes of the
+   multibyte buffer S of length N bytes, using the mbrtowc_cache in *D
+   and updating the conversion state in *D.  On conversion error,
+   convert just a single byte as-is.  Return the number of bytes
+   converted.
 
-   This differs from mbrtowc (PWC, S, N, MBS) as follows:
+   This differs from mbrtowc (PWC, S, N, &D->mbs) as follows:
 
-   * Extra arg D, containing an mbrtowc_cache for speed.
+   * The last arg is a dfa *D instead of merely a multibyte conversion
+     state D->mbs.  D also contains an mbrtowc_cache for speed.
    * N must be at least 1.
    * S[N - 1] must be a sentinel byte.
    * Shift encodings are not supported.
    * The return value is always in the range 1..N.
-   * *MBS is always valid afterwards.
+   * D->mbs is always valid afterwards.
    * *PWC is always set to something.  */
 static size_t
-mbs_to_wchar (struct dfa *d, wchar_t *pwc, char const *s, size_t n,
-              mbstate_t *mbs)
+mbs_to_wchar (wchar_t *pwc, char const *s, size_t n, struct dfa *d)
 {
   unsigned char uc = s[0];
   wint_t wc = d->mbrtowc_cache[uc];
 
   if (wc == WEOF)
     {
-      size_t nbytes = mbrtowc (pwc, s, n, mbs);
+      size_t nbytes = mbrtowc (pwc, s, n, &d->mbs);
       if (0 < nbytes && nbytes < (size_t) -2)
         return nbytes;
-      memset (mbs, 0, sizeof *mbs);
+      memset (&d->mbs, 0, sizeof d->mbs);
       wc = uc;
     }
 
@@ -700,36 +664,60 @@ notset (charclass s)
     s[i] = ~s[i];
 }
 
-static int
+static bool
 equal (charclass const s1, charclass const s2)
 {
   return memcmp (s1, s2, sizeof (charclass)) == 0;
 }
 
-/* A pointer to the current dfa is kept here during parsing.  */
-static struct dfa *dfa;
+/* Ensure that the array addressed by PTR holds at least NITEMS +
+   (PTR || !NITEMS) items.  Either return PTR, or reallocate the array
+   and return its new address.  Although PTR may be null, the returned
+   value is never null.
+
+   The array holds *NALLOC items; *NALLOC must be zero if PTR is null,
+   and is updated on reallocation.  ITEMSIZE is the size of one item.
+   Avoid O(N**2) behavior on arrays growing linearly.  */
+static void *
+maybe_realloc (void *ptr, size_t nitems, size_t *nalloc, size_t itemsize)
+{
+  if (nitems < *nalloc)
+    return ptr;
+  *nalloc = nitems;
+  return x2nrealloc (ptr, nalloc, itemsize);
+}
 
-/* Find the index of charclass s in dfa->charclasses, or allocate a
-   new charclass.  */
+/* In DFA D, find the index of charclass S, or allocate a new one.  */
 static size_t
-charclass_index (charclass const s)
+dfa_charclass_index (struct dfa *d, charclass const s)
 {
   size_t i;
 
-  for (i = 0; i < dfa->cindex; ++i)
-    if (equal (s, dfa->charclasses[i]))
+  for (i = 0; i < d->cindex; ++i)
+    if (equal (s, d->charclasses[i]))
       return i;
-  REALLOC_IF_NECESSARY (dfa->charclasses, dfa->calloc, dfa->cindex + 1);
-  ++dfa->cindex;
-  copyset (s, dfa->charclasses[i]);
+  d->charclasses = maybe_realloc (d->charclasses, d->cindex, &d->calloc,
+                                  sizeof *d->charclasses);
+  ++d->cindex;
+  copyset (s, d->charclasses[i]);
   return i;
 }
 
+/* A pointer to the current dfa is kept here during parsing.  */
+static struct dfa *dfa;
+
+/* Find the index of charclass S in the current DFA, or allocate a new one.  */
+static size_t
+charclass_index (charclass const s)
+{
+  return dfa_charclass_index (dfa, s);
+}
+
 /* Syntax bits controlling the behavior of the lexical analyzer.  */
 static reg_syntax_t syntax_bits, syntax_bits_set;
 
 /* Flag for case-folding letters into sets.  */
-static int case_fold;
+static bool case_fold;
 
 /* End-of-line byte in data.  */
 static unsigned char eolbyte;
@@ -759,7 +747,7 @@ static charclass newline;
 static int
 char_context (unsigned char c)
 {
-  if (c == eolbyte || c == 0)
+  if (c == eolbyte)
     return CTX_NEWLINE;
   if (IS_WORD_CONSTITUENT (c))
     return CTX_LETTER;
@@ -784,7 +772,7 @@ dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
 
   syntax_bits_set = 1;
   syntax_bits = bits;
-  case_fold = fold;
+  case_fold = fold != 0;
   eolbyte = eol;
 
   for (i = 0; i < NOTCHAR; ++i)
@@ -843,19 +831,16 @@ int
 using_utf8 (void)
 {
   static int utf8 = -1;
-  if (utf8 == -1)
+  if (utf8 < 0)
     {
-#if defined HAVE_LANGINFO_CODESET && MBS_SUPPORT
-      utf8 = (STREQ (nl_langinfo (CODESET), "UTF-8"));
-#else
-      utf8 = 0;
-#endif
+      wchar_t wc;
+      mbstate_t mbs = { 0 };
+      utf8 = mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100;
 #ifdef LIBC_IS_BORKED
       if (gawk_mb_cur_max == 1)
          utf8 = 0;
 #endif
     }
-
   return utf8;
 }
 
@@ -907,7 +892,7 @@ using_simple_locale (void)
 static char const *lexptr;      /* Pointer to next input character.  */
 static size_t lexleft;          /* Number of characters remaining.  */
 static token lasttok;           /* Previous token returned; initially END.  */
-static int laststart;           /* True if we're separated from beginning or (,
+static bool laststart;          /* True if we're separated from beginning or (,
                                    | only by zero-width characters.  */
 static size_t parens;           /* Count of outstanding left parens.  */
 static int minrep, maxrep;      /* Repeat counts for {m,n}.  */
@@ -915,27 +900,8 @@ static int minrep, maxrep;      /* Repeat counts for {m,n}.  */
 static int cur_mb_len = 1;      /* Length of the multibyte representation of
                                    wctok.  */
 /* These variables are used only if (MB_CUR_MAX > 1).  */
-static mbstate_t mbs;           /* mbstate for mbrtowc.  */
 static wchar_t wctok;           /* Wide character representation of the current
                                    multibyte character.  */
-static unsigned char *mblen_buf;/* Correspond to the input buffer in dfaexec.
-                                   Each element stores the number of remaining
-                                   bytes of the corresponding multibyte
-                                   character in the input string.  A element's
-                                   value is 0 if the corresponding character is
-                                   single-byte.
-                                   e.g., input : 'a', <mb(0)>, <mb(1)>, <mb(2)>
-                                   mblen_buf   :  0,       3,       2,       1
-                                 */
-static wchar_t *inputwcs;       /* Wide character representation of the input
-                                   string in dfaexec.
-                                   The length of this array is the same as
-                                   the length of input string (char array).
-                                   inputstring[i] is a single-byte char,
-                                   or the first byte of a multibyte char;
-                                   inputwcs[i] is the codepoint.  */
-static unsigned char const *buf_begin;  /* reference to begin in dfaexec.  */
-static unsigned char const *buf_end;    /* reference to end in dfaexec.  */
 
 
 #if MBS_SUPPORT
@@ -952,7 +918,7 @@ static unsigned char const *buf_end;    /* reference to end in dfaexec.  */
     else					\
       {						\
         wchar_t _wc;				\
-        size_t nbytes = mbs_to_wchar (dfa, &_wc, lexptr, lexleft, &mbs); \
+        size_t nbytes = mbs_to_wchar (&_wc, lexptr, lexleft, dfa); \
         cur_mb_len = nbytes;			\
         (wc) = _wc;				\
         (c) = nbytes == 1 ? to_uchar (*lexptr) : EOF;    \
@@ -999,14 +965,17 @@ static short const lonesome_lower[] =
     0x03F5, 0x1E9B, 0x1FBE,
   };
 
-static_assert ((sizeof lonesome_lower / sizeof *lonesome_lower + 2
-                == CASE_FOLDED_BUFSIZE),
-               "CASE_FOLDED_BUFSIZE is wrong");
+/* Maximum number of characters that can be the case-folded
+   counterparts of a single character, not counting the character
+   itself.  This is 1 for towupper, 1 for towlower, and 1 for each
+   entry in LONESOME_LOWER.  */
+enum
+{ CASE_FOLDED_BUFSIZE = 2 + sizeof lonesome_lower / sizeof *lonesome_lower };
 
 /* Find the characters equal to C after case-folding, other than C
    itself, and store them into FOLDED.  Return the number of characters
    stored.  */
-int
+static int
 case_folded_counterparts (wchar_t c, wchar_t folded[CASE_FOLDED_BUFSIZE])
 {
   int i;
@@ -1071,7 +1040,7 @@ find_pred (const char *str)
 static token
 parse_bracket_exp (void)
 {
-  int invert;
+  bool invert;
   int c, c1, c2;
   charclass ccl;
 
@@ -1092,16 +1061,14 @@ parse_bracket_exp (void)
 
   /* Work area to build a mb_char_classes.  */
   struct mb_char_classes *work_mbc;
-  size_t chars_al, range_sts_al, range_ends_al, ch_classes_al,
-    equivs_al, coll_elems_al;
+  size_t chars_al, ranges_al, ch_classes_al, equivs_al, coll_elems_al;
 
-  chars_al = 0;
-  range_sts_al = range_ends_al = 0;
-  ch_classes_al = equivs_al = coll_elems_al = 0;
-  if (MB_CUR_MAX > 1)
+  chars_al = ranges_al = ch_classes_al = equivs_al = coll_elems_al = 0;
+  if (dfa->multibyte)
     {
-      REALLOC_IF_NECESSARY (dfa->mbcsets, dfa->mbcsets_alloc,
-                            dfa->nmbcsets + 1);
+      dfa->mbcsets = maybe_realloc (dfa->mbcsets, dfa->nmbcsets,
+                                    &dfa->mbcsets_alloc,
+                                    sizeof *dfa->mbcsets);
 
       /* dfa->multibyte_prop[] hold the index of dfa->mbcsets.
          We will update dfa->multibyte_prop[] in addtok, because we can't
@@ -1119,11 +1086,11 @@ parse_bracket_exp (void)
   if (c == '^')
     {
       FETCH_WC (c, wc, _("unbalanced ["));
-      invert = 1;
+      invert = true;
       known_bracket_exp = using_simple_locale ();
     }
   else
-    invert = 0;
+    invert = false;
 
   colon_warning_state = (c == ':');
   do
@@ -1173,14 +1140,15 @@ parse_bracket_exp (void)
                   if (!pred)
                     dfaerror (_("invalid character class"));
 
-                  if (MB_CUR_MAX > 1 && !pred->single_byte_only)
+                  if (dfa->multibyte && !pred->single_byte_only)
                     {
                       /* Store the character class as wctype_t.  */
                       wctype_t wt = (wctype_t) wctype (class);
 
-                      REALLOC_IF_NECESSARY (work_mbc->ch_classes,
-                                            ch_classes_al,
-                                            work_mbc->nch_classes + 1);
+                      work_mbc->ch_classes
+                        = maybe_realloc (work_mbc->ch_classes,
+                                         work_mbc->nch_classes, &ch_classes_al,
+                                         sizeof *work_mbc->ch_classes);
                       work_mbc->ch_classes[work_mbc->nch_classes++] = wt;
                     }
 
@@ -1227,29 +1195,25 @@ parse_bracket_exp (void)
               if (c2 == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
                 FETCH_WC (c2, wc2, _("unbalanced ["));
 
-              if (MB_CUR_MAX > 1)
+              if (dfa->multibyte)
                 {
                   /* When case folding map a range, say [m-z] (or even [M-z])
                      to the pair of ranges, [m-z] [M-Z].  Although this code
                      is wrong in multiple ways, it's never used in practice.
                      FIXME: Remove this (and related) unused code.  */
-                  REALLOC_IF_NECESSARY (work_mbc->range_sts,
-                                        range_sts_al, work_mbc->nranges + 1);
-                  REALLOC_IF_NECESSARY (work_mbc->range_ends,
-                                        range_ends_al, work_mbc->nranges + 1);
-                  work_mbc->range_sts[work_mbc->nranges] =
-                    case_fold ? towlower (wc) : (wchar_t) wc;
-                  work_mbc->range_ends[work_mbc->nranges++] =
-                    case_fold ? towlower (wc2) : (wchar_t) wc2;
+                  work_mbc->ranges
+                    = maybe_realloc (work_mbc->ranges, work_mbc->nranges + 2,
+                                     &ranges_al, sizeof *work_mbc->ranges);
+                  work_mbc->ranges[work_mbc->nranges].beg
+                    = case_fold ? towlower (wc) : wc;
+                  work_mbc->ranges[work_mbc->nranges++].end
+                    = case_fold ? towlower (wc2) : wc2;
 
                   if (case_fold && (iswalpha (wc) || iswalpha (wc2)))
                     {
-                      REALLOC_IF_NECESSARY (work_mbc->range_sts,
-                                            range_sts_al, work_mbc->nranges + 1);
-                      work_mbc->range_sts[work_mbc->nranges] = towupper (wc);
-                      REALLOC_IF_NECESSARY (work_mbc->range_ends,
-                                            range_ends_al, work_mbc->nranges + 1);
-                      work_mbc->range_ends[work_mbc->nranges++] = towupper (wc2);
+                      work_mbc->ranges[work_mbc->nranges].beg = towupper (wc);
+                      work_mbc->ranges[work_mbc->nranges++].end
+                        = towupper (wc2);
                     }
                 }
               else if (using_simple_locale ())
@@ -1284,7 +1248,7 @@ parse_bracket_exp (void)
 
       colon_warning_state |= (c == ':') ? 2 : 4;
 
-      if (MB_CUR_MAX == 1)
+      if (!dfa->multibyte)
         {
           if (case_fold)
             setbit_case_fold_c (c, ccl);
@@ -1297,16 +1261,17 @@ parse_bracket_exp (void)
         {
           wchar_t folded[CASE_FOLDED_BUFSIZE];
           int i, n = case_folded_counterparts (wc, folded);
-          REALLOC_IF_NECESSARY (work_mbc->chars, chars_al,
-                                work_mbc->nchars + n);
+          work_mbc->chars = maybe_realloc (work_mbc->chars,
+                                           work_mbc->nchars + n, &chars_al,
+                                           sizeof *work_mbc->chars);
           for (i = 0; i < n; i++)
             if (!setbit_wc (folded[i], ccl))
               work_mbc->chars[work_mbc->nchars++] = folded[i];
         }
       if (!setbit_wc (wc, ccl))
         {
-          REALLOC_IF_NECESSARY (work_mbc->chars, chars_al,
-                                work_mbc->nchars + 1);
+          work_mbc->chars = maybe_realloc (work_mbc->chars, work_mbc->nchars,
+                                           &chars_al, sizeof *work_mbc->chars);
           work_mbc->chars[work_mbc->nchars++] = wc;
         }
     }
@@ -1318,7 +1283,7 @@ parse_bracket_exp (void)
   if (! known_bracket_exp)
     return BACKREF;
 
-  if (MB_CUR_MAX > 1)
+  if (dfa->multibyte)
     {
       static charclass zeroclass;
       work_mbc->invert = invert;
@@ -1328,7 +1293,7 @@ parse_bracket_exp (void)
 
   if (invert)
     {
-      assert (MB_CUR_MAX == 1);
+      assert (!dfa->multibyte);
       notset (ccl);
       if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
         clrbit (eolbyte, ccl);
@@ -1341,7 +1306,7 @@ static token
 lex (void)
 {
   unsigned int c, c2;
-  int backslash = 0;
+  bool backslash = false;
   charclass ccl;
   int i;
 
@@ -1364,7 +1329,7 @@ lex (void)
             goto normal_char;
           if (lexleft == 0)
             dfaerror (_("unfinished \\ escape"));
-          backslash = 1;
+          backslash = true;
           break;
 
         case '^':
@@ -1402,7 +1367,7 @@ lex (void)
         case '9':
           if (backslash && !(syntax_bits & RE_NO_BK_REFS))
             {
-              laststart = 0;
+              laststart = false;
               return lasttok = BACKREF;
             }
           goto normal_char;
@@ -1517,7 +1482,7 @@ lex (void)
             lexptr = p;
             lexleft = lim - p;
           }
-          laststart = 0;
+          laststart = false;
           return lasttok = REPMN;
 
         case '|':
@@ -1525,21 +1490,21 @@ lex (void)
             goto normal_char;
           if (backslash != ((syntax_bits & RE_NO_BK_VBAR) == 0))
             goto normal_char;
-          laststart = 1;
+          laststart = true;
           return lasttok = OR;
 
         case '\n':
           if (syntax_bits & RE_LIMITED_OPS
               || backslash || !(syntax_bits & RE_NEWLINE_ALT))
             goto normal_char;
-          laststart = 1;
+          laststart = true;
           return lasttok = OR;
 
         case '(':
           if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0))
             goto normal_char;
           ++parens;
-          laststart = 1;
+          laststart = true;
           return lasttok = LPAREN;
 
         case ')':
@@ -1548,17 +1513,17 @@ lex (void)
           if (parens == 0 && syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD)
             goto normal_char;
           --parens;
-          laststart = 0;
+          laststart = false;
           return lasttok = RPAREN;
 
         case '.':
           if (backslash)
             goto normal_char;
-          if (MB_CUR_MAX > 1)
+          if (dfa->multibyte)
             {
               /* In multibyte environment period must match with a single
                  character not a byte.  So we use ANYCHAR.  */
-              laststart = 0;
+              laststart = false;
               return lasttok = ANYCHAR;
             }
           zeroset (ccl);
@@ -1567,14 +1532,14 @@ lex (void)
             clrbit (eolbyte, ccl);
           if (syntax_bits & RE_DOT_NOT_NULL)
             clrbit ('\0', ccl);
-          laststart = 0;
+          laststart = false;
           return lasttok = CSET + charclass_index (ccl);
 
         case 's':
         case 'S':
           if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
             goto normal_char;
-          if (MB_CUR_MAX == 1)
+          if (!dfa->multibyte)
             {
               zeroset (ccl);
               for (c2 = 0; c2 < NOTCHAR; ++c2)
@@ -1582,7 +1547,7 @@ lex (void)
                   setbit (c2, ccl);
               if (c == 'S')
                 notset (ccl);
-              laststart = 0;
+              laststart = false;
               return lasttok = CSET + charclass_index (ccl);
             }
 
@@ -1612,7 +1577,7 @@ lex (void)
 
           POP_LEX_STATE ();
 
-          laststart = 0;
+          laststart = false;
           return lasttok;
 
         case 'w':
@@ -1625,21 +1590,21 @@ lex (void)
               setbit (c2, ccl);
           if (c == 'W')
             notset (ccl);
-          laststart = 0;
+          laststart = false;
           return lasttok = CSET + charclass_index (ccl);
 
         case '[':
           if (backslash)
             goto normal_char;
-          laststart = 0;
+          laststart = false;
           return lasttok = parse_bracket_exp ();
 
         default:
         normal_char:
-          laststart = 0;
+          laststart = false;
           /* For multibyte character sets, folding is done in atom.  Always
              return WCHAR.  */
-          if (MB_CUR_MAX > 1)
+          if (dfa->multibyte)
             return lasttok = WCHAR;
 
           if (case_fold && isalpha (c))
@@ -1671,14 +1636,16 @@ static size_t depth;            /* Current depth of a hypothetical stack
 static void
 addtok_mb (token t, int mbprop)
 {
-  if (MB_CUR_MAX > 1)
+  if (dfa->talloc == dfa->tindex)
     {
-      REALLOC_IF_NECESSARY (dfa->multibyte_prop, dfa->nmultibyte_prop,
-                            dfa->tindex + 1);
-      dfa->multibyte_prop[dfa->tindex] = mbprop;
+      dfa->tokens = x2nrealloc (dfa->tokens, &dfa->talloc,
+                                sizeof *dfa->tokens);
+      if (dfa->multibyte)
+        dfa->multibyte_prop = xnrealloc (dfa->multibyte_prop, dfa->talloc,
+                                         sizeof *dfa->multibyte_prop);
     }
-
-  REALLOC_IF_NECESSARY (dfa->tokens, dfa->talloc, dfa->tindex + 1);
+  if (dfa->multibyte)
+    dfa->multibyte_prop[dfa->tindex] = mbprop;
   dfa->tokens[dfa->tindex++] = t;
 
   switch (t)
@@ -1710,7 +1677,7 @@ static void addtok_wc (wint_t wc);
 static void
 addtok (token t)
 {
-  if (MB_CUR_MAX > 1 && t == MBCSET)
+  if (dfa->multibyte && t == MBCSET)
     {
       bool need_or = false;
       struct mb_char_classes *work_mbc = &dfa->mbcsets[dfa->nmbcsets - 1];
@@ -1967,7 +1934,7 @@ copytoks (size_t tindex, size_t ntokens)
 {
   size_t i;
 
-  if (MB_CUR_MAX > 1)
+  if (dfa->multibyte)
     for (i = 0; i < ntokens; ++i)
       addtok_mb (dfa->tokens[tindex + i], dfa->multibyte_prop[tindex + i]);
   else
@@ -2050,12 +2017,12 @@ dfaparse (char const *s, size_t len, struct dfa *d)
   lexptr = s;
   lexleft = len;
   lasttok = END;
-  laststart = 1;
+  laststart = true;
   parens = 0;
-  if (MB_CUR_MAX > 1)
+  if (dfa->multibyte)
     {
       cur_mb_len = 0;
-      memset (&mbs, 0, sizeof mbs);
+      memset (&d->mbs, 0, sizeof d->mbs);
     }
 
   if (!syntax_bits_set)
@@ -2080,19 +2047,24 @@ dfaparse (char const *s, size_t len, struct dfa *d)
 
 /* Some primitives for operating on sets of positions.  */
 
-/* Copy one set to another; the destination must be large enough.  */
+/* Copy one set to another.  */
 static void
 copy (position_set const *src, position_set * dst)
 {
-  REALLOC_IF_NECESSARY (dst->elems, dst->alloc, src->nelem);
-  memcpy (dst->elems, src->elems, sizeof (dst->elems[0]) * src->nelem);
+  if (dst->alloc < src->nelem)
+    {
+      free (dst->elems);
+      dst->alloc = src->nelem;
+      dst->elems = x2nrealloc (NULL, &dst->alloc, sizeof *dst->elems);
+    }
+  memcpy (dst->elems, src->elems, src->nelem * sizeof *dst->elems);
   dst->nelem = src->nelem;
 }
 
 static void
 alloc_position_set (position_set * s, size_t size)
 {
-  MALLOC (s->elems, size);
+  s->elems = xnmalloc (size, sizeof *s->elems);
   s->alloc = size;
   s->nelem = 0;
 }
@@ -2122,7 +2094,7 @@ insert (position p, position_set * s)
       return;
     }
 
-  REALLOC_IF_NECESSARY (s->elems, s->alloc, count + 1);
+  s->elems = maybe_realloc (s->elems, count, &s->alloc, sizeof *s->elems);
   for (i = count; i > lo; i--)
     s->elems[i] = s->elems[i - 1];
   s->elems[lo] = p;
@@ -2136,7 +2108,12 @@ merge (position_set const *s1, position_set const *s2, position_set * m)
 {
   size_t i = 0, j = 0;
 
-  REALLOC_IF_NECESSARY (m->elems, m->alloc, s1->nelem + s2->nelem);
+  if (m->alloc < s1->nelem + s2->nelem)
+    {
+      free (m->elems);
+      m->elems = maybe_realloc (NULL, s1->nelem + s2->nelem, &m->alloc,
+                                sizeof *m->elems);
+    }
   m->nelem = 0;
   while (i < s1->nelem && j < s2->nelem)
     if (s1->elems[i].index > s2->elems[j].index)
@@ -2197,19 +2174,19 @@ state_index (struct dfa *d, position_set const *s, int context)
     }
 
   /* We'll have to create a new state.  */
-  REALLOC_IF_NECESSARY (d->states, d->salloc, d->sindex + 1);
+  d->states = maybe_realloc (d->states, d->sindex, &d->salloc,
+                             sizeof *d->states);
   d->states[i].hash = hash;
   alloc_position_set (&d->states[i].elems, s->nelem);
   copy (s, &d->states[i].elems);
   d->states[i].context = context;
-  d->states[i].backref = 0;
+  d->states[i].has_backref = false;
+  d->states[i].has_mbcset = false;
   d->states[i].constraint = 0;
   d->states[i].first_end = 0;
-  if (MBS_SUPPORT)
-    {
-      d->states[i].mbps.nelem = 0;
-      d->states[i].mbps.elems = NULL;
-    }
+  d->states[i].mbps.nelem = 0;
+  d->states[i].mbps.elems = NULL;
+
   for (j = 0; j < s->nelem; ++j)
     if (d->tokens[s->elems[j].index] < 0)
       {
@@ -2222,7 +2199,7 @@ state_index (struct dfa *d, position_set const *s, int context)
     else if (d->tokens[s->elems[j].index] == BACKREF)
       {
         d->states[i].constraint = NO_CONSTRAINT;
-        d->states[i].backref = 1;
+        d->states[i].has_backref = true;
       }
 
   ++d->sindex;
@@ -2239,10 +2216,10 @@ static void
 epsclosure (position_set * s, struct dfa const *d)
 {
   size_t i, j;
-  char *visited;  /* Array of booleans, enough to use char, not int.  */
   position p, old;
 
-  CALLOC (visited, d->tindex);
+  /* Array of booleans, large enough to use char, not int.  */
+  char *visited = xzalloc (d->tindex);
 
   for (i = 0; i < s->nelem; ++i)
     if (d->tokens[s->elems[i].index] >= NOTCHAR
@@ -2399,17 +2376,26 @@ state_separate_contexts (position_set const *s)
 void
 dfaanalyze (struct dfa *d, int searchflag)
 {
-  int *nullable;                /* Nullable stack.  */
-  size_t *nfirstpos;            /* Element count stack for firstpos sets.  */
-  position *firstpos;           /* Array where firstpos elements are stored.  */
-  size_t *nlastpos;             /* Element count stack for lastpos sets.  */
-  position *lastpos;            /* Array where lastpos elements are stored.  */
+  /* Array allocated to hold position sets.  */
+  position *posalloc = xnmalloc (d->nleaves, 2 * sizeof *posalloc);
+  /* Firstpos and lastpos elements.  */
+  position *firstpos = posalloc + d->nleaves;
+  position *lastpos = firstpos + d->nleaves;
+
+  /* Stack for element counts and nullable flags.  */
+  struct
+  {
+    /* Whether the entry is nullable.  */
+    bool nullable;
+
+    /* Counts of firstpos and lastpos sets.  */
+    size_t nfirstpos;
+    size_t nlastpos;
+  } *stkalloc = xnmalloc (d->depth, sizeof *stkalloc), *stk = stkalloc;
+
   position_set tmp;             /* Temporary set for merging sets.  */
   position_set merged;          /* Result of merging sets.  */
   int separate_contexts;        /* Context wanted by some position.  */
-  int *o_nullable;
-  size_t *o_nfirst, *o_nlast;
-  position *o_firstpos, *o_lastpos;
   size_t i, j;
   position *pos;
 
@@ -2423,21 +2409,9 @@ dfaanalyze (struct dfa *d, int searchflag)
   putc ('\n', stderr);
 #endif
 
-  d->searchflag = searchflag;
-
-  MALLOC (nullable, d->depth);
-  o_nullable = nullable;
-  MALLOC (nfirstpos, d->depth);
-  o_nfirst = nfirstpos;
-  MALLOC (firstpos, d->nleaves);
-  o_firstpos = firstpos, firstpos += d->nleaves;
-  MALLOC (nlastpos, d->depth);
-  o_nlast = nlastpos;
-  MALLOC (lastpos, d->nleaves);
-  o_lastpos = lastpos, lastpos += d->nleaves;
+  d->searchflag = searchflag != 0;
   alloc_position_set (&merged, d->nleaves);
-
-  CALLOC (d->follows, d->tindex);
+  d->follows = xcalloc (d->tindex, sizeof *d->follows);
 
   for (i = 0; i < d->tindex; ++i)
     {
@@ -2445,20 +2419,21 @@ dfaanalyze (struct dfa *d, int searchflag)
         {
         case EMPTY:
           /* The empty set is nullable.  */
-          *nullable++ = 1;
+          stk->nullable = true;
 
           /* The firstpos and lastpos of the empty leaf are both empty.  */
-          *nfirstpos++ = *nlastpos++ = 0;
+          stk->nfirstpos = stk->nlastpos = 0;
+          stk++;
           break;
 
         case STAR:
         case PLUS:
           /* Every element in the firstpos of the argument is in the follow
              of every element in the lastpos.  */
-          tmp.nelem = nfirstpos[-1];
+          tmp.nelem = stk[-1].nfirstpos;
           tmp.elems = firstpos;
           pos = lastpos;
-          for (j = 0; j < nlastpos[-1]; ++j)
+          for (j = 0; j < stk[-1].nlastpos; ++j)
             {
               merge (&tmp, &d->follows[pos[j].index], &merged);
               copy (&merged, &d->follows[pos[j].index]);
@@ -2467,16 +2442,16 @@ dfaanalyze (struct dfa *d, int searchflag)
         case QMARK:
           /* A QMARK or STAR node is automatically nullable.  */
           if (d->tokens[i] != PLUS)
-            nullable[-1] = 1;
+            stk[-1].nullable = true;
           break;
 
         case CAT:
           /* Every element in the firstpos of the second argument is in the
              follow of every element in the lastpos of the first argument.  */
-          tmp.nelem = nfirstpos[-1];
+          tmp.nelem = stk[-1].nfirstpos;
           tmp.elems = firstpos;
-          pos = lastpos + nlastpos[-1];
-          for (j = 0; j < nlastpos[-2]; ++j)
+          pos = lastpos + stk[-1].nlastpos;
+          for (j = 0; j < stk[-2].nlastpos; ++j)
             {
               merge (&tmp, &d->follows[pos[j].index], &merged);
               copy (&merged, &d->follows[pos[j].index]);
@@ -2484,43 +2459,39 @@ dfaanalyze (struct dfa *d, int searchflag)
 
           /* The firstpos of a CAT node is the firstpos of the first argument,
              union that of the second argument if the first is nullable.  */
-          if (nullable[-2])
-            nfirstpos[-2] += nfirstpos[-1];
+          if (stk[-2].nullable)
+            stk[-2].nfirstpos += stk[-1].nfirstpos;
           else
-            firstpos += nfirstpos[-1];
-          --nfirstpos;
+            firstpos += stk[-1].nfirstpos;
 
           /* The lastpos of a CAT node is the lastpos of the second argument,
              union that of the first argument if the second is nullable.  */
-          if (nullable[-1])
-            nlastpos[-2] += nlastpos[-1];
+          if (stk[-1].nullable)
+            stk[-2].nlastpos += stk[-1].nlastpos;
           else
             {
-              pos = lastpos + nlastpos[-2];
-              for (j = nlastpos[-1]; j-- > 0;)
+              pos = lastpos + stk[-2].nlastpos;
+              for (j = stk[-1].nlastpos; j-- > 0;)
                 pos[j] = lastpos[j];
-              lastpos += nlastpos[-2];
-              nlastpos[-2] = nlastpos[-1];
+              lastpos += stk[-2].nlastpos;
+              stk[-2].nlastpos = stk[-1].nlastpos;
             }
-          --nlastpos;
 
           /* A CAT node is nullable if both arguments are nullable.  */
-          nullable[-2] = nullable[-1] && nullable[-2];
-          --nullable;
+          stk[-2].nullable &= stk[-1].nullable;
+          stk--;
           break;
 
         case OR:
           /* The firstpos is the union of the firstpos of each argument.  */
-          nfirstpos[-2] += nfirstpos[-1];
-          --nfirstpos;
+          stk[-2].nfirstpos += stk[-1].nfirstpos;
 
           /* The lastpos is the union of the lastpos of each argument.  */
-          nlastpos[-2] += nlastpos[-1];
-          --nlastpos;
+          stk[-2].nlastpos += stk[-1].nlastpos;
 
           /* An OR node is nullable if either argument is nullable.  */
-          nullable[-2] = nullable[-1] || nullable[-2];
-          --nullable;
+          stk[-2].nullable |= stk[-1].nullable;
+          stk--;
           break;
 
         default:
@@ -2529,10 +2500,12 @@ dfaanalyze (struct dfa *d, int searchflag)
              an "epsilon closure" effectively makes them nullable later.
              Backreferences have to get a real position so we can detect
              transitions on them later.  But they are nullable.  */
-          *nullable++ = d->tokens[i] == BACKREF;
+          stk->nullable = d->tokens[i] == BACKREF;
 
           /* This position is in its own firstpos and lastpos.  */
-          *nfirstpos++ = *nlastpos++ = 1;
+          stk->nfirstpos = stk->nlastpos = 1;
+          stk++;
+
           --firstpos, --lastpos;
           firstpos->index = lastpos->index = i;
           firstpos->constraint = lastpos->constraint = NO_CONSTRAINT;
@@ -2546,15 +2519,16 @@ dfaanalyze (struct dfa *d, int searchflag)
       fprintf (stderr, "node %zd:", i);
       prtok (d->tokens[i]);
       putc ('\n', stderr);
-      fprintf (stderr, nullable[-1] ? " nullable: yes\n" : " nullable: no\n");
+      fprintf (stderr,
+               stk[-1].nullable ? " nullable: yes\n" : " nullable: no\n");
       fprintf (stderr, " firstpos:");
-      for (j = nfirstpos[-1]; j-- > 0;)
+      for (j = stk[-1].nfirstpos; j-- > 0;)
         {
           fprintf (stderr, " %zd:", firstpos[j].index);
           prtok (d->tokens[firstpos[j].index]);
         }
       fprintf (stderr, "\n lastpos:");
-      for (j = nlastpos[-1]; j-- > 0;)
+      for (j = stk[-1].nlastpos; j-- > 0;)
         {
           fprintf (stderr, " %zd:", lastpos[j].index);
           prtok (d->tokens[lastpos[j].index]);
@@ -2591,25 +2565,18 @@ dfaanalyze (struct dfa *d, int searchflag)
   /* Get the epsilon closure of the firstpos of the regexp.  The result will
      be the set of positions of state 0.  */
   merged.nelem = 0;
-  for (i = 0; i < nfirstpos[-1]; ++i)
+  for (i = 0; i < stk[-1].nfirstpos; ++i)
     insert (firstpos[i], &merged);
   epsclosure (&merged, d);
 
   /* Build the initial state.  */
-  d->salloc = 1;
-  d->sindex = 0;
-  MALLOC (d->states, d->salloc);
-
   separate_contexts = state_separate_contexts (&merged);
   state_index (d, &merged,
                (separate_contexts & CTX_NEWLINE
                 ? CTX_NEWLINE : separate_contexts ^ CTX_ANY));
 
-  free (o_nullable);
-  free (o_nfirst);
-  free (o_firstpos);
-  free (o_nlast);
-  free (o_lastpos);
+  free (posalloc);
+  free (stkalloc);
   free (merged.elems);
 }
 
@@ -2647,16 +2614,16 @@ dfaanalyze (struct dfa *d, int searchflag)
 void
 dfastate (state_num s, struct dfa *d, state_num trans[])
 {
-  leaf_set *grps;               /* As many as will ever be needed.  */
-  charclass *labels;            /* Labels corresponding to the groups.  */
+  leaf_set grps[NOTCHAR];       /* As many as will ever be needed.  */
+  charclass labels[NOTCHAR];    /* Labels corresponding to the groups.  */
   size_t ngrps = 0;             /* Number of groups actually used.  */
   position pos;                 /* Current position being considered.  */
   charclass matches;            /* Set of matching characters.  */
-  int matchesf;                 /* True if matches is nonempty.  */
+  unsigned int matchesf;        /* Nonzero if matches is nonempty.  */
   charclass intersect;          /* Intersection with some label set.  */
-  int intersectf;               /* True if intersect is nonempty.  */
+  unsigned int intersectf;      /* Nonzero if intersect is nonempty.  */
   charclass leftovers;          /* Stuff in the label that didn't match.  */
-  int leftoversf;               /* True if leftovers is nonempty.  */
+  unsigned int leftoversf;      /* Nonzero if leftovers is nonempty.  */
   position_set follows;         /* Union of the follows of some group.  */
   position_set tmp;             /* Temporary space for merging sets.  */
   int possible_contexts;        /* Contexts that this group can match.  */
@@ -2664,12 +2631,9 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
   state_num state;              /* New state.  */
   state_num state_newline;      /* New state on a newline transition.  */
   state_num state_letter;       /* New state on a letter transition.  */
-  int next_isnt_1st_byte = 0;   /* Flag if we can't add state0.  */
+  bool next_isnt_1st_byte = false; /* Flag if we can't add state0.  */
   size_t i, j, k;
 
-  MALLOC (grps, NOTCHAR);
-  MALLOC (labels, NOTCHAR);
-
   zeroset (matches);
 
   for (i = 0; i < d->states[s].elems.nelem; ++i)
@@ -2732,7 +2696,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
              matches.  */
           intersectf = 0;
           for (k = 0; k < CHARCLASS_INTS; ++k)
-            (intersect[k] = matches[k] & labels[j][k]) ? (intersectf = 1) : 0;
+            intersectf |= intersect[k] = matches[k] & labels[j][k];
           if (!intersectf)
             continue;
 
@@ -2743,8 +2707,8 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
               /* Even an optimizing compiler can't know this for sure.  */
               int match = matches[k], label = labels[j][k];
 
-              (leftovers[k] = ~match & label) ? (leftoversf = 1) : 0;
-              (matches[k] = match & ~label) ? (matchesf = 1) : 0;
+              leftoversf |= leftovers[k] = ~match & label;
+              matchesf |= matches[k] = match & ~label;
             }
 
           /* If there were leftovers, create a new group labeled with them.  */
@@ -2752,7 +2716,8 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
             {
               copyset (leftovers, labels[ngrps]);
               copyset (intersect, labels[j]);
-              MALLOC (grps[ngrps].elems, d->nleaves);
+              grps[ngrps].elems = xnmalloc (d->nleaves,
+                                            sizeof *grps[ngrps].elems);
               memcpy (grps[ngrps].elems, grps[j].elems,
                       sizeof (grps[j].elems[0]) * grps[j].nelem);
               grps[ngrps].nelem = grps[j].nelem;
@@ -2775,7 +2740,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
         {
           copyset (matches, labels[ngrps]);
           zeroset (matches);
-          MALLOC (grps[ngrps].elems, d->nleaves);
+          grps[ngrps].elems = xnmalloc (d->nleaves, sizeof *grps[ngrps].elems);
           grps[ngrps].nelem = 1;
           grps[ngrps].elems[0] = pos.index;
           ++ngrps;
@@ -2821,7 +2786,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
         for (k = 0; k < d->follows[grps[i].elems[j]].nelem; ++k)
           insert (d->follows[grps[i].elems[j]].elems[k], &follows);
 
-      if (d->mb_cur_max > 1)
+      if (d->multibyte)
         {
           /* If a token in follows.elems is not 1st byte of a multibyte
              character, or the states of follows must accept the bytes
@@ -2841,12 +2806,12 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
              codepoint of <sb a>, it must not be <sb a> but 2nd byte of
              <mb A>, so we cannot add state[0].  */
 
-          next_isnt_1st_byte = 0;
+          next_isnt_1st_byte = false;
           for (j = 0; j < follows.nelem; ++j)
             {
               if (!(d->multibyte_prop[follows.elems[j].index] & 1))
                 {
-                  next_isnt_1st_byte = 1;
+                  next_isnt_1st_byte = true;
                   break;
                 }
             }
@@ -2855,7 +2820,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
       /* If we are building a searching matcher, throw in the positions
          of state 0 as well.  */
       if (d->searchflag
-          && (!MBS_SUPPORT || (d->mb_cur_max == 1 || !next_isnt_1st_byte)))
+          && (!MBS_SUPPORT || (!d->multibyte || !next_isnt_1st_byte)))
         for (j = 0; j < d->states[0].elems.nelem; ++j)
           insert (d->states[0].elems.elems[j], &follows);
 
@@ -2897,8 +2862,31 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
     free (grps[i].elems);
   free (follows.elems);
   free (tmp.elems);
-  free (grps);
-  free (labels);
+}
+
+/* Make sure D's state arrays are large enough to hold NEW_STATE.  */
+static void
+realloc_trans_if_necessary (struct dfa *d, state_num new_state)
+{
+  state_num oldalloc = d->tralloc;
+  if (oldalloc <= new_state)
+    {
+      state_num **realtrans = d->trans ? d->trans - 1 : NULL;
+      size_t newalloc, newalloc1;
+      newalloc1 = new_state + 1;
+      realtrans = x2nrealloc (realtrans, &newalloc1, sizeof *realtrans);
+      realtrans[0] = NULL;
+      d->trans = realtrans + 1;
+      d->tralloc = newalloc = newalloc1 - 1;
+      d->fails = xnrealloc (d->fails, newalloc, sizeof *d->fails);
+      d->success = xnrealloc (d->success, newalloc, sizeof *d->success);
+      d->newlines = xnrealloc (d->newlines, newalloc, sizeof *d->newlines);
+      for (; oldalloc < newalloc; oldalloc++)
+        {
+          d->trans[oldalloc] = NULL;
+          d->fails[oldalloc] = NULL;
+        }
+    }
 }
 
 /* Some routines for manipulating a compiled dfa's transition tables.
@@ -2912,7 +2900,7 @@ static void
 build_state (state_num s, struct dfa *d)
 {
   state_num *trans;             /* The new transition table.  */
-  state_num i;
+  state_num i, maxstate;
 
   /* Set an upper limit on the number of transition tables that will ever
      exist at once.  1024 is arbitrary.  The idea is that the frequently
@@ -2940,30 +2928,17 @@ build_state (state_num s, struct dfa *d)
   if (ACCEPTS_IN_CONTEXT (d->states[s].context, CTX_NONE, s, *d))
     d->success[s] |= CTX_NONE;
 
-  MALLOC (trans, NOTCHAR);
+  trans = xmalloc (NOTCHAR * sizeof *trans);
   dfastate (s, d, trans);
 
   /* Now go through the new transition table, and make sure that the trans
      and fail arrays are allocated large enough to hold a pointer for the
      largest state mentioned in the table.  */
+  maxstate = -1;
   for (i = 0; i < NOTCHAR; ++i)
-    if (trans[i] >= d->tralloc)
-      {
-        state_num oldalloc = d->tralloc;
-
-        while (trans[i] >= d->tralloc)
-          d->tralloc *= 2;
-        REALLOC (d->realtrans, d->tralloc + 1);
-        d->trans = d->realtrans + 1;
-        REALLOC (d->fails, d->tralloc);
-        REALLOC (d->success, d->tralloc);
-        REALLOC (d->newlines, d->tralloc);
-        while (oldalloc < d->tralloc)
-          {
-            d->trans[oldalloc] = NULL;
-            d->fails[oldalloc++] = NULL;
-          }
-      }
+    if (maxstate < trans[i])
+      maxstate = trans[i];
+  realloc_trans_if_necessary (d, maxstate);
 
   /* Keep the newline transition in a special place so we can use it as
      a sentinel.  */
@@ -2979,65 +2954,21 @@ build_state (state_num s, struct dfa *d)
 static void
 build_state_zero (struct dfa *d)
 {
-  d->tralloc = 1;
+  /* Initial size of the transition tables; must be positive.  */
+  int initial_tab_size = 1;
+
+  d->tralloc = 0;
   d->trcount = 0;
-  CALLOC (d->realtrans, d->tralloc + 1);
-  d->trans = d->realtrans + 1;
-  CALLOC (d->fails, d->tralloc);
-  MALLOC (d->success, d->tralloc);
-  MALLOC (d->newlines, d->tralloc);
+  d->trans = NULL;
+  d->fails = NULL;
+  d->success = NULL;
+  d->newlines = NULL;
+  realloc_trans_if_necessary (d, initial_tab_size);
   build_state (0, d);
 }
 
 /* Multibyte character handling sub-routines for dfaexec.  */
 
-/* The initial state may encounter a byte which is not a single byte character
-   nor the first byte of a multibyte character.  But it is incorrect for the
-   initial state to accept such a byte.  For example, in Shift JIS the regular
-   expression "\\" accepts the codepoint 0x5c, but should not accept the second
-   byte of the codepoint 0x815c.  Then the initial state must skip the bytes
-   that are not a single byte character nor the first byte of a multibyte
-   character.  */
-#define SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p)		\
-  if (s == 0)						\
-    {							\
-      while (inputwcs[p - buf_begin] == 0		\
-            && mblen_buf[p - buf_begin] > 0		\
-            && (unsigned char const *) p < buf_end)	\
-        ++p;						\
-      if ((char *) p >= end)				\
-        {						\
-          free (mblen_buf);				\
-          free (inputwcs);				\
-          *end = saved_end;				\
-          return NULL;					\
-        }						\
-    }
-
-static void
-realloc_trans_if_necessary (struct dfa *d, state_num new_state)
-{
-  /* Make sure that the trans and fail arrays are allocated large enough
-     to hold a pointer for the new state.  */
-  if (new_state >= d->tralloc)
-    {
-      state_num oldalloc = d->tralloc;
-
-      while (new_state >= d->tralloc)
-        d->tralloc *= 2;
-      REALLOC (d->realtrans, d->tralloc + 1);
-      d->trans = d->realtrans + 1;
-      REALLOC (d->fails, d->tralloc);
-      REALLOC (d->success, d->tralloc);
-      REALLOC (d->newlines, d->tralloc);
-      while (oldalloc < d->tralloc)
-        {
-          d->trans[oldalloc] = NULL;
-          d->fails[oldalloc++] = NULL;
-        }
-    }
-}
-
 /* Return values of transit_state_singlebyte, and
    transit_state_consume_1char.  */
 typedef enum
@@ -3070,14 +3001,7 @@ transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const *p,
             works = 0;
         }
       else if (works < 0)
-        {
-          if (p == buf_end)
-            {
-              /* At the moment, it must not happen.  */
-              abort ();
-            }
-          works = 0;
-        }
+        works = 0;
       else if (d->fails[works])
         {
           works = d->fails[works][*p];
@@ -3092,18 +3016,13 @@ transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const *p,
   return rval;
 }
 
-/* Match a "." against the current context.  buf_begin[IDX] is the
-   current position.  Return the length of the match, in bytes.
-   POS is the position of the ".".  */
+/* Match a "." against the current context.  Return the length of the
+   match, in bytes.  POS is the position of the ".".  */
 static int
-match_anychar (struct dfa *d, state_num s, position pos, size_t idx)
+match_anychar (struct dfa *d, state_num s, position pos,
+               wchar_t wc, size_t mbclen)
 {
   int context;
-  wchar_t wc;
-  int mbclen;
-
-  wc = inputwcs[idx];
-  mbclen = (mblen_buf[idx] == 0) ? 1 : mblen_buf[idx];
 
   /* Check syntax bits.  */
   if (wc == (wchar_t) eolbyte)
@@ -3125,16 +3044,14 @@ match_anychar (struct dfa *d, state_num s, position pos, size_t idx)
 }
 
 /* Match a bracket expression against the current context.
-   buf_begin[IDX] is the current position.
    Return the length of the match, in bytes.
    POS is the position of the bracket expression.  */
 static int
-match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx)
+match_mb_charset (struct dfa *d, state_num s, position pos,
+                  char const *p, wchar_t wc, size_t match_len)
 {
   size_t i;
-  int match;               /* Matching succeeded.  */
-  int match_len;           /* Length of the character (or collating element)
-                              with which this operator matches.  */
+  bool match;              /* Matching succeeded.  */
   int op_len;              /* Length of the operator.  */
   char buffer[128];
 
@@ -3142,9 +3059,6 @@ match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx)
   struct mb_char_classes *work_mbc;
 
   int context;
-  wchar_t wc;                   /* Current referring character.  */
-
-  wc = inputwcs[idx];
 
   /* Check syntax bits.  */
   if (wc == (wchar_t) eolbyte)
@@ -3165,7 +3079,6 @@ match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx)
   /* Assign the current referring operator to work_mbc.  */
   work_mbc = &(d->mbcsets[(d->multibyte_prop[pos.index]) >> 2]);
   match = !work_mbc->invert;
-  match_len = (mblen_buf[idx] == 0) ? 1 : mblen_buf[idx];
 
   /* Match in range 0-255?  */
   if (wc < NOTCHAR && work_mbc->cset != -1
@@ -3179,14 +3092,14 @@ match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx)
         goto charset_matched;
     }
 
-  strncpy (buffer, (char const *) buf_begin + idx, match_len);
+  strncpy (buffer, p, match_len);
   buffer[match_len] = '\0';
 
   /* match with an equivalence class?  */
   for (i = 0; i < work_mbc->nequivs; i++)
     {
       op_len = strlen (work_mbc->equivs[i]);
-      strncpy (buffer, (char const *) buf_begin + idx, op_len);
+      strncpy (buffer, p, op_len);
       buffer[op_len] = '\0';
       if (strcoll (work_mbc->equivs[i], buffer) == 0)
         {
@@ -3199,7 +3112,7 @@ match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx)
   for (i = 0; i < work_mbc->ncoll_elems; i++)
     {
       op_len = strlen (work_mbc->coll_elems[i]);
-      strncpy (buffer, (char const *) buf_begin + idx, op_len);
+      strncpy (buffer, p, op_len);
       buffer[op_len] = '\0';
 
       if (strcoll (work_mbc->coll_elems[i], buffer) == 0)
@@ -3212,7 +3125,7 @@ match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx)
   /* match with a range?  */
   for (i = 0; i < work_mbc->nranges; i++)
     {
-      if (work_mbc->range_sts[i] <= wc && wc <= work_mbc->range_ends[i])
+      if (work_mbc->ranges[i].beg <= wc && wc <= work_mbc->ranges[i].end)
         goto charset_matched;
     }
 
@@ -3233,27 +3146,25 @@ charset_matched:
    array which corresponds to 'd->states[s].mbps.elem'; each element of the
    array contains the number of bytes with which the element can match.
 
-   'idx' is the index from buf_begin, and it is the current position
-   in the buffer.
-
    The caller MUST free the array which this function return.  */
 static int *
-check_matching_with_multibyte_ops (struct dfa *d, state_num s, size_t idx)
+check_matching_with_multibyte_ops (struct dfa *d, state_num s,
+                                   char const *p, wchar_t wc, size_t mbclen)
 {
   size_t i;
   int *rarray;
 
-  MALLOC (rarray, d->states[s].mbps.nelem);
+  rarray = d->mb_match_lens;
   for (i = 0; i < d->states[s].mbps.nelem; ++i)
     {
       position pos = d->states[s].mbps.elems[i];
       switch (d->tokens[pos.index])
         {
         case ANYCHAR:
-          rarray[i] = match_anychar (d, s, pos, idx);
+          rarray[i] = match_anychar (d, s, pos, wc, mbclen);
           break;
         case MBCSET:
-          rarray[i] = match_mb_charset (d, s, pos, idx);
+          rarray[i] = match_mb_charset (d, s, pos, p, wc, mbclen);
           break;
         default:
           break;                /* cannot happen.  */
@@ -3273,48 +3184,39 @@ check_matching_with_multibyte_ops (struct dfa *d, state_num s, size_t idx)
 static status_transit_state
 transit_state_consume_1char (struct dfa *d, state_num s,
                              unsigned char const **pp,
-                             int *match_lens, int *mbclen, position_set * pps)
+                             wchar_t wc, size_t mbclen,
+                             int *match_lens)
 {
   size_t i, j;
   int k;
   state_num s1, s2;
-  int *work_mbls;
   status_transit_state rs = TRANSIT_STATE_DONE;
 
-  /* Calculate the length of the (single/multi byte) character
-     to which p points.  */
-  *mbclen = (mblen_buf[*pp - buf_begin] == 0) ? 1 : mblen_buf[*pp - buf_begin];
+  if (! match_lens && d->states[s].mbps.nelem != 0)
+    match_lens = check_matching_with_multibyte_ops (d, s, (char const *) *pp,
+                                                    wc, mbclen);
 
   /* Calculate the state which can be reached from the state 's' by
-     consuming '*mbclen' single bytes from the buffer.  */
+     consuming 'mbclen' single bytes from the buffer.  */
   s1 = s;
-  for (k = 0; k < *mbclen; k++)
+  for (k = 0; k < mbclen; k++)
     {
       s2 = s1;
       rs = transit_state_singlebyte (d, s2, (*pp)++, &s1);
     }
-  /* Copy the positions contained by 's1' to the set 'pps'.  */
-  copy (&(d->states[s1].elems), pps);
-
-  /* Check (input) match_lens, and initialize if it is NULL.  */
-  if (match_lens == NULL && d->states[s].mbps.nelem != 0)
-    work_mbls = check_matching_with_multibyte_ops (d, s, *pp - buf_begin);
-  else
-    work_mbls = match_lens;
+  copy (&d->states[s1].elems, &d->mb_follows);
 
   /* Add all of the positions which can be reached from 's' by consuming
      a single character.  */
   for (i = 0; i < d->states[s].mbps.nelem; i++)
     {
-      if (work_mbls[i] == *mbclen)
+      if (match_lens[i] == mbclen)
         for (j = 0; j < d->follows[d->states[s].mbps.elems[i].index].nelem;
              j++)
-          insert (d->follows[d->states[s].mbps.elems[i].index].elems[j], pps);
+          insert (d->follows[d->states[s].mbps.elems[i].index].elems[j],
+                  &d->mb_follows);
     }
 
-  if (match_lens == NULL && work_mbls != NULL)
-    free (work_mbls);
-
   /* FIXME: this return value is always ignored.  */
   return rs;
 }
@@ -3323,7 +3225,8 @@ transit_state_consume_1char (struct dfa *d, state_num s,
    buffer.  This function is for some operator which can match with a multi-
    byte character or a collating element (which may be multi characters).  */
 static state_num
-transit_state (struct dfa *d, state_num s, unsigned char const **pp)
+transit_state (struct dfa *d, state_num s, unsigned char const **pp,
+               unsigned char const *end)
 {
   state_num s1;
   int mbclen;  /* The length of current input multibyte character.  */
@@ -3331,7 +3234,6 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp)
   size_t i, j;
   int *match_lens = NULL;
   size_t nelem = d->states[s].mbps.nelem;       /* Just a alias.  */
-  position_set follows;
   unsigned char const *p1 = *pp;
   wchar_t wc;
 
@@ -3340,7 +3242,9 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp)
        We check whether each of them can match or not.  */
     {
       /* Note: caller must free the return value of this function.  */
-      match_lens = check_matching_with_multibyte_ops (d, s, *pp - buf_begin);
+      mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d);
+      match_lens = check_matching_with_multibyte_ops (d, s, (char const *) *pp,
+                                                      wc, mbclen);
 
       for (i = 0; i < nelem; i++)
         /* Search the operator which match the longest string,
@@ -3362,26 +3266,25 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp)
       if (rs == TRANSIT_STATE_DONE)
         ++*pp;
 
-      free (match_lens);
       return s1;
     }
 
   /* This state has some operators which can match a multibyte character.  */
-  alloc_position_set (&follows, d->nleaves);
+  d->mb_follows.nelem = 0;
 
   /* 'maxlen' may be longer than the length of a character, because it may
      not be a character but a (multi character) collating element.
      We enumerate all of the positions which 's' can reach by consuming
      'maxlen' bytes.  */
-  transit_state_consume_1char (d, s, pp, match_lens, &mbclen, &follows);
+  transit_state_consume_1char (d, s, pp, wc, mbclen, match_lens);
 
-  wc = inputwcs[*pp - mbclen - buf_begin];
-  s1 = state_index (d, &follows, wchar_context (wc));
+  s1 = state_index (d, &d->mb_follows, wchar_context (wc));
   realloc_trans_if_necessary (d, s1);
 
   while (*pp - p1 < maxlen)
     {
-      transit_state_consume_1char (d, s1, pp, NULL, &mbclen, &follows);
+      mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d);
+      transit_state_consume_1char (d, s1, pp, wc, mbclen, NULL);
 
       for (i = 0; i < nelem; i++)
         {
@@ -3389,51 +3292,15 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp)
             for (j = 0;
                  j < d->follows[d->states[s1].mbps.elems[i].index].nelem; j++)
               insert (d->follows[d->states[s1].mbps.elems[i].index].elems[j],
-                      &follows);
+                      &d->mb_follows);
         }
 
-      wc = inputwcs[*pp - mbclen - buf_begin];
-      s1 = state_index (d, &follows, wchar_context (wc));
+      s1 = state_index (d, &d->mb_follows, wchar_context (wc));
       realloc_trans_if_necessary (d, s1);
     }
-  free (match_lens);
-  free (follows.elems);
   return s1;
 }
 
-
-/* Initialize mblen_buf and inputwcs with data from the next line.  */
-
-static void
-prepare_wc_buf (struct dfa *d, const char *begin, const char *end)
-{
-#if MBS_SUPPORT
-  unsigned char eol = eolbyte;
-  size_t i;
-  size_t ilim = end - begin + 1;
-
-  buf_begin = (unsigned char *) begin;
-
-  for (i = 0; i < ilim; i++)
-    {
-      size_t nbytes = mbs_to_wchar (d, inputwcs + i, begin + i, ilim - i, &mbs);
-      mblen_buf[i] = nbytes - (nbytes == 1);
-      if (begin[i] == eol)
-        break;
-      while (--nbytes != 0)
-        {
-          i++;
-          mblen_buf[i] = nbytes;
-          inputwcs[i] = 0;
-        }
-    }
-
-  buf_end = (unsigned char *) (begin + i);
-  mblen_buf[i] = 0;
-  inputwcs[i] = 0;              /* sentinel */
-#endif /* MBS_SUPPORT */
-}
-
 /* Search through a buffer looking for a match to the given struct dfa.
    Find the first occurrence of a string matching the regexp in the
    buffer, and the shortest possible version thereof.  Return a pointer to
@@ -3451,39 +3318,64 @@ dfaexec (struct dfa *d, char const *begin, char *end,
          int allow_nl, size_t *count, int *backref)
 {
   state_num s, s1;              /* Current state.  */
-  unsigned char const *p;       /* Current input character.  */
+  unsigned char const *p, *mbp; /* Current input character.  */
   state_num **trans, *t;        /* Copy of d->trans so it can be optimized
                                    into a register.  */
   unsigned char eol = eolbyte;  /* Likewise for eolbyte.  */
   unsigned char saved_end;
+  size_t nlcount = 0;
 
   if (!d->tralloc)
     build_state_zero (d);
 
   s = s1 = 0;
-  p = (unsigned char const *) begin;
+  p = mbp = (unsigned char const *) begin;
   trans = d->trans;
   saved_end = *(unsigned char *) end;
   *end = eol;
 
-  if (d->mb_cur_max > 1)
+  if (d->multibyte)
     {
-      MALLOC (mblen_buf, end - begin + 2);
-      MALLOC (inputwcs, end - begin + 2);
-      memset (&mbs, 0, sizeof (mbstate_t));
-      prepare_wc_buf (d, (const char *) p, end);
+      memset (&d->mbs, 0, sizeof d->mbs);
+      if (! d->mb_match_lens)
+        {
+          d->mb_match_lens = xnmalloc (d->nleaves, sizeof *d->mb_match_lens);
+          alloc_position_set (&d->mb_follows, d->nleaves);
+        }
     }
 
   for (;;)
     {
-      if (d->mb_cur_max > 1)
+      if (d->multibyte)
         {
           while ((t = trans[s]) != NULL)
             {
-              if (p > buf_end)
-                break;
               s1 = s;
-              SKIP_REMAINS_MB_IF_INITIAL_STATE (s, p);
+
+              if (s == 0)
+                {
+                  /* The initial state may encounter a byte which is not
+                     a single byte character nor the first byte of a
+                     multibyte character.  But it is incorrect for the
+                     initial state to accept such a byte.  For example,
+                     in Shift JIS the regular expression "\\" accepts
+                     the codepoint 0x5c, but should not accept the second
+                     byte of the codepoint 0x815c.  Then the initial
+                     state must skip the bytes that are not a single
+                     byte character nor the first byte of a multibyte
+                     character.  */
+                  wchar_t wc;
+                  while (mbp < p)
+                    mbp += mbs_to_wchar (&wc, (char const *) mbp,
+                                         end - (char const *) mbp, d);
+                  p = mbp;
+
+                  if ((char *) p >= end)
+                    {
+                      p = NULL;
+                      goto done;
+                    }
+                }
 
               if (d->states[s].mbps.nelem == 0)
                 {
@@ -3495,18 +3387,16 @@ dfaexec (struct dfa *d, char const *begin, char *end,
                  better performance (up to 25% better on [a-z], for
                  example) and enables support for collating symbols and
                  equivalence classes.  */
-              if (backref)
+              if (d->states[s].has_mbcset && backref)
                 {
                   *backref = 1;
-                  free (mblen_buf);
-                  free (inputwcs);
-                  *end = saved_end;
-                  return (char *) p;
+                  goto done;
                 }
 
               /* Can match with a multibyte character (and multi character
                  collating element).  Transition table might be updated.  */
-              s = transit_state (d, s, &p);
+              s = transit_state (d, s, &p, (unsigned char *) end);
+              mbp = p;
               trans = d->trans;
             }
         }
@@ -3526,27 +3416,28 @@ dfaexec (struct dfa *d, char const *begin, char *end,
             }
         }
 
-      if (s >= 0 && (char *) p <= end && d->fails[s])
+      if ((char *) p > end)
+        {
+          p = NULL;
+          goto done;
+        }
+
+      if (s >= 0 && d->fails[s])
         {
           if (d->success[s] & sbit[*p])
             {
               if (backref)
-                *backref = (d->states[s].backref != 0);
-              if (d->mb_cur_max > 1)
-                {
-                  free (mblen_buf);
-                  free (inputwcs);
-                }
-              *end = saved_end;
-              return (char *) p;
+                *backref = d->states[s].has_backref;
+              goto done;
             }
 
           s1 = s;
-          if (d->mb_cur_max > 1)
+          if (d->multibyte)
             {
               /* Can match with a multibyte character (and multicharacter
                  collating element).  Transition table might be updated.  */
-              s = transit_state (d, s, &p);
+              s = transit_state (d, s, &p, (unsigned char *) end);
+              mbp = p;
               trans = d->trans;
             }
           else
@@ -3554,31 +3445,18 @@ dfaexec (struct dfa *d, char const *begin, char *end,
           continue;
         }
 
-      /* If the previous character was a newline, count it.  */
-      if ((char *) p <= end && p[-1] == eol)
+      /* If the previous character was a newline, count it, and skip
+         checking of multibyte character boundary until here.  */
+      if (p[-1] == eol)
         {
-          if (count)
-            ++*count;
-
-          if (d->mb_cur_max > 1)
-            prepare_wc_buf (d, (const char *) p, end);
-        }
-
-      /* Check if we've run off the end of the buffer.  */
-      if ((char *) p > end)
-        {
-          if (d->mb_cur_max > 1)
-            {
-              free (mblen_buf);
-              free (inputwcs);
-            }
-          *end = saved_end;
-          return NULL;
+          nlcount++;
+          mbp = p;
         }
 
       if (s >= 0)
         {
-          build_state (s, d);
+          if (!d->trans[s])
+            build_state (s, d);
           trans = d->trans;
           continue;
         }
@@ -3591,6 +3469,32 @@ dfaexec (struct dfa *d, char const *begin, char *end,
 
       s = 0;
     }
+
+ done:
+  if (count)
+    *count += nlcount;
+  *end = saved_end;
+  return (char *) p;
+}
+
+/* Search through a buffer looking for a potential match for D.
+   Return the offset of the byte after the first potential match.
+   If there is no match, return (size_t) -1.  If D lacks a superset
+   so it's not known whether there is a match, return (size_t) -2.
+   BEGIN points to the beginning of the buffer, and END points to the
+   first byte after its end.  Store a sentinel byte (usually newline)
+   in *END, so the actual buffer must be one byte longer.  If COUNT is
+   non-NULL, increment *COUNT once for each newline processed.  */
+size_t
+dfahint (struct dfa *d, char const *begin, char *end, size_t *count)
+{
+  if (! d->superset)
+    return -2;
+  else
+    {
+      char const *match = dfaexec (d->superset, begin, end, 1, count, NULL);
+      return match ? match - begin : -1;
+    }
 }
 
 static void
@@ -3599,7 +3503,6 @@ free_mbdata (struct dfa *d)
   size_t i;
 
   free (d->multibyte_prop);
-  d->multibyte_prop = NULL;
 
   for (i = 0; i < d->nmbcsets; ++i)
     {
@@ -3607,8 +3510,7 @@ free_mbdata (struct dfa *d)
       struct mb_char_classes *p = &(d->mbcsets[i]);
       free (p->chars);
       free (p->ch_classes);
-      free (p->range_sts);
-      free (p->range_ends);
+      free (p->ranges);
 
       for (j = 0; j < p->nequivs; ++j)
         free (p->equivs[j]);
@@ -3620,8 +3522,9 @@ free_mbdata (struct dfa *d)
     }
 
   free (d->mbcsets);
-  d->mbcsets = NULL;
-  d->nmbcsets = 0;
+  free (d->mb_follows.elems);
+  free (d->mb_match_lens);
+  d->mb_match_lens = NULL;
 }
 
 /* Initialize the components of a dfa that the other routines don't
@@ -3630,22 +3533,7 @@ void
 dfainit (struct dfa *d)
 {
   memset (d, 0, sizeof *d);
-
-  d->calloc = 1;
-  MALLOC (d->charclasses, d->calloc);
-
-  d->talloc = 1;
-  MALLOC (d->tokens, d->talloc);
-
-  d->mb_cur_max = MB_CUR_MAX;
-
-  if (d->mb_cur_max > 1)
-    {
-      d->nmultibyte_prop = 1;
-      MALLOC (d->multibyte_prop, d->nmultibyte_prop);
-      d->mbcsets_alloc = 1;
-      MALLOC (d->mbcsets, d->mbcsets_alloc);
-    }
+  d->multibyte = MB_CUR_MAX > 1;
 }
 
 static void
@@ -3672,7 +3560,83 @@ dfaoptimize (struct dfa *d)
     }
 
   free_mbdata (d);
-  d->mb_cur_max = 1;
+  d->multibyte = false;
+}
+
+static void
+dfasuperset (struct dfa *d)
+{
+  size_t i, j;
+  charclass ccl;
+  bool have_achar = false;
+  bool have_nchar = false;
+  struct dfa *sup = dfaalloc ();
+
+  *sup = *d;
+  sup->multibyte = false;
+  sup->multibyte_prop = NULL;
+  sup->mbcsets = NULL;
+  sup->superset = NULL;
+  sup->states = NULL;
+  sup->sindex = 0;
+  sup->follows = NULL;
+  sup->tralloc = 0;
+  sup->trans = NULL;
+  sup->fails = NULL;
+  sup->success = NULL;
+  sup->newlines = NULL;
+  sup->musts = NULL;
+
+  sup->charclasses = xnmalloc (sup->calloc, sizeof *sup->charclasses);
+  memcpy (sup->charclasses, d->charclasses,
+          d->cindex * sizeof *sup->charclasses);
+
+  sup->tokens = xnmalloc (d->tindex, 2 * sizeof *sup->tokens);
+  sup->talloc = d->tindex * 2;
+
+  for (i = j = 0; i < d->tindex; i++)
+    {
+      switch (d->tokens[i])
+        {
+        case ANYCHAR:
+        case MBCSET:
+        case BACKREF:
+          zeroset (ccl);
+          notset (ccl);
+          sup->tokens[j++] = CSET + dfa_charclass_index (sup, ccl);
+          sup->tokens[j++] = STAR;
+          if (d->tokens[i + 1] == QMARK || d->tokens[i + 1] == STAR
+              || d->tokens[i + 1] == PLUS)
+            i++;
+          have_achar = true;
+          break;
+        case BEGWORD:
+        case ENDWORD:
+        case LIMWORD:
+        case NOTLIMWORD:
+          if (d->multibyte)
+            {
+              /* Ignore these constraints.  */
+              sup->tokens[j++] = EMPTY;
+              break;
+            }
+        default:
+          sup->tokens[j++] = d->tokens[i];
+          if ((0 <= d->tokens[i] && d->tokens[i] < NOTCHAR)
+              || d->tokens[i] >= CSET)
+            have_nchar = true;
+          break;
+        }
+    }
+  sup->tindex = j;
+
+  if (have_nchar && (have_achar || d->multibyte))
+    d->superset = sup;
+  else
+    {
+      dfafree (sup);
+      free (sup);
+    }
 }
 
 /* Parse and analyze a single string of the given length.  */
@@ -3684,7 +3648,10 @@ dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
   dfaparse (s, len, d);
   dfamust (d);
   dfaoptimize (d);
+  dfasuperset (d);
   dfaanalyze (d, searchflag);
+  if (d->superset)
+    dfaanalyze (d->superset, searchflag);
 }
 
 /* Free the storage held by the components of a dfa.  */
@@ -3697,34 +3664,46 @@ dfafree (struct dfa *d)
   free (d->charclasses);
   free (d->tokens);
 
-  if (d->mb_cur_max > 1)
+  if (d->multibyte)
     free_mbdata (d);
 
   for (i = 0; i < d->sindex; ++i)
     {
       free (d->states[i].elems.elems);
-      if (MBS_SUPPORT)
-        free (d->states[i].mbps.elems);
+      free (d->states[i].mbps.elems);
     }
   free (d->states);
-  for (i = 0; i < d->tindex; ++i)
-    free (d->follows[i].elems);
-  free (d->follows);
-  for (i = 0; i < d->tralloc; ++i)
+
+  if (d->follows)
+    {
+      for (i = 0; i < d->tindex; ++i)
+        free (d->follows[i].elems);
+      free (d->follows);
+    }
+
+  if (d->trans)
     {
-      free (d->trans[i]);
-      free (d->fails[i]);
+      for (i = 0; i < d->tralloc; ++i)
+        {
+          free (d->trans[i]);
+          free (d->fails[i]);
+        }
+
+      free (d->trans - 1);
+      free (d->fails);
+      free (d->newlines);
+      free (d->success);
     }
-  free (d->realtrans);
-  free (d->fails);
-  free (d->newlines);
-  free (d->success);
+
   for (dm = d->musts; dm; dm = ndm)
     {
       ndm = dm->next;
       free (dm->must);
       free (dm);
     }
+
+  if (d->superset)
+    dfafree (d->superset);
 }
 
 /* Having found the postfix representation of the regular expression,
@@ -3815,21 +3794,16 @@ static char *
 icatalloc (char *old, char const *new)
 {
   char *result;
-  size_t oldsize = old == NULL ? 0 : strlen (old);
-  size_t newsize = new == NULL ? 0 : strlen (new);
+  size_t oldsize;
+  size_t newsize = strlen (new);
   if (newsize == 0)
     return old;
+  oldsize = strlen (old);
   result = xrealloc (old, oldsize + newsize + 1);
   memcpy (result + oldsize, new, newsize + 1);
   return result;
 }
 
-static char *
-icpyalloc (char const *string)
-{
-  return icatalloc (NULL, string);
-}
-
 static char *_GL_ATTRIBUTE_PURE
 istrstr (char const *lookin, char const *lookfor)
 {
@@ -3846,29 +3820,15 @@ istrstr (char const *lookin, char const *lookfor)
 static void
 freelist (char **cpp)
 {
-  size_t i;
-
-  if (cpp == NULL)
-    return;
-  for (i = 0; cpp[i] != NULL; ++i)
-    {
-      free (cpp[i]);
-      cpp[i] = NULL;
-    }
+  while (*cpp)
+    free (*cpp++);
 }
 
 static char **
 enlist (char **cpp, char *new, size_t len)
 {
   size_t i, j;
-
-  if (cpp == NULL)
-    return NULL;
-  if ((new = icpyalloc (new)) == NULL)
-    {
-      freelist (cpp);
-      return NULL;
-    }
+  new = memcpy (xmalloc (len + 1), new, len);
   new[len] = '\0';
   /* Is there already something in the list that's new (or longer)?  */
   for (i = 0; cpp[i] != NULL; ++i)
@@ -3891,53 +3851,35 @@ enlist (char **cpp, char *new, size_t len)
         cpp[i] = NULL;
       }
   /* Add the new string.  */
-  REALLOC (cpp, i + 2);
+  cpp = xnrealloc (cpp, i + 2, sizeof *cpp);
   cpp[i] = new;
   cpp[i + 1] = NULL;
   return cpp;
 }
 
 /* Given pointers to two strings, return a pointer to an allocated
-   list of their distinct common substrings.  Return NULL if something
-   seems wild.  */
+   list of their distinct common substrings.  */
 static char **
 comsubs (char *left, char const *right)
 {
-  char **cpp;
+  char **cpp = xzalloc (sizeof *cpp);
   char *lcp;
-  char *rcp;
-  size_t i, len;
-
-  if (left == NULL || right == NULL)
-    return NULL;
-  cpp = malloc (sizeof *cpp);
-  if (cpp == NULL)
-    return NULL;
-  cpp[0] = NULL;
+
   for (lcp = left; *lcp != '\0'; ++lcp)
     {
-      len = 0;
-      rcp = strchr (right, *lcp);
+      size_t len = 0;
+      char *rcp = strchr (right, *lcp);
       while (rcp != NULL)
         {
+          size_t i;
           for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i)
             continue;
           if (i > len)
             len = i;
           rcp = strchr (rcp + 1, *lcp);
         }
-      if (len == 0)
-        continue;
-      {
-        char **p = enlist (cpp, lcp, len);
-        if (p == NULL)
-          {
-            freelist (cpp);
-            cpp = NULL;
-            break;
-          }
-        cpp = p;
-      }
+      if (len != 0)
+        cpp = enlist (cpp, lcp, len);
     }
   return cpp;
 }
@@ -3945,16 +3887,8 @@ comsubs (char *left, char const *right)
 static char **
 addlists (char **old, char **new)
 {
-  size_t i;
-
-  if (old == NULL || new == NULL)
-    return NULL;
-  for (i = 0; new[i] != NULL; ++i)
-    {
-      old = enlist (old, new[i], strlen (new[i]));
-      if (old == NULL)
-        break;
-    }
+  for (; *new; new++)
+    old = enlist (old, *new, strlen (*new));
   return old;
 }
 
@@ -3963,125 +3897,127 @@ addlists (char **old, char **new)
 static char **
 inboth (char **left, char **right)
 {
-  char **both;
-  char **temp;
+  char **both = xzalloc (sizeof *both);
   size_t lnum, rnum;
 
-  if (left == NULL || right == NULL)
-    return NULL;
-  both = malloc (sizeof *both);
-  if (both == NULL)
-    return NULL;
-  both[0] = NULL;
   for (lnum = 0; left[lnum] != NULL; ++lnum)
     {
       for (rnum = 0; right[rnum] != NULL; ++rnum)
         {
-          temp = comsubs (left[lnum], right[rnum]);
-          if (temp == NULL)
-            {
-              freelist (both);
-              return NULL;
-            }
+          char **temp = comsubs (left[lnum], right[rnum]);
           both = addlists (both, temp);
           freelist (temp);
           free (temp);
-          if (both == NULL)
-            return NULL;
         }
     }
   return both;
 }
 
-typedef struct
+typedef struct must must;
+
+struct must
 {
   char **in;
   char *left;
   char *right;
   char *is;
-} must;
+  bool begline;
+  bool endline;
+  must *prev;
+};
+
+static must *
+allocmust (must *mp)
+{
+  must *new_mp = xmalloc (sizeof *new_mp);
+  new_mp->in = xzalloc (sizeof *new_mp->in);
+  new_mp->left = xzalloc (2);
+  new_mp->right = xzalloc (2);
+  new_mp->is = xzalloc (2);
+  new_mp->begline = false;
+  new_mp->endline = false;
+  new_mp->prev = mp;
+  return new_mp;
+}
 
 static void
-resetmust (must * mp)
+resetmust (must *mp)
 {
+  freelist (mp->in);
+  mp->in[0] = NULL;
   mp->left[0] = mp->right[0] = mp->is[0] = '\0';
+  mp->begline = false;
+  mp->endline = false;
+}
+
+static void
+freemust (must *mp)
+{
   freelist (mp->in);
+  free (mp->in);
+  free (mp->left);
+  free (mp->right);
+  free (mp->is);
+  free (mp);
 }
 
 static void
 dfamust (struct dfa *d)
 {
-  must *musts;
-  must *mp;
-  char *result;
+  must *mp = NULL;
+  char const *result = "";
   size_t ri;
   size_t i;
-  int exact;
-  token t;
-  static must must0;
+  bool exact = false;
+  bool begline = false;
+  bool endline = false;
   struct dfamust *dm;
-  static char empty_string[] = "";
-
-  result = empty_string;
-  exact = 0;
-  MALLOC (musts, d->tindex + 1);
-  mp = musts;
-  for (i = 0; i <= d->tindex; ++i)
-    mp[i] = must0;
-  for (i = 0; i <= d->tindex; ++i)
-    {
-      mp[i].in = xmalloc (sizeof *mp[i].in);
-      mp[i].left = xmalloc (2);
-      mp[i].right = xmalloc (2);
-      mp[i].is = xmalloc (2);
-      mp[i].left[0] = mp[i].right[0] = mp[i].is[0] = '\0';
-      mp[i].in[0] = NULL;
-    }
-#ifdef DEBUG
-  fprintf (stderr, "dfamust:\n");
-  for (i = 0; i < d->tindex; ++i)
-    {
-      fprintf (stderr, " %zd:", i);
-      prtok (d->tokens[i]);
-    }
-  putc ('\n', stderr);
-#endif
+
   for (ri = 0; ri < d->tindex; ++ri)
     {
-      switch (t = d->tokens[ri])
+      token t = d->tokens[ri];
+      switch (t)
         {
+        case BEGLINE:
+          mp = allocmust (mp);
+          mp->begline = true;
+          break;
+        case ENDLINE:
+          mp = allocmust (mp);
+          mp->endline = true;
+          break;
         case LPAREN:
         case RPAREN:
           assert (!"neither LPAREN nor RPAREN may appear here");
+
         case EMPTY:
-        case BEGLINE:
-        case ENDLINE:
         case BEGWORD:
         case ENDWORD:
         case LIMWORD:
         case NOTLIMWORD:
         case BACKREF:
-          resetmust (mp);
+        case ANYCHAR:
+        case MBCSET:
+          mp = allocmust (mp);
           break;
+
         case STAR:
         case QMARK:
-          assert (musts < mp);
-          --mp;
           resetmust (mp);
           break;
+
         case OR:
-          assert (&musts[2] <= mp);
           {
             char **new;
-            must *lmp;
-            must *rmp;
+            must *rmp = mp;
+            must *lmp = mp = mp->prev;
             size_t j, ln, rn, n;
 
-            rmp = --mp;
-            lmp = --mp;
             /* Guaranteed to be.  Unlikely, but ...  */
             if (!STREQ (lmp->is, rmp->is))
               lmp->is[0] = '\0';
+            lmp->begline &= rmp->begline;
+            lmp->endline &= rmp->endline;
             /* Left side--easy */
             i = 0;
             while (lmp->left[i] != '\0' && lmp->left[i] == rmp->left[i])
@@ -4100,133 +4036,126 @@ dfamust (struct dfa *d)
               lmp->right[j] = lmp->right[(ln - i) + j];
             lmp->right[j] = '\0';
             new = inboth (lmp->in, rmp->in);
-            if (new == NULL)
-              goto done;
             freelist (lmp->in);
             free (lmp->in);
             lmp->in = new;
+            freemust (rmp);
           }
           break;
+
         case PLUS:
-          assert (musts < mp);
-          --mp;
           mp->is[0] = '\0';
           break;
+
         case END:
-          assert (mp == &musts[1]);
-          for (i = 0; musts[0].in[i] != NULL; ++i)
-            if (strlen (musts[0].in[i]) > strlen (result))
-              result = musts[0].in[i];
-          if (STREQ (result, musts[0].is))
-            exact = 1;
+          assert (!mp->prev);
+          for (i = 0; mp->in[i] != NULL; ++i)
+            if (strlen (mp->in[i]) > strlen (result))
+              result = mp->in[i];
+          if (STREQ (result, mp->is))
+            {
+              exact = true;
+              begline = mp->begline;
+              endline = mp->endline;
+            }
           goto done;
+
         case CAT:
-          assert (&musts[2] <= mp);
           {
-            must *lmp;
-            must *rmp;
+            must *rmp = mp;
+            must *lmp = mp = mp->prev;
 
-            rmp = --mp;
-            lmp = --mp;
             /* In.  Everything in left, plus everything in
                right, plus concatenation of
                left's right and right's left.  */
             lmp->in = addlists (lmp->in, rmp->in);
-            if (lmp->in == NULL)
-              goto done;
             if (lmp->right[0] != '\0' && rmp->left[0] != '\0')
               {
-                char *tp;
-
-                tp = icpyalloc (lmp->right);
-                tp = icatalloc (tp, rmp->left);
-                lmp->in = enlist (lmp->in, tp, strlen (tp));
+                size_t lrlen = strlen (lmp->right);
+                size_t rllen = strlen (rmp->left);
+                char *tp = xmalloc (lrlen + rllen);
+                memcpy (tp, lmp->right, lrlen);
+                memcpy (tp + lrlen, rmp->left, rllen);
+                lmp->in = enlist (lmp->in, tp, lrlen + rllen);
                 free (tp);
-                if (lmp->in == NULL)
-                  goto done;
               }
             /* Left-hand */
             if (lmp->is[0] != '\0')
-              {
-                lmp->left = icatalloc (lmp->left, rmp->left);
-                if (lmp->left == NULL)
-                  goto done;
-              }
+              lmp->left = icatalloc (lmp->left, rmp->left);
             /* Right-hand */
             if (rmp->is[0] == '\0')
               lmp->right[0] = '\0';
             lmp->right = icatalloc (lmp->right, rmp->right);
-            if (lmp->right == NULL)
-              goto done;
             /* Guaranteed to be */
-            if (lmp->is[0] != '\0' && rmp->is[0] != '\0')
+            if ((lmp->is[0] != '\0' || lmp->begline)
+                && (rmp->is[0] != '\0' || rmp->endline))
               {
                 lmp->is = icatalloc (lmp->is, rmp->is);
-                if (lmp->is == NULL)
-                  goto done;
+                lmp->endline = rmp->endline;
               }
             else
-              lmp->is[0] = '\0';
+              {
+                lmp->is[0] = '\0';
+                lmp->begline = false;
+                lmp->endline = false;
+              }
+            freemust (rmp);
           }
           break;
+
+        case '\0':
+          /* Not on *my* shift.  */
+          goto done;
+
         default:
-          if (t < END)
-            {
-              assert (!"oops! t >= END");
-            }
-          else if (t == '\0')
-            {
-              /* not on *my* shift */
-              goto done;
-            }
-          else if (t >= CSET || !MBS_SUPPORT || t == ANYCHAR || t == MBCSET)
-            {
-              /* easy enough */
-              resetmust (mp);
-            }
-          else
+          mp = allocmust (mp);
+          if (CSET <= t)
             {
-              /* plain character */
-              resetmust (mp);
-              mp->is[0] = mp->left[0] = mp->right[0] = t;
-              mp->is[1] = mp->left[1] = mp->right[1] = '\0';
-              mp->in = enlist (mp->in, mp->is, (size_t) 1);
-              if (mp->in == NULL)
-                goto done;
+              /* If T is a singleton, or if case-folding in a unibyte
+                 locale and T's members all case-fold to the same char,
+                 convert T to one of its members.  Otherwise, do
+                 nothing further with T.  */
+              charclass *ccl = &d->charclasses[t - CSET];
+              int j;
+              for (j = 0; j < NOTCHAR; j++)
+                if (tstbit (j, *ccl))
+                  break;
+              if (! (j < NOTCHAR))
+                break;
+              t = j;
+              while (++j < NOTCHAR)
+                if (tstbit (j, *ccl)
+                    && ! (case_fold && !d->multibyte
+                          && toupper (j) == toupper (t)))
+                  break;
+              if (j < NOTCHAR)
+                break;
             }
+          mp->is[0] = mp->left[0] = mp->right[0]
+            = case_fold && !d->multibyte ? toupper (t) : t;
+          mp->is[1] = mp->left[1] = mp->right[1] = '\0';
+          mp->in = enlist (mp->in, mp->is, 1);
           break;
         }
-#ifdef DEBUG
-      fprintf (stderr, " node: %zd:", ri);
-      prtok (d->tokens[ri]);
-      fprintf (stderr, "\n  in:");
-      for (i = 0; mp->in[i]; ++i)
-        fprintf (stderr, " \"%s\"", mp->in[i]);
-      fprintf (stderr, "\n  is: \"%s\"\n", mp->is);
-      fprintf (stderr, "  left: \"%s\"\n", mp->left);
-      fprintf (stderr, "  right: \"%s\"\n", mp->right);
-#endif
-      ++mp;
     }
 done:
-  if (strlen (result))
+  if (*result)
     {
-      MALLOC (dm, 1);
+      dm = xmalloc (sizeof *dm);
       dm->exact = exact;
-      dm->must = xmemdup (result, strlen (result) + 1);
+      dm->begline = begline;
+      dm->endline = endline;
+      dm->must = xstrdup (result);
       dm->next = d->musts;
       d->musts = dm;
     }
-  mp = musts;
-  for (i = 0; i <= d->tindex; ++i)
+
+  while (mp)
     {
-      freelist (mp[i].in);
-      free (mp[i].in);
-      free (mp[i].left);
-      free (mp[i].right);
-      free (mp[i].is);
+      must *prev = mp->prev;
+      freemust (mp);
+      mp = prev;
     }
-  free (mp);
 }
 
 struct dfa *