12 files changed, 761 insertions, 706 deletions
diff --git a/ChangeLog b/ChangeLog
index 06c4f84d..9b29fe9d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,38 @@
 2016-08-25         Arnold D. Robbins     <arnold@skeeve.com>
 
+	POSIX now says use strcmp for == and !=. Thanks to Chet Ramey
+	for pointing me at the change.  Make it so:
+
+	* awk.h (cmp_nodes): New 3rd param indicating strcmp, not strcoll.
+	* debug.c (cmp_val): Update call to cmp_nodes.
+	* eval.c (cmp_nodes): New 3rd param indicating strcmp, not strcoll.
+	Adjust code and all callers.
+	(scalar_cmp_t): New enum type. Used in ...
+	(cmp_scalars): ... in order to call cmp_nodes correctly.
+	* interpret.h: Use the enum type in calls to cmp_scalars.
+	* re.c (re_update): Adjust call to cmp_nodes.
+
+2016-08-25         Norihiro Tanaka      <noritnk@kcn.ne.jp>
+
+	* awk.h (struct Regexp): Remove dfa member; dfareg is now used
+	instead.  All referrers changed.
+	* re.c (research): Arrange caller of dfaexec and research.
+	(avoid_dfa): Removed.  All callers changed.
+	* awk.h (avoid_dfa): Removed.
+
+	Other changes by Arnold Robbins:
+
+	* awk.h (struct Regexp): Change various boolean members to bool.
+	(RE_NO_FLAGS): New #define.
+	* interpret.h: Use RE_NO_FLAGS instead of zero.
+	* re.c (research): Prettify the logic a little bit.
+
+2016-08-25         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* dfa.c: Sync with grep.
+
+2016-08-25         Arnold D. Robbins     <arnold@skeeve.com>
+
 	* 4.1.4: Release tar ball made.
 
 2016-08-23         Arnold D. Robbins     <arnold@skeeve.com>
diff --git a/awk.h b/awk.h
index 15336c05..c107adb7 100644
--- a/awk.h
+++ b/awk.h
@@ -206,11 +206,10 @@ typedef struct Regexp {
 	struct re_pattern_buffer pat;
 	struct re_registers regs;
 	struct dfa *dfareg;
-	short dfa;
-	short has_anchor;	/* speed up of avoid_dfa kludge, temporary */
-	short non_empty;	/* for use in fpat_parse_field */
-	short has_meta;		/* re has meta chars so (probably) isn't simple string */
-	short maybe_long;	/* re has meta chars that can match long text */
+	bool has_anchor;	/* re has anchors which dfa avoids */
+	bool non_empty;		/* for use in fpat_parse_field */
+	bool has_meta;		/* re has meta chars so (probably) isn't simple string */
+	bool maybe_long;	/* re has meta chars that can match long text */
 } Regexp;
 #define	RESTART(rp,s)	(rp)->regs.start[0]
 #define	REEND(rp,s)	(rp)->regs.end[0]
@@ -219,6 +218,7 @@ typedef struct Regexp {
 #define	NUMSUBPATS(rp,s)	(rp)->regs.num_regs
 
 /* regexp matching flags: */
+#define RE_NO_FLAGS	0	/* empty flags */
 #define RE_NEED_START	1	/* need to know start/end of match */
 #define RE_NO_BOL	2	/* not allowed to match ^ in regexp */
 
@@ -1442,7 +1442,7 @@ extern int sanitize_exit_status(int status);
 extern void PUSH_CODE(INSTRUCTION *cp);
 extern INSTRUCTION *POP_CODE(void);
 extern void init_interpret(void);
-extern int cmp_nodes(NODE *t1, NODE *t2);
+extern int cmp_nodes(NODE *t1, NODE *t2, bool use_strcmp);
 extern int cmp_awknums(const NODE *t1, const NODE *t2);
 extern void set_IGNORECASE(void);
 extern void set_OFS(void);
@@ -1651,7 +1651,6 @@ extern void reg_error(const char *s);
 extern Regexp *re_update(NODE *t);
 extern void resyntax(int syntax);
 extern void resetup(void);
-extern int avoid_dfa(NODE *re, char *str, size_t len);
 extern int reisstring(const char *text, size_t len, Regexp *re, const char *buf);
 extern int get_numbase(const char *str, bool use_locale);
 
diff --git a/debug.c b/debug.c
index f4640adb..9f2d948b 100644
--- a/debug.c
+++ b/debug.c
@@ -1670,7 +1670,7 @@ cmp_val(struct list_item *w, NODE *old, NODE *new)
 
 	if (new->type == Node_var_array)	/* 5 */
 		return true;
-	return cmp_nodes(old, new);			/* 4 */
+	return cmp_nodes(old, new, true);	/* 4 */
 }
 
 /* watchpoint_triggered --- check if we should stop at this watchpoint;
diff --git a/dfa.c b/dfa.c
index cb11043e..85cb46ad 100644
--- a/dfa.c
+++ b/dfa.c
@@ -387,8 +387,8 @@ struct regex_syntax
    meaning of the @#%!@#%^!@ syntax bits.  */
 struct lexer_state
 {
-  char const *lexptr;	/* Pointer to next input character.  */
-  size_t lexleft;	/* Number of characters remaining.  */
+  char const *ptr;	/* Pointer to next input character.  */
+  size_t left;		/* Number of characters remaining.  */
   token lasttok;	/* Previous token returned; initially END.  */
   size_t parens;	/* Count of outstanding left parens.  */
   int minrep, maxrep;	/* Repeat counts for {m,n}.  */
@@ -429,10 +429,10 @@ struct dfa
   size_t calloc;                /* Number of charclasses allocated.  */
 
   /* Scanner state */
-  struct lexer_state lexstate;
+  struct lexer_state lex;
 
   /* Parser state */
-  struct parser_state parsestate;
+  struct parser_state parse;
 
   /* Fields filled by the parser.  */
   token *tokens;                /* Postfix parse array.  */
@@ -910,7 +910,7 @@ using_simple_locale (struct dfa const *dfa)
      && '}' == 125 && '~' == 126)
   };
 
-  return (!native_c_charset || dfa->multibyte) ? false : unibyte_c;
+  return native_c_charset && !dfa->multibyte && unibyte_c; /* all required */
 }
 
 /* Fetch the next lexical input character.  Set C (of type int) to the
@@ -922,23 +922,23 @@ using_simple_locale (struct dfa const *dfa)
    otherwise.  */
 # define FETCH_WC(dfa, c, wc, eoferr)		\
   do {						\
-    if (! dfa->lexstate.lexleft)		\
+    if (! (dfa)->lex.left)			\
       {						\
         if ((eoferr) != 0)			\
           dfaerror (eoferr);			\
         else					\
-          return dfa->lexstate.lasttok = END;	\
+          return (dfa)->lex.lasttok = END;	\
       }						\
     else					\
       {						\
         wint_t _wc;				\
-        size_t nbytes = mbs_to_wchar (&_wc, dfa->lexstate.lexptr, \
-                                      dfa->lexstate.lexleft, dfa); \
-        dfa->lexstate.cur_mb_len = nbytes;	\
+        size_t nbytes = mbs_to_wchar (&_wc, (dfa)->lex.ptr, \
+                                      (dfa)->lex.left, dfa); \
+        (dfa)->lex.cur_mb_len = nbytes;		\
         (wc) = _wc;				\
-        (c) = nbytes == 1 ? to_uchar (*dfa->lexstate.lexptr) : EOF; \
-        dfa->lexstate.lexptr += nbytes;		\
-        dfa->lexstate.lexleft -= nbytes;	\
+        (c) = nbytes == 1 ? to_uchar ((dfa)->lex.ptr[0]) : EOF; \
+        (dfa)->lex.ptr += nbytes;		\
+        (dfa)->lex.left -= nbytes;		\
       }						\
   } while (false)
 
@@ -1112,8 +1112,8 @@ parse_bracket_exp (struct dfa *dfa)
               for (;;)
                 {
                   FETCH_WC (dfa, c, wc, _("unbalanced ["));
-                  if ((c == c1 && *dfa->lexstate.lexptr == ']')
-                      || dfa->lexstate.lexleft == 0)
+                  if (dfa->lex.left == 0
+                      || (c == c1 && dfa->lex.ptr[0] == ']'))
                     break;
                   if (len < MAX_BRACKET_STRING_LEN)
                     str[len++] = c;
@@ -1133,8 +1133,8 @@ parse_bracket_exp (struct dfa *dfa)
                 {
                   char const *class
                     = (dfa->syntax.case_fold && (STREQ (str, "upper")
-                                                 || STREQ (str, "lower")) ?
-                                                      "alpha" : str);
+                                                 || STREQ (str, "lower"))
+                       ? "alpha" : str);
                   const struct dfa_ctype *pred = find_pred (class);
                   if (!pred)
                     dfaerror (_("invalid character class"));
@@ -1174,7 +1174,7 @@ parse_bracket_exp (struct dfa *dfa)
           /* A bracket expression like [a-[.aa.]] matches an unknown set.
              Treat it like [-a[.aa.]] while parsing it, and
              remember that the set is unknown.  */
-          if (c2 == '[' && *dfa->lexstate.lexptr == '.')
+          if (c2 == '[' && dfa->lex.ptr[0] == '.')
             {
               known_bracket_exp = false;
               c2 = ']';
@@ -1184,8 +1184,8 @@ parse_bracket_exp (struct dfa *dfa)
             {
               /* In the case [x-], the - is an ordinary hyphen,
                  which is left in c1, the lookahead character.  */
-              dfa->lexstate.lexptr -= dfa->lexstate.cur_mb_len;
-              dfa->lexstate.lexleft += dfa->lexstate.cur_mb_len;
+              dfa->lex.ptr -= dfa->lex.cur_mb_len;
+              dfa->lex.left += dfa->lex.cur_mb_len;
             }
           else
             {
@@ -1283,19 +1283,27 @@ parse_bracket_exp (struct dfa *dfa)
   return CSET + dfa_charclass_index (dfa, ccl);
 }
 
-#define PUSH_LEX_STATE(s)			\
-  do						\
-    {						\
-      char const *lexptr_saved = dfa->lexstate.lexptr;	\
-      size_t lexleft_saved = dfa->lexstate.lexleft;		\
-      dfa->lexstate.lexptr = (s);				\
-      dfa->lexstate.lexleft = strlen (dfa->lexstate.lexptr)
+struct lexptr
+{
+  char const *ptr;	/* Saved copy of the lexer's next-input pointer.  */
+  size_t left;		/* Saved copy of the lexer's remaining count.  */
+};
+
+static void
+push_lex_state (struct dfa *dfa, struct lexptr *ls, char const *s)
+{
+  ls->ptr = dfa->lex.ptr;	/* Save DFA's lexer position in *LS ...  */
+  ls->left = dfa->lex.left;
+  dfa->lex.ptr = s;		/* ... then lex the NUL-terminated S.  */
+  dfa->lex.left = strlen (s);
+}
 
-#define POP_LEX_STATE()				\
-      dfa->lexstate.lexptr = lexptr_saved;			\
-      dfa->lexstate.lexleft = lexleft_saved;			\
-    }						\
-  while (false)
+static void
+pop_lex_state (struct dfa *dfa, struct lexptr const *ls)
+{
+  dfa->lex.ptr = ls->ptr;	/* Restore the lexer position held in *LS.  */
+  dfa->lex.left = ls->left;
+}
 
 static token
 lex (struct dfa *dfa)
@@ -1313,14 +1321,14 @@ lex (struct dfa *dfa)
      "if (backslash) ...".  */
   for (i = 0; i < 2; ++i)
     {
-      FETCH_WC (dfa, c, dfa->lexstate.wctok, NULL);
+      FETCH_WC (dfa, c, dfa->lex.wctok, NULL);
 
       switch (c)
         {
         case '\\':
           if (backslash)
             goto normal_char;
-          if (dfa->lexstate.lexleft == 0)
+          if (dfa->lex.left == 0)
             dfaerror (_("unfinished \\ escape"));
           backslash = true;
           break;
@@ -1329,28 +1337,29 @@ lex (struct dfa *dfa)
           if (backslash)
             goto normal_char;
           if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS
-              || dfa->lexstate.lasttok == END || dfa->lexstate.lasttok == LPAREN
-              || dfa->lexstate.lasttok == OR)
-            return dfa->lexstate.lasttok = BEGLINE;
+              || dfa->lex.lasttok == END || dfa->lex.lasttok == LPAREN
+              || dfa->lex.lasttok == OR)
+            return dfa->lex.lasttok = BEGLINE;
           goto normal_char;
 
         case '$':
           if (backslash)
             goto normal_char;
           if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS
-              || dfa->lexstate.lexleft == 0
-              || (dfa->syntax.syntax_bits & RE_NO_BK_PARENS
-                  ? dfa->lexstate.lexleft > 0 && *dfa->lexstate.lexptr == ')'
-                  : dfa->lexstate.lexleft > 1 && dfa->lexstate.lexptr[0] == '\\'
-                    && dfa->lexstate.lexptr[1] == ')')
-              || (dfa->syntax.syntax_bits & RE_NO_BK_VBAR
-                  ? dfa->lexstate.lexleft > 0 && *dfa->lexstate.lexptr == '|'
-                  : dfa->lexstate.lexleft > 1 && dfa->lexstate.lexptr[0] == '\\'
-                    && dfa->lexstate.lexptr[1] == '|')
+              || dfa->lex.left == 0
+              /* LEFT is nonzero from here on.  An operator that needs a
+                 backslash counts only when the backslash is present.  */
+              || (dfa->syntax.syntax_bits & RE_NO_BK_PARENS
+                  ? dfa->lex.ptr[0] == ')'
+                  : (dfa->lex.left > 1 && dfa->lex.ptr[0] == '\\'
+                     && dfa->lex.ptr[1] == ')'))
+              || (dfa->syntax.syntax_bits & RE_NO_BK_VBAR
+                  ? dfa->lex.ptr[0] == '|'
+                  : (dfa->lex.left > 1 && dfa->lex.ptr[0] == '\\'
+                     && dfa->lex.ptr[1] == '|'))
               || ((dfa->syntax.syntax_bits & RE_NEWLINE_ALT)
-                  && dfa->lexstate.lexleft > 0
-                  && *dfa->lexstate.lexptr == '\n'))
-            return dfa->lexstate.lasttok = ENDLINE;
+                  && dfa->lex.left > 0 && dfa->lex.ptr[0] == '\n'))
+            return dfa->lex.lasttok = ENDLINE;
           goto normal_char;
 
         case '1':
@@ -1364,8 +1373,8 @@ lex (struct dfa *dfa)
         case '9':
           if (backslash && !(dfa->syntax.syntax_bits & RE_NO_BK_REFS))
             {
-              dfa->lexstate.laststart = false;
-              return dfa->lexstate.lasttok = BACKREF;
+              dfa->lex.laststart = false;
+              return dfa->lex.lasttok = BACKREF;
             }
           goto normal_char;
 
@@ -1373,7 +1382,7 @@ lex (struct dfa *dfa)
           if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
             {
               /* FIXME: should be beginning of string */
-              return dfa->lexstate.lasttok = BEGLINE;
+              return dfa->lex.lasttok = BEGLINE;
             }
           goto normal_char;
 
@@ -1381,28 +1390,28 @@ lex (struct dfa *dfa)
           if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
             {
               /* FIXME: should be end of string */
-              return dfa->lexstate.lasttok = ENDLINE;
+              return dfa->lex.lasttok = ENDLINE;
             }
           goto normal_char;
 
         case '<':
           if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
-            return dfa->lexstate.lasttok = BEGWORD;
+            return dfa->lex.lasttok = BEGWORD;
           goto normal_char;
 
         case '>':
           if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
-            return dfa->lexstate.lasttok = ENDWORD;
+            return dfa->lex.lasttok = ENDWORD;
           goto normal_char;
 
         case 'b':
           if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
-            return dfa->lexstate.lasttok = LIMWORD;
+            return dfa->lex.lasttok = LIMWORD;
           goto normal_char;
 
         case 'B':
           if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
-            return dfa->lexstate.lasttok = NOTLIMWORD;
+            return dfa->lex.lasttok = NOTLIMWORD;
           goto normal_char;
 
         case '?':
@@ -1411,17 +1420,17 @@ lex (struct dfa *dfa)
           if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0))
             goto normal_char;
           if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
-              && dfa->lexstate.laststart)
+              && dfa->lex.laststart)
             goto normal_char;
-          return dfa->lexstate.lasttok = QMARK;
+          return dfa->lex.lasttok = QMARK;
 
         case '*':
           if (backslash)
             goto normal_char;
           if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
-              && dfa->lexstate.laststart)
+              && dfa->lex.laststart)
             goto normal_char;
-          return dfa->lexstate.lasttok = STAR;
+          return dfa->lex.lasttok = STAR;
 
         case '+':
           if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
@@ -1429,9 +1438,9 @@ lex (struct dfa *dfa)
           if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0))
             goto normal_char;
           if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
-              && dfa->lexstate.laststart)
+              && dfa->lex.laststart)
             goto normal_char;
-          return dfa->lexstate.lasttok = PLUS;
+          return dfa->lex.lasttok = PLUS;
 
         case '{':
           if (!(dfa->syntax.syntax_bits & RE_INTERVALS))
@@ -1439,7 +1448,7 @@ lex (struct dfa *dfa)
           if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_BRACES) == 0))
             goto normal_char;
           if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
-              && dfa->lexstate.laststart)
+              && dfa->lex.laststart)
             goto normal_char;
 
           /* Cases:
@@ -1449,86 +1458,79 @@ lex (struct dfa *dfa)
              {,} - 0 to infinity (same as '*')
              {M,N} - M through N */
           {
-            char const *p = dfa->lexstate.lexptr;
-            char const *lim = p + dfa->lexstate.lexleft;
-            dfa->lexstate.minrep = dfa->lexstate.maxrep = -1;
+            char const *p = dfa->lex.ptr;
+            char const *lim = p + dfa->lex.left;
+            dfa->lex.minrep = dfa->lex.maxrep = -1;
             for (; p != lim && ISASCIIDIGIT (*p); p++)
-              {
-                if (dfa->lexstate.minrep < 0)
-                  dfa->lexstate.minrep = *p - '0';
-                else
-                  dfa->lexstate.minrep = MIN (RE_DUP_MAX + 1,
-                                              (dfa->lexstate.minrep
-                                               * 10 + *p - '0'));
-              }
+              dfa->lex.minrep = (dfa->lex.minrep < 0
+                                 ? *p - '0'
+                                 : MIN (RE_DUP_MAX + 1,
+                                        dfa->lex.minrep * 10 + *p - '0'));
             if (p != lim)
               {
                 if (*p != ',')
-                  dfa->lexstate.maxrep = dfa->lexstate.minrep;
+                  dfa->lex.maxrep = dfa->lex.minrep;
                 else
                   {
-                    if (dfa->lexstate.minrep < 0)
-                      dfa->lexstate.minrep = 0;
+                    if (dfa->lex.minrep < 0)
+                      dfa->lex.minrep = 0;
                     while (++p != lim && ISASCIIDIGIT (*p))
-                      {
-                        if (dfa->lexstate.maxrep < 0)
-                          dfa->lexstate.maxrep = *p - '0';
-                        else
-                          dfa->lexstate.maxrep = MIN (RE_DUP_MAX + 1,
-                                                      (dfa->lexstate.maxrep
-                                                       * 10 + *p - '0'));
-                      }
+                      dfa->lex.maxrep
+                        = (dfa->lex.maxrep < 0
+                           ? *p - '0'
+                           : MIN (RE_DUP_MAX + 1,
+                                  dfa->lex.maxrep * 10 + *p - '0'));
                   }
               }
             if (! ((! backslash || (p != lim && *p++ == '\\'))
                    && p != lim && *p++ == '}'
-                   && 0 <= dfa->lexstate.minrep
-                   && (dfa->lexstate.maxrep < 0
-                       || dfa->lexstate.minrep <= dfa->lexstate.maxrep)))
+                   && 0 <= dfa->lex.minrep
+                   && (dfa->lex.maxrep < 0
+                       || dfa->lex.minrep <= dfa->lex.maxrep)))
               {
                 if (dfa->syntax.syntax_bits & RE_INVALID_INTERVAL_ORD)
                   goto normal_char;
                 dfaerror (_("invalid content of \\{\\}"));
               }
-            if (RE_DUP_MAX < dfa->lexstate.maxrep)
+            if (RE_DUP_MAX < dfa->lex.maxrep)
               dfaerror (_("regular expression too big"));
-            dfa->lexstate.lexptr = p;
-            dfa->lexstate.lexleft = lim - p;
+            dfa->lex.ptr = p;
+            dfa->lex.left = lim - p;
           }
-          dfa->lexstate.laststart = false;
-          return dfa->lexstate.lasttok = REPMN;
+          dfa->lex.laststart = false;
+          return dfa->lex.lasttok = REPMN;
 
         case '|':
           if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
             goto normal_char;
           if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_VBAR) == 0))
             goto normal_char;
-          dfa->lexstate.laststart = true;
-          return dfa->lexstate.lasttok = OR;
+          dfa->lex.laststart = true;
+          return dfa->lex.lasttok = OR;
 
         case '\n':
           if (dfa->syntax.syntax_bits & RE_LIMITED_OPS
               || backslash || !(dfa->syntax.syntax_bits & RE_NEWLINE_ALT))
             goto normal_char;
-          dfa->lexstate.laststart = true;
-          return dfa->lexstate.lasttok = OR;
+          dfa->lex.laststart = true;
+          return dfa->lex.lasttok = OR;
 
         case '(':
           if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0))
             goto normal_char;
-          ++dfa->lexstate.parens;
-          dfa->lexstate.laststart = true;
-          return dfa->lexstate.lasttok = LPAREN;
+          dfa->lex.parens++;
+          dfa->lex.laststart = true;
+          return dfa->lex.lasttok = LPAREN;
 
         case ')':
           if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0))
             goto normal_char;
-          if (dfa->lexstate.parens == 0
+          if (dfa->lex.parens == 0
               && dfa->syntax.syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD)
             goto normal_char;
-          --dfa->lexstate.parens;
-          dfa->lexstate.laststart = false;
-          return dfa->lexstate.lasttok = RPAREN;
+          dfa->lex.parens--;
+          dfa->lex.laststart = false;
+          return dfa->lex.lasttok = RPAREN;
 
         case '.':
           if (backslash)
@@ -1537,8 +1539,8 @@ lex (struct dfa *dfa)
             {
               /* In multibyte environment period must match with a single
                  character not a byte.  So we use ANYCHAR.  */
-              dfa->lexstate.laststart = false;
-              return dfa->lexstate.lasttok = ANYCHAR;
+              dfa->lex.laststart = false;
+              return dfa->lex.lasttok = ANYCHAR;
             }
           zeroset (ccl);
           notset (ccl);
@@ -1546,8 +1548,8 @@ lex (struct dfa *dfa)
             clrbit ('\n', ccl);
           if (dfa->syntax.syntax_bits & RE_DOT_NOT_NULL)
             clrbit ('\0', ccl);
-          dfa->lexstate.laststart = false;
-          return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa, ccl);
+          dfa->lex.laststart = false;
+          return dfa->lex.lasttok = CSET + dfa_charclass_index (dfa, ccl);
 
         case 's':
         case 'S':
@@ -1561,9 +1563,8 @@ lex (struct dfa *dfa)
                   setbit (c2, ccl);
               if (c == 'S')
                 notset (ccl);
-              dfa->lexstate.laststart = false;
-              return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa,
-                                                                         ccl);
+              dfa->lex.laststart = false;
+              return dfa->lex.lasttok = CSET + dfa_charclass_index (dfa, ccl);
             }
 
           /* FIXME: see if optimizing this, as is done with ANYCHAR and
@@ -1572,14 +1573,15 @@ lex (struct dfa *dfa)
           /* \s and \S are documented to be equivalent to [[:space:]] and
              [^[:space:]] respectively, so tell the lexer to process those
              strings, each minus its "already processed" '['.  */
-          PUSH_LEX_STATE (c == 's' ? "[:space:]]" : "^[:space:]]");
-
-          dfa->lexstate.lasttok = parse_bracket_exp (dfa);
-
-          POP_LEX_STATE ();
+          {
+            struct lexptr ls;
+            push_lex_state (dfa, &ls, &"^[:space:]]"[c == 's']);
+            dfa->lex.lasttok = parse_bracket_exp (dfa);
+            pop_lex_state (dfa, &ls);
+          }
 
-          dfa->lexstate.laststart = false;
-          return dfa->lexstate.lasttok;
+          dfa->lex.laststart = false;
+          return dfa->lex.lasttok;
 
         case 'w':
         case 'W':
@@ -1594,9 +1596,8 @@ lex (struct dfa *dfa)
                   setbit (c2, ccl);
               if (c == 'W')
                 notset (ccl);
-              dfa->lexstate.laststart = false;
-              return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa,
-                                                                         ccl);
+              dfa->lex.laststart = false;
+              return dfa->lex.lasttok = CSET + dfa_charclass_index (dfa, ccl);
             }
 
           /* FIXME: see if optimizing this, as is done with ANYCHAR and
@@ -1605,38 +1606,38 @@ lex (struct dfa *dfa)
           /* \w and \W are documented to be equivalent to [_[:alnum:]] and
              [^_[:alnum:]] respectively, so tell the lexer to process those
              strings, each minus its "already processed" '['.  */
-          PUSH_LEX_STATE (c == 'w' ? "_[:alnum:]]" : "^_[:alnum:]]");
-
-          dfa->lexstate.lasttok = parse_bracket_exp (dfa);
-
-          POP_LEX_STATE ();
+          {
+            struct lexptr ls;
+            push_lex_state (dfa, &ls, &"^_[:alnum:]]"[c == 'w']);
+            dfa->lex.lasttok = parse_bracket_exp (dfa);
+            pop_lex_state (dfa, &ls);
+          }
 
-          dfa->lexstate.laststart = false;
-          return dfa->lexstate.lasttok;
+          dfa->lex.laststart = false;
+          return dfa->lex.lasttok;
 
         case '[':
           if (backslash)
             goto normal_char;
-          dfa->lexstate.laststart = false;
-          return dfa->lexstate.lasttok = parse_bracket_exp (dfa);
+          dfa->lex.laststart = false;
+          return dfa->lex.lasttok = parse_bracket_exp (dfa);
 
         default:
         normal_char:
-          dfa->lexstate.laststart = false;
+          dfa->lex.laststart = false;
           /* For multibyte character sets, folding is done in atom.  Always
              return WCHAR.  */
           if (dfa->multibyte)
-            return dfa->lexstate.lasttok = WCHAR;
+            return dfa->lex.lasttok = WCHAR;
 
           if (dfa->syntax.case_fold && isalpha (c))
             {
               zeroset (ccl);
               setbit_case_fold_c (c, ccl);
-              return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa,
-                                                                         ccl);
+              return dfa->lex.lasttok = CSET + dfa_charclass_index (dfa, ccl);
             }
 
-          return dfa->lexstate.lasttok = c;
+          return dfa->lex.lasttok = c;
         }
     }
 
@@ -1670,21 +1671,21 @@ addtok_mb (struct dfa *dfa, token t, int mbprop)
 
     case CAT:
     case OR:
-      --dfa->parsestate.depth;
+      dfa->parse.depth--;
       break;
 
     case BACKREF:
       dfa->fast = false;
       /* fallthrough */
     default:
-      ++dfa->nleaves;
+      dfa->nleaves++;
       /* fallthrough */
     case EMPTY:
-      ++dfa->parsestate.depth;
+      dfa->parse.depth++;
       break;
     }
-  if (dfa->parsestate.depth > dfa->depth)
-    dfa->depth = dfa->parsestate.depth;
+  if (dfa->parse.depth > dfa->depth)
+    dfa->depth = dfa->parse.depth;
 }
 
 static void addtok_wc (struct dfa *dfa, wint_t wc);
@@ -1741,19 +1742,19 @@ addtok_wc (struct dfa *dfa, wint_t wc)
   size_t stored_bytes = wcrtomb ((char *) buf, wc, &s);
 
   if (stored_bytes != (size_t) -1)
-    dfa->lexstate.cur_mb_len = stored_bytes;
+    dfa->lex.cur_mb_len = stored_bytes;
   else
     {
       /* This is merely stop-gap.  buf[0] is undefined, yet skipping
          the addtok_mb call altogether can corrupt the heap.  */
-      dfa->lexstate.cur_mb_len = 1;
+      dfa->lex.cur_mb_len = 1;
       buf[0] = 0;
     }
 
-  addtok_mb (dfa, buf[0], dfa->lexstate.cur_mb_len == 1 ? 3 : 1);
-  for (i = 1; i < dfa->lexstate.cur_mb_len; i++)
+  addtok_mb (dfa, buf[0], dfa->lex.cur_mb_len == 1 ? 3 : 1);
+  for (i = 1; i < dfa->lex.cur_mb_len; i++)
     {
-      addtok_mb (dfa, buf[i], i == dfa->lexstate.cur_mb_len - 1 ? 2 : 0);
+      addtok_mb (dfa, buf[i], i == dfa->lex.cur_mb_len - 1 ? 2 : 0);
       addtok (dfa, CAT);
     }
 }
@@ -1854,18 +1855,18 @@ add_utf8_anychar (struct dfa *dfa)
 static void
 atom (struct dfa *dfa)
 {
-  if (dfa->parsestate.tok == WCHAR)
+  if (dfa->parse.tok == WCHAR)
     {
-      if (dfa->lexstate.wctok == WEOF)
+      if (dfa->lex.wctok == WEOF)
         addtok (dfa, BACKREF);
       else
         {
-          addtok_wc (dfa, dfa->lexstate.wctok);
+          addtok_wc (dfa, dfa->lex.wctok);
 
           if (dfa->syntax.case_fold)
             {
               wchar_t folded[CASE_FOLDED_BUFSIZE];
-              unsigned int i, n = case_folded_counterparts (dfa->lexstate.wctok,
+              unsigned int i, n = case_folded_counterparts (dfa->lex.wctok,
                                                             folded);
               for (i = 0; i < n; i++)
                 {
@@ -1875,9 +1876,9 @@ atom (struct dfa *dfa)
             }
         }
 
-      dfa->parsestate.tok = lex (dfa);
+      dfa->parse.tok = lex (dfa);
     }
-  else if (dfa->parsestate.tok == ANYCHAR && using_utf8)
+  else if (dfa->parse.tok == ANYCHAR && using_utf8)
     {
       /* For UTF-8 expand the period to a series of CSETs that define a valid
          UTF-8 character.  This avoids using the slow multibyte path.  I'm
@@ -1887,26 +1888,25 @@ atom (struct dfa *dfa)
          UTF-8: it is the most used, and the structure of the encoding
          makes the correctness more obvious.  */
       add_utf8_anychar (dfa);
-      dfa->parsestate.tok = lex (dfa);
+      dfa->parse.tok = lex (dfa);
     }
-  else if ((dfa->parsestate.tok >= 0 && dfa->parsestate.tok < NOTCHAR)
-           || dfa->parsestate.tok >= CSET || dfa->parsestate.tok == BACKREF
-           || dfa->parsestate.tok == BEGLINE || dfa->parsestate.tok == ENDLINE
-           || dfa->parsestate.tok == BEGWORD || dfa->parsestate.tok == ANYCHAR
-           || dfa->parsestate.tok == MBCSET || dfa->parsestate.tok == ENDWORD
-           || dfa->parsestate.tok == LIMWORD
-           || dfa->parsestate.tok == NOTLIMWORD)
+  else if ((0 <= dfa->parse.tok && dfa->parse.tok < NOTCHAR)
+           || dfa->parse.tok >= CSET || dfa->parse.tok == BACKREF
+           || dfa->parse.tok == BEGLINE || dfa->parse.tok == ENDLINE
+           || dfa->parse.tok == BEGWORD || dfa->parse.tok == ANYCHAR
+           || dfa->parse.tok == MBCSET || dfa->parse.tok == ENDWORD
+           || dfa->parse.tok == LIMWORD || dfa->parse.tok == NOTLIMWORD)
     {
-      addtok (dfa, dfa->parsestate.tok);
-      dfa->parsestate.tok = lex (dfa);
+      addtok (dfa, dfa->parse.tok);
+      dfa->parse.tok = lex (dfa);
     }
-  else if (dfa->parsestate.tok == LPAREN)
+  else if (dfa->parse.tok == LPAREN)
     {
-      dfa->parsestate.tok = lex (dfa);
+      dfa->parse.tok = lex (dfa);
       regexp (dfa);
-      if (dfa->parsestate.tok != RPAREN)
+      if (dfa->parse.tok != RPAREN)
         dfaerror (_("unbalanced ("));
-      dfa->parsestate.tok = lex (dfa);
+      dfa->parse.tok = lex (dfa);
     }
   else
     addtok (dfa, EMPTY);
@@ -1954,40 +1954,39 @@ closure (struct dfa *dfa)
   size_t tindex, ntokens;
 
   atom (dfa);
-  while (dfa->parsestate.tok == QMARK || dfa->parsestate.tok == STAR
-         || dfa->parsestate.tok == PLUS || dfa->parsestate.tok == REPMN)
-    if (dfa->parsestate.tok == REPMN
-        && (dfa->lexstate.minrep || dfa->lexstate.maxrep))
+  while (dfa->parse.tok == QMARK || dfa->parse.tok == STAR
+         || dfa->parse.tok == PLUS || dfa->parse.tok == REPMN)
+    if (dfa->parse.tok == REPMN && (dfa->lex.minrep || dfa->lex.maxrep))
       {
         ntokens = nsubtoks (dfa, dfa->tindex);
         tindex = dfa->tindex - ntokens;
-        if (dfa->lexstate.maxrep < 0)
+        if (dfa->lex.maxrep < 0)
           addtok (dfa, PLUS);
-        if (dfa->lexstate.minrep == 0)
+        if (dfa->lex.minrep == 0)
           addtok (dfa, QMARK);
-        for (i = 1; i < dfa->lexstate.minrep; ++i)
+        for (i = 1; i < dfa->lex.minrep; i++)
           {
             copytoks (dfa, tindex, ntokens);
             addtok (dfa, CAT);
           }
-        for (; i < dfa->lexstate.maxrep; ++i)
+        for (; i < dfa->lex.maxrep; i++)
           {
             copytoks (dfa, tindex, ntokens);
             addtok (dfa, QMARK);
             addtok (dfa, CAT);
           }
-        dfa->parsestate.tok = lex (dfa);
+        dfa->parse.tok = lex (dfa);
       }
-    else if (dfa->parsestate.tok == REPMN)
+    else if (dfa->parse.tok == REPMN)
       {
         dfa->tindex -= nsubtoks (dfa, dfa->tindex);
-        dfa->parsestate.tok = lex (dfa);
+        dfa->parse.tok = lex (dfa);
         closure (dfa);
       }
     else
       {
-        addtok (dfa, dfa->parsestate.tok);
-        dfa->parsestate.tok = lex (dfa);
+        addtok (dfa, dfa->parse.tok);
+        dfa->parse.tok = lex (dfa);
       }
 }
 
@@ -1995,8 +1994,8 @@ static void
 branch (struct dfa* dfa)
 {
   closure (dfa);
-  while (dfa->parsestate.tok != RPAREN && dfa->parsestate.tok != OR
-         && dfa->parsestate.tok >= 0)
+  while (dfa->parse.tok != RPAREN && dfa->parse.tok != OR
+         && dfa->parse.tok >= 0)
     {
       closure (dfa);
       addtok (dfa, CAT);
@@ -2007,9 +2006,9 @@ static void
 regexp (struct dfa *dfa)
 {
   branch (dfa);
-  while (dfa->parsestate.tok == OR)
+  while (dfa->parse.tok == OR)
     {
-      dfa->parsestate.tok = lex (dfa);
+      dfa->parse.tok = lex (dfa);
       branch (dfa);
       addtok (dfa, OR);
     }
@@ -2021,26 +2020,26 @@ regexp (struct dfa *dfa)
 static void
 dfaparse (char const *s, size_t len, struct dfa *d)
 {
-  d->lexstate.lexptr = s;
-  d->lexstate.lexleft = len;
-  d->lexstate.lasttok = END;
-  d->lexstate.laststart = true;
-  d->lexstate.parens = 0;
+  d->lex.ptr = s;
+  d->lex.left = len;
+  d->lex.lasttok = END;
+  d->lex.laststart = true;
+  d->lex.parens = 0;
   if (d->multibyte)
     {
-      d->lexstate.cur_mb_len = 0;
+      d->lex.cur_mb_len = 0;
       memset (&d->mbs, 0, sizeof d->mbs);
     }
 
   if (!d->syntax.syntax_bits_set)
     dfaerror (_("no syntax specified"));
 
-  d->parsestate.tok = lex (d);
-  d->parsestate.depth = d->depth;
+  d->parse.tok = lex (d);
+  d->parse.depth = d->depth;
 
   regexp (d);
 
-  if (d->parsestate.tok != END)
+  if (d->parse.tok != END)
     dfaerror (_("unbalanced )"));
 
   addtok (d, END - d->nregexps);
@@ -3990,11 +3989,9 @@ dfamust (struct dfa const *d)
   bool exact = false;
   bool begline = false;
   bool endline = false;
-  size_t rj;
   bool need_begline = false;
   bool need_endline = false;
   bool case_fold_unibyte = d->syntax.case_fold && MB_CUR_MAX == 1;
-  struct dfamust *dm;
 
   for (ri = 0; ri < d->tindex; ++ri)
     {
@@ -4171,7 +4168,7 @@ dfamust (struct dfa const *d)
                 }
             }
 
-          rj = ri + 2;
+          size_t rj = ri + 2;
           if (d->tokens[ri + 1] == CAT)
             {
               for (; rj < d->tindex - 1; rj += 2)
@@ -4200,7 +4197,7 @@ dfamust (struct dfa const *d)
     }
  done:;
 
-  dm = NULL;
+  struct dfamust *dm = NULL;
   if (*result)
     {
       dm = xmalloc (sizeof *dm);
@@ -4230,11 +4227,11 @@ dfamustfree (struct dfamust *dm)
 struct dfa *
 dfaalloc (void)
 {
-  struct dfa *d = xcalloc (1, sizeof (struct dfa));
+  struct dfa *d = xzalloc (sizeof *d);
   d->multibyte = MB_CUR_MAX > 1;
   d->dfaexec = d->multibyte ? dfaexec_mb : dfaexec_sb;
   d->fast = !d->multibyte;
-  d->lexstate.cur_mb_len = 1;
+  d->lex.cur_mb_len = 1;
   return d;
 }
 
diff --git a/doc/ChangeLog b/doc/ChangeLog
index e6d9bf05..efa2b561 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,5 +1,11 @@
 2016-08-25         Arnold D. Robbins     <arnold@skeeve.com>
 
+	* gawktexi.in (POSIX String Comparison): Update for new
+	spec where == and != use strcmp, rest use strcoll. Thanks to
+	Chet Ramey for pointing me at the new rules.
+
+2016-08-25         Arnold D. Robbins     <arnold@skeeve.com>
+
 	* 4.1.4: Release tar ball made.
 
 2016-08-24         Arnold D. Robbins     <arnold@skeeve.com>
diff --git a/doc/gawk.info b/doc/gawk.info
index 1766ab94..973af87c 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -8615,18 +8615,18 @@ Constant Regexps::, where this is discussed in more detail.
 
 File: gawk.info,  Node: POSIX String Comparison,  Prev: Comparison Operators,  Up: Typing and Comparison
 
-6.3.2.3 String Comparison with POSIX Rules
-..........................................
+6.3.2.3 String Comparison Based on Locale Collating Order
+.........................................................
 
-The POSIX standard says that string comparison is performed based on the
-locale's "collating order".  This is the order in which characters sort,
-as defined by the locale (for more discussion, *note Locales::).  This
-order is usually very different from the results obtained when doing
-straight character-by-character comparison.(1)
+The POSIX standard used to say that all string comparisons are performed
+based on the locale's "collating order".  This is the order in which
+characters sort, as defined by the locale (for more discussion, *note
+Locales::).  This order is usually very different from the results
+obtained when doing straight byte-by-byte comparison.(1)
 
    Because this behavior differs considerably from existing practice,
-'gawk' only implements it when in POSIX mode (*note Options::).  Here is
-an example to illustrate the difference, in an 'en_US.UTF-8' locale:
+'gawk' only implemented it when in POSIX mode (*note Options::).  Here
+is an example to illustrate the difference, in an 'en_US.UTF-8' locale:
 
      $ gawk 'BEGIN { printf("ABC < abc = %s\n",
      >                     ("ABC" < "abc" ? "TRUE" : "FALSE")) }'
@@ -8635,11 +8635,28 @@ an example to illustrate the difference, in an 'en_US.UTF-8' locale:
      >                             ("ABC" < "abc" ? "TRUE" : "FALSE")) }'
      -| ABC < abc = FALSE
 
+   Fortunately, as of August 2016, comparison based on locale collating
+order is no longer required for the '==' and '!=' operators.(2)
+However, comparison based on locales is still required for '<', '<=',
+'>', and '>='.  POSIX thus recommends as follows:
+
+     Since the '==' operator checks whether strings are identical, not
+     whether they collate equally, applications needing to check whether
+     strings collate equally can use:
+
+          a <= b && a >= b
+
+   As of version 4.2, 'gawk' continues to use locale collating order for
+'<', '<=', '>', and '>=' only in POSIX mode.
+
    ---------- Footnotes ----------
 
    (1) Technically, string comparison is supposed to behave the same way
 as if the strings were compared with the C 'strcoll()' function.
 
+   (2) See the Austin Group website
+(http://austingroupbugs.net/view.php?id=1070).
+
 
 File: gawk.info,  Node: Boolean Ops,  Next: Conditional Exp,  Prev: Typing and Comparison,  Up: Truth Values and Conditions
 
@@ -27603,7 +27620,7 @@ ranges, such that outside the '"C"' and '"POSIX"' locales, the meaning
 of range expressions was _undefined_.(3)
 
    By using this lovely technical term, the standard gives license to
-implementors to implement ranges in whatever way they choose.  The
+implementers to implement ranges in whatever way they choose.  The
 'gawk' maintainer chose to apply the pre-POSIX meaning both with the
 default regexp matching and when '--traditional' or '--posix' are used.
 In all cases 'gawk' remains POSIX-compliant.
@@ -35427,401 +35444,402 @@ Node: Variable Typing364779
 Node: Comparison Operators368403
 Ref: table-relational-ops368822
 Node: POSIX String Comparison372317
-Ref: POSIX String Comparison-Footnote-1373391
-Node: Boolean Ops373530
-Ref: Boolean Ops-Footnote-1378012
-Node: Conditional Exp378104
-Node: Function Calls379840
-Node: Precedence383717
-Node: Locales387376
-Node: Expressions Summary389008
-Node: Patterns and Actions391581
-Node: Pattern Overview392701
-Node: Regexp Patterns394378
-Node: Expression Patterns394920
-Node: Ranges398701
-Node: BEGIN/END401809
-Node: Using BEGIN/END402570
-Ref: Using BEGIN/END-Footnote-1405306
-Node: I/O And BEGIN/END405412
-Node: BEGINFILE/ENDFILE407726
-Node: Empty410633
-Node: Using Shell Variables410950
-Node: Action Overview413224
-Node: Statements415549
-Node: If Statement417397
-Node: While Statement418892
-Node: Do Statement420920
-Node: For Statement422068
-Node: Switch Statement425226
-Node: Break Statement427612
-Node: Continue Statement429704
-Node: Next Statement431531
-Node: Nextfile Statement433914
-Node: Exit Statement436566
-Node: Built-in Variables438969
-Node: User-modified440102
-Node: Auto-set447688
-Ref: Auto-set-Footnote-1462341
-Ref: Auto-set-Footnote-2462547
-Node: ARGC and ARGV462603
-Node: Pattern Action Summary466816
-Node: Arrays469246
-Node: Array Basics470575
-Node: Array Intro471419
-Ref: figure-array-elements473394
-Ref: Array Intro-Footnote-1476098
-Node: Reference to Elements476226
-Node: Assigning Elements478690
-Node: Array Example479181
-Node: Scanning an Array480940
-Node: Controlling Scanning483962
-Ref: Controlling Scanning-Footnote-1489361
-Node: Numeric Array Subscripts489677
-Node: Uninitialized Subscripts491861
-Node: Delete493480
-Ref: Delete-Footnote-1496232
-Node: Multidimensional496289
-Node: Multiscanning499384
-Node: Arrays of Arrays500975
-Node: Arrays Summary505742
-Node: Functions507835
-Node: Built-in508873
-Node: Calling Built-in509954
-Node: Numeric Functions511950
-Ref: Numeric Functions-Footnote-1516783
-Ref: Numeric Functions-Footnote-2517140
-Ref: Numeric Functions-Footnote-3517188
-Node: String Functions517460
-Ref: String Functions-Footnote-1540964
-Ref: String Functions-Footnote-2541092
-Ref: String Functions-Footnote-3541340
-Node: Gory Details541427
-Ref: table-sub-escapes543218
-Ref: table-sub-proposed544737
-Ref: table-posix-sub546100
-Ref: table-gensub-escapes547641
-Ref: Gory Details-Footnote-1548464
-Node: I/O Functions548618
-Ref: table-system-return-values555200
-Ref: I/O Functions-Footnote-1557180
-Ref: I/O Functions-Footnote-2557328
-Node: Time Functions557448
-Ref: Time Functions-Footnote-1567953
-Ref: Time Functions-Footnote-2568021
-Ref: Time Functions-Footnote-3568179
-Ref: Time Functions-Footnote-4568290
-Ref: Time Functions-Footnote-5568402
-Ref: Time Functions-Footnote-6568629
-Node: Bitwise Functions568895
-Ref: table-bitwise-ops569489
-Ref: Bitwise Functions-Footnote-1573827
-Node: Type Functions574000
-Node: I18N Functions576532
-Node: User-defined578183
-Node: Definition Syntax578988
-Ref: Definition Syntax-Footnote-1584675
-Node: Function Example584746
-Ref: Function Example-Footnote-1587668
-Node: Function Caveats587690
-Node: Calling A Function588208
-Node: Variable Scope589166
-Node: Pass By Value/Reference592160
-Node: Return Statement595659
-Node: Dynamic Typing598638
-Node: Indirect Calls599568
-Ref: Indirect Calls-Footnote-1609819
-Node: Functions Summary609947
-Node: Library Functions612652
-Ref: Library Functions-Footnote-1616259
-Ref: Library Functions-Footnote-2616402
-Node: Library Names616573
-Ref: Library Names-Footnote-1620033
-Ref: Library Names-Footnote-2620256
-Node: General Functions620342
-Node: Strtonum Function621445
-Node: Assert Function624467
-Node: Round Function627793
-Node: Cliff Random Function629334
-Node: Ordinal Functions630350
-Ref: Ordinal Functions-Footnote-1633413
-Ref: Ordinal Functions-Footnote-2633665
-Node: Join Function633875
-Ref: Join Function-Footnote-1635645
-Node: Getlocaltime Function635845
-Node: Readfile Function639587
-Node: Shell Quoting641559
-Node: Data File Management642960
-Node: Filetrans Function643592
-Node: Rewind Function647688
-Node: File Checking649594
-Ref: File Checking-Footnote-1650928
-Node: Empty Files651129
-Node: Ignoring Assigns653108
-Node: Getopt Function654658
-Ref: Getopt Function-Footnote-1666127
-Node: Passwd Functions666327
-Ref: Passwd Functions-Footnote-1675166
-Node: Group Functions675254
-Ref: Group Functions-Footnote-1683151
-Node: Walking Arrays683358
-Node: Library Functions Summary686366
-Node: Library Exercises687772
-Node: Sample Programs688237
-Node: Running Examples689007
-Node: Clones689735
-Node: Cut Program690959
-Node: Egrep Program700888
-Ref: Egrep Program-Footnote-1708400
-Node: Id Program708510
-Node: Split Program712190
-Ref: Split Program-Footnote-1715649
-Node: Tee Program715778
-Node: Uniq Program718568
-Node: Wc Program725994
-Ref: Wc Program-Footnote-1730249
-Node: Miscellaneous Programs730343
-Node: Dupword Program731556
-Node: Alarm Program733586
-Node: Translate Program738441
-Ref: Translate Program-Footnote-1743006
-Node: Labels Program743276
-Ref: Labels Program-Footnote-1746627
-Node: Word Sorting746711
-Node: History Sorting750783
-Node: Extract Program752618
-Node: Simple Sed760147
-Node: Igawk Program763221
-Ref: Igawk Program-Footnote-1777552
-Ref: Igawk Program-Footnote-2777754
-Ref: Igawk Program-Footnote-3777876
-Node: Anagram Program777991
-Node: Signature Program781053
-Node: Programs Summary782300
-Node: Programs Exercises783514
-Ref: Programs Exercises-Footnote-1787643
-Node: Advanced Features787734
-Node: Nondecimal Data789724
-Node: Array Sorting791315
-Node: Controlling Array Traversal792015
-Ref: Controlling Array Traversal-Footnote-1800382
-Node: Array Sorting Functions800500
-Ref: Array Sorting Functions-Footnote-1805591
-Node: Two-way I/O805787
-Ref: Two-way I/O-Footnote-1812337
-Ref: Two-way I/O-Footnote-2812524
-Node: TCP/IP Networking812606
-Node: Profiling815724
-Ref: Profiling-Footnote-1824217
-Node: Advanced Features Summary824540
-Node: Internationalization826384
-Node: I18N and L10N827864
-Node: Explaining gettext828551
-Ref: Explaining gettext-Footnote-1834443
-Ref: Explaining gettext-Footnote-2834628
-Node: Programmer i18n834793
-Ref: Programmer i18n-Footnote-1839648
-Node: Translator i18n839697
-Node: String Extraction840491
-Ref: String Extraction-Footnote-1841623
-Node: Printf Ordering841709
-Ref: Printf Ordering-Footnote-1844495
-Node: I18N Portability844559
-Ref: I18N Portability-Footnote-1847015
-Node: I18N Example847078
-Ref: I18N Example-Footnote-1849884
-Node: Gawk I18N849957
-Node: I18N Summary850602
-Node: Debugger851943
-Node: Debugging852965
-Node: Debugging Concepts853406
-Node: Debugging Terms855215
-Node: Awk Debugging857790
-Node: Sample Debugging Session858696
-Node: Debugger Invocation859230
-Node: Finding The Bug860616
-Node: List of Debugger Commands867094
-Node: Breakpoint Control868427
-Node: Debugger Execution Control872121
-Node: Viewing And Changing Data875483
-Node: Execution Stack878857
-Node: Debugger Info880494
-Node: Miscellaneous Debugger Commands884565
-Node: Readline Support889653
-Node: Limitations890549
-Ref: Limitations-Footnote-1894780
-Node: Debugging Summary894831
-Node: Arbitrary Precision Arithmetic896110
-Node: Computer Arithmetic897526
-Ref: table-numeric-ranges901117
-Ref: Computer Arithmetic-Footnote-1901839
-Node: Math Definitions901896
-Ref: table-ieee-formats905210
-Ref: Math Definitions-Footnote-1905813
-Node: MPFR features905918
-Node: FP Math Caution907635
-Ref: FP Math Caution-Footnote-1908707
-Node: Inexactness of computations909076
-Node: Inexact representation910036
-Node: Comparing FP Values911396
-Node: Errors accumulate912478
-Node: Getting Accuracy913911
-Node: Try To Round916621
-Node: Setting precision917520
-Ref: table-predefined-precision-strings918217
-Node: Setting the rounding mode920047
-Ref: table-gawk-rounding-modes920421
-Ref: Setting the rounding mode-Footnote-1923829
-Node: Arbitrary Precision Integers924008
-Ref: Arbitrary Precision Integers-Footnote-1928925
-Node: POSIX Floating Point Problems929074
-Ref: POSIX Floating Point Problems-Footnote-1932956
-Node: Floating point summary932994
-Node: Dynamic Extensions935184
-Node: Extension Intro936737
-Node: Plugin License938003
-Node: Extension Mechanism Outline938800
-Ref: figure-load-extension939239
-Ref: figure-register-new-function940804
-Ref: figure-call-new-function941896
-Node: Extension API Description943958
-Node: Extension API Functions Introduction945490
-Node: General Data Types950349
-Ref: General Data Types-Footnote-1956304
-Node: Memory Allocation Functions956603
-Ref: Memory Allocation Functions-Footnote-1959448
-Node: Constructor Functions959547
-Node: Registration Functions961292
-Node: Extension Functions961977
-Node: Exit Callback Functions964600
-Node: Extension Version String965850
-Node: Input Parsers966513
-Node: Output Wrappers976395
-Node: Two-way processors980907
-Node: Printing Messages983172
-Ref: Printing Messages-Footnote-1984343
-Node: Updating ERRNO984496
-Node: Requesting Values985235
-Ref: table-value-types-returned985972
-Node: Accessing Parameters986855
-Node: Symbol Table Access988090
-Node: Symbol table by name988602
-Node: Symbol table by cookie990623
-Ref: Symbol table by cookie-Footnote-1994775
-Node: Cached values994839
-Ref: Cached values-Footnote-1998346
-Node: Array Manipulation998437
-Ref: Array Manipulation-Footnote-1999528
-Node: Array Data Types999565
-Ref: Array Data Types-Footnote-11002223
-Node: Array Functions1002315
-Node: Flattening Arrays1006173
-Node: Creating Arrays1013081
-Node: Redirection API1017850
-Node: Extension API Variables1020681
-Node: Extension Versioning1021314
-Ref: gawk-api-version1021751
-Node: Extension API Informational Variables1023507
-Node: Extension API Boilerplate1024571
-Node: Finding Extensions1028385
-Node: Extension Example1028944
-Node: Internal File Description1029742
-Node: Internal File Ops1033822
-Ref: Internal File Ops-Footnote-11045584
-Node: Using Internal File Ops1045724
-Ref: Using Internal File Ops-Footnote-11048107
-Node: Extension Samples1048381
-Node: Extension Sample File Functions1049910
-Node: Extension Sample Fnmatch1057559
-Node: Extension Sample Fork1059046
-Node: Extension Sample Inplace1060264
-Node: Extension Sample Ord1063474
-Node: Extension Sample Readdir1064310
-Ref: table-readdir-file-types1065199
-Node: Extension Sample Revout1066004
-Node: Extension Sample Rev2way1066593
-Node: Extension Sample Read write array1067333
-Node: Extension Sample Readfile1069275
-Node: Extension Sample Time1070370
-Node: Extension Sample API Tests1071718
-Node: gawkextlib1072210
-Node: Extension summary1074657
-Node: Extension Exercises1078359
-Node: Language History1079857
-Node: V7/SVR3.11081513
-Node: SVR41083665
-Node: POSIX1085099
-Node: BTL1086478
-Node: POSIX/GNU1087207
-Node: Feature History1093069
-Node: Common Extensions1107439
-Node: Ranges and Locales1108722
-Ref: Ranges and Locales-Footnote-11113338
-Ref: Ranges and Locales-Footnote-21113365
-Ref: Ranges and Locales-Footnote-31113600
-Node: Contributors1113821
-Node: History summary1119381
-Node: Installation1120761
-Node: Gawk Distribution1121705
-Node: Getting1122189
-Node: Extracting1123150
-Node: Distribution contents1124788
-Node: Unix Installation1130882
-Node: Quick Installation1131564
-Node: Shell Startup Files1133978
-Node: Additional Configuration Options1135056
-Node: Configuration Philosophy1136861
-Node: Non-Unix Installation1139230
-Node: PC Installation1139688
-Node: PC Binary Installation1141008
-Node: PC Compiling1142860
-Ref: PC Compiling-Footnote-11145654
-Node: PC Testing1145763
-Node: PC Using1146943
-Ref: PC Using-Footnote-11151096
-Node: Cygwin1151169
-Node: MSYS1151939
-Node: VMS Installation1152440
-Node: VMS Compilation1153231
-Ref: VMS Compilation-Footnote-11154460
-Node: VMS Dynamic Extensions1154518
-Node: VMS Installation Details1156203
-Node: VMS Running1158456
-Node: VMS GNV1162735
-Node: VMS Old Gawk1163470
-Node: Bugs1163941
-Node: Other Versions1168256
-Node: Installation summary1174840
-Node: Notes1175891
-Node: Compatibility Mode1176756
-Node: Additions1177538
-Node: Accessing The Source1178463
-Node: Adding Code1179898
-Node: New Ports1186117
-Node: Derived Files1190605
-Ref: Derived Files-Footnote-11196090
-Ref: Derived Files-Footnote-21196125
-Ref: Derived Files-Footnote-31196723
-Node: Future Extensions1196837
-Node: Implementation Limitations1197495
-Node: Extension Design1198678
-Node: Old Extension Problems1199832
-Ref: Old Extension Problems-Footnote-11201350
-Node: Extension New Mechanism Goals1201407
-Ref: Extension New Mechanism Goals-Footnote-11204771
-Node: Extension Other Design Decisions1204960
-Node: Extension Future Growth1207073
-Node: Old Extension Mechanism1207909
-Node: Notes summary1209672
-Node: Basic Concepts1210854
-Node: Basic High Level1211535
-Ref: figure-general-flow1211817
-Ref: figure-process-flow1212502
-Ref: Basic High Level-Footnote-11215803
-Node: Basic Data Typing1215988
-Node: Glossary1219316
-Node: Copying1251263
-Node: GNU Free Documentation License1288802
-Node: Index1313920
+Ref: POSIX String Comparison-Footnote-1374012
+Ref: POSIX String Comparison-Footnote-2374151
+Node: Boolean Ops374235
+Ref: Boolean Ops-Footnote-1378717
+Node: Conditional Exp378809
+Node: Function Calls380545
+Node: Precedence384422
+Node: Locales388081
+Node: Expressions Summary389713
+Node: Patterns and Actions392286
+Node: Pattern Overview393406
+Node: Regexp Patterns395083
+Node: Expression Patterns395625
+Node: Ranges399406
+Node: BEGIN/END402514
+Node: Using BEGIN/END403275
+Ref: Using BEGIN/END-Footnote-1406011
+Node: I/O And BEGIN/END406117
+Node: BEGINFILE/ENDFILE408431
+Node: Empty411338
+Node: Using Shell Variables411655
+Node: Action Overview413929
+Node: Statements416254
+Node: If Statement418102
+Node: While Statement419597
+Node: Do Statement421625
+Node: For Statement422773
+Node: Switch Statement425931
+Node: Break Statement428317
+Node: Continue Statement430409
+Node: Next Statement432236
+Node: Nextfile Statement434619
+Node: Exit Statement437271
+Node: Built-in Variables439674
+Node: User-modified440807
+Node: Auto-set448393
+Ref: Auto-set-Footnote-1463046
+Ref: Auto-set-Footnote-2463252
+Node: ARGC and ARGV463308
+Node: Pattern Action Summary467521
+Node: Arrays469951
+Node: Array Basics471280
+Node: Array Intro472124
+Ref: figure-array-elements474099
+Ref: Array Intro-Footnote-1476803
+Node: Reference to Elements476931
+Node: Assigning Elements479395
+Node: Array Example479886
+Node: Scanning an Array481645
+Node: Controlling Scanning484667
+Ref: Controlling Scanning-Footnote-1490066
+Node: Numeric Array Subscripts490382
+Node: Uninitialized Subscripts492566
+Node: Delete494185
+Ref: Delete-Footnote-1496937
+Node: Multidimensional496994
+Node: Multiscanning500089
+Node: Arrays of Arrays501680
+Node: Arrays Summary506447
+Node: Functions508540
+Node: Built-in509578
+Node: Calling Built-in510659
+Node: Numeric Functions512655
+Ref: Numeric Functions-Footnote-1517488
+Ref: Numeric Functions-Footnote-2517845
+Ref: Numeric Functions-Footnote-3517893
+Node: String Functions518165
+Ref: String Functions-Footnote-1541669
+Ref: String Functions-Footnote-2541797
+Ref: String Functions-Footnote-3542045
+Node: Gory Details542132
+Ref: table-sub-escapes543923
+Ref: table-sub-proposed545442
+Ref: table-posix-sub546805
+Ref: table-gensub-escapes548346
+Ref: Gory Details-Footnote-1549169
+Node: I/O Functions549323
+Ref: table-system-return-values555905
+Ref: I/O Functions-Footnote-1557885
+Ref: I/O Functions-Footnote-2558033
+Node: Time Functions558153
+Ref: Time Functions-Footnote-1568658
+Ref: Time Functions-Footnote-2568726
+Ref: Time Functions-Footnote-3568884
+Ref: Time Functions-Footnote-4568995
+Ref: Time Functions-Footnote-5569107
+Ref: Time Functions-Footnote-6569334
+Node: Bitwise Functions569600
+Ref: table-bitwise-ops570194
+Ref: Bitwise Functions-Footnote-1574532
+Node: Type Functions574705
+Node: I18N Functions577237
+Node: User-defined578888
+Node: Definition Syntax579693
+Ref: Definition Syntax-Footnote-1585380
+Node: Function Example585451
+Ref: Function Example-Footnote-1588373
+Node: Function Caveats588395
+Node: Calling A Function588913
+Node: Variable Scope589871
+Node: Pass By Value/Reference592865
+Node: Return Statement596364
+Node: Dynamic Typing599343
+Node: Indirect Calls600273
+Ref: Indirect Calls-Footnote-1610524
+Node: Functions Summary610652
+Node: Library Functions613357
+Ref: Library Functions-Footnote-1616964
+Ref: Library Functions-Footnote-2617107
+Node: Library Names617278
+Ref: Library Names-Footnote-1620738
+Ref: Library Names-Footnote-2620961
+Node: General Functions621047
+Node: Strtonum Function622150
+Node: Assert Function625172
+Node: Round Function628498
+Node: Cliff Random Function630039
+Node: Ordinal Functions631055
+Ref: Ordinal Functions-Footnote-1634118
+Ref: Ordinal Functions-Footnote-2634370
+Node: Join Function634580
+Ref: Join Function-Footnote-1636350
+Node: Getlocaltime Function636550
+Node: Readfile Function640292
+Node: Shell Quoting642264
+Node: Data File Management643665
+Node: Filetrans Function644297
+Node: Rewind Function648393
+Node: File Checking650299
+Ref: File Checking-Footnote-1651633
+Node: Empty Files651834
+Node: Ignoring Assigns653813
+Node: Getopt Function655363
+Ref: Getopt Function-Footnote-1666832
+Node: Passwd Functions667032
+Ref: Passwd Functions-Footnote-1675871
+Node: Group Functions675959
+Ref: Group Functions-Footnote-1683856
+Node: Walking Arrays684063
+Node: Library Functions Summary687071
+Node: Library Exercises688477
+Node: Sample Programs688942
+Node: Running Examples689712
+Node: Clones690440
+Node: Cut Program691664
+Node: Egrep Program701593
+Ref: Egrep Program-Footnote-1709105
+Node: Id Program709215
+Node: Split Program712895
+Ref: Split Program-Footnote-1716354
+Node: Tee Program716483
+Node: Uniq Program719273
+Node: Wc Program726699
+Ref: Wc Program-Footnote-1730954
+Node: Miscellaneous Programs731048
+Node: Dupword Program732261
+Node: Alarm Program734291
+Node: Translate Program739146
+Ref: Translate Program-Footnote-1743711
+Node: Labels Program743981
+Ref: Labels Program-Footnote-1747332
+Node: Word Sorting747416
+Node: History Sorting751488
+Node: Extract Program753323
+Node: Simple Sed760852
+Node: Igawk Program763926
+Ref: Igawk Program-Footnote-1778257
+Ref: Igawk Program-Footnote-2778459
+Ref: Igawk Program-Footnote-3778581
+Node: Anagram Program778696
+Node: Signature Program781758
+Node: Programs Summary783005
+Node: Programs Exercises784219
+Ref: Programs Exercises-Footnote-1788348
+Node: Advanced Features788439
+Node: Nondecimal Data790429
+Node: Array Sorting792020
+Node: Controlling Array Traversal792720
+Ref: Controlling Array Traversal-Footnote-1801087
+Node: Array Sorting Functions801205
+Ref: Array Sorting Functions-Footnote-1806296
+Node: Two-way I/O806492
+Ref: Two-way I/O-Footnote-1813042
+Ref: Two-way I/O-Footnote-2813229
+Node: TCP/IP Networking813311
+Node: Profiling816429
+Ref: Profiling-Footnote-1824922
+Node: Advanced Features Summary825245
+Node: Internationalization827089
+Node: I18N and L10N828569
+Node: Explaining gettext829256
+Ref: Explaining gettext-Footnote-1835148
+Ref: Explaining gettext-Footnote-2835333
+Node: Programmer i18n835498
+Ref: Programmer i18n-Footnote-1840353
+Node: Translator i18n840402
+Node: String Extraction841196
+Ref: String Extraction-Footnote-1842328
+Node: Printf Ordering842414
+Ref: Printf Ordering-Footnote-1845200
+Node: I18N Portability845264
+Ref: I18N Portability-Footnote-1847720
+Node: I18N Example847783
+Ref: I18N Example-Footnote-1850589
+Node: Gawk I18N850662
+Node: I18N Summary851307
+Node: Debugger852648
+Node: Debugging853670
+Node: Debugging Concepts854111
+Node: Debugging Terms855920
+Node: Awk Debugging858495
+Node: Sample Debugging Session859401
+Node: Debugger Invocation859935
+Node: Finding The Bug861321
+Node: List of Debugger Commands867799
+Node: Breakpoint Control869132
+Node: Debugger Execution Control872826
+Node: Viewing And Changing Data876188
+Node: Execution Stack879562
+Node: Debugger Info881199
+Node: Miscellaneous Debugger Commands885270
+Node: Readline Support890358
+Node: Limitations891254
+Ref: Limitations-Footnote-1895485
+Node: Debugging Summary895536
+Node: Arbitrary Precision Arithmetic896815
+Node: Computer Arithmetic898231
+Ref: table-numeric-ranges901822
+Ref: Computer Arithmetic-Footnote-1902544
+Node: Math Definitions902601
+Ref: table-ieee-formats905915
+Ref: Math Definitions-Footnote-1906518
+Node: MPFR features906623
+Node: FP Math Caution908340
+Ref: FP Math Caution-Footnote-1909412
+Node: Inexactness of computations909781
+Node: Inexact representation910741
+Node: Comparing FP Values912101
+Node: Errors accumulate913183
+Node: Getting Accuracy914616
+Node: Try To Round917326
+Node: Setting precision918225
+Ref: table-predefined-precision-strings918922
+Node: Setting the rounding mode920752
+Ref: table-gawk-rounding-modes921126
+Ref: Setting the rounding mode-Footnote-1924534
+Node: Arbitrary Precision Integers924713
+Ref: Arbitrary Precision Integers-Footnote-1929630
+Node: POSIX Floating Point Problems929779
+Ref: POSIX Floating Point Problems-Footnote-1933661
+Node: Floating point summary933699
+Node: Dynamic Extensions935889
+Node: Extension Intro937442
+Node: Plugin License938708
+Node: Extension Mechanism Outline939505
+Ref: figure-load-extension939944
+Ref: figure-register-new-function941509
+Ref: figure-call-new-function942601
+Node: Extension API Description944663
+Node: Extension API Functions Introduction946195
+Node: General Data Types951054
+Ref: General Data Types-Footnote-1957009
+Node: Memory Allocation Functions957308
+Ref: Memory Allocation Functions-Footnote-1960153
+Node: Constructor Functions960252
+Node: Registration Functions961997
+Node: Extension Functions962682
+Node: Exit Callback Functions965305
+Node: Extension Version String966555
+Node: Input Parsers967218
+Node: Output Wrappers977100
+Node: Two-way processors981612
+Node: Printing Messages983877
+Ref: Printing Messages-Footnote-1985048
+Node: Updating ERRNO985201
+Node: Requesting Values985940
+Ref: table-value-types-returned986677
+Node: Accessing Parameters987560
+Node: Symbol Table Access988795
+Node: Symbol table by name989307
+Node: Symbol table by cookie991328
+Ref: Symbol table by cookie-Footnote-1995480
+Node: Cached values995544
+Ref: Cached values-Footnote-1999051
+Node: Array Manipulation999142
+Ref: Array Manipulation-Footnote-11000233
+Node: Array Data Types1000270
+Ref: Array Data Types-Footnote-11002928
+Node: Array Functions1003020
+Node: Flattening Arrays1006878
+Node: Creating Arrays1013786
+Node: Redirection API1018555
+Node: Extension API Variables1021386
+Node: Extension Versioning1022019
+Ref: gawk-api-version1022456
+Node: Extension API Informational Variables1024212
+Node: Extension API Boilerplate1025276
+Node: Finding Extensions1029090
+Node: Extension Example1029649
+Node: Internal File Description1030447
+Node: Internal File Ops1034527
+Ref: Internal File Ops-Footnote-11046289
+Node: Using Internal File Ops1046429
+Ref: Using Internal File Ops-Footnote-11048812
+Node: Extension Samples1049086
+Node: Extension Sample File Functions1050615
+Node: Extension Sample Fnmatch1058264
+Node: Extension Sample Fork1059751
+Node: Extension Sample Inplace1060969
+Node: Extension Sample Ord1064179
+Node: Extension Sample Readdir1065015
+Ref: table-readdir-file-types1065904
+Node: Extension Sample Revout1066709
+Node: Extension Sample Rev2way1067298
+Node: Extension Sample Read write array1068038
+Node: Extension Sample Readfile1069980
+Node: Extension Sample Time1071075
+Node: Extension Sample API Tests1072423
+Node: gawkextlib1072915
+Node: Extension summary1075362
+Node: Extension Exercises1079064
+Node: Language History1080562
+Node: V7/SVR3.11082218
+Node: SVR41084370
+Node: POSIX1085804
+Node: BTL1087183
+Node: POSIX/GNU1087912
+Node: Feature History1093774
+Node: Common Extensions1108144
+Node: Ranges and Locales1109427
+Ref: Ranges and Locales-Footnote-11114043
+Ref: Ranges and Locales-Footnote-21114070
+Ref: Ranges and Locales-Footnote-31114305
+Node: Contributors1114526
+Node: History summary1120086
+Node: Installation1121466
+Node: Gawk Distribution1122410
+Node: Getting1122894
+Node: Extracting1123855
+Node: Distribution contents1125493
+Node: Unix Installation1131587
+Node: Quick Installation1132269
+Node: Shell Startup Files1134683
+Node: Additional Configuration Options1135761
+Node: Configuration Philosophy1137566
+Node: Non-Unix Installation1139935
+Node: PC Installation1140393
+Node: PC Binary Installation1141713
+Node: PC Compiling1143565
+Ref: PC Compiling-Footnote-11146359
+Node: PC Testing1146468
+Node: PC Using1147648
+Ref: PC Using-Footnote-11151801
+Node: Cygwin1151874
+Node: MSYS1152644
+Node: VMS Installation1153145
+Node: VMS Compilation1153936
+Ref: VMS Compilation-Footnote-11155165
+Node: VMS Dynamic Extensions1155223
+Node: VMS Installation Details1156908
+Node: VMS Running1159161
+Node: VMS GNV1163440
+Node: VMS Old Gawk1164175
+Node: Bugs1164646
+Node: Other Versions1168961
+Node: Installation summary1175545
+Node: Notes1176596
+Node: Compatibility Mode1177461
+Node: Additions1178243
+Node: Accessing The Source1179168
+Node: Adding Code1180603
+Node: New Ports1186822
+Node: Derived Files1191310
+Ref: Derived Files-Footnote-11196795
+Ref: Derived Files-Footnote-21196830
+Ref: Derived Files-Footnote-31197428
+Node: Future Extensions1197542
+Node: Implementation Limitations1198200
+Node: Extension Design1199383
+Node: Old Extension Problems1200537
+Ref: Old Extension Problems-Footnote-11202055
+Node: Extension New Mechanism Goals1202112
+Ref: Extension New Mechanism Goals-Footnote-11205476
+Node: Extension Other Design Decisions1205665
+Node: Extension Future Growth1207778
+Node: Old Extension Mechanism1208614
+Node: Notes summary1210377
+Node: Basic Concepts1211559
+Node: Basic High Level1212240
+Ref: figure-general-flow1212522
+Ref: figure-process-flow1213207
+Ref: Basic High Level-Footnote-11216508
+Node: Basic Data Typing1216693
+Node: Glossary1220021
+Node: Copying1251968
+Node: GNU Free Documentation License1289507
+Node: Index1314625
 
 End Tag Table
diff --git a/doc/gawk.texi b/doc/gawk.texi
index a4b61895..90f6dcfc 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -12577,19 +12577,19 @@ One special place where @code{/foo/} is @emph{not} an abbreviation for
 where this is discussed in more detail.
 
 @node POSIX String Comparison
-@subsubsection String Comparison with POSIX Rules
+@subsubsection String Comparison Based on Locale Collating Order
 
-The POSIX standard says that string comparison is performed based
-on the locale's @dfn{collating order}. This is the order in which
-characters sort, as defined by the locale (for more discussion,
-@pxref{Locales}).  This order is usually very different
-from the results obtained when doing straight character-by-character
-comparison.@footnote{Technically, string comparison is supposed
-to behave the same way as if the strings were compared with the C
-@code{strcoll()} function.}
+The POSIX standard used to say that all string comparisons are
+performed based on the locale's @dfn{collating order}. This
+is the order in which characters sort, as defined by the locale
+(for more discussion, @pxref{Locales}).  This order is usually very
+different from the results obtained when doing straight byte-by-byte
+comparison.@footnote{Technically, string comparison is supposed to behave
+the same way as if the strings were compared with the C @code{strcoll()}
+function.}
 
 Because this behavior differs considerably from existing practice,
-@command{gawk} only implements it when in POSIX mode (@pxref{Options}).
+@command{gawk} only implemented it when in POSIX mode (@pxref{Options}).
 Here is an example to illustrate the difference, in an @code{en_US.UTF-8}
 locale:
 
@@ -12602,6 +12602,26 @@ $ @kbd{gawk --posix 'BEGIN @{ printf("ABC < abc = %s\n",}
 @print{} ABC < abc = FALSE
 @end example
 
+Fortunately, as of August 2016, comparison based on locale
+collating order is no longer required for the @code{==} and @code{!=}
+operators.@footnote{See @uref{http://austingroupbugs.net/view.php?id=1070,
+the Austin Group website}.} However, comparison based on locales is still
+required for @code{<}, @code{<=}, @code{>}, and @code{>=}.  POSIX thus
+recommends the following:
+
+@quotation
+Since the @code{==} operator checks whether strings are identical,
+not whether they collate equally, applications needing to check whether
+strings collate equally can use:
+
+@example
+a <= b && a >= b
+@end example
+@end quotation
+
+As of @value{PVERSION} 4.2, @command{gawk} continues to use locale
+collating order for @code{<}, @code{<=}, @code{>}, and @code{>=} only
+in POSIX mode.
 
 @node Boolean Ops
 @subsection Boolean Expressions
@@ -37385,7 +37405,7 @@ and
 @uref{http://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xbd_chap09.html#tag_21_09_03_05, its rationale}.}
 
 By using this lovely technical term, the standard gives license
-to implementors to implement ranges in whatever way they choose.
+to implementers to implement ranges in whatever way they choose.
 The @command{gawk} maintainer chose to apply the pre-POSIX meaning
 both with the default regexp matching and when @option{--traditional} or
 @option{--posix} are used.
diff --git a/doc/gawktexi.in b/doc/gawktexi.in
index 9c2864cd..782884bb 100644
--- a/doc/gawktexi.in
+++ b/doc/gawktexi.in
@@ -11896,19 +11896,19 @@ One special place where @code{/foo/} is @emph{not} an abbreviation for
 where this is discussed in more detail.
 
 @node POSIX String Comparison
-@subsubsection String Comparison with POSIX Rules
+@subsubsection String Comparison Based on Locale Collating Order
 
-The POSIX standard says that string comparison is performed based
-on the locale's @dfn{collating order}. This is the order in which
-characters sort, as defined by the locale (for more discussion,
-@pxref{Locales}).  This order is usually very different
-from the results obtained when doing straight character-by-character
-comparison.@footnote{Technically, string comparison is supposed
-to behave the same way as if the strings were compared with the C
-@code{strcoll()} function.}
+The POSIX standard used to say that all string comparisons are
+performed based on the locale's @dfn{collating order}. This
+is the order in which characters sort, as defined by the locale
+(for more discussion, @pxref{Locales}).  This order is usually very
+different from the results obtained when doing straight byte-by-byte
+comparison.@footnote{Technically, string comparison is supposed to behave
+the same way as if the strings were compared with the C @code{strcoll()}
+function.}
 
 Because this behavior differs considerably from existing practice,
-@command{gawk} only implements it when in POSIX mode (@pxref{Options}).
+@command{gawk} only implemented it when in POSIX mode (@pxref{Options}).
 Here is an example to illustrate the difference, in an @code{en_US.UTF-8}
 locale:
 
@@ -11921,6 +11921,26 @@ $ @kbd{gawk --posix 'BEGIN @{ printf("ABC < abc = %s\n",}
 @print{} ABC < abc = FALSE
 @end example
 
+Fortunately, as of August 2016, comparison based on locale
+collating order is no longer required for the @code{==} and @code{!=}
+operators.@footnote{See @uref{http://austingroupbugs.net/view.php?id=1070,
+the Austin Group website}.} However, comparison based on locales is still
+required for @code{<}, @code{<=}, @code{>}, and @code{>=}.  POSIX thus
+recommends the following:
+
+@quotation
+Since the @code{==} operator checks whether strings are identical,
+not whether they collate equally, applications needing to check whether
+strings collate equally can use:
+
+@example
+a <= b && a >= b
+@end example
+@end quotation
+
+As of @value{PVERSION} 4.2, @command{gawk} continues to use locale
+collating order for @code{<}, @code{<=}, @code{>}, and @code{>=} only
+in POSIX mode.
 
 @node Boolean Ops
 @subsection Boolean Expressions
@@ -36467,7 +36487,7 @@ and
 @uref{http://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xbd_chap09.html#tag_21_09_03_05, its rationale}.}
 
 By using this lovely technical term, the standard gives license
-to implementors to implement ranges in whatever way they choose.
+to implementers to implement ranges in whatever way they choose.
 The @command{gawk} maintainer chose to apply the pre-POSIX meaning
 both with the default regexp matching and when @option{--traditional} or
 @option{--posix} are used.
diff --git a/eval.c b/eval.c
index fc468543..cfd71b43 100644
--- a/eval.c
+++ b/eval.c
@@ -573,7 +573,7 @@ posix_compare(NODE *s1, NODE *s2)
 /* cmp_nodes --- compare two nodes, returning negative, 0, positive */
 
 int
-cmp_nodes(NODE *t1, NODE *t2)
+cmp_nodes(NODE *t1, NODE *t2, bool use_strcmp)
 {
 	int ret = 0;
 	size_t len1, len2;
@@ -596,7 +596,7 @@ cmp_nodes(NODE *t1, NODE *t2)
 	if (len1 == 0 || len2 == 0)
 		return ldiff;
 
-	if (do_posix)
+	if (do_posix && ! use_strcmp)
 		return posix_compare(t1, t2);
 
 	l = (ldiff <= 0 ? len1 : len2);
@@ -882,7 +882,7 @@ fmt_index(NODE *n)
 		emalloc(fmt_list, NODE **, fmt_num*sizeof(*fmt_list), "fmt_index");
 	n = force_string(n);
 	while (ix < fmt_hiwater) {
-		if (cmp_nodes(fmt_list[ix], n) == 0)
+		if (cmp_nodes(fmt_list[ix], n, true) == 0)
 			return ix;
 		ix++;
 	}
@@ -1502,10 +1502,15 @@ eval_condition(NODE *t)
 	return boolval(t);
 }
 
+typedef enum {	/* kind of comparison requested from cmp_scalars() */
+	SCALAR_EQ_NEQ,	/* == and !=: cmp_nodes() bypasses posix_compare() even when do_posix */
+	SCALAR_RELATIONAL	/* <, <=, >, >=: cmp_nodes() uses posix_compare() when do_posix */
+} scalar_cmp_t;
+
 /* cmp_scalars -- compare two nodes on the stack */
 
 static inline int
-cmp_scalars()
+cmp_scalars(scalar_cmp_t comparison_type)
 {
 	NODE *t1, *t2;
 	int di;
@@ -1516,7 +1521,7 @@ cmp_scalars()
 		DEREF(t2);
 		fatal(_("attempt to use array `%s' in a scalar context"), array_vname(t1));
 	}
-	di = cmp_nodes(t1, t2);
+	di = cmp_nodes(t1, t2, comparison_type == SCALAR_EQ_NEQ);
 	DEREF(t1);
 	DEREF(t2);
 	return di;
diff --git a/extension/configure.ac b/extension/configure.ac
index b723a3c1..b5b27d03 100644
--- a/extension/configure.ac
+++ b/extension/configure.ac
@@ -23,7 +23,7 @@ dnl
 
 dnl Process this file with autoconf to produce a configure script.
 
-AC_INIT([GNU Awk Bundled Extensions],[4.1.3],[bug-gawk@gnu.org],[gawk-extensions])
+AC_INIT([GNU Awk Bundled Extensions],[4.1.4],[bug-gawk@gnu.org],[gawk-extensions])
 
 AC_CONFIG_MACRO_DIR([m4])
 AC_CONFIG_AUX_DIR([build-aux])
diff --git a/interpret.h b/interpret.h
index 2f85049a..46c44cdb 100644
--- a/interpret.h
+++ b/interpret.h
@@ -446,37 +446,37 @@ uninitialized_scalar:
 			break;
 
 		case Op_equal:
-			r = node_Boolean[cmp_scalars() == 0];
+			r = node_Boolean[cmp_scalars(SCALAR_EQ_NEQ) == 0];
 			UPREF(r);
 			REPLACE(r);
 			break;
 
 		case Op_notequal:
-			r = node_Boolean[cmp_scalars() != 0];
+			r = node_Boolean[cmp_scalars(SCALAR_EQ_NEQ) != 0];
 			UPREF(r);
 			REPLACE(r);
 			break;
 
 		case Op_less:
-			r = node_Boolean[cmp_scalars() < 0];
+			r = node_Boolean[cmp_scalars(SCALAR_RELATIONAL) < 0];
 			UPREF(r);
 			REPLACE(r);
 			break;
 
 		case Op_greater:
-			r = node_Boolean[cmp_scalars() > 0];
+			r = node_Boolean[cmp_scalars(SCALAR_RELATIONAL) > 0];
 			UPREF(r);
 			REPLACE(r);
 			break;
 
 		case Op_leq:
-			r = node_Boolean[cmp_scalars() <= 0];
+			r = node_Boolean[cmp_scalars(SCALAR_RELATIONAL) <= 0];
 			UPREF(r);
 			REPLACE(r);
 			break;
 
 		case Op_geq:
-			r = node_Boolean[cmp_scalars() >= 0];
+			r = node_Boolean[cmp_scalars(SCALAR_RELATIONAL) >= 0];
 			UPREF(r);
 			REPLACE(r);
 			break;
@@ -834,12 +834,11 @@ mod:
 				t2 = TOP_SCALAR();	/* switch expression */
 				t2 = force_string(t2);
 				rp = re_update(m);
-				di = (research(rp, t2->stptr, 0, t2->stlen,
-							avoid_dfa(m, t2->stptr, t2->stlen)) >= 0);
+				di = (research(rp, t2->stptr, 0, t2->stlen, RE_NO_FLAGS) >= 0);
 			} else {
 				t1 = POP_SCALAR();	/* case value */
 				t2 = TOP_SCALAR();	/* switch expression */
-				di = (cmp_nodes(t2, t1) == 0);
+				di = (cmp_nodes(t2, t1, true) == 0);
 				DEREF(t1);
 			}
 
@@ -999,20 +998,7 @@ arrayfor:
 			t1 = *get_field(0, (Func_ptr *) 0);
 match_re:
 			rp = re_update(m);
-			/*
-			 * Any place where research() is called with a last parameter of
-			 * zero, we need to use the avoid_dfa test. This appears here and
-			 * in the code for Op_K_case.
-			 *
-			 * A new or improved dfa that distinguishes beginning/end of
-			 * string from beginning/end of line will allow us to get rid of
-			 * this hack.
-			 *
-			 * The avoid_dfa() function is in re.c; it is not very smart.
-			 */
-
-			di = research(rp, t1->stptr, 0, t1->stlen,
-								avoid_dfa(m, t1->stptr, t1->stlen));
+			di = research(rp, t1->stptr, 0, t1->stlen, RE_NO_FLAGS);
 			di = (di == -1) ^ (op != Op_nomatch);
 			if (op != Op_match_rec) {
 				decr_sp();
diff --git a/re.c b/re.c
index c7899694..c822c90f 100644
--- a/re.c
+++ b/re.c
@@ -170,7 +170,6 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
 
 	emalloc(rp, Regexp *, sizeof(*rp), "make_regexp");
 	memset((char *) rp, 0, sizeof(*rp));
-	rp->dfareg = NULL;
 	rp->pat.allocated = 0;	/* regex will allocate the buffer */
 	emalloc(rp->pat.fastmap, char *, 256, "make_regexp");
 
@@ -223,12 +222,11 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
 	/* gack. this must be done *after* re_compile_pattern */
 	rp->pat.newline_anchor = false; /* don't get \n in middle of string */
 	if (dfa && ! no_dfa) {
-		rp->dfa = true;
 		rp->dfareg = dfaalloc();
 		dfasyntax(rp->dfareg, dfa_syn, ignorecase, '\n');
 		dfacomp(buf, len, rp->dfareg, true);
 	} else
-		rp->dfa = false;
+		rp->dfareg = NULL;
 	rp->has_anchor = has_anchor;
 
 	/* Additional flags that help with RS as regexp. */
@@ -278,26 +276,25 @@ research(Regexp *rp, char *str, int start,
 	 * starts in the middle of a string, so don't bother trying it
 	 * in that case.
 	 */
-	if (rp->dfa && ! no_bol && start == 0) {
-		char save;
-		size_t count = 0;
+	if (rp->dfareg != NULL && ! no_bol && start == 0) {
 		struct dfa *superset = dfasuperset(rp->dfareg);
-		/*
-		 * dfa likes to stick a '\n' right after the matched
-		 * text.  So we just save and restore the character.
-		 */
-		save = str[start+len];
 		if (superset)
 			ret = dfaexec(superset, str+start, str+start+len,
 							true, NULL, NULL);
-		if (ret)
+
+		if (ret && ((! need_start && ! rp->has_anchor)
+				|| (! superset && dfaisfast(rp->dfareg))))
 			ret = dfaexec(rp->dfareg, str+start, str+start+len,
-						true, &count, &try_backref);
-		str[start+len] = save;
+						true, NULL, &try_backref);
 	}
 
 	if (ret) {
-		if (need_start || rp->dfa == false || try_backref) {
+		if (   rp->dfareg == NULL
+			|| start != 0
+			|| no_bol
+			|| need_start
+			|| rp->has_anchor
+			|| try_backref) {
 			/*
 			 * Passing NULL as last arg speeds up search for cases
 			 * where we don't need the start/end info.
@@ -326,7 +323,7 @@ refree(Regexp *rp)
 		free(rp->regs.start);
 	if (rp->regs.end)
 		free(rp->regs.end);
-	if (rp->dfa) {
+	if (rp->dfareg != NULL) {
 		dfafree(rp->dfareg);
 		free(rp->dfareg);
 	}
@@ -359,7 +356,7 @@ re_update(NODE *t)
 		t1 = t->re_exp;
 		if (t->re_text != NULL) {
 			/* if contents haven't changed, just return it */
-			if (cmp_nodes(t->re_text, t1) == 0)
+			if (cmp_nodes(t->re_text, t1, true) == 0)
 				return t->re_reg;
 			/* things changed, fall through to recompile */
 			unref(t->re_text);
@@ -425,32 +422,6 @@ resetup()
 	dfa_init();
 }
 
-/* avoid_dfa --- return true if we should not use the DFA matcher */
-
-int
-avoid_dfa(NODE *re, char *str, size_t len)
-{
-	char *end;
-
-	/*
-	 * f = @/.../
-	 * if ("foo" ~ f) ...
-	 *
-	 * This creates a Node_dynregex with NULL re_reg.
-	 */
-	if (re->re_reg == NULL)
-		return false;
-
-	if (! re->re_reg->has_anchor)
-		return false;
-
-	for (end = str + len; str < end; str++)
-		if (*str == '\n')
-			return true;
-
-	return false;
-}
-
 /* reisstring --- return true if the RE match is a simple string match */
 
 int