Merge branch 'master' into feature/typed-regex

author: Arnold D. Robbins <arnold@skeeve.com> 2016-08-25 22:14:15 +0300
committer: Arnold D. Robbins <arnold@skeeve.com> 2016-08-25 22:14:15 +0300
commit: b03d089e9b87c4e64bd539a1703e740923a67aa4 (patch)
tree: c7351e0b46c45d282eba64e478c99c0771a055a1
parent: e0dd835cc155c900ca9725a0d36eb0f5a856d9bf (diff)
parent: 00682d87a1a1c0535c0fa5adb27867578dc76d49 (diff)
download: egawk-b03d089e9b87c4e64bd539a1703e740923a67aa4.tar.gz
egawk-b03d089e9b87c4e64bd539a1703e740923a67aa4.tar.bz2
egawk-b03d089e9b87c4e64bd539a1703e740923a67aa4.zip
12 files changed, 761 insertions, 706 deletions
diff --git a/ChangeLog b/ChangeLog
index 54242f64..75d1bd37 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,38 @@
 2016-08-25         Arnold D. Robbins     <arnold@skeeve.com>
 
+	POSIX now says use strcmp for == and !=. Thanks to Chet Ramey
+	for pointing me at the change.  Make it so:
+
+	* awk.h (cmp_nodes): New 3rd param indicating strcmp, not strcoll.
+	* debug.c (cmp_val): Update call to cmp_nodes.
+	* eval.c (cmp_nodes): New 3rd param indicating strcmp, not strcoll.
+	Adjust code and all callers.
+	(scalar_cmp_t): New enum type. Used in ...
+	(cmp_scalars): ... in order to call cmp_nodes correctly.
+	* interpret.h: Use the enum type in calls to cmp_scalars.
+	* re.c (re_update): Adjust call to cmp_nodes.
+
+2016-08-25         Norihiro Tanaka      <noritnk@kcn.ne.jp>
+
+	* awk.h (struct Regexp): Remove dfa.  Now dfareg instead of it.  All
+	referrers changed.
+	* re.c (research): Arrange caller of dfaexec and research.
+	* (avoid_dfa): Removed.  All callers changed.
+	* awk.h (avoid_dfa): Removed.
+
+	Other changes by Arnold Robbins:
+
+	* awk.h (struct Regexp): Change various boolean members to bool.
+	(RE_NO_FLAGS): New #define.
+	* interpret.h: Use RE_NO_FLAGS instead of zero.
+	* re.c (research): Prettify the logic a little bit.
+
+2016-08-25         Arnold D. Robbins     <arnold@skeeve.com>
+
+	* dfa.c: Sync with grep.
+
+2016-08-25         Arnold D. Robbins     <arnold@skeeve.com>
+
 	* 4.1.4: Release tar ball made.
 
 2016-08-23         Arnold D. Robbins     <arnold@skeeve.com>
diff --git a/awk.h b/awk.h
index f11105ba..89106588 100644
--- a/awk.h
+++ b/awk.h
@@ -206,11 +206,10 @@ typedef struct Regexp {
 	struct re_pattern_buffer pat;
 	struct re_registers regs;
 	struct dfa *dfareg;
-	short dfa;
-	short has_anchor;	/* speed up of avoid_dfa kludge, temporary */
-	short non_empty;	/* for use in fpat_parse_field */
-	short has_meta;		/* re has meta chars so (probably) isn't simple string */
-	short maybe_long;	/* re has meta chars that can match long text */
+	bool has_anchor;	/* re has anchors which dfa avoids */
+	bool non_empty;		/* for use in fpat_parse_field */
+	bool has_meta;		/* re has meta chars so (probably) isn't simple string */
+	bool maybe_long;	/* re has meta chars that can match long text */
 } Regexp;
 #define	RESTART(rp,s)	(rp)->regs.start[0]
 #define	REEND(rp,s)	(rp)->regs.end[0]
@@ -219,6 +218,7 @@ typedef struct Regexp {
 #define	NUMSUBPATS(rp,s)	(rp)->regs.num_regs
 
 /* regexp matching flags: */
+#define RE_NO_FLAGS	0	/* empty flags */
 #define RE_NEED_START	1	/* need to know start/end of match */
 #define RE_NO_BOL	2	/* not allowed to match ^ in regexp */
 
@@ -1443,7 +1443,7 @@ extern int sanitize_exit_status(int status);
 extern void PUSH_CODE(INSTRUCTION *cp);
 extern INSTRUCTION *POP_CODE(void);
 extern void init_interpret(void);
-extern int cmp_nodes(NODE *t1, NODE *t2);
+extern int cmp_nodes(NODE *t1, NODE *t2, bool use_strcmp);
 extern int cmp_awknums(const NODE *t1, const NODE *t2);
 extern void set_IGNORECASE(void);
 extern void set_OFS(void);
@@ -1651,7 +1651,6 @@ extern void reg_error(const char *s);
 extern Regexp *re_update(NODE *t);
 extern void resyntax(int syntax);
 extern void resetup(void);
-extern int avoid_dfa(NODE *re, char *str, size_t len);
 extern int reisstring(const char *text, size_t len, Regexp *re, const char *buf);
 extern int get_numbase(const char *str, bool use_locale);
 
diff --git a/debug.c b/debug.c
index a0830621..c3d149d6 100644
--- a/debug.c
+++ b/debug.c
@@ -1670,7 +1670,7 @@ cmp_val(struct list_item *w, NODE *old, NODE *new)
 
 	if (new->type == Node_var_array)	/* 5 */
 		return true;
-	return cmp_nodes(old, new);			/* 4 */
+	return cmp_nodes(old, new, true);	/* 4 */
 }
 
 /* watchpoint_triggered --- check if we should stop at this watchpoint;
diff --git a/dfa.c b/dfa.c
index cb11043e..85cb46ad 100644
--- a/dfa.c
+++ b/dfa.c
@@ -387,8 +387,8 @@ struct regex_syntax
    meaning of the @#%!@#%^!@ syntax bits.  */
 struct lexer_state
 {
-  char const *lexptr;	/* Pointer to next input character.  */
-  size_t lexleft;	/* Number of characters remaining.  */
+  char const *ptr;	/* Pointer to next input character.  */
+  size_t left;		/* Number of characters remaining.  */
   token lasttok;	/* Previous token returned; initially END.  */
   size_t parens;	/* Count of outstanding left parens.  */
   int minrep, maxrep;	/* Repeat counts for {m,n}.  */
@@ -429,10 +429,10 @@ struct dfa
   size_t calloc;                /* Number of charclasses allocated.  */
 
   /* Scanner state */
-  struct lexer_state lexstate;
+  struct lexer_state lex;
 
   /* Parser state */
-  struct parser_state parsestate;
+  struct parser_state parse;
 
   /* Fields filled by the parser.  */
   token *tokens;                /* Postfix parse array.  */
@@ -910,7 +910,7 @@ using_simple_locale (struct dfa const *dfa)
      && '}' == 125 && '~' == 126)
   };
 
-  return (!native_c_charset || dfa->multibyte) ? false : unibyte_c;
+  return (native_c_charset & !dfa->multibyte) | unibyte_c;
 }
 
 /* Fetch the next lexical input character.  Set C (of type int) to the
@@ -922,23 +922,23 @@ using_simple_locale (struct dfa const *dfa)
    otherwise.  */
 # define FETCH_WC(dfa, c, wc, eoferr)		\
   do {						\
-    if (! dfa->lexstate.lexleft)		\
+    if (! (dfa)->lex.left)			\
       {						\
         if ((eoferr) != 0)			\
           dfaerror (eoferr);			\
         else					\
-          return dfa->lexstate.lasttok = END;	\
+          return (dfa)->lex.lasttok = END;	\
       }						\
     else					\
       {						\
         wint_t _wc;				\
-        size_t nbytes = mbs_to_wchar (&_wc, dfa->lexstate.lexptr, \
-                                      dfa->lexstate.lexleft, dfa); \
-        dfa->lexstate.cur_mb_len = nbytes;	\
+        size_t nbytes = mbs_to_wchar (&_wc, (dfa)->lex.ptr, \
+                                      (dfa)->lex.left, dfa); \
+        (dfa)->lex.cur_mb_len = nbytes;		\
         (wc) = _wc;				\
-        (c) = nbytes == 1 ? to_uchar (*dfa->lexstate.lexptr) : EOF; \
-        dfa->lexstate.lexptr += nbytes;		\
-        dfa->lexstate.lexleft -= nbytes;	\
+        (c) = nbytes == 1 ? to_uchar ((dfa)->lex.ptr[0]) : EOF; \
+        (dfa)->lex.ptr += nbytes;		\
+        (dfa)->lex.left -= nbytes;		\
       }						\
   } while (false)
 
@@ -1112,8 +1112,8 @@ parse_bracket_exp (struct dfa *dfa)
               for (;;)
                 {
                   FETCH_WC (dfa, c, wc, _("unbalanced ["));
-                  if ((c == c1 && *dfa->lexstate.lexptr == ']')
-                      || dfa->lexstate.lexleft == 0)
+                  if (dfa->lex.left == 0
+                      || (c == c1 && dfa->lex.ptr[0] == ']'))
                     break;
                   if (len < MAX_BRACKET_STRING_LEN)
                     str[len++] = c;
@@ -1133,8 +1133,8 @@ parse_bracket_exp (struct dfa *dfa)
                 {
                   char const *class
                     = (dfa->syntax.case_fold && (STREQ (str, "upper")
-                                                 || STREQ (str, "lower")) ?
-                                                      "alpha" : str);
+                                                 || STREQ (str, "lower"))
+                       ? "alpha" : str);
                   const struct dfa_ctype *pred = find_pred (class);
                   if (!pred)
                     dfaerror (_("invalid character class"));
@@ -1174,7 +1174,7 @@ parse_bracket_exp (struct dfa *dfa)
           /* A bracket expression like [a-[.aa.]] matches an unknown set.
              Treat it like [-a[.aa.]] while parsing it, and
              remember that the set is unknown.  */
-          if (c2 == '[' && *dfa->lexstate.lexptr == '.')
+          if (c2 == '[' && dfa->lex.ptr[0] == '.')
             {
               known_bracket_exp = false;
               c2 = ']';
@@ -1184,8 +1184,8 @@ parse_bracket_exp (struct dfa *dfa)
             {
               /* In the case [x-], the - is an ordinary hyphen,
                  which is left in c1, the lookahead character.  */
-              dfa->lexstate.lexptr -= dfa->lexstate.cur_mb_len;
-              dfa->lexstate.lexleft += dfa->lexstate.cur_mb_len;
+              dfa->lex.ptr -= dfa->lex.cur_mb_len;
+              dfa->lex.left += dfa->lex.cur_mb_len;
             }
           else
             {
@@ -1283,19 +1283,27 @@ parse_bracket_exp (struct dfa *dfa)
   return CSET + dfa_charclass_index (dfa, ccl);
 }
 
-#define PUSH_LEX_STATE(s)			\
-  do						\
-    {						\
-      char const *lexptr_saved = dfa->lexstate.lexptr;	\
-      size_t lexleft_saved = dfa->lexstate.lexleft;		\
-      dfa->lexstate.lexptr = (s);				\
-      dfa->lexstate.lexleft = strlen (dfa->lexstate.lexptr)
+struct lexptr
+{
+  char const *ptr;
+  size_t left;
+};
+
+static void
+push_lex_state (struct dfa *dfa, struct lexptr *ls, char const *s)
+{
+  ls->ptr = dfa->lex.ptr;
+  ls->left = dfa->lex.left;
+  dfa->lex.ptr = s;
+  dfa->lex.left = strlen (s);
+}
 
-#define POP_LEX_STATE()				\
-      dfa->lexstate.lexptr = lexptr_saved;			\
-      dfa->lexstate.lexleft = lexleft_saved;			\
-    }						\
-  while (false)
+static void
+pop_lex_state (struct dfa *dfa, struct lexptr const *ls)
+{
+  dfa->lex.ptr = ls->ptr;
+  dfa->lex.left = ls->left;
+}
 
 static token
 lex (struct dfa *dfa)
@@ -1313,14 +1321,14 @@ lex (struct dfa *dfa)
      "if (backslash) ...".  */
   for (i = 0; i < 2; ++i)
     {
-      FETCH_WC (dfa, c, dfa->lexstate.wctok, NULL);
+      FETCH_WC (dfa, c, dfa->lex.wctok, NULL);
 
       switch (c)
         {
         case '\\':
           if (backslash)
             goto normal_char;
-          if (dfa->lexstate.lexleft == 0)
+          if (dfa->lex.left == 0)
             dfaerror (_("unfinished \\ escape"));
           backslash = true;
           break;
@@ -1329,28 +1337,29 @@ lex (struct dfa *dfa)
           if (backslash)
             goto normal_char;
           if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS
-              || dfa->lexstate.lasttok == END || dfa->lexstate.lasttok == LPAREN
-              || dfa->lexstate.lasttok == OR)
-            return dfa->lexstate.lasttok = BEGLINE;
+              || dfa->lex.lasttok == END || dfa->lex.lasttok == LPAREN
+              || dfa->lex.lasttok == OR)
+            return dfa->lex.lasttok = BEGLINE;
           goto normal_char;
 
         case '$':
           if (backslash)
             goto normal_char;
           if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS
-              || dfa->lexstate.lexleft == 0
-              || (dfa->syntax.syntax_bits & RE_NO_BK_PARENS
-                  ? dfa->lexstate.lexleft > 0 && *dfa->lexstate.lexptr == ')'
-                  : dfa->lexstate.lexleft > 1 && dfa->lexstate.lexptr[0] == '\\'
-                    && dfa->lexstate.lexptr[1] == ')')
-              || (dfa->syntax.syntax_bits & RE_NO_BK_VBAR
-                  ? dfa->lexstate.lexleft > 0 && *dfa->lexstate.lexptr == '|'
-                  : dfa->lexstate.lexleft > 1 && dfa->lexstate.lexptr[0] == '\\'
-                    && dfa->lexstate.lexptr[1] == '|')
+              || dfa->lex.left == 0
+              || ((dfa->lex.left
+                   > !(dfa->syntax.syntax_bits & RE_NO_BK_PARENS))
+                  && (dfa->lex.ptr[!(dfa->syntax.syntax_bits & RE_NO_BK_PARENS)
+                                   & (dfa->lex.ptr[0] == '\\')]
+                      == ')'))
+              || ((dfa->lex.left
+                   > !(dfa->syntax.syntax_bits & RE_NO_BK_VBAR))
+                  && (dfa->lex.ptr[!(dfa->syntax.syntax_bits & RE_NO_BK_VBAR)
+                                   & (dfa->lex.ptr[0] == '\\')]
+                      == '|'))
               || ((dfa->syntax.syntax_bits & RE_NEWLINE_ALT)
-                  && dfa->lexstate.lexleft > 0
-                  && *dfa->lexstate.lexptr == '\n'))
-            return dfa->lexstate.lasttok = ENDLINE;
+                  && dfa->lex.left > 0 && dfa->lex.ptr[0] == '\n'))
+            return dfa->lex.lasttok = ENDLINE;
           goto normal_char;
 
         case '1':
@@ -1364,8 +1373,8 @@ lex (struct dfa *dfa)
         case '9':
           if (backslash && !(dfa->syntax.syntax_bits & RE_NO_BK_REFS))
             {
-              dfa->lexstate.laststart = false;
-              return dfa->lexstate.lasttok = BACKREF;
+              dfa->lex.laststart = false;
+              return dfa->lex.lasttok = BACKREF;
             }
           goto normal_char;
 
@@ -1373,7 +1382,7 @@ lex (struct dfa *dfa)
           if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
             {
               /* FIXME: should be beginning of string */
-              return dfa->lexstate.lasttok = BEGLINE;
+              return dfa->lex.lasttok = BEGLINE;
             }
           goto normal_char;
 
@@ -1381,28 +1390,28 @@ lex (struct dfa *dfa)
           if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
             {
               /* FIXME: should be end of string */
-              return dfa->lexstate.lasttok = ENDLINE;
+              return dfa->lex.lasttok = ENDLINE;
             }
           goto normal_char;
 
         case '<':
           if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
-            return dfa->lexstate.lasttok = BEGWORD;
+            return dfa->lex.lasttok = BEGWORD;
           goto normal_char;
 
         case '>':
           if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
-            return dfa->lexstate.lasttok = ENDWORD;
+            return dfa->lex.lasttok = ENDWORD;
           goto normal_char;
 
         case 'b':
           if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
-            return dfa->lexstate.lasttok = LIMWORD;
+            return dfa->lex.lasttok = LIMWORD;
           goto normal_char;
 
         case 'B':
           if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
-            return dfa->lexstate.lasttok = NOTLIMWORD;
+            return dfa->lex.lasttok = NOTLIMWORD;
           goto normal_char;
 
         case '?':
@@ -1411,17 +1420,17 @@ lex (struct dfa *dfa)
           if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0))
             goto normal_char;
           if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
-              && dfa->lexstate.laststart)
+              && dfa->lex.laststart)
             goto normal_char;
-          return dfa->lexstate.lasttok = QMARK;
+          return dfa->lex.lasttok = QMARK;
 
         case '*':
           if (backslash)
             goto normal_char;
           if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
-              && dfa->lexstate.laststart)
+              && dfa->lex.laststart)
             goto normal_char;
-          return dfa->lexstate.lasttok = STAR;
+          return dfa->lex.lasttok = STAR;
 
         case '+':
           if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
@@ -1429,9 +1438,9 @@ lex (struct dfa *dfa)
           if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0))
             goto normal_char;
           if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
-              && dfa->lexstate.laststart)
+              && dfa->lex.laststart)
             goto normal_char;
-          return dfa->lexstate.lasttok = PLUS;
+          return dfa->lex.lasttok = PLUS;
 
         case '{':
           if (!(dfa->syntax.syntax_bits & RE_INTERVALS))
@@ -1439,7 +1448,7 @@ lex (struct dfa *dfa)
           if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_BRACES) == 0))
             goto normal_char;
           if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
-              && dfa->lexstate.laststart)
+              && dfa->lex.laststart)
             goto normal_char;
 
           /* Cases:
@@ -1449,86 +1458,79 @@ lex (struct dfa *dfa)
              {,} - 0 to infinity (same as '*')
              {M,N} - M through N */
           {
-            char const *p = dfa->lexstate.lexptr;
-            char const *lim = p + dfa->lexstate.lexleft;
-            dfa->lexstate.minrep = dfa->lexstate.maxrep = -1;
+            char const *p = dfa->lex.ptr;
+            char const *lim = p + dfa->lex.left;
+            dfa->lex.minrep = dfa->lex.maxrep = -1;
             for (; p != lim && ISASCIIDIGIT (*p); p++)
-              {
-                if (dfa->lexstate.minrep < 0)
-                  dfa->lexstate.minrep = *p - '0';
-                else
-                  dfa->lexstate.minrep = MIN (RE_DUP_MAX + 1,
-                                              (dfa->lexstate.minrep
-                                               * 10 + *p - '0'));
-              }
+              dfa->lex.minrep = (dfa->lex.minrep < 0
+                                 ? *p - '0'
+                                 : MIN (RE_DUP_MAX + 1,
+                                        dfa->lex.minrep * 10 + *p - '0'));
             if (p != lim)
               {
                 if (*p != ',')
-                  dfa->lexstate.maxrep = dfa->lexstate.minrep;
+                  dfa->lex.maxrep = dfa->lex.minrep;
                 else
                   {
-                    if (dfa->lexstate.minrep < 0)
-                      dfa->lexstate.minrep = 0;
+                    if (dfa->lex.minrep < 0)
+                      dfa->lex.minrep = 0;
                     while (++p != lim && ISASCIIDIGIT (*p))
-                      {
-                        if (dfa->lexstate.maxrep < 0)
-                          dfa->lexstate.maxrep = *p - '0';
-                        else
-                          dfa->lexstate.maxrep = MIN (RE_DUP_MAX + 1,
-                                                      (dfa->lexstate.maxrep
-                                                       * 10 + *p - '0'));
-                      }
+                      dfa->lex.maxrep
+                        = (dfa->lex.maxrep < 0
+                           ? *p - '0'
+                           : MIN (RE_DUP_MAX + 1,
+                                  dfa->lex.maxrep * 10 + *p - '0'));
                   }
               }
             if (! ((! backslash || (p != lim && *p++ == '\\'))
                    && p != lim && *p++ == '}'
-                   && 0 <= dfa->lexstate.minrep
-                   && (dfa->lexstate.maxrep < 0
-                       || dfa->lexstate.minrep <= dfa->lexstate.maxrep)))
+                   && 0 <= dfa->lex.minrep
+                   && (dfa->lex.maxrep < 0
+                       || dfa->lex.minrep <= dfa->lex.maxrep)))
               {
                 if (dfa->syntax.syntax_bits & RE_INVALID_INTERVAL_ORD)
                   goto normal_char;
                 dfaerror (_("invalid content of \\{\\}"));
               }
-            if (RE_DUP_MAX < dfa->lexstate.maxrep)
+            if (RE_DUP_MAX < dfa->lex.maxrep)
               dfaerror (_("regular expression too big"));
-            dfa->lexstate.lexptr = p;
-            dfa->lexstate.lexleft = lim - p;
+            dfa->lex.ptr = p;
+            dfa->lex.left = lim - p;
           }
-          dfa->lexstate.laststart = false;
-          return dfa->lexstate.lasttok = REPMN;
+          dfa->lex.laststart = false;
+          return dfa->lex.lasttok = REPMN;
 
         case '|':
           if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
             goto normal_char;
           if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_VBAR) == 0))
             goto normal_char;
-          dfa->lexstate.laststart = true;
-          return dfa->lexstate.lasttok = OR;
+          dfa->lex.laststart = true;
+          return dfa->lex.lasttok = OR;
 
         case '\n':
           if (dfa->syntax.syntax_bits & RE_LIMITED_OPS
               || backslash || !(dfa->syntax.syntax_bits & RE_NEWLINE_ALT))
             goto normal_char;
-          dfa->lexstate.laststart = true;
-          return dfa->lexstate.lasttok = OR;
+          dfa->lex.laststart = true;
+          return dfa->lex.lasttok = OR;
 
         case '(':
           if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0))
             goto normal_char;
-          ++dfa->lexstate.parens;
-          dfa->lexstate.laststart = true;
-          return dfa->lexstate.lasttok = LPAREN;
+          dfa->lex.parens++;
+          dfa->lex.laststart = true;
+          return dfa->lex.lasttok = LPAREN;
 
         case ')':
           if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0))
             goto normal_char;
-          if (dfa->lexstate.parens == 0
+          if (dfa->lex.parens == 0
               && dfa->syntax.syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD)
             goto normal_char;
-          --dfa->lexstate.parens;
-          dfa->lexstate.laststart = false;
-          return dfa->lexstate.lasttok = RPAREN;
+          dfa->lex.parens--;
+          dfa->lex.laststart = false;
+          return dfa->lex.lasttok = RPAREN;
 
         case '.':
           if (backslash)
@@ -1537,8 +1539,8 @@ lex (struct dfa *dfa)
             {
               /* In multibyte environment period must match with a single
                  character not a byte.  So we use ANYCHAR.  */
-              dfa->lexstate.laststart = false;
-              return dfa->lexstate.lasttok = ANYCHAR;
+              dfa->lex.laststart = false;
+              return dfa->lex.lasttok = ANYCHAR;
             }
           zeroset (ccl);
           notset (ccl);
@@ -1546,8 +1548,8 @@ lex (struct dfa *dfa)
             clrbit ('\n', ccl);
           if (dfa->syntax.syntax_bits & RE_DOT_NOT_NULL)
             clrbit ('\0', ccl);
-          dfa->lexstate.laststart = false;
-          return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa, ccl);
+          dfa->lex.laststart = false;
+          return dfa->lex.lasttok = CSET + dfa_charclass_index (dfa, ccl);
 
         case 's':
         case 'S':
@@ -1561,9 +1563,8 @@ lex (struct dfa *dfa)
                   setbit (c2, ccl);
               if (c == 'S')
                 notset (ccl);
-              dfa->lexstate.laststart = false;
-              return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa,
-                                                                         ccl);
+              dfa->lex.laststart = false;
+              return dfa->lex.lasttok = CSET + dfa_charclass_index (dfa, ccl);
             }
 
           /* FIXME: see if optimizing this, as is done with ANYCHAR and
@@ -1572,14 +1573,15 @@ lex (struct dfa *dfa)
           /* \s and \S are documented to be equivalent to [[:space:]] and
              [^[:space:]] respectively, so tell the lexer to process those
              strings, each minus its "already processed" '['.  */
-          PUSH_LEX_STATE (c == 's' ? "[:space:]]" : "^[:space:]]");
-
-          dfa->lexstate.lasttok = parse_bracket_exp (dfa);
-
-          POP_LEX_STATE ();
+          {
+            struct lexptr ls;
+            push_lex_state (dfa, &ls, &"^[:space:]]"[c == 's']);
+            dfa->lex.lasttok = parse_bracket_exp (dfa);
+            pop_lex_state (dfa, &ls);
+          }
 
-          dfa->lexstate.laststart = false;
-          return dfa->lexstate.lasttok;
+          dfa->lex.laststart = false;
+          return dfa->lex.lasttok;
 
         case 'w':
         case 'W':
@@ -1594,9 +1596,8 @@ lex (struct dfa *dfa)
                   setbit (c2, ccl);
               if (c == 'W')
                 notset (ccl);
-              dfa->lexstate.laststart = false;
-              return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa,
-                                                                         ccl);
+              dfa->lex.laststart = false;
+              return dfa->lex.lasttok = CSET + dfa_charclass_index (dfa, ccl);
             }
 
           /* FIXME: see if optimizing this, as is done with ANYCHAR and
@@ -1605,38 +1606,38 @@ lex (struct dfa *dfa)
           /* \w and \W are documented to be equivalent to [_[:alnum:]] and
              [^_[:alnum:]] respectively, so tell the lexer to process those
              strings, each minus its "already processed" '['.  */
-          PUSH_LEX_STATE (c == 'w' ? "_[:alnum:]]" : "^_[:alnum:]]");
-
-          dfa->lexstate.lasttok = parse_bracket_exp (dfa);
-
-          POP_LEX_STATE ();
+          {
+            struct lexptr ls;
+            push_lex_state (dfa, &ls, &"^_[:alnum:]]"[c == 'w']);
+            dfa->lex.lasttok = parse_bracket_exp (dfa);
+            pop_lex_state (dfa, &ls);
+          }
 
-          dfa->lexstate.laststart = false;
-          return dfa->lexstate.lasttok;
+          dfa->lex.laststart = false;
+          return dfa->lex.lasttok;
 
         case '[':
           if (backslash)
             goto normal_char;
-          dfa->lexstate.laststart = false;
-          return dfa->lexstate.lasttok = parse_bracket_exp (dfa);
+          dfa->lex.laststart = false;
+          return dfa->lex.lasttok = parse_bracket_exp (dfa);
 
         default:
         normal_char:
-          dfa->lexstate.laststart = false;
+          dfa->lex.laststart = false;
           /* For multibyte character sets, folding is done in atom.  Always
              return WCHAR.  */
           if (dfa->multibyte)
-            return dfa->lexstate.lasttok = WCHAR;
+            return dfa->lex.lasttok = WCHAR;
 
           if (dfa->syntax.case_fold && isalpha (c))
             {
               zeroset (ccl);
               setbit_case_fold_c (c, ccl);
-              return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa,
-                                                                         ccl);
+              return dfa->lex.lasttok = CSET + dfa_charclass_index (dfa, ccl);
             }
 
-          return dfa->lexstate.lasttok = c;
+          return dfa->lex.lasttok = c;
         }
     }
 
@@ -1670,21 +1671,21 @@ addtok_mb (struct dfa *dfa, token t, int mbprop)
 
     case CAT:
     case OR:
-      --dfa->parsestate.depth;
+      dfa->parse.depth--;
       break;
 
     case BACKREF:
       dfa->fast = false;
       /* fallthrough */
     default:
-      ++dfa->nleaves;
+      dfa->nleaves++;
       /* fallthrough */
     case EMPTY:
-      ++dfa->parsestate.depth;
+      dfa->parse.depth++;
       break;
     }
-  if (dfa->parsestate.depth > dfa->depth)
-    dfa->depth = dfa->parsestate.depth;
+  if (dfa->parse.depth > dfa->depth)
+    dfa->depth = dfa->parse.depth;
 }
 
 static void addtok_wc (struct dfa *dfa, wint_t wc);
@@ -1741,19 +1742,19 @@ addtok_wc (struct dfa *dfa, wint_t wc)
   size_t stored_bytes = wcrtomb ((char *) buf, wc, &s);
 
   if (stored_bytes != (size_t) -1)
-    dfa->lexstate.cur_mb_len = stored_bytes;
+    dfa->lex.cur_mb_len = stored_bytes;
   else
     {
       /* This is merely stop-gap.  buf[0] is undefined, yet skipping
          the addtok_mb call altogether can corrupt the heap.  */
-      dfa->lexstate.cur_mb_len = 1;
+      dfa->lex.cur_mb_len = 1;
       buf[0] = 0;
     }
 
-  addtok_mb (dfa, buf[0], dfa->lexstate.cur_mb_len == 1 ? 3 : 1);
-  for (i = 1; i < dfa->lexstate.cur_mb_len; i++)
+  addtok_mb (dfa, buf[0], dfa->lex.cur_mb_len == 1 ? 3 : 1);
+  for (i = 1; i < dfa->lex.cur_mb_len; i++)
     {
-      addtok_mb (dfa, buf[i], i == dfa->lexstate.cur_mb_len - 1 ? 2 : 0);
+      addtok_mb (dfa, buf[i], i == dfa->lex.cur_mb_len - 1 ? 2 : 0);
       addtok (dfa, CAT);
     }
 }
@@ -1854,18 +1855,18 @@ add_utf8_anychar (struct dfa *dfa)
 static void
 atom (struct dfa *dfa)
 {
-  if (dfa->parsestate.tok == WCHAR)
+  if (dfa->parse.tok == WCHAR)
     {
-      if (dfa->lexstate.wctok == WEOF)
+      if (dfa->lex.wctok == WEOF)
         addtok (dfa, BACKREF);
       else
         {
-          addtok_wc (dfa, dfa->lexstate.wctok);
+          addtok_wc (dfa, dfa->lex.wctok);
 
           if (dfa->syntax.case_fold)
             {
               wchar_t folded[CASE_FOLDED_BUFSIZE];
-              unsigned int i, n = case_folded_counterparts (dfa->lexstate.wctok,
+              unsigned int i, n = case_folded_counterparts (dfa->lex.wctok,
                                                             folded);
               for (i = 0; i < n; i++)
                 {
@@ -1875,9 +1876,9 @@ atom (struct dfa *dfa)
             }
         }
 
-      dfa->parsestate.tok = lex (dfa);
+      dfa->parse.tok = lex (dfa);
     }
-  else if (dfa->parsestate.tok == ANYCHAR && using_utf8)
+  else if (dfa->parse.tok == ANYCHAR && using_utf8)
     {
       /* For UTF-8 expand the period to a series of CSETs that define a valid
          UTF-8 character.  This avoids using the slow multibyte path.  I'm
@@ -1887,26 +1888,25 @@ atom (struct dfa *dfa)
          UTF-8: it is the most used, and the structure of the encoding
          makes the correctness more obvious.  */
       add_utf8_anychar (dfa);
-      dfa->parsestate.tok = lex (dfa);
+      dfa->parse.tok = lex (dfa);
     }
-  else if ((dfa->parsestate.tok >= 0 && dfa->parsestate.tok < NOTCHAR)
-           || dfa->parsestate.tok >= CSET || dfa->parsestate.tok == BACKREF
-           || dfa->parsestate.tok == BEGLINE || dfa->parsestate.tok == ENDLINE
-           || dfa->parsestate.tok == BEGWORD || dfa->parsestate.tok == ANYCHAR
-           || dfa->parsestate.tok == MBCSET || dfa->parsestate.tok == ENDWORD
-           || dfa->parsestate.tok == LIMWORD
-           || dfa->parsestate.tok == NOTLIMWORD)
+  else if ((0 <= dfa->parse.tok && dfa->parse.tok < NOTCHAR)
+           || dfa->parse.tok >= CSET || dfa->parse.tok == BACKREF
+           || dfa->parse.tok == BEGLINE || dfa->parse.tok == ENDLINE
+           || dfa->parse.tok == BEGWORD || dfa->parse.tok == ANYCHAR
+           || dfa->parse.tok == MBCSET || dfa->parse.tok == ENDWORD
+           || dfa->parse.tok == LIMWORD || dfa->parse.tok == NOTLIMWORD)
     {
-      addtok (dfa, dfa->parsestate.tok);
-      dfa->parsestate.tok = lex (dfa);
+      addtok (dfa, dfa->parse.tok);
+      dfa->parse.tok = lex (dfa);
     }
-  else if (dfa->parsestate.tok == LPAREN)
+  else if (dfa->parse.tok == LPAREN)
     {
-      dfa->parsestate.tok = lex (dfa);
+      dfa->parse.tok = lex (dfa);
       regexp (dfa);
-      if (dfa->parsestate.tok != RPAREN)
+      if (dfa->parse.tok != RPAREN)
         dfaerror (_("unbalanced ("));
-      dfa->parsestate.tok = lex (dfa);
+      dfa->parse.tok = lex (dfa);
     }
   else
     addtok (dfa, EMPTY);
@@ -1954,40 +1954,39 @@ closure (struct dfa *dfa)
   size_t tindex, ntokens;
 
   atom (dfa);
-  while (dfa->parsestate.tok == QMARK || dfa->parsestate.tok == STAR
-         || dfa->parsestate.tok == PLUS || dfa->parsestate.tok == REPMN)
-    if (dfa->parsestate.tok == REPMN
-        && (dfa->lexstate.minrep || dfa->lexstate.maxrep))
+  while (dfa->parse.tok == QMARK || dfa->parse.tok == STAR
+         || dfa->parse.tok == PLUS || dfa->parse.tok == REPMN)
+    if (dfa->parse.tok == REPMN && (dfa->lex.minrep || dfa->lex.maxrep))
       {
         ntokens = nsubtoks (dfa, dfa->tindex);
         tindex = dfa->tindex - ntokens;
-        if (dfa->lexstate.maxrep < 0)
+        if (dfa->lex.maxrep < 0)
           addtok (dfa, PLUS);
-        if (dfa->lexstate.minrep == 0)
+        if (dfa->lex.minrep == 0)
           addtok (dfa, QMARK);
-        for (i = 1; i < dfa->lexstate.minrep; ++i)
+        for (i = 1; i < dfa->lex.minrep; i++)
           {
             copytoks (dfa, tindex, ntokens);
             addtok (dfa, CAT);
           }
-        for (; i < dfa->lexstate.maxrep; ++i)
+        for (; i < dfa->lex.maxrep; i++)
           {
             copytoks (dfa, tindex, ntokens);
             addtok (dfa, QMARK);
             addtok (dfa, CAT);
           }
-        dfa->parsestate.tok = lex (dfa);
+        dfa->parse.tok = lex (dfa);
       }
-    else if (dfa->parsestate.tok == REPMN)
+    else if (dfa->parse.tok == REPMN)
       {
         dfa->tindex -= nsubtoks (dfa, dfa->tindex);
-        dfa->parsestate.tok = lex (dfa);
+        dfa->parse.tok = lex (dfa);
         closure (dfa);
       }
     else
       {
-        addtok (dfa, dfa->parsestate.tok);
-        dfa->parsestate.tok = lex (dfa);
+        addtok (dfa, dfa->parse.tok);
+        dfa->parse.tok = lex (dfa);
       }
 }
 
@@ -1995,8 +1994,8 @@ static void
 branch (struct dfa* dfa)
 {
   closure (dfa);
-  while (dfa->parsestate.tok != RPAREN && dfa->parsestate.tok != OR
-         && dfa->parsestate.tok >= 0)
+  while (dfa->parse.tok != RPAREN && dfa->parse.tok != OR
+         && dfa->parse.tok >= 0)
     {
       closure (dfa);
       addtok (dfa, CAT);
@@ -2007,9 +2006,9 @@ static void
 regexp (struct dfa *dfa)
 {
   branch (dfa);
-  while (dfa->parsestate.tok == OR)
+  while (dfa->parse.tok == OR)
     {
-      dfa->parsestate.tok = lex (dfa);
+      dfa->parse.tok = lex (dfa);
       branch (dfa);
       addtok (dfa, OR);
     }
@@ -2021,26 +2020,26 @@ regexp (struct dfa *dfa)
 static void
 dfaparse (char const *s, size_t len, struct dfa *d)
 {
-  d->lexstate.lexptr = s;
-  d->lexstate.lexleft = len;
-  d->lexstate.lasttok = END;
-  d->lexstate.laststart = true;
-  d->lexstate.parens = 0;
+  d->lex.ptr = s;
+  d->lex.left = len;
+  d->lex.lasttok = END;
+  d->lex.laststart = true;
+  d->lex.parens = 0;
   if (d->multibyte)
     {
-      d->lexstate.cur_mb_len = 0;
+      d->lex.cur_mb_len = 0;
       memset (&d->mbs, 0, sizeof d->mbs);
     }
 
   if (!d->syntax.syntax_bits_set)
     dfaerror (_("no syntax specified"));
 
-  d->parsestate.tok = lex (d);
-  d->parsestate.depth = d->depth;
+  d->parse.tok = lex (d);
+  d->parse.depth = d->depth;
 
   regexp (d);
 
-  if (d->parsestate.tok != END)
+  if (d->parse.tok != END)
     dfaerror (_("unbalanced )"));
 
   addtok (d, END - d->nregexps);
@@ -3990,11 +3989,9 @@ dfamust (struct dfa const *d)
   bool exact = false;
   bool begline = false;
   bool endline = false;
-  size_t rj;
   bool need_begline = false;
   bool need_endline = false;
   bool case_fold_unibyte = d->syntax.case_fold && MB_CUR_MAX == 1;
-  struct dfamust *dm;
 
   for (ri = 0; ri < d->tindex; ++ri)
     {
@@ -4171,7 +4168,7 @@ dfamust (struct dfa const *d)
                 }
             }
 
-          rj = ri + 2;
+          size_t rj = ri + 2;
           if (d->tokens[ri + 1] == CAT)
             {
               for (; rj < d->tindex - 1; rj += 2)
@@ -4200,7 +4197,7 @@ dfamust (struct dfa const *d)
     }
  done:;
 
-  dm = NULL;
+  struct dfamust *dm = NULL;
   if (*result)
     {
       dm = xmalloc (sizeof *dm);
@@ -4230,11 +4227,11 @@ dfamustfree (struct dfamust *dm)
 struct dfa *
 dfaalloc (void)
 {
-  struct dfa *d = xcalloc (1, sizeof (struct dfa));
+  struct dfa *d = xzalloc (sizeof *d);
   d->multibyte = MB_CUR_MAX > 1;
   d->dfaexec = d->multibyte ? dfaexec_mb : dfaexec_sb;
   d->fast = !d->multibyte;
-  d->lexstate.cur_mb_len = 1;
+  d->lex.cur_mb_len = 1;
   return d;
 }
 
diff --git a/doc/ChangeLog b/doc/ChangeLog
index 2dc83a60..ce21ba92 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,5 +1,11 @@
 2016-08-25         Arnold D. Robbins     <arnold@skeeve.com>
 
+	* gawktexi.in (POSIX String Comparison): Update for new
+	spec where == and != use strcmp, rest use strcoll. Thanks to
+	Chet Ramey for pointing me at the new rules.
+
+2016-08-25         Arnold D. Robbins     <arnold@skeeve.com>
+
 	* 4.1.4: Release tar ball made.
 
 2016-08-24         Arnold D. Robbins     <arnold@skeeve.com>
diff --git a/doc/gawk.info b/doc/gawk.info
index fd3a5b8a..c39afa4f 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -8666,18 +8666,18 @@ Constant Regexps::, where this is discussed in more detail.
 
 File: gawk.info,  Node: POSIX String Comparison,  Prev: Comparison Operators,  Up: Typing and Comparison
 
-6.3.2.3 String Comparison with POSIX Rules
-..........................................
+6.3.2.3 String Comparison Based on Locale Collating Order
+.........................................................
 
-The POSIX standard says that string comparison is performed based on the
-locale's "collating order".  This is the order in which characters sort,
-as defined by the locale (for more discussion, *note Locales::).  This
-order is usually very different from the results obtained when doing
-straight character-by-character comparison.(1)
+The POSIX standard used to say that all string comparisons are performed
+based on the locale's "collating order".  This is the order in which
+characters sort, as defined by the locale (for more discussion, *note
+Locales::).  This order is usually very different from the results
+obtained when doing straight byte-by-byte comparison.(1)
 
    Because this behavior differs considerably from existing practice,
-'gawk' only implements it when in POSIX mode (*note Options::).  Here is
-an example to illustrate the difference, in an 'en_US.UTF-8' locale:
+'gawk' only implemented it when in POSIX mode (*note Options::).  Here
+is an example to illustrate the difference, in an 'en_US.UTF-8' locale:
 
      $ gawk 'BEGIN { printf("ABC < abc = %s\n",
      >                     ("ABC" < "abc" ? "TRUE" : "FALSE")) }'
@@ -8686,11 +8686,28 @@ an example to illustrate the difference, in an 'en_US.UTF-8' locale:
      >                             ("ABC" < "abc" ? "TRUE" : "FALSE")) }'
      -| ABC < abc = FALSE
 
+   Fortunately, as of August 2016, comparison based on locale collating
+order is no longer required for the '==' and '!=' operators.(2)
+However, comparison based on locales is still required for '<', '<=',
+'>', and '>='.  POSIX thus recommends as follows:
+
+     Since the '==' operator checks whether strings are identical, not
+     whether they collate equally, applications needing to check whether
+     strings collate equally can use:
+
+          a <= b && a >= b
+
+   As of version 4.2, 'gawk' continues to use locale collating order for
+'<', '<=', '>', and '>=' only in POSIX mode.
+
    ---------- Footnotes ----------
 
    (1) Technically, string comparison is supposed to behave the same way
 as if the strings were compared with the C 'strcoll()' function.
 
+   (2) See the Austin Group website
+(http://austingroupbugs.net/view.php?id=1070).
+
 
 File: gawk.info,  Node: Boolean Ops,  Next: Conditional Exp,  Prev: Typing and Comparison,  Up: Truth Values and Conditions
 
@@ -27659,7 +27676,7 @@ ranges, such that outside the '"C"' and '"POSIX"' locales, the meaning
 of range expressions was _undefined_.(3)
 
    By using this lovely technical term, the standard gives license to
-implementors to implement ranges in whatever way they choose.  The
+implementers to implement ranges in whatever way they choose.  The
 'gawk' maintainer chose to apply the pre-POSIX meaning both with the
 default regexp matching and when '--traditional' or '--posix' are used.
 In all cases 'gawk' remains POSIX-compliant.
@@ -35483,401 +35500,402 @@ Node: Variable Typing367063
 Node: Comparison Operators370687
 Ref: table-relational-ops371106
 Node: POSIX String Comparison374601
-Ref: POSIX String Comparison-Footnote-1375675
-Node: Boolean Ops375814
-Ref: Boolean Ops-Footnote-1380296
-Node: Conditional Exp380388
-Node: Function Calls382124
-Node: Precedence386001
-Node: Locales389660
-Node: Expressions Summary391292
-Node: Patterns and Actions393865
-Node: Pattern Overview394985
-Node: Regexp Patterns396662
-Node: Expression Patterns397204
-Node: Ranges400985
-Node: BEGIN/END404093
-Node: Using BEGIN/END404854
-Ref: Using BEGIN/END-Footnote-1407590
-Node: I/O And BEGIN/END407696
-Node: BEGINFILE/ENDFILE410010
-Node: Empty412917
-Node: Using Shell Variables413234
-Node: Action Overview415508
-Node: Statements417833
-Node: If Statement419681
-Node: While Statement421176
-Node: Do Statement423204
-Node: For Statement424352
-Node: Switch Statement427510
-Node: Break Statement429896
-Node: Continue Statement431988
-Node: Next Statement433815
-Node: Nextfile Statement436198
-Node: Exit Statement438850
-Node: Built-in Variables441253
-Node: User-modified442386
-Node: Auto-set449972
-Ref: Auto-set-Footnote-1464625
-Ref: Auto-set-Footnote-2464831
-Node: ARGC and ARGV464887
-Node: Pattern Action Summary469100
-Node: Arrays471530
-Node: Array Basics472859
-Node: Array Intro473703
-Ref: figure-array-elements475678
-Ref: Array Intro-Footnote-1478382
-Node: Reference to Elements478510
-Node: Assigning Elements480974
-Node: Array Example481465
-Node: Scanning an Array483224
-Node: Controlling Scanning486246
-Ref: Controlling Scanning-Footnote-1491645
-Node: Numeric Array Subscripts491961
-Node: Uninitialized Subscripts494145
-Node: Delete495764
-Ref: Delete-Footnote-1498516
-Node: Multidimensional498573
-Node: Multiscanning501668
-Node: Arrays of Arrays503259
-Node: Arrays Summary508026
-Node: Functions510119
-Node: Built-in511157
-Node: Calling Built-in512238
-Node: Numeric Functions514234
-Ref: Numeric Functions-Footnote-1519067
-Ref: Numeric Functions-Footnote-2519424
-Ref: Numeric Functions-Footnote-3519472
-Node: String Functions519744
-Ref: String Functions-Footnote-1543248
-Ref: String Functions-Footnote-2543376
-Ref: String Functions-Footnote-3543624
-Node: Gory Details543711
-Ref: table-sub-escapes545502
-Ref: table-sub-proposed547021
-Ref: table-posix-sub548384
-Ref: table-gensub-escapes549925
-Ref: Gory Details-Footnote-1550748
-Node: I/O Functions550902
-Ref: table-system-return-values557484
-Ref: I/O Functions-Footnote-1559464
-Ref: I/O Functions-Footnote-2559612
-Node: Time Functions559732
-Ref: Time Functions-Footnote-1570237
-Ref: Time Functions-Footnote-2570305
-Ref: Time Functions-Footnote-3570463
-Ref: Time Functions-Footnote-4570574
-Ref: Time Functions-Footnote-5570686
-Ref: Time Functions-Footnote-6570913
-Node: Bitwise Functions571179
-Ref: table-bitwise-ops571773
-Ref: Bitwise Functions-Footnote-1576111
-Node: Type Functions576284
-Node: I18N Functions578945
-Node: User-defined580596
-Node: Definition Syntax581401
-Ref: Definition Syntax-Footnote-1587088
-Node: Function Example587159
-Ref: Function Example-Footnote-1590081
-Node: Function Caveats590103
-Node: Calling A Function590621
-Node: Variable Scope591579
-Node: Pass By Value/Reference594573
-Node: Return Statement598072
-Node: Dynamic Typing601051
-Node: Indirect Calls601981
-Ref: Indirect Calls-Footnote-1612232
-Node: Functions Summary612360
-Node: Library Functions615065
-Ref: Library Functions-Footnote-1618672
-Ref: Library Functions-Footnote-2618815
-Node: Library Names618986
-Ref: Library Names-Footnote-1622446
-Ref: Library Names-Footnote-2622669
-Node: General Functions622755
-Node: Strtonum Function623858
-Node: Assert Function626880
-Node: Round Function630206
-Node: Cliff Random Function631747
-Node: Ordinal Functions632763
-Ref: Ordinal Functions-Footnote-1635826
-Ref: Ordinal Functions-Footnote-2636078
-Node: Join Function636288
-Ref: Join Function-Footnote-1638058
-Node: Getlocaltime Function638258
-Node: Readfile Function642000
-Node: Shell Quoting643972
-Node: Data File Management645373
-Node: Filetrans Function646005
-Node: Rewind Function650101
-Node: File Checking652007
-Ref: File Checking-Footnote-1653341
-Node: Empty Files653542
-Node: Ignoring Assigns655521
-Node: Getopt Function657071
-Ref: Getopt Function-Footnote-1668540
-Node: Passwd Functions668740
-Ref: Passwd Functions-Footnote-1677579
-Node: Group Functions677667
-Ref: Group Functions-Footnote-1685564
-Node: Walking Arrays685771
-Node: Library Functions Summary688779
-Node: Library Exercises690185
-Node: Sample Programs690650
-Node: Running Examples691420
-Node: Clones692148
-Node: Cut Program693372
-Node: Egrep Program703301
-Ref: Egrep Program-Footnote-1710813
-Node: Id Program710923
-Node: Split Program714603
-Ref: Split Program-Footnote-1718062
-Node: Tee Program718191
-Node: Uniq Program720981
-Node: Wc Program728407
-Ref: Wc Program-Footnote-1732662
-Node: Miscellaneous Programs732756
-Node: Dupword Program733969
-Node: Alarm Program735999
-Node: Translate Program740854
-Ref: Translate Program-Footnote-1745419
-Node: Labels Program745689
-Ref: Labels Program-Footnote-1749040
-Node: Word Sorting749124
-Node: History Sorting753196
-Node: Extract Program755031
-Node: Simple Sed762560
-Node: Igawk Program765634
-Ref: Igawk Program-Footnote-1779965
-Ref: Igawk Program-Footnote-2780167
-Ref: Igawk Program-Footnote-3780289
-Node: Anagram Program780404
-Node: Signature Program783466
-Node: Programs Summary784713
-Node: Programs Exercises785927
-Ref: Programs Exercises-Footnote-1790056
-Node: Advanced Features790147
-Node: Nondecimal Data792137
-Node: Array Sorting793728
-Node: Controlling Array Traversal794428
-Ref: Controlling Array Traversal-Footnote-1802795
-Node: Array Sorting Functions802913
-Ref: Array Sorting Functions-Footnote-1808004
-Node: Two-way I/O808200
-Ref: Two-way I/O-Footnote-1814750
-Ref: Two-way I/O-Footnote-2814937
-Node: TCP/IP Networking815019
-Node: Profiling818137
-Ref: Profiling-Footnote-1826630
-Node: Advanced Features Summary826953
-Node: Internationalization828797
-Node: I18N and L10N830277
-Node: Explaining gettext830964
-Ref: Explaining gettext-Footnote-1836856
-Ref: Explaining gettext-Footnote-2837041
-Node: Programmer i18n837206
-Ref: Programmer i18n-Footnote-1842061
-Node: Translator i18n842110
-Node: String Extraction842904
-Ref: String Extraction-Footnote-1844036
-Node: Printf Ordering844122
-Ref: Printf Ordering-Footnote-1846908
-Node: I18N Portability846972
-Ref: I18N Portability-Footnote-1849428
-Node: I18N Example849491
-Ref: I18N Example-Footnote-1852297
-Node: Gawk I18N852370
-Node: I18N Summary853015
-Node: Debugger854356
-Node: Debugging855378
-Node: Debugging Concepts855819
-Node: Debugging Terms857628
-Node: Awk Debugging860203
-Node: Sample Debugging Session861109
-Node: Debugger Invocation861643
-Node: Finding The Bug863029
-Node: List of Debugger Commands869507
-Node: Breakpoint Control870840
-Node: Debugger Execution Control874534
-Node: Viewing And Changing Data877896
-Node: Execution Stack881270
-Node: Debugger Info882907
-Node: Miscellaneous Debugger Commands886978
-Node: Readline Support892066
-Node: Limitations892962
-Ref: Limitations-Footnote-1897193
-Node: Debugging Summary897244
-Node: Arbitrary Precision Arithmetic898523
-Node: Computer Arithmetic899939
-Ref: table-numeric-ranges903530
-Ref: Computer Arithmetic-Footnote-1904252
-Node: Math Definitions904309
-Ref: table-ieee-formats907623
-Ref: Math Definitions-Footnote-1908226
-Node: MPFR features908331
-Node: FP Math Caution910048
-Ref: FP Math Caution-Footnote-1911120
-Node: Inexactness of computations911489
-Node: Inexact representation912449
-Node: Comparing FP Values913809
-Node: Errors accumulate914891
-Node: Getting Accuracy916324
-Node: Try To Round919034
-Node: Setting precision919933
-Ref: table-predefined-precision-strings920630
-Node: Setting the rounding mode922460
-Ref: table-gawk-rounding-modes922834
-Ref: Setting the rounding mode-Footnote-1926242
-Node: Arbitrary Precision Integers926421
-Ref: Arbitrary Precision Integers-Footnote-1931338
-Node: POSIX Floating Point Problems931487
-Ref: POSIX Floating Point Problems-Footnote-1935369
-Node: Floating point summary935407
-Node: Dynamic Extensions937597
-Node: Extension Intro939150
-Node: Plugin License940416
-Node: Extension Mechanism Outline941213
-Ref: figure-load-extension941652
-Ref: figure-register-new-function943217
-Ref: figure-call-new-function944309
-Node: Extension API Description946371
-Node: Extension API Functions Introduction947903
-Node: General Data Types952762
-Ref: General Data Types-Footnote-1958717
-Node: Memory Allocation Functions959016
-Ref: Memory Allocation Functions-Footnote-1961861
-Node: Constructor Functions961960
-Node: Registration Functions963705
-Node: Extension Functions964390
-Node: Exit Callback Functions967013
-Node: Extension Version String968263
-Node: Input Parsers968926
-Node: Output Wrappers978808
-Node: Two-way processors983320
-Node: Printing Messages985585
-Ref: Printing Messages-Footnote-1986756
-Node: Updating ERRNO986909
-Node: Requesting Values987648
-Ref: table-value-types-returned988385
-Node: Accessing Parameters989268
-Node: Symbol Table Access990503
-Node: Symbol table by name991015
-Node: Symbol table by cookie993036
-Ref: Symbol table by cookie-Footnote-1997188
-Node: Cached values997252
-Ref: Cached values-Footnote-11000759
-Node: Array Manipulation1000850
-Ref: Array Manipulation-Footnote-11001941
-Node: Array Data Types1001978
-Ref: Array Data Types-Footnote-11004636
-Node: Array Functions1004728
-Node: Flattening Arrays1008586
-Node: Creating Arrays1015494
-Node: Redirection API1020263
-Node: Extension API Variables1023094
-Node: Extension Versioning1023727
-Ref: gawk-api-version1024164
-Node: Extension API Informational Variables1025920
-Node: Extension API Boilerplate1026984
-Node: Finding Extensions1030798
-Node: Extension Example1031357
-Node: Internal File Description1032155
-Node: Internal File Ops1036235
-Ref: Internal File Ops-Footnote-11047997
-Node: Using Internal File Ops1048137
-Ref: Using Internal File Ops-Footnote-11050520
-Node: Extension Samples1050794
-Node: Extension Sample File Functions1052323
-Node: Extension Sample Fnmatch1059972
-Node: Extension Sample Fork1061459
-Node: Extension Sample Inplace1062677
-Node: Extension Sample Ord1065887
-Node: Extension Sample Readdir1066723
-Ref: table-readdir-file-types1067612
-Node: Extension Sample Revout1068417
-Node: Extension Sample Rev2way1069006
-Node: Extension Sample Read write array1069746
-Node: Extension Sample Readfile1071688
-Node: Extension Sample Time1072783
-Node: Extension Sample API Tests1074131
-Node: gawkextlib1074623
-Node: Extension summary1077070
-Node: Extension Exercises1080772
-Node: Language History1082270
-Node: V7/SVR3.11083926
-Node: SVR41086078
-Node: POSIX1087512
-Node: BTL1088891
-Node: POSIX/GNU1089620
-Node: Feature History1095482
-Node: Common Extensions1109852
-Node: Ranges and Locales1111135
-Ref: Ranges and Locales-Footnote-11115751
-Ref: Ranges and Locales-Footnote-21115778
-Ref: Ranges and Locales-Footnote-31116013
-Node: Contributors1116234
-Node: History summary1121794
-Node: Installation1123174
-Node: Gawk Distribution1124118
-Node: Getting1124602
-Node: Extracting1125563
-Node: Distribution contents1127201
-Node: Unix Installation1133295
-Node: Quick Installation1133977
-Node: Shell Startup Files1136391
-Node: Additional Configuration Options1137469
-Node: Configuration Philosophy1139274
-Node: Non-Unix Installation1141643
-Node: PC Installation1142101
-Node: PC Binary Installation1143421
-Node: PC Compiling1145273
-Ref: PC Compiling-Footnote-11148067
-Node: PC Testing1148176
-Node: PC Using1149356
-Ref: PC Using-Footnote-11153509
-Node: Cygwin1153582
-Node: MSYS1154352
-Node: VMS Installation1154853
-Node: VMS Compilation1155644
-Ref: VMS Compilation-Footnote-11156873
-Node: VMS Dynamic Extensions1156931
-Node: VMS Installation Details1158616
-Node: VMS Running1160869
-Node: VMS GNV1165148
-Node: VMS Old Gawk1165883
-Node: Bugs1166354
-Node: Other Versions1170669
-Node: Installation summary1177253
-Node: Notes1178304
-Node: Compatibility Mode1179169
-Node: Additions1179951
-Node: Accessing The Source1180876
-Node: Adding Code1182311
-Node: New Ports1188530
-Node: Derived Files1193018
-Ref: Derived Files-Footnote-11198503
-Ref: Derived Files-Footnote-21198538
-Ref: Derived Files-Footnote-31199136
-Node: Future Extensions1199250
-Node: Implementation Limitations1199908
-Node: Extension Design1201091
-Node: Old Extension Problems1202245
-Ref: Old Extension Problems-Footnote-11203763
-Node: Extension New Mechanism Goals1203820
-Ref: Extension New Mechanism Goals-Footnote-11207184
-Node: Extension Other Design Decisions1207373
-Node: Extension Future Growth1209486
-Node: Old Extension Mechanism1210322
-Node: Notes summary1212085
-Node: Basic Concepts1213267
-Node: Basic High Level1213948
-Ref: figure-general-flow1214230
-Ref: figure-process-flow1214915
-Ref: Basic High Level-Footnote-11218216
-Node: Basic Data Typing1218401
-Node: Glossary1221729
-Node: Copying1253676
-Node: GNU Free Documentation License1291215
-Node: Index1316333
+Ref: POSIX String Comparison-Footnote-1376296
+Ref: POSIX String Comparison-Footnote-2376435
+Node: Boolean Ops376519
+Ref: Boolean Ops-Footnote-1381001
+Node: Conditional Exp381093
+Node: Function Calls382829
+Node: Precedence386706
+Node: Locales390365
+Node: Expressions Summary391997
+Node: Patterns and Actions394570
+Node: Pattern Overview395690
+Node: Regexp Patterns397367
+Node: Expression Patterns397909
+Node: Ranges401690
+Node: BEGIN/END404798
+Node: Using BEGIN/END405559
+Ref: Using BEGIN/END-Footnote-1408295
+Node: I/O And BEGIN/END408401
+Node: BEGINFILE/ENDFILE410715
+Node: Empty413622
+Node: Using Shell Variables413939
+Node: Action Overview416213
+Node: Statements418538
+Node: If Statement420386
+Node: While Statement421881
+Node: Do Statement423909
+Node: For Statement425057
+Node: Switch Statement428215
+Node: Break Statement430601
+Node: Continue Statement432693
+Node: Next Statement434520
+Node: Nextfile Statement436903
+Node: Exit Statement439555
+Node: Built-in Variables441958
+Node: User-modified443091
+Node: Auto-set450677
+Ref: Auto-set-Footnote-1465330
+Ref: Auto-set-Footnote-2465536
+Node: ARGC and ARGV465592
+Node: Pattern Action Summary469805
+Node: Arrays472235
+Node: Array Basics473564
+Node: Array Intro474408
+Ref: figure-array-elements476383
+Ref: Array Intro-Footnote-1479087
+Node: Reference to Elements479215
+Node: Assigning Elements481679
+Node: Array Example482170
+Node: Scanning an Array483929
+Node: Controlling Scanning486951
+Ref: Controlling Scanning-Footnote-1492350
+Node: Numeric Array Subscripts492666
+Node: Uninitialized Subscripts494850
+Node: Delete496469
+Ref: Delete-Footnote-1499221
+Node: Multidimensional499278
+Node: Multiscanning502373
+Node: Arrays of Arrays503964
+Node: Arrays Summary508731
+Node: Functions510824
+Node: Built-in511862
+Node: Calling Built-in512943
+Node: Numeric Functions514939
+Ref: Numeric Functions-Footnote-1519772
+Ref: Numeric Functions-Footnote-2520129
+Ref: Numeric Functions-Footnote-3520177
+Node: String Functions520449
+Ref: String Functions-Footnote-1543953
+Ref: String Functions-Footnote-2544081
+Ref: String Functions-Footnote-3544329
+Node: Gory Details544416
+Ref: table-sub-escapes546207
+Ref: table-sub-proposed547726
+Ref: table-posix-sub549089
+Ref: table-gensub-escapes550630
+Ref: Gory Details-Footnote-1551453
+Node: I/O Functions551607
+Ref: table-system-return-values558189
+Ref: I/O Functions-Footnote-1560169
+Ref: I/O Functions-Footnote-2560317
+Node: Time Functions560437
+Ref: Time Functions-Footnote-1570942
+Ref: Time Functions-Footnote-2571010
+Ref: Time Functions-Footnote-3571168
+Ref: Time Functions-Footnote-4571279
+Ref: Time Functions-Footnote-5571391
+Ref: Time Functions-Footnote-6571618
+Node: Bitwise Functions571884
+Ref: table-bitwise-ops572478
+Ref: Bitwise Functions-Footnote-1576816
+Node: Type Functions576989
+Node: I18N Functions579650
+Node: User-defined581301
+Node: Definition Syntax582106
+Ref: Definition Syntax-Footnote-1587793
+Node: Function Example587864
+Ref: Function Example-Footnote-1590786
+Node: Function Caveats590808
+Node: Calling A Function591326
+Node: Variable Scope592284
+Node: Pass By Value/Reference595278
+Node: Return Statement598777
+Node: Dynamic Typing601756
+Node: Indirect Calls602686
+Ref: Indirect Calls-Footnote-1612937
+Node: Functions Summary613065
+Node: Library Functions615770
+Ref: Library Functions-Footnote-1619377
+Ref: Library Functions-Footnote-2619520
+Node: Library Names619691
+Ref: Library Names-Footnote-1623151
+Ref: Library Names-Footnote-2623374
+Node: General Functions623460
+Node: Strtonum Function624563
+Node: Assert Function627585
+Node: Round Function630911
+Node: Cliff Random Function632452
+Node: Ordinal Functions633468
+Ref: Ordinal Functions-Footnote-1636531
+Ref: Ordinal Functions-Footnote-2636783
+Node: Join Function636993
+Ref: Join Function-Footnote-1638763
+Node: Getlocaltime Function638963
+Node: Readfile Function642705
+Node: Shell Quoting644677
+Node: Data File Management646078
+Node: Filetrans Function646710
+Node: Rewind Function650806
+Node: File Checking652712
+Ref: File Checking-Footnote-1654046
+Node: Empty Files654247
+Node: Ignoring Assigns656226
+Node: Getopt Function657776
+Ref: Getopt Function-Footnote-1669245
+Node: Passwd Functions669445
+Ref: Passwd Functions-Footnote-1678284
+Node: Group Functions678372
+Ref: Group Functions-Footnote-1686269
+Node: Walking Arrays686476
+Node: Library Functions Summary689484
+Node: Library Exercises690890
+Node: Sample Programs691355
+Node: Running Examples692125
+Node: Clones692853
+Node: Cut Program694077
+Node: Egrep Program704006
+Ref: Egrep Program-Footnote-1711518
+Node: Id Program711628
+Node: Split Program715308
+Ref: Split Program-Footnote-1718767
+Node: Tee Program718896
+Node: Uniq Program721686
+Node: Wc Program729112
+Ref: Wc Program-Footnote-1733367
+Node: Miscellaneous Programs733461
+Node: Dupword Program734674
+Node: Alarm Program736704
+Node: Translate Program741559
+Ref: Translate Program-Footnote-1746124
+Node: Labels Program746394
+Ref: Labels Program-Footnote-1749745
+Node: Word Sorting749829
+Node: History Sorting753901
+Node: Extract Program755736
+Node: Simple Sed763265
+Node: Igawk Program766339
+Ref: Igawk Program-Footnote-1780670
+Ref: Igawk Program-Footnote-2780872
+Ref: Igawk Program-Footnote-3780994
+Node: Anagram Program781109
+Node: Signature Program784171
+Node: Programs Summary785418
+Node: Programs Exercises786632
+Ref: Programs Exercises-Footnote-1790761
+Node: Advanced Features790852
+Node: Nondecimal Data792842
+Node: Array Sorting794433
+Node: Controlling Array Traversal795133
+Ref: Controlling Array Traversal-Footnote-1803500
+Node: Array Sorting Functions803618
+Ref: Array Sorting Functions-Footnote-1808709
+Node: Two-way I/O808905
+Ref: Two-way I/O-Footnote-1815455
+Ref: Two-way I/O-Footnote-2815642
+Node: TCP/IP Networking815724
+Node: Profiling818842
+Ref: Profiling-Footnote-1827335
+Node: Advanced Features Summary827658
+Node: Internationalization829502
+Node: I18N and L10N830982
+Node: Explaining gettext831669
+Ref: Explaining gettext-Footnote-1837561
+Ref: Explaining gettext-Footnote-2837746
+Node: Programmer i18n837911
+Ref: Programmer i18n-Footnote-1842766
+Node: Translator i18n842815
+Node: String Extraction843609
+Ref: String Extraction-Footnote-1844741
+Node: Printf Ordering844827
+Ref: Printf Ordering-Footnote-1847613
+Node: I18N Portability847677
+Ref: I18N Portability-Footnote-1850133
+Node: I18N Example850196
+Ref: I18N Example-Footnote-1853002
+Node: Gawk I18N853075
+Node: I18N Summary853720
+Node: Debugger855061
+Node: Debugging856083
+Node: Debugging Concepts856524
+Node: Debugging Terms858333
+Node: Awk Debugging860908
+Node: Sample Debugging Session861814
+Node: Debugger Invocation862348
+Node: Finding The Bug863734
+Node: List of Debugger Commands870212
+Node: Breakpoint Control871545
+Node: Debugger Execution Control875239
+Node: Viewing And Changing Data878601
+Node: Execution Stack881975
+Node: Debugger Info883612
+Node: Miscellaneous Debugger Commands887683
+Node: Readline Support892771
+Node: Limitations893667
+Ref: Limitations-Footnote-1897898
+Node: Debugging Summary897949
+Node: Arbitrary Precision Arithmetic899228
+Node: Computer Arithmetic900644
+Ref: table-numeric-ranges904235
+Ref: Computer Arithmetic-Footnote-1904957
+Node: Math Definitions905014
+Ref: table-ieee-formats908328
+Ref: Math Definitions-Footnote-1908931
+Node: MPFR features909036
+Node: FP Math Caution910753
+Ref: FP Math Caution-Footnote-1911825
+Node: Inexactness of computations912194
+Node: Inexact representation913154
+Node: Comparing FP Values914514
+Node: Errors accumulate915596
+Node: Getting Accuracy917029
+Node: Try To Round919739
+Node: Setting precision920638
+Ref: table-predefined-precision-strings921335
+Node: Setting the rounding mode923165
+Ref: table-gawk-rounding-modes923539
+Ref: Setting the rounding mode-Footnote-1926947
+Node: Arbitrary Precision Integers927126
+Ref: Arbitrary Precision Integers-Footnote-1932043
+Node: POSIX Floating Point Problems932192
+Ref: POSIX Floating Point Problems-Footnote-1936074
+Node: Floating point summary936112
+Node: Dynamic Extensions938302
+Node: Extension Intro939855
+Node: Plugin License941121
+Node: Extension Mechanism Outline941918
+Ref: figure-load-extension942357
+Ref: figure-register-new-function943922
+Ref: figure-call-new-function945014
+Node: Extension API Description947076
+Node: Extension API Functions Introduction948608
+Node: General Data Types953467
+Ref: General Data Types-Footnote-1959422
+Node: Memory Allocation Functions959721
+Ref: Memory Allocation Functions-Footnote-1962566
+Node: Constructor Functions962665
+Node: Registration Functions964410
+Node: Extension Functions965095
+Node: Exit Callback Functions967718
+Node: Extension Version String968968
+Node: Input Parsers969631
+Node: Output Wrappers979513
+Node: Two-way processors984025
+Node: Printing Messages986290
+Ref: Printing Messages-Footnote-1987461
+Node: Updating ERRNO987614
+Node: Requesting Values988353
+Ref: table-value-types-returned989090
+Node: Accessing Parameters989973
+Node: Symbol Table Access991208
+Node: Symbol table by name991720
+Node: Symbol table by cookie993741
+Ref: Symbol table by cookie-Footnote-1997893
+Node: Cached values997957
+Ref: Cached values-Footnote-11001464
+Node: Array Manipulation1001555
+Ref: Array Manipulation-Footnote-11002646
+Node: Array Data Types1002683
+Ref: Array Data Types-Footnote-11005341
+Node: Array Functions1005433
+Node: Flattening Arrays1009291
+Node: Creating Arrays1016199
+Node: Redirection API1020968
+Node: Extension API Variables1023799
+Node: Extension Versioning1024432
+Ref: gawk-api-version1024869
+Node: Extension API Informational Variables1026625
+Node: Extension API Boilerplate1027689
+Node: Finding Extensions1031503
+Node: Extension Example1032062
+Node: Internal File Description1032860
+Node: Internal File Ops1036940
+Ref: Internal File Ops-Footnote-11048702
+Node: Using Internal File Ops1048842
+Ref: Using Internal File Ops-Footnote-11051225
+Node: Extension Samples1051499
+Node: Extension Sample File Functions1053028
+Node: Extension Sample Fnmatch1060677
+Node: Extension Sample Fork1062164
+Node: Extension Sample Inplace1063382
+Node: Extension Sample Ord1066592
+Node: Extension Sample Readdir1067428
+Ref: table-readdir-file-types1068317
+Node: Extension Sample Revout1069122
+Node: Extension Sample Rev2way1069711
+Node: Extension Sample Read write array1070451
+Node: Extension Sample Readfile1072393
+Node: Extension Sample Time1073488
+Node: Extension Sample API Tests1074836
+Node: gawkextlib1075328
+Node: Extension summary1077775
+Node: Extension Exercises1081477
+Node: Language History1082975
+Node: V7/SVR3.11084631
+Node: SVR41086783
+Node: POSIX1088217
+Node: BTL1089596
+Node: POSIX/GNU1090325
+Node: Feature History1096187
+Node: Common Extensions1110557
+Node: Ranges and Locales1111840
+Ref: Ranges and Locales-Footnote-11116456
+Ref: Ranges and Locales-Footnote-21116483
+Ref: Ranges and Locales-Footnote-31116718
+Node: Contributors1116939
+Node: History summary1122499
+Node: Installation1123879
+Node: Gawk Distribution1124823
+Node: Getting1125307
+Node: Extracting1126268
+Node: Distribution contents1127906
+Node: Unix Installation1134000
+Node: Quick Installation1134682
+Node: Shell Startup Files1137096
+Node: Additional Configuration Options1138174
+Node: Configuration Philosophy1139979
+Node: Non-Unix Installation1142348
+Node: PC Installation1142806
+Node: PC Binary Installation1144126
+Node: PC Compiling1145978
+Ref: PC Compiling-Footnote-11148772
+Node: PC Testing1148881
+Node: PC Using1150061
+Ref: PC Using-Footnote-11154214
+Node: Cygwin1154287
+Node: MSYS1155057
+Node: VMS Installation1155558
+Node: VMS Compilation1156349
+Ref: VMS Compilation-Footnote-11157578
+Node: VMS Dynamic Extensions1157636
+Node: VMS Installation Details1159321
+Node: VMS Running1161574
+Node: VMS GNV1165853
+Node: VMS Old Gawk1166588
+Node: Bugs1167059
+Node: Other Versions1171374
+Node: Installation summary1177958
+Node: Notes1179009
+Node: Compatibility Mode1179874
+Node: Additions1180656
+Node: Accessing The Source1181581
+Node: Adding Code1183016
+Node: New Ports1189235
+Node: Derived Files1193723
+Ref: Derived Files-Footnote-11199208
+Ref: Derived Files-Footnote-21199243
+Ref: Derived Files-Footnote-31199841
+Node: Future Extensions1199955
+Node: Implementation Limitations1200613
+Node: Extension Design1201796
+Node: Old Extension Problems1202950
+Ref: Old Extension Problems-Footnote-11204468
+Node: Extension New Mechanism Goals1204525
+Ref: Extension New Mechanism Goals-Footnote-11207889
+Node: Extension Other Design Decisions1208078
+Node: Extension Future Growth1210191
+Node: Old Extension Mechanism1211027
+Node: Notes summary1212790
+Node: Basic Concepts1213972
+Node: Basic High Level1214653
+Ref: figure-general-flow1214935
+Ref: figure-process-flow1215620
+Ref: Basic High Level-Footnote-11218921
+Node: Basic Data Typing1219106
+Node: Glossary1222434
+Node: Copying1254381
+Node: GNU Free Documentation License1291920
+Node: Index1317038
 
 End Tag Table
diff --git a/doc/gawk.texi b/doc/gawk.texi
index 91c4893e..60dfe961 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -12646,19 +12646,19 @@ One special place where @code{/foo/} is @emph{not} an abbreviation for
 where this is discussed in more detail.
 
 @node POSIX String Comparison
-@subsubsection String Comparison with POSIX Rules
+@subsubsection String Comparison Based on Locale Collating Order
 
-The POSIX standard says that string comparison is performed based
-on the locale's @dfn{collating order}. This is the order in which
-characters sort, as defined by the locale (for more discussion,
-@pxref{Locales}).  This order is usually very different
-from the results obtained when doing straight character-by-character
-comparison.@footnote{Technically, string comparison is supposed
-to behave the same way as if the strings were compared with the C
-@code{strcoll()} function.}
+The POSIX standard used to say that all string comparisons are
+performed based on the locale's @dfn{collating order}. This
+is the order in which characters sort, as defined by the locale
+(for more discussion, @pxref{Locales}).  This order is usually very
+different from the results obtained when doing straight byte-by-byte
+comparison.@footnote{Technically, string comparison is supposed to behave
+the same way as if the strings were compared with the C @code{strcoll()}
+function.}
 
 Because this behavior differs considerably from existing practice,
-@command{gawk} only implements it when in POSIX mode (@pxref{Options}).
+@command{gawk} has only ever implemented it when in POSIX mode (@pxref{Options}).
 Here is an example to illustrate the difference, in an @code{en_US.UTF-8}
 locale:
 
@@ -12671,6 +12671,26 @@ $ @kbd{gawk --posix 'BEGIN @{ printf("ABC < abc = %s\n",}
 @print{} ABC < abc = FALSE
 @end example
 
+Fortunately, as of August 2016, comparison based on locale
+collating order is no longer required for the @code{==} and @code{!=}
+operators.@footnote{See @uref{http://austingroupbugs.net/view.php?id=1070,
+the Austin Group website}.} However, comparison based on locales is still
+required for @code{<}, @code{<=}, @code{>}, and @code{>=}.  POSIX thus
+makes the following recommendation:
+
+@quotation
+Since the @code{==} operator checks whether strings are identical,
+not whether they collate equally, applications needing to check whether
+strings collate equally can use:
+
+@example
+a <= b && a >= b
+@end example
+@end quotation
+
+As of @value{PVERSION} 4.2, @command{gawk} continues to use locale
+collating order for @code{<}, @code{<=}, @code{>}, and @code{>=} only
+in POSIX mode.
 
 @node Boolean Ops
 @subsection Boolean Expressions
@@ -37458,7 +37478,7 @@ and
 @uref{http://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xbd_chap09.html#tag_21_09_03_05, its rationale}.}
 
 By using this lovely technical term, the standard gives license
-to implementors to implement ranges in whatever way they choose.
+to implementers to implement ranges in whatever way they choose.
 The @command{gawk} maintainer chose to apply the pre-POSIX meaning
 both with the default regexp matching and when @option{--traditional} or
 @option{--posix} are used.
diff --git a/doc/gawktexi.in b/doc/gawktexi.in
index 6d7eceb9..546f7611 100644
--- a/doc/gawktexi.in
+++ b/doc/gawktexi.in
@@ -11965,19 +11965,19 @@ One special place where @code{/foo/} is @emph{not} an abbreviation for
 where this is discussed in more detail.
 
 @node POSIX String Comparison
-@subsubsection String Comparison with POSIX Rules
+@subsubsection String Comparison Based on Locale Collating Order
 
-The POSIX standard says that string comparison is performed based
-on the locale's @dfn{collating order}. This is the order in which
-characters sort, as defined by the locale (for more discussion,
-@pxref{Locales}).  This order is usually very different
-from the results obtained when doing straight character-by-character
-comparison.@footnote{Technically, string comparison is supposed
-to behave the same way as if the strings were compared with the C
-@code{strcoll()} function.}
+The POSIX standard used to say that all string comparisons are
+performed based on the locale's @dfn{collating order}. This
+is the order in which characters sort, as defined by the locale
+(for more discussion, @pxref{Locales}).  This order is usually very
+different from the results obtained when doing straight byte-by-byte
+comparison.@footnote{Technically, string comparison is supposed to behave
+the same way as if the strings were compared with the C @code{strcoll()}
+function.}
 
 Because this behavior differs considerably from existing practice,
-@command{gawk} only implements it when in POSIX mode (@pxref{Options}).
+@command{gawk} has only ever implemented it when in POSIX mode (@pxref{Options}).
 Here is an example to illustrate the difference, in an @code{en_US.UTF-8}
 locale:
 
@@ -11990,6 +11990,26 @@ $ @kbd{gawk --posix 'BEGIN @{ printf("ABC < abc = %s\n",}
 @print{} ABC < abc = FALSE
 @end example
 
+Fortunately, as of August 2016, comparison based on locale
+collating order is no longer required for the @code{==} and @code{!=}
+operators.@footnote{See @uref{http://austingroupbugs.net/view.php?id=1070,
+the Austin Group website}.} However, comparison based on locales is still
+required for @code{<}, @code{<=}, @code{>}, and @code{>=}.  POSIX thus
+makes the following recommendation:
+
+@quotation
+Since the @code{==} operator checks whether strings are identical,
+not whether they collate equally, applications needing to check whether
+strings collate equally can use:
+
+@example
+a <= b && a >= b
+@end example
+@end quotation
+
+As of @value{PVERSION} 4.2, @command{gawk} continues to use locale
+collating order for @code{<}, @code{<=}, @code{>}, and @code{>=} only
+in POSIX mode.
 
 @node Boolean Ops
 @subsection Boolean Expressions
@@ -36540,7 +36560,7 @@ and
 @uref{http://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xbd_chap09.html#tag_21_09_03_05, its rationale}.}
 
 By using this lovely technical term, the standard gives license
-to implementors to implement ranges in whatever way they choose.
+to implementers to implement ranges in whatever way they choose.
 The @command{gawk} maintainer chose to apply the pre-POSIX meaning
 both with the default regexp matching and when @option{--traditional} or
 @option{--posix} are used.
diff --git a/eval.c b/eval.c
index 6bd854e9..bfe6b3c0 100644
--- a/eval.c
+++ b/eval.c
@@ -575,7 +575,7 @@ posix_compare(NODE *s1, NODE *s2)
 /* cmp_nodes --- compare two nodes, returning negative, 0, positive */
 
 int
-cmp_nodes(NODE *t1, NODE *t2)
+cmp_nodes(NODE *t1, NODE *t2, bool use_strcmp)
 {
 	int ret = 0;
 	size_t len1, len2;
@@ -598,7 +598,7 @@ cmp_nodes(NODE *t1, NODE *t2)
 	if (len1 == 0 || len2 == 0)
 		return ldiff;
 
-	if (do_posix)
+	if (do_posix && ! use_strcmp)
 		return posix_compare(t1, t2);
 
 	l = (ldiff <= 0 ? len1 : len2);
@@ -885,7 +885,7 @@ fmt_index(NODE *n)
 		emalloc(fmt_list, NODE **, fmt_num*sizeof(*fmt_list), "fmt_index");
 	n = force_string(n);
 	while (ix < fmt_hiwater) {
-		if (cmp_nodes(fmt_list[ix], n) == 0)
+		if (cmp_nodes(fmt_list[ix], n, true) == 0)
 			return ix;
 		ix++;
 	}
@@ -1514,10 +1514,15 @@ eval_condition(NODE *t)
 	return boolval(t);
 }
 
+typedef enum {
+	SCALAR_EQ_NEQ,
+	SCALAR_RELATIONAL
+} scalar_cmp_t;
+
 /* cmp_scalars -- compare two nodes on the stack */
 
 static inline int
-cmp_scalars()
+cmp_scalars(scalar_cmp_t comparison_type)
 {
 	NODE *t1, *t2;
 	int di;
@@ -1528,7 +1533,7 @@ cmp_scalars()
 		DEREF(t2);
 		fatal(_("attempt to use array `%s' in a scalar context"), array_vname(t1));
 	}
-	di = cmp_nodes(t1, t2);
+	di = cmp_nodes(t1, t2, comparison_type == SCALAR_EQ_NEQ);
 	DEREF(t1);
 	DEREF(t2);
 	return di;
diff --git a/extension/configure.ac b/extension/configure.ac
index b723a3c1..b5b27d03 100644
--- a/extension/configure.ac
+++ b/extension/configure.ac
@@ -23,7 +23,7 @@ dnl
 
 dnl Process this file with autoconf to produce a configure script.
 
-AC_INIT([GNU Awk Bundled Extensions],[4.1.3],[bug-gawk@gnu.org],[gawk-extensions])
+AC_INIT([GNU Awk Bundled Extensions],[4.1.4],[bug-gawk@gnu.org],[gawk-extensions])
 
 AC_CONFIG_MACRO_DIR([m4])
 AC_CONFIG_AUX_DIR([build-aux])
diff --git a/interpret.h b/interpret.h
index 3bb4532e..5467aa87 100644
--- a/interpret.h
+++ b/interpret.h
@@ -444,37 +444,37 @@ uninitialized_scalar:
 			break;
 
 		case Op_equal:
-			r = node_Boolean[cmp_scalars() == 0];
+			r = node_Boolean[cmp_scalars(SCALAR_EQ_NEQ) == 0];
 			UPREF(r);
 			REPLACE(r);
 			break;
 
 		case Op_notequal:
-			r = node_Boolean[cmp_scalars() != 0];
+			r = node_Boolean[cmp_scalars(SCALAR_EQ_NEQ) != 0];
 			UPREF(r);
 			REPLACE(r);
 			break;
 
 		case Op_less:
-			r = node_Boolean[cmp_scalars() < 0];
+			r = node_Boolean[cmp_scalars(SCALAR_RELATIONAL) < 0];
 			UPREF(r);
 			REPLACE(r);
 			break;
 
 		case Op_greater:
-			r = node_Boolean[cmp_scalars() > 0];
+			r = node_Boolean[cmp_scalars(SCALAR_RELATIONAL) > 0];
 			UPREF(r);
 			REPLACE(r);
 			break;
 
 		case Op_leq:
-			r = node_Boolean[cmp_scalars() <= 0];
+			r = node_Boolean[cmp_scalars(SCALAR_RELATIONAL) <= 0];
 			UPREF(r);
 			REPLACE(r);
 			break;
 
 		case Op_geq:
-			r = node_Boolean[cmp_scalars() >= 0];
+			r = node_Boolean[cmp_scalars(SCALAR_RELATIONAL) >= 0];
 			UPREF(r);
 			REPLACE(r);
 			break;
@@ -832,12 +832,11 @@ mod:
 				t2 = TOP_SCALAR();	/* switch expression */
 				t2 = force_string(t2);
 				rp = re_update(m);
-				di = (research(rp, t2->stptr, 0, t2->stlen,
-							avoid_dfa(m, t2->stptr, t2->stlen)) >= 0);
+				di = (research(rp, t2->stptr, 0, t2->stlen, RE_NO_FLAGS) >= 0);
 			} else {
 				t1 = POP_SCALAR();	/* case value */
 				t2 = TOP_SCALAR();	/* switch expression */
-				di = (cmp_nodes(t2, t1) == 0);
+				di = (cmp_nodes(t2, t1, true) == 0);
 				DEREF(t1);
 			}
 
@@ -998,20 +997,7 @@ arrayfor:
 			t1 = *get_field(0, (Func_ptr *) 0);
 match_re:
 			rp = re_update(m);
-			/*
-			 * Any place where research() is called with a last parameter of
-			 * zero, we need to use the avoid_dfa test. This appears here and
-			 * in the code for Op_K_case.
-			 *
-			 * A new or improved dfa that distinguishes beginning/end of
-			 * string from beginning/end of line will allow us to get rid of
-			 * this hack.
-			 *
-			 * The avoid_dfa() function is in re.c; it is not very smart.
-			 */
-
-			di = research(rp, t1->stptr, 0, t1->stlen,
-								avoid_dfa(m, t1->stptr, t1->stlen));
+			di = research(rp, t1->stptr, 0, t1->stlen, RE_NO_FLAGS);
 			di = (di == -1) ^ (op != Op_nomatch);
 			if (op != Op_match_rec) {
 				decr_sp();
diff --git a/re.c b/re.c
index a4a03904..f05cc467 100644
--- a/re.c
+++ b/re.c
@@ -170,7 +170,6 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
 
 	emalloc(rp, Regexp *, sizeof(*rp), "make_regexp");
 	memset((char *) rp, 0, sizeof(*rp));
-	rp->dfareg = NULL;
 	rp->pat.allocated = 0;	/* regex will allocate the buffer */
 	emalloc(rp->pat.fastmap, char *, 256, "make_regexp");
 
@@ -223,12 +222,11 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
 	/* gack. this must be done *after* re_compile_pattern */
 	rp->pat.newline_anchor = false; /* don't get \n in middle of string */
 	if (dfa && ! no_dfa) {
-		rp->dfa = true;
 		rp->dfareg = dfaalloc();
 		dfasyntax(rp->dfareg, dfa_syn, ignorecase, '\n');
 		dfacomp(buf, len, rp->dfareg, true);
 	} else
-		rp->dfa = false;
+		rp->dfareg = NULL;
 	rp->has_anchor = has_anchor;
 
 	/* Additional flags that help with RS as regexp. */
@@ -278,26 +276,25 @@ research(Regexp *rp, char *str, int start,
 	 * starts in the middle of a string, so don't bother trying it
 	 * in that case.
 	 */
-	if (rp->dfa && ! no_bol && start == 0) {
-		char save;
-		size_t count = 0;
+	if (rp->dfareg != NULL && ! no_bol && start == 0) {
 		struct dfa *superset = dfasuperset(rp->dfareg);
-		/*
-		 * dfa likes to stick a '\n' right after the matched
-		 * text.  So we just save and restore the character.
-		 */
-		save = str[start+len];
 		if (superset)
 			ret = dfaexec(superset, str+start, str+start+len,
 							true, NULL, NULL);
-		if (ret)
+
+		if (ret && ((! need_start && ! rp->has_anchor)
+				|| (! superset && dfaisfast(rp->dfareg))))
 			ret = dfaexec(rp->dfareg, str+start, str+start+len,
-						true, &count, &try_backref);
-		str[start+len] = save;
+						true, NULL, &try_backref);
 	}
 
 	if (ret) {
-		if (need_start || rp->dfa == false || try_backref) {
+		if (   rp->dfareg == NULL
+			|| start != 0
+			|| no_bol
+			|| need_start
+			|| rp->has_anchor
+			|| try_backref) {
 			/*
 			 * Passing NULL as last arg speeds up search for cases
 			 * where we don't need the start/end info.
@@ -326,7 +323,7 @@ refree(Regexp *rp)
 		free(rp->regs.start);
 	if (rp->regs.end)
 		free(rp->regs.end);
-	if (rp->dfa) {
+	if (rp->dfareg != NULL) {
 		dfafree(rp->dfareg);
 		free(rp->dfareg);
 	}
@@ -363,7 +360,7 @@ re_update(NODE *t)
 		}
 		if (t->re_text != NULL) {
 			/* if contents haven't changed, just return it */
-			if (cmp_nodes(t->re_text, t1) == 0)
+			if (cmp_nodes(t->re_text, t1, true) == 0)
 				return t->re_reg;
 			/* things changed, fall through to recompile */
 			unref(t->re_text);
@@ -429,32 +426,6 @@ resetup()
 	dfa_init();
 }
 
-/* avoid_dfa --- return true if we should not use the DFA matcher */
-
-int
-avoid_dfa(NODE *re, char *str, size_t len)
-{
-	char *end;
-
-	/*
-	 * f = @/.../
-	 * if ("foo" ~ f) ...
-	 *
-	 * This creates a Node_dynregex with NULL re_reg.
-	 */
-	if (re->re_reg == NULL)
-		return false;
-
-	if (! re->re_reg->has_anchor)
-		return false;
-
-	for (end = str + len; str < end; str++)
-		if (*str == '\n')
-			return true;
-
-	return false;
-}
-
 /* reisstring --- return true if the RE match is a simple string match */
 
 int
author	Arnold D. Robbins <arnold@skeeve.com>	2016-08-25 22:14:15 +0300
committer	Arnold D. Robbins <arnold@skeeve.com>	2016-08-25 22:14:15 +0300
commit	b03d089e9b87c4e64bd539a1703e740923a67aa4 (patch)
tree	c7351e0b46c45d282eba64e478c99c0771a055a1
parent	e0dd835cc155c900ca9725a0d36eb0f5a856d9bf (diff)
parent	00682d87a1a1c0535c0fa5adb27867578dc76d49 (diff)
download	egawk-b03d089e9b87c4e64bd539a1703e740923a67aa4.tar.gz egawk-b03d089e9b87c4e64bd539a1703e740923a67aa4.tar.bz2 egawk-b03d089e9b87c4e64bd539a1703e740923a67aa4.zip