diff options
-rw-r--r-- | ChangeLog | 14 | ||||
-rw-r--r-- | Makefile.am | 15 | ||||
-rw-r--r-- | Makefile.in | 15 | ||||
-rw-r--r-- | TODO | 2 | ||||
-rw-r--r-- | dfa.c | 855 | ||||
-rw-r--r-- | dfa.h | 13 | ||||
-rw-r--r-- | doc/ChangeLog | 8 | ||||
-rw-r--r-- | doc/Makefile.am | 5 | ||||
-rw-r--r-- | doc/Makefile.in | 5 | ||||
-rw-r--r-- | doc/awkcard.in | 6 | ||||
-rw-r--r-- | doc/gawk.info | 546 | ||||
-rw-r--r-- | doc/gawk.texi | 12 | ||||
-rw-r--r-- | doc/gawktexi.in | 12 | ||||
-rw-r--r-- | node.c | 2 | ||||
-rw-r--r-- | po/gawk.pot | 26 | ||||
-rw-r--r-- | re.c | 17 |
16 files changed, 822 insertions, 731 deletions
@@ -1,3 +1,17 @@ +2016-08-23 Arnold D. Robbins <arnold@skeeve.com> + + * dfa.h: Sync with grep. API changes. + * dfa.c: Sync with grep. + * re.c (make_regexp): Adjust for API changes, move call to dfasyntax + into stanza that compiles the regex. + (resetup): Call dfa_init. + * node.c (str2wstr): using_utf8 is now called dfa_using_utf8. + + Unrelated: + + * Makefile.am: Quote all uses of $(srcdir) and $(distdir). + (spell): New target. + 2016-08-18 Arnold D. Robbins <arnold@skeeve.com> * dfa.c: Sync with grep. diff --git a/Makefile.am b/Makefile.am index 0bc2143e..dce65018 100644 --- a/Makefile.am +++ b/Makefile.am @@ -185,16 +185,16 @@ check-local: gawk$(EXEEXT) # A little extra clean up when making distributions. # And additional set up for the pc directory. dist-hook: - cd $(distdir)/extension ; rm -f *.o *.so - cd $(srcdir)/pc ; \ + cd "$(distdir)"/extension ; rm -f *.o *.so + cd "$(srcdir)"/pc ; \ chmod u+w config.h ; \ sed -n -f configpk.sed < ../configure.ac > /tmp/tmp.sed ; \ sed -f config.sed < ../configh.in > /tmp/config.tmp ; \ sed -f /tmp/tmp.sed < /tmp/config.tmp > config.h ; \ $(RM) /tmp/tmp.sed /tmp/config.tmp pwd - chmod u+w $(distdir)/pc/config.h - cp $(srcdir)/pc/config.h $(distdir)/pc/config.h + chmod u+w "$(distdir)"/pc/config.h + cp "$(srcdir)"/pc/config.h "$(distdir)"/pc/config.h # Special rules for individual files # Use of awk instead of $(AWK) is deliberate, in case gawk doesn't build @@ -202,14 +202,14 @@ dist-hook: awkgram.c: awkgram.y $(YACC) $(AM_YFLAGS) $(YFLAGS) $< - sed 's/parse error/syntax error/g' < y.tab.c | awk -f $(srcdir)/bisonfix.awk awkgram > $*.c && rm y.tab.c + sed 's/parse error/syntax error/g' < y.tab.c | awk -f "$(srcdir)"/bisonfix.awk awkgram > $*.c && rm y.tab.c if test -f y.tab.h; then \ if cmp -s y.tab.h $*.h; then rm -f y.tab.h; else mv y.tab.h $*.h; fi; \ else :; fi command.c: command.y $(YACC) -p zz $< - sed 's/parse error/syntax error/g' < y.tab.c | awk -f $(srcdir)/bisonfix.awk command > $*.c && rm y.tab.c + sed 's/parse error/syntax error/g' < y.tab.c | awk -f "$(srcdir)"/bisonfix.awk command > $*.c && rm y.tab.c # This is for my development & testing. efence: gawk @@ -227,3 +227,6 @@ valgrind-noleak: cd test; rm -f log.[0-9]*; \ make check VALGRIND="valgrind --leak-check=no --log-file=log.%p"; \ make valgrind-scan + +spell: + cd "$(srcdir)"/doc ; $(MAKE) spell diff --git a/Makefile.in b/Makefile.in index 5585046e..036361cb 100644 --- a/Makefile.in +++ b/Makefile.in @@ -1197,16 +1197,16 @@ check-local: gawk$(EXEEXT) # A little extra clean up when making distributions. # And additional set up for the pc directory. dist-hook: - cd $(distdir)/extension ; rm -f *.o *.so - cd $(srcdir)/pc ; \ + cd "$(distdir)"/extension ; rm -f *.o *.so + cd "$(srcdir)"/pc ; \ chmod u+w config.h ; \ sed -n -f configpk.sed < ../configure.ac > /tmp/tmp.sed ; \ sed -f config.sed < ../configh.in > /tmp/config.tmp ; \ sed -f /tmp/tmp.sed < /tmp/config.tmp > config.h ; \ $(RM) /tmp/tmp.sed /tmp/config.tmp pwd - chmod u+w $(distdir)/pc/config.h - cp $(srcdir)/pc/config.h $(distdir)/pc/config.h + chmod u+w "$(distdir)"/pc/config.h + cp "$(srcdir)"/pc/config.h "$(distdir)"/pc/config.h # Special rules for individual files # Use of awk instead of $(AWK) is deliberate, in case gawk doesn't build @@ -1214,14 +1214,14 @@ dist-hook: awkgram.c: awkgram.y $(YACC) $(AM_YFLAGS) $(YFLAGS) $< - sed 's/parse error/syntax error/g' < y.tab.c | awk -f $(srcdir)/bisonfix.awk awkgram > $*.c && rm y.tab.c + sed 's/parse error/syntax error/g' < y.tab.c | awk -f "$(srcdir)"/bisonfix.awk awkgram > $*.c && rm y.tab.c if test -f y.tab.h; then \ if cmp -s y.tab.h $*.h; then rm -f y.tab.h; else mv y.tab.h $*.h; fi; \ else :; fi command.c: command.y $(YACC) -p zz $< - sed 's/parse error/syntax error/g' < y.tab.c | awk -f $(srcdir)/bisonfix.awk command > $*.c && rm y.tab.c + sed 's/parse error/syntax error/g' < y.tab.c | awk -f "$(srcdir)"/bisonfix.awk command > $*.c && rm y.tab.c # This is for my development & testing. efence: gawk @@ -1240,6 +1240,9 @@ valgrind-noleak: make check VALGRIND="valgrind --leak-check=no --log-file=log.%p"; \ make valgrind-scan +spell: + cd "$(srcdir)"/doc ; $(MAKE) spell + # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: @@ -15,6 +15,8 @@ TODO Minor Cleanups and Code Improvements ------------------------------------ + Use fwrite_unlocked if available. Brings about 7% speedup for output. + API: ??? #if !defined(GAWK) && !defined(GAWK_OMIT_CONVENIENCE_MACROS) @@ -355,14 +355,85 @@ struct mb_char_classes size_t nchars; }; +struct regex_syntax +{ + /* Syntax bits controlling the behavior of the lexical analyzer. */ + reg_syntax_t syntax_bits; + bool syntax_bits_set; + + /* Flag for case-folding letters into sets. */ + bool case_fold; + + /* End-of-line byte in data. */ + unsigned char eolbyte; + + /* Cache of char-context values. */ + int sbit[NOTCHAR]; + + /* If never_trail[B], the byte B cannot be a non-initial byte in a + multibyte character. */ + bool never_trail[NOTCHAR]; + + /* Set of characters considered letters. */ + charclass letters; + + /* Set of characters that are newline. */ + charclass newline; +}; + +/* Lexical analyzer. All the dross that deals with the obnoxious + GNU Regex syntax bits is located here. The poor, suffering + reader is referred to the GNU Regex documentation for the + meaning of the @#%!@#%^!@ syntax bits. */ +struct lexer_state +{ + char const *lexptr; /* Pointer to next input character. */ + size_t lexleft; /* Number of characters remaining. */ + token lasttok; /* Previous token returned; initially END. */ + size_t parens; /* Count of outstanding left parens. */ + int minrep, maxrep; /* Repeat counts for {m,n}. */ + + /* Wide character representation of the current multibyte character, + or WEOF if there was an encoding error. Used only if + MB_CUR_MAX > 1. */ + wint_t wctok; + + /* Length of the multibyte representation of wctok. */ + int cur_mb_len; + + /* We're separated from beginning or (, | only by zero-width characters. */ + bool laststart; +}; + +/* Recursive descent parser for regular expressions. */ + +struct parser_state +{ + token tok; /* Lookahead token. */ + size_t depth; /* Current depth of a hypothetical stack + holding deferred productions. This is + used to determine the depth that will be + required of the real stack later on in + dfaanalyze. */ +}; + /* A compiled regular expression. */ struct dfa { + /* Syntax configuration */ + struct regex_syntax syntax; + /* Fields filled by the scanner. */ charclass *charclasses; /* Array of character sets for CSET tokens. */ size_t cindex; /* Index for adding new charclasses. */ size_t calloc; /* Number of charclasses allocated. */ + /* Scanner state */ + struct lexer_state lexstate; + + /* Parser state */ + struct parser_state parsestate; + /* Fields filled by the parser. */ token *tokens; /* Postfix parse array. */ size_t tindex; /* Index for adding new tokens. */ @@ -478,7 +549,7 @@ struct dfa #define ACCEPTS_IN_CONTEXT(prev, curr, state, dfa) \ SUCCEEDS_IN_CONTEXT ((dfa).states[state].constraint, prev, curr) -static void regexp (void); +static void regexp (struct dfa *dfa); /* A table indexed by byte values that contains the corresponding wide character (if any) for that byte. WEOF means the byte is not a @@ -697,39 +768,6 @@ dfa_charclass_index (struct dfa *d, charclass const s) return i; } -/* A pointer to the current dfa is kept here during parsing. */ -static struct dfa *dfa; - -/* Find the index of charclass S in the current DFA, or allocate a new one. */ -static size_t -charclass_index (charclass const s) -{ - return dfa_charclass_index (dfa, s); -} - -/* Syntax bits controlling the behavior of the lexical analyzer. */ -static reg_syntax_t syntax_bits; -static bool syntax_bits_set; - -/* Flag for case-folding letters into sets. */ -static bool case_fold; - -/* End-of-line byte in data. */ -static unsigned char eolbyte; - -/* Cache of char-context values. */ -static int sbit[NOTCHAR]; - -/* If never_trail[B], the byte B cannot be a non-initial byte in a - multibyte character. */ -static bool never_trail[NOTCHAR]; - -/* Set of characters considered letters. */ -static charclass letters; - -/* Set of characters that are newline. */ -static charclass newline; - static bool unibyte_word_constituent (unsigned char c) { @@ -737,25 +775,29 @@ unibyte_word_constituent (unsigned char c) } static int -char_context (unsigned char c) +char_context (struct dfa const *dfa, unsigned char c) { - if (c == eolbyte) + if (c == dfa->syntax.eolbyte) return CTX_NEWLINE; if (unibyte_word_constituent (c)) return CTX_LETTER; return CTX_NONE; } -/* Entry point to set syntax options. */ -void -dfasyntax (reg_syntax_t bits, bool fold, unsigned char eol) +/* UTF-8 encoding allows some optimizations that we can't otherwise + assume in a multibyte encoding. */ +static bool using_utf8; + +bool +dfa_using_utf8 (void) { - int i; - syntax_bits_set = true; - syntax_bits = bits; - case_fold = fold; - eolbyte = eol; + return using_utf8; +} +static void +init_mbrtowc_cache (void) +{ + int i; for (i = CHAR_MIN; i <= CHAR_MAX; ++i) { char c = i; @@ -763,23 +805,39 @@ dfasyntax (reg_syntax_t bits, bool fold, unsigned char eol) mbstate_t s = { 0 }; wchar_t wc; mbrtowc_cache[uc] = mbrtowc (&wc, &c, 1, &s) <= 1 ? wc : WEOF; + } +} + +/* Entry point to set syntax options. */ +void +dfasyntax (struct dfa *dfa, reg_syntax_t bits, bool fold, unsigned char eol) +{ + int i; + dfa->syntax.syntax_bits_set = true; + dfa->syntax.syntax_bits = bits; + dfa->syntax.case_fold = fold; + dfa->syntax.eolbyte = eol; - /* Now that mbrtowc_cache[uc] is set, use it to calculate sbit. */ - sbit[uc] = char_context (uc); - switch (sbit[uc]) + for (i = CHAR_MIN; i <= CHAR_MAX; ++i) + { + unsigned char uc = i; + + /* Use mbrtowc_cache to calculate sbit. */ + dfa->syntax.sbit[uc] = char_context (dfa, uc); + switch (dfa->syntax.sbit[uc]) { case CTX_LETTER: - setbit (uc, letters); + setbit (uc, dfa->syntax.letters); break; case CTX_NEWLINE: - setbit (uc, newline); + setbit (uc, dfa->syntax.newline); break; } /* POSIX requires that the five bytes in "\n\r./" (including the terminating NUL) cannot occur inside a multibyte character. */ - never_trail[uc] = (using_utf8 () ? (uc & 0xc0) != 0x80 - : strchr ("\n\r./", uc) != NULL); + dfa->syntax.never_trail[uc] = (using_utf8 ? (uc & 0xc0) != 0x80 + : strchr ("\n\r./", uc) != NULL); } } @@ -811,21 +869,21 @@ setbit_case_fold_c (int b, charclass c) setbit (i, c); } +static void check_utf8 (void) +{ + wchar_t wc; + mbstate_t mbs = { 0 }; + using_utf8 = mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100; +} +static bool unibyte_c; -/* UTF-8 encoding allows some optimizations that we can't otherwise - assume in a multibyte encoding. */ -bool -using_utf8 (void) +static void check_unibyte_c (void) { - static int utf8 = -1; - if (utf8 < 0) - { - wchar_t wc; - mbstate_t mbs = { 0 }; - utf8 = mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100; - } - return utf8; + char const *locale = setlocale (LC_ALL, NULL); + unibyte_c = (!locale + || STREQ (locale, "C") + || STREQ (locale, "POSIX")); } /* The current locale is known to be a unibyte locale @@ -834,7 +892,7 @@ using_utf8 (void) processed more efficiently. */ static bool -using_simple_locale (void) +using_simple_locale (struct dfa const *dfa) { /* The native character set is known to be compatible with the C locale. The following test isn't perfect, but it's good @@ -852,44 +910,9 @@ using_simple_locale (void) && '}' == 125 && '~' == 126) }; - if (! native_c_charset || dfa->multibyte) - return false; - else - { - static int unibyte_c = -1; - if (unibyte_c < 0) - { - char const *locale = setlocale (LC_ALL, NULL); - unibyte_c = (!locale - || STREQ (locale, "C") - || STREQ (locale, "POSIX")); - } - return unibyte_c; - } + return (!native_c_charset || dfa->multibyte) ? false : unibyte_c; } -/* Lexical analyzer. All the dross that deals with the obnoxious - GNU Regex syntax bits is located here. The poor, suffering - reader is referred to the GNU Regex documentation for the - meaning of the @#%!@#%^!@ syntax bits. */ - -static char const *lexptr; /* Pointer to next input character. */ -static size_t lexleft; /* Number of characters remaining. */ -static token lasttok; /* Previous token returned; initially END. */ -static bool laststart; /* We're separated from beginning or (, - | only by zero-width characters. */ -static size_t parens; /* Count of outstanding left parens. */ -static int minrep, maxrep; /* Repeat counts for {m,n}. */ - -static int cur_mb_len = 1; /* Length of the multibyte representation of - wctok. */ - -static wint_t wctok; /* Wide character representation of the current - multibyte character, or WEOF if there was - an encoding error. Used only if - MB_CUR_MAX > 1. */ - - /* Fetch the next lexical input character. Set C (of type int) to the next input byte, except set C to EOF if the input is a multibyte character of length greater than 1. Set WC (of type wint_t) to the @@ -897,24 +920,25 @@ static wint_t wctok; /* Wide character representation of the current of length 1); otherwise set WC to WEOF. If there is no more input, report EOFERR if EOFERR is not null, and return lasttok = END otherwise. */ -# define FETCH_WC(c, wc, eoferr) \ +# define FETCH_WC(dfa, c, wc, eoferr) \ do { \ - if (! lexleft) \ + if (! dfa->lexstate.lexleft) \ { \ if ((eoferr) != 0) \ dfaerror (eoferr); \ else \ - return lasttok = END; \ + return dfa->lexstate.lasttok = END; \ } \ else \ { \ wint_t _wc; \ - size_t nbytes = mbs_to_wchar (&_wc, lexptr, lexleft, dfa); \ - cur_mb_len = nbytes; \ + size_t nbytes = mbs_to_wchar (&_wc, dfa->lexstate.lexptr, \ + dfa->lexstate.lexleft, dfa); \ + dfa->lexstate.cur_mb_len = nbytes; \ (wc) = _wc; \ - (c) = nbytes == 1 ? to_uchar (*lexptr) : EOF; \ - lexptr += nbytes; \ - lexleft -= nbytes; \ + (c) = nbytes == 1 ? to_uchar (*dfa->lexstate.lexptr) : EOF; \ + dfa->lexstate.lexptr += nbytes; \ + dfa->lexstate.lexleft -= nbytes; \ } \ } while (false) @@ -1011,7 +1035,7 @@ find_pred (const char *str) /* Multibyte character handling sub-routine for lex. Parse a bracket expression and build a struct mb_char_classes. */ static token -parse_bracket_exp (void) +parse_bracket_exp (struct dfa *dfa) { bool invert; int c, c1, c2; @@ -1055,12 +1079,12 @@ parse_bracket_exp (void) work_mbc = NULL; memset (ccl, 0, sizeof ccl); - FETCH_WC (c, wc, _("unbalanced [")); + FETCH_WC (dfa, c, wc, _("unbalanced [")); if (c == '^') { - FETCH_WC (c, wc, _("unbalanced [")); + FETCH_WC (dfa, c, wc, _("unbalanced [")); invert = true; - known_bracket_exp = using_simple_locale (); + known_bracket_exp = using_simple_locale (dfa); } else invert = false; @@ -1077,9 +1101,9 @@ parse_bracket_exp (void) dfa is ever called. */ if (c == '[') { - FETCH_WC (c1, wc1, _("unbalanced [")); + FETCH_WC (dfa, c1, wc1, _("unbalanced [")); - if ((c1 == ':' && (syntax_bits & RE_CHAR_CLASSES)) + if ((c1 == ':' && (dfa->syntax.syntax_bits & RE_CHAR_CLASSES)) || c1 == '.' || c1 == '=') { enum { MAX_BRACKET_STRING_LEN = 32 }; @@ -1087,8 +1111,9 @@ parse_bracket_exp (void) size_t len = 0; for (;;) { - FETCH_WC (c, wc, _("unbalanced [")); - if ((c == c1 && *lexptr == ']') || lexleft == 0) + FETCH_WC (dfa, c, wc, _("unbalanced [")); + if ((c == c1 && *dfa->lexstate.lexptr == ']') + || dfa->lexstate.lexleft == 0) break; if (len < MAX_BRACKET_STRING_LEN) str[len++] = c; @@ -1099,7 +1124,7 @@ parse_bracket_exp (void) str[len] = '\0'; /* Fetch bracket. */ - FETCH_WC (c, wc, _("unbalanced [")); + FETCH_WC (dfa, c, wc, _("unbalanced [")); if (c1 == ':') /* Build character class. POSIX allows character classes to match multicharacter collating elements, @@ -1107,8 +1132,9 @@ parse_bracket_exp (void) worry about that possibility. */ { char const *class - = (case_fold && (STREQ (str, "upper") - || STREQ (str, "lower")) ? "alpha" : str); + = (dfa->syntax.case_fold && (STREQ (str, "upper") + || STREQ (str, "lower")) ? + "alpha" : str); const struct dfa_ctype *pred = find_pred (class); if (!pred) dfaerror (_("invalid character class")); @@ -1126,7 +1152,7 @@ parse_bracket_exp (void) colon_warning_state |= 8; /* Fetch new lookahead character. */ - FETCH_WC (c1, wc1, _("unbalanced [")); + FETCH_WC (dfa, c1, wc1, _("unbalanced [")); continue; } @@ -1134,21 +1160,21 @@ parse_bracket_exp (void) are already set up. */ } - if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) - FETCH_WC (c, wc, _("unbalanced [")); + if (c == '\\' && (dfa->syntax.syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) + FETCH_WC (dfa, c, wc, _("unbalanced [")); if (c1 == NOTCHAR) - FETCH_WC (c1, wc1, _("unbalanced [")); + FETCH_WC (dfa, c1, wc1, _("unbalanced [")); if (c1 == '-') /* build range characters. */ { - FETCH_WC (c2, wc2, _("unbalanced [")); + FETCH_WC (dfa, c2, wc2, _("unbalanced [")); /* A bracket expression like [a-[.aa.]] matches an unknown set. Treat it like [-a[.aa.]] while parsing it, and remember that the set is unknown. */ - if (c2 == '[' && *lexptr == '.') + if (c2 == '[' && *dfa->lexstate.lexptr == '.') { known_bracket_exp = false; c2 = ']'; @@ -1158,28 +1184,29 @@ parse_bracket_exp (void) { /* In the case [x-], the - is an ordinary hyphen, which is left in c1, the lookahead character. */ - lexptr -= cur_mb_len; - lexleft += cur_mb_len; + dfa->lexstate.lexptr -= dfa->lexstate.cur_mb_len; + dfa->lexstate.lexleft += dfa->lexstate.cur_mb_len; } else { - if (c2 == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) - FETCH_WC (c2, wc2, _("unbalanced [")); + if (c2 == '\\' && (dfa->syntax.syntax_bits + & RE_BACKSLASH_ESCAPE_IN_LISTS)) + FETCH_WC (dfa, c2, wc2, _("unbalanced [")); colon_warning_state |= 8; - FETCH_WC (c1, wc1, _("unbalanced [")); + FETCH_WC (dfa, c1, wc1, _("unbalanced [")); /* Treat [x-y] as a range if x != y. */ if (wc != wc2 || wc == WEOF) { if (dfa->multibyte) known_bracket_exp = false; - else if (using_simple_locale ()) + else if (using_simple_locale (dfa)) { int ci; for (ci = c; ci <= c2; ci++) setbit (ci, ccl); - if (case_fold) + if (dfa->syntax.case_fold) { int uc = toupper (c); int uc2 = toupper (c2); @@ -1203,7 +1230,7 @@ parse_bracket_exp (void) if (!dfa->multibyte) { - if (case_fold) + if (dfa->syntax.case_fold) setbit_case_fold_c (c, ccl); else setbit (c, ccl); @@ -1216,7 +1243,7 @@ parse_bracket_exp (void) { wchar_t folded[CASE_FOLDED_BUFSIZE + 1]; unsigned int i; - unsigned int n = (case_fold + unsigned int n = (dfa->syntax.case_fold ? case_folded_counterparts (wc, folded + 1) + 1 : 1); folded[0] = wc; @@ -1241,7 +1268,7 @@ parse_bracket_exp (void) if (dfa->multibyte) { work_mbc->invert = invert; - work_mbc->cset = emptyset (ccl) ? -1 : charclass_index (ccl); + work_mbc->cset = emptyset (ccl) ? -1 : dfa_charclass_index (dfa, ccl); return MBCSET; } @@ -1249,29 +1276,29 @@ parse_bracket_exp (void) { assert (!dfa->multibyte); notset (ccl); - if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE) + if (dfa->syntax.syntax_bits & RE_HAT_LISTS_NOT_NEWLINE) clrbit ('\n', ccl); } - return CSET + charclass_index (ccl); + return CSET + dfa_charclass_index (dfa, ccl); } #define PUSH_LEX_STATE(s) \ do \ { \ - char const *lexptr_saved = lexptr; \ - size_t lexleft_saved = lexleft; \ - lexptr = (s); \ - lexleft = strlen (lexptr) + char const *lexptr_saved = dfa->lexstate.lexptr; \ + size_t lexleft_saved = dfa->lexstate.lexleft; \ + dfa->lexstate.lexptr = (s); \ + dfa->lexstate.lexleft = strlen (dfa->lexstate.lexptr) #define POP_LEX_STATE() \ - lexptr = lexptr_saved; \ - lexleft = lexleft_saved; \ + dfa->lexstate.lexptr = lexptr_saved; \ + dfa->lexstate.lexleft = lexleft_saved; \ } \ while (false) static token -lex (void) +lex (struct dfa *dfa) { int c, c2; bool backslash = false; @@ -1286,14 +1313,14 @@ lex (void) "if (backslash) ...". */ for (i = 0; i < 2; ++i) { - FETCH_WC (c, wctok, NULL); + FETCH_WC (dfa, c, dfa->lexstate.wctok, NULL); switch (c) { case '\\': if (backslash) goto normal_char; - if (lexleft == 0) + if (dfa->lexstate.lexleft == 0) dfaerror (_("unfinished \\ escape")); backslash = true; break; @@ -1301,25 +1328,29 @@ lex (void) case '^': if (backslash) goto normal_char; - if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS - || lasttok == END || lasttok == LPAREN || lasttok == OR) - return lasttok = BEGLINE; + if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS + || dfa->lexstate.lasttok == END || dfa->lexstate.lasttok == LPAREN + || dfa->lexstate.lasttok == OR) + return dfa->lexstate.lasttok = BEGLINE; goto normal_char; case '$': if (backslash) goto normal_char; - if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS - || lexleft == 0 - || (syntax_bits & RE_NO_BK_PARENS - ? lexleft > 0 && *lexptr == ')' - : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == ')') - || (syntax_bits & RE_NO_BK_VBAR - ? lexleft > 0 && *lexptr == '|' - : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == '|') - || ((syntax_bits & RE_NEWLINE_ALT) - && lexleft > 0 && *lexptr == '\n')) - return lasttok = ENDLINE; + if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS + || dfa->lexstate.lexleft == 0 + || (dfa->syntax.syntax_bits & RE_NO_BK_PARENS + ? dfa->lexstate.lexleft > 0 && *dfa->lexstate.lexptr == ')' + : dfa->lexstate.lexleft > 1 && dfa->lexstate.lexptr[0] == '\\' + && dfa->lexstate.lexptr[1] == ')') + || (dfa->syntax.syntax_bits & RE_NO_BK_VBAR + ? dfa->lexstate.lexleft > 0 && *dfa->lexstate.lexptr == '|' + : dfa->lexstate.lexleft > 1 && dfa->lexstate.lexptr[0] == '\\' + && dfa->lexstate.lexptr[1] == '|') + || ((dfa->syntax.syntax_bits & RE_NEWLINE_ALT) + && dfa->lexstate.lexleft > 0 + && *dfa->lexstate.lexptr == '\n')) + return dfa->lexstate.lasttok = ENDLINE; goto normal_char; case '1': @@ -1331,74 +1362,84 @@ lex (void) case '7': case '8': case '9': - if (backslash && !(syntax_bits & RE_NO_BK_REFS)) + if (backslash && !(dfa->syntax.syntax_bits & RE_NO_BK_REFS)) { - laststart = false; - return lasttok = BACKREF; + dfa->lexstate.laststart = false; + return dfa->lexstate.lasttok = BACKREF; } goto normal_char; case '`': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) - return lasttok = BEGLINE; /* FIXME: should be beginning of string */ + if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) + { + /* FIXME: should be beginning of string */ + return dfa->lexstate.lasttok = BEGLINE; + } goto normal_char; case '\'': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) - return lasttok = ENDLINE; /* FIXME: should be end of string */ + if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) + { + /* FIXME: should be end of string */ + return dfa->lexstate.lasttok = ENDLINE; + } goto normal_char; case '<': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) - return lasttok = BEGWORD; + if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) + return dfa->lexstate.lasttok = BEGWORD; goto normal_char; case '>': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) - return lasttok = ENDWORD; + if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) + return dfa->lexstate.lasttok = ENDWORD; goto normal_char; case 'b': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) - return lasttok = LIMWORD; + if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) + return dfa->lexstate.lasttok = LIMWORD; goto normal_char; case 'B': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) - return lasttok = NOTLIMWORD; + if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) + return dfa->lexstate.lasttok = NOTLIMWORD; goto normal_char; case '?': - if (syntax_bits & RE_LIMITED_OPS) + if (dfa->syntax.syntax_bits & RE_LIMITED_OPS) goto normal_char; - if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0)) + if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0)) goto normal_char; - if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart) + if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS) + && dfa->lexstate.laststart) goto normal_char; - return lasttok = QMARK; + return dfa->lexstate.lasttok = QMARK; case '*': if (backslash) goto normal_char; - if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart) + if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS) + && dfa->lexstate.laststart) goto normal_char; - return lasttok = STAR; + return dfa->lexstate.lasttok = STAR; case '+': - if (syntax_bits & RE_LIMITED_OPS) + if (dfa->syntax.syntax_bits & RE_LIMITED_OPS) goto normal_char; - if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0)) + if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0)) goto normal_char; - if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart) + if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS) + && dfa->lexstate.laststart) goto normal_char; - return lasttok = PLUS; + return dfa->lexstate.lasttok = PLUS; case '{': - if (!(syntax_bits & RE_INTERVALS)) + if (!(dfa->syntax.syntax_bits & RE_INTERVALS)) goto normal_char; - if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0)) + if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_BRACES) == 0)) goto normal_char; - if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart) + if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS) + && dfa->lexstate.laststart) goto normal_char; /* Cases: @@ -1408,79 +1449,86 @@ lex (void) {,} - 0 to infinity (same as '*') {M,N} - M through N */ { - char const *p = lexptr; - char const *lim = p + lexleft; - minrep = maxrep = -1; + char const *p = dfa->lexstate.lexptr; + char const *lim = p + dfa->lexstate.lexleft; + dfa->lexstate.minrep = dfa->lexstate.maxrep = -1; for (; p != lim && ISASCIIDIGIT (*p); p++) { - if (minrep < 0) - minrep = *p - '0'; + if (dfa->lexstate.minrep < 0) + dfa->lexstate.minrep = *p - '0'; else - minrep = MIN (RE_DUP_MAX + 1, minrep * 10 + *p - '0'); + dfa->lexstate.minrep = MIN (RE_DUP_MAX + 1, + (dfa->lexstate.minrep + * 10 + *p - '0')); } if (p != lim) { if (*p != ',') - maxrep = minrep; + dfa->lexstate.maxrep = dfa->lexstate.minrep; else { - if (minrep < 0) - minrep = 0; + if (dfa->lexstate.minrep < 0) + dfa->lexstate.minrep = 0; while (++p != lim && ISASCIIDIGIT (*p)) { - if (maxrep < 0) - maxrep = *p - '0'; + if (dfa->lexstate.maxrep < 0) + dfa->lexstate.maxrep = *p - '0'; else - maxrep = MIN (RE_DUP_MAX + 1, maxrep * 10 + *p - '0'); + dfa->lexstate.maxrep = MIN (RE_DUP_MAX + 1, + (dfa->lexstate.maxrep + * 10 + *p - '0')); } } } if (! ((! backslash || (p != lim && *p++ == '\\')) && p != lim && *p++ == '}' - && 0 <= minrep && (maxrep < 0 || minrep <= maxrep))) + && 0 <= dfa->lexstate.minrep + && (dfa->lexstate.maxrep < 0 + || dfa->lexstate.minrep <= dfa->lexstate.maxrep))) { - if (syntax_bits & RE_INVALID_INTERVAL_ORD) + if (dfa->syntax.syntax_bits & RE_INVALID_INTERVAL_ORD) goto normal_char; dfaerror (_("invalid content of \\{\\}")); } - if (RE_DUP_MAX < maxrep) + if (RE_DUP_MAX < dfa->lexstate.maxrep) dfaerror (_("regular expression too big")); - lexptr = p; - lexleft = lim - p; + dfa->lexstate.lexptr = p; + dfa->lexstate.lexleft = lim - p; } - laststart = false; - return lasttok = REPMN; + dfa->lexstate.laststart = false; + return dfa->lexstate.lasttok = REPMN; case '|': - if (syntax_bits & RE_LIMITED_OPS) + if (dfa->syntax.syntax_bits & RE_LIMITED_OPS) goto normal_char; - if (backslash != ((syntax_bits & RE_NO_BK_VBAR) == 0)) + if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_VBAR) == 0)) goto normal_char; - laststart = true; - return lasttok = OR; + dfa->lexstate.laststart = true; + return dfa->lexstate.lasttok = OR; case '\n': - if (syntax_bits & RE_LIMITED_OPS - || backslash || !(syntax_bits & RE_NEWLINE_ALT)) + if (dfa->syntax.syntax_bits & RE_LIMITED_OPS + || backslash || !(dfa->syntax.syntax_bits & RE_NEWLINE_ALT)) goto normal_char; - laststart = true; - return lasttok = OR; + dfa->lexstate.laststart = true; + return dfa->lexstate.lasttok = OR; case '(': - if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0)) + if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0)) goto normal_char; - ++parens; - laststart = true; - return lasttok = LPAREN; + ++dfa->lexstate.parens; + dfa->lexstate.laststart = true; + return dfa->lexstate.lasttok = LPAREN; case ')': - if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0)) + if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0)) goto normal_char; - if (parens == 0 && syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD) + if (dfa->lexstate.parens == 0 + && dfa->syntax.syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD) goto normal_char; - --parens; - laststart = false; - return lasttok = RPAREN; + --dfa->lexstate.parens; + dfa->lexstate.laststart = false; + return dfa->lexstate.lasttok = RPAREN; case '.': if (backslash) @@ -1489,21 +1537,21 @@ lex (void) { /* In multibyte environment period must match with a single character not a byte. So we use ANYCHAR. */ - laststart = false; - return lasttok = ANYCHAR; + dfa->lexstate.laststart = false; + return dfa->lexstate.lasttok = ANYCHAR; } zeroset (ccl); notset (ccl); - if (!(syntax_bits & RE_DOT_NEWLINE)) + if (!(dfa->syntax.syntax_bits & RE_DOT_NEWLINE)) clrbit ('\n', ccl); - if (syntax_bits & RE_DOT_NOT_NULL) + if (dfa->syntax.syntax_bits & RE_DOT_NOT_NULL) clrbit ('\0', ccl); - laststart = false; - return lasttok = CSET + charclass_index (ccl); + dfa->lexstate.laststart = false; + return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa, ccl); case 's': case 'S': - if (!backslash || (syntax_bits & RE_NO_GNU_OPS)) + if (!backslash || (dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) goto normal_char; if (!dfa->multibyte) { @@ -1513,8 +1561,9 @@ lex (void) setbit (c2, ccl); if (c == 'S') notset (ccl); - laststart = false; - return lasttok = CSET + charclass_index (ccl); + dfa->lexstate.laststart = false; + return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa, + ccl); } /* FIXME: see if optimizing this, as is done with ANYCHAR and @@ -1525,16 +1574,16 @@ lex (void) strings, each minus its "already processed" '['. */ PUSH_LEX_STATE (c == 's' ? "[:space:]]" : "^[:space:]]"); - lasttok = parse_bracket_exp (); + dfa->lexstate.lasttok = parse_bracket_exp (dfa); POP_LEX_STATE (); - laststart = false; - return lasttok; + dfa->lexstate.laststart = false; + return dfa->lexstate.lasttok; case 'w': case 'W': - if (!backslash || (syntax_bits & RE_NO_GNU_OPS)) + if (!backslash || (dfa->syntax.syntax_bits & RE_NO_GNU_OPS)) goto normal_char; if (!dfa->multibyte) @@ -1545,8 +1594,9 @@ lex (void) setbit (c2, ccl); if (c == 'W') notset (ccl); - laststart = false; - return lasttok = CSET + charclass_index (ccl); + dfa->lexstate.laststart = false; + return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa, + ccl); } /* FIXME: see if optimizing this, as is done with ANYCHAR and @@ -1557,35 +1607,36 @@ lex (void) strings, each minus its "already processed" '['. */ PUSH_LEX_STATE (c == 'w' ? "_[:alnum:]]" : "^_[:alnum:]]"); - lasttok = parse_bracket_exp (); + dfa->lexstate.lasttok = parse_bracket_exp (dfa); POP_LEX_STATE (); - laststart = false; - return lasttok; + dfa->lexstate.laststart = false; + return dfa->lexstate.lasttok; case '[': if (backslash) goto normal_char; - laststart = false; - return lasttok = parse_bracket_exp (); + dfa->lexstate.laststart = false; + return dfa->lexstate.lasttok = parse_bracket_exp (dfa); default: normal_char: - laststart = false; + dfa->lexstate.laststart = false; /* For multibyte character sets, folding is done in atom. Always return WCHAR. */ if (dfa->multibyte) - return lasttok = WCHAR; + return dfa->lexstate.lasttok = WCHAR; - if (case_fold && isalpha (c)) + if (dfa->syntax.case_fold && isalpha (c)) { zeroset (ccl); setbit_case_fold_c (c, ccl); - return lasttok = CSET + charclass_index (ccl); + return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa, + ccl); } - return lasttok = c; + return dfa->lexstate.lasttok = c; } } @@ -1595,17 +1646,8 @@ lex (void) return END; /* keeps pedantic compilers happy. */ } -/* Recursive descent parser for regular expressions. */ - -static token tok; /* Lookahead token. */ -static size_t depth; /* Current depth of a hypothetical stack - holding deferred productions. This is - used to determine the depth that will be - required of the real stack later on in - dfaanalyze. */ - static void -addtok_mb (token t, int mbprop) +addtok_mb (struct dfa *dfa, token t, int mbprop) { if (dfa->talloc == dfa->tindex) { @@ -1628,7 +1670,7 @@ addtok_mb (token t, int mbprop) case CAT: case OR: - --depth; + --dfa->parsestate.depth; break; case BACKREF: @@ -1638,19 +1680,19 @@ addtok_mb (token t, int mbprop) ++dfa->nleaves; /* fallthrough */ case EMPTY: - ++depth; + ++dfa->parsestate.depth; break; } - if (depth > dfa->depth) - dfa->depth = depth; + if (dfa->parsestate.depth > dfa->depth) + dfa->depth = dfa->parsestate.depth; } -static void addtok_wc (wint_t wc); +static void addtok_wc (struct dfa *dfa, wint_t wc); /* Add the given token to the parse tree, maintaining the depth count and updating the maximum depth if necessary. */ static void -addtok (token t) +addtok (struct dfa *dfa, token t) { if (dfa->multibyte && t == MBCSET) { @@ -1662,9 +1704,9 @@ addtok (token t) This does not require UTF-8. */ for (i = 0; i < work_mbc->nchars; i++) { - addtok_wc (work_mbc->chars[i]); + addtok_wc (dfa, work_mbc->chars[i]); if (need_or) - addtok (OR); + addtok (dfa, OR); need_or = true; } work_mbc->nchars = 0; @@ -1673,14 +1715,14 @@ addtok (token t) that the mbcset is empty now. Do nothing in that case. */ if (work_mbc->cset != -1) { - addtok (CSET + work_mbc->cset); + addtok (dfa, CSET + work_mbc->cset); if (need_or) - addtok (OR); + addtok (dfa, OR); } } else { - addtok_mb (t, 3); + addtok_mb (dfa, t, 3); } } @@ -1691,7 +1733,7 @@ addtok (token t) <mb1(1st-byte)><mb1(2nd-byte)><CAT><mb1(3rd-byte)><CAT> <mb2(1st-byte)><mb2(2nd-byte)><CAT><mb2(3rd-byte)><CAT><CAT> */ static void -addtok_wc (wint_t wc) +addtok_wc (struct dfa *dfa, wint_t wc) { unsigned char buf[MB_LEN_MAX]; mbstate_t s = { 0 }; @@ -1699,25 +1741,25 @@ addtok_wc (wint_t wc) size_t stored_bytes = wcrtomb ((char *) buf, wc, &s); if (stored_bytes != (size_t) -1) - cur_mb_len = stored_bytes; + dfa->lexstate.cur_mb_len = stored_bytes; else { /* This is merely stop-gap. buf[0] is undefined, yet skipping the addtok_mb call altogether can corrupt the heap. */ - cur_mb_len = 1; + dfa->lexstate.cur_mb_len = 1; buf[0] = 0; } - addtok_mb (buf[0], cur_mb_len == 1 ? 3 : 1); - for (i = 1; i < cur_mb_len; i++) + addtok_mb (dfa, buf[0], dfa->lexstate.cur_mb_len == 1 ? 3 : 1); + for (i = 1; i < dfa->lexstate.cur_mb_len; i++) { - addtok_mb (buf[i], i == cur_mb_len - 1 ? 2 : 0); - addtok (CAT); + addtok_mb (dfa, buf[i], i == dfa->lexstate.cur_mb_len - 1 ? 2 : 0); + addtok (dfa, CAT); } } static void -add_utf8_anychar (void) +add_utf8_anychar (struct dfa *dfa) { static charclass const utf8_classes[5] = { /* 80-bf: non-leading bytes. */ @@ -1746,12 +1788,12 @@ add_utf8_anychar (void) copyset (utf8_classes[i], c); if (i == 1) { - if (!(syntax_bits & RE_DOT_NEWLINE)) + if (!(dfa->syntax.syntax_bits & RE_DOT_NEWLINE)) clrbit ('\n', c); - if (syntax_bits & RE_DOT_NOT_NULL) + if (dfa->syntax.syntax_bits & RE_DOT_NOT_NULL) clrbit ('\0', c); } - dfa->utf8_anychar_classes[i] = CSET + charclass_index (c); + dfa->utf8_anychar_classes[i] = CSET + dfa_charclass_index (dfa, c); } /* A valid UTF-8 character is @@ -1765,12 +1807,12 @@ add_utf8_anychar (void) and you get "B|(C|(D|EA)A)A". And since the token buffer is in reverse Polish notation, you get "B C D E A CAT OR A CAT OR A CAT OR". */ for (i = 1; i < n; i++) - addtok (dfa->utf8_anychar_classes[i]); + addtok (dfa, dfa->utf8_anychar_classes[i]); while (--i > 1) { - addtok (dfa->utf8_anychar_classes[0]); - addtok (CAT); - addtok (OR); + addtok (dfa, dfa->utf8_anychar_classes[0]); + addtok (dfa, CAT); + addtok (dfa, OR); } } @@ -1810,31 +1852,32 @@ add_utf8_anychar (void) The parser builds a parse tree in postfix form in an array of tokens. */ static void -atom (void) +atom (struct dfa *dfa) { - if (tok == WCHAR) + if (dfa->parsestate.tok == WCHAR) { - if (wctok == WEOF) - addtok (BACKREF); + if (dfa->lexstate.wctok == WEOF) + addtok (dfa, BACKREF); else { - addtok_wc (wctok); + addtok_wc (dfa, dfa->lexstate.wctok); - if (case_fold) + if (dfa->syntax.case_fold) { wchar_t folded[CASE_FOLDED_BUFSIZE]; - unsigned int i, n = case_folded_counterparts (wctok, folded); + unsigned int i, n = case_folded_counterparts (dfa->lexstate.wctok, + folded); for (i = 0; i < n; i++) { - addtok_wc (folded[i]); - addtok (OR); + addtok_wc (dfa, folded[i]); + addtok (dfa, OR); } } } - tok = lex (); + dfa->parsestate.tok = lex (dfa); } - else if (tok == ANYCHAR && using_utf8 ()) + else if (dfa->parsestate.tok == ANYCHAR && using_utf8) { /* For UTF-8 expand the period to a series of CSETs that define a valid UTF-8 character. This avoids using the slow multibyte path. I'm @@ -1843,32 +1886,35 @@ atom (void) it is done above in add_utf8_anychar. So, let's start with UTF-8: it is the most used, and the structure of the encoding makes the correctness more obvious. */ - add_utf8_anychar (); - tok = lex (); + add_utf8_anychar (dfa); + dfa->parsestate.tok = lex (dfa); } - else if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF - || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD - || tok == ANYCHAR || tok == MBCSET - || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD) + else if ((dfa->parsestate.tok >= 0 && dfa->parsestate.tok < NOTCHAR) + || dfa->parsestate.tok >= CSET || dfa->parsestate.tok == BACKREF + || dfa->parsestate.tok == BEGLINE || dfa->parsestate.tok == ENDLINE + || dfa->parsestate.tok == BEGWORD || dfa->parsestate.tok == ANYCHAR + || dfa->parsestate.tok == MBCSET || dfa->parsestate.tok == ENDWORD + || dfa->parsestate.tok == LIMWORD + || dfa->parsestate.tok == NOTLIMWORD) { - addtok (tok); - tok = lex (); + addtok (dfa, dfa->parsestate.tok); + dfa->parsestate.tok = lex (dfa); } - else if (tok == LPAREN) + else if (dfa->parsestate.tok == LPAREN) { - tok = lex (); - regexp (); - if (tok != RPAREN) + dfa->parsestate.tok = lex (dfa); + regexp (dfa); + if (dfa->parsestate.tok != RPAREN) dfaerror (_("unbalanced (")); - tok = lex (); + dfa->parsestate.tok = lex (dfa); } else - addtok (EMPTY); + addtok (dfa, EMPTY); } /* Return the number of tokens in the given subexpression. */ static size_t _GL_ATTRIBUTE_PURE -nsubtoks (size_t tindex) +nsubtoks (struct dfa const *dfa, size_t tindex) { size_t ntoks1; @@ -1879,90 +1925,93 @@ nsubtoks (size_t tindex) case QMARK: case STAR: case PLUS: - return 1 + nsubtoks (tindex - 1); + return 1 + nsubtoks (dfa, tindex - 1); case CAT: case OR: - ntoks1 = nsubtoks (tindex - 1); - return 1 + ntoks1 + nsubtoks (tindex - 1 - ntoks1); + ntoks1 = nsubtoks (dfa, tindex - 1); + return 1 + ntoks1 + nsubtoks (dfa, tindex - 1 - ntoks1); } } /* Copy the given subexpression to the top of the tree. */ static void -copytoks (size_t tindex, size_t ntokens) +copytoks (struct dfa *dfa, size_t tindex, size_t ntokens) { size_t i; if (dfa->multibyte) for (i = 0; i < ntokens; ++i) - addtok_mb (dfa->tokens[tindex + i], dfa->multibyte_prop[tindex + i]); + addtok_mb (dfa, dfa->tokens[tindex + i], dfa->multibyte_prop[tindex + i]); else for (i = 0; i < ntokens; ++i) - addtok_mb (dfa->tokens[tindex + i], 3); + addtok_mb (dfa, dfa->tokens[tindex + i], 3); } static void -closure (void) +closure (struct dfa *dfa) { int i; size_t tindex, ntokens; - atom (); - while (tok == QMARK || tok == STAR || tok == PLUS || tok == REPMN) - if (tok == REPMN && (minrep || maxrep)) + atom (dfa); + while (dfa->parsestate.tok == QMARK || dfa->parsestate.tok == STAR + || dfa->parsestate.tok == PLUS || dfa->parsestate.tok == REPMN) + if (dfa->parsestate.tok == REPMN + && (dfa->lexstate.minrep || dfa->lexstate.maxrep)) { - ntokens = nsubtoks (dfa->tindex); + ntokens = nsubtoks (dfa, dfa->tindex); tindex = dfa->tindex - ntokens; - if (maxrep < 0) - addtok (PLUS); - if (minrep == 0) - addtok (QMARK); - for (i = 1; i < minrep; ++i) + if (dfa->lexstate.maxrep < 0) + addtok (dfa, PLUS); + if (dfa->lexstate.minrep == 0) + addtok (dfa, QMARK); + for (i = 1; i < dfa->lexstate.minrep; ++i) { - copytoks (tindex, ntokens); - addtok (CAT); + copytoks (dfa, tindex, ntokens); + addtok (dfa, CAT); } - for (; i < maxrep; ++i) + for (; i < dfa->lexstate.maxrep; ++i) { - copytoks (tindex, ntokens); - addtok (QMARK); - addtok (CAT); + copytoks (dfa, tindex, ntokens); + addtok (dfa, QMARK); + addtok (dfa, CAT); } - tok = lex (); + dfa->parsestate.tok = lex (dfa); } - else if (tok == REPMN) + else if (dfa->parsestate.tok == REPMN) { - dfa->tindex -= nsubtoks (dfa->tindex); - tok = lex (); - closure (); + dfa->tindex -= nsubtoks (dfa, dfa->tindex); + dfa->parsestate.tok = lex (dfa); + closure (dfa); } else { - addtok (tok); - tok = lex (); + addtok (dfa, dfa->parsestate.tok); + dfa->parsestate.tok = lex (dfa); } } static void -branch (void) +branch (struct dfa* dfa) { - closure (); - while (tok != RPAREN && tok != OR && tok >= 0) + closure (dfa); + while (dfa->parsestate.tok != RPAREN && dfa->parsestate.tok != OR + && dfa->parsestate.tok >= 0) { - closure (); - addtok (CAT); + closure (dfa); + addtok (dfa, CAT); } } static void -regexp (void) +regexp (struct dfa *dfa) { - branch (); - while (tok == OR) + branch (dfa); + while (dfa->parsestate.tok == OR) { - tok = lex (); - branch (); - addtok (OR); + dfa->parsestate.tok = lex (dfa); + branch (dfa); + addtok (dfa, OR); } } @@ -1972,34 +2021,33 @@ regexp (void) static void dfaparse (char const *s, size_t len, struct dfa *d) { - dfa = d; - lexptr = s; - lexleft = len; - lasttok = END; - laststart = true; - parens = 0; - if (dfa->multibyte) + d->lexstate.lexptr = s; + d->lexstate.lexleft = len; + d->lexstate.lasttok = END; + d->lexstate.laststart = true; + d->lexstate.parens = 0; + if (d->multibyte) { - cur_mb_len = 0; + d->lexstate.cur_mb_len = 0; memset (&d->mbs, 0, sizeof d->mbs); } - if (!syntax_bits_set) + if (!d->syntax.syntax_bits_set) dfaerror (_("no syntax specified")); - tok = lex (); - depth = d->depth; + d->parsestate.tok = lex (d); + d->parsestate.depth = d->depth; - regexp (); + regexp (d); - if (tok != END) + if (d->parsestate.tok != END) dfaerror (_("unbalanced )")); - addtok (END - d->nregexps); - addtok (CAT); + addtok (d, END - d->nregexps); + addtok (d, CAT); if (d->nregexps) - addtok (OR); + addtok (d, OR); ++d->nregexps; } @@ -2270,19 +2318,19 @@ epsclosure (position_set *s, struct dfa const *d, char *visited) character included in C. */ static int -charclass_context (charclass c) +charclass_context (struct dfa const *dfa, charclass c) { int context = 0; unsigned int j; - if (tstbit (eolbyte, c)) + if (tstbit (dfa->syntax.eolbyte, c)) context |= CTX_NEWLINE; for (j = 0; j < CHARCLASS_WORDS; ++j) { - if (c[j] & letters[j]) + if (c[j] & dfa->syntax.letters[j]) context |= CTX_LETTER; - if (c[j] & ~(letters[j] | newline[j])) + if (c[j] & ~(dfa->syntax.letters[j] | dfa->syntax.newline[j])) context |= CTX_NONE; } @@ -2677,15 +2725,15 @@ dfastate (state_num s, struct dfa *d, state_num trans[]) if (!SUCCEEDS_IN_CONTEXT (pos.constraint, d->states[s].context, CTX_NEWLINE)) for (j = 0; j < CHARCLASS_WORDS; ++j) - matches[j] &= ~newline[j]; + matches[j] &= ~d->syntax.newline[j]; if (!SUCCEEDS_IN_CONTEXT (pos.constraint, d->states[s].context, CTX_LETTER)) for (j = 0; j < CHARCLASS_WORDS; ++j) - matches[j] &= ~letters[j]; + matches[j] &= ~d->syntax.letters[j]; if (!SUCCEEDS_IN_CONTEXT (pos.constraint, d->states[s].context, CTX_NONE)) for (j = 0; j < CHARCLASS_WORDS; ++j) - matches[j] &= letters[j] | newline[j]; + matches[j] &= d->syntax.letters[j] | d->syntax.newline[j]; /* If there are no characters left, there's no point in going on. */ for (j = 0; j < CHARCLASS_WORDS && !matches[j]; ++j) @@ -2791,7 +2839,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[]) for (i = 0; i < NOTCHAR; ++i) trans[i] = unibyte_word_constituent (i) ? state_letter : state; - trans[eolbyte] = state_newline; + trans[d->syntax.eolbyte] = state_newline; } else for (i = 0; i < NOTCHAR; ++i) @@ -2847,7 +2895,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[]) } /* Find out if the new state will want any context information. */ - possible_contexts = charclass_context (labels[i]); + possible_contexts = charclass_context (d, labels[i]); separate_contexts = state_separate_contexts (&follows); /* Find the state(s) corresponding to the union of the follows. */ @@ -2894,7 +2942,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[]) { int c = j * CHARCLASS_WORD_BITS + k; - if (c == eolbyte) + if (c == d->syntax.eolbyte) trans[c] = state_newline; else if (unibyte_word_constituent (c)) trans[c] = state_letter; @@ -3020,8 +3068,8 @@ build_state (state_num s, struct dfa *d) /* Keep the newline transition in a special place so we can use it as a sentinel. */ - d->newlines[s] = trans[eolbyte]; - trans[eolbyte] = -1; + d->newlines[s] = trans[d->syntax.eolbyte]; + trans[d->syntax.eolbyte] = -1; if (ACCEPTING (s, *d)) d->fails[s] = trans; @@ -3040,7 +3088,7 @@ transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const **pp) { state_num *t; - if (**pp == eolbyte) + if (**pp == d->syntax.eolbyte) { /* S is always an initial state in transit_state, so the transition table for the state must have been built already. */ @@ -3083,7 +3131,7 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp, size_t i, j; int mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d); - int context = wc == eolbyte ? CTX_NEWLINE : CTX_NONE; + int context = wc == d->syntax.eolbyte ? CTX_NEWLINE : CTX_NONE; bool context_newline = context == CTX_NEWLINE; /* This state has some operators which can match a multibyte character. */ @@ -3201,7 +3249,7 @@ skip_remains_mb (struct dfa *d, unsigned char const *p, unsigned char const *mbp, char const *end, wint_t *wcp) { wint_t wc = WEOF; - if (never_trail[*p]) + if (d->syntax.never_trail[*p]) return p; while (mbp < p) mbp += mbs_to_wchar (&wc, (char const *) mbp, @@ -3239,7 +3287,7 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl, unsigned char const *p, *mbp; /* Current input character. */ state_num **trans, *t; /* Copy of d->trans so it can be optimized into a register. */ - unsigned char eol = eolbyte; /* Likewise for eolbyte. */ + unsigned char eol = d->syntax.eolbyte; /* Likewise for eolbyte. */ unsigned char saved_end; size_t nlcount = 0; @@ -3306,8 +3354,8 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl, } if (d->states[s].mbps.nelem == 0 || (*p == eol && !allow_nl) - || (*p == '\n' && !(syntax_bits & RE_DOT_NEWLINE)) - || (*p == '\0' && (syntax_bits & RE_DOT_NOT_NULL)) + || (*p == '\n' && !(d->syntax.syntax_bits & RE_DOT_NEWLINE)) + || (*p == '\0' && (d->syntax.syntax_bits & RE_DOT_NOT_NULL)) || (char *) p >= end) { /* If an input character does not match ANYCHAR, do it @@ -3370,14 +3418,14 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl, } else if (d->fails[s]) { - if (d->success[s] & sbit[*p]) + if (d->success[s] & d->syntax.sbit[*p]) goto done; s1 = s; if (!multibyte || d->states[s].mbps.nelem == 0 || (*p == eol && !allow_nl) - || (*p == '\n' && !(syntax_bits & RE_DOT_NEWLINE)) - || (*p == '\0' && (syntax_bits & RE_DOT_NOT_NULL)) + || (*p == '\n' && !(d->syntax.syntax_bits & RE_DOT_NEWLINE)) + || (*p == '\0' && (d->syntax.syntax_bits & RE_DOT_NOT_NULL)) || (char *) p >= end) { /* If a input character does not match ANYCHAR, do it @@ -3479,17 +3527,6 @@ free_mbdata (struct dfa *d) } } -/* Initialize the components of a dfa that the other routines don't - initialize for themselves. */ -static void -dfainit (struct dfa *d) -{ - memset (d, 0, sizeof *d); - d->multibyte = MB_CUR_MAX > 1; - d->dfaexec = d->multibyte ? dfaexec_mb : dfaexec_sb; - d->fast = !d->multibyte; -} - /* Return true if every construct in D is supported by this DFA matcher. */ static bool _GL_ATTRIBUTE_PURE dfa_supported (struct dfa const *d) @@ -3521,7 +3558,7 @@ dfaoptimize (struct dfa *d) size_t i; bool have_backref = false; - if (!using_utf8 ()) + if (!using_utf8) return; for (i = 0; i < d->tindex; ++i) @@ -3640,7 +3677,6 @@ dfassbuild (struct dfa *d) void dfacomp (char const *s, size_t len, struct dfa *d, bool searchflag) { - dfainit (d); dfaparse (s, len, d); dfassbuild (d); @@ -3957,7 +3993,7 @@ dfamust (struct dfa const *d) size_t rj; bool need_begline = false; bool need_endline = false; - bool case_fold_unibyte = case_fold && MB_CUR_MAX == 1; + bool case_fold_unibyte = d->syntax.case_fold && MB_CUR_MAX == 1; struct dfamust *dm; for (ri = 0; ri < d->tindex; ++ri) @@ -4194,7 +4230,20 @@ dfamustfree (struct dfamust *dm) struct dfa * dfaalloc (void) { - return xmalloc (sizeof (struct dfa)); + struct dfa *d = xcalloc (1, sizeof (struct dfa)); + d->multibyte = MB_CUR_MAX > 1; + d->dfaexec = d->multibyte ? dfaexec_mb : dfaexec_sb; + d->fast = !d->multibyte; + d->lexstate.cur_mb_len = 1; + return d; +} + +void +dfa_init (void) +{ + check_utf8 (); + check_unibyte_c (); + init_mbrtowc_cache (); } /* vim:set shiftwidth=2: */ @@ -54,10 +54,10 @@ extern struct dfamust *dfamust (struct dfa const *); /* Free the storage held by the components of a struct dfamust. */ extern void dfamustfree (struct dfamust *); -/* dfasyntax() takes three arguments; the first sets the syntax bits described - earlier in this file, the second sets the case-folding flag, and the - third specifies the line terminator. */ -extern void dfasyntax (reg_syntax_t, bool, unsigned char); +/* dfasyntax() takes four arguments; the first is the dfa to operate on, the + second sets the syntax bits described earlier in this file, the third sets + the case-folding flag, and the fourth specifies the line terminator. */ +extern void dfasyntax (struct dfa *, reg_syntax_t, bool, unsigned char); /* Compile the given string of the given length into the given struct dfa. Final argument is a flag specifying whether to build a searching or an @@ -104,4 +104,7 @@ extern void dfawarn (const char *); The user must supply a dfaerror. */ extern _Noreturn void dfaerror (const char *); -extern bool using_utf8 (void); +extern bool dfa_using_utf8 (void) _GL_ATTRIBUTE_PURE; + +/* This must be called before calling any of the above dfa*() functions. */ +extern void dfa_init (void); diff --git a/doc/ChangeLog b/doc/ChangeLog index 5513d61f..af436578 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,11 @@ +2016-08-23 Arnold D. Robbins <arnold@skeeve.com> + + * Makefile.am (EXTRA_DIST): Add new file, wordlist. + (spell): New target. + * wordlist: New file. + * gawktexi.in: Fix typos, adjust update date. + * awkcard.in: Update copyright years. + 2016-08-03 Arnold D. Robbins <arnold@skeeve.com> Restored doc on typed regexes. diff --git a/doc/Makefile.am b/doc/Makefile.am index 5eefaadf..a2015629 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -47,6 +47,7 @@ EXTRA_DIST = ChangeLog ChangeLog.0 README.card ad.block setter.outline \ lflashlight-small.xpic lflashlight.eps lflashlight.pdf \ rflashlight-small.xpic rflashlight.eps rflashlight.pdf \ statist.jpg statist.eps statist.pdf \ + wordlist \ bc_notes # Get rid of generated files when cleaning @@ -103,3 +104,7 @@ awkcard.nc: $(CARDFILES) awkcard.pdf: awkcard.ps ps2pdf awkcard.ps awkcard.pdf + +spell: + export LC_ALL=C ; spell "$(srcdir)"/gawktexi.in | \ + sort -u | comm -23 - "$(srcdir)"/wordlist diff --git a/doc/Makefile.in b/doc/Makefile.in index 2fa1fac0..9c7bbc2e 100644 --- a/doc/Makefile.in +++ b/doc/Makefile.in @@ -373,6 +373,7 @@ EXTRA_DIST = ChangeLog ChangeLog.0 README.card ad.block setter.outline \ lflashlight-small.xpic lflashlight.eps lflashlight.pdf \ rflashlight-small.xpic rflashlight.eps rflashlight.pdf \ statist.jpg statist.eps statist.pdf \ + wordlist \ bc_notes @@ -909,6 +910,10 @@ awkcard.nc: $(CARDFILES) awkcard.pdf: awkcard.ps ps2pdf awkcard.ps awkcard.pdf +spell: + export LC_ALL=C ; spell "$(srcdir)"/gawktexi.in | \ + sort -u | comm -23 - "$(srcdir)"/wordlist + # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: diff --git a/doc/awkcard.in b/doc/awkcard.in index 0b377ee5..16e4b19d 100644 --- a/doc/awkcard.in +++ b/doc/awkcard.in @@ -1,7 +1,7 @@ .\" AWK Reference Card --- Arnold Robbins, arnold@skeeve.com .\" .\" Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, -.\" 2003, 2004, 2005, 2007, 2009, 2010, 2011, 2012, 2013, 2014, 2015 +.\" 2003, 2004, 2005, 2007, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 .\" Free Software Foundation, Inc. .\" .\" Permission is granted to make and distribute verbatim copies of @@ -100,7 +100,7 @@ Brian Kernighan and Michael Brennan who reviewed it. \*(CD .SL .nf -\*(FRCopyright \(co 1996\(en2005, 2007, 2009\(en2014 +\*(FRCopyright \(co 1996\(en2005, 2007, 2009\(en2016 Free Software Foundation, Inc. .nf .BT @@ -1980,7 +1980,7 @@ maintains it.\*(CX .ES .fi \*(CDCopyright \(co 1996\(en2005, -2007, 2009\(en2014 Free Software Foundation, Inc. +2007, 2009\(en2016 Free Software Foundation, Inc. .sp .5 Permission is granted to make and distribute verbatim copies of this reference card provided the copyright notice and this permission notice diff --git a/doc/gawk.info b/doc/gawk.info index 97c9d3c0..eb62e782 100644 --- a/doc/gawk.info +++ b/doc/gawk.info @@ -13128,7 +13128,7 @@ would see the latter (undesirable) output. terminal device. On modern systems, this means your keyboard and screen. - (2) In private correspondance, Dr. Kernighan has indicated to me that + (2) In private correspondence, Dr. Kernighan has indicated to me that the way this was done was probably a mistake. @@ -15595,7 +15595,7 @@ anyway, because 'gawk' goes to the next file as soon as an 'ENDFILE' rule finishes!) You need to be careful calling 'rewind()'. You can end up causing -infinite recursion if you don't pay attenion. Here is an example use: +infinite recursion if you don't pay attention. Here is an example use: $ cat data -| a @@ -30837,7 +30837,7 @@ Reference Counts assumed by a variable is used in more than one place, only one copy of the value itself is kept, and the associated reference count is increased when the same value is used by an additional variable, - and decresed when the related variable is no longer in use. When + and decreased when the related variable is no longer in use. When the reference count goes to zero, the memory space used to store the value of the variable is freed. @@ -33382,13 +33382,13 @@ Index * elements of arrays: Reference to Elements. (line 6) * email address for bug reports, bug-gawk@gnu.org: Bugs. (line 30) -* EMISTERED: TCP/IP Networking. (line 6) * empty array elements: Reference to Elements. (line 18) * empty pattern: Empty. (line 6) * empty strings: awk split records. (line 114) * empty strings, See null strings: Regexp Field Splitting. (line 43) +* EMRED: TCP/IP Networking. (line 6) * enable breakpoint: Breakpoint Control. (line 73) * enable debugger command: Breakpoint Control. (line 73) * end debugger command: Debugger Execution Control. @@ -35610,274 +35610,274 @@ Node: Shell Quoting643970 Node: Data File Management645371 Node: Filetrans Function646003 Node: Rewind Function650099 -Node: File Checking652004 -Ref: File Checking-Footnote-1653338 -Node: Empty Files653539 -Node: Ignoring Assigns655518 -Node: Getopt Function657068 -Ref: Getopt Function-Footnote-1668537 -Node: Passwd Functions668737 -Ref: Passwd Functions-Footnote-1677576 -Node: Group Functions677664 -Ref: Group Functions-Footnote-1685561 -Node: Walking Arrays685768 -Node: Library Functions Summary688776 -Node: Library Exercises690182 -Node: Sample Programs690647 -Node: Running Examples691417 -Node: Clones692145 -Node: Cut Program693369 -Node: Egrep Program703298 -Ref: Egrep Program-Footnote-1710810 -Node: Id Program710920 -Node: Split Program714600 -Ref: Split Program-Footnote-1718059 -Node: Tee Program718188 -Node: Uniq Program720978 -Node: Wc Program728404 -Ref: Wc Program-Footnote-1732659 -Node: Miscellaneous Programs732753 -Node: Dupword Program733966 -Node: Alarm Program735996 -Node: Translate Program740851 -Ref: Translate Program-Footnote-1745416 -Node: Labels Program745686 -Ref: Labels Program-Footnote-1749037 -Node: Word Sorting749121 -Node: History Sorting753193 -Node: Extract Program755028 -Node: Simple Sed762557 -Node: Igawk Program765631 -Ref: Igawk Program-Footnote-1779962 -Ref: Igawk Program-Footnote-2780164 -Ref: Igawk Program-Footnote-3780286 -Node: Anagram Program780401 -Node: Signature Program783463 -Node: Programs Summary784710 -Node: Programs Exercises785924 -Ref: Programs Exercises-Footnote-1790053 -Node: Advanced Features790144 -Node: Nondecimal Data792134 -Node: Array Sorting793725 -Node: Controlling Array Traversal794425 -Ref: Controlling Array Traversal-Footnote-1802792 -Node: Array Sorting Functions802910 -Ref: Array Sorting Functions-Footnote-1808001 -Node: Two-way I/O808197 -Ref: Two-way I/O-Footnote-1814747 -Ref: Two-way I/O-Footnote-2814934 -Node: TCP/IP Networking815016 -Node: Profiling818134 -Ref: Profiling-Footnote-1826627 -Node: Advanced Features Summary826950 -Node: Internationalization828794 -Node: I18N and L10N830274 -Node: Explaining gettext830961 -Ref: Explaining gettext-Footnote-1836853 -Ref: Explaining gettext-Footnote-2837038 -Node: Programmer i18n837203 -Ref: Programmer i18n-Footnote-1842058 -Node: Translator i18n842107 -Node: String Extraction842901 -Ref: String Extraction-Footnote-1844033 -Node: Printf Ordering844119 -Ref: Printf Ordering-Footnote-1846905 -Node: I18N Portability846969 -Ref: I18N Portability-Footnote-1849425 -Node: I18N Example849488 -Ref: I18N Example-Footnote-1852294 -Node: Gawk I18N852367 -Node: I18N Summary853012 -Node: Debugger854353 -Node: Debugging855375 -Node: Debugging Concepts855816 -Node: Debugging Terms857625 -Node: Awk Debugging860200 -Node: Sample Debugging Session861106 -Node: Debugger Invocation861640 -Node: Finding The Bug863026 -Node: List of Debugger Commands869504 -Node: Breakpoint Control870837 -Node: Debugger Execution Control874531 -Node: Viewing And Changing Data877893 -Node: Execution Stack881267 -Node: Debugger Info882904 -Node: Miscellaneous Debugger Commands886975 -Node: Readline Support892063 -Node: Limitations892959 -Ref: Limitations-Footnote-1897190 -Node: Debugging Summary897241 -Node: Arbitrary Precision Arithmetic898520 -Node: Computer Arithmetic899936 -Ref: table-numeric-ranges903527 -Ref: Computer Arithmetic-Footnote-1904249 -Node: Math Definitions904306 -Ref: table-ieee-formats907620 -Ref: Math Definitions-Footnote-1908223 -Node: MPFR features908328 -Node: FP Math Caution910045 -Ref: FP Math Caution-Footnote-1911117 -Node: Inexactness of computations911486 -Node: Inexact representation912446 -Node: Comparing FP Values913806 -Node: Errors accumulate914888 -Node: Getting Accuracy916321 -Node: Try To Round919031 -Node: Setting precision919930 -Ref: table-predefined-precision-strings920627 -Node: Setting the rounding mode922457 -Ref: table-gawk-rounding-modes922831 -Ref: Setting the rounding mode-Footnote-1926239 -Node: Arbitrary Precision Integers926418 -Ref: Arbitrary Precision Integers-Footnote-1931335 -Node: POSIX Floating Point Problems931484 -Ref: POSIX Floating Point Problems-Footnote-1935366 -Node: Floating point summary935404 -Node: Dynamic Extensions937594 -Node: Extension Intro939147 -Node: Plugin License940413 -Node: Extension Mechanism Outline941210 -Ref: figure-load-extension941649 -Ref: figure-register-new-function943214 -Ref: figure-call-new-function944306 -Node: Extension API Description946368 -Node: Extension API Functions Introduction947900 -Node: General Data Types952759 -Ref: General Data Types-Footnote-1958714 -Node: Memory Allocation Functions959013 -Ref: Memory Allocation Functions-Footnote-1961858 -Node: Constructor Functions961957 -Node: Registration Functions963702 -Node: Extension Functions964387 -Node: Exit Callback Functions967010 -Node: Extension Version String968260 -Node: Input Parsers968923 -Node: Output Wrappers978805 -Node: Two-way processors983317 -Node: Printing Messages985582 -Ref: Printing Messages-Footnote-1986753 -Node: Updating ERRNO986906 -Node: Requesting Values987645 -Ref: table-value-types-returned988382 -Node: Accessing Parameters989265 -Node: Symbol Table Access990500 -Node: Symbol table by name991012 -Node: Symbol table by cookie993033 -Ref: Symbol table by cookie-Footnote-1997185 -Node: Cached values997249 -Ref: Cached values-Footnote-11000756 -Node: Array Manipulation1000847 -Ref: Array Manipulation-Footnote-11001938 -Node: Array Data Types1001975 -Ref: Array Data Types-Footnote-11004633 -Node: Array Functions1004725 -Node: Flattening Arrays1008583 -Node: Creating Arrays1015491 -Node: Redirection API1020260 -Node: Extension API Variables1023091 -Node: Extension Versioning1023724 -Ref: gawk-api-version1024161 -Node: Extension API Informational Variables1025917 -Node: Extension API Boilerplate1026981 -Node: Finding Extensions1030795 -Node: Extension Example1031354 -Node: Internal File Description1032152 -Node: Internal File Ops1036232 -Ref: Internal File Ops-Footnote-11047994 -Node: Using Internal File Ops1048134 -Ref: Using Internal File Ops-Footnote-11050517 -Node: Extension Samples1050791 -Node: Extension Sample File Functions1052320 -Node: Extension Sample Fnmatch1059969 -Node: Extension Sample Fork1061456 -Node: Extension Sample Inplace1062674 -Node: Extension Sample Ord1065884 -Node: Extension Sample Readdir1066720 -Ref: table-readdir-file-types1067609 -Node: Extension Sample Revout1068414 -Node: Extension Sample Rev2way1069003 -Node: Extension Sample Read write array1069743 -Node: Extension Sample Readfile1071685 -Node: Extension Sample Time1072780 -Node: Extension Sample API Tests1074128 -Node: gawkextlib1074620 -Node: Extension summary1077067 -Node: Extension Exercises1080769 -Node: Language History1082267 -Node: V7/SVR3.11083923 -Node: SVR41086075 -Node: POSIX1087509 -Node: BTL1088888 -Node: POSIX/GNU1089617 -Node: Feature History1095479 -Node: Common Extensions1109849 -Node: Ranges and Locales1111132 -Ref: Ranges and Locales-Footnote-11115748 -Ref: Ranges and Locales-Footnote-21115775 -Ref: Ranges and Locales-Footnote-31116010 -Node: Contributors1116231 -Node: History summary1121791 -Node: Installation1123171 -Node: Gawk Distribution1124115 -Node: Getting1124599 -Node: Extracting1125560 -Node: Distribution contents1127198 -Node: Unix Installation1133292 -Node: Quick Installation1133974 -Node: Shell Startup Files1136388 -Node: Additional Configuration Options1137466 -Node: Configuration Philosophy1139271 -Node: Non-Unix Installation1141640 -Node: PC Installation1142098 -Node: PC Binary Installation1143418 -Node: PC Compiling1145270 -Ref: PC Compiling-Footnote-11148064 -Node: PC Testing1148173 -Node: PC Using1149353 -Ref: PC Using-Footnote-11153506 -Node: Cygwin1153579 -Node: MSYS1154349 -Node: VMS Installation1154850 -Node: VMS Compilation1155641 -Ref: VMS Compilation-Footnote-11156870 -Node: VMS Dynamic Extensions1156928 -Node: VMS Installation Details1158613 -Node: VMS Running1160866 -Node: VMS GNV1165145 -Node: VMS Old Gawk1165880 -Node: Bugs1166351 -Node: Other Versions1170666 -Node: Installation summary1177250 -Node: Notes1178301 -Node: Compatibility Mode1179166 -Node: Additions1179948 -Node: Accessing The Source1180873 -Node: Adding Code1182308 -Node: New Ports1188527 -Node: Derived Files1193015 -Ref: Derived Files-Footnote-11198500 -Ref: Derived Files-Footnote-21198535 -Ref: Derived Files-Footnote-31199133 -Node: Future Extensions1199247 -Node: Implementation Limitations1199905 -Node: Extension Design1201088 -Node: Old Extension Problems1202242 -Ref: Old Extension Problems-Footnote-11203760 -Node: Extension New Mechanism Goals1203817 -Ref: Extension New Mechanism Goals-Footnote-11207181 -Node: Extension Other Design Decisions1207370 -Node: Extension Future Growth1209483 -Node: Old Extension Mechanism1210319 -Node: Notes summary1212082 -Node: Basic Concepts1213264 -Node: Basic High Level1213945 -Ref: figure-general-flow1214227 -Ref: figure-process-flow1214912 -Ref: Basic High Level-Footnote-11218213 -Node: Basic Data Typing1218398 -Node: Glossary1221726 -Node: Copying1253672 -Node: GNU Free Documentation License1291211 -Node: Index1316329 +Node: File Checking652005 +Ref: File Checking-Footnote-1653339 +Node: Empty Files653540 +Node: Ignoring Assigns655519 +Node: Getopt Function657069 +Ref: Getopt Function-Footnote-1668538 +Node: Passwd Functions668738 +Ref: Passwd Functions-Footnote-1677577 +Node: Group Functions677665 +Ref: Group Functions-Footnote-1685562 +Node: Walking Arrays685769 +Node: Library Functions Summary688777 +Node: Library Exercises690183 +Node: Sample Programs690648 +Node: Running Examples691418 +Node: Clones692146 +Node: Cut Program693370 +Node: Egrep Program703299 +Ref: Egrep Program-Footnote-1710811 +Node: Id Program710921 +Node: Split Program714601 +Ref: Split Program-Footnote-1718060 +Node: Tee Program718189 +Node: Uniq Program720979 +Node: Wc Program728405 +Ref: Wc Program-Footnote-1732660 +Node: Miscellaneous Programs732754 +Node: Dupword Program733967 +Node: Alarm Program735997 +Node: Translate Program740852 +Ref: Translate Program-Footnote-1745417 +Node: Labels Program745687 +Ref: Labels Program-Footnote-1749038 +Node: Word Sorting749122 +Node: History Sorting753194 +Node: Extract Program755029 +Node: Simple Sed762558 +Node: Igawk Program765632 +Ref: Igawk Program-Footnote-1779963 +Ref: Igawk Program-Footnote-2780165 +Ref: Igawk Program-Footnote-3780287 +Node: Anagram Program780402 +Node: Signature Program783464 +Node: Programs Summary784711 +Node: Programs Exercises785925 +Ref: Programs Exercises-Footnote-1790054 +Node: Advanced Features790145 +Node: Nondecimal Data792135 +Node: Array Sorting793726 +Node: Controlling Array Traversal794426 +Ref: Controlling Array Traversal-Footnote-1802793 +Node: Array Sorting Functions802911 +Ref: Array Sorting Functions-Footnote-1808002 +Node: Two-way I/O808198 +Ref: Two-way I/O-Footnote-1814748 +Ref: Two-way I/O-Footnote-2814935 +Node: TCP/IP Networking815017 +Node: Profiling818135 +Ref: Profiling-Footnote-1826628 +Node: Advanced Features Summary826951 +Node: Internationalization828795 +Node: I18N and L10N830275 +Node: Explaining gettext830962 +Ref: Explaining gettext-Footnote-1836854 +Ref: Explaining gettext-Footnote-2837039 +Node: Programmer i18n837204 +Ref: Programmer i18n-Footnote-1842059 +Node: Translator i18n842108 +Node: String Extraction842902 +Ref: String Extraction-Footnote-1844034 +Node: Printf Ordering844120 +Ref: Printf Ordering-Footnote-1846906 +Node: I18N Portability846970 +Ref: I18N Portability-Footnote-1849426 +Node: I18N Example849489 +Ref: I18N Example-Footnote-1852295 +Node: Gawk I18N852368 +Node: I18N Summary853013 +Node: Debugger854354 +Node: Debugging855376 +Node: Debugging Concepts855817 +Node: Debugging Terms857626 +Node: Awk Debugging860201 +Node: Sample Debugging Session861107 +Node: Debugger Invocation861641 +Node: Finding The Bug863027 +Node: List of Debugger Commands869505 +Node: Breakpoint Control870838 +Node: Debugger Execution Control874532 +Node: Viewing And Changing Data877894 +Node: Execution Stack881268 +Node: Debugger Info882905 +Node: Miscellaneous Debugger Commands886976 +Node: Readline Support892064 +Node: Limitations892960 +Ref: Limitations-Footnote-1897191 +Node: Debugging Summary897242 +Node: Arbitrary Precision Arithmetic898521 +Node: Computer Arithmetic899937 +Ref: table-numeric-ranges903528 +Ref: Computer Arithmetic-Footnote-1904250 +Node: Math Definitions904307 +Ref: table-ieee-formats907621 +Ref: Math Definitions-Footnote-1908224 +Node: MPFR features908329 +Node: FP Math Caution910046 +Ref: FP Math Caution-Footnote-1911118 +Node: Inexactness of computations911487 +Node: Inexact representation912447 +Node: Comparing FP Values913807 +Node: Errors accumulate914889 +Node: Getting Accuracy916322 +Node: Try To Round919032 +Node: Setting precision919931 +Ref: table-predefined-precision-strings920628 +Node: Setting the rounding mode922458 +Ref: table-gawk-rounding-modes922832 +Ref: Setting the rounding mode-Footnote-1926240 +Node: Arbitrary Precision Integers926419 +Ref: Arbitrary Precision Integers-Footnote-1931336 +Node: POSIX Floating Point Problems931485 +Ref: POSIX Floating Point Problems-Footnote-1935367 +Node: Floating point summary935405 +Node: Dynamic Extensions937595 +Node: Extension Intro939148 +Node: Plugin License940414 +Node: Extension Mechanism Outline941211 +Ref: figure-load-extension941650 +Ref: figure-register-new-function943215 +Ref: figure-call-new-function944307 +Node: Extension API Description946369 +Node: Extension API Functions Introduction947901 +Node: General Data Types952760 +Ref: General Data Types-Footnote-1958715 +Node: Memory Allocation Functions959014 +Ref: Memory Allocation Functions-Footnote-1961859 +Node: Constructor Functions961958 +Node: Registration Functions963703 +Node: Extension Functions964388 +Node: Exit Callback Functions967011 +Node: Extension Version String968261 +Node: Input Parsers968924 +Node: Output Wrappers978806 +Node: Two-way processors983318 +Node: Printing Messages985583 +Ref: Printing Messages-Footnote-1986754 +Node: Updating ERRNO986907 +Node: Requesting Values987646 +Ref: table-value-types-returned988383 +Node: Accessing Parameters989266 +Node: Symbol Table Access990501 +Node: Symbol table by name991013 +Node: Symbol table by cookie993034 +Ref: Symbol table by cookie-Footnote-1997186 +Node: Cached values997250 +Ref: Cached values-Footnote-11000757 +Node: Array Manipulation1000848 +Ref: Array Manipulation-Footnote-11001939 +Node: Array Data Types1001976 +Ref: Array Data Types-Footnote-11004634 +Node: Array Functions1004726 +Node: Flattening Arrays1008584 +Node: Creating Arrays1015492 +Node: Redirection API1020261 +Node: Extension API Variables1023092 +Node: Extension Versioning1023725 +Ref: gawk-api-version1024162 +Node: Extension API Informational Variables1025918 +Node: Extension API Boilerplate1026982 +Node: Finding Extensions1030796 +Node: Extension Example1031355 +Node: Internal File Description1032153 +Node: Internal File Ops1036233 +Ref: Internal File Ops-Footnote-11047995 +Node: Using Internal File Ops1048135 +Ref: Using Internal File Ops-Footnote-11050518 +Node: Extension Samples1050792 +Node: Extension Sample File Functions1052321 +Node: Extension Sample Fnmatch1059970 +Node: Extension Sample Fork1061457 +Node: Extension Sample Inplace1062675 +Node: Extension Sample Ord1065885 +Node: Extension Sample Readdir1066721 +Ref: table-readdir-file-types1067610 +Node: Extension Sample Revout1068415 +Node: Extension Sample Rev2way1069004 +Node: Extension Sample Read write array1069744 +Node: Extension Sample Readfile1071686 +Node: Extension Sample Time1072781 +Node: Extension Sample API Tests1074129 +Node: gawkextlib1074621 +Node: Extension summary1077068 +Node: Extension Exercises1080770 +Node: Language History1082268 +Node: V7/SVR3.11083924 +Node: SVR41086076 +Node: POSIX1087510 +Node: BTL1088889 +Node: POSIX/GNU1089618 +Node: Feature History1095480 +Node: Common Extensions1109850 +Node: Ranges and Locales1111133 +Ref: Ranges and Locales-Footnote-11115749 +Ref: Ranges and Locales-Footnote-21115776 +Ref: Ranges and Locales-Footnote-31116011 +Node: Contributors1116232 +Node: History summary1121792 +Node: Installation1123172 +Node: Gawk Distribution1124116 +Node: Getting1124600 +Node: Extracting1125561 +Node: Distribution contents1127199 +Node: Unix Installation1133293 +Node: Quick Installation1133975 +Node: Shell Startup Files1136389 +Node: Additional Configuration Options1137467 +Node: Configuration Philosophy1139272 +Node: Non-Unix Installation1141641 +Node: PC Installation1142099 +Node: PC Binary Installation1143419 +Node: PC Compiling1145271 +Ref: PC Compiling-Footnote-11148065 +Node: PC Testing1148174 +Node: PC Using1149354 +Ref: PC Using-Footnote-11153507 +Node: Cygwin1153580 +Node: MSYS1154350 +Node: VMS Installation1154851 +Node: VMS Compilation1155642 +Ref: VMS Compilation-Footnote-11156871 +Node: VMS Dynamic Extensions1156929 +Node: VMS Installation Details1158614 +Node: VMS Running1160867 +Node: VMS GNV1165146 +Node: VMS Old Gawk1165881 +Node: Bugs1166352 +Node: Other Versions1170667 +Node: Installation summary1177251 +Node: Notes1178302 +Node: Compatibility Mode1179167 +Node: Additions1179949 +Node: Accessing The Source1180874 +Node: Adding Code1182309 +Node: New Ports1188528 +Node: Derived Files1193016 +Ref: Derived Files-Footnote-11198501 +Ref: Derived Files-Footnote-21198536 +Ref: Derived Files-Footnote-31199134 +Node: Future Extensions1199248 +Node: Implementation Limitations1199906 +Node: Extension Design1201089 +Node: Old Extension Problems1202243 +Ref: Old Extension Problems-Footnote-11203761 +Node: Extension New Mechanism Goals1203818 +Ref: Extension New Mechanism Goals-Footnote-11207182 +Node: Extension Other Design Decisions1207371 +Node: Extension Future Growth1209484 +Node: Old Extension Mechanism1210320 +Node: Notes summary1212083 +Node: Basic Concepts1213265 +Node: Basic High Level1213946 +Ref: figure-general-flow1214228 +Ref: figure-process-flow1214913 +Ref: Basic High Level-Footnote-11218214 +Node: Basic Data Typing1218399 +Node: Glossary1221727 +Node: Copying1253674 +Node: GNU Free Documentation License1291213 +Node: Index1316331 End Tag Table diff --git a/doc/gawk.texi b/doc/gawk.texi index df4a1942..90437bc9 100644 --- a/doc/gawk.texi +++ b/doc/gawk.texi @@ -56,7 +56,7 @@ @c applies to and all the info about who's publishing this edition @c These apply across the board. -@set UPDATE-MONTH June, 2016 +@set UPDATE-MONTH August, 2016 @set VERSION 4.1 @set PATCHLEVEL 4 @@ -18717,7 +18717,7 @@ signal (bit 7) and if so, the guilty signal number (bits 0--6). Traditionally, @command{awk}'s @code{system()} function has simply returned the exit status value divided by 256. In the normal case this gives the exit status but in the case of death-by-signal it yields -a fractional floating-point value.@footnote{In private correspondance, +a fractional floating-point value.@footnote{In private correspondence, Dr.@: Kernighan has indicated to me that the way this was done was probably a mistake.} POSIX states that @command{awk}'s @code{system()} should return the full 16-bit value. @@ -22082,7 +22082,7 @@ Because of this, you should not call it from an @code{ENDFILE} rule. file as soon as an @code{ENDFILE} rule finishes!) You need to be careful calling @code{rewind()}. You can end up -causing infinite recursion if you don't pay attenion. Here is an +causing infinite recursion if you don't pay attention. Here is an example use: @example @@ -27964,7 +27964,7 @@ programming and knowledge of the behavior of the coprocess are required. @cindex files, @code{/inet4/@dots{}} (@command{gawk}) @cindex @code{/inet6/@dots{}} special files (@command{gawk}) @cindex files, @code{/inet6/@dots{}} (@command{gawk}) -@cindex @code{EMISTERED} +@cindex @code{EMRED} @ifnotdocbook @quotation @code{EMRED}:@* @@ -27979,7 +27979,7 @@ programming and knowledge of the behavior of the coprocess are required. @docbook <blockquote> <attribution>Mike O'Brien (aka Mr. Protocol)</attribution> -<literallayout class="normal"><literal>EMISTERED</literal>: +<literallayout class="normal"><literal>EMRED</literal>: <emphasis>A host is a host from coast to coast,</emphasis> <emphasis>and no-one can talk to host that's close,</emphasis> <emphasis>unless the host that isn't close</emphasis> @@ -41490,7 +41490,7 @@ An internal mechanism in @command{gawk} to minimize the amount of memory needed to store the value of string variables. If the value assumed by a variable is used in more than one place, only one copy of the value itself is kept, and the associated reference count is increased when the -same value is used by an additional variable, and decresed when the related +same value is used by an additional variable, and decreased when the related variable is no longer in use. When the reference count goes to zero, the memory space used to store the value of the variable is freed. diff --git a/doc/gawktexi.in b/doc/gawktexi.in index 2b0a5c26..33da97ee 100644 --- a/doc/gawktexi.in +++ b/doc/gawktexi.in @@ -51,7 +51,7 @@ @c applies to and all the info about who's publishing this edition @c These apply across the board. -@set UPDATE-MONTH June, 2016 +@set UPDATE-MONTH August, 2016 @set VERSION 4.1 @set PATCHLEVEL 4 @@ -17900,7 +17900,7 @@ signal (bit 7) and if so, the guilty signal number (bits 0--6). Traditionally, @command{awk}'s @code{system()} function has simply returned the exit status value divided by 256. In the normal case this gives the exit status but in the case of death-by-signal it yields -a fractional floating-point value.@footnote{In private correspondance, +a fractional floating-point value.@footnote{In private correspondence, Dr.@: Kernighan has indicated to me that the way this was done was probably a mistake.} POSIX states that @command{awk}'s @code{system()} should return the full 16-bit value. @@ -21164,7 +21164,7 @@ Because of this, you should not call it from an @code{ENDFILE} rule. file as soon as an @code{ENDFILE} rule finishes!) You need to be careful calling @code{rewind()}. You can end up -causing infinite recursion if you don't pay attenion. Here is an +causing infinite recursion if you don't pay attention. Here is an example use: @example @@ -27046,7 +27046,7 @@ programming and knowledge of the behavior of the coprocess are required. @cindex files, @code{/inet4/@dots{}} (@command{gawk}) @cindex @code{/inet6/@dots{}} special files (@command{gawk}) @cindex files, @code{/inet6/@dots{}} (@command{gawk}) -@cindex @code{EMISTERED} +@cindex @code{EMRED} @ifnotdocbook @quotation @code{EMRED}:@* @@ -27061,7 +27061,7 @@ programming and knowledge of the behavior of the coprocess are required. @docbook <blockquote> <attribution>Mike O'Brien (aka Mr. Protocol)</attribution> -<literallayout class="normal"><literal>EMISTERED</literal>: +<literallayout class="normal"><literal>EMRED</literal>: <emphasis>A host is a host from coast to coast,</emphasis> <emphasis>and no-one can talk to host that's close,</emphasis> <emphasis>unless the host that isn't close</emphasis> @@ -40572,7 +40572,7 @@ An internal mechanism in @command{gawk} to minimize the amount of memory needed to store the value of string variables. If the value assumed by a variable is used in more than one place, only one copy of the value itself is kept, and the associated reference count is increased when the -same value is used by an additional variable, and decresed when the related +same value is used by an additional variable, and decreased when the related variable is no longer in use. When the reference count goes to zero, the memory space used to store the value of the variable is freed. @@ -752,7 +752,7 @@ str2wstr(NODE *n, size_t **ptr) * stopping early. This is particularly important * for match() where we need to build the indices. */ - if (using_utf8()) { + if (dfa_using_utf8()) { count = 1; wc = 0xFFFD; /* unicode replacement character */ goto set_wc; diff --git a/po/gawk.pot b/po/gawk.pot index 609f90d3..e17f194e 100644 --- a/po/gawk.pot +++ b/po/gawk.pot @@ -6,9 +6,9 @@ #, fuzzy msgid "" msgstr "" -"Project-Id-Version: gawk 4.1.3i\n" +"Project-Id-Version: gawk 4.1.3j\n" "Report-Msgid-Bugs-To: bug-gawk@gnu.org\n" -"POT-Creation-Date: 2016-08-01 22:56+0300\n" +"POT-Creation-Date: 2016-08-23 05:55+0300\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" "Language-Team: LANGUAGE <LL@li.org>\n" @@ -1798,40 +1798,40 @@ msgstr "" msgid "No symbol `%s' in current context" msgstr "" -#: dfa.c:1029 dfa.c:1032 dfa.c:1051 dfa.c:1061 dfa.c:1073 dfa.c:1100 dfa.c:1109 -#: dfa.c:1112 dfa.c:1117 dfa.c:1138 dfa.c:1141 +#: dfa.c:1090 dfa.c:1093 dfa.c:1112 dfa.c:1122 dfa.c:1135 dfa.c:1163 dfa.c:1172 +#: dfa.c:1175 dfa.c:1180 dfa.c:1202 dfa.c:1205 msgid "unbalanced [" msgstr "" -#: dfa.c:1085 +#: dfa.c:1148 msgid "invalid character class" msgstr "" -#: dfa.c:1207 +#: dfa.c:1271 msgid "character class syntax is [[:space:]], not [:space:]" msgstr "" -#: dfa.c:1269 +#: dfa.c:1332 msgid "unfinished \\ escape" msgstr "" -#: dfa.c:1416 +#: dfa.c:1499 msgid "invalid content of \\{\\}" msgstr "" -#: dfa.c:1419 +#: dfa.c:1502 msgid "regular expression too big" msgstr "" -#: dfa.c:1835 +#: dfa.c:1916 msgid "unbalanced (" msgstr "" -#: dfa.c:1961 +#: dfa.c:2044 msgid "no syntax specified" msgstr "" -#: dfa.c:1969 +#: dfa.c:2052 msgid "unbalanced )" msgstr "" @@ -3327,7 +3327,7 @@ msgstr "" msgid "redir2str: unknown redirection type %d" msgstr "" -#: re.c:607 +#: re.c:610 #, c-format msgid "regexp component `%.*s' should probably be `[%.*s]'" msgstr "" @@ -203,14 +203,11 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal) syn &= ~RE_ICASE; } - /* only call dfasyntax if we're using dfa; saves time */ - if (dfa && ! no_dfa) { - dfa_syn = syn; - /* FIXME: dfa doesn't pay attention RE_ICASE */ - if (ignorecase) - dfa_syn |= RE_ICASE; - dfasyntax(dfa_syn, ignorecase, '\n'); - } + dfa_syn = syn; + /* FIXME: dfa doesn't pay attention RE_ICASE */ + if (ignorecase) + dfa_syn |= RE_ICASE; + re_set_syntax(syn); if ((rerr = re_compile_pattern(buf, len, &(rp->pat))) != NULL) { @@ -228,6 +225,7 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal) if (dfa && ! no_dfa) { rp->dfa = true; rp->dfareg = dfaalloc(); + dfasyntax(rp->dfareg, dfa_syn, ignorecase, '\n'); dfacomp(buf, len, rp->dfareg, true); } else rp->dfa = false; @@ -427,7 +425,8 @@ resetup() syn |= RE_INTERVALS | RE_INVALID_INTERVAL_ORD | RE_NO_BK_BRACES; (void) re_set_syntax(syn); - dfasyntax(syn, false, '\n'); + + dfa_init(); } /* avoid_dfa --- return true if we should not use the DFA matcher */ |