diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2016-04-24 18:07:11 +0300 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2016-04-24 18:07:11 +0300 |
commit | c65ebdef5333b1a9b891d4235367dc158d3f05b4 (patch) | |
tree | e5ced173167edaec30608e62affdf0842b336ae5 /dfa.c | |
parent | 82242cb6dfd7ca365ab65966a648b7cf884a1859 (diff) | |
download | egawk-c65ebdef5333b1a9b891d4235367dc158d3f05b4.tar.gz egawk-c65ebdef5333b1a9b891d4235367dc158d3f05b4.tar.bz2 egawk-c65ebdef5333b1a9b891d4235367dc158d3f05b4.zip |
Sync dfa.c with grep.
Diffstat (limited to 'dfa.c')
-rw-r--r-- | dfa.c | 70 |
1 file changed, 27 insertions, 43 deletions
@@ -385,11 +385,6 @@ struct dfa
    */
   int *multibyte_prop;
 
-  /* A table indexed by byte values that contains the corresponding wide
-     character (if any) for that byte. WEOF means the byte is not a
-     valid single-byte character. */
-  wint_t mbrtowc_cache[NOTCHAR];
-
   /* Array of the bracket expression in the DFA. */
   struct mb_char_classes *mbcsets;
   size_t nmbcsets;
@@ -466,19 +461,10 @@ struct dfa
 
 static void regexp (void);
 
-static void
-dfambcache (struct dfa *d)
-{
-  int i;
-  for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
-    {
-      char c = i;
-      unsigned char uc = i;
-      mbstate_t s = { 0 };
-      wchar_t wc;
-      d->mbrtowc_cache[uc] = mbrtowc (&wc, &c, 1, &s) <= 1 ? wc : WEOF;
-    }
-}
+/* A table indexed by byte values that contains the corresponding wide
+   character (if any) for that byte. WEOF means the byte is not a
+   valid single-byte character. */
+static wint_t mbrtowc_cache[NOTCHAR];
 
 /* Store into *PWC the result of converting the leading bytes of the
    multibyte buffer S of length N bytes, using the mbrtowc_cache in *D
@@ -501,7 +487,7 @@ static size_t
 mbs_to_wchar (wint_t *pwc, char const *s, size_t n, struct dfa *d)
 {
   unsigned char uc = s[0];
-  wint_t wc = d->mbrtowc_cache[uc];
+  wint_t wc = mbrtowc_cache[uc];
 
   if (wc == WEOF)
     {
@@ -706,25 +692,18 @@ static charclass letters;
 /* Set of characters that are newline. */
 static charclass newline;
 
-/* Add this to the test for whether a byte is word-constituent, since on
-   BSD-based systems, many values in the 128..255 range are classified as
-   alphabetic, while on glibc-based systems, they are not. */
-#ifdef __GLIBC__
-# define is_valid_unibyte_character(c) 1
-#else
-# define is_valid_unibyte_character(c) (btowc (c) != WEOF)
-#endif
-
-/* C is a "word-constituent" byte. */
-#define IS_WORD_CONSTITUENT(C) \
-  (is_valid_unibyte_character (C) && (isalnum (C) || (C) == '_'))
+static bool
+unibyte_word_constituent (unsigned char c)
+{
+  return mbrtowc_cache[c] != WEOF && (isalnum (c) || (c) == '_');
+}
 
 static int
 char_context (unsigned char c)
 {
   if (c == eolbyte)
     return CTX_NEWLINE;
-  if (IS_WORD_CONSTITUENT (c))
+  if (unibyte_word_constituent (c))
     return CTX_LETTER;
   return CTX_NONE;
 }
@@ -743,23 +722,29 @@ wchar_context (wint_t wc)
 void
 dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
 {
-  unsigned int i;
-
+  int i;
   syntax_bits_set = 1;
   syntax_bits = bits;
   case_fold = fold != 0;
   eolbyte = eol;
 
-  for (i = 0; i < NOTCHAR; ++i)
+  for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
     {
-      sbit[i] = char_context (i);
-      switch (sbit[i])
+      char c = i;
+      unsigned char uc = i;
+      mbstate_t s = { 0 };
+      wchar_t wc;
+      mbrtowc_cache[uc] = mbrtowc (&wc, &c, 1, &s) <= 1 ? wc : WEOF;
+
+      /* Now that mbrtowc_cache[uc] is set, use it to calculate sbit. */
+      sbit[uc] = char_context (uc);
+      switch (sbit[uc])
         {
         case CTX_LETTER:
-          setbit (i, letters);
+          setbit (uc, letters);
           break;
         case CTX_NEWLINE:
-          setbit (i, newline);
+          setbit (uc, newline);
           break;
         }
     }
@@ -1528,7 +1513,7 @@ lex (void)
             {
               zeroset (ccl);
               for (c2 = 0; c2 < NOTCHAR; ++c2)
-                if (IS_WORD_CONSTITUENT (c2))
+                if (unibyte_word_constituent (c2))
                   setbit (c2, ccl);
               if (c == 'W')
                 notset (ccl);
@@ -2753,7 +2738,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
         state_letter = state;
 
       for (i = 0; i < NOTCHAR; ++i)
-        trans[i] = (IS_WORD_CONSTITUENT (i)) ? state_letter : state;
+        trans[i] = unibyte_word_constituent (i) ? state_letter : state;
       trans[eolbyte] = state_newline;
     }
   else
@@ -2859,7 +2844,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
 
             if (c == eolbyte)
               trans[c] = state_newline;
-            else if (IS_WORD_CONSTITUENT (c))
+            else if (unibyte_word_constituent (c))
               trans[c] = state_letter;
             else if (c < NOTCHAR)
               trans[c] = state;
@@ -3666,7 +3651,6 @@ void
 dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
 {
   dfainit (d);
-  dfambcache (d);
   dfaparse (s, len, d);
   dfassbuild (d);
 