diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2011-12-06 21:30:15 +0200 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2011-12-06 21:30:15 +0200 |
commit | 958b6cb452c88e7ba064253b6f461ebacfc1b736 (patch) | |
tree | a9cfdbc979d446fa7ae5d5184ce16a7df5825567 /dfa.c | |
parent | fd3e8f9c332bd76e7f8fa0c61c1451f52f3b1998 (diff) | |
download | egawk-958b6cb452c88e7ba064253b6f461ebacfc1b736.tar.gz egawk-958b6cb452c88e7ba064253b6f461ebacfc1b736.tar.bz2 egawk-958b6cb452c88e7ba064253b6f461ebacfc1b736.zip |
Sync dfa.c with GNU grep.
Diffstat (limited to 'dfa.c')
-rw-r--r-- | dfa.c | 37 |
1 file changed, 23 insertions, 14 deletions
```diff
@@ -608,7 +608,8 @@ setbit_c (int b, charclass c)
 }
 #else
 # define setbit_c setbit
-static inline bool setbit_wc (wint_t wc, charclass c)
+static inline bool
+setbit_wc (wint_t wc, charclass c)
 {
   abort ();
   /*NOTREACHED*/
@@ -782,7 +783,7 @@ static const struct dfa_ctype prednames[] = {
   { NULL, NULL, false }
 };
 
-static const struct dfa_ctype *
+static const struct dfa_ctype * _GL_ATTRIBUTE_PURE
 find_pred (const char *str)
 {
   unsigned int i;
@@ -802,7 +803,6 @@ parse_bracket_exp (void)
   int invert;
   int c, c1, c2;
   charclass ccl;
-  wint_t wc1 = 0;
 
   /* Used to warn about [:space:].
      Bit 0 = first character is a colon.
@@ -813,6 +813,7 @@ parse_bracket_exp (void)
 
   wint_t wc;
   wint_t wc2;
+  wint_t wc1 = 0;
 
   /* Work area to build a mb_char_classes. */
   struct mb_char_classes *work_mbc;
@@ -1080,8 +1081,18 @@ parse_bracket_exp (void)
   return CSET + charclass_index(ccl);
 }
 
+/* Add this to the test for whether a byte is word-constituent, since on
+   BSD-based systems, many values in the 128..255 range are classified as
+   alphabetic, while on glibc-based systems, they are not. */
+#ifdef __GLIBC__
+# define is_valid_unibyte_character(c) 1
+#else
+# define is_valid_unibyte_character(c) (MBS_SUPPORT && btowc (c) != WEOF)
+#endif
+
 /* Return non-zero if C is a `word-constituent' byte; zero otherwise. */
-#define IS_WORD_CONSTITUENT(C) (isalnum(C) || (C) == '_')
+#define IS_WORD_CONSTITUENT(C) \
+  (is_valid_unibyte_character(C) && (isalnum(C) || (C) == '_'))
 
 static token
 lex (void)
@@ -1678,7 +1689,7 @@ atom (void)
 }
 
 /* Return the number of tokens in the given subexpression. */
-static int
+static int _GL_ATTRIBUTE_PURE
 nsubtoks (int tindex)
 {
   int ntoks1;
@@ -2128,9 +2139,7 @@ dfaanalyze (struct dfa *d, int searchflag)
   CALLOC(d->follows, d->tindex);
 
   for (i = 0; i < d->tindex; ++i)
-#ifdef DEBUG
-    { /* Nonsyntactic #ifdef goo... */
-#endif
+    {
     switch (d->tokens[i])
       {
       case EMPTY:
@@ -2255,8 +2264,8 @@ dfaanalyze (struct dfa *d, int searchflag)
           prtok(d->tokens[lastpos[j].index]);
         }
       putc('\n', stderr);
-    }
 #endif
+    }
 
   /* For each follow set that is the follow set of a real position, replace
      it with its epsilon closure. */
@@ -3256,7 +3265,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
   for (;;)
     {
       if (d->mb_cur_max > 1)
-        while ((t = trans[s]))
+        while ((t = trans[s]) != NULL)
           {
             if (p > buf_end)
               break;
@@ -3289,10 +3298,10 @@ dfaexec (struct dfa *d, char const *begin, char *end,
           }
       else
         {
-          while ((t = trans[s]) != 0)
+          while ((t = trans[s]) != NULL)
             {
               s1 = t[*p++];
-              if ((t = trans[s1]) == 0)
+              if ((t = trans[s1]) == NULL)
                 {
                   int tmp = s; s = s1; s1 = tmp; /* swap */
                   break;
@@ -3603,7 +3612,7 @@ icpyalloc (char const *string)
   return icatalloc (NULL, string);
 }
 
-static char *
+static char * _GL_ATTRIBUTE_PURE
 istrstr (char const *lookin, char const *lookfor)
 {
   char const *cp;
@@ -4015,7 +4024,7 @@ dfaalloc (void)
   return xmalloc (sizeof (struct dfa));
 }
 
-struct dfamust *
+struct dfamust * _GL_ATTRIBUTE_PURE
 dfamusts (struct dfa const *d)
 {
   return d->musts;
```