diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2018-12-21 11:29:52 +0200 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2018-12-21 11:29:52 +0200 |
commit | de5f5e33aaf66cf5de836ef9224e68fc0546ae11 (patch) | |
tree | 67e267720b6a6badf753e6cc2077a9d1bd3c5d6a /support | |
parent | 8f6299093d8277b312f927a9bf36fceacae38f4d (diff) | |
download | egawk-de5f5e33aaf66cf5de836ef9224e68fc0546ae11.tar.gz egawk-de5f5e33aaf66cf5de836ef9224e68fc0546ae11.tar.bz2 egawk-de5f5e33aaf66cf5de836ef9224e68fc0546ae11.zip |
Bug fix in dfa, with test.
Diffstat (limited to 'support')
-rw-r--r-- | support/ChangeLog | 5 | ||||
-rw-r--r-- | support/dfa.c | 35 |
2 files changed, 35 insertions, 5 deletions
diff --git a/support/ChangeLog b/support/ChangeLog index 00753047..424dbdd5 100644 --- a/support/ChangeLog +++ b/support/ChangeLog @@ -1,3 +1,8 @@ +2018-12-21 Arnold D. Robbins <arnold@skeeve.com> + + * dfa.c; Sync with GNULIB, bugfix for \b (\y in gawk) + in the C locale. + 2018-12-18 Arnold D. Robbins <arnold@skeeve.com> * Makefile.am (distclean-local): Remove .deps directory. diff --git a/support/dfa.c b/support/dfa.c index 612faa10..7687aca0 100644 --- a/support/dfa.c +++ b/support/dfa.c @@ -2345,6 +2345,26 @@ epsclosure (struct dfa const *d) free (tmp.elems); } +/* Returns the set of contexts for which there is at least one + character included in C. */ + +static int +charclass_context (struct dfa const *dfa, charclass const *c) +{ + int context = 0; + + for (unsigned int j = 0; j < CHARCLASS_WORDS; ++j) + { + if (c->w[j] & dfa->syntax.newline.w[j]) + context |= CTX_NEWLINE; + if (c->w[j] & dfa->syntax.letters.w[j]) + context |= CTX_LETTER; + if (c->w[j] & ~(dfa->syntax.letters.w[j] | dfa->syntax.newline.w[j])) + context |= CTX_NONE; + } + + return context; +} /* Returns the contexts on which the position set S depends. Each context in the set of returned contexts (let's call it SC) may have a different follow set than other contexts in SC, and also different from the @@ -3137,17 +3157,22 @@ build_state (state_num s, struct dfa *d, unsigned char uc) /* Find out if the new state will want any context information, by calculating possible contexts that the group can match, and separate contexts that the new state wants to know. */ + int possible_contexts = charclass_context (d, &label); int separate_contexts = state_separate_contexts (d, &group); /* Find the state(s) corresponding to the union of the follows. */ - if (d->syntax.sbit[uc] & separate_contexts & CTX_NEWLINE) - state = state_index (d, &group, CTX_NEWLINE); - else if (d->syntax.sbit[uc] & separate_contexts & CTX_LETTER) - state = state_index (d, &group, CTX_LETTER); - else + if (possible_contexts & ~separate_contexts) state = state_index (d, &group, separate_contexts ^ CTX_ANY); + else + state = -1; + if (separate_contexts & possible_contexts & CTX_NEWLINE) + state_newline = state_index (d, &group, CTX_NEWLINE); + else state_newline = state; + if (separate_contexts & possible_contexts & CTX_LETTER) + state_letter = state_index (d, &group, CTX_LETTER); + else state_letter = state; /* Reallocate now, to reallocate any newline transition properly. */ |