aboutsummaryrefslogtreecommitdiffstats
path: root/support
diff options
context:
space:
mode:
authorArnold D. Robbins <arnold@skeeve.com>2018-12-21 11:29:52 +0200
committerArnold D. Robbins <arnold@skeeve.com>2018-12-21 11:29:52 +0200
commitde5f5e33aaf66cf5de836ef9224e68fc0546ae11 (patch)
tree67e267720b6a6badf753e6cc2077a9d1bd3c5d6a /support
parent8f6299093d8277b312f927a9bf36fceacae38f4d (diff)
downloadegawk-de5f5e33aaf66cf5de836ef9224e68fc0546ae11.tar.gz
egawk-de5f5e33aaf66cf5de836ef9224e68fc0546ae11.tar.bz2
egawk-de5f5e33aaf66cf5de836ef9224e68fc0546ae11.zip
Bug fix in dfa, with test.
Diffstat (limited to 'support')
-rw-r--r--support/ChangeLog5
-rw-r--r--support/dfa.c35
2 files changed, 35 insertions, 5 deletions
diff --git a/support/ChangeLog b/support/ChangeLog
index 00753047..424dbdd5 100644
--- a/support/ChangeLog
+++ b/support/ChangeLog
@@ -1,3 +1,8 @@
+2018-12-21 Arnold D. Robbins <arnold@skeeve.com>
+
+ * dfa.c: Sync with GNULIB, bugfix for \b (\y in gawk)
+ in the C locale.
+
2018-12-18 Arnold D. Robbins <arnold@skeeve.com>
* Makefile.am (distclean-local): Remove .deps directory.
diff --git a/support/dfa.c b/support/dfa.c
index 612faa10..7687aca0 100644
--- a/support/dfa.c
+++ b/support/dfa.c
@@ -2345,6 +2345,26 @@ epsclosure (struct dfa const *d)
free (tmp.elems);
}
+/* Return the set of contexts, as a bitwise OR of CTX_NEWLINE, CTX_LETTER
+ and CTX_NONE, for which at least one character is included in C. */
+
+static int
+charclass_context (struct dfa const *dfa, charclass const *c)
+{
+ int context = 0;
+
+ for (unsigned int j = 0; j < CHARCLASS_WORDS; ++j) /* scan C one word at a time */
+ {
+ if (c->w[j] & dfa->syntax.newline.w[j]) /* C shares a bit with syntax.newline */
+ context |= CTX_NEWLINE;
+ if (c->w[j] & dfa->syntax.letters.w[j]) /* C shares a bit with syntax.letters */
+ context |= CTX_LETTER;
+ if (c->w[j] & ~(dfa->syntax.letters.w[j] | dfa->syntax.newline.w[j])) /* C has a bit in neither set */
+ context |= CTX_NONE;
+ }
+
+ return context;
+}
/* Returns the contexts on which the position set S depends. Each context
in the set of returned contexts (let's call it SC) may have a different
follow set than other contexts in SC, and also different from the
@@ -3137,17 +3157,22 @@ build_state (state_num s, struct dfa *d, unsigned char uc)
/* Find out if the new state will want any context information,
by calculating possible contexts that the group can match,
and separate contexts that the new state wants to know. */
+ int possible_contexts = charclass_context (d, &label);
int separate_contexts = state_separate_contexts (d, &group);
/* Find the state(s) corresponding to the union of the follows. */
- if (d->syntax.sbit[uc] & separate_contexts & CTX_NEWLINE)
- state = state_index (d, &group, CTX_NEWLINE);
- else if (d->syntax.sbit[uc] & separate_contexts & CTX_LETTER)
- state = state_index (d, &group, CTX_LETTER);
- else
+ if (possible_contexts & ~separate_contexts)
state = state_index (d, &group, separate_contexts ^ CTX_ANY);
+ else
+ state = -1;
+ if (separate_contexts & possible_contexts & CTX_NEWLINE)
+ state_newline = state_index (d, &group, CTX_NEWLINE);
+ else
state_newline = state;
+ if (separate_contexts & possible_contexts & CTX_LETTER)
+ state_letter = state_index (d, &group, CTX_LETTER);
+ else
state_letter = state;
/* Reallocate now, to reallocate any newline transition properly. */