diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2019-12-13 14:04:45 +0200 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2019-12-13 14:04:45 +0200 |
commit | 893bd9fd3d78eb6d3093971ee3c3700d71da1e77 (patch) | |
tree | c11b232ca22c5b182daf949f120594504355214b /support | |
parent | 3e0eec7b43484fc639d8e78d3f64ae10939257ee (diff) | |
download | egawk-893bd9fd3d78eb6d3093971ee3c3700d71da1e77.tar.gz egawk-893bd9fd3d78eb6d3093971ee3c3700d71da1e77.tar.bz2 egawk-893bd9fd3d78eb6d3093971ee3c3700d71da1e77.zip |
Update dfa.
Diffstat (limited to 'support')
-rw-r--r-- | support/ChangeLog | 4 | ||||
-rw-r--r-- | support/dfa.c | 252 | ||||
-rw-r--r-- | support/dfa.h | 11 |
3 files changed, 138 insertions, 129 deletions
diff --git a/support/ChangeLog b/support/ChangeLog index 5ef08afb..bb323e96 100644 --- a/support/ChangeLog +++ b/support/ChangeLog @@ -1,3 +1,7 @@ +2019-12-13 Arnold D. Robbins <arnold@skeeve.com> + + * dfah, dfa.c: Updated from GNULIB. + 2019-11-21 Arnold D. Robbins <arnold@skeeve.com> * regexec.c: Updated from GNULIB. diff --git a/support/dfa.c b/support/dfa.c index fec637a6..cfa54211 100644 --- a/support/dfa.c +++ b/support/dfa.c @@ -355,7 +355,7 @@ enum a constraint. */ typedef struct { - size_t index; /* Index into the parse array. */ + ptrdiff_t index; /* Index into the parse array. */ unsigned int constraint; /* Constraint for matching this position. */ } position; @@ -437,9 +437,9 @@ struct regex_syntax struct lexer_state { char const *ptr; /* Pointer to next input character. */ - size_t left; /* Number of characters remaining. */ + ptrdiff_t left; /* Number of characters remaining. */ token lasttok; /* Previous token returned; initially END. */ - size_t parens; /* Count of outstanding left parens. */ + ptrdiff_t parens; /* Count of outstanding left parens. */ int minrep, maxrep; /* Repeat counts for {m,n}. */ /* Wide character representation of the current multibyte character, @@ -462,7 +462,7 @@ struct lexer_state struct parser_state { token tok; /* Lookahead token. */ - size_t depth; /* Current depth of a hypothetical stack + ptrdiff_t depth; /* Current depth of a hypothetical stack holding deferred productions. This is used to determine the depth that will be required of the real stack later on in @@ -479,7 +479,7 @@ struct dfa charclass *charclasses; /* Array of character sets for CSET tokens. */ ptrdiff_t cindex; /* Index for adding new charclasses. */ ptrdiff_t calloc; /* Number of charclasses allocated. */ - size_t canychar; /* Index of anychar class, or (size_t) -1. */ + ptrdiff_t canychar; /* Index of anychar class, or -1. */ /* Scanner state */ struct lexer_state lex; @@ -489,13 +489,13 @@ struct dfa /* Fields filled by the parser. */ token *tokens; /* Postfix parse array. */ - size_t tindex; /* Index for adding new tokens. */ - size_t talloc; /* Number of tokens currently allocated. */ - size_t depth; /* Depth required of an evaluation stack + ptrdiff_t tindex; /* Index for adding new tokens. */ + ptrdiff_t talloc; /* Number of tokens currently allocated. */ + ptrdiff_t depth; /* Depth required of an evaluation stack used for depth-first traversal of the parse tree. */ - size_t nleaves; /* Number of leaves on the parse tree. */ - size_t nregexps; /* Count of parallel regexps being built + ptrdiff_t nleaves; /* Number of leaves on the parse tree. */ + ptrdiff_t nregexps; /* Count of parallel regexps being built with dfaparse. */ bool fast; /* The DFA is fast. */ token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales. */ @@ -642,7 +642,7 @@ static void regexp (struct dfa *dfa); * The return value is always in the range 1..N. * D->mbs is always valid afterwards. * *PWC is always set to something. */ -static size_t +static int mbs_to_wchar (wint_t *pwc, char const *s, size_t n, struct dfa *d) { unsigned char uc = s[0]; @@ -999,8 +999,8 @@ using_simple_locale (bool multibyte) static int fetch_wc (struct dfa *dfa) { - size_t nbytes = mbs_to_wchar (&dfa->lex.wctok, dfa->lex.ptr, dfa->lex.left, - dfa); + int nbytes = mbs_to_wchar (&dfa->lex.wctok, dfa->lex.ptr, dfa->lex.left, + dfa); dfa->lex.cur_mb_len = nbytes; int c = nbytes == 1 ? to_uchar (dfa->lex.ptr[0]) : EOF; dfa->lex.ptr += nbytes; @@ -1050,7 +1050,7 @@ static const struct dfa_ctype prednames[] = { static const struct dfa_ctype *_GL_ATTRIBUTE_PURE find_pred (const char *str) { - for (unsigned int i = 0; prednames[i].name; ++i) + for (int i = 0; prednames[i].name; i++) if (streq (str, prednames[i].name)) return &prednames[i]; return NULL; @@ -1105,7 +1105,7 @@ parse_bracket_exp (struct dfa *dfa) { enum { MAX_BRACKET_STRING_LEN = 32 }; char str[MAX_BRACKET_STRING_LEN + 1]; - size_t len = 0; + int len = 0; for (;;) { c = bracket_fetch_wc (dfa); @@ -1243,11 +1243,11 @@ parse_bracket_exp (struct dfa *dfa) else { wchar_t folded[CASE_FOLDED_BUFSIZE + 1]; - unsigned int n = (dfa->syntax.case_fold - ? case_folded_counterparts (wc, folded + 1) + 1 - : 1); + int n = (dfa->syntax.case_fold + ? case_folded_counterparts (wc, folded + 1) + 1 + : 1); folded[0] = wc; - for (unsigned int i = 0; i < n; i++) + for (int i = 0; i < n; i++) if (!setbit_wc (folded[i], &ccl)) { dfa->lex.brack.chars @@ -1286,7 +1286,7 @@ parse_bracket_exp (struct dfa *dfa) struct lexptr { char const *ptr; - size_t left; + ptrdiff_t left; }; static void @@ -1534,7 +1534,7 @@ lex (struct dfa *dfa) case '.': if (backslash) goto normal_char; - if (dfa->canychar == (size_t) -1) + if (dfa->canychar < 0) { charclass ccl; fillset (&ccl); @@ -1657,8 +1657,8 @@ addtok_mb (struct dfa *dfa, token t, char mbprop) { if (dfa->talloc == dfa->tindex) { - dfa->tokens = x2nrealloc (dfa->tokens, &dfa->talloc, - sizeof *dfa->tokens); + dfa->tokens = xpalloc (dfa->tokens, &dfa->talloc, 1, -1, + sizeof *dfa->tokens); if (dfa->localeinfo.multibyte) dfa->multibyte_prop = xnrealloc (dfa->multibyte_prop, dfa->talloc, sizeof *dfa->multibyte_prop); @@ -1780,11 +1780,11 @@ add_utf8_anychar (struct dfa *dfa) /* f0-f7: 4-byte sequence. */ CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0, 0xff0000) }; - const unsigned int n = sizeof (utf8_classes) / sizeof (utf8_classes[0]); + int n = sizeof utf8_classes / sizeof *utf8_classes; /* Define the five character classes that are needed below. */ if (dfa->utf8_anychar_classes[0] == 0) - for (unsigned int i = 0; i < n; i++) + for (int i = 0; i < n; i++) { charclass c = utf8_classes[i]; if (i == 1) @@ -1807,7 +1807,7 @@ add_utf8_anychar (struct dfa *dfa) which I'll write more concisely "B|CA|DAA|EAAA". Factor the [0x00-0x7f] and you get "B|(C|(D|EA)A)A". And since the token buffer is in reverse Polish notation, you get "B C D E A CAT OR A CAT OR A CAT OR". */ - unsigned int i; + int i; for (i = 1; i < n; i++) addtok (dfa, dfa->utf8_anychar_classes[i]); while (--i > 1) @@ -1890,9 +1890,8 @@ atom (struct dfa *dfa) if (dfa->syntax.case_fold) { wchar_t folded[CASE_FOLDED_BUFSIZE]; - unsigned int n = case_folded_counterparts (dfa->lex.wctok, - folded); - for (unsigned int i = 0; i < n; i++) + int n = case_folded_counterparts (dfa->lex.wctok, folded); + for (int i = 0; i < n; i++) { addtok_wc (dfa, folded[i]); addtok (dfa, OR); @@ -1915,8 +1914,8 @@ atom (struct dfa *dfa) } /* Return the number of tokens in the given subexpression. */ -static size_t _GL_ATTRIBUTE_PURE -nsubtoks (struct dfa const *dfa, size_t tindex) +static ptrdiff_t _GL_ATTRIBUTE_PURE +nsubtoks (struct dfa const *dfa, ptrdiff_t tindex) { switch (dfa->tokens[tindex - 1]) { @@ -1929,7 +1928,7 @@ nsubtoks (struct dfa const *dfa, size_t tindex) case CAT: case OR: { - size_t ntoks1 = nsubtoks (dfa, tindex - 1); + ptrdiff_t ntoks1 = nsubtoks (dfa, tindex - 1); return 1 + ntoks1 + nsubtoks (dfa, tindex - 1 - ntoks1); } } @@ -1937,14 +1936,14 @@ nsubtoks (struct dfa const *dfa, size_t tindex) /* Copy the given subexpression to the top of the tree. */ static void -copytoks (struct dfa *dfa, size_t tindex, size_t ntokens) +copytoks (struct dfa *dfa, ptrdiff_t tindex, ptrdiff_t ntokens) { if (dfa->localeinfo.multibyte) - for (size_t i = 0; i < ntokens; ++i) + for (ptrdiff_t i = 0; i < ntokens; ++i) addtok_mb (dfa, dfa->tokens[tindex + i], dfa->multibyte_prop[tindex + i]); else - for (size_t i = 0; i < ntokens; ++i) + for (ptrdiff_t i = 0; i < ntokens; ++i) addtok_mb (dfa, dfa->tokens[tindex + i], 3); } @@ -1956,8 +1955,8 @@ closure (struct dfa *dfa) || dfa->parse.tok == PLUS || dfa->parse.tok == REPMN) if (dfa->parse.tok == REPMN && (dfa->lex.minrep || dfa->lex.maxrep)) { - size_t ntokens = nsubtoks (dfa, dfa->tindex); - size_t tindex = dfa->tindex - ntokens; + ptrdiff_t ntokens = nsubtoks (dfa, dfa->tindex); + ptrdiff_t tindex = dfa->tindex - ntokens; if (dfa->lex.maxrep < 0) addtok (dfa, PLUS); if (dfa->lex.minrep == 0) @@ -2013,10 +2012,9 @@ regexp (struct dfa *dfa) } } -/* Main entry point for the parser. S is a string to be parsed, len is the - length of the string, so s can include NUL characters. D is a pointer to - the struct dfa to parse into. */ -static void +/* Parse a string S of length LEN into D. S can include NUL characters. + This is the main entry point for the parser. */ +void dfaparse (char const *s, size_t len, struct dfa *d) { d->lex.ptr = s; @@ -2065,7 +2063,7 @@ copy (position_set const *src, position_set *dst) } static void -alloc_position_set (position_set *s, size_t size) +alloc_position_set (position_set *s, ptrdiff_t size) { s->elems = xnmalloc (size, sizeof *s->elems); s->alloc = size; @@ -2174,19 +2172,19 @@ merge2 (position_set *dst, position_set const *src, position_set *m) /* Delete a position from a set. Return the nonzero constraint of the deleted position, or zero if there was no such position. */ static unsigned int -delete (size_t del, position_set *s) +delete (ptrdiff_t del, position_set *s) { - size_t count = s->nelem; - size_t lo = 0, hi = count; + ptrdiff_t count = s->nelem; + ptrdiff_t lo = 0, hi = count; while (lo < hi) { - size_t mid = (lo + hi) >> 1; + ptrdiff_t mid = (lo + hi) >> 1; if (s->elems[mid].index < del) lo = mid + 1; else if (s->elems[mid].index == del) { unsigned int c = s->elems[mid].constraint; - size_t i; + ptrdiff_t i; for (i = mid; i + 1 < count; i++) s->elems[i] = s->elems[i + 1]; s->nelem = i; @@ -2200,7 +2198,7 @@ delete (size_t del, position_set *s) /* Replace a position with the followed set. */ static void -replace (position_set *dst, size_t del, position_set *add, +replace (position_set *dst, ptrdiff_t del, position_set *add, unsigned int constraint, position_set *tmp) { unsigned int c = delete (del, dst) & constraint; @@ -2224,7 +2222,10 @@ state_index (struct dfa *d, position_set const *s, int context) token first_end = 0; for (i = 0; i < s->nelem; ++i) - hash ^= s->elems[i].index + s->elems[i].constraint; + { + size_t ind = s->elems[i].index; + hash ^= ind + s->elems[i].constraint; + } /* Try to find a state that exactly matches the proposed one. */ for (i = 0; i < d->sindex; ++i) @@ -2242,10 +2243,10 @@ state_index (struct dfa *d, position_set const *s, int context) } #ifdef DEBUG - fprintf (stderr, "new state %zd\n nextpos:", i); + fprintf (stderr, "new state %td\n nextpos:", i); for (state_num j = 0; j < s->nelem; j++) { - fprintf (stderr, " %zu:", s->elems[j].index); + fprintf (stderr, " %td:", s->elems[j].index); prtok (d->tokens[s->elems[j].index]); } fprintf (stderr, "\n context:"); @@ -2307,7 +2308,7 @@ epsclosure (struct dfa const *d) { position_set tmp; alloc_position_set (&tmp, d->nleaves); - for (size_t i = 0; i < d->tindex; ++i) + for (ptrdiff_t i = 0; i < d->tindex; ++i) if (d->follows[i].nelem > 0 && d->tokens[i] >= NOTCHAR && d->tokens[i] != BACKREF && d->tokens[i] != ANYCHAR && d->tokens[i] != MBCSET && d->tokens[i] < CSET) @@ -2340,7 +2341,7 @@ epsclosure (struct dfa const *d) delete (i, &d->follows[i]); - for (size_t j = 0; j < d->tindex; j++) + for (ptrdiff_t j = 0; j < d->tindex; j++) if (i != j && d->follows[j].nelem > 0) replace (&d->follows[j], i, &d->follows[i], constraint, &tmp); } @@ -2355,7 +2356,7 @@ charclass_context (struct dfa const *dfa, charclass const *c) { int context = 0; - for (unsigned int j = 0; j < CHARCLASS_WORDS; ++j) + for (int j = 0; j < CHARCLASS_WORDS; j++) { if (c->w[j] & dfa->syntax.newline.w[j]) context |= CTX_NEWLINE; @@ -2379,7 +2380,7 @@ state_separate_contexts (struct dfa *d, position_set const *s) { int separate_contexts = 0; - for (size_t j = 0; j < s->nelem; j++) + for (ptrdiff_t j = 0; j < s->nelem; j++) separate_contexts |= d->separates[s->elems[j].index]; return separate_contexts; @@ -2406,7 +2407,7 @@ enum }; static void -merge_nfa_state (struct dfa *d, size_t tindex, char *flags, +merge_nfa_state (struct dfa *d, ptrdiff_t tindex, char *flags, position_set *merged) { position_set *follows = d->follows; @@ -2416,7 +2417,7 @@ merge_nfa_state (struct dfa *d, size_t tindex, char *flags, for (ptrdiff_t i = 0; i < follows[tindex].nelem; i++) { - size_t sindex = follows[tindex].elems[i].index; + ptrdiff_t sindex = follows[tindex].elems[i].index; /* Skip the node as pruned in future. */ unsigned int iconstraint = follows[tindex].elems[i].constraint; @@ -2435,7 +2436,7 @@ merge_nfa_state (struct dfa *d, size_t tindex, char *flags, for (j = 0; j < nelem; j++) { - size_t dindex = follows[tindex].elems[j].index; + ptrdiff_t dindex = follows[tindex].elems[j].index; if (follows[tindex].elems[j].constraint != iconstraint) continue; @@ -2471,13 +2472,8 @@ merge_nfa_state (struct dfa *d, size_t tindex, char *flags, static int compare (const void *a, const void *b) { - int aindex; - int bindex; - - aindex = (int) ((position *) a)->index; - bindex = (int) ((position *) b)->index; - - return aindex - bindex; + position const *p = a, *q = b; + return p->index < q->index ? -1 : p->index > q->index; } static void @@ -2566,7 +2562,7 @@ dfaoptimize (struct dfa *d) flags = xmalloc (d->tindex * sizeof *flags); memset (flags, 0, d->tindex * sizeof *flags); - for (size_t i = 0; i < d->tindex; i++) + for (ptrdiff_t i = 0; i < d->tindex; i++) { for (ptrdiff_t j = 0; j < d->follows[i].nelem; j++) { @@ -2668,8 +2664,8 @@ dfaanalyze (struct dfa *d, bool searchflag) bool nullable; /* Counts of firstpos and lastpos sets. */ - size_t nfirstpos; - size_t nlastpos; + ptrdiff_t nfirstpos; + ptrdiff_t nlastpos; } *stkalloc = xnmalloc (d->depth, sizeof *stkalloc), *stk = stkalloc; position_set merged; /* Result of merging sets. */ @@ -2678,9 +2674,9 @@ dfaanalyze (struct dfa *d, bool searchflag) #ifdef DEBUG fprintf (stderr, "dfaanalyze:\n"); - for (size_t i = 0; i < d->tindex; ++i) + for (ptrdiff_t i = 0; i < d->tindex; ++i) { - fprintf (stderr, " %zu:", i); + fprintf (stderr, " %td:", i); prtok (d->tokens[i]); } putc ('\n', stderr); @@ -2690,7 +2686,7 @@ dfaanalyze (struct dfa *d, bool searchflag) alloc_position_set (&merged, d->nleaves); d->follows = xcalloc (d->tindex, sizeof *d->follows); - for (size_t i = 0; i < d->tindex; ++i) + for (ptrdiff_t i = 0; i < d->tindex; ++i) { switch (d->tokens[i]) { @@ -2711,7 +2707,7 @@ dfaanalyze (struct dfa *d, bool searchflag) tmp.elems = firstpos - stk[-1].nfirstpos; tmp.nelem = stk[-1].nfirstpos; position *p = lastpos - stk[-1].nlastpos; - for (size_t j = 0; j < stk[-1].nlastpos; j++) + for (ptrdiff_t j = 0; j < stk[-1].nlastpos; j++) { merge (&tmp, &d->follows[p[j].index], &merged); copy (&merged, &d->follows[p[j].index]); @@ -2731,7 +2727,7 @@ dfaanalyze (struct dfa *d, bool searchflag) tmp.nelem = stk[-1].nfirstpos; tmp.elems = firstpos - stk[-1].nfirstpos; position *p = lastpos - stk[-1].nlastpos - stk[-2].nlastpos; - for (size_t j = 0; j < stk[-2].nlastpos; j++) + for (ptrdiff_t j = 0; j < stk[-2].nlastpos; j++) { merge (&tmp, &d->follows[p[j].index], &merged); copy (&merged, &d->follows[p[j].index]); @@ -2752,7 +2748,7 @@ dfaanalyze (struct dfa *d, bool searchflag) else { position *p = lastpos - stk[-1].nlastpos - stk[-2].nlastpos; - for (size_t j = 0; j < stk[-1].nlastpos; j++) + for (ptrdiff_t j = 0; j < stk[-1].nlastpos; j++) p[j] = p[j + stk[-2].nlastpos]; lastpos -= stk[-2].nlastpos; stk[-2].nlastpos = stk[-1].nlastpos; @@ -2795,21 +2791,21 @@ dfaanalyze (struct dfa *d, bool searchflag) } #ifdef DEBUG /* ... balance the above nonsyntactic #ifdef goo... */ - fprintf (stderr, "node %zu:", i); + fprintf (stderr, "node %td:", i); prtok (d->tokens[i]); putc ('\n', stderr); fprintf (stderr, stk[-1].nullable ? " nullable: yes\n" : " nullable: no\n"); fprintf (stderr, " firstpos:"); - for (size_t j = 0; j < stk[-1].nfirstpos; j++) + for (ptrdiff_t j = 0; j < stk[-1].nfirstpos; j++) { - fprintf (stderr, " %zu:", firstpos[j - stk[-1].nfirstpos].index); + fprintf (stderr, " %td:", firstpos[j - stk[-1].nfirstpos].index); prtok (d->tokens[firstpos[j - stk[-1].nfirstpos].index]); } fprintf (stderr, "\n lastpos:"); - for (size_t j = 0; j < stk[-1].nlastpos; j++) + for (ptrdiff_t j = 0; j < stk[-1].nlastpos; j++) { - fprintf (stderr, " %zu:", lastpos[j - stk[-1].nlastpos].index); + fprintf (stderr, " %td:", lastpos[j - stk[-1].nlastpos].index); prtok (d->tokens[lastpos[j - stk[-1].nlastpos].index]); } putc ('\n', stderr); @@ -2823,17 +2819,17 @@ dfaanalyze (struct dfa *d, bool searchflag) dfaoptimize (d); #ifdef DEBUG - for (size_t i = 0; i < d->tindex; ++i) + for (ptrdiff_t i = 0; i < d->tindex; ++i) if (d->tokens[i] == BEG || d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF || d->tokens[i] == ANYCHAR || d->tokens[i] == MBCSET || d->tokens[i] >= CSET) { - fprintf (stderr, "follows(%zu:", i); + fprintf (stderr, "follows(%td:", i); prtok (d->tokens[i]); fprintf (stderr, "):"); - for (size_t j = 0; j < d->follows[i].nelem; j++) + for (ptrdiff_t j = 0; j < d->follows[i].nelem; j++) { - fprintf (stderr, " %zu:", d->follows[i].elems[j].index); + fprintf (stderr, " %td:", d->follows[i].elems[j].index); prtok (d->tokens[d->follows[i].elems[j].index]); } putc ('\n', stderr); @@ -3012,8 +3008,8 @@ build_state (state_num s, struct dfa *d, unsigned char uc) /* Find the union of the follows of the positions of the group. This is a hideously inefficient loop. Fix it someday. */ - for (size_t j = 0; j < d->states[s].elems.nelem; ++j) - for (size_t k = 0; + for (ptrdiff_t j = 0; j < d->states[s].elems.nelem; j++) + for (ptrdiff_t k = 0; k < d->follows[d->states[s].elems.elems[j].index].nelem; ++k) insert (d->follows[d->states[s].elems.elems[j].index].elems[k], &follows); @@ -3025,7 +3021,7 @@ build_state (state_num s, struct dfa *d, unsigned char uc) charclass label; fillset (&label); - for (size_t i = 0; i < follows.nelem; ++i) + for (ptrdiff_t i = 0; i < follows.nelem; ++i) { charclass matches; /* Set of matching characters. */ position pos = follows.elems[i]; @@ -3072,15 +3068,15 @@ build_state (state_num s, struct dfa *d, unsigned char uc) { if (!succeeds_in_context (pos.constraint, d->states[s].context, CTX_NEWLINE)) - for (size_t j = 0; j < CHARCLASS_WORDS; ++j) + for (int j = 0; j < CHARCLASS_WORDS; j++) matches.w[j] &= ~d->syntax.newline.w[j]; if (!succeeds_in_context (pos.constraint, d->states[s].context, CTX_LETTER)) - for (size_t j = 0; j < CHARCLASS_WORDS; ++j) + for (int j = 0; j < CHARCLASS_WORDS; ++j) matches.w[j] &= ~d->syntax.letters.w[j]; if (!succeeds_in_context (pos.constraint, d->states[s].context, CTX_NONE)) - for (size_t j = 0; j < CHARCLASS_WORDS; ++j) + for (int j = 0; j < CHARCLASS_WORDS; ++j) matches.w[j] &= d->syntax.letters.w[j] | d->syntax.newline.w[j]; /* If there are no characters left, there's no point in going on. */ @@ -3095,24 +3091,24 @@ build_state (state_num s, struct dfa *d, unsigned char uc) } #ifdef DEBUG - fprintf (stderr, " nextpos %zu:", pos.index); + fprintf (stderr, " nextpos %td:", pos.index); prtok (d->tokens[pos.index]); fprintf (stderr, " of"); - for (size_t j = 0; j < NOTCHAR; j++) + for (unsigned j = 0; j < NOTCHAR; j++) if (tstbit (j, &matches)) - fprintf (stderr, " 0x%02zx", j); + fprintf (stderr, " 0x%02x", j); fprintf (stderr, "\n"); #endif if (matched) { - for (size_t k = 0; k < CHARCLASS_WORDS; ++k) + for (int k = 0; k < CHARCLASS_WORDS; ++k) label.w[k] &= matches.w[k]; append (pos, &group); } else { - for (size_t k = 0; k < CHARCLASS_WORDS; ++k) + for (int k = 0; k < CHARCLASS_WORDS; ++k) label.w[k] &= ~matches.w[k]; } } @@ -3146,7 +3142,7 @@ build_state (state_num s, struct dfa *d, unsigned char uc) if (!mergeit) { mergeit = true; - for (size_t j = 0; mergeit && j < group.nelem; j++) + for (ptrdiff_t j = 0; mergeit && j < group.nelem; j++) mergeit &= d->multibyte_prop[group.elems[j].index]; } if (mergeit) @@ -3197,7 +3193,7 @@ build_state (state_num s, struct dfa *d, unsigned char uc) } /* Set the transitions for each character in the label. */ - for (size_t i = 0; i < NOTCHAR; i++) + for (int i = 0; i < NOTCHAR; i++) if (tstbit (i, &label)) switch (d->syntax.sbit[i]) { @@ -3214,7 +3210,7 @@ build_state (state_num s, struct dfa *d, unsigned char uc) #ifdef DEBUG fprintf (stderr, "trans table %td", s); - for (size_t i = 0; i < NOTCHAR; ++i) + for (int i = 0; i < NOTCHAR; ++i) { if (!(i & 0xf)) fprintf (stderr, "\n"); @@ -3455,7 +3451,7 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl, alloc_position_set (&d->mb_follows, d->nleaves); } - size_t nlcount = 0; + ptrdiff_t nlcount = 0; for (;;) { state_num *t; @@ -3645,7 +3641,7 @@ free_mbdata (struct dfa *d) static bool _GL_ATTRIBUTE_PURE dfa_supported (struct dfa const *d) { - for (size_t i = 0; i < d->tindex; i++) + for (ptrdiff_t i = 0; i < d->tindex; i++) { switch (d->tokens[i]) { @@ -3673,7 +3669,7 @@ maybe_disable_superset_dfa (struct dfa *d) return; bool have_backref = false; - for (size_t i = 0; i < d->tindex; ++i) + for (ptrdiff_t i = 0; i < d->tindex; ++i) { switch (d->tokens[i]) { @@ -3738,8 +3734,8 @@ dfassbuild (struct dfa *d) bool have_achar = false; bool have_nchar = false; - size_t j; - for (size_t i = j = 0; i < d->tindex; i++) + ptrdiff_t j; + for (ptrdiff_t i = j = 0; i < d->tindex; i++) { switch (d->tokens[i]) { @@ -3788,11 +3784,15 @@ dfassbuild (struct dfa *d) } } -/* Parse and analyze a single string of the given length. */ +/* Parse a string S of length LEN into D (but skip this step if S is null). + Then analyze D and build a matcher for it. + SEARCHFLAG says whether to build a searching or an exact matcher. */ void dfacomp (char const *s, size_t len, struct dfa *d, bool searchflag) { - dfaparse (s, len, d); + if (s != NULL) + dfaparse (s, len, d); + dfassbuild (d); if (dfa_supported (d)) @@ -3825,7 +3825,7 @@ dfafree (struct dfa *d) free (d->constraints); free (d->separates); - for (size_t i = 0; i < d->sindex; ++i) + for (ptrdiff_t i = 0; i < d->sindex; ++i) { free (d->states[i].elems.elems); free (d->states[i].mbps.elems); @@ -3834,14 +3834,14 @@ dfafree (struct dfa *d) if (d->follows) { - for (size_t i = 0; i < d->tindex; ++i) + for (ptrdiff_t i = 0; i < d->tindex; ++i) free (d->follows[i].elems); free (d->follows); } if (d->trans) { - for (size_t i = 0; i < d->tralloc; ++i) + for (ptrdiff_t i = 0; i < d->tralloc; ++i) { free (d->trans[i]); free (d->fails[i]); @@ -3945,10 +3945,10 @@ dfafree (struct dfa *d) static char * icatalloc (char *old, char const *new) { - size_t newsize = strlen (new); + ptrdiff_t newsize = strlen (new); if (newsize == 0) return old; - size_t oldsize = strlen (old); + ptrdiff_t oldsize = strlen (old); char *result = xrealloc (old, oldsize + newsize + 1); memcpy (result + oldsize, new, newsize + 1); return result; @@ -3962,20 +3962,20 @@ freelist (char **cpp) } static char ** -enlist (char **cpp, char *new, size_t len) +enlist (char **cpp, char *new, ptrdiff_t len) { new = memcpy (xmalloc (len + 1), new, len); new[len] = '\0'; /* Is there already something in the list that's new (or longer)? */ - size_t i; - for (i = 0; cpp[i] != NULL; ++i) + ptrdiff_t i; + for (i = 0; cpp[i] != NULL; i++) if (strstr (cpp[i], new) != NULL) { free (new); return cpp; } /* Eliminate any obsoleted strings. */ - for (size_t j = 0; cpp[j] != NULL; ) + for (ptrdiff_t j = 0; cpp[j] != NULL; ) if (strstr (new, cpp[j]) == NULL) ++j; else @@ -4002,11 +4002,11 @@ comsubs (char *left, char const *right) for (char *lcp = left; *lcp != '\0'; lcp++) { - size_t len = 0; + ptrdiff_t len = 0; char *rcp = strchr (right, *lcp); while (rcp != NULL) { - size_t i; + ptrdiff_t i; for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i) continue; if (i > len) @@ -4034,9 +4034,9 @@ inboth (char **left, char **right) { char **both = xzalloc (sizeof *both); - for (size_t lnum = 0; left[lnum] != NULL; ++lnum) + for (ptrdiff_t lnum = 0; left[lnum] != NULL; lnum++) { - for (size_t rnum = 0; right[rnum] != NULL; ++rnum) + for (ptrdiff_t rnum = 0; right[rnum] != NULL; rnum++) { char **temp = comsubs (left[lnum], right[rnum]); both = addlists (both, temp); @@ -4061,7 +4061,7 @@ struct must }; static must * -allocmust (must *mp, size_t size) +allocmust (must *mp, ptrdiff_t size) { must *new_mp = xmalloc (sizeof *new_mp); new_mp->in = xzalloc (sizeof *new_mp->in); @@ -4107,7 +4107,7 @@ dfamust (struct dfa const *d) bool need_endline = false; bool case_fold_unibyte = d->syntax.case_fold && MB_CUR_MAX == 1; - for (size_t ri = 1; ri + 1 < d->tindex; ri++) + for (ptrdiff_t ri = 1; ri + 1 < d->tindex; ri++) { token t = d->tokens[ri]; switch (t) @@ -4147,7 +4147,7 @@ dfamust (struct dfa const *d) char **new; must *rmp = mp; must *lmp = mp = mp->prev; - size_t j, ln, rn, n; + ptrdiff_t j, ln, rn, n; /* Guaranteed to be. Unlikely, but ... */ if (streq (lmp->is, rmp->is)) @@ -4162,7 +4162,7 @@ dfamust (struct dfa const *d) lmp->endline = false; } /* Left side--easy */ - size_t i = 0; + ptrdiff_t i = 0; while (lmp->left[i] != '\0' && lmp->left[i] == rmp->left[i]) ++i; lmp->left[i] = '\0'; @@ -4192,7 +4192,7 @@ dfamust (struct dfa const *d) case END: assert (!mp->prev); - for (size_t i = 0; mp->in[i] != NULL; ++i) + for (ptrdiff_t i = 0; mp->in[i] != NULL; i++) if (strlen (mp->in[i]) > strlen (result)) result = mp->in[i]; if (streq (result, mp->is)) @@ -4216,8 +4216,8 @@ dfamust (struct dfa const *d) lmp->in = addlists (lmp->in, rmp->in); if (lmp->right[0] != '\0' && rmp->left[0] != '\0') { - size_t lrlen = strlen (lmp->right); - size_t rllen = strlen (rmp->left); + ptrdiff_t lrlen = strlen (lmp->right); + ptrdiff_t rllen = strlen (rmp->left); char *tp = xmalloc (lrlen + rllen); memcpy (tp, lmp->right, lrlen); memcpy (tp + lrlen, rmp->left, rllen); @@ -4282,7 +4282,7 @@ dfamust (struct dfa const *d) } } - size_t rj = ri + 2; + ptrdiff_t rj = ri + 2; if (d->tokens[ri + 1] == CAT) { for (; rj < d->tindex - 1; rj += 2) @@ -4297,7 +4297,7 @@ dfamust (struct dfa const *d) mp->is[0] = mp->left[0] = mp->right[0] = case_fold_unibyte ? toupper (t) : t; - size_t i; + ptrdiff_t i; for (i = 1; ri + 2 < rj; i++) { ri += 2; diff --git a/support/dfa.h b/support/dfa.h index 2e25ef9f..53d6d333 100644 --- a/support/dfa.h +++ b/support/dfa.h @@ -65,15 +65,20 @@ enum extern void dfasyntax (struct dfa *, struct localeinfo const *, reg_syntax_t, int); -/* Build and return the struct dfamust from the given struct dfa. */ +/* Parse the given string of given length into the given struct dfa. */ +extern void dfaparse (char const *, size_t, struct dfa *); + +/* Allocate and return a struct dfamust from a struct dfa that was + initialized by dfaparse and not yet given to dfacomp. */ extern struct dfamust *dfamust (struct dfa const *); /* Free the storage held by the components of a struct dfamust. */ extern void dfamustfree (struct dfamust *); /* Compile the given string of the given length into the given struct dfa. - Final argument is a flag specifying whether to build a searching or an - exact matcher. */ + The last argument says whether to build a searching or an exact matcher. + A null first argument means the struct dfa has already been + initialized by dfaparse; the second argument is ignored. */ extern void dfacomp (char const *, size_t, struct dfa *, bool); /* Search through a buffer looking for a match to the given struct dfa. |