diff options
-rw-r--r-- | ChangeLog | 16 | ||||
-rw-r--r-- | regcomp.c | 18 | ||||
-rw-r--r-- | regex.h | 2 | ||||
-rw-r--r-- | regex_internal.c | 2 | ||||
-rw-r--r-- | regex_internal.h | 5 | ||||
-rw-r--r-- | regexec.c | 16 |
6 files changed, 38 insertions, 21 deletions
@@ -1,3 +1,19 @@ +2016-01-18 Paul Eggert <eggert@cs.ucla.edu> + + Diagnose ERE '()|\1' + Problem reported by Hanno Boeck in: http://bugs.gnu.org/21513 + + * lib/regcomp.c (parse_reg_exp): While parsing alternatives, keep + track of the set of previously-completed subexpressions available + before the first alternative, and restore this set just before + parsing each subsequent alternative. This lets us diagnose the + invalid back-reference in the ERE '()|\1'. + + Unrelated: General minor cleanups (spelling, code) from Gnulib: + + * regex.h, regex_internal.c, regex_internal.h, regexec.c: Minor + cleanups. + 2016-01-14 Arnold D. Robbins <arnold@skeeve.com> * eval.c (r_get_lhs): If original array was Node_var_new, @@ -137,7 +137,7 @@ static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node); POSIX doesn't require that we do anything for REG_NOERROR, but why not be nice? */ -const char __re_error_msgid[] attribute_hidden = +static const char __re_error_msgid[] attribute_hidden = { #define REG_NOERROR_IDX 0 gettext_noop ("Success") /* REG_NOERROR */ @@ -191,7 +191,7 @@ const char __re_error_msgid[] attribute_hidden = gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ }; -const size_t __re_error_msgid_idx[] attribute_hidden = +static const size_t __re_error_msgid_idx[] attribute_hidden = { REG_NOERROR_IDX, REG_NOMATCH_IDX, @@ -345,7 +345,8 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, #ifdef RE_ENABLE_I18N if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) { - unsigned char *buf = re_malloc (unsigned char, dfa->mb_cur_max), *p; + unsigned char buf[MB_LEN_MAX]; + unsigned char *p; wchar_t wc; mbstate_t state; @@ -361,7 +362,6 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, && (__wcrtomb ((char *) buf, towlower (wc), &state) != (size_t) -1)) re_set_fastmap (fastmap, 0, buf[0]); - re_free (buf); } #endif } @@ -809,7 +809,7 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length, __libc_lock_init (dfa->lock); err = re_string_construct (®exp, pattern, length, preg->translate, - syntax & RE_ICASE, dfa); + (syntax & RE_ICASE) != 0, dfa); if (BE (err != REG_NOERROR, 0)) { re_compile_internal_free_return: @@ -2206,6 +2206,7 @@ parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, { re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_t *tree, *branch = NULL; + bitset_word_t initial_bkref_map = dfa->completed_bkref_map; tree = parse_branch (regexp, preg, token, syntax, nest, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; @@ -2216,6 +2217,8 @@ parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, if (token->type != OP_ALT && token->type != END_OF_RE && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) { + bitset_word_t accumulated_bkref_map = dfa->completed_bkref_map; + dfa->completed_bkref_map = initial_bkref_map; branch = parse_branch (regexp, preg, token, syntax, nest, err); if (BE (*err != REG_NOERROR && branch == NULL, 0)) { @@ -2223,6 +2226,7 @@ parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, postorder (tree, free_tree, NULL); return NULL; } + dfa->completed_bkref_map |= accumulated_bkref_map; } else branch = NULL; @@ -2753,7 +2757,7 @@ build_range_exp (reg_syntax_t syntax, bitset_t sbcset, #endif if (start_wc == WEOF || end_wc == WEOF) return REG_ECOLLATE; - else if ((syntax & RE_NO_EMPTY_RANGES) && start_wc > end_wc) + else if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_wc > end_wc, 0)) return REG_ERANGE; /* Got valid collation sequence values, add them as a new entry. @@ -3521,7 +3525,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name) /* This isn't a valid character. */ return REG_ECOLLATE; - /* Build single byte matcing table for this equivalence class. */ + /* Build single byte matching table for this equivalence class. */ char_buf[1] = (unsigned char) '\0'; len = weights[idx1 & 0xffffff]; for (ch = 0; ch < SBC_MAX; ++ch) @@ -327,7 +327,7 @@ typedef enum /* POSIX regcomp return error codes. (In the order listed in the standard.) */ REG_BADPAT, /* Invalid pattern. */ - REG_ECOLLATE, /* Inalid collating element. */ + REG_ECOLLATE, /* Invalid collating element. */ REG_ECTYPE, /* Invalid character class name. */ REG_EESCAPE, /* Trailing backslash. */ REG_ESUBREG, /* Invalid back reference. */ diff --git a/regex_internal.c b/regex_internal.c index fcfa4ada..2d170932 100644 --- a/regex_internal.c +++ b/regex_internal.c @@ -1632,7 +1632,7 @@ free_state (re_dfastate_t *state) re_free (state); } -/* Create the new state which is independ of contexts. +/* Create the new state which is independent of contexts. Return the new state if succeeded, otherwise return NULL. */ static re_dfastate_t * diff --git a/regex_internal.h b/regex_internal.h index 9e104d8d..6cc84a79 100644 --- a/regex_internal.h +++ b/regex_internal.h @@ -159,9 +159,6 @@ is_blank (int c) # define __attribute_warn_unused_result__ #endif -extern const char __re_error_msgid[] attribute_hidden; -extern const size_t __re_error_msgid_idx[] attribute_hidden; - /* An integer used to represent a set of bits. It must be unsigned, and must be at least as wide as unsigned int. */ typedef unsigned long int bitset_word_t; @@ -774,7 +771,7 @@ bitset_mask (bitset_t dest, const bitset_t src) } #ifdef RE_ENABLE_I18N -/* Inline functions for re_string. */ +/* Functions for re_string. */ static int internal_function __attribute__ ((pure, unused)) re_string_char_size_at (const re_string_t *pstr, int idx) @@ -306,7 +306,7 @@ compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0); concerned. If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match - and all groups is stroed in REGS. (For the "_2" variants, the offsets are + and all groups is stored in REGS. (For the "_2" variants, the offsets are computed relative to the concatenation, not relative to the individual strings.) @@ -627,7 +627,7 @@ re_exec (s) /* Searches for a compiled pattern PREG in the string STRING, whose length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same - mingings with regexec. START, and RANGE have the same meanings + meaning as with regexec. START, and RANGE have the same meanings with re_search. Return REG_NOERROR if we find a match, and REG_NOMATCH if not, otherwise return the error code. @@ -695,7 +695,8 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, fl_longest_match = (nmatch != 0 || dfa->nbackref); err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1, - preg->translate, preg->syntax & RE_ICASE, dfa); + preg->translate, (preg->syntax & RE_ICASE) != 0, + dfa); if (BE (err != REG_NOERROR, 0)) goto free_return; mctx.input.stop = stop; @@ -912,7 +913,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, goto free_return; } - /* At last, add the offset to the each registers, since we slided + /* At last, add the offset to each register, since we slid the buffers so that we could assume that the matching starts from 0. */ for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) @@ -963,8 +964,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, static reg_errcode_t __attribute_warn_unused_result__ -prune_impossible_nodes (mctx) - re_match_context_t *mctx; +prune_impossible_nodes (re_match_context_t *mctx) { const re_dfa_t *const dfa = mctx->dfa; int halt_node, match_last; @@ -2420,7 +2420,7 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, /* Skip bytes in the input that correspond to part of a multi-byte match, then look in the log for a state from which to restart matching. */ -re_dfastate_t * +static re_dfastate_t * internal_function find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) { @@ -2449,7 +2449,7 @@ find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) /* From the node set CUR_NODES, pick up the nodes whose types are OP_OPEN_SUBEXP and which have corresponding back references in the regular expression. And register them to use them later for evaluating the - correspoding back references. */ + corresponding back references. */ static reg_errcode_t internal_function |