about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 16
-rw-r--r-- regcomp.c 18
-rw-r--r-- regex.h 2
-rw-r--r-- regex_internal.c 2
-rw-r--r-- regex_internal.h 5
-rw-r--r-- regexec.c 16
6 files changed, 38 insertions, 21 deletions
diff --git a/ChangeLog b/ChangeLog
index c406b133..09d5732d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+2016-01-18 Paul Eggert <eggert@cs.ucla.edu>
+
+ Diagnose ERE '()|\1'
+ Problem reported by Hanno Boeck in: http://bugs.gnu.org/21513
+
+ * regcomp.c (parse_reg_exp): While parsing alternatives, keep
+ track of the set of previously-completed subexpressions available
+ before the first alternative, and restore this set just before
+ parsing each subsequent alternative. This lets us diagnose the
+ invalid back-reference in the ERE '()|\1'.
+
+ Unrelated: General minor cleanups (spelling, code) from Gnulib:
+
+ * regex.h, regex_internal.c, regex_internal.h, regexec.c: Minor
+ cleanups.
+
2016-01-14 Arnold D. Robbins <arnold@skeeve.com>
* eval.c (r_get_lhs): If original array was Node_var_new,
diff --git a/regcomp.c b/regcomp.c
index 80dcb0ce..35a6c218 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -137,7 +137,7 @@ static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
POSIX doesn't require that we do anything for REG_NOERROR,
but why not be nice? */
-const char __re_error_msgid[] attribute_hidden =
+static const char __re_error_msgid[] attribute_hidden =
{
#define REG_NOERROR_IDX 0
gettext_noop ("Success") /* REG_NOERROR */
@@ -191,7 +191,7 @@ const char __re_error_msgid[] attribute_hidden =
gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
};
-const size_t __re_error_msgid_idx[] attribute_hidden =
+static const size_t __re_error_msgid_idx[] attribute_hidden =
{
REG_NOERROR_IDX,
REG_NOMATCH_IDX,
@@ -345,7 +345,8 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
#ifdef RE_ENABLE_I18N
if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
{
- unsigned char *buf = re_malloc (unsigned char, dfa->mb_cur_max), *p;
+ unsigned char buf[MB_LEN_MAX];
+ unsigned char *p;
wchar_t wc;
mbstate_t state;
@@ -361,7 +362,6 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
&& (__wcrtomb ((char *) buf, towlower (wc), &state)
!= (size_t) -1))
re_set_fastmap (fastmap, 0, buf[0]);
- re_free (buf);
}
#endif
}
@@ -809,7 +809,7 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length,
__libc_lock_init (dfa->lock);
err = re_string_construct (&regexp, pattern, length, preg->translate,
- syntax & RE_ICASE, dfa);
+ (syntax & RE_ICASE) != 0, dfa);
if (BE (err != REG_NOERROR, 0))
{
re_compile_internal_free_return:
@@ -2206,6 +2206,7 @@ parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
bin_tree_t *tree, *branch = NULL;
+ bitset_word_t initial_bkref_map = dfa->completed_bkref_map;
tree = parse_branch (regexp, preg, token, syntax, nest, err);
if (BE (*err != REG_NOERROR && tree == NULL, 0))
return NULL;
@@ -2216,6 +2217,8 @@ parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
if (token->type != OP_ALT && token->type != END_OF_RE
&& (nest == 0 || token->type != OP_CLOSE_SUBEXP))
{
+ bitset_word_t accumulated_bkref_map = dfa->completed_bkref_map;
+ dfa->completed_bkref_map = initial_bkref_map;
branch = parse_branch (regexp, preg, token, syntax, nest, err);
if (BE (*err != REG_NOERROR && branch == NULL, 0))
{
@@ -2223,6 +2226,7 @@ parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
postorder (tree, free_tree, NULL);
return NULL;
}
+ dfa->completed_bkref_map |= accumulated_bkref_map;
}
else
branch = NULL;
@@ -2753,7 +2757,7 @@ build_range_exp (reg_syntax_t syntax, bitset_t sbcset,
#endif
if (start_wc == WEOF || end_wc == WEOF)
return REG_ECOLLATE;
- else if ((syntax & RE_NO_EMPTY_RANGES) && start_wc > end_wc)
+ else if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_wc > end_wc, 0))
return REG_ERANGE;
/* Got valid collation sequence values, add them as a new entry.
@@ -3521,7 +3525,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
/* This isn't a valid character. */
return REG_ECOLLATE;
- /* Build single byte matcing table for this equivalence class. */
+ /* Build single byte matching table for this equivalence class. */
char_buf[1] = (unsigned char) '\0';
len = weights[idx1 & 0xffffff];
for (ch = 0; ch < SBC_MAX; ++ch)
diff --git a/regex.h b/regex.h
index cd470a04..0a183dfc 100644
--- a/regex.h
+++ b/regex.h
@@ -327,7 +327,7 @@ typedef enum
/* POSIX regcomp return error codes. (In the order listed in the
standard.) */
REG_BADPAT, /* Invalid pattern. */
- REG_ECOLLATE, /* Inalid collating element. */
+ REG_ECOLLATE, /* Invalid collating element. */
REG_ECTYPE, /* Invalid character class name. */
REG_EESCAPE, /* Trailing backslash. */
REG_ESUBREG, /* Invalid back reference. */
diff --git a/regex_internal.c b/regex_internal.c
index fcfa4ada..2d170932 100644
--- a/regex_internal.c
+++ b/regex_internal.c
@@ -1632,7 +1632,7 @@ free_state (re_dfastate_t *state)
re_free (state);
}
-/* Create the new state which is independ of contexts.
+/* Create the new state which is independent of contexts.
Return the new state if succeeded, otherwise return NULL. */
static re_dfastate_t *
diff --git a/regex_internal.h b/regex_internal.h
index 9e104d8d..6cc84a79 100644
--- a/regex_internal.h
+++ b/regex_internal.h
@@ -159,9 +159,6 @@ is_blank (int c)
# define __attribute_warn_unused_result__
#endif
-extern const char __re_error_msgid[] attribute_hidden;
-extern const size_t __re_error_msgid_idx[] attribute_hidden;
-
/* An integer used to represent a set of bits. It must be unsigned,
and must be at least as wide as unsigned int. */
typedef unsigned long int bitset_word_t;
@@ -774,7 +771,7 @@ bitset_mask (bitset_t dest, const bitset_t src)
}
#ifdef RE_ENABLE_I18N
-/* Inline functions for re_string. */
+/* Functions for re_string. */
static int
internal_function __attribute__ ((pure, unused))
re_string_char_size_at (const re_string_t *pstr, int idx)
diff --git a/regexec.c b/regexec.c
index cff69bfc..8fe016af 100644
--- a/regexec.c
+++ b/regexec.c
@@ -306,7 +306,7 @@ compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
concerned.
If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
- and all groups is stroed in REGS. (For the "_2" variants, the offsets are
+ and all groups are stored in REGS. (For the "_2" variants, the offsets are
computed relative to the concatenation, not relative to the individual
strings.)
@@ -627,7 +627,7 @@ re_exec (s)
/* Searches for a compiled pattern PREG in the string STRING, whose
length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
- mingings with regexec. START, and RANGE have the same meanings
+ meaning as with regexec. START and RANGE have the same meanings
with re_search.
Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
otherwise return the error code.
@@ -695,7 +695,8 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
fl_longest_match = (nmatch != 0 || dfa->nbackref);
err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
- preg->translate, preg->syntax & RE_ICASE, dfa);
+ preg->translate, (preg->syntax & RE_ICASE) != 0,
+ dfa);
if (BE (err != REG_NOERROR, 0))
goto free_return;
mctx.input.stop = stop;
@@ -912,7 +913,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
goto free_return;
}
- /* At last, add the offset to the each registers, since we slided
+ /* At last, add the offset to each register, since we slid
the buffers so that we could assume that the matching starts
from 0. */
for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
@@ -963,8 +964,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
static reg_errcode_t
__attribute_warn_unused_result__
-prune_impossible_nodes (mctx)
- re_match_context_t *mctx;
+prune_impossible_nodes (re_match_context_t *mctx)
{
const re_dfa_t *const dfa = mctx->dfa;
int halt_node, match_last;
@@ -2420,7 +2420,7 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
/* Skip bytes in the input that correspond to part of a
multi-byte match, then look in the log for a state
from which to restart matching. */
-re_dfastate_t *
+static re_dfastate_t *
internal_function
find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
{
@@ -2449,7 +2449,7 @@ find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
/* From the node set CUR_NODES, pick up the nodes whose types are
OP_OPEN_SUBEXP and which have corresponding back references in the regular
expression. And register them to use them later for evaluating the
- correspoding back references. */
+ corresponding back references. */
static reg_errcode_t
internal_function