aboutsummaryrefslogtreecommitdiffstats
path: root/support/regexec.c
diff options
context:
space:
mode:
Diffstat (limited to 'support/regexec.c')
-rw-r--r--support/regexec.c204
1 files changed, 80 insertions, 124 deletions
diff --git a/support/regexec.c b/support/regexec.c
index a3ee618c..6309deac 100644
--- a/support/regexec.c
+++ b/support/regexec.c
@@ -1,5 +1,5 @@
/* Extended regular expression matching and search library.
- Copyright (C) 2002-2020 Free Software Foundation, Inc.
+ Copyright (C) 2002-2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
@@ -59,7 +59,7 @@ static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
Idx cur_idx, Idx nmatch);
static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
Idx str_idx, Idx dest_node, Idx nregs,
- regmatch_t *regs,
+ regmatch_t *regs, regmatch_t *prevregs,
re_node_set *eps_via_nodes);
static reg_errcode_t set_regs (const regex_t *preg,
const re_match_context_t *mctx,
@@ -186,7 +186,8 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
REG_NOTBOL is set, then ^ does not match at the beginning of the
string; if REG_NOTEOL is set, then $ does not match at the end.
- We return 0 if we find a match and REG_NOMATCH if not. */
+ Return 0 if a match is found, REG_NOMATCH if not, REG_BADPAT if
+ EFLAGS is invalid. */
int
regexec (const regex_t *__restrict preg, const char *__restrict string,
@@ -269,8 +270,8 @@ compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
strings.)
On success, re_match* functions return the length of the match, re_search*
- return the position of the start of the match. Return value -1 means no
- match was found and -2 indicates an internal error. */
+ return the position of the start of the match. They return -1 on
+ match failure, -2 on error. */
regoff_t
re_match (struct re_pattern_buffer *bufp, const char *string, Idx length,
@@ -1206,27 +1207,26 @@ check_halt_state_context (const re_match_context_t *mctx,
/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
corresponding to the DFA).
Return the destination node, and update EPS_VIA_NODES;
- return -1 in case of errors. */
+ return -1 on match failure, -2 on error. */
static Idx
proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
+ regmatch_t *prevregs,
Idx *pidx, Idx node, re_node_set *eps_via_nodes,
struct re_fail_stack_t *fs)
{
const re_dfa_t *const dfa = mctx->dfa;
- Idx i;
- bool ok;
if (IS_EPSILON_NODE (dfa->nodes[node].type))
{
re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
re_node_set *edests = &dfa->edests[node];
- Idx dest_node;
- ok = re_node_set_insert (eps_via_nodes, node);
+ bool ok = re_node_set_insert (eps_via_nodes, node);
if (__glibc_unlikely (! ok))
return -2;
- /* Pick up a valid destination, or return -1 if none
- is found. */
- for (dest_node = -1, i = 0; i < edests->nelem; ++i)
+
+ /* Pick a valid destination, or return -1 if none is found. */
+ Idx dest_node = -1;
+ for (Idx i = 0; i < edests->nelem; i++)
{
Idx candidate = edests->elems[i];
if (!re_node_set_contains (cur_nodes, candidate))
@@ -1244,7 +1244,7 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
/* Otherwise, push the second epsilon-transition on the fail stack. */
else if (fs != NULL
&& push_fail_stack (fs, *pidx, candidate, nregs, regs,
- eps_via_nodes))
+ prevregs, eps_via_nodes))
return -2;
/* We know we are going to exit. */
@@ -1288,7 +1288,7 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
if (naccepted == 0)
{
Idx dest_node;
- ok = re_node_set_insert (eps_via_nodes, node);
+ bool ok = re_node_set_insert (eps_via_nodes, node);
if (__glibc_unlikely (! ok))
return -2;
dest_node = dfa->edests[node].elems[0];
@@ -1317,7 +1317,8 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
static reg_errcode_t
__attribute_warn_unused_result__
push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
- Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
+ Idx nregs, regmatch_t *regs, regmatch_t *prevregs,
+ re_node_set *eps_via_nodes)
{
reg_errcode_t err;
Idx num = fs->num++;
@@ -1333,28 +1334,39 @@ push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
}
fs->stack[num].idx = str_idx;
fs->stack[num].node = dest_node;
- fs->stack[num].regs = re_malloc (regmatch_t, nregs);
+ fs->stack[num].regs = re_malloc (regmatch_t, 2 * nregs);
if (fs->stack[num].regs == NULL)
return REG_ESPACE;
memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
+ memcpy (fs->stack[num].regs + nregs, prevregs, sizeof (regmatch_t) * nregs);
err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
return err;
}
static Idx
pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
- regmatch_t *regs, re_node_set *eps_via_nodes)
+ regmatch_t *regs, regmatch_t *prevregs,
+ re_node_set *eps_via_nodes)
{
+ if (fs == NULL || fs->num == 0)
+ return -1;
Idx num = --fs->num;
- DEBUG_ASSERT (num >= 0);
*pidx = fs->stack[num].idx;
memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
+ memcpy (prevregs, fs->stack[num].regs + nregs, sizeof (regmatch_t) * nregs);
re_node_set_free (eps_via_nodes);
re_free (fs->stack[num].regs);
*eps_via_nodes = fs->stack[num].eps_via_nodes;
+ DEBUG_ASSERT (0 <= fs->stack[num].node);
return fs->stack[num].node;
}
+
+#define DYNARRAY_STRUCT regmatch_list
+#define DYNARRAY_ELEMENT regmatch_t
+#define DYNARRAY_PREFIX regmatch_list_
+#include <malloc/dynarray-skeleton.c>
+
/* Set the positions where the subexpressions are starts/ends to registers
PMATCH.
Note: We assume that pmatch[0] is already set, and
@@ -1370,8 +1382,8 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
re_node_set eps_via_nodes;
struct re_fail_stack_t *fs;
struct re_fail_stack_t fs_body = { 0, 2, NULL };
- regmatch_t *prev_idx_match;
- bool prev_idx_match_malloced = false;
+ struct regmatch_list prev_match;
+ regmatch_list_init (&prev_match);
DEBUG_ASSERT (nmatch > 1);
DEBUG_ASSERT (mctx->state_log != NULL);
@@ -1388,53 +1400,45 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
cur_node = dfa->init_node;
re_node_set_init_empty (&eps_via_nodes);
- if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
- prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
- else
+ if (!regmatch_list_resize (&prev_match, nmatch))
{
- prev_idx_match = re_malloc (regmatch_t, nmatch);
- if (prev_idx_match == NULL)
- {
- free_fail_stack_return (fs);
- return REG_ESPACE;
- }
- prev_idx_match_malloced = true;
+ regmatch_list_free (&prev_match);
+ free_fail_stack_return (fs);
+ return REG_ESPACE;
}
+ regmatch_t *prev_idx_match = regmatch_list_begin (&prev_match);
memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
{
update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
- if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
+ if ((idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
+ || re_node_set_contains (&eps_via_nodes, cur_node))
{
Idx reg_idx;
+ cur_node = -1;
if (fs)
{
for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
- break;
- if (reg_idx == nmatch)
- {
- re_node_set_free (&eps_via_nodes);
- if (prev_idx_match_malloced)
- re_free (prev_idx_match);
- return free_fail_stack_return (fs);
- }
- cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
- &eps_via_nodes);
+ {
+ cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
+ prev_idx_match, &eps_via_nodes);
+ break;
+ }
}
- else
+ if (cur_node < 0)
{
re_node_set_free (&eps_via_nodes);
- if (prev_idx_match_malloced)
- re_free (prev_idx_match);
- return REG_NOERROR;
+ regmatch_list_free (&prev_match);
+ return free_fail_stack_return (fs);
}
}
/* Proceed to next node. */
- cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
+ cur_node = proceed_next_node (mctx, nmatch, pmatch, prev_idx_match,
+ &idx, cur_node,
&eps_via_nodes, fs);
if (__glibc_unlikely (cur_node < 0))
@@ -1442,26 +1446,23 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
if (__glibc_unlikely (cur_node == -2))
{
re_node_set_free (&eps_via_nodes);
- if (prev_idx_match_malloced)
- re_free (prev_idx_match);
+ regmatch_list_free (&prev_match);
free_fail_stack_return (fs);
return REG_ESPACE;
}
- if (fs)
- cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
- &eps_via_nodes);
- else
+ cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
+ prev_idx_match, &eps_via_nodes);
+ if (cur_node < 0)
{
re_node_set_free (&eps_via_nodes);
- if (prev_idx_match_malloced)
- re_free (prev_idx_match);
+ regmatch_list_free (&prev_match);
+ free_fail_stack_return (fs);
return REG_NOMATCH;
}
}
}
re_node_set_free (&eps_via_nodes);
- if (prev_idx_match_malloced)
- re_free (prev_idx_match);
+ regmatch_list_free (&prev_match);
return free_fail_stack_return (fs);
}
@@ -1499,10 +1500,10 @@ update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
}
else if (type == OP_CLOSE_SUBEXP)
{
+ /* We are at the last node of this sub expression. */
Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
if (reg_num < nmatch)
{
- /* We are at the last node of this sub expression. */
if (pmatch[reg_num].rm_so < cur_idx)
{
pmatch[reg_num].rm_eo = cur_idx;
@@ -2199,6 +2200,7 @@ sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
/* Return the next state to which the current state STATE will transit by
accepting the current input byte, and update STATE_LOG if necessary.
+ Return NULL on failure.
If STATE can accept a multibyte char/collating element/back reference
update the destination of STATE_LOG. */
@@ -2399,7 +2401,7 @@ check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
#if 0
/* Return the next state to which the current state STATE will transit by
- accepting the current input byte. */
+ accepting the current input byte. Return NULL on failure. */
static re_dfastate_t *
transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
@@ -2821,7 +2823,8 @@ find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
LAST_NODE at LAST_STR. We record the path onto PATH since it will be
heavily reused.
- Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
+ Return REG_NOERROR if it can arrive, REG_NOMATCH if it cannot,
+ REG_ESPACE if memory is exhausted. */
static reg_errcode_t
__attribute_warn_unused_result__
@@ -3251,7 +3254,7 @@ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
/* Build transition table for the state.
Return true if successful. */
-static bool
+static bool __attribute_noinline__
build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
{
reg_errcode_t err;
@@ -3259,36 +3262,20 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
int ch;
bool need_word_trtable = false;
bitset_word_t elem, mask;
- bool dests_node_malloced = false;
- bool dest_states_malloced = false;
Idx ndests; /* Number of the destination states from 'state'. */
re_dfastate_t **trtable;
- re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
- re_node_set follows, *dests_node;
- bitset_t *dests_ch;
+ re_dfastate_t *dest_states[SBC_MAX];
+ re_dfastate_t *dest_states_word[SBC_MAX];
+ re_dfastate_t *dest_states_nl[SBC_MAX];
+ re_node_set follows;
bitset_t acceptable;
- struct dests_alloc
- {
- re_node_set dests_node[SBC_MAX];
- bitset_t dests_ch[SBC_MAX];
- } *dests_alloc;
-
/* We build DFA states which corresponds to the destination nodes
from 'state'. 'dests_node[i]' represents the nodes which i-th
destination state contains, and 'dests_ch[i]' represents the
characters which i-th destination state accepts. */
- if (__libc_use_alloca (sizeof (struct dests_alloc)))
- dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
- else
- {
- dests_alloc = re_malloc (struct dests_alloc, 1);
- if (__glibc_unlikely (dests_alloc == NULL))
- return false;
- dests_node_malloced = true;
- }
- dests_node = dests_alloc->dests_node;
- dests_ch = dests_alloc->dests_ch;
+ re_node_set dests_node[SBC_MAX];
+ bitset_t dests_ch[SBC_MAX];
/* Initialize transition table. */
state->word_trtable = state->trtable = NULL;
@@ -3298,8 +3285,6 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
if (__glibc_unlikely (ndests <= 0))
{
- if (dests_node_malloced)
- re_free (dests_alloc);
/* Return false in case of an error, true otherwise. */
if (ndests == 0)
{
@@ -3314,38 +3299,14 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
err = re_node_set_alloc (&follows, ndests + 1);
if (__glibc_unlikely (err != REG_NOERROR))
- goto out_free;
-
- /* Avoid arithmetic overflow in size calculation. */
- size_t ndests_max
- = ((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
- / (3 * sizeof (re_dfastate_t *)));
- if (__glibc_unlikely (ndests_max < ndests))
- goto out_free;
-
- if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
- + ndests * 3 * sizeof (re_dfastate_t *)))
- dest_states = (re_dfastate_t **)
- alloca (ndests * 3 * sizeof (re_dfastate_t *));
- else
{
- dest_states = re_malloc (re_dfastate_t *, ndests * 3);
- if (__glibc_unlikely (dest_states == NULL))
- {
-out_free:
- if (dest_states_malloced)
- re_free (dest_states);
- re_node_set_free (&follows);
- for (i = 0; i < ndests; ++i)
- re_node_set_free (dests_node + i);
- if (dests_node_malloced)
- re_free (dests_alloc);
- return false;
- }
- dest_states_malloced = true;
+ out_free:
+ re_node_set_free (&follows);
+ for (i = 0; i < ndests; ++i)
+ re_node_set_free (dests_node + i);
+ return false;
}
- dest_states_word = dest_states + ndests;
- dest_states_nl = dest_states_word + ndests;
+
bitset_empty (acceptable);
/* Then build the states for all destinations. */
@@ -3470,23 +3431,17 @@ out_free:
}
}
- if (dest_states_malloced)
- re_free (dest_states);
-
re_node_set_free (&follows);
for (i = 0; i < ndests; ++i)
re_node_set_free (dests_node + i);
-
- if (dests_node_malloced)
- re_free (dests_alloc);
-
return true;
}
/* Group all nodes belonging to STATE into several destinations.
Then for all destinations, set the nodes belonging to the destination
to DESTS_NODE[i] and set the characters accepted by the destination
- to DEST_CH[i]. This function return the number of destinations. */
+ to DEST_CH[i]. Return the number of destinations if successful,
+ -1 on internal error. */
static Idx
group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
@@ -4264,7 +4219,8 @@ match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
}
/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
- at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
+ at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.
+ Return the new entry if successful, NULL if memory is exhausted. */
static re_sub_match_last_t *
match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)