summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Kaz Kylheku <kaz@kylheku.com> 2014-08-12 20:37:52 -0700
committer: Kaz Kylheku <kaz@kylheku.com> 2014-08-12 20:37:52 -0700
commit: 9aee8dd619826e3fa73f665e72522a342742af0b (patch)
tree: dd1b9ee77ea67ae3d2a95954651cde272faf969e
parent: 17c43f985b8c707e997fe65a20ce31430f51bb36 (diff)
download: txr-9aee8dd619826e3fa73f665e72522a342742af0b.tar.gz
          txr-9aee8dd619826e3fa73f665e72522a342742af0b.tar.bz2
          txr-9aee8dd619826e3fa73f665e72522a342742af0b.zip
When a variable is delimited by some form other than
the contents of a variable, fixed string or regex, we now
use the entire tail of the specline to find the match.
So for instance @var@(trailer)foo works as intuition
might expect.

* match.c (search_form): Static function removed.
(search_match): New static function based on search_form.
Does not handle regexes, and does not update c->bindings.
(h_var): Renamed local variable pat to next. Added a few
missing rlcp's. Combined the cases when pat is a cons to
one block so consp isn't repeatedly tested. Function now
handles a var followed by (sys:text ...) elements specially;
the first element of the text block is pulled out and
matched. Implemented "var delimiting spec" general case
which matches the entire tail of the spec at successive
character positions until a match is found, and the skipped
text goes into the variable.
-rw-r--r--  ChangeLog   21
-rw-r--r--  match.c    205
2 files changed, 131 insertions, 95 deletions
diff --git a/ChangeLog b/ChangeLog
index d9a06d9a..f042933a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,24 @@
+2014-08-12 Kaz Kylheku <kaz@kylheku.com>
+
+ When a variable is delimited by some form other than
+ the contents of a variable, fixed string or regex, we now
+ use the entire tail of the specline to find the match.
+ So for instance @var@(trailer)foo works as intuition
+ might expect.
+
+ * match.c (search_form): Static function removed.
+ (search_match): New static function based on search_form.
+ Does not handle regexes, and does not update c->bindings.
+ (h_var): Renamed local variable pat to next. Added a few
+ missing rlcp's. Combined the cases when pat is a cons to
+ one block so consp isn't repeatedly tested. Function now
+ handles a var followed by (sys:text ...) elements specially;
+ the first element of the text block is pulled out and
+ matched. Implemented "var delimiting spec" general case
+ which matches the entire tail of the spec at successive
+ character positions until a match is found, and the skipped
+ text goes into the variable.
+
2014-08-11 Kaz Kylheku <kaz@kylheku.com>
First cut at restructuring how variable matching works in the pattern
diff --git a/match.c b/match.c
index 0ffc1220..fe6a918f 100644
--- a/match.c
+++ b/match.c
@@ -450,43 +450,35 @@ static void consume_prefix(match_line_ctx *c)
}
-static val search_form(match_line_ctx *c, val needle_form, val from_end)
+static val search_match(match_line_ctx *c, val from_end)
{
- if (regexp(first(needle_form))) {
- return search_regex(c->dataline, first(needle_form), c->pos, from_end);
- } else {
- val spec = cons(needle_form, nil);
- val pos = from_end ? length_str(c->dataline) : c->pos;
- val step = from_end ? negone : one;
-
- rlcp(spec, needle_form);
-
- for (; (from_end && ge(pos, c->pos)) ||
- (!from_end && length_str_ge(c->dataline, pos));
- pos = plus(pos, step))
- {
- cons_bind (new_bindings, new_pos,
- match_line(ml_specline_pos(*c, spec, pos)));
- if (new_pos == t) {
- return cons(pos, t);
- } else if (new_pos) {
- new_pos = minus(new_pos, c->base);
- c->bindings = new_bindings;
- return cons(pos, minus(new_pos, pos));
- }
+ val spec = c->specline;
+ val pos = from_end ? length_str(c->dataline) : c->pos;
+ val step = from_end ? negone : one;
- consume_prefix(c);
+ for (; (from_end && ge(pos, c->pos)) ||
+ (!from_end && length_str_ge(c->dataline, pos));
+ pos = plus(pos, step))
+ {
+ val new_pos = cdr(match_line(ml_specline_pos(*c, spec, pos)));
+ if (new_pos == t) {
+ return cons(pos, t);
+ } else if (new_pos) {
+ new_pos = minus(new_pos, c->base);
+ return cons(pos, minus(new_pos, pos));
}
- return nil;
+ consume_prefix(c);
}
+
+ return nil;
}
static val h_var(match_line_ctx *c)
{
val elem = pop(&c->specline);
val sym = second(elem);
- val pat = first(c->specline);
+ val next = first(c->specline);
val modifiers = third(elem);
val modifier = first(modifiers);
val pair = if2(sym, assoc(sym, c->bindings)); /* exists? */
@@ -502,9 +494,9 @@ static val h_var(match_line_ctx *c)
if (pair) {
/* If the variable already has a binding, we replace
it with its value, and treat it as a string match.
- The spec looks like ((var <sym> <pat>) ...)
+ The spec looks like ((var <sym>) <next> ...)
and it must be transformed into
- (<sym-substituted> <pat> ...).
+ (<sym-substituted> <next> ...).
But if the variable is a fix sized field match,
then we treat that specially: it has to match
that much text. */
@@ -547,8 +539,8 @@ static val h_var(match_line_ctx *c)
c->bindings = acons(sym, sub_str(c->dataline, c->pos, new_pos), new_bindings);
c->pos = new_pos;
/* This may have another variable attached */
- if (pat) {
- c->specline = rlcp(cons(pat, rest(c->specline)), c->specline);
+ if (next) {
+ c->specline = rlcp(cons(next, rest(c->specline)), c->specline);
return repeat_spec_k;
}
} else if (fixnump(modifier)) { /* fixed field */
@@ -563,19 +555,19 @@ static val h_var(match_line_ctx *c)
c->bindings = acons(sym, trim_str(sub_str(c->dataline, c->pos, past)), c->bindings);
c->pos = past;
/* This may have another variable attached */
- if (pat) {
- c->specline = cons(pat, rest(c->specline));
+ if (next) {
+ c->specline = rlcp(cons(next, rest(c->specline)), c->specline);
return repeat_spec_k;
}
} else if (modifier && modifier != t) {
sem_error(elem, lit("invalid modifier ~s on variable ~s"),
modifier, sym, nao);
- } else if (pat == nil) { /* no modifier, no elem -> to end of line */
+ } else if (next == nil) { /* no modifier, no elem -> to end of line */
if (sym)
c->bindings = acons(sym, sub_str(c->dataline, c->pos, nil), c->bindings);
c->pos = length_str(c->dataline);
- } else if (type(pat) == STR) {
- val find = search_str(c->dataline, pat, c->pos, modifier);
+ } else if (type(next) == STR) {
+ val find = search_str(c->dataline, next, c->pos, modifier);
if (!find) {
LOG_MISMATCH("var delimiting string");
return nil;
@@ -583,78 +575,101 @@ static val h_var(match_line_ctx *c)
LOG_MATCH("var delimiting string", find);
if (sym)
c->bindings = acons(sym, sub_str(c->dataline, c->pos, find), c->bindings);
- c->pos = plus(find, length_str(pat));
- } else if (consp(pat) && first(pat) != var_s) {
- val find = search_form(c, pat, modifier);
- val fpos = car(find);
- val flen = cdr(find);
- if (!find) {
- LOG_MISMATCH("var delimiting form");
- return nil;
- }
- LOG_MATCH("var delimiting form", fpos);
- if (sym)
- c->bindings = acons(sym, sub_str(c->dataline, c->pos, fpos), c->bindings);
- c->pos = if3(flen == t, t, plus(fpos, flen));
- } else if (consp(pat)) {
- /* Unbound var followed by var: the following one must either
- be bound, or must specify a regex. */
- val second_sym = second(pat);
- val next_modifiers = third(pat);
- val next_modifier = first(next_modifiers);
- val pair = if2(second_sym, assoc(second_sym, c->bindings)); /* exists? */
-
- if (gt(length_list(next_modifiers), one)) {
- sem_error(elem, lit("multiple modifiers on variable ~s"),
- second_sym, nao);
- }
+ c->pos = plus(find, length_str(next));
+ } else if (consp(next)) {
+ val op = first(next);
- if (!pair && consp(next_modifier)) {
- val find = search_form(c, next_modifier, modifier);
+ if (regexp(op)) {
+ val find = search_regex(c->dataline, op, c->pos, modifier);
val fpos = car(find);
val flen = cdr(find);
-
if (!find) {
- LOG_MISMATCH("double var regex");
+ LOG_MISMATCH("var delimiting regex");
return nil;
}
+ LOG_MATCH("var delimiting regex", fpos);
+ if (sym)
+ c->bindings = acons(sym, sub_str(c->dataline, c->pos, fpos), c->bindings);
+ c->pos = if3(flen == t, t, plus(fpos, flen));
+ } else if (op == var_s) {
+ /* Unbound var followed by var: the following one must either
+ be bound, or must specify a regex. */
+ val second_sym = second(next);
+ val next_modifiers = third(next);
+ val next_modifier = first(next_modifiers);
+ val pair = if2(second_sym, assoc(second_sym, c->bindings)); /* exists? */
+
+ if (gt(length_list(next_modifiers), one)) {
+ sem_error(elem, lit("multiple modifiers on variable ~s"),
+ second_sym, nao);
+ }
+
+ if (!pair && consp(next_modifier)) {
+ val find = search_regex(c->dataline, first(next_modifier), c->pos, modifier);
+ val fpos = car(find);
+ val flen = cdr(find);
- /* Text from here to start of regex match goes to this
- variable. */
+ if (!find) {
+ LOG_MISMATCH("double var regex");
+ return nil;
+ }
+
+ /* Text from here to start of regex match goes to this
+ variable. */
+ if (sym)
+ c->bindings = acons(sym, sub_str(c->dataline, c->pos, fpos),
+ c->bindings);
+ /* Text from start of regex match to end goes to the
+ second variable */
+ if (second_sym)
+ c->bindings = acons(second_sym,
+ sub_str(c->dataline, fpos, plus(fpos, flen)),
+ c->bindings);
+ LOG_MATCH("double var regex (first var)", fpos);
+ c->pos = fpos;
+ LOG_MATCH("double var regex (second var)", plus(fpos, flen));
+ c->pos = plus(fpos, flen);
+ return next_spec_k;
+ } else if (!pair) {
+ sem_error(elem, lit("consecutive unbound variables"), nao);
+ } else {
+ /* Re-generate a new spec with an edited version of
+ the element we just processed, and repeat. */
+ val new_elem = list(var_s, sym, cdr(pair), modifier, nao);
+ c->specline = cons(elem, cons(new_elem, rest(c->specline)));
+ return repeat_spec_k;
+ }
+ } else if (op == text_s) {
+ /* Clumped texts: break out the first one. */
+ val text_elem = rlcp(second(next), c->specline);
+ val rest_texts = cons(text_s, rest(rest(next)));
+ c->specline = cons(elem, cons(text_elem,
+ cons(rest_texts, rest(c->specline))));
+ return repeat_spec_k;
+ } else if (consp(op) || stringp(op)) {
+ cons_bind (find, len, search_str_tree(c->dataline, next, c->pos, modifier));
+ if (!find) {
+ LOG_MISMATCH("string");
+ return nil;
+ }
if (sym)
- c->bindings = acons(sym, sub_str(c->dataline, c->pos, fpos),
- c->bindings);
- /* Text from start of regex match to end goes to the
- second variable */
- if (second_sym)
- c->bindings = acons(second_sym,
- sub_str(c->dataline, fpos, plus(fpos, flen)),
- c->bindings);
- LOG_MATCH("double var regex (first var)", fpos);
- c->pos = fpos;
- LOG_MATCH("double var regex (second var)", plus(fpos, flen));
- c->pos = plus(fpos, flen);
- return next_spec_k;
- } else if (!pair) {
- sem_error(elem, lit("consecutive unbound variables"), nao);
+ c->bindings = acons(sym, sub_str(c->dataline, c->pos, find), c->bindings);
+ c->pos = plus(find, len);
} else {
- /* Re-generate a new spec with an edited version of
- the element we just processed, and repeat. */
- val new_elem = list(var_s, sym, cdr(pair), modifier, nao);
- c->specline = cons(elem, cons(new_elem, rest(c->specline)));
+ val find = search_match(c, modifier);
+ val fpos = car(find);
+ if (!find) {
+ LOG_MISMATCH("var delimiting spec");
+ return nil;
+ }
+ LOG_MATCH("var delimiting spec", fpos);
+ if (sym)
+ c->bindings = acons(sym, sub_str(c->dataline, c->pos, fpos), c->bindings);
+ c->pos = fpos;
return repeat_spec_k;
}
- } else if (consp(pat) && (consp(first(pat)) || stringp(first(pat)))) {
- cons_bind (find, len, search_str(c->dataline, pat, c->pos, modifier));
- if (!find) {
- LOG_MISMATCH("string");
- return nil;
- }
- if (sym)
- c->bindings = acons(sym, sub_str(c->dataline, c->pos, find), c->bindings);
- c->pos = plus(find, len);
} else {
- sem_error(elem, lit("variable followed by invalid element"), nao);
+ sem_error(elem, lit("variable followed by invalid element: ~s"), next, nao);
}
return next_spec_k;