summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2011-10-08 22:05:30 -0700
committerKaz Kylheku <kaz@kylheku.com>2011-10-08 22:05:30 -0700
commitfebdeaea6766ecc3340f7e89c03f51b51efb601a (patch)
treea52f0770ba872a9a1853a20ea39d31233d438ea4
parente4f1c4c7efa33df19a22cc577c93e590ca99543b (diff)
downloadtxr-febdeaea6766ecc3340f7e89c03f51b51efb601a.tar.gz
txr-febdeaea6766ecc3340f7e89c03f51b51efb601a.tar.bz2
txr-febdeaea6766ecc3340f7e89c03f51b51efb601a.zip
* match.c (vars_to_bindings): New function.
(match_line): keyword argument :vars implemented for coll. * txr.1: Documented :vars.
-rw-r--r--ChangeLog7
-rw-r--r--match.c68
-rw-r--r--txr.172
3 files changed, 124 insertions, 23 deletions
diff --git a/ChangeLog b/ChangeLog
index 599611bf..bb20768d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
2011-10-08 Kaz Kylheku <kaz@kylheku.com>
+ * match.c (vars_to_bindings): New function.
+ (match_line): keyword argument :vars implemented for coll.
+
+ * txr.1: Documented :vars.
+
+2011-10-08 Kaz Kylheku <kaz@kylheku.com>
+
* match.c (vars_k): New symbol variable.
(match_files): Implemented :vars in collect.
(match_init): New symbol variable initialized.
diff --git a/match.c b/match.c
index aec7e63d..615b5f38 100644
--- a/match.c
+++ b/match.c
@@ -285,6 +285,30 @@ static val dest_bind(val linenum, val bindings, val pattern, val value)
return t;
}
+static val eval_form(val lineno, val form, val bindings);
+
+static val vars_to_bindings(val lineno, val vars, val bindings)
+{
+ val iter;
+ list_collect_decl (fixed_vars, tail);
+
+ if (!consp(vars))
+ sem_error(lineno, lit("not a valid variable list: ~a"), vars, nao);
+
+ for (iter = vars; iter; iter = cdr(iter)) {
+ val item = car(iter);
+ if (bindable(item)) {
+ list_collect (tail, cons(item, nil));
+ } else if (consp(item) && bindable(first(item))) {
+ list_collect (tail, cons(first(item),
+ cdr(eval_form(lineno, second(item), bindings))));
+ } else {
+ sem_error(lineno, lit("not a variable spec: ~a"), item, nao);
+ }
+ }
+ return fixed_vars;
+}
+
static val match_line(val bindings, val specline, val dataline,
val pos, val spec_lineno, val data_lineno,
val file)
@@ -563,6 +587,7 @@ static val match_line(val bindings, val specline, val dataline,
val mintimes = getplist(args, mintimes_k);
val maxtimes = getplist(args, maxtimes_k);
val chars = getplist(args, chars_k);
+ val vars = getplist(args, vars_k);
cnum cmax = nump(gap) ? c_num(gap) : (nump(max) ? c_num(max) : 0);
cnum cmin = nump(gap) ? c_num(gap) : (nump(min) ? c_num(min) : 0);
cnum mincounter = cmin, maxcounter = 0;
@@ -574,6 +599,8 @@ static val match_line(val bindings, val specline, val dataline,
cnum timescounter = 0, charscounter = 0;
val iter;
+ vars = vars_to_bindings(spec_lineno, vars, bindings);
+
if (((times || maxtimes) && ctimax == 0) || (chars && cchars == 0))
break;
@@ -616,13 +643,30 @@ static val match_line(val bindings, val specline, val dataline,
bindings, eq_f, nil);
LOG_MATCH("coll", new_pos);
+ for (iter = vars; iter; iter = cdr(iter)) {
+ cons_bind (var, dfl, car(iter));
+ val exists = assoc(new_bindings, var);
+
+ if (!exists) {
+ if (!dfl)
+ sem_error(spec_lineno, lit("coll failed to bind ~a"),
+ var, nao);
+ else
+ strictly_new_bindings = acons(strictly_new_bindings,
+ var, dfl);
+ }
+ }
+
for (iter = strictly_new_bindings; iter; iter = cdr(iter))
{
val binding = car(iter);
- val existing = assoc(bindings_coll, car(binding));
+ val vars_binding = assoc(vars, car(binding));
- bindings_coll = acons_new(bindings_coll, car(binding),
- cons(cdr(binding), cdr(existing)));
+ if (!vars || vars_binding) {
+ val existing = assoc(bindings_coll, car(binding));
+ bindings_coll = acons_new(bindings_coll, car(binding),
+ cons(cdr(binding), cdr(existing)));
+ }
}
}
@@ -1654,23 +1698,7 @@ repeat_spec_same_data:
if (gap && (max || min))
sem_error(spec_linenum, lit("collect: cannot mix :gap with :mingap or :maxgap"), nao);
- if (vars) {
- list_collect_decl (fixed_vars, tail);
-
- if (!consp(vars))
- sem_error(spec_linenum, lit("collect: invalid argument to :vars"), nao);
- for (iter = vars; iter; iter = cdr(iter)) {
- val item = car(iter);
- if (bindable(item)) {
- list_collect (tail, cons(item, nil));
- } else if (consp(item) && bindable(first(item))) {
- list_collect (tail, cons(first(item), second(item)));
- } else {
- sem_error(spec_linenum, lit("not a variable spec: ~a"), item, nao);
- }
- }
- vars = fixed_vars;
- }
+ vars = vars_to_bindings(spec_linenum, vars, bindings);
if ((times && ctimes == 0) || (lines && clines == 0)) {
if ((spec = rest(spec)) == nil)
diff --git a/txr.1 b/txr.1
index 84b73a21..a7b004e8 100644
--- a/txr.1
+++ b/txr.1
@@ -1701,10 +1701,76 @@ other supported keywords are :times, :mintimes, :maxtimes and lines.
The shorthand :times N means the same thing as :mintimes N :maxtimes N.
These specify how many matches should be collected. If there are fewer
than mintimes matches, the collect fails. If maxtimes matches are collected,
-collect stops collecting immediately.
+collect stops collecting immediately. Example:
+
+ @(collect :times 3)
+ @a @b
+ @(end)
+
+This will collect a match for "@a @b" exactly three times. If three
+matches are not found, it will fail.
+
+The :lines parameter specifies the upper bound on how many lines
+should be scanned by collect, measuring from the starting position.
+The extent of the collect body is not counted. Example:
+
+ @(collect :lines 2)
+ foo: @a
+ bar: @b
+ baz: @c
+ @(end)
+
+The above collect will look for a match only twice: at the current position,
+and one line down.
+
+There is one more keyword, :vars, discussed in the following section.
+
+.SS Specifying Variables in Collect
+
+Normally, any variable for which a new binding occurs in a collect is
+collected. A collect clause may be sloppy: it can neglect to bind some
+variables on some iterations, or bind some variables which are meant as
+local temporaries, but which nonetheless end up collated into lists.
+
+The :vars keyword allows the query writer to tame the collect body.
+
+The argument to :vars is a list of variable specs. A variable spec is either a
+symbol, or a (<symbol> <expression>) pair, where the expression specifies a
+default value.
+
+When a :vars list is specified, it means that only the given variables can
+emerge from the successful collect. Any newly introduced bindings for other
+variables do not propagate.
+
+Furthermore, for any variable which is not specified with a default value, the
+collect body, whenever it matches successfully, must bind that variable. If it
+neglects to bind the variable, an exception of type query_error is thrown.
+
+For any variable which has a default value, if the collect body neglects to
+bind that variable, the behavior is as if the body had bound it to that default value.
+
+The default values are expressions, and so can be quasiliterals.
+
+Example:
+
+ @(collect :vars (a b (c "foo")))
+ @a @c
+ @(end)
+
+Here, if the body "@a @c" matches, an error will be thrown because one of the
+mandatory variables is b, and the body neglects to produce a binding for b.
+
+Example:
+
+ @(collect :vars (a (c "foo")))
+ @a @b
+ @(end)
+
+Here, if "@a @b" matches, only a will be collected, but not b, because b is not
+in the variable list. Furthermore, because the body produces no binding for c,
+a binding for c is created with the default value "foo", exactly as if c had
+matched that text.
-Finally, the :lines parameter specifies the upper bound on how many lines
-should be scanned by the collect.
.SS The Coll Directive