summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog27
-rw-r--r--match.c76
-rw-r--r--txr.114
3 files changed, 89 insertions, 28 deletions
diff --git a/ChangeLog b/ChangeLog
index 3ac4b107..80823cbe 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,30 @@
+2014-02-19 Kaz Kylheku <kaz@kylheku.com>
+
+ Fixing a long-running issue in the TXR pattern language: premature
+ opening of files, prior to directives that actually need data.
+ The documentation basically lied that this is the case: namely,
+ the text "A file isn't opened until the query demands material from
+ that file, and then the contents are read on demand, not all at
+ once." This is now a fact.
+
+ * match.c (non_matching_directive_table): New global variable.
+ (open_data_source): New static function. Contains an almost
+ verbatim migration of the source-opening logic that used
+ to be in match_files. The useless assignment to c->nil is gone,
+ and c->data == t is explicitly tested for. Instead of assuming
+ that only the @(next) directive does not need to have a data source
+ open, the table of non-matching directives is consulted. Opening
+ the data source is now skipped for numerous directives.
+ (match_files): Call open_data_source within the loop. This
+ means that even after processing numerous non-matching directives,
+ we will still correctly set up the data lazy list.
+ (dir_tables_init): Initialize non_matching_directive_table,
+ protect from GC and populate with numerous directives.
+
+ * txr.1: Improved documentation for @(next :args), and removed
+ a description of the hack that a single @(next) at the top of the
+ query suppressed the opening of the data source.
+
2014-02-18 Kaz Kylheku <kaz@kylheku.com>
* eval.c (env_hash): new function.
diff --git a/match.c b/match.c
index 73452c8c..0da93ca8 100644
--- a/match.c
+++ b/match.c
@@ -68,6 +68,7 @@ val filter_s;
val noval_s;
static val h_directive_table, v_directive_table;
+static val non_matching_directive_table;
static void debuglf(val form, val fmt, ...)
{
@@ -3650,27 +3651,28 @@ static val h_do(match_line_ctx *c)
return next_spec_k;
}
-static val match_files(match_files_ctx c)
+static void open_data_source(match_files_ctx *c)
{
- debug_enter;
-
- gc_hint(c.data);
+ /* c->data == t is set up by the top level call to match_files.
+ * It indicates that we have not yet opened any data source.
+ */
- if (listp(c.data)) { /* recursive call with lazy list */
- ; /* no special initialization */
- } else if (c.files) { /* c.data == t: toplevel call with file list */
- val source_spec = first(c.files);
+ if (c->data == t && c->files) {
+ val source_spec = first(c->files);
val name = consp(source_spec) ? cdr(source_spec) : source_spec;
if (stringp(name)) {
fpip_t fp = (errno = 0, complex_open(name, nil, nil));
- spec_bind (specline, first_spec, c.spec);
+ spec_bind (specline, first_spec, c->spec);
- if (consp(first_spec) && eq(first(first_spec), next_s) && !rest(specline)) {
+ if (consp(first_spec) && (gethash(non_matching_directive_table,
+ first(first_spec))))
+ {
debuglf(first_spec, lit("not opening source ~a "
- "since query starts with next directive"), name, nao);
+ "since query starts with non-matching "
+ "directive."), name, nao);
} else {
- val spec = first(c.spec);
+ val spec = first(c->spec);
debuglf(spec, lit("opening data source ~a"), name, nao);
if (complex_open_failed(fp)) {
@@ -3686,20 +3688,25 @@ static val match_files(match_files_ctx c)
debug_return (nil);
}
- c.files = cons(name, cdr(c.files)); /* Get rid of cons and nothrow */
+ c->files = cons(name, cdr(c->files)); /* Get rid of cons and nothrow */
- if ((c.data = complex_snarf(fp, name)) != nil)
- c.data_lineno = num(1);
+ if ((c->data = complex_snarf(fp, name)) != nil)
+ c->data_lineno = num(1);
}
} else if (streamp(name)) {
- if ((c.data = lazy_stream_cons(name)))
- c.data_lineno = num(1);
+ if ((c->data = lazy_stream_cons(name)))
+ c->data_lineno = num(1);
} else {
- c.data = nil;
+ c->data = nil;
}
- } else { /* toplevel call with no data or file list */
- c.data = nil;
}
+}
+
+static val match_files(match_files_ctx c)
+{
+ debug_enter;
+
+ gc_hint(c.data);
for (; c.spec; c.spec = rest(c.spec),
c.data = rest(c.data),
@@ -3708,6 +3715,8 @@ repeat_spec_same_data:
{
spec_bind (specline, first_spec, c.spec);
+ open_data_source(&c);
+
debug_check(first_spec, c.bindings, c.data, c.data_lineno, nil, nil);
if (consp(first_spec) && !rest(specline)) {
@@ -3898,8 +3907,10 @@ static void dir_tables_init(void)
{
h_directive_table = make_hash(nil, nil, nil);
v_directive_table = make_hash(nil, nil, nil);
+ non_matching_directive_table = make_hash(nil, nil, nil);
- protect(&h_directive_table, &v_directive_table, (val *) 0);
+ protect(&h_directive_table, &v_directive_table,
+ &non_matching_directive_table, (val *) 0);
sethash(v_directive_table, skip_s, cptr((mem_t *) v_skip));
sethash(v_directive_table, fuzz_s, cptr((mem_t *) v_fuzz));
@@ -3963,6 +3974,29 @@ static void dir_tables_init(void)
sethash(h_directive_table, eol_s, cptr((mem_t *) h_eol));
sethash(h_directive_table, do_s, cptr((mem_t *) h_do));
sethash(h_directive_table, require_s, cptr((mem_t *) hv_trampoline));
+
+ sethash(non_matching_directive_table, block_s, t);
+ sethash(non_matching_directive_table, accept_s, t);
+ sethash(non_matching_directive_table, fail_s, t);
+ sethash(non_matching_directive_table, next_s, t);
+ sethash(non_matching_directive_table, forget_s, t);
+ sethash(non_matching_directive_table, local_s, t);
+ sethash(non_matching_directive_table, merge_s, t);
+ sethash(non_matching_directive_table, bind_s, t);
+ sethash(non_matching_directive_table, rebind_s, t);
+ sethash(non_matching_directive_table, set_s, t);
+ sethash(non_matching_directive_table, cat_s, t);
+ sethash(non_matching_directive_table, output_s, t);
+ sethash(non_matching_directive_table, define_s, t);
+ sethash(non_matching_directive_table, try_s, t);
+ sethash(non_matching_directive_table, defex_s, t);
+ sethash(non_matching_directive_table, throw_s, t);
+ sethash(non_matching_directive_table, deffilter_s, t);
+ sethash(non_matching_directive_table, filter_s, t);
+ sethash(non_matching_directive_table, require_s, t);
+ sethash(non_matching_directive_table, do_s, t);
+ sethash(non_matching_directive_table, load_s, t);
+ sethash(non_matching_directive_table, close_s, t);
}
void match_init(void)
diff --git a/txr.1 b/txr.1
index f9c50347..90d45953 100644
--- a/txr.1
+++ b/txr.1
@@ -1461,13 +1461,13 @@ match failure.
The variant @(next :args) means that the remaining command line arguments are to
be treated as a data source. For this purpose, each argument is considered to
be a line of text. If an argument is currently being processed as an input
-source, that argument is included. Note that if the first entry in the argument
-list is not intended to name an input source, then the query should begin with
-@(next :args) or some other form of next directive, to prevent an attempt to
-open the input source named by that argument. If the very first directive of a query is any variant of the next directive, then
-.B TXR
-avoids opening the first input source, but it does open the input source for
-any other directive, even one which does not consume any data.
+source, that argument is included at the front of the list. As the arguments
+are matched, they are consumed. This means that if a @(next) directive without
+arguments is executed in the scope of @(next :args), it opens the file named
+by the first unconsumed argument.
+
+To process arguments, and then continue with the original file and argument
+list, wrap the argument processing in a @(block).
The variant @(next :env) means that the list of process environment variables
is treated as a source of data. It looks like a text file stream