summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog50
-rw-r--r--Makefile2
-rw-r--r--filter.c163
-rw-r--r--filter.h36
-rw-r--r--gc.c3
-rw-r--r--hash.c31
-rw-r--r--hash.h4
-rw-r--r--lib.c78
-rw-r--r--lib.h10
-rw-r--r--match.c122
-rw-r--r--parser.y15
-rw-r--r--txr.137
12 files changed, 505 insertions, 46 deletions
diff --git a/ChangeLog b/ChangeLog
index 3a01570b..b22ba7c5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,53 @@
+2011-09-25 Kaz Kylheku <kaz@kylheku.com>
+
+ Filtering feature for variable substitution in output.
+
+ * filter.c, filter.h: New files.
+
+ * Makefile (OBJS): filter.o added.
+
+ * gc.c (mark_obj): Mark new alloc field of string objets.
+
+ * hash.c (struct hash): New member, userdata.
+ (hash_mark): Mark new userdata member of hash.
+ (make_hash): Initialize userdata.
+ (get_hash_userdata, set_hash_userdata, hashp): New functions.
+
+ * hash.h (get_hash_userdata, set_hash_userdata, hashp): New functions
+ declared.
+
+ * lib.c (getplist, string_extend, cobjp): New functions.
+ (string_own, string, string_utf8): Initialize new alloc field to nil.
+ (mkstring, mkustring): Initialize new alloc field to actual size.
+ (length_str): When length is computed and cached, also compute
+ and cache alloc.
+ (init): Call filter_init.
+
+ * lib.h (string string): New member, alloc.
+ (num_fast): Macro converted to inline function.
+ (getplist, string_extend, cobjp): New functions declared.
+
+ * match.c (match_line): Follows change of modifier s-exp syntax.
+ (format_field): New parameter, filter.
+ New modifier syntax parsed. Filter retrieved, and applied.
+ (subst_vars): New parameter, filter. Filter is either applied
+ in this function or passed to format_field, as needed.
+ (eval_form): Pass nil to new parameter of subst_vars.
+ (do_output_line): New parameter, filter. Passed down to subst_vars.
+ (do_output): New parameter, filter. Passed down to do_output_line.
+ (match_files): Pass nil filter to subst_vars in cat directive.
+ Output directive refactored to parse keywords, extract the
+ filter and pass down to do_output.
+
+ * parser.y (regex): Generate (sys:regex regex syntax ...)
+ instead of (regex syntax ...).
+ (elem, expr): Updated w.r.t. regex syntax change.
+ (var): Cases '{' IDENT regex '}' and '{' IDENT NUMBER '}'
+ are removed. new syntax '{' IDENT exprs '}' to handle these
+ more generally and allow for keywords.
+
+ * txr.1: Updated.
+
2011-09-23 Kaz Kylheku <kaz@kylheku.com>
Numeric constants become real constants.
diff --git a/Makefile b/Makefile
index 7f842d5f..b555b657 100644
--- a/Makefile
+++ b/Makefile
@@ -37,7 +37,7 @@ CFLAGS := $(filter-out -Wmissing-prototypes -Wstrict-prototypes,$(CFLAGS))
endif
OBJS := txr.o lex.yy.o y.tab.o match.o lib.o regex.o gc.o unwind.o stream.o
-OBJS += hash.o utf8.o
+OBJS += hash.o utf8.o filter.o
PROG := ./txr
diff --git a/filter.c b/filter.c
new file mode 100644
index 00000000..6b253473
--- /dev/null
+++ b/filter.c
@@ -0,0 +1,163 @@
+/* Copyright 2011
+ * Kaz Kylheku <kkylheku@gmail.com>
+ * Vancouver, Canada
+ * All rights reserved.
+ *
+ * BSD License:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <stddef.h>
+#include <setjmp.h>
+#include "config.h"
+#include "lib.h"
+#include "hash.h"
+#include "unwind.h"
+#include "filter.h"
+
+static val make_trie(void)
+{
+ return make_hash(nil, nil);
+}
+
+static val trie_add(val trie, val key, val value)
+{
+ val node, i, len = length_str(key);
+
+ for (node = trie, i = zero; lt(i, len); i = plus(i, one)) {
+ val ch = chr_str(key, i);
+ val newnode_p;
+ val *loc = gethash_l(node, ch, &newnode_p);
+ if (newnode_p)
+ *loc = make_hash(nil, nil);
+ node = *loc;
+ }
+
+ set_hash_userdata(node, value);
+ return node;
+}
+
+val trie_lookup_begin(val trie)
+{
+ return trie;
+}
+
+val trie_value_at(val node)
+{
+ return get_hash_userdata(node);
+}
+
+val trie_lookup_feed_char(val node, val ch)
+{
+ return gethash(node, ch);
+}
+
+val get_filter_trie(val sym)
+{
+ return gethash(filters, sym);
+}
+
+struct filter_pair {
+ wchar_t *key, *value;
+};
+
+static val build_filter(struct filter_pair *pair)
+{
+ int i;
+ val trie = make_trie();
+
+ for (i = 0; pair[i].key; i++)
+ trie_add(trie, static_str(pair[i].key), static_str(pair[i].value));
+
+ return trie;
+}
+
+static struct filter_pair to_html_table[] = {
+ { L"<", L"&lt;" },
+ { L">", L"&gt;" },
+ { L"&", L"&amp;" },
+ { L"\"", L"&quot;" },
+ { 0, 0 }
+};
+
+static val trie_filter_string(val filter, val str)
+{
+ val len = length_str(str);
+ val i;
+ val out = string(L"");
+
+ for (i = zero; lt(i, len); ) {
+ val node = trie_lookup_begin(filter);
+ val match = nil;
+ val subst;
+ val j;
+
+ for (j = i; lt(j, len); j = plus(j, one)) {
+ val ch = chr_str(str, j);
+ val nnode = trie_lookup_feed_char(node, ch);
+ val nsubst;
+
+ if (!nnode)
+ break;
+
+ if ((nsubst = trie_value_at(nnode))) {
+ match = j;
+ subst = nsubst;
+ }
+
+ node = nnode;
+ }
+
+ if (match) {
+ string_extend(out, subst);
+ i = plus(match, one);
+ } else {
+ string_extend(out, chr_str(str, i));
+ i = plus(i, one);
+ }
+ }
+
+ return out;
+}
+
+val filters;
+val filter_k, to_html_k;
+
+val filter_string(val filter, val str)
+{
+ val type = typeof(filter);
+
+ if (type == null)
+ return str;
+ if (type == hash_s)
+ return trie_filter_string(filter, str);
+ else if (type == fun_s)
+ return funcall1(filter, str);
+ return str;
+ uw_throwf(error_s, lit("filter_string: invalid filter ~a"), filter, nao);
+}
+
+void filter_init(void)
+{
+ filters = make_hash(nil, nil);
+ filter_k = intern(lit("filter"), keyword_package);
+ to_html_k = intern(lit("to_html"), keyword_package);
+ sethash(filters, to_html_k, build_filter(to_html_table));
+}
diff --git a/filter.h b/filter.h
new file mode 100644
index 00000000..f8d86632
--- /dev/null
+++ b/filter.h
@@ -0,0 +1,36 @@
+/* Copyright 2011
+ * Kaz Kylheku <kkylheku@gmail.com>
+ * Vancouver, Canada
+ * All rights reserved.
+ *
+ * BSD License:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+extern val filters;
+extern val filter_k, to_html_k;
+
+val trie_lookup_begin(val trie);
+val trie_value_at(val node);
+val trie_lookup_feed_char(val node, val ch);
+val get_filter_trie(val sym);
+val filter_string(val trie, val str);
+
+void filter_init(void);
diff --git a/gc.c b/gc.c
index ff6ad3ec..00073425 100644
--- a/gc.c
+++ b/gc.c
@@ -234,7 +234,8 @@ tail_call:
mark_obj(obj->c.car);
mark_obj_tail(obj->c.cdr);
case STR:
- mark_obj_tail(obj->st.len);
+ mark_obj(obj->st.len);
+ mark_obj_tail(obj->st.alloc);
case CHR:
case NUM:
case LIT:
diff --git a/hash.c b/hash.c
index f26e83c2..dcd28011 100644
--- a/hash.c
+++ b/hash.c
@@ -51,6 +51,7 @@ struct hash {
val table;
cnum modulus;
cnum count;
+ val userdata;
};
/*
@@ -141,6 +142,8 @@ static void hash_mark(val hash)
struct hash *h = (struct hash *) hash->co.handle;
cnum i;
+ gc_mark(h->userdata);
+
switch (h->flags) {
case hash_weak_none:
/* If the hash is not weak, we can simply mark the table
@@ -235,6 +238,7 @@ val make_hash(val weak_keys, val weak_vals)
h->modulus = c_num(mod);
h->count = 0;
h->table = table;
+ h->userdata = nil;
return hash;
}
@@ -258,6 +262,14 @@ val gethash(val hash, val key)
return cdr(found);
}
+val gethash_f(val hash, val key, val *found)
+{
+ struct hash *h = (struct hash *) hash->co.handle;
+ val chain = *vecref_l(h->table, num(ll_hash(key) % h->modulus));
+ *found = assoc(chain, key);
+ return cdr(*found);
+}
+
val sethash(val hash, val key, val value)
{
val new_p;
@@ -275,6 +287,25 @@ val remhash(val hash, val key)
return nil;
}
+val get_hash_userdata(val hash)
+{
+ struct hash *h = (struct hash *) hash->co.handle;
+ return h->userdata;
+}
+
+val set_hash_userdata(val hash, val data)
+{
+ struct hash *h = (struct hash *) hash->co.handle;
+ val olddata = h->userdata;
+ h->userdata = data;
+ return olddata;
+}
+
+val hashp(val obj)
+{
+ return typeof(obj) == hash_s ? t : nil;
+}
+
/*
* Called from garbage collector. Hash module must process all weak tables
* that were visited during the marking phase, maintained in the list
diff --git a/hash.h b/hash.h
index cd293ce9..b5bd200a 100644
--- a/hash.h
+++ b/hash.h
@@ -28,8 +28,12 @@ val hash_obj(val);
val make_hash(val weak_keys, val weak_vals);
val *gethash_l(val hash, val key, val *new_p);
val gethash(val hash, val key);
+val gethash_f(val hash, val key, val *found);
val sethash(val hash, val key, val value);
val remhash(val hash, val key);
+val get_hash_userdata(val hash);
+val set_hash_userdata(val hash, val data);
+val hashp(val obj);
void hash_process_weak(void);
void hash_init(void);
diff --git a/lib.c b/lib.c
index 62881137..9aa95d84 100644
--- a/lib.c
+++ b/lib.c
@@ -41,6 +41,7 @@
#include "unwind.h"
#include "stream.h"
#include "utf8.h"
+#include "filter.h"
#define max(a, b) ((a) > (b) ? (a) : (b))
#define min(a, b) ((a) < (b) ? (a) : (b))
@@ -654,6 +655,17 @@ val length(val list)
return num(len);
}
+val getplist(val list, val key)
+{
+ for (; list; list = cdr(cdr(list))) {
+ val ind = first(list);
+ if (eq(ind, key))
+ return second(list);
+ }
+
+ return nil;
+}
+
val num(cnum n)
{
numeric_assert (n >= NUM_MIN && n <= NUM_MAX);
@@ -747,6 +759,7 @@ val string_own(wchar_t *str)
obj->st.type = STR;
obj->st.str = str;
obj->st.len = nil;
+ obj->st.alloc = nil;
return obj;
}
@@ -756,6 +769,7 @@ val string(const wchar_t *str)
obj->st.type = STR;
obj->st.str = (wchar_t *) chk_strdup(str);
obj->st.len = nil;
+ obj->st.alloc = nil;
return obj;
}
@@ -765,6 +779,7 @@ val string_utf8(const char *str)
obj->st.type = STR;
obj->st.str = utf8_dup_from(str);
obj->st.len = nil;
+ obj->st.alloc = nil;
return obj;
}
@@ -775,6 +790,7 @@ val mkstring(val len, val ch)
val s = string_own(str);
wmemset(str, c_chr(ch), nchar);
s->st.len = len;
+ s->st.alloc = plus(len, one);
return s;
}
@@ -785,6 +801,7 @@ val mkustring(val len)
val s = string_own(str);
str[l] = 0;
s->st.len = len;
+ s->st.alloc = plus(len, one);
return s;
}
@@ -799,6 +816,52 @@ val copy_str(val str)
return string(c_str(str));
}
+val string_extend(val str, val tail)
+{
+ type_check(str, STR);
+ {
+ cnum len = c_num(length_str(str));
+ cnum alloc = c_num(str->st.alloc);
+ val needed;
+ val room = zero;
+
+ if (stringp(tail))
+ needed = length_str(tail);
+ else if (chrp(tail))
+ needed = one;
+ else
+ uw_throwf(error_s, lit("string_extend: tail ~s bad type"), str, nao);
+
+ room = num(alloc - len - 1);
+
+ while (gt(needed, room) && alloc < NUM_MAX) {
+ if (alloc > NUM_MAX / 2) {
+ alloc = NUM_MAX;
+ } else {
+ alloc *= 2;
+ }
+ room = num(alloc - len - 1);
+ }
+
+ if (gt(needed, room))
+ uw_throwf(error_s, lit("string_extend: overflow"), nao);
+
+ str->st.str = (wchar_t *) chk_realloc((mem_t *) str->st.str,
+ alloc * sizeof *str->st.str);
+ str->st.alloc = num(alloc);
+ str->st.len = plus(str->st.len, needed);
+
+ if (stringp(tail)) {
+ wmemcpy(str->st.str + len, c_str(tail), c_num(needed) + 1);
+ } else {
+ str->st.str[len] = c_chr(tail);
+ str->st.str[len + 1] = 0;
+ }
+ }
+
+ return str;
+}
+
val stringp(val str)
{
switch (tag(str)) {
@@ -834,8 +897,10 @@ val length_str(val str)
return length_str(str->ls.prefix);
}
- if (!str->st.len)
+ if (!str->st.len) {
str->st.len = num(wcslen(str->st.str));
+ str->st.alloc = plus(str->st.len, one);
+ }
return str->st.len;
}
}
@@ -1711,6 +1776,16 @@ val cobj(mem_t *handle, val cls_sym, struct cobj_ops *ops)
return obj;
}
+val cobjp(val obj)
+{
+ if (!obj) {
+ return nil;
+ } else {
+ type_t ty = type(obj);
+ return (ty == COBJ) ? t : nil;
+ }
+}
+
mem_t *cobj_handle(val cobj, val cls_sym)
{
class_check(cobj, cls_sym);
@@ -2222,6 +2297,7 @@ void init(const wchar_t *pn, mem_t *(*oom)(mem_t *, size_t),
obj_init();
uw_init();
stream_init();
+ filter_init();
gc_state(gc_save);
}
diff --git a/lib.h b/lib.h
index 7b69cbbf..f72111e4 100644
--- a/lib.h
+++ b/lib.h
@@ -68,6 +68,7 @@ struct string {
type_t type;
wchar_t *str;
val len;
+ val alloc;
};
struct sym {
@@ -201,6 +202,11 @@ INLINE wchar_t *litptr(val obj)
return (wchar_t *) ((cnum) obj & ~TAG_MASK);
}
+INLINE val num_fast(cnum n)
+{
+ return (val) ((n << TAG_SHIFT) | TAG_NUM);
+}
+
#define lit_noex(strlit) ((obj_t *) ((cnum) (L ## strlit) | TAG_LIT))
#define lit(strlit) lit_noex(strlit)
@@ -277,6 +283,7 @@ val atom(val obj);
val listp(val obj);
val proper_listp(val obj);
val length(val list);
+val getplist(val list, val key);
val num(cnum val);
cnum c_num(val num);
val nump(val num);
@@ -298,6 +305,7 @@ val mkstring(val len, val ch);
val mkustring(val len); /* must initialize immediately with init_str! */
val init_str(val str, const wchar_t *);
val copy_str(val str);
+val string_extend(val str, val tail);
val stringp(val str);
val lazy_stringp(val str);
val length_str(val str);
@@ -357,6 +365,7 @@ val length_str_ge(val str, val len);
val length_str_lt(val str, val len);
val length_str_le(val str, val len);
val cobj(mem_t *handle, val cls_sym, struct cobj_ops *ops);
+val cobjp(val obj);
mem_t *cobj_handle(val cobj, val cls_sym);
val assoc(val list, val key);
val acons_new(val list, val key, val value);
@@ -428,7 +437,6 @@ val match(val spec, val data);
obj_t *CAR = car(c_o_n_s ## CAR ## CDR); \
obj_t *CDR = cdr(c_o_n_s ## CAR ## CDR)
-#define num_fast(n) ((val) ((n << TAG_SHIFT) | TAG_NUM))
#define zero num_fast(0)
#define one num_fast(1)
#define two num_fast(2)
diff --git a/match.c b/match.c
index d833f9df..e92d9687 100644
--- a/match.c
+++ b/match.c
@@ -42,6 +42,7 @@
#include "parser.h"
#include "txr.h"
#include "utf8.h"
+#include "filter.h"
#include "match.h"
int output_produced;
@@ -336,6 +337,13 @@ static val match_line(val bindings, val specline, val dataline,
}
continue;
} else if (pat == nil) { /* match to end of line or with regex */
+ if (gt(length(modifier), one)) {
+ sem_error(spec_lineno, lit("multiple modifiers on variable ~s"),
+ sym, nao);
+ }
+
+ modifier = car(modifier);
+
if (consp(modifier)) {
val past = match_regex(dataline, car(modifier), pos);
if (nullp(past)) {
@@ -519,14 +527,45 @@ static val match_line(val bindings, val specline, val dataline,
return cons(bindings, pos);
}
-static val format_field(val string_or_list, val spec)
+static val format_field(val string_or_list, val modifier, val filter)
{
+ val n = zero;
+ val plist = nil;
+
if (!stringp(string_or_list))
return string_or_list;
+ for (; modifier; pop(&modifier)) {
+ val item = first(modifier);
+ if (nump(item))
+ n = item;
+ if (regexp(item))
+ uw_throw(query_error_s, lit("format_field: regex modifier in output"));
+ if (keywordp(item)) {
+ plist = modifier;
+ break;
+ }
+ }
+
{
- val right = lt(spec, zero);
- val width = if3(lt(spec, zero), neg(spec), spec);
+ val filter_sym = getplist(plist, filter_k);
+
+ if (filter_sym) {
+ filter = get_filter_trie(filter_sym);
+
+ if (!filter) {
+ uw_throwf(query_error_s, lit("format_field: filter ~s not known"),
+ filter_sym, nao);
+ }
+
+ string_or_list = filter_string(filter, cat_str(list(string_or_list, nao),
+ nil));
+ }
+ }
+
+ {
+ val right = lt(n, zero);
+ val width = if3(lt(n, zero), neg(n), n);
val diff = minus(width, length_str(string_or_list));
if (le(diff, zero))
@@ -545,7 +584,7 @@ static val format_field(val string_or_list, val spec)
}
}
-static val subst_vars(val spec, val bindings)
+static val subst_vars(val spec, val bindings, val filter)
{
list_collect_decl(out, iter);
@@ -556,25 +595,25 @@ static val subst_vars(val spec, val bindings)
if (first(elem) == var_s) {
val sym = second(elem);
val pat = third(elem);
- val modifier = fourth(elem);
+ val modifiers = fourth(elem);
val pair = assoc(bindings, sym);
if (pair) {
if (pat)
- spec = cons(cdr(pair), cons(pat, rest(spec)));
- else if (nump(modifier))
- spec = cons(format_field(cdr(pair), modifier), rest(spec));
+ spec = cons(filter_string(filter, cdr(pair)), cons(pat, rest(spec)));
+ else if (modifiers)
+ spec = cons(format_field(cdr(pair), modifiers, filter), rest(spec));
else
- spec = cons(cdr(pair), rest(spec));
+ spec = cons(filter_string(filter, cdr(pair)), rest(spec));
continue;
}
} else if (first(elem) == quasi_s) {
- val nested = subst_vars(rest(elem), bindings);
+ val nested = subst_vars(rest(elem), bindings, filter);
list_collect_append(iter, nested);
spec = cdr(spec);
continue;
} else {
- val nested = subst_vars(elem, bindings);
+ val nested = subst_vars(elem, bindings, filter);
list_collect_append(iter, nested);
spec = cdr(spec);
continue;
@@ -596,7 +635,7 @@ static val eval_form(val form, val bindings)
return assoc(bindings, form);
} else if (consp(form)) {
if (car(form) == quasi_s) {
- return cons(t, cat_str(subst_vars(rest(form), bindings), nil));
+ return cons(t, cat_str(subst_vars(rest(form), bindings, nil), nil));
} else if (regexp(car(form))) {
return cons(t, form);
} else {
@@ -740,7 +779,7 @@ static val extract_bindings(val bindings, val output_spec)
}
static void do_output_line(val bindings, val specline,
- val spec_lineno, val out)
+ val spec_lineno, val filter, val out)
{
for (; specline; specline = rest(specline)) {
val elem = first(specline);
@@ -751,7 +790,8 @@ static void do_output_line(val bindings, val specline,
val directive = first(elem);
if (directive == var_s) {
- val str = cat_str(subst_vars(cons(elem, nil), bindings), nil);
+ val str = cat_str(subst_vars(cons(elem, nil),
+ bindings, filter), nil);
if (str == nil)
sem_error(spec_lineno, lit("bad substitution: ~a"),
second(elem), nao);
@@ -770,10 +810,10 @@ static void do_output_line(val bindings, val specline,
nao)));
if (equal(max_depth, zero) && empty_clauses) {
- do_output_line(bindings, empty_clauses, spec_lineno, out);
+ do_output_line(bindings, empty_clauses, spec_lineno, filter, out);
} else if (equal(max_depth, one) && single_clauses) {
val bind_a = mapcar(func_n1(bind_car), bind_cp);
- do_output_line(bind_a, single_clauses, spec_lineno, out);
+ do_output_line(bind_a, single_clauses, spec_lineno, filter, out);
} else if (!zerop(max_depth)) {
cnum i;
@@ -782,11 +822,11 @@ static void do_output_line(val bindings, val specline,
val bind_d = mapcar(func_n1(bind_cdr), bind_cp);
if (i == 0 && first_clauses) {
- do_output_line(bind_a, first_clauses, spec_lineno, out);
+ do_output_line(bind_a, first_clauses, spec_lineno, filter, out);
} else if (i == c_num(max_depth) - 1 && last_clauses) {
- do_output_line(bind_a, last_clauses, spec_lineno, out);
+ do_output_line(bind_a, last_clauses, spec_lineno, filter, out);
} else {
- do_output_line(bind_a, main_clauses, spec_lineno, out);
+ do_output_line(bind_a, main_clauses, spec_lineno, filter, out);
}
bind_cp = bind_d;
@@ -810,7 +850,7 @@ static void do_output_line(val bindings, val specline,
}
}
-static void do_output(val bindings, val specs, val out)
+static void do_output(val bindings, val specs, val filter, val out)
{
if (equal(specs, null_list))
return;
@@ -836,10 +876,10 @@ static void do_output(val bindings, val specs, val out)
nao)));
if (equal(max_depth, zero) && empty_clauses) {
- do_output(bind_cp, empty_clauses, out);
+ do_output(bind_cp, empty_clauses, filter, out);
} else if (equal(max_depth, one) && single_clauses) {
val bind_a = mapcar(func_n1(bind_car), bind_cp);
- do_output(bind_a, single_clauses, out);
+ do_output(bind_a, single_clauses, filter, out);
} else if (!zerop(max_depth)) {
cnum i;
@@ -848,11 +888,11 @@ static void do_output(val bindings, val specs, val out)
val bind_d = mapcar(func_n1(bind_cdr), bind_cp);
if (i == 0 && first_clauses) {
- do_output(bind_a, first_clauses, out);
+ do_output(bind_a, first_clauses, filter, out);
} else if (i == c_num(max_depth) - 1 && last_clauses) {
- do_output(bind_a, last_clauses, out);
+ do_output(bind_a, last_clauses, filter, out);
} else {
- do_output(bind_a, main_clauses, out);
+ do_output(bind_a, main_clauses, filter, out);
}
bind_cp = bind_d;
@@ -862,7 +902,7 @@ static void do_output(val bindings, val specs, val out)
}
}
- do_output_line(bindings, specline, spec_lineno, out);
+ do_output_line(bindings, specline, spec_lineno, filter, out);
put_char(out, chr('\n'));
}
}
@@ -1393,7 +1433,7 @@ repeat_spec_same_data:
val sep = nil;
if (rest(specline)) {
- val sub = subst_vars(rest(specline), bindings);
+ val sub = subst_vars(rest(specline), bindings, nil);
sep = cat_str(sub, nil);
}
@@ -1410,14 +1450,14 @@ repeat_spec_same_data:
val specs = second(first_spec);
val dest_spec = third(first_spec);
val nothrow = nil;
- val dest;
+ val dest = lit("-");
+ val filter = nil;
fpip_t fp;
if (eq(first(dest_spec), nothrow_k)) {
if (rest(dest_spec))
sem_error(spec_linenum, lit("material after :nothrow in output"), nao);
- dest = string(L"-");
- } else {
+ } else if (!keywordp(first(dest_spec))) {
val form = first(dest_spec);
val val = eval_form(form, bindings);
@@ -1425,8 +1465,24 @@ repeat_spec_same_data:
sem_error(spec_linenum,
lit("output: unbound variable in form ~a"), form, nao);
- nothrow = eq(second(dest_spec), nothrow_k);
- dest = or2(cdr(val), string(L"-"));
+ dest = or2(cdr(val), dest);
+ pop(&dest_spec);
+ }
+
+ if (eq(first(dest_spec), nothrow_k)) {
+ nothrow = t;
+ pop(&dest_spec);
+ }
+
+ if (keywordp(first(dest_spec))) {
+ val filter_sym = getplist(dest_spec, filter_k);
+
+ if (filter_sym) {
+ filter = get_filter_trie(filter_sym);
+
+ if (!filter)
+ sem_error(spec_linenum, lit("unknown filter ~s"), filter_sym, nao);
+ }
}
fp = (errno = 0, complex_open(dest, t));
@@ -1446,7 +1502,7 @@ repeat_spec_same_data:
}
} else {
val stream = complex_stream(fp, dest);
- do_output(bindings, specs, stream);
+ do_output(bindings, specs, filter, stream);
close_stream(stream, t);
}
diff --git a/parser.y b/parser.y
index d2bcf4ad..e6e66d1d 100644
--- a/parser.y
+++ b/parser.y
@@ -194,7 +194,8 @@ elems : elem { $$ = cons($1, nil); }
elem : TEXT { $$ = string_own($1); }
| var { $$ = $1; }
| list { $$ = $1; }
- | regex { $$ = cons(regex_compile($1), $1); }
+ | regex { $$ = cons(regex_compile(rest($1)),
+ rest($1)); }
| COLL elems END { $$ = list(coll_s, $2, nao); }
| COLL elems
UNTIL elems END { $$ = list(coll_s, $2, $4, nao); }
@@ -400,11 +401,8 @@ var : IDENT { $$ = list(var_s, intern(string_own($1), nil),
nao); }
| '{' IDENT '}' elem { $$ = list(var_s, intern(string_own($2), nil),
$4, nao); }
- | '{' IDENT regex '}' { $$ = list(var_s, intern(string_own($2), nil),
- nil, cons(regex_compile($3), $3),
- nao); }
- | '{' IDENT NUMBER '}' { $$ = list(var_s, intern(string_own($2), nil),
- nil, num($3), nao); }
+ | '{' IDENT exprs '}' { $$ = list(var_s, intern(string_own($2), nil),
+ nil, $3, nao); }
| var_op IDENT { $$ = list(var_s, intern(string_own($2), nil),
nil, $1, nao); }
| var_op IDENT elem { $$ = list(var_s, intern(string_own($2), nil),
@@ -445,13 +443,14 @@ expr : IDENT { $$ = intern(string_own($1), nil); }
keyword_package); }
| NUMBER { $$ = num($1); }
| list { $$ = $1; }
- | regex { $$ = cons(regex_compile($1), $1); }
+ | regex { $$ = cons(regex_compile(rest($1)),
+ rest($1)); }
| chrlit { $$ = $1; }
| strlit { $$ = $1; }
| quasilit { $$ = $1; }
;
-regex : '/' regexpr '/' { $$ = $2; end_of_regex(); }
+regex : '/' regexpr '/' { $$ = cons(regex_s, $2); end_of_regex(); }
| '/' error { $$ = nil;
yybadtoken(yychar, lit("regex"));
end_of_regex(); }
diff --git a/txr.1 b/txr.1
index 2721edab..83c99412 100644
--- a/txr.1
+++ b/txr.1
@@ -2274,7 +2274,7 @@ usual printing of the variable bindings or the word false.
The syntax of the @(output) directive is:
- @(output [ DESTINATION ] [ :nothrow ])
+ @(output [ DESTINATION ] [ :nothrow ] [ { keyword value } * ])
.
. one or more output directives or lines
.
@@ -2295,6 +2295,12 @@ asynchronously, a failing command will not throw an immediate exception that
can be suppressed with :nothrow. This is for synchronous errors, like
trying to open a destination file, but not having permissions, etc.
+The keyword value list is used for passing additional options.
+Currently, the only keyword supported is the :filter keyword.
+This specifies a filter to be applied to the variable substitutions occuring
+within the output clause.
+
+
.SS Output Text
Text in an output clause is not matched against anything, but is output
@@ -2318,6 +2324,9 @@ field. If the text is shorter than the field, then it is left-adjusted within
that field, if the width is specified as a positive number, and right-adjusted
if the width is specified as negative.
+An output variable may specify a filter which overrides any filter established
+for the output clause. The syntax for this is @(NAME :filter <filtername>}.
+
.SS The Repeat Directive
The repeat directive is generates repeated text from a ``boilerplate'',
@@ -2465,6 +2474,32 @@ spaces each one, except the last which has no space.
If the list has exactly one item, then the @(last) applies to it
instead of the main clause: it is produced with no trailing space.
+.SS Output Filtering
+
+Often it is necessary to transform the output to preserve its meaning
+under the convention of a given data format. For instance, if a piece of
+text contains the characters < or >, then if that text is being
+substituted into HTML, these should be replaced by &lt; and &gt;.
+This is what filtering is for. Filtering is applied to the contents of output
+variables, not to any template text.
+.B txr
+implements named filters. Currently, the only built-in filter available is
+called :to_html and user-defined filters are not possible.
+
+To escape HTML characters in all variable substitutions occuring in
+an output clause, specify :filter :to_html in the directive:
+
+ @(output :filter :to_html)
+ ...
+ @(end)
+
+To filter an individual variable, add the syntax to the variable spec:
+
+ @(output)
+ @{x :filter :to_html}
+ @(end)
+
+
.SH EXCEPTIONS
.SS Introduction