summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2015-11-01 19:18:57 -0800
committerKaz Kylheku <kaz@kylheku.com>2015-11-01 19:18:57 -0800
commit18dd42f65e620326bb21ffcde92004cc9705cbf8 (patch)
tree7d343914189779a0470bc74f85ba5593bab89c9e
parentaea62af9451ce1da9db494aa07cdfb0087fa1473 (diff)
downloadtxr-18dd42f65e620326bb21ffcde92004cc9705cbf8.tar.gz
txr-18dd42f65e620326bb21ffcde92004cc9705cbf8.tar.bz2
txr-18dd42f65e620326bb21ffcde92004cc9705cbf8.zip
New range type, distinct from cons cell.
* eval.c (eval_init): Register intrinsic functions rcons, rangep from and to. (eval_init): Register rangep intrinsic. * gc.c (mark_obj): Traverse RNG objects. (finalize): Handle RNG in switch. * hash.c (equal_hash, eql_hash): Hashing for for RNG objects. * lib.c (range_s, rcons_s): New symbol variables. (code2type): Handle RNG type. (eql, equal): Equality for ranges. (less_tab_init): Table extended to cover RNG. (less): Semantics defined for ranges. (rcons, rangep, from, to): New functions. (obj_init): range_s and rcons_s variables initialized. (obj_print_impl): Produce #R notation for ranges. (generic_funcall, dwim_set): Recognize range objects for indexing * lib.h (enum type): New enum member, RNG. MAXTYPE redefined to RNG value. (TYPE_SHIFT): Increased to 5 since there are now 16 type codes. (struct range): New struct type. (union obj): New member rn, of type struct range. (range_s, rcons_s, rcons, rangep, from, to): Declared. (range_bind): New macro. * parser.l (grammar): New rule for recognizing the #R sequence as HASH_R token. * parser.y (HASH_R): New terminal symbol. (range): New nonterminal symbol. (n_expr): Derives the new range symbol. The n_expr DOTDOT n_expr rule produces rcons expression rather than const. * match.c (format_field): Recognize rcons syntax in fields which is now what ranges translate to. Also recognize range object. * tests/013/maze.tl (neigh): Fix code which destructures range as a cons. That can't be done any more. * txr.1: Document ranges.
-rw-r--r--eval.c5
-rw-r--r--gc.c4
-rw-r--r--hash.c6
-rw-r--r--lib.c106
-rw-r--r--lib.h23
-rw-r--r--match.c4
-rw-r--r--parser.l5
-rw-r--r--parser.y17
-rw-r--r--tests/013/maze.tl3
-rw-r--r--txr.1123
10 files changed, 253 insertions, 43 deletions
diff --git a/eval.c b/eval.c
index 052c7994..21e1dcea 100644
--- a/eval.c
+++ b/eval.c
@@ -4776,6 +4776,11 @@ void eval_init(void)
reg_fun(intern(lit("where"), user_package), func_n2(where));
reg_fun(intern(lit("select"), user_package), func_n2(sel));
+ reg_fun(intern(lit("rcons"), user_package), func_n2(rcons));
+ reg_fun(intern(lit("rangep"), user_package), func_n1(rangep));
+ reg_fun(intern(lit("from"), user_package), func_n1(from));
+ reg_fun(intern(lit("to"), user_package), func_n1(to));
+
reg_fun(intern(lit("make-like"), user_package), func_n2(make_like));
reg_fun(intern(lit("nullify"), user_package), func_n1(nullify));
diff --git a/gc.c b/gc.c
index 4892f133..bae671ec 100644
--- a/gc.c
+++ b/gc.c
@@ -240,6 +240,7 @@ static void finalize(val obj)
case LSTR:
case ENV:
case FLNUM:
+ case RNG:
return;
case SYM:
free(obj->s.slot_cache);
@@ -366,6 +367,9 @@ tail_call:
mark_obj(obj->e.vbindings);
mark_obj(obj->e.fbindings);
mark_obj_tail(obj->e.up_env);
+ case RNG:
+ mark_obj(obj->rn.from);
+ mark_obj_tail(obj->rn.to);
}
assert (0 && "corrupt type field");
diff --git a/hash.c b/hash.c
index 380a4615..265da1af 100644
--- a/hash.c
+++ b/hash.c
@@ -174,6 +174,9 @@ static cnum equal_hash(val obj)
return hash_double(obj->fl.n);
case COBJ:
return obj->co.ops->hash(obj) & NUM_MAX;
+ case RNG:
+ return (equal_hash(obj->rn.from)
+ + 32 * (equal_hash(obj->rn.to) & (NUM_MAX / 16))) & NUM_MAX;
}
internal_error("unhandled case in equal function");
@@ -190,6 +193,9 @@ static cnum eql_hash(val obj)
return mp_hash(mp(obj)) & NUM_MAX;
case FLNUM:
return hash_double(obj->fl.n);
+ case RNG:
+ return (eql_hash(obj->rn.from)
+ + 32 * (eql_hash(obj->rn.to) & (NUM_MAX / 16))) & NUM_MAX;
default:
switch (sizeof (mem_t *)) {
case 4:
diff --git a/lib.c b/lib.c
index 21f27816..6305cdee 100644
--- a/lib.c
+++ b/lib.c
@@ -81,7 +81,7 @@ val system_package_s, keyword_package_s, user_package_s;
val null_s, t, cons_s, str_s, chr_s, fixnum_s, sym_s, pkg_s, fun_s, vec_s;
val lit_s, stream_s, hash_s, hash_iter_s, lcons_s, lstr_s, cobj_s, cptr_s;
val atom_s, integer_s, number_s, sequence_s, string_s;
-val env_s, bignum_s, float_s;
+val env_s, bignum_s, float_s, range_s, rcons_s;
val var_s, expr_s, regex_s, chset_s, set_s, cset_s, wild_s, oneplus_s;
val nongreedy_s;
val quote_s, qquote_s, unquote_s, splice_s;
@@ -164,6 +164,7 @@ static val code2type(int code)
case ENV: return env_s;
case BGNUM: return bignum_s;
case FLNUM: return float_s;
+ case RNG: return range_s;
}
return nil;
}
@@ -1833,9 +1834,12 @@ cnum c_num(val num);
val eql(val left, val right)
{
/* eql is the same as eq except that numbers
- are compared by value. This means that bignum and
- floatinmg point objects which are distinct are
- treated through the equal function. */
+ are compared by value, and ranges are
+ specially treated also. This means that bignum and
+ floating point objects which are distinct are
+ treated through the equal function.
+ Two ranges are eql if they are the same object,
+ or if their corresponding parts are eql. */
if (left == right)
return t;
@@ -1843,6 +1847,12 @@ val eql(val left, val right)
case BGNUM:
case FLNUM:
return equal(left, right);
+ case RNG:
+ if (type(right) == RNG &&
+ eql(from(left), from(right)) &&
+ eql(to(left), to(right)))
+ return t;
+ /* fallthrough */
default:
return nil;
}
@@ -1953,6 +1963,12 @@ val equal(val left, val right)
if (type(right) == FLNUM && left->fl.n == right->fl.n)
return t;
return nil;
+ case RNG:
+ if (type(right) == RNG &&
+ equal(from(left), from(right)) &&
+ equal(to(left), to(right)))
+ return t;
+ return nil;
case COBJ:
if (type(right) == COBJ)
return left->co.ops->equal(left, right);
@@ -3516,6 +3532,7 @@ static void less_tab_init(void)
7, /* ENV */
0, /* BGNUM */
0, /* FLNUM */
+ 0, /* RNG */
};
for (l = 0; l <= MAXTYPE; l++)
@@ -3607,6 +3624,10 @@ val less(val left, val right)
return tnil(lenl < lenr);
}
+ case RNG:
+ if (less(from(left), from(right)))
+ return t;
+ return less(to(left), to(right));
case FUN:
case PKG:
case ENV:
@@ -4534,15 +4555,17 @@ val generic_funcall(val fun, struct args *args_in)
case 0:
callerror(fun, lit("missing required arguments"));
case 1:
- if (consp(args->arg[0])) {
- cons_bind (x, y, args->arg[0]);
- if (atom(y))
- return sub(fun, x, y);
+ switch (type(args->arg[0])) {
+ case NIL:
+ case CONS:
+ case LCONS:
+ case VEC:
return sel(fun, args->arg[0]);
+ case RNG:
+ return sub(fun, args->arg[0]->rn.from, args->arg[0]->rn.to);
+ default:
+ return ref(fun, args->arg[0]);
}
- if (vectorp(args->arg[0]))
- return sel(fun, args->arg[0]);
- return ref(fun, args->arg[0]);
case 2:
return sub(fun, args->arg[0], args->arg[1]);
default:
@@ -7120,20 +7143,23 @@ val replace(val seq, val items, val from, val to)
val dwim_set(val seq, val ind_range, val newval)
{
- if (consp(ind_range) && !hashp(seq)) {
- cons_bind (x, y, ind_range);
-
- if (atom(y))
- return replace(seq, newval, x, y);
- return replace(seq, newval, ind_range, colon_k);
- } else if (vectorp(ind_range)) {
+ switch (type(ind_range)) {
+ case NIL:
+ case CONS:
+ case LCONS:
+ case VEC:
return replace(seq, newval, ind_range, colon_k);
- } else {
+ case RNG:
+ if (!hashp(seq))
+ {
+ range_bind (x, y, ind_range);
+ return replace(seq, newval, x, y);
+ }
+ /* fallthrough */
+ default:
(void) refset(seq, ind_range, newval);
return seq;
}
-
- return newval;
}
val dwim_del(val seq, val ind_range)
@@ -7141,8 +7167,8 @@ val dwim_del(val seq, val ind_range)
if (hashp(seq)) {
(void) remhash(seq, ind_range);
return seq;
- } else if (consp(ind_range)) {
- return replace(seq, nil, car(ind_range), cdr(ind_range));
+ } else if (rangep(ind_range)) {
+ return replace(seq, nil, from(ind_range), to(ind_range));
} else {
return replace(seq, nil, ind_range, succ(ind_range));
}
@@ -7180,7 +7206,7 @@ val update(val seq, val fun)
return hash_update(seq, fun);
/* fallthrough */
default:
- type_mismatch(lit("replace: ~s is not a sequence"), seq, nao);
+ type_mismatch(lit("update: ~s is not a sequence"), seq, nao);
}
return seq;
@@ -7344,6 +7370,32 @@ val sel(val seq_in, val where_in)
return make_like(out, seq_in);
}
+val rcons(val from, val to)
+{
+ val obj = make_obj();
+ obj->rn.type = RNG;
+ obj->rn.from = from;
+ obj->rn.to = to;
+ return obj;
+}
+
+val rangep(val obj)
+{
+ return type(obj) == RNG ? t : nil;
+}
+
+val from(val range)
+{
+ type_check(range, RNG);
+ return range->rn.from;
+}
+
+val to(val range)
+{
+ type_check(range, RNG);
+ return range->rn.to;
+}
+
val env(void)
{
if (env_list) {
@@ -7438,6 +7490,8 @@ static void obj_init(void)
env_s = intern(lit("env"), user_package);
bignum_s = intern(lit("bignum"), user_package);
float_s = intern(lit("float"), user_package);
+ range_s = intern(lit("range"), user_package);
+ rcons_s = intern(lit("rcons"), user_package);
var_s = intern(lit("var"), system_package);
expr_s = intern(lit("expr"), system_package);
regex_s = intern(lit("regex"), system_package);
@@ -7779,6 +7833,10 @@ finish:
case ENV:
format(out, lit("#<environment: ~p>"), obj, nao);
break;
+ case RNG:
+ format(out, if3(pretty, lit("#R(~a ~a)"), lit("#R(~s ~s)")),
+ from(obj), to(obj), nao);
+ break;
default:
format(out, lit("#<garbage: ~p>"), obj, nao);
break;
diff --git a/lib.h b/lib.h
index b96ae19d..60ef6d8c 100644
--- a/lib.h
+++ b/lib.h
@@ -56,13 +56,13 @@ typedef int_ptr_t cnum;
#endif
typedef enum type {
- NIL, NUM = TAG_NUM, CHR = TAG_CHR, LIT = TAG_LIT, CONS,
+ NIL = TAG_PTR, NUM = TAG_NUM, CHR = TAG_CHR, LIT = TAG_LIT, CONS,
STR, SYM, PKG, FUN, VEC, LCONS, LSTR, COBJ, ENV,
- BGNUM, FLNUM, MAXTYPE = FLNUM
+ BGNUM, FLNUM, RNG, MAXTYPE = RNG
/* If extending, check TYPE_SHIFT */
} type_t;
-#define TYPE_SHIFT 4
+#define TYPE_SHIFT 5
#define TYPE_PAIR(A, B) ((A) << TYPE_SHIFT | (B))
typedef enum functype
@@ -251,6 +251,11 @@ struct flonum {
double n;
};
+struct range {
+ obj_common;
+ val from, to;
+};
+
union obj {
struct any t;
struct cons c;
@@ -265,6 +270,7 @@ union obj {
struct env e;
struct bignum bn;
struct flonum fl;
+ struct range rn;
};
#if CONFIG_GEN_GC
@@ -393,7 +399,7 @@ extern val null_s, t, cons_s, str_s, chr_s, fixnum_sl;
extern val sym_s, pkg_s, fun_s, vec_s;
extern val stream_s, hash_s, hash_iter_s, lcons_s, lstr_s, cobj_s, cptr_s;
extern val atom_s, integer_s, number_s, sequence_s, string_s;
-extern val env_s, bignum_s, float_s;
+extern val env_s, bignum_s, float_s, range_s, rcons_s;
extern val var_s, expr_s, regex_s, chset_s, set_s, cset_s, wild_s, oneplus_s;
extern val nongreedy_s;
extern val quote_s, qquote_s, unquote_s, splice_s;
@@ -901,6 +907,10 @@ val update(val seq, val fun);
val search(val seq, val key, val from, val to);
val where(val func, val seq);
val sel(val seq, val where);
+val rcons(val from, val to);
+val rangep(val obj);
+val from(val range);
+val to(val range);
val env(void);
val obj_print_impl(val obj, val out, val pretty);
val obj_print(val obj, val stream);
@@ -1000,6 +1010,11 @@ loc list_collect_append(loc pptail, val obj);
CDR = cdr(c_o_n_s ## CAR ## CDR); \
} while (0)
+#define range_bind(FROM, TO, RANGE) \
+ obj_t *r_n_g ## FROM ## TO = RANGE; \
+ obj_t *FROM = from(r_n_g ## FROM ## TO); \
+ obj_t *TO = ((r_n_g ## FROM ## TO)->rn.to)
+
#define zero num_fast(0)
#define one num_fast(1)
#define two num_fast(2)
diff --git a/match.c b/match.c
index 98b35cbc..383cfd80 100644
--- a/match.c
+++ b/match.c
@@ -1353,7 +1353,7 @@ val format_field(val obj, val modifier, val filter, val eval_fun)
} else if (consp(item) && car(item) == dwim_s) {
val arg_expr = second(item);
- if (consp(arg_expr) && car(arg_expr) == cons_s) {
+ if (consp(arg_expr) && car(arg_expr) == range_s) {
val from = funcall1(eval_fun, second(arg_expr));
val to = funcall1(eval_fun, third(arg_expr));
@@ -1362,6 +1362,8 @@ val format_field(val obj, val modifier, val filter, val eval_fun)
val arg = funcall1(eval_fun, arg_expr);
if (bignump(arg) || fixnump(arg)) {
obj = ref(obj, arg);
+ } else if (rangep(arg)) {
+ obj = sub(obj, from(arg), to(arg));
} else {
uw_throwf(query_error_s, lit("format_field: bad index: ~s"),
arg, nao);
diff --git a/parser.l b/parser.l
index 9a78f988..63941223 100644
--- a/parser.l
+++ b/parser.l
@@ -695,6 +695,11 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
return HASH_S;
}
+<NESTED,BRACED>#R {
+ yylval->lineno = yyextra->lineno;
+ return HASH_R;
+}
+
<NESTED>\.\. {
yylval->lineno = yyextra->lineno;
return DOTDOT;
diff --git a/parser.y b/parser.y
index 0208386d..d63eba4f 100644
--- a/parser.y
+++ b/parser.y
@@ -100,7 +100,7 @@ int yyparse(scanner_t *, parser_t *);
%token <lineno> UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY
%token <lineno> MOD MODLAST DEFINE TRY CATCH FINALLY
%token <lineno> ERRTOK /* deliberately not used in grammar */
-%token <lineno> HASH_BACKSLASH HASH_SLASH DOTDOT HASH_H HASH_S
+%token <lineno> HASH_BACKSLASH HASH_SLASH DOTDOT HASH_H HASH_S HASH_R
%token <lineno> WORDS WSPLICE QWORDS QWSPLICE
%token <lineno> SECRET_ESCAPE_R SECRET_ESCAPE_E
@@ -116,7 +116,7 @@ int yyparse(scanner_t *, parser_t *);
%type <val> output_clause define_clause try_clause catch_clauses_opt
%type <val> if_clause elif_clauses_opt else_clause_opt
%type <val> line elems_opt elems clause_parts_h additional_parts_h
-%type <val> text texts elem var var_op modifiers vector hash struct
+%type <val> text texts elem var var_op modifiers vector hash struct range
%type <val> list exprs exprs_opt expr n_exprs r_exprs n_expr n_exprs_opt
%type <val> out_clauses out_clauses_opt out_clause
%type <val> repeat_clause repeat_parts_opt o_line
@@ -737,6 +737,16 @@ struct : HASH_S list { if (unquotes_occur($2, 0))
$$ = rlcp(strct, num($1)); } }
;
+range : HASH_R list { if (length($2) != two)
+ yyerr("range literal needs two elements");
+
+ if (unquotes_occur($2, 0))
+ $$ = rlcp(cons(rcons_s, $2), num($1));
+ else
+ { val range = rcons(first($2), second($2));
+ $$ = rlcp(range, num($1)); } }
+ ;
+
list : '(' n_exprs ')' { $$ = rl($2, num($1)); }
| '(' ')' { $$ = nil; }
| '(' LAMBDOT n_expr ')' { $$ = $3; }
@@ -818,6 +828,7 @@ n_expr : SYMTOK { $$ = symhlpr($1, t); }
| vector { $$ = $1; }
| hash { $$ = $1; }
| struct { $$ = $1; }
+ | range { $$ = $1; }
| lisp_regex { $$ = $1; }
| chrlit { $$ = $1; }
| strlit { $$ = $1; }
@@ -833,7 +844,7 @@ n_expr : SYMTOK { $$ = symhlpr($1, t); }
| SPLICE n_expr { $$ = rl(rlcp(list(sys_splice_s, $2, nao), $2),
num(parser->lineno)); }
| n_expr DOTDOT n_expr { uses_or2;
- $$ = rlcp(list(cons_s, $1, $3, nao),
+ $$ = rlcp(list(rcons_s, $1, $3, nao),
or2($1, $3)); }
| n_expr '.' n_expr { uses_or2;
if (consp($3) && car($3) == qref_s) {
diff --git a/tests/013/maze.tl b/tests/013/maze.tl
index f65bc9e0..5cb989d6 100644
--- a/tests/013/maze.tl
+++ b/tests/013/maze.tl
@@ -13,7 +13,8 @@
(if list [list (rand (length list))]))
(defun neigh (loc)
- (tree-bind (x . y) loc
+ (let ((x (from loc))
+ (y (to loc)))
(list (- x 1)..y (+ x 1)..y
x..(- y 1) x..(+ y 1))))
diff --git a/txr.1 b/txr.1
index 58feddd9..cb08499e 100644
--- a/txr.1
+++ b/txr.1
@@ -9537,22 +9537,36 @@ hash table based on the
.code eql
function, with no weak semantics.
+.NP* Range Literals
+
+.meIP >> #R( from << to )
+
+The notation
+.code #R
+followed by a two-element list syntax denotes a range literal.
+
.coNP The @ .. notation
In \*(TL, there is a special "dotdot" notation consisting of a pair of dots.
This can be written between successive atoms or compound expressions, and is a
-shorthand for cons.
+shorthand for
+.codn rcons .
That is to say,
.code A .. B
translates to
-.codn "(cons A B)" ,
+.codn "(rcons A B)" ,
and so for instance
.code (a b .. (c d) e .. f . g)
means
-.codn "(a (cons b (c d)) (cons e f) . g)" .
+.codn "(a (rcons b (c d)) (rcons e f) . g)" .
-This is a syntactic sugar useful in certain situations in which a cons is used
-to represent a pair of numbers or other objects. For instance, if
+The
+.code rcons
+function constructs a range object, which denotes a pair of values.
+Range objects are most commonly used for referencing subranges of
+sequences.
+
+For instance, if
.code L
is a list, then
.code [L 1 .. 3]
@@ -9565,18 +9579,33 @@ Note that if this notation is used in the dot position of an improper
list, the transformation still applies. That is, the syntax
.code (a . b .. c)
is valid and produces the object
-.code (a . (cons b c))
+.code (a . (rcons b c))
which is another way of writing
-.codn (a cons b c) .
+.codn (a rcons b c) ,
+which is quite probably nonsense.
The notation's
.code ..
operator associates right to left, so that
.code a..b..c
denotes
-.code (cons a (cons b c))
-or
-.codn (a b . c) .
+.codn (rcons a (rcons b c)) .
+
+Note that range objects are not printed using the dotdot notation.
+A range literal has the syntax of a two-element list, prefixed by
+.codn #R .
+(See Range Literals above).
+
+In any context where the dotdot notation may be used, and where
+it is evaluated to its value, a range literal may also be specified.
+If an evaluated dotdot notation specifies two constant expressions, then
+an equivalent range literal can replace it. For instance the
+form
+.code [L 1 .. 3]
+can also be written
+.codn [L #R(1 3)] .
+The two are syntactically different, and so if these expressions are being
+considered for their syntax rather than value, they are not the same.
.NP* The DWIM Brackets
\*(TL has a square bracket notation. The syntax
@@ -13048,6 +13077,8 @@ an abstract type:
|
+--- env
|
+ +--- range
+ |
+--- pkg
|
+--- fun
@@ -13441,6 +13472,16 @@ yields
.codn nil ;
the comparison operation which finds these numbers equal is the
.codn (= 0.0 0) .
+The
+.code eql
+function also specially treats range objects. Two distinct range objects are
+.code eql
+if their corresponding
+.meta from
+and
+.meta to
+fields are
+.codn eql .
For all other object types,
.code eql
behaves like
@@ -13506,6 +13547,14 @@ corresponding keys from each respective hash are
.code equal
objects.
+Two ranges are
+.code equal
+if their corresponding
+.meta to
+and
+.meta from
+fields are equal.
+
For some aggregate objects, there is no special semantics. Two arguments
which are symbols, packages, or streams are
.code equal
@@ -16294,6 +16343,60 @@ might step over the endpoint value, and
therefore never attain it. In this situation, the sequence also stops, and the
excess value which surpasses the endpoint is excluded from the sequence.
+.SS* Ranges
+.coNP Function @ rcons
+.synb
+.mets (rcons < from << to )
+.syne
+The
+.code rcons
+function constructs a range object which holds the values
+.meta from
+and
+.metn to .
+
+Though range objects are effectively binary cells like conses, they are atoms.
+They also aren't considered sequences, nor are they structures.
+
+Range objects are used for indicating numeric ranges, such as substrings of
+lists, arrays and strings. The dotdot notation serves as a syntactic sugar for
+.codn rcons .
+The syntax
+.code a..b
+denotes the expression
+.codn (rcons a b) .
+
+Note that ranges are immutable, meaning that it is not possible to
+replace the values in a range.
+
+.coNP Function @ rangep
+.synb
+.mets (rangep << value )
+.syne
+The
+.code rangep
+function returns
+.code t
+if
+.meta value
+is a range. Otherwise it returns
+.codn nil .
+
+.coNP Functions @ from and @ to
+.synb
+.mets (from << range )
+.mets (to << range )
+.syne
+The
+.code from
+and
+.code to
+functions retrieve, respectively, the from and to fields
+of a range.
+
+Note that these functions are not accessors, which is because
+ranges are immutable.
+
.SS* Characters and Strings
.coNP Function @ mkstring
.synb