diff options
Diffstat (limited to 'parser.y')
-rw-r--r-- | parser.y | 573 |
1 files changed, 425 insertions, 148 deletions
@@ -1,6 +1,6 @@ %{ -/* Copyright 2009-2020 +/* Copyright 2009-2024 * Kaz Kylheku <kaz@kylheku.com> * Vancouver, Canada * All rights reserved. @@ -8,33 +8,31 @@ * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. */ #include <stddef.h> #include <stdio.h> #include <assert.h> #include <limits.h> -#include <dirent.h> #include <stdlib.h> -#include <stdarg.h> -#include <setjmp.h> #include <wchar.h> #include <signal.h> #include "config.h" @@ -44,34 +42,34 @@ #include "unwind.h" #include "regex.h" #include "match.h" +#include "filter.h" #include "hash.h" #include "struct.h" #include "eval.h" #include "tree.h" #include "y.tab.h" -#include "gc.h" #include "debug.h" #include "txr.h" #include "itypes.h" #include "buf.h" #include "parser.h" +static void set_syntax_tree(parser_t *parser, val tree); static val sym_helper(parser_t *parser, wchar_t *lexeme, val meta_allowed); static val repeat_rep_helper(val sym, val args, val main, val parts); static void process_catch_exprs(val exprs); static val define_transform(parser_t *parser, val define_form); static val optimize_text(val text_form); -static val unquotes_occur(val quoted_form, int level); static val rlrec(parser_t *, val form, val line); static val rlcp_parser(parser_t *parser, val to, val from); static wchar_t char_from_name(const wchar_t *name); static val make_expr(parser_t *, val sym, val rest, val lineno); static val check_parse_time_action(val spec_rev); -static void misplaced_consing_dot_check(scanner_t *scanner, val term_atom_cons); static val uref_helper(parser_t *, val expr); static val uoref_helper(parser_t *, val expr); static val qref_helper(parser_t *, val lexpr, val rexpr); static val fname_helper(parser_t *, val name); +static val output_helper(parser_t *, val sym, val exprs, val clauses); #if YYBISON union YYSTYPE; @@ -118,19 +116,19 @@ INLINE val expand_form_ver(val form, int ver) %token <lexeme> SPACE TEXT SYMTOK %token <lineno> ALL SOME NONE MAYBE CASES BLOCK CHOOSE GATHER %token <lineno> AND OR END COLLECT -%token <lineno> UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY +%token <lineno> UNTIL COLL OUTPUT REPEAT PUSH REP SINGLE FIRST LAST EMPTY %token <lineno> MOD MODLAST DEFINE TRY CATCH FINALLY IF %token <lineno> ERRTOK /* deliberately not used in grammar */ -%token <lineno> HASH_BACKSLASH HASH_SLASH DOTDOT HASH_H HASH_S HASH_R HASH_SEMI -%token <lineno> HASH_B_QUOTE HASH_N HASH_T +%token <lineno> HASH_BACKSLASH HASH_SLASH DOTDOT HASH_H HASH_S HASH_R HASH_J +%token <lineno> HASH_SEMI HASH_B_QUOTE HASH_N HASH_T %token <lineno> WORDS WSPLICE QWORDS QWSPLICE -%token <lineno> SECRET_ESCAPE_R SECRET_ESCAPE_E SECRET_ESCAPE_I +%token <lineno> SECRET_ESCAPE_R SECRET_ESCAPE_E SECRET_ESCAPE_I SECRET_ESCAPE_J %token <lineno> OLD_DOTDOT -%token <val> NUMBER METANUM +%token <val> NUMBER METANUM JSKW %token <val> HASH_N_EQUALS HASH_N_HASH -%token <chr> REGCHAR REGTOKEN LITCHAR SPLICE +%token <chr> REGCHAR REGTOKEN LITCHAR SPLICE JSPLICE OLD_AT %token <chr> CONSDOT LAMBDOT UREFDOT OREFDOT UOREFDOT %type <val> spec hash_semi_or_n_expr hash_semi_or_i_expr @@ -139,16 +137,18 @@ INLINE val expand_form_ver(val form, int ver) %type <val> cases_clause choose_clause gather_clause collect_clause until_last %type <val> collect_repeat %type <val> clause_parts additional_parts gather_parts additional_gather_parts -%type <val> output_clause define_clause try_clause catch_clauses_opt +%type <val> output_clause output_push define_clause try_clause catch_clauses_opt %type <val> if_clause elif_clauses_opt else_clause_opt %type <val> line elems_opt elems clause_parts_h additional_parts_h %type <val> text texts elem var var_op modifiers %type <val> vector hash struct range tnode tree -%type <val> exprs exprs_opt n_exprs r_exprs i_expr i_dot_expr +%type <val> json json_val json_vals json_pairs json_col +%type <val> exprs exprs_opt n_exprs listacc i_expr i_dot_expr %type <val> n_expr n_exprs_opt n_dot_expr %type <val> list dwim meta compound %type <val> out_clauses out_clauses_opt out_clause %type <val> repeat_clause repeat_parts_opt o_line +%type <val> out_if_clause out_elif_clauses_opt out_else_clause_opt %type <val> o_elems_opt o_elems o_elem o_var q_var rep_elem rep_parts_opt %type <val> regex lisp_regex regexpr regbranch %type <val> regterm regtoken regclass regclassterm regrange @@ -164,23 +164,24 @@ INLINE val expand_form_ver(val form, int ver) %right OUTPUT REPEAT REP FIRST LAST EMPTY DEFINE IF ELIF ELSE %right SPACE TEXT NUMBER METANUM HASH_N_EQUALS HASH_N_HASH HASH_B_QUOTE %nonassoc '[' ']' '(' ')' -%left '-' ',' '\'' '^' SPLICE '@' +%left '-' ',' '\'' '^' SPLICE OLD_AT %left '|' '/' %left '&' %right '~' '*' '?' '+' '%' %right DOTDOT %right '.' CONSDOT LAMBDOT UREFDOT OREFDOT UOREFDOT REGCHAR REGTOKEN LITCHAR -%right OLD_DOTDOT +%right OLD_DOTDOT '@' %% -spec : clauses_opt { parser->syntax_tree = $1; } - | SECRET_ESCAPE_R regexpr { parser->syntax_tree = $2; end_of_regex(scnr); } +spec : clauses_opt { set_syntax_tree(parser, $1); } + | SECRET_ESCAPE_R regexpr { set_syntax_tree(parser, $2); + end_of_regex(scnr); } | SECRET_ESCAPE_E hash_semi_or_n_expr - { parser->syntax_tree = $2; YYACCEPT; } + { set_syntax_tree(parser, $2); YYACCEPT; } byacc_fool { internal_error("notreached"); } | SECRET_ESCAPE_I hash_semi_or_i_expr - { parser->syntax_tree = $2; YYACCEPT; } + { set_syntax_tree(parser, $2); YYACCEPT; } byacc_fool { internal_error("notreached"); } | SECRET_ESCAPE_E { if (yychar == YYEOF) { parser->syntax_tree = nao; @@ -196,6 +197,16 @@ spec : clauses_opt { parser->syntax_tree = $1; } yybadtok(yychar, nil); parser->syntax_tree = nil; } } + | SECRET_ESCAPE_J json_val { set_syntax_tree(parser, $2); + YYACCEPT; } + byacc_fool { internal_error("notreached"); } + | SECRET_ESCAPE_J { if (yychar == YYEOF) { + parser->syntax_tree = nao; + YYACCEPT; + } else { + yybadtok(yychar, nil); + parser->syntax_tree = nil; + } } | error '\n' { parser->syntax_tree = nil; if (parser->errors >= 8) YYABORT; @@ -489,6 +500,9 @@ elem : texts { $$ = rlc(cons(text_s, $1), $1); $$ = rlc(cons(sym, expand_forms(rest($1), nil)), $1); + else if (sym == mdo_s) + { eval_intrinsic(cons(progn_s, cdr($1)), nil, nil); + $$ = cons(do_s, nil); } else { $$ = match_expand_elem($1); match_reg_elem($$); } } @@ -607,53 +621,51 @@ catch_clauses_opt : CATCH ')' newl ; -output_clause : OUTPUT ')' o_elems '\n' +output_clause : output_push ')' o_elems '\n' out_clauses END newl { $$ = nil; - yyerr("obsolete output syntax: trailing material"); } - | OUTPUT ')' newl - END newl { $$ = rl(list(output_s, nao), num($1)); } - | OUTPUT ')' newl + yyerrorf(scnr, lit("~a: traling material"), + car($1), nao); } + | output_push ')' newl + END newl { $$ = rl(list(car($1), nao), $1); } + | output_push ')' newl out_clauses - END newl { $$ = rl(list(output_s, $4, nao), num($1)); } - | OUTPUT exprs ')' newl + END newl { $$ = rl(list(car($1), $4, nao), $1); } + | output_push exprs ')' newl out_clauses - END newl { cons_bind (dest, rest, $2); - val dest_ex = expand_form_ver(dest, 166); - val args = if3(dest_ex == dest, - $2, cons(dest_ex, rest)); - $$ = list(output_s, $5, args, nao); - rl($$, num($1)); - { val into_var = second(memql(into_k, args)); - val named_var = second(memql(named_k, args)); - match_reg_var(into_var); - match_reg_var(named_var); } } - | OUTPUT exprs ')' o_elems '\n' + END newl { $$ = output_helper(parser, car($1), $2, $5); + rl($$, $1); } + | output_push exprs ')' o_elems '\n' out_clauses END newl { $$ = nil; yyerr("invalid combination of old and " - "new syntax in output directive"); } - | OUTPUT error { $$ = nil; + "new syntax in output directive"); } + | output_push error { $$ = nil; yybadtok(yychar, lit("output directive")); } - | OUTPUT ')' o_elems '\n' + | output_push ')' o_elems '\n' error { $$ = nil; yybadtok(yychar, lit("output clause")); } - | OUTPUT ')' newl + | output_push ')' newl error { $$ = nil; yybadtok(yychar, lit("output clause")); } - | OUTPUT exprs ')' o_elems '\n' + | output_push exprs ')' o_elems '\n' error { $$ = nil; yybadtok(yychar, lit("output clause")); } - | OUTPUT exprs ')' newl + | output_push exprs ')' newl error { $$ = nil; yybadtok(yychar, lit("output clause")); } ; +output_push : OUTPUT { $$ = cons(output_s, num($1)); } + | PUSH { $$ = cons(push_s, num($1)); } + ; + out_clauses : out_clause { $$ = cons($1, nil); } | out_clause out_clauses { $$ = cons($1, $2); } ; out_clause : repeat_clause { $$ = cons($1, nil); } + | out_if_clause { $$ = cons($1, nil); } | o_line { $$ = $1; } ; @@ -706,6 +718,44 @@ repeat_parts_opt : SINGLE newl | /* empty */ { $$ = nil; } ; +out_if_clause : IF n_expr ')' newl + out_clauses_opt + out_elif_clauses_opt + out_else_clause_opt + END newl { val expr = expand($2, nil); + val ifs = $5; + val branch = cons(cons(expr, ifs), nil); + val elifs = $6; + val els = $7; + $$ = cons(if_s, + nappend2(branch, nappend2(elifs, els))); + rl($$, num($1)); } + | IF ')' + { $$ = nil; + yyerr("if requires expression"); } + | IF n_expr ')' newl + error { $$ = nil; yybadtok(yychar, lit("if clause")); } + ; + +out_elif_clauses_opt : ELIF n_exprs_opt ')' newl + out_clauses_opt + out_elif_clauses_opt + { val expr = expand(car($2), nil); + val elifs = $5; + val branch = cons(cons(expr, elifs), nil); + if (null($2)) + yyerr("elif requires expression"); + else if (cdr($2)) + yyerr("extra expression in elif"); + $$ = nappend2(branch, $6); } + | { $$ = nil; } + ; + +out_else_clause_opt : ELSE newl + out_clauses_opt + { $$ = cons(cons(t, $3), nil); } + | { $$ = nil; } + ; out_clauses_opt : out_clauses { $$ = $1; } | /* empty */ { $$ = nil; } @@ -783,13 +833,16 @@ var : SYMTOK { $$ = list(var_s, symhlpr($1, nil), nao); } | var_op SYMTOK { $$ = list(var_s, symhlpr($2, nil), $1, nao); } | var_op '{' SYMTOK '}' { $$ = list(var_s, symhlpr($3, nil), $1, nao); } | var_op '{' SYMTOK regex '}' { $$ = nil; + free($3); yyerr("longest match " "not useable with regex"); } | var_op '{' SYMTOK NUMBER '}' { $$ = nil; + free($3); yyerr("longest match " "not useable with " "fixed width match"); } | SYMTOK error { $$ = nil; + free($1); yybadtok(yychar, lit("variable spec")); } | var_op error { $$ = nil; yybadtok(yychar, lit("variable spec")); } @@ -821,6 +874,7 @@ o_var : SYMTOK { val expr = symhlpr($1, nil); val quasi_items = cons(quasi_var, nil); $$ = car(expand_quasi(quasi_items, nil)); } } | SYMTOK error { $$ = nil; + free($1); yybadtok(yychar, lit("variable spec")); } ; @@ -832,7 +886,7 @@ q_var : '@' '{' n_expr n_exprs_opt '}' ; -vector : '#' list { if (parser->quasi_level > 0 && unquotes_occur($2, 0)) +vector : '#' list { if (parser->quasi_level > 0) $$ = rlc(cons(vector_lit_s, cons($2, nil)), $2); else @@ -841,7 +895,9 @@ vector : '#' list { if (parser->quasi_level > 0 && unquotes_occur( yybadtok(yychar, lit("unassigned/reserved # notation")); } ; -hash : HASH_H list { if (parser->quasi_level > 0 && unquotes_occur($2, 0)) +hash : HASH_H list { if (parser->ignore) + $$ = nil; + else if (parser->quasi_level > 0) $$ = rl(cons(hash_lit_s, $2), num($1)); else $$ = rl(hash_construct(first($2), @@ -851,9 +907,12 @@ hash : HASH_H list { if (parser->quasi_level > 0 && unquotes_occur( yybadtok(yychar, lit("hash literal")); } ; -struct : HASH_S list { if (parser->quasi_level > 0 && unquotes_occur($2, 0)) - $$ = rl(cons(struct_lit_s, $2), - num($1)); +struct : HASH_S list { if (parser->ignore) + { $$ = nil; } + else if ((parser->quasi_level > 0) || + (parser->read_unknown_structs && + !find_struct_type(first($2)))) + { $$ = rl(cons(struct_lit_s, $2), num($1)); } else { val strct = make_struct_lit(first($2), rest($2)); @@ -879,8 +938,10 @@ tnode : HASH_N list { if (gt(length($2), three)) yybadtok(yychar, lit("tree node literal")); } ; -tree : HASH_T list { if (parser->quasi_level > 0 && unquotes_occur($2, 0)) - $$ = rl(cons(tree_lit_s, $2), num($1)); +tree : HASH_T list { if (parser->ignore) + { $$ = nil; } + else if (parser->quasi_level > 0) + { $$ = rl(cons(tree_lit_s, $2), num($1)); } else { val opts = first($2); val key_fn_name = pop(&opts); @@ -890,12 +951,109 @@ tree : HASH_T list { if (parser->quasi_level > 0 && unquotes_occur($ val less_fn = fname_helper(parser, less_fn_name); val equal_fn = fname_helper(parser, equal_fn_name); val tr = tree(rest($2), key_fn, - less_fn, equal_fn); + less_fn, equal_fn, t); $$ = rl(tr, num($1)); } } | HASH_T error { $$ = nil; yybadtok(yychar, lit("tree node literal")); } ; +json : HASH_J json_val { $$ = list(json_s, quote_s, $2, nao); + end_of_json(scnr); } + | HASH_J '^' { parser->quasi_level++; } + json_val { parser->quasi_level--; + end_of_json(scnr); + $$ = list(json_s, sys_qquote_s, $4, nao); } +json_val : NUMBER { $$ = $1; } + | JSKW { $$ = $1; } + | '"' '"' { $$ = null_string; } + | '"' litchars '"' { $$ = $2; + rl($$, num(parser->lineno)); } + | '[' ']' { $$ = vector(zero, nil); } + | '[' json_vals + opt_comma ']' { $$ = if3(vectorp($2), + $2, + rl(cons(vector_lit_s, + cons(nreverse($2), nil)), + $2)); } + | '{' '}' { $$ = make_hash(hash_weak_none, t); } + | '{' json_pairs + opt_comma '}' { $$ = if3(hashp($2), + $2, + rl(cons(hash_lit_s, + cons(nil, nreverse($2))), + $2)); } + | '~' { parser->quasi_level--; } + n_dot_expr { parser->quasi_level++; + end_of_json_unquote(scnr); + $$ = rl(rlc(list(sys_unquote_s, $3, nao), $3), + num(parser->lineno)); } + | JSPLICE { parser->quasi_level--; } + n_dot_expr { parser->quasi_level++; + end_of_json_unquote(scnr); + $$ = rl(rlc(list(sys_splice_s, $3, nao), $3), + num(parser->lineno)); } + | HASH_N_EQUALS { parser_circ_def(parser, $1, unique_s); } + json_val { parser_circ_def(parser, $1, $3); + $$ = $3; } + | HASH_N_HASH { $$ = parser_circ_ref(parser, $1); } + | '"' error { $$ = nil; + yybadtok(yychar, lit("JSON string")); } + | '[' error { $$ = nil; + yybadtok(yychar, lit("JSON array")); } + | '{' error { $$ = nil; + yybadtok(yychar, lit("JSON hash")); } + ; + +opt_comma : ',' { if (!parser->read_bad_json) + yyerr("trailing comma in JSON array"); } + | + ; + +json_vals : json_val { $$ = if3(parser->quasi_level > 0, + cons($1, nil), + vector(one, $1)); } + | json_vals ',' json_val { if (consp($1)) + { $$ = cons($3, $1); } + else if (parser->quasi_level > 0) + { val li = list_vec($1); + $$ = cons($3, li); } + else + { vec_push($1, $3); + $$ = $1; } } + | json_vals json_val { yyerr("missing comma in JSON array"); + $$ = $1; } + | json_vals error { yybadtok(yychar, lit("JSON array")); + $$ = $1; } + ; + +json_pairs : json_val json_col json_val { if (parser->quasi_level > 0) + { $$ = cons(list($1, $3, nao), nil); } + else + { $$ = make_hash(hash_weak_none, t); + sethash($$, $1, $3); } } + | json_pairs ',' + json_val json_col json_val { if (consp($1)) + { $$ = cons(list($3, $5, nao), $1); } + else if (parser->quasi_level > 0) + { val pa = hash_pairs($1); + $$ = cons(list($3, $5, nao), pa); } + else + { sethash($1, $3, $5); + $$ = $1; } } + | json_val json_val { yyerr("missing colon in JSON hash"); } + | json_pairs json_val + error { yyerr("missing comma in JSON hash"); } + | json_val error { yybadtok(yychar, lit("JSON hash")); } + ; + +json_col : SYMTOK { if ($1[0] == ':' && $1[1] == 0) + { $$ = nil; } + else + { yybadtok(yychar, lit("JSON hash")); } } + | ':' { $$ = nil; } + ; + + list : '(' n_exprs ')' { $$ = rl($2, num($1)); } | '(' '.' n_exprs ')' { val a = car($3); val ur = uref_helper(parser, a); @@ -919,12 +1077,23 @@ meta : '@' n_expr { if (consp($2)) yybadtok(yychar, lit("meta expression")); } ; +meta : OLD_AT n_expr { if (consp($2)) + $$ = rl(cons(expr_s, cons($2, nil)), num($1)); + else + $$ = rl(cons(var_s, cons($2, nil)), + num($1)); } + | OLD_AT error { $$ = nil; + yybadtok(yychar, lit("meta expression")); } + ; + dwim : '[' '.' n_exprs ']' { val a = car($3); val ur = uref_helper(parser, a); $$ = rlcp_tree(cons(dwim_s, cons(ur, cdr($3))), ur); } | '[' n_exprs ']' { $$ = rl(cons(dwim_s, $2), num($1)); } | '[' ']' { $$ = rl(cons(dwim_s, nil), num($1)); } + | '[' LAMBDOT n_expr ']' { $$ = rl(cons(dwim_s, $3), num($1)); } + | '[' CONSDOT n_expr ']' { $$ = rl(cons(dwim_s, $3), num($1)); } | '[' error { $$ = nil; yybadtok(yychar, lit("DWIM expression")); } ; @@ -941,54 +1110,47 @@ exprs_opt : exprs { $$ = $1; } | /* empty */ { $$ = nil; } ; -n_exprs : r_exprs { val term_atom = pop(&$1); - val tail_cons = $1; - $$ = us_nreverse($1); - if (term_atom != unique_s) - rplacd(tail_cons, term_atom); } +n_exprs : listacc { $$ = $1->c.cdr; + $1->c.cdr = nil; + if ($$->c.car == nao) + $$ = $$->c.cdr; } + | listacc CONSDOT n_expr + { $$ = $1->c.cdr; + $1->c.cdr = $3; } ; -r_exprs : n_expr { val exprs = cons($1, nil); - rlc(exprs, $1); - $$ = rlc(cons(unique_s, exprs), exprs); } +listacc : n_expr { $$ = cons($1, nil); + rlc($$, $1); + $$->c.cdr = $$; } | HASH_SEMI { parser->ignore = 1; } n_expr { parser->ignore = 0; - $$ = cons(unique_s, nil); } - | r_exprs HASH_SEMI { parser->ignore = 1; } + $$ = cons(nao, nil); + $$->c.cdr = $$; } + | HASH_SEMI '.' { parser->ignore = 1; } + n_expr { parser->ignore = 0; + $$ = cons(nao, nil); + $$->c.cdr = $$; } + | listacc HASH_SEMI { parser->ignore = 1; } n_expr { parser->ignore = 0; $$ = $1; } - | r_exprs n_expr { uses_or2; - val term_atom_cons = $1; - val exprs = cdr($1); - misplaced_consing_dot_check(scnr, term_atom_cons); - rplacd(term_atom_cons, - rlc(cons($2, exprs), or2($2, exprs))); - $$ = term_atom_cons; } - | r_exprs CONSDOT n_expr - { val term_atom_cons = $1; - misplaced_consing_dot_check(scnr, term_atom_cons); - rplaca(term_atom_cons, $3); + | listacc HASH_SEMI '.' { parser->ignore = 1; } + n_expr { parser->ignore = 0; $$ = $1; } - | WSPLICE wordslit { $$ = cons(unique_s, us_nreverse(rl($2, num($1)))); - rlc($$, cdr($$)); } - | r_exprs WSPLICE - wordslit { val term_atom_cons = $1; - val exprs = cdr($1); - misplaced_consing_dot_check(scnr, term_atom_cons); - rplacd(term_atom_cons, - nappend2(rl(us_nreverse($3), num($2)), - exprs)); - $$ = term_atom_cons; } - | QWSPLICE wordsqlit { $$ = cons(unique_s, rl($2, num($1))); - rlc($$, cdr($$)); } - | r_exprs QWSPLICE - wordsqlit { val term_atom_cons = $1; - val exprs = cdr($1); - misplaced_consing_dot_check(scnr, term_atom_cons); - rplacd(term_atom_cons, - nappend2(rl(us_nreverse($3), num($2)), - exprs)); - $$ = term_atom_cons; } + | listacc n_expr { uses_or2; + $$ = rlc(cons($2, $1->c.cdr), or2($2, $1->c.cdr)); + $1->c.cdr = $$; } + | WSPLICE wordslit { $$ = lastcons(rl($2, num($1))); + $$->c.cdr = $2; } + | listacc WSPLICE + wordslit { $$ = lastcons(rl($3, num($2))); + $$->c.cdr = $1->c.cdr; + $1->c.cdr = $3; } + | QWSPLICE wordsqlit { $$ = lastcons(rl($2, num($1))); + $$->c.cdr = $2; } + | listacc QWSPLICE + wordsqlit { $$ = lastcons(rl($3, num($2))); + $$->c.cdr = $1->c.cdr; + $1->c.cdr = $3; } ; i_expr : SYMTOK { $$ = ifnign(symhlpr($1, t)); } @@ -1002,6 +1164,7 @@ i_expr : SYMTOK { $$ = ifnign(symhlpr($1, t)); } | range { $$ = $1; } | tnode { $$ = $1; } | tree { $$ = $1; } + | json { $$ = $1; } | lisp_regex { $$ = $1; } | chrlit { $$ = $1; } | strlit { $$ = $1; } @@ -1043,6 +1206,7 @@ n_expr : SYMTOK { $$ = ifnign(symhlpr($1, t)); } | range { $$ = $1; } | tnode { $$ = $1; } | tree { $$ = $1; } + | json { $$ = $1; } | lisp_regex { $$ = $1; } | chrlit { $$ = $1; } | strlit { $$ = $1; } @@ -1155,6 +1319,7 @@ regterm : regterm '*' { $$ = list(zeroplus_s, $1, nao); } | '-' { $$ = chr('-'); } | REGCHAR { $$ = chr($1); } | regtoken { $$ = $1; } + | TEXT { $$ = list(compound_s, string_own($1), nao); } | '(' regexpr ')' { $$ = $2; } | '(' error { $$ = nil; yybadtok(yychar, lit("regex subexpression")); } @@ -1213,7 +1378,7 @@ strlit : '"' '"' { $$ = null_string; } chrlit : HASH_BACKSLASH SYMTOK { wchar_t ch; val str = string_own($2); - const wchar_t *cstr = c_str(str); + const wchar_t *cstr = c_str(str, nil); if (cstr[1] == 0) { ch = cstr[0]; } @@ -1226,6 +1391,10 @@ chrlit : HASH_BACKSLASH SYMTOK { wchar_t ch; $$ = chr(ch); } | HASH_BACKSLASH LITCHAR { $$ = chr($2); end_of_char(scnr); } + | HASH_BACKSLASH TEXT { free($2); + yyerrorf(scnr, + lit("invalid UTF-8 used as character name"), + nao); } | HASH_BACKSLASH error { $$ = nil; yybadtok(yychar, lit("character literal")); } @@ -1246,7 +1415,6 @@ quasi_items : quasi_item { $$ = cons($1, nil); ; quasi_item : litchars { $$ = $1; } - | TEXT { $$ = string_own($1); } | q_var { $$ = $1; } | METANUM { $$ = cons(var_s, cons($1, nil)); rl($$, num(parser->lineno)); } @@ -1259,11 +1427,15 @@ quasi_item : litchars { $$ = $1; } litchars : LITCHAR { $$ = mkstring(one, chr($1)); } | LITCHAR restlitchar { val ch = mkstring(one, chr($1)); - $$ = string_extend(ch, $2); } + $$ = string_extend(ch, $2, t); } + | TEXT { $$ = string_own($1); } + | TEXT restlitchar { $$ = string_extend(string_own($1), $2, t); } ; restlitchar : LITCHAR { $$ = mkstring(one, chr($1)); } - | restlitchar LITCHAR { $$ = string_extend($1, chr($2)); } + | restlitchar LITCHAR { $$ = string_extend($1, chr($2), nil); } + | TEXT { $$ = string_own($1); } + | restlitchar TEXT { $$ = string_extend($1, string_own($2), nil); } ; wordslit : '"' { $$ = nil; } @@ -1317,8 +1489,6 @@ not_a_clause : ALL { $$ = mkexp(all_s, nil, num(parser->lineno)); } | OR { $$ = mkexp(or_s, nil, num(parser->lineno)); } | TRY { $$ = mkexp(try_s, nil, num(parser->lineno)); } | FINALLY { $$ = mkexp(finally_s, nil, num(parser->lineno)); } - | ELSE { $$ = mkexp(else_s, nil, num(parser->lineno)); } - | ELIF { $$ = mkexp(elif_s, nil, num(parser->lineno)); } | BLOCK exprs_opt ')' { $$ = mkexp(block_s, $2, nil); } | CHOOSE @@ -1334,10 +1504,14 @@ not_a_clause : ALL { $$ = mkexp(all_s, nil, num(parser->lineno)); } | CATCH exprs_opt ')' { $$ = mkexp(catch_s, $2, nil); } | IF - exprs_opt ')' { $$ = mkexp(if_s, $2, nil); } + n_expr n_expr exprs_opt ')' { $$ = mkexp(if_s, + cons($2, + cons($3, $4)), + nil); } | OUTPUT exprs_opt ')' { yyerr("@(output) doesn't nest"); } - + | PUSH + exprs_opt ')' { yyerr("@(push) doesn't nest"); } ; %% @@ -1350,9 +1524,19 @@ void yydebug_onoff(int val) { #if YYDEBUG yydebug = val; +#else + (void) val; #endif } +static void set_syntax_tree(parser_t *parser, val tree) +{ + if (tree == nao) + parser->syntax_tree = tree; + else + set(mkloc(parser->syntax_tree, parser->parser), tree); +} + static val sym_helper(parser_t *parser, wchar_t *lexeme, val meta_allowed) { scanner_t *scnr = parser->scanner; @@ -1450,15 +1634,70 @@ static val expand_repeat_rep_args(val args) } } else if (exp_pair) { match_reg_var(arg); + ptail = list_collect(ptail, arg); } exp_pair = exp_pairs = nil; - ptail = list_collect(ptail, arg); } return out; } +static val extract_vars(val output_spec) +{ + list_collect_decl (vars, tai); + + if (consp(output_spec)) { + val sym = first(output_spec); + if (sym == var_s) { + val expr = second(output_spec); + val modifiers = third(output_spec); + + if (bindable(expr)) { + tai = list_collect(tai, expr); + } else if (opt_compat && opt_compat <= 128) { + tai = list_collect_nconc(tai, extract_vars(expr)); + } else { + val frefs = expand_with_free_refs(expr, nil, nil); + tai = list_collect_nconc(tai, second(frefs)); + } + + for (; modifiers; modifiers = cdr(modifiers)) { + val mod = car(modifiers); + if (bindable(mod)) { + tai = list_collect(tai, mod); + } else if (consp(mod)) { + val msym = car(mod); + + if (msym == dwim_s) { + val arg = second(mod); + + if (bindable(arg)) { + tai = list_collect(tai, arg); + } else if (consp(arg) && car(arg) == rcons_s) { + val f = second(arg); + val t = third(arg); + if (bindable(f)) + tai = list_collect(tai, f); + if (bindable(t)) + tai = list_collect(tai, t); + } + } + } + } + } else if (sym == expr_s) { + val expr = second(output_spec); + val frefs = expand_with_free_refs(expr, nil, nil); + tai = list_collect_nconc(tai, second(frefs)); + } else { + for (; output_spec; output_spec = cdr(output_spec)) + tai = list_collect_nconc(tai, extract_vars(car(output_spec))); + } + } + + return vars; +} + static val repeat_rep_helper(val sym, val args, val main, val parts) { uses_or2; @@ -1469,6 +1708,7 @@ static val repeat_rep_helper(val sym, val args, val main, val parts) val empty_parts = nil, empty_parts_p = nil; val mod_parts = nil, mod_parts_p = nil; val modlast_parts = nil, modlast_parts_p = nil; + val occur_vars = nil; val iter; for (iter = parts; iter != nil; iter = cdr(iter)) { @@ -1506,9 +1746,17 @@ static val repeat_rep_helper(val sym, val args, val main, val parts) mod_parts = or2(nreverse(mod_parts), mod_parts_p); modlast_parts = or2(nreverse(modlast_parts), modlast_parts_p); + occur_vars = extract_vars(main); + occur_vars = nappend2(occur_vars, extract_vars(single_parts)); + occur_vars = nappend2(occur_vars, extract_vars(first_parts)); + occur_vars = nappend2(occur_vars, extract_vars(last_parts)); + occur_vars = nappend2(occur_vars, extract_vars(empty_parts)); + occur_vars = nappend2(occur_vars, extract_vars(mod_parts)); + occur_vars = uniq(occur_vars); + return list(sym, exp_args, main, single_parts, first_parts, last_parts, empty_parts, nreverse(mod_parts), - nreverse(modlast_parts), nao); + nreverse(modlast_parts), occur_vars, nao); } static void process_catch_exprs(val exprs) @@ -1570,23 +1818,6 @@ static val optimize_text(val text_form) return text_form; } -static val unquotes_occur(val quoted_form, int level) -{ - uses_or2; - - if (atom(quoted_form)) { - return nil; - } else { - val sym = car(quoted_form); - if (sym == sys_unquote_s || sym == sys_splice_s) - return (level == 0) ? t : unquotes_occur(cdr(quoted_form), level - 1); - if (sym == sys_qquote_s) - return unquotes_occur(cdr(quoted_form), level + 1); - return or2(unquotes_occur(sym, level), - unquotes_occur(cdr(quoted_form), level)); - } -} - val expand_meta(val form, val menv) { val sym; @@ -1766,12 +1997,8 @@ static val check_parse_time_action(val spec_rev) if (sym == include_s) { return nappend2(nreverse(include(line)), rest(spec_rev)); } - if (sym == mdo_s) { - eval_intrinsic(cons(progn_s, cdr(elem)), nil); - return nil; - } if (sym == in_package_s) { - eval_intrinsic(elem, nil); + eval_intrinsic(elem, nil, nil); return nil; } } @@ -1779,14 +2006,6 @@ static val check_parse_time_action(val spec_rev) return spec_rev; } -static void misplaced_consing_dot_check(scanner_t *scanner, val term_atom_cons) -{ - if (car(term_atom_cons) != unique_s) { - yyerrorf(scanner, lit("misplaced consing dot"), nao); - rplaca(term_atom_cons, unique_s); - } -} - static val uref_helper(parser_t *parser, val expr) { if (consp(expr) && car(expr) == qref_s) { @@ -1838,6 +2057,52 @@ static val fname_helper(parser_t *parser, val name) return nil; } +static val output_helper(parser_t *parser, val sym, val exprs, val clauses) +{ + cons_bind (dest, rest, exprs); + + val dest_ex = expand_form_ver(dest, 166); + val args = if3(dest_ex == dest, exprs, cons(dest_ex, rest)); + val args_kw = keywordp(car(args)); + val alist = improper_plist_to_alist(if3(args_kw, args, cdr(args)), + v_output_keys); + + if (!args_kw && sym == push_s) + { + yyerrorf(parser->scanner, lit("~s: doesn't support destination argument"), + sym, nao); + } + + + while (alist) { + val key = car(pop(&alist)); + + if (key == filter_k) + continue; + + if (sym != push_s) { + if (key == nothrow_k || key == append_k || + key == named_k || key == continue_k || + key == finish_k || key == into_k) + { + continue; + } + } + + yyerrorf(parser->scanner, lit("~s: unsupported keyword ~s"), + sym, key, nao); + } + + if (sym != push_s) { + val into_var = second(memql(into_k, args)); + val named_var = second(memql(named_k, args)); + match_reg_var(into_var); + match_reg_var(named_var); + } + + return list(sym, clauses, args, nao); +} + #ifndef YYEOF #define YYEOF 0 #endif @@ -1859,7 +2124,9 @@ void yybadtoken(parser_t *parser, int tok, val context) case NONE: problem = lit("\"none\""); break; case MAYBE: problem = lit("\"maybe\""); break; case CASES: problem = lit("\"cases\""); break; + case BLOCK: problem = lit("\"block\""); break; case CHOOSE: problem = lit("\"choose\""); break; + case GATHER: problem = lit("\"gather\""); break; case AND: problem = lit("\"and\""); break; case OR: problem = lit("\"or\""); break; case END: problem = lit("\"end\""); break; @@ -1873,6 +2140,8 @@ void yybadtoken(parser_t *parser, int tok, val context) case FIRST: problem = lit("\"first\""); break; case LAST: problem = lit("\"last\""); break; case EMPTY: problem = lit("\"empty\""); break; + case MOD: problem = lit("\"mod\""); break; + case MODLAST: problem = lit("\"modlast\""); break; case DEFINE: problem = lit("\"define\""); break; case TRY: problem = lit("\"try\""); break; case CATCH: problem = lit("\"catch\""); break; @@ -1881,9 +2150,12 @@ void yybadtoken(parser_t *parser, int tok, val context) case ELIF: problem = lit("\"elif\""); break; case ELSE: problem = lit("\"else\""); break; case NUMBER: problem = lit("number"); break; + case JSKW: problem = lit("JSON keyword"); break; case REGCHAR: problem = lit("regular expression character"); break; case REGTOKEN: problem = lit("regular expression token"); break; case LITCHAR: problem = lit("string literal character"); break; + case SPLICE: problem = lit("*"); break; + case JSPLICE: problem = lit("~*"); break; case CONSDOT: case LAMBDOT: problem = lit("consing dot"); break; case DOTDOT: problem = lit(".."); break; @@ -1898,6 +2170,7 @@ void yybadtoken(parser_t *parser, int tok, val context) case HASH_R: problem = lit("#R"); break; case HASH_N: problem = lit("#N"); break; case HASH_T: problem = lit("#T"); break; + case HASH_J: problem = lit("#J"); break; case HASH_SEMI: problem = lit("#;"); break; case HASH_N_EQUALS: problem = lit("#<n>="); break; case HASH_N_HASH: problem = lit("#<n>#"); break; @@ -1906,6 +2179,7 @@ void yybadtoken(parser_t *parser, int tok, val context) case WSPLICE: problem = lit("#*\""); break; case QWORDS: problem = lit("#`"); break; case QWSPLICE: problem = lit("#*`"); break; + case OLD_AT: problem = lit("@"); break; } if (problem != 0) @@ -1999,8 +2273,11 @@ int parse(parser_t *parser, val name, enum prime_parser prim) if (parser->errors && parser->syntax_tree == nil && parser->lineno != start_line) { - yyerrorf(parser->scanner, lit("while parsing form starting at line ~a"), - num(start_line), nao); + cnum curline = parser->lineno; + parser->lineno = start_line; + yyerrorf(parser->scanner, + lit("while parsing expression starting on this line"), nao); + parser->lineno = curline; } return res; |