summaryrefslogtreecommitdiffstats
path: root/parser.l
Commit message (Collapse)AuthorAgeFilesLines
* c_num: now takes self argument.Kaz Kylheku2020-06-291-2/+3
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The c_num and c_unum functions now take a self argument for identifying the calling function. This requires changes in a large number of places. In a few places, additional functions acquire a self argument. The ffi module has the most extensive example of this. Some functions mention their name in a larger string, or have scattered literals giving their name; with the introduction of the self local variable, these are replaced by references to self. In the following changelog, the notation TS stands for "take self argument", meaning that the functions acquires a new "val self" argument. The notation DS means "define self": the functions in question defines a self variable, which they pass down. The notation PS means that the functions pass down an existing self variable to functions that now require it. * args.h (args_count): TS. * arith.c (c_unum, c_num): TS. (toint, exptv): DS. * buf.c (buf_check_len, buf_check_alloc_size, buf_check_index, buf_do_set_len, replace_buf, buf_put_buf, buf_put_i8, buf_put_u8, buf_put_char, buf_put_uchar, buf_get_bytes, buf_get_i8, buf_get_u8, buf_get_cptr, buf_strm_get_byte_callback, buf_strm_unget_byte, buf_swap32, str_buf, buf_int, buf_uint, int_buf, uint_buf): PS. (make_duplicate_buf, buf_shrink, sub_buf, buf_print, buf_pprint): DS. * chskum.c (sha256_stream_impl, sha256_buf, crc32_buf, md5_stream_impl, md5_buf): TS. (chksum_ensure_buf, sha256_stream, sha256, sha256_hash, md5_stream, md5, md5_hash): PS. (crc32_stream): DS. * combi.c (perm_while_fun, perm_gen_fun_common, perm_str_gen_fun, rperm_gen_fun, comb_vec_gen_fun, comb_str_gen_fun, rcomb_vec_gen_fun, rcomb_str_gen_fun): DS. * diff.c (dbg_clear, dbg_set, dbg_restore): DS. * eval.c (do_eval, gather_free_refs, maprodv, maprendv, maprodo, do_args_apf, do_args_ipf): DS. (op_dwim, me_op, map_common): PS. (prod_common): TS. * ffi.c (struct txr_ffi_type): release member TS. (make_ffi_type_pointer): PS and release argument TS. (ffi_varray_dynsize, ffi_array_in, ffi_array_put_common, ffi_array_get_common, ffi_varray_in, ffi_varray_null_term): PS. (ffi_simple_release, ffi_ptr_in_release, ffi_struct_release, ffi_wchar_array_get, ffi_array_release_common, ffi_array_release, ffi_varray_release): TS. (ffi_float_put, double_put, ffi_be_i16_put, ffi_be_u16_put, ffi_le_i16_put, ffi_le_u16_put, ffi_be_i32_put, ffi_be_u32_put, ffi_le_i32_put, ffi_sbit_put, ffi_ubit_put, ffi_buf_d_put, make_ffi_type_array, make_ffi_type_enum, ffi_type_compile, make_ffi_type_desc, ffi_make_call_desc, ffi_call_wrap, ffi_closure_dispatch_save, ffi_put_into, ffi_in, ffi_get, ffi_put, carray_set_length, carray_blank, carray_buf, carray_buf_sync, carray_cptr, carray_refset, carray_sub, carray_replace, carray_uint, carray_int): PS. (carray_vec, carray_list): DS. * filter.c (url_encode, url_decode, base64_stream_enc_impl): DS. * ftw.c (ftw_callback, ftw_wrap): DS. * gc.c (mark_obj, gc_set_delta): DS. * glob.c (glob_wrap): DS. * hash.c (equal_hash, eql_hash, eq_hash, do_make_hash, hash_equal, set_hash_traversal_limit, gen_hash_seed): DS. * itypes.c (c_i8, c_u8, c_i16, c_u16, c_i32, c_u32, c_i64, c_u64, c_short, c_ushort, c_int, c_uint, c_long, c_ulong): PS. * lib.c (seq_iter_rewind): TS and becomes internal. (seq_iter_init_with_info, seq_setpos, replace_str, less, replace_vec, diff, isec, obj_print_impl): PS. (nthcdr, equal, mkstring, mkustring, upcase_str, downcase_str, search_str, sub_str, cat_str, scat2, scat3, fmt_join, split_str_keep, split_str_set, trim_str, int_str, chr_int, chr_str, chr_str_set, vector, vecref, vecref_l, list_vec, copy_vec, sub_vec, cat_vec, lazy_str_put, lazy_str_gt, length_str_ge, length_str_lt, length_str_le, cptr_size_hint, cptr_int, out_lazy_str, out_quasi_str, time_string_local_time, time_string_utc, time_fields_local_time, time_fields_utc, time_struct_local, time_struct_utc, make_time, time_meth, time_parse_meth): DS. (init_str, cat_str_init, cat_str_measure, cat_str_append, vscat, time_fields_to_tm, time_struct_to_tm, make_time_impl): TS. * lib.h (seq_iter_rewind): Declaration removed. (c_num, c_unum, init_str): Declarations updated. * match.c (LOG_MISMATCH, LOG_MATCH): PS. (h_skip, h_coll, do_output_line, do_output, v_skip, v_fuzz, v_collect): DS. * parser.c (parser, circ_backpatch, report_security_problem, hist_save, repl, lino_fileno, lino_getch, lineno_getl, lineno_gets, lineno_open): DS. (parser_set_lineno, lisp_parse_impl): PS. * parser.l (YY_INPUT): PS. * rand.c (make_random_state): PS. * regex.c (print_rec): DS. (search_regex): PS. * signal.c (kill_wrap, raise_wrap, get_sig_handler, getitimer_wrap, setitimer_wrap): DS. * socket.c (addrinfo_in, sockaddr_pack, fd_timeout, to_connect, open_sockfd, sock_mark_connected, sock_timeout): TS. (getaddrinfo_wrap, dgram_set_sock_peer, sock_bind, sock_connect, sock_listen, sock_accept, sock_shutdown, sock_send_timeout, sock_recv_timeout, socketpair_wrap): DS. * stream.c (generic_fill_buf, errno_to_string, stdio_truncate, string_out_put_string, open_fileno, open_command, base_name, dir-name): DS. (unget_byte, put_buf, fill_buf, fill_buf_adjust, get_line_as_buf, formatv, put_byte, test_set_indent_mode, test_neq_set_indent_mode, set_indent_mode, set_indent, inc_indent, set_max_length, set_max_depth, open_subprocess, run ): PS. (fds_subst, fds_swizzle): TS. * struct.c (make_struct_type, super, umethod_args_fun): PS. (method_args_fun): DS. * strudel.c (strudel_put_buf, strudel_fill_buf): DS. * sysif.c (errno_wrap, exit_wrap, usleep_wrap, mkdir_wrap, ensure_dir, makedev_wrap, minor_wrap, major_wrap, mknod_wrap, mkfifo_wrap, wait_wrap, wifexited, wexitstatus, wifsignaled, wtermsig, wcoredump, wifstopped, wstopsig, wifcontinued, dup_wrap, close_wrap, exit_star_wrap, umask_wrap, setuid_wrap, seteuid_wrap, setgid_wrap, setegid_wrap, simulate_setuid_setgid, getpwuid_wrap, fnmatch_wrap, dlopen_wrap): DS. (chmod_wrap, do_chown, flock_pack, do_utimes, poll_wrap, setgroups_wrap, setresuid_wrap, setresgid_wrap, getgrgid_wrap): PS. (c_time): TS. * sysif.h (c_time): Declaration updated. * syslog.c (openlog_wrap, syslog_wrap): DS. * termios.c (termios_pack): TS. (tcgetattr_wrap, tcsetattr_wrap, tcsendbreak_wrap, tcdrain_wrap, tcflush_wrap, tcflow_rap, encode_speeds, decode_speeds): DS. * txr.c (compato, array_dim, gc_delta): DS. * unwind.c (uw_find_frames_by_mask): DS. * vm.c (vm_make_desc): PS. (vm_make_closure, vm_swtch): DS.
* Remove unnecessary #include directives.Kaz Kylheku2020-04-221-6/+0
| | | | | | | | | | Time for some spring cleaning. * args.c, arith.c, buf.c, cadr.c, chksum.c, debug.c, ftw.c, gc.c, gencadr.txr, glob.c, hash.c, lisplib.c, match.c, parser.c, parser.l, parser.y, rand.c, signal.c, stream.c, strudel.c, syslog.c, tree.c, unwind.c, utf8.c, vm.c: Numerous unnecessary #include directives removed.
* parser: C++ cast fix.Kaz Kylheku2020-04-221-1/+1
| | | | | * parser.l (YY_INPUT): Must use coerce because we are changing from char * to unsigned char *.
* parser: batched read operation for speed.Kaz Kylheku2020-04-221-7/+5
| | | | | | | | | | | | | | | | As a result of this change, the startup time is reduced. The command txr -e '(compile-toplevel nil)' shows a 54% speedup: around 110 milliseconds down from around 170. Programs that read large amounts of TXR Lisp data will benefit. * parser.l (YY_INPUT): Use new get_bytes function instead of get_byte to read a buffer at a time. * stream.c (get_bytes): New function. * stream.h (get_bytes): Declared.
* Copyright year bump 2020.Kaz Kylheku2019-12-311-1/+1
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | * LICENSE, LICENSE-CYG, METALICENSE, Makefile, alloca.h, args.c, args.h, arith.c, arith.h, buf.c, buf.h, cadr.c, cadr.h, chksum.c, chksum.h, chksums/crc32.c, chksums/crc32.h, combi.c, combi.h, configure, debug.c, debug.h, eval.c, eval.h, ffi.c, ffi.h, filter.c, filter.h, ftw.c, ftw.h, gc.c, gc.h, glob.c, glob.h, hash.c, hash.h, itypes.c, itypes.h, jmp.S, lib.c, lib.h, linenoise/linenoise.c, linenoise/linenoise.h, lisplib.c, lisplib.h, match.c, match.h, parser.c, parser.h, parser.l, parser.y, protsym.c, rand.c, rand.h, regex.c, regex.h, share/txr/stdlib/asm.tl, share/txr/stdlib/awk.tl, share/txr/stdlib/build.tl, share/txr/stdlib/cadr.tl, share/txr/stdlib/compiler.tl, share/txr/stdlib/conv.tl, share/txr/stdlib/debugger.tl, share/txr/stdlib/defset.tl, share/txr/stdlib/doloop.tl, share/txr/stdlib/error.tl, share/txr/stdlib/except.tl, share/txr/stdlib/ffi.tl, share/txr/stdlib/getopts.tl, share/txr/stdlib/getput.tl, share/txr/stdlib/hash.tl, share/txr/stdlib/ifa.tl, share/txr/stdlib/keyparams.tl, share/txr/stdlib/op.tl, share/txr/stdlib/package.tl, share/txr/stdlib/param.tl, share/txr/stdlib/path-test.tl, share/txr/stdlib/place.tl, share/txr/stdlib/pmac.tl, share/txr/stdlib/save-exe.tl, share/txr/stdlib/socket.tl, share/txr/stdlib/stream-wrap.tl, share/txr/stdlib/struct.tl, share/txr/stdlib/tagbody.tl, share/txr/stdlib/termios.tl, share/txr/stdlib/trace.tl, share/txr/stdlib/txr-case.tl, share/txr/stdlib/type.tl, share/txr/stdlib/vm-param.tl, share/txr/stdlib/with-resources.tl, share/txr/stdlib/with-stream.tl, share/txr/stdlib/yield.tl, signal.c, signal.h, socket.c, socket.h, stream.c, stream.h, struct.c, struct.h, strudel.c, strudel.h, sysif.c, sysif.h, syslog.c, syslog.h, termios.c, termios.h, tree.c, tree.h, txr.1, txr.c, txr.h, unwind.c, unwind.h, utf8.c, utf8.h, vm.c, vm.h, vmop.h, win/cleansvg.txr: Extended copyright notices to 2020.
* syntax: new .? operator for null-safe object access.Kaz Kylheku2019-11-051-0/+10
| | | | | | | | | | | | | | | | | | | | | | | | | | | * lib.c (obj_print_impl): Render the new syntactic conventions introduced in qref/uref back into the .? syntax. The printers for qref and uref are united into a single implementation to reduce code proliferation. * parser.l (grammar): Produce new tokens OREFDOT and UOREFDOT. * parser.y (OREFDOT, UREFDOT): New terminal symbols. (n_expr): Handle .? syntax via the new OREFDOT and UOREFDOT token via qref_helper and uoref_helper. Logic for the existing referencing dot is moved into the new qref_helper function. (n_dot_expr): Handle .? syntax via uoref_helper. (uoref_helper, qref_helper): New static functions. * share/txr/stdlib/struct.tl (qref): Handle the new case when the expression which gives the object is (t expr). Handle the new case when the first argument after the object has this form, and is followed by more arguments. Both these cases emit the right conditional code. (uref): Handle the leading .? syntax indicated by a leading t by generating a lambda which checks its argument for nil. Transformations to qref handle the other cases. * txr.1: Documentation updated in several places.
* parser: use eq-based hash for source location info.Kaz Kylheku2019-10-111-1/+1
| | | | | * parser.l (parser_l_init): Assign an eq-based hash to form_to_ln_hash.
* New data structure: binary search trees.Kaz Kylheku2019-09-251-0/+5
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Adding binary search trees based on the new tnode cell. The scapegoat algorithm is used, which requires no additional storage in a cell. In the future we may go to something else, like red-black trees, and carve out a bit in the tag field of the cell for the red/black color. Tree cells store only single key objects, not key/value pairs. However, which part of the key object is compared is determined by a custom key function stored in the tree container. For instance, tree nodes can be cons cells, and car can be used as the key function; the cdr then stores an associated value. Trees have a printed notation #T(<props> <key>*) where <props> is a list of up to three items: <props> ::= ([<key-fn> [<less-fn> [<equal-fn>]]]) key-fn, less-fn and equal-fn are function names. If they are missing or nil, they default, respectively, to identity, less and equal. For security, the printed notation is machine-readable only if these options are symbols, not lambda expressions. Furthermore, the symbols must be listed in the special variable *tree-fun-whitelist*. * eval.c (less_s): New symbol variable. (eval_init): Initialize less_s. * eval.h (less_s): Declard. * parser.h (grammar): New #T token recognized, mapped to HASH_T. * parser.y (HASH_T): New terminal symbol. (tree): New non-terminal symbol. (i_expr, n_expr): Add tree to productions. (fname_helper): New static function. (yybadtoken): Map HASH_T to "#T". * protsym.c: Tweaked accidentally; remove. * tree.c (TREE_DEPTH_MAX): New macro. (struct tree): New struct type. (enum tree_iter_state): New enumeration. (struct tree_iter): New struct type. (tree_iter_init): New macro. (tree_s, tree_fun_whitelist_s): New symbol variables. (tn_size, tn_size_one_child, tn_lookup, tn_find_next, tn_flatten, tn_build_tree, tr_rebuild, tr_find_rebuild_scapegoat, tr_insert, tr_lookup, tr_do_delete, tr_delete, tree_insert_node, tree_insert, tree_lookup_node, tree_lookup, tree_delete, tree_root, tree_equal_op, tree_print_op, tree_mark, tree_hash_op): New static functions. (tree_ops): New static struct. (tree): New function. (tree_init): Initialize tree_s and tree_fun_whitelist_s symbol variables. Register intrinsic functions tree, tree-insert-node, tree-insert, tree-lookup-node, tree-lookup, tree-delete, tree-root. Register special variable *tree-fun-whitelist*. * tree.h (tree_s, tree_fun_whitelist_s, tree): Declared. (tree_fun_whitelist): New macro.
* New data type: tnode.Kaz Kylheku2019-09-221-0/+5
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Binary search tree nodes are being added as a basic heap data type. The C type tag is TNOD, and the Lisp type is tnode. Binary search tree nodes have three elements: a key, a left child and a right child. The printed notation is #N(key left right). Quasiquoting is supported: ^#N(,foo ,bar) but not splicing. Because tnodes have three elements, they they fit into TXR's four-word heap cell, not requiring any additional memory allocation. These nodes are going to be the basis for a binary search tree container, which will use the scapegoat tree algorithm for maintaining balance. * tree.c, tree.h: New files. * Makefile (OBJS): Adding tree.o. * eval.c (expand_qquote_rec): Recurse through tnode cells, so unquotes work inside #N syntax. * gc.c (finalize): Add TNOD to no-op case in switch; tnodes don't require finalization. (mark_obj): Traverse tnode cell. * hash.c (equal_hash): Add TNOD case. * lib.c (tnode_s): New symbol variable. (seq_kind_tab): New entry for TNOD, mapping to SEQ_NOTSEQ. (code2type, equal): Handle TNOD. (obj_init): Initialize tnode_s variable. (obj_print_impl, populate_obj_hash): Handle TNOD. (init): Call tree_init function in tree.c. * lib.h (enum type, type_t): New enumeration TNOD. (struct tnod): New struct type. (union obj, obj_t): New union member tn of type struct tnod. (tnode_s): Declard. * parserc.c (circ_backpatch): Handle TNOD, so circular notation works through tnode cells. * parser.l (grammar): Recognize #N prefix, mapping to HASH_N token. * parser.y (HASH_N): New grammar terminal symbol. (tnode): New nonterminal symbol. (i_expr, n_expr): Add tnode cases to productions. (yybadtoken): Map HASH_N to "#N" string.
* lexer: no leading 0 in 2-digit hex chars in diagnostic.Kaz Kylheku2019-08-201-3/+3
| | | | | | | * parser.l (grammar): Fix incorrect format strings that are supposed to show an invalid input character in hex. For instance the character 1 comes out as #\x 1 rather than #\x01.
* parser: catch out-of-range floats.Kaz Kylheku2019-03-121-3/+15
| | | | | | | | | | * parser.l (out_of_range_float): New static function. (grammar): Check for flo_str returning nil in several places; that value is returned for out of range floats. This is not documented! * txr.1: Document athat flo-str returns nil for out-of-range floats.
* parser: security: UTF-8 and NUL handling in literals.Kaz Kylheku2019-02-051-6/+14
| | | | | | | | | | | | | | | | | A null byte in regex and string literals is being processed as a #\nul instead of correctly turning into #\pnul. Bad UTF-8 is not being rejected. * parser.l (REGCHAR, LITCHAR): Use utf8_from_buffer to properly convert yytext using its true length, rather than utf8_from which assumes a null-terminated string. Thus null bytes (including the case of a yytext being single NUL) are handled properly. Check that the result is exactly one character (null-terminated buffer, two characters wide). * utf8.c (utf8_from): Unused function removed. * utf8.h (utf8_from): Declaration removed.
* Copyright year bump 2019.Kaz Kylheku2019-01-161-1/+1
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | * LICENSE, LICENSE-CYG, METALICENSE, Makefile, args.c, args.h, arith.c, arith.h, buf.c, buf.h, cadr.c, cadr.h, combi.c, combi.h, configure, debug.c, debug.h, eval.c, eval.h, ffi.c, ffi.h, filter.c, filter.h, ftw.h, gc.c, gc.h, glob.c, glob.h, hash.c, hash.h, itypes.c, itypes.h, jmp.S, lib.c, lib.h, lisplib.c, lisplib.h, match.c, match.h, parser.c, parser.h, parser.l, parser.y, protsym.c, rand.c, rand.h, regex.c, regex.h, share/txr/stdlib/asm.tl, share/txr/stdlib/awk.tl, share/txr/stdlib/build.tl, share/txr/stdlib/cadr.tl, share/txr/stdlib/compiler.tl, share/txr/stdlib/conv.tl, share/txr/stdlib/doloop.tl, share/txr/stdlib/error.tl, share/txr/stdlib/except.tl, share/txr/stdlib/ffi.tl, share/txr/stdlib/getopts.tl, share/txr/stdlib/getput.tl, share/txr/stdlib/hash.tl, share/txr/stdlib/ifa.tl, share/txr/stdlib/keyparams.tl, share/txr/stdlib/op.tl, share/txr/stdlib/package.tl, share/txr/stdlib/path-test.tl, share/txr/stdlib/place.tl, share/txr/stdlib/pmac.tl, share/txr/stdlib/socket.tl, share/txr/stdlib/stream-wrap.tl, share/txr/stdlib/struct.tl, share/txr/stdlib/tagbody.tl, share/txr/stdlib/termios.tl, share/txr/stdlib/trace.tl, share/txr/stdlib/txr-case.tl, share/txr/stdlib/type.tl, share/txr/stdlib/vm-param.tl, share/txr/stdlib/with-resources.tl, share/txr/stdlib/with-stream.tl, share/txr/stdlib/yield.tl, signal.c, signal.h, socket.c, socket.h, stream.c, stream.h, struct.c, struct.h, strudel.c, strudel.h, sysif.c, sysif.h, syslog.c, syslog.h, termios.c, termios.h, txr.1, txr.c, txr.h, unwind.c, unwind.h, utf8.c, utf8.h, vm.c, vm.h, vmop.h, win/cleansvg.txr: Extended Copyright line to 2018.
* C++ fixes related to recent Unicode work.Kaz Kylheku2018-05-181-3/+3
| | | | | | | * lib.c (chk_wrealloc): convert needs to be a coerce. * parser.l (grammar): Use yyg instead of yyscanner; the latter is the same pointer but of void * type.
* Allow Unicode characters in identifiers.Kaz Kylheku2018-05-111-5/+46
| | | | | | | | | | * parser.l (unicode_ident): New static function. (BSCHR, NSCHR): Include UONLY match. (grammar): Use unicode_ident function to validate tokens obtained from BTOK and NTOK. * txr.1: Documented changing definition of bident and lident.
* lexer: eliminate regex alias used in one place.Kaz Kylheku2018-05-111-3/+2
| | | | | | | * parser.l (SYM, TOK): These are just aliases for the same pattern. SYM is referenced only in one place; TOK in multiple places, so let's remove SYM and replace that one reference by TOK.
* parser: duplicate package prefix in diagnostic.Kaz Kylheku2018-04-101-2/+2
| | | | | * parser.l (directive_tok): Fix printing of duplicate package prefix on symbol.
* Copyright year bump 2018.Kaz Kylheku2018-02-151-1/+1
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | * LICENSE, LICENSE-CYG, METALICENSE, Makefile, args.c, args.h, arith.c, arith.h, buf.c, buf.h, cadr.c, cadr.h, combi.c, combi.h, configure, debug.c, debug.h, eval.c, eval.h, ffi.c, ffi.h, filter.c, filter.h, ftw.c, ftw.h, gc.c, gc.h, glob.c, glob.h, hash.c, hash.h, itypes.c, itypes.h, jmp.S, lib.c, lib.h, lisplib.c, lisplib.h, match.c, match.h, parser.c, parser.h, parser.l, parser.y, protsym.c, rand.c, rand.h, regex.c, regex.h, share/txr/stdlib/awk.tl, share/txr/stdlib/build.tl, share/txr/stdlib/cadr.tl, share/txr/stdlib/conv.tl, share/txr/stdlib/doloop.tl, share/txr/stdlib/error.tl, share/txr/stdlib/except.tl, share/txr/stdlib/ffi.tl, share/txr/stdlib/getopts.tl, share/txr/stdlib/getput.tl, share/txr/stdlib/hash.tl, share/txr/stdlib/ifa.tl, share/txr/stdlib/keyparams.tl, share/txr/stdlib/op.tl, share/txr/stdlib/package.tl, share/txr/stdlib/path-test.tl, share/txr/stdlib/place.tl, share/txr/stdlib/pmac.tl, share/txr/stdlib/socket.tl, share/txr/stdlib/stream-wrap.tl, share/txr/stdlib/struct.tl, share/txr/stdlib/tagbody.tl, share/txr/stdlib/termios.tl, share/txr/stdlib/txr-case.tl, share/txr/stdlib/type.tl, share/txr/stdlib/with-resources.tl, share/txr/stdlib/with-stream.tl, share/txr/stdlib/yield.tl, signal.c, signal.h, socket.c, socket.h, stream.c, stream.h, struct.c, struct.h, strudel.c, strudel.h, sysif.c, sysif.h, syslog.c, syslog.h, termios.c, termios.h, txr.1, txr.c, txr.h, unwind.c, unwind.h, utf8.c, utf8.h, win/cleansvg.txr: Extended Copyright line to 2018.
* cleanup: remove unnecessary header includes.Kaz Kylheku2017-09-191-1/+0
| | | | | | | | | | * eval.c: doesn't need rand.h. * filter.c: doesn't need gc.h. * parser.l: doesn't need eval.h. * parser.y: doesn't need utf8.h, stream.h, args.h or cadr.h.
* parser: fix precedence of DOTDOT.Kaz Kylheku2017-09-071-1/+1
| | | | | | | | | | | | | | | | | | | | | | | | | | | | The problem is that a.b .. c.d parses as (qref a b..c d), which is useless and counterintuitive. Let's fix it, but with a backward compatibility switch to give more leeway to any hapless people out there whose code happens to depend on this unfortunate situation. We basically use two token numbers for the .. token: OLD_DOTDOT, and DOTDOT. Both are wired into the grammar. In backward compatibility mode, the lexer pumps out OLD_DOTDOT. Otherwise DOTDOT. * parser.l (grammar): When .. is scanned, return OLD_DOTDOT when in compatibility with 185 or earlier. Otherwise DOTDOT. * parser.y (OLD_DOTDOT): New terminal symbol; introduced at the same high precedence previously occupied by DOTDOT. (DOTDOT): Changes precedence to lower than '.' and UREFDOT. (n_expr): Two productions added involving OLD_DOTDOT. These are copy and paste of the existing productions involving DOTDOT; the only difference is that OLD_DOTDOT replaces DOTDOT. (yybadtoken): Handle OLD_DOTDOT. * txr.1: Compat notes added.
* parser: bugfix: set line number on <lineno> tokens.Kaz Kylheku2017-05-181-0/+7
| | | | | | | | | | | | | | | | | This issue was revealed as a garbage line number in an unbound variable warning diagnostic, where the variable occurs in a quasi word list literal. A small test case is (list #`@var`) where var unbound. The fix is, in the lexer, to set the yylval->lineno for all tokens which are declared as <lineno> in the grammar file, for which doing so has beens neglected. We do this even for those tokens whose line number values are never accessd in any rule; it could arise in the future. * parser.l (grammar): Set the yylval->lineno for the tokens HASH_BACKSLASH, HASH_B_QUOTE, HASH_SLASH, WORDS, WSPLICE, QWORDS and QWSPLICE.
* Continuing implementation of buffers.Kaz Kylheku2017-04-211-1/+36
| | | | | | | | | | | | | | | | | | | | | | | | | | * Makefile (OBJS): New objects itypes.o and buf.o. * buf.c, buf.h: New files. * itypes.c, itypes.h: New files. * lib.c (obj_print_impl): Handle BUF via buf_print and buf_pprint. (init): Call itypes_init and buf_init. * parser.h (end_of_buflit): Declared. * parser.l (BUFLIT): New exclusive state. (grammar): New rules for recognizing start of buffer literal and its interior. (end_of_buflit): New function. * parser.y (HASH_B_QUOTE): New token. (buflit, buflit_items, buflit_item): New nonterminals and corresponding grammar rules. (i_expr, n_expr): These symbols now generate a buflit; a buffer literal is a kind of expression. (yybadtoken): Handle HASH_B_QUOTE case.
* parser: C++ regression.Kaz Kylheku2017-04-041-30/+30
| | | | | | | * parser.l (grammar): Pass yyg to directive_tok rather than yyscanner. It has the yyguts_t * type, whereas yyscanner is a void * version of the same pointer.
* parser: bugfix: don't scan @NUM in QSPECIAL state.Kaz Kylheku2017-04-041-3/+6
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | The problem is syntax like `@@12a` being scanned as if it were `@{@12}a` rather than @{@12a}`. When the scanner is in the middle of a quasiliteral, in the QSILIT state and sees a @, it transitions to the QSPECIAL state. In the QSPECIAL state, the METANUM token syntax is recognized consisting of @ followed by a decimal, octal or hex number. In the same QSPECIAL state, however, a meta-variable like @abc is not recognized as a unit; rather, a @ is recognized by itself, and abc by itself. Thus when @12a is seen in the QSPECIAL state, the @12 is the longest match. The fix is to treat METANUM tokens the same way in the QSPECIAL state: just recognize a number without the @ prefix, and report as a METANUM. * parser.l (grammar): Split the pattern in all four METANUM rules so that in the NESTED, BRACED, QSLIT and QWLIT states, the number is recognized together with the @ prefix. But in the QSPECIAL state, indicating that one or more @ characters have been seen, just recognize a number without the prefix as a METANUM.
* parser: do not reject 0.1..0.2 range.Kaz Kylheku2017-04-021-2/+1
| | | | | * parser.l: Remove the pattern match which causes 0.1..0 to be rejected.
* parser: diagnose syntax like 0.1.2 and .1.1.Kaz Kylheku2017-04-021-3/+3
| | | | | | | | | Currently (list .1.1) yields (0.1 0.1). This is evading the rule for catching cramped floating-point literals. * parser.l (grammar): Carefully weaken the pattern match in the relevant rule for catching cramped floating-point literals, so it matches these cases.
* Bugfix: .1 treated as dot if preceded by space.Kaz Kylheku2017-04-021-4/+4
| | | | | | | | | | | | Some recent work in supporting .slot syntax (uref dot) broke the treatment of floating point literals. This is because part of the trick is that a uref dot is recognized with leading whitespace as part of the token. But that of course means it steals the match for some floating-point tokens; oops! * parser.l (grammar): All rules for floating-point tokens which can match a leading decimal point now munch optional whitespace first.
* Package prefix handling on directive symbols.Kaz Kylheku2017-03-271-30/+61
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The directives which are involved in special phrase structure syntax like @(collect), @(end), @(and) and many others have always been a hack, recognized specially in the lexical analyzer and handled in the parser. The identifiers were not treated via the normal Lisp interning mechanism. In this patch, we try to make the illusion more complete and functional. Going forward, these symbols are understood as being interned in the usr package. As a special relaxation, keyword symbols may be used in their place, so that @(:end) is the same as @(end) and @(:collect) is the same as @(collect). Suppose that @(collect) is scanned, but the collect symbol interned in the current package isn't usr:collect, or keyword:collect. Then this is an error. Further, package prefixes may be used. The syntax @(abc:collect) is still valid and is still recognized as the head of the @(collect) phrase structure syntax. However, if abc:collect isn't the same symbol as either usr:collect or :collect, then an error is triggered. * parser.l (grammar): Recognize optional package prefixes on directive phrase structure identifiers. (directive_tok): Extract package prefix and symbol from lexeme. Implement the above described checks for all the cases. * txr.1: Added description of this under the Packages and Symbols section.
* Lexer refactoring: special syntax tokens.Kaz Kylheku2017-03-271-90/+43
| | | | | | * parser.l (directive_tok): New static function. (grammar): Replace repeated code with calls to directive_tok.
* uref: the a.b.c syntax extended to .a.b.cKaz Kylheku2017-03-061-1/+6
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Now it is possible to use a leading dot on the referencing dot syntax. This is the is the "unbound reference dot". It expands to the uref macro, which denotes an unbound-reference: it produces a function which takes an object as the argument, and curries the reference implied by the remaining arguments. * eval.c (uref_s): New global symbol variable. (eval_init): Intern uref symbol and init uref_s. * eval.h (uref_s): Declared. * lib.c (simple_qref_args_p): A qref expression is now also not simple if it contains an embedded uref, meaning that it cannot be rendered into the dot notation without ambiguity. (obj_print_impl): Support printing (uref a b c) as .a.b.c. * lisplib.c (struct_set_entries): Add uref to the list of autoload triggers for struct.tl. * parser.l (DOTDOT): Consume any leading whitespace as part of recognizing the DOTDOT token. Otherwise the new rule for UREFDOT, which matches (mandatory) leading space will take precedence, causing " .." to be scanned wrong. (UREFDOT): Rule for new kind of dot token, which is preceded by mandatory whitespace, and isn't consing dot (which has mandatory trailing whitespace too, matched by an earlier rule). * parser.y (UREFDOT): New token type. (i_dot_expr, n_dot_expr): New grammar rules. (list): Handle a leading dot on the first element of a list as a special case. Things are done this way because trying to work a UREFDOT into the grammar otherwise causes intractable conflicts. (i_expr): The ^, ' and , punctuators are now followed by an i_dot_expr, so that the expression can be an unbound dot. (n_expr): Same change as in i_expr, but using n_dot_expr. Plus new UREFDOT n_expr production. * share/txr/stdlib/struct.tl (uref): New macro. * txr.1: Documented.
* parser: diagnose run-on symbols.Kaz Kylheku2017-02-011-0/+14
| | | | | | | | | | * parser.l (grammar): Add rules which capture two symbols glued together, and diagnose as bad token. Of course a legitimate symbol token can be divided into two that are glued together. This rule is placed after the legitimate symbol matching rule, so that if a token can be interpreted as a single symbol token or as two, the first interpretation is taken.
* parser: diagnose more kinds of junk after float.Kaz Kylheku2017-02-011-1/+2
| | | | | | | | | * parser.l (grammar): Add a rule that if a floating-point (of the type that ends in decimal digits with an optional exponent) is immediately followed by a period which is not followed by another period (range syntax), it is trailing junk. For instance 1.0.3 or .2.$, or 1.0. followed by no other input.
* Bump copyright year to 2017.Kaz Kylheku2017-01-231-1/+1
| | | | | | | | | | | | | | | | | | | | | | | | * LICENSE, LICENSE-CYG, METALICENSE, Makefile, args.c, args.h, arith.c, arith.h, cadr.c, cadr.h, combi.c, combi.h, configure, debug.c, debug.h, eval.c, eval.h, filter.c, filter.h, ftw.c, ftw.h, gc.c, gc.h, glob.c, glob.h, hash.c, hash.h, jmp.S, lib.c, lib.h, lisplib.c, lisplib.h, match.c, match.h, parser.c, parser.h, parser.l, parser.y, rand.c, rand.h, regex.c, regex.h, signal.c, signal.h, stream.c, stream.h, struct.c, struct.h, sysif.c, sysif.h, syslog.c, syslog.h, termios.c, termios.h, txr.1, txr.c, txr.h, unwind.c, unwind.h, utf8.c, utf8.h, share/txr/stdlib/awk.tl, share/txr/stdlib/build.tl, share/txr/stdlib/cadr.tl, share/txr/stdlib/conv.tl, share/txr/stdlib/except.tl, share/txr/stdlib/getopts.tl, share/txr/stdlib/getput.tl, share/txr/stdlib/hash.tl, share/txr/stdlib/ifa.tl, share/txr/stdlib/package.tl, share/txr/stdlib/path-test.tl, share/txr/stdlib/place.tl, share/txr/stdlib/socket.tl, share/txr/stdlib/struct.tl, share/txr/stdlib/tagbody.tl, share/txr/stdlib/termios.tl, share/txr/stdlib/txr-case.tl, share/txr/stdlib/type.tl, share/txr/stdlib/with-resources.tl, share/txr/stdlib/with-stream.tl, share/txr/stdlib/yield.tl: Add 2017 to all copyright headers and strings.
* Fix some C style casts to use casting macros.Kaz Kylheku2016-12-071-2/+2
| | | | | | | | | | | | | | | | | | | | This is uncovered by compiling with g++ using -Wold-style-cast. * mpi/mpi.c (mp_get_intptr): Use convert macro. Also in one of the rules producing REGCHAR. * parser.l (num_esc): Likewise. * struct.c (static_slot_set, static_slot_ens_rec, get_equal_method): Use coerce macro for int to pointer conversion. * sysif.c (setgroups_wrap): Use convert macro. * termios.c (termios_unpack, termios_pack): Likewise. * txr.c (sysroot_init): Likewise.
* Removes stray debug printf from lexer.Kaz Kylheku2016-12-041-1/+0
| | | | | | * parser.l: A stray printf was committed in November 2015. The spurious output only occurs when certain invalid floating-point syntax is encountered.
* Harden processing of character escapes.Kaz Kylheku2016-12-021-4/+8
| | | | | | | | | | | | | | | | | | Weakness uncovered by fuzzing with AFL (fast) 2.30b. The failing test case is regex syntax like [\1111111...111abc], where the bad character escape allows an invalid, negatively valued character object to escape out of the parser into the system leading to an an out-of-bounds array access in the char set code in the regex compiler. * parser.l (num_esc): Make sure that an out-of-range character is mapped to zero. Set up a default value of zero for the return variable. If the character token has too many digits, don't pass them through strtol at all, which will produce a garbage value. Then in the final range check, actually replace the value with zero if it is out of range: issuing a diagnostic is not enough.
* Support #: reading for uninterned symbols.Kaz Kylheku2016-11-071-4/+4
| | | | | | | | | | | | | | * parser.l (BTKEY, NTKEY): Renamed to BTKWUN and NTKWUN ("keyword and uninterned") respectively. Include an optional match for the # character. (BTOK, NTOK): Refer to BTKEY and NTKEY respectively * parser.y (sym_helper): Implement uninterned symbols by detecting when the package name string is "#" and handling specially. * txr.1: Documented package prefixes and uninterned symbols.
* New #; syntax for erasing following object.Kaz Kylheku2016-11-071-0/+5
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | * parser.c (parser_circ_ref): Don't generate the circular reference if circular suppression is in effect. * parser.h (struct parser): New member, circ_suppress. We use this for suppressing the generation of circular #n# references in erased objects. * parser.l (grammar): Scan #; producing HASH_SEMI token. * parser.y (HASH_SEMI): New token. (hash_semis_n_expr, hash_semis_i_expr, ignored_i_exprs, ignored_n_exprs): New nonterminals, needed for supporting the use of #; in front of top-level forms. (spec): Use hash_semis_n_expr and hash_semis_i_expr instead of n_expr and i_expr. (r_expr): Support object erasure within nested syntax. (yybadtoken): Handle H_SEMI token. (parse): Initialize new circ_suppress member of parser struct to zero. * txr.1: Documented. * genvim.txr (txr_ign_par, txr_ign_bkt, txr_ign_par_interior, txr_ign_bkt_interior): New regions for colorizing erased objects (partial support). (txr_list, txr_bracket, txr_mlist, txr_mbrackets): Include erased objects by including regions txr_ign_par and txr_ign_bkt. * txr.vim, tl.vim: Regenerated.
* Adding notation for cycles and shared structure.Kaz Kylheku2016-10-181-0/+12
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | This commit implements the parse-side support for handling a notation that exists in ANSI Common Lisp for specifying objects with cycles and shared substructure. * parser.h (struct parser): New members, circ_ref_hash and circ_count. (circref_s, parser_resolve_circ, parser_circ_def, parser_circ_ref): Declared. * parser.c (circref_s): New symbol variable. (parser_mark): Visit the new circ_ref_hash member of the parser structure. (parser_common_init): Initialize new members circ_ref_hash and circ_count of parser structure. (patch_ref, circ_backpatch): New static functions. (parser_resolve_circ, parser_circ_def, parser_circ_ref): New functions. (circref): New static function. (parse_init): Initialize circref_s as sys:circref symbol. Register sys:circref function. * parser.l (grammar): Scan #<num>= and #<num># notation as tokens, extracting their numeric value. * parser.y (HASH_N_EQUALS, HASH_N_HASH): New token types. (i_expr, n_expr): Adding phrases for hash-equalsign and hash-hash syntax. (yybadtoken): Handle new token types in switch. (parse_once): Call parser_resolve_circ after parsing to rewrite any remaining #<num># references in the structure to the objects they denote. (parse): Reset new struct parse members to initial state. Call parser_resolve_circ after parsing to rewrite any remaining #<num># references.
* Synchronize license comments with LICENSE.Kaz Kylheku2016-10-011-16/+17
| | | | | | | | | | | | | | | | | | | | * Makefile, args.c, args.h, arith.c, arith.h, cadr.c, cadr.h, combi.c, combi.h, configure, debug.c, debug.h, eval.c, eval.h, filter.c, filter.h, ftw.c, ftw.h, gc.c, gc.h, glob.c, glob.h, hash.c, hash.h, jmp.S, lib.c, lib.h, lisplib.c, lisplib.h, match.c, match.h, parser.c, parser.h, parser.l, parser.y, rand.c, rand.h, regex.c, regex.h, share/txr/stdlib/awk.tl, share/txr/stdlib/build.tl, share/txr/stdlib/cadr.tl, share/txr/stdlib/conv.tl, share/txr/stdlib/except.tl, share/txr/stdlib/hash.tl, share/txr/stdlib/ifa.tl, share/txr/stdlib/path-test.tl, share/txr/stdlib/place.tl, share/txr/stdlib/socket.tl, share/txr/stdlib/struct.tl, share/txr/stdlib/termios.tl, share/txr/stdlib/txr-case.tl, share/txr/stdlib/type.tl, share/txr/stdlib/with-resources.tl, share/txr/stdlib/with-stream.tl, share/txr/stdlib/yield.tl, signal.c, signal.h, socket.c, socket.h, stream.c, stream.h, struct.c, struct.h, sysif.c, sysif.h, syslog.c, syslog.h, termios.c, termios.h, txr.1, txr.c, txr.h, unwind.c, unwind.h, utf8.c, utf8.h: Revert to verbatim 2-Clause BSD.
* Allow whitespace between @ and ; in comments.Kaz Kylheku2016-05-231-2/+2
| | | | | | | * parser.l (grammar): Recognize {WS}* between @ and ; (or the legacy #) in comments. * txr.1: Documentation updated.
* Handle non-UTF-8 byte in regex scanned from string.Kaz Kylheku2016-04-211-0/+6
| | | | | | | | The current behavior is that there is no lex rule for this, so such a byte gets echoed. parser.l (grammar): Add fallback rule to match one byte in SREGEX state and turn it into 0xDCxx character.
* Better job of diagnosing out-of-range char escapes.Kaz Kylheku2016-04-211-2/+9
| | | | | | * parser.l (num_esc): Check for converted value being out of the range of wchar_t or beyond 0x10FFFF, whichever is less.
* Bugfix: allow newline in regex parsing from string.Kaz Kylheku2016-04-181-1/+7
| | | | | | | * parser.l (grammar): The newline character is incorrectly handled by the same rule under the SREGEX and REGEX states. In the SREGEX state, just return it as a REGCHAR, not forgetting to increment the line number.
* Trailing whitespace.Kaz Kylheku2016-04-181-1/+1
| | | | * parser.l: Remove trailing whitespace.
* Revamp bad character messages in lexer.Kaz Kylheku2016-04-011-4/+15
| | | | | | | | * parser.l (grammar): Drop colon from unrecognized escape message. "bad character in directive" handles various cases to avoid printing junk to the terminal. Basic message harmonizes with the one in the yybadtoken function in the parser. Non-UTF-8 byte printed as TXR hex integer literal.
* gc bug: prepared_msg field of struct parser.Kaz Kylheku2016-03-071-1/+2
| | | | | | | * parser.l (yyerrprepf): Replace wrong bare assignment to parser->prepared_msg with proper set macro which handles the mutation of a mature generation object such that it points to a baby object.
* New :mandatory keyword in until/last clauses.Kaz Kylheku2016-01-151-5/+4
| | | | | | | | | | | | | | | | | | | * match.c (mandatory_k): New keyword variable. (h_coll, v_gather, v_collect): Implement :mandatory logic. (syms_init): Initialize mandatory_k. * parser.l (grammar): The UNTIL and LAST tokens must be matched similarly to collect, without consuming the closing parenthesis, allowing a list of items to be parsed between the symbol and the closure, in the NESTED state. * parser.y (gather_clause, collect_clause, elem, repeat_parts_opt, rep_parts_opt): Adjust to new until/last syntax. In the matching productions, the abstract syntax changes to incorporate the options. In the output productions, we throw an error if options are present. * txr.1: Documented :mandatory for collect, coll and gather.
* Copyright year bump.Kaz Kylheku2015-12-311-1/+1
| | | | | | | | | | | | | | | | | | | | | | | * LICENSE, METALICENSE, Makefile, args.c, args.h, arith.c, arith.h, cadr.c, cadr.h, combi.c, combi.h, configure, debug.c, debug.h, eval.c, eval.h, filter.c, filter.h, gc.c, gc.h, glob.c, glob.h, hash.c, hash.h, jmp.S, lib.c, lib.h, lisplib.c, lisplib.h, match.c, match.h, parser.c, parser.h, parser.l, parser.y, rand.c, rand.h, regex.c, regex.h, share/txr/stdlib/cadr.tl, share/txr/stdlib/except.tl, share/txr/stdlib/hash.tl, share/txr/stdlib/ifa.tl, share/txr/stdlib/path-test.tl, share/txr/stdlib/place.tl, share/txr/stdlib/struct.tl, share/txr/stdlib/txr-case.tl, share/txr/stdlib/type.tl, share/txr/stdlib/with-resources.tl, share/txr/stdlib/with-stream.tl, share/txr/stdlib/yield.tl, signal.c, signal.h, stream.c, stream.h, struct.c, struct.h, sysif.c, sysif.h, syslog.c, syslog.h, txr.1, txr.c, txr.h, unwind.c, unwind.h, utf8.c, utf8.h: Add 2016 copyright. * linenoise/LICENSE, linenoise/linenoise.c, linenoise/linenoise.h: Bump one principal author's copyright from 2014 to 2015. The code is based on a snapshot of 2015 upstream work.
* Implementing *print-base* and ~d format directive.Kaz Kylheku2015-11-141-3/+3
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | * debug.c (show_bindings): Use ~d for level, so as not to be influenced by *print-base*. (debug): Use ~d for line numbers. * lib.c (gensym): Use ~d conversion specifier for formatting gensym counter into symbol name. * match.c (LOG_MISMATCH, LOG_MATCH): Use ~d for line number references. (h_skip, h_coll, h_fun, h_chr, match_line_completely, v_skip, v_fuzz, v_gather, v_collect, v_output, v_filter, v_fun, v_assert, v_load, v_line, h_assert, open_data_source): Use ~d for line refs, number of iterations, errno values. * parser.c (repl): Use ~d for prompt line numbers, numbered variables and the expr-<n> string in error messages. * parser.l (yyerrorf, source_loc_str): Use ~d for line numbers. * stream.c (print_base_s): New symbol variable. (formatv): Implement *print-base*. (stdio_maybe_read_error, stdio_maybe_error, stdio_close, pipe_close, open_directory, open_file, open_fileno, open_tail, open_process, run, remove_path): Use ~d for errno values. (stream_init): Initialize print_base_s and register *print-base* special variable. sysif.c (mkdir_wrap, ensure_dir, getcwd_wrap, mknod_wrap, chmod_wrap, symlink_wrap, link_wrap, readlink_wrap, excec_wrap, stat_impl, pipe_wrap, poll_wrap, getgroups_wrap, setuid_wrap, seteuid_wrap, setgid_wrap): Use ~d for errno values and system function results. * txr.1: Documented *print-base* and ~d conversion specifier.