/* Copyright 2009-2014 * Kaz Kylheku * Vancouver, Canada * All rights reserved. * * BSD License: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ %{ #include #include #include #include #include #include #include #include #include #include #include "config.h" #if HAVE_UNISTD_H #include #endif #include "lib.h" #include "y.tab.h" #include "gc.h" #include "stream.h" #include "utf8.h" #include "signal.h" #include "unwind.h" #include "hash.h" #include "parser.h" #define YY_INPUT(buf, result, max_size) \ do { \ val c = get_byte(yyin_stream); \ int n = 0; \ if (c) \ buf[n++] = (char) c_num(c); \ result = n; \ } while (0) val yyin_stream; cnum lineno = 1; int opt_loglevel = 1; /* 0 - quiet; 1 - normal; 2 - verbose */ int errors; val spec_file_str; val form_to_ln_hash; static val prepared_error_message; void yyerror(const char *s) { yyerrorf(lit("~a"), string_utf8(s), nao); if (prepared_error_message) { yyerrorf(lit("~a"), prepared_error_message, nao); prepared_error_message = nil; } } void yyerrorf(val fmt, ...) { if (opt_loglevel >= 1) { va_list vl; va_start (vl, fmt); format(std_error, lit("~a: (~a:~a): "), prog_string, spec_file_str, num(lineno), nao); vformat(std_error, fmt, vl); put_char(chr('\n'), std_error); va_end (vl); } errors++; } static void yyerrprepf(val fmt, ...) { if (opt_loglevel >= 1) { va_list vl; va_start (vl, fmt); prepared_error_message = vformat_to_string(fmt, vl); va_end (vl); } } static wchar_t char_esc(int letter) { switch (letter) { case ' ': return L' '; case 'a': return L'\a'; case 'b': return L'\b'; case 't': return L'\t'; case 'n': return L'\n'; case 'v': return L'\v'; case 'f': return L'\f'; case 'r': return L'\r'; case 'e': return 27; case '"': return L'"'; case '\'': return L'\''; case '`': return L'`'; case '/': return L'/'; case '\\': return L'\\'; } internal_error("unhandled escape character"); } static wchar_t num_esc(char *num) { if (num[0] == 'x') { if (strlen(num) > 7) yyerror("too many digits in hex character escape"); return strtol(num + 1, 0, 16); } else { if (num[0] == 'o') num++; if (strlen(num) > 8) yyerror("too many digits in octal character escape"); return strtol(num, 0, 8); } } %} %option stack %option nounput %option noinput SYM [a-zA-Z0-9_]+ SGN [+\-] EXP [eE][+\-]?[0-9]+ DIG [0-9] XDIG [0-9A-Fa-f] NUM {SGN}?{DIG}+ FLO {SGN}?({DIG}*[.]{DIG}+{EXP}?|{DIG}+[.]?{EXP}) FLODOT {SGN}?{DIG}+[.] XNUM #x{SGN}?{XDIG}+ ONUM #o{SGN}?[0-7]+ BNUM #b{SGN}?[0-1]+ BSCHR [a-zA-Z0-9!$%&*+\-<=>?\\_~] NSCHR [a-zA-Z0-9!$%&*+\-<=>?\\_~/] ID_END [^a-zA-Z0-9!$%&*+\-<=>?\\_~/] EXTRA [#^] TOK {SYM} BT0 {BSCHR}({BSCHR}|{EXTRA})* BT1 @{BT0}+ BT2 ({BSCHR}|{EXTRA})+ BTREG (({BT0}|{BT1})?:{BT2}|({BT0}|{BT1})(:{BT2})?|:) BTKEY @?:{BT2}? BTOK {BTREG}|{BTKEY} NT0 {NSCHR}({NSCHR}|{EXTRA})* NT1 @{NT0}+ NT2 ({NSCHR}|{EXTRA})+ NTREG (({NT0}|{NT1})?:{NT2}|({NT0}|{NT1})(:{NT2})?|:) NTKEY @?:{NT2}? NTOK {NTREG}|{NTKEY} WS [\t ]* HEX [0-9A-Fa-f] OCT [0-7] ASC [\x00-\x7f] ASCN [\x00-\t\v-\x7f] U [\x80-\xbf] U2 [\xc2-\xdf] U3 [\xe0-\xef] U4 [\xf0-\xf4] UANY {ASC}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} UANYN {ASCN}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} %x SPECIAL BRACED NESTED REGEX STRLIT CHRLIT QSILIT %% {NUM} { val str = string_own(utf8_dup_from(yytext)); if (yy_top_state() == INITIAL || yy_top_state() == QSILIT) yy_pop_state(); yylval.val = int_str(str, num(10)); return NUMBER; } {XNUM} { val str = string_own(utf8_dup_from(yytext + 2)); if (yy_top_state() == INITIAL || yy_top_state() == QSILIT) yy_pop_state(); yylval.val = int_str(str, num(16)); return NUMBER; } {ONUM} { val str = string_own(utf8_dup_from(yytext + 2)); if (yy_top_state() == INITIAL || yy_top_state() == QSILIT) yy_pop_state(); yylval.val = int_str(str, num(8)); return NUMBER; } {BNUM} { val str = string_own(utf8_dup_from(yytext + 2)); if (yy_top_state() == INITIAL || yy_top_state() == QSILIT) yy_pop_state(); yylval.val = int_str(str, num(2)); return NUMBER; } {FLO} { val str = string_own(utf8_dup_from(yytext)); if (yy_top_state() == INITIAL || yy_top_state() == QSILIT) yy_pop_state(); yylval.val = flo_str(str); return NUMBER; } {FLODOT}/[^.] { val str = string_own(utf8_dup_from(yytext)); if (yy_top_state() == INITIAL || yy_top_state() == QSILIT) yy_pop_state(); yylval.val = flo_str(str); return NUMBER; } ({FLO}|{FLODOT}){TOK} | ({FLO}|{FLODOT}){BTOK} | ({FLO}|{FLODOT}){NTOK} { val str = string_utf8(yytext); yyerrorf(lit("trailing junk in floating-point literal: ~a"), str, nao); if (yy_top_state() == INITIAL || yy_top_state() == QSILIT) yy_pop_state(); yylval.val = flo_str(str); return NUMBER; } @{NUM} { val str = string_own(utf8_dup_from(yytext + 1)); if (yy_top_state() == INITIAL || yy_top_state() == QSILIT) yy_pop_state(); yylval.val = int_str(str, num(10)); return METANUM; } @{XNUM} { val str = string_own(utf8_dup_from(yytext + 3)); if (yy_top_state() == INITIAL || yy_top_state() == QSILIT) yy_pop_state(); yylval.val = int_str(str, num(16)); return METANUM; } @{ONUM} { val str = string_own(utf8_dup_from(yytext + 3)); if (yy_top_state() == INITIAL || yy_top_state() == QSILIT) yy_pop_state(); yylval.val = int_str(str, num(8)); return METANUM; } @{BNUM} { val str = string_own(utf8_dup_from(yytext + 3)); if (yy_top_state() == INITIAL || yy_top_state() == QSILIT) yy_pop_state(); yylval.val = int_str(str, num(2)); return METANUM; } {TOK} | {BTOK} | {NTOK} { if (yy_top_state() == INITIAL || yy_top_state() == QSILIT) yy_pop_state(); yylval.lexeme = utf8_dup_from(yytext); return SYMTOK; } \({WS}all{WS}\) { yy_pop_state(); yylval.lineno = lineno; return ALL; } \({WS}some/{ID_END} { yy_push_state(NESTED); yylval.lineno = lineno; return SOME; } \({WS}none{WS}\) { yy_pop_state(); yylval.lineno = lineno; return NONE; } \({WS}maybe{WS}\) { yy_pop_state(); yylval.lineno = lineno; return MAYBE; } \({WS}cases{WS}\) { yy_pop_state(); yylval.lineno = lineno; return CASES; } \({WS}block/{ID_END} { yy_push_state(NESTED); yylval.lineno = lineno; return BLOCK; } \({WS}choose/{ID_END} { yy_push_state(NESTED); yylval.lineno = lineno; return CHOOSE; } \({WS}gather/{ID_END} { yy_push_state(NESTED); yylval.lineno = lineno; return GATHER; } \({WS}and{WS}\) { yy_pop_state(); yylval.lineno = lineno; return AND; } \({WS}or{WS}\) { yy_pop_state(); yylval.lineno = lineno; return OR; } \({WS}end{WS}\) { yy_pop_state(); yylval.lineno = lineno; return END; } \({WS}collect/{ID_END} { yy_push_state(NESTED); yylval.lineno = lineno; return COLLECT; } \({WS}coll/{ID_END} { yy_push_state(NESTED); yylval.lineno = lineno; return COLL; } \({WS}until{WS}\) { yy_pop_state(); yylval.lineno = lineno; return UNTIL; } \({WS}output/{ID_END} { yy_push_state(NESTED); yylval.lineno = lineno; return OUTPUT; } \({WS}repeat/{ID_END} { yy_push_state(NESTED); yylval.lineno = lineno; return REPEAT; } \({WS}rep/{ID_END} { yy_push_state(NESTED); yylval.lineno = lineno; return REP; } \({WS}single{WS}\) { yy_pop_state(); yylval.lineno = lineno; return SINGLE; } \({WS}first{WS}\) { yy_pop_state(); yylval.lineno = lineno; return FIRST; } \({WS}last{WS}\) { yy_pop_state(); yylval.lineno = lineno; return LAST; } \({WS}empty{WS}\) { yy_pop_state(); yylval.lineno = lineno; return EMPTY; } \({WS}mod/{ID_END} { yy_push_state(NESTED); yylval.lineno = lineno; return MOD; } \({WS}modlast/{ID_END} { yy_push_state(NESTED); yylval.lineno = lineno; return MODLAST; } \({WS}define/{ID_END} { yy_push_state(NESTED); yylval.lineno = lineno; return DEFINE; } \({WS}try{WS}\) { yy_pop_state(); yylval.lineno = lineno; return TRY; } \({WS}catch/{ID_END} { yy_push_state(NESTED); yylval.lineno = lineno; return CATCH; } \({WS}finally{WS}\) { yy_pop_state(); yylval.lineno = lineno; return FINALLY; } \({WS}if/{ID_END} { yy_push_state(NESTED); yylval.lineno = lineno; return IF; } \({WS}elif/{ID_END} { yy_push_state(NESTED); yylval.lineno = lineno; return ELIF; } \({WS}else{WS}\) { yy_pop_state(); yylval.lineno = lineno; return ELSE; } [{] { yy_push_state(BRACED); yylval.lineno = lineno; return yytext[0]; } [(\[] { yy_push_state(NESTED); yylval.lineno = lineno; return yytext[0]; } @ { yylval.lineno = lineno; return yytext[0]; } ,[*] { yylval.chr = '*'; return SPLICE; } [,'^] { yylval.chr = yytext[0]; return yytext[0]; } [}] { yy_pop_state(); if (yy_top_state() == INITIAL || yy_top_state() == QSILIT) yy_pop_state(); return yytext[0]; } [)\]] { yy_pop_state(); if (yy_top_state() == INITIAL || yy_top_state() == QSILIT) yy_pop_state(); return yytext[0]; } {WS} { /* Eat whitespace in directive */ } \" { yy_push_state(STRLIT); return '"'; } #\\ { yy_push_state(CHRLIT); return HASH_BACKSLASH; } #[/] { yy_push_state(REGEX); return HASH_SLASH; } ` { yy_push_state(QSILIT); return '`'; } # { return '#'; } #H { yylval.lineno = lineno; return HASH_H; } \.\. { yylval.lineno = lineno; return DOTDOT; } @ { yy_pop_state(); yylval.lexeme = chk_strdup(L"@"); return TEXT; } \n { lineno++; } [/] { yy_push_state(REGEX); return '/'; } \. { yylval.chr = '.'; return '.'; } [\\]\n{WS} { if (YYSTATE == SPECIAL) yy_pop_state(); /* @\ continuation */ lineno++; } [\\][abtnvfre ] { wchar_t lexeme[2]; lexeme[0] = char_esc(yytext[1]); lexeme[1] = 0; yylval.lexeme = chk_strdup(lexeme); yy_pop_state(); return TEXT; } [\\](x{HEX}+|{OCT}+) { wchar_t lexeme[2]; lexeme[0] = num_esc(yytext + 1); lexeme[1] = 0; yylval.lexeme = chk_strdup(lexeme); yy_pop_state(); return TEXT; } [\\]. { yyerrorf(lit("unrecognized escape: \\~a"), chr(yytext[1]), nao); } [;].* { /* comment */ } {UANYN} { yyerrprepf(lit("bad character in directive: '~a'"), string_utf8(yytext), nao); return ERRTOK; } . { yyerrprepf(lit("non-UTF-8 byte in directive: " "'\\x~02x'"), num((unsigned char) yytext[0]), nao); return ERRTOK; } [/] { yylval.chr = '/'; return '/'; } [\\][abtnvfre\\ ] { yylval.chr = char_esc(yytext[1]); return REGCHAR; } [\\](x{HEX}+|{OCT}+);? { yylval.chr = num_esc(yytext + 1); return REGCHAR; } [\\][sSdDwW] { yylval.chr = yytext[1]; return REGTOKEN; } {WS}[\\]\n{WS} { lineno++; } \n { lineno++; yyerrprepf(lit("newline in regex"), nao); return ERRTOK; } [.*?+~&%] { yylval.chr = yytext[0]; return yytext[0]; } [\[\]\-] { yylval.chr = yytext[0]; return yytext[0]; } [()|] { yylval.chr = yytext[0]; return yytext[0]; } [\\]. { yylval.chr = yytext[1]; return REGCHAR; } {UANYN} { wchar_t buf[8]; utf8_from(buf, yytext); yylval.chr = buf[0]; return REGCHAR; } . { yyerrprepf(lit("non-UTF-8 byte in regex: '\\x~02x'"), num((unsigned char) yytext[0]), nao); return ERRTOK; } [ ]+ { yylval.lexeme = utf8_dup_from(yytext); return SPACE; } ({UONLY}|[^@\n ])+ { yylval.lexeme = utf8_dup_from(yytext); return TEXT; } \n { lineno++; return '\n'; } @{WS}\* { yy_push_state(SPECIAL); return '*'; } @ { yy_push_state(SPECIAL); } @\x01R { yy_push_state(REGEX); return SECRET_ESCAPE_R; } @\x01E { yy_push_state(SPECIAL); yy_push_state(NESTED); return SECRET_ESCAPE_E; } ^@[#;].*\n { /* eat whole line comment */ lineno++; } @[#;].* { /* comment to end of line */ } \" { yy_pop_state(); return yytext[0]; } ` { yy_pop_state(); return yytext[0]; } [\\][abtnvfre "`'\\ ] { yylval.chr = char_esc(yytext[1]); return LITCHAR; } {WS}[\\]\n{WS} { lineno++; } [\\](x{HEX}+|{OCT}+);? { yylval.chr = num_esc(yytext+1); return LITCHAR; } [\\]. { yyerrorf(lit("unrecognized escape: \\~a"), chr(yytext[1]), nao); } (x{HEX}+|o{OCT}+) { yylval.chr = num_esc(yytext); return LITCHAR; } {SYM} { yylval.lexeme = utf8_dup_from(yytext); return SYMTOK; } [^ \t\n] { yylval.lexeme = utf8_dup_from(yytext); return SYMTOK; /* hack */ } \n { yyerrprepf(lit("newline in string literal"), nao); lineno++; yylval.chr = yytext[0]; return ERRTOK; } \n { yyerrprepf(lit("newline in character literal"), nao); lineno++; yylval.chr = yytext[0]; return ERRTOK; } \n { yyerrprepf(lit("newline in string quasiliteral"), nao); lineno++; yylval.chr = yytext[0]; return ERRTOK; } @ { yy_push_state(SPECIAL); } {UANYN} { wchar_t buf[8]; utf8_from(buf, yytext); yylval.chr = buf[0]; return LITCHAR; } . { yyerrprepf(lit("non-UTF-8 byte in literal: '\\x~02x'"), num((unsigned char) yytext[0]), nao); return ERRTOK; } %% void end_of_regex(void) { if (YYSTATE != REGEX) internal_error("end_of_regex called in wrong scanner state"); yy_pop_state(); if (YYSTATE != INITIAL) { if (yy_top_state() == INITIAL || yy_top_state() == QSILIT) yy_pop_state(); } } void end_of_char(void) { if (YYSTATE != CHRLIT) internal_error("end_of_char called in wrong scanner state"); yy_pop_state(); } val source_loc(val form) { return gethash(form_to_ln_hash, form); } val source_loc_str(val form) { cons_bind (line, file, gethash(form_to_ln_hash, form)); return if3(line, format(nil, lit("~a:~a"), file, line, nao), lit("source location n/a")); } void parse_init(void) { protect(&yyin_stream, &prepared_error_message, &form_to_ln_hash, (val *) 0); form_to_ln_hash = make_hash(t, nil, nil); } void parse_reset(val spec_file) { errors = 0; lineno = 1; spec_file_str = spec_file; { FILE *in = w_fopen(c_str(spec_file_str), L"r"); if (in == 0) { spec_file_str = cat_str(list(spec_file_str, lit("txr"), nao), lit(".")); in = w_fopen(c_str(spec_file_str), L"r"); if (in == 0) uw_throwf(file_error_s, lit("unable to open ~a"), spec_file, nao); } yyin_stream = make_stdio_stream(in, spec_file_str); } } val regex_parse(val string, val error_stream) { uses_or2; val parse_string = cat_str(list(lit("@\x01R"), string, nao), nil); val save_stream = std_error; yyin_stream = make_string_byte_input_stream(parse_string); errors = 0; lineno = 1; error_stream = default_bool_arg(error_stream); std_error = if3(error_stream == t, std_output, or2(error_stream, std_null)); { int gc = gc_state(0); spec_file_str = if3(std_error != std_null, format(nil, lit("regex --> ~a"), string, nao), lit("")); yyparse(); yylex_destroy(); gc_state(gc); } std_error = save_stream; return errors ? nil : get_spec(); } val lisp_parse(val source_in, val error_stream) { uses_or2; val source = default_bool_arg(source_in); val input_stream = if3(stringp(source), make_string_byte_input_stream(source), or2(source, std_input)); val secret_token_stream = make_string_byte_input_stream(lit("@\x01" "E")); val name = if3(stringp(source), format(nil, lit("expr --> ~a"), source, nao), stream_get_prop(input_stream, name_k)); val save_stream = std_error; yyin_stream = make_catenated_stream(list(secret_token_stream, input_stream, nao)); errors = 0; lineno = 1; error_stream = default_bool_arg(error_stream); std_error = if3(error_stream == t, std_output, or2(error_stream, std_null)); { int gc = gc_state(0); spec_file_str = if3(std_error != std_null, name, lit("")); yyparse(); yylex_destroy(); gc_state(gc); } std_error = save_stream; return errors ? nil : get_spec(); }