diff options
Diffstat (limited to 'awkgram.y')
-rw-r--r-- | awkgram.y | 274 |
1 files changed, 129 insertions, 145 deletions
@@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1986, 1988, 1989, 1991-2014 the Free Software Foundation, Inc. + * Copyright (C) 1986, 1988, 1989, 1991-2015 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. @@ -57,7 +57,6 @@ static int include_source(INSTRUCTION *file); static int load_library(INSTRUCTION *file); static void next_sourcefile(void); static char *tokexpand(void); -static bool is_deferred_variable(const char *name); #define instruction(t) bcalloc(t, 1, 0) @@ -74,13 +73,10 @@ static INSTRUCTION *mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op) static INSTRUCTION *mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op); static INSTRUCTION *mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op); static INSTRUCTION *mk_getline(INSTRUCTION *op, INSTRUCTION *opt_var, INSTRUCTION *redir, int redirtype); -static NODE *make_regnode(int type, NODE *exp); static int count_expressions(INSTRUCTION **list, bool isarg); static INSTRUCTION *optimize_assignment(INSTRUCTION *exp); static void add_lint(INSTRUCTION *list, LINTTYPE linttype); -static void process_deferred(); - enum defref { FUNC_DEFINE, FUNC_USE, FUNC_EXT }; static void func_use(const char *name, enum defref how); static void check_funcs(void); @@ -88,11 +84,12 @@ static void check_funcs(void); static ssize_t read_one_line(int fd, void *buffer, size_t count); static int one_line_close(int fd); static void split_comment(void); +static void check_comment(void); +static bool at_seen = false; static bool want_source = false; static bool want_regexp = false; /* lexical scanning kludge */ static char *in_function; /* parsing kludge */ -static bool symtab_used = false; /* program used SYMTAB */ static int rule = 0; const char *const ruletab[] = { @@ -146,12 +143,15 @@ static INSTRUCTION *ip_atexit = NULL; static INSTRUCTION *ip_end; static INSTRUCTION *ip_endfile; static INSTRUCTION *ip_beginfile; +INSTRUCTION *main_beginfile; static INSTRUCTION *comment = NULL; static INSTRUCTION *program_comment = NULL; static INSTRUCTION *function_comment = NULL; +static INSTRUCTION *block_comment = NULL; static bool func_first = true; +static bool first_rule = true; static inline INSTRUCTION *list_create(INSTRUCTION *x); static inline INSTRUCTION *list_append(INSTRUCTION *l, INSTRUCTION *x); @@ -213,8 +213,6 @@ program | program LEX_EOF { next_sourcefile(); - if (sourcefile == srcfiles) - process_deferred(); } | program error { @@ -231,6 +229,7 @@ rule : pattern action { (void) append_rule($1, $2); + first_rule = false; } | pattern statement_term { @@ -252,11 +251,13 @@ rule | '@' LEX_INCLUDE source statement_term { want_source = false; + at_seen = false; yyerrok; } | '@' LEX_LOAD library statement_term { want_source = false; + at_seen = false; yyerrok; } ; @@ -334,7 +335,11 @@ pattern ($1->nexti + 1)->condpair_left = $1->lasti; ($1->nexti + 1)->condpair_right = $4->lasti; } - $$ = list_append(list_merge($1, $4), tp); + if (comment != NULL) { + $$ = list_append(list_merge(list_prepend($1, comment), $4), tp); + comment = NULL; + } else + $$ = list_append(list_merge($1, $4), tp); rule = Rule; } | LEX_BEGIN @@ -348,6 +353,7 @@ pattern $1->in_rule = rule = BEGIN; $1->source_file = source; + check_comment(); $$ = $1; } | LEX_END @@ -361,6 +367,7 @@ pattern $1->in_rule = rule = END; $1->source_file = source; + check_comment(); $$ = $1; } | LEX_BEGINFILE @@ -368,6 +375,7 @@ pattern func_first = false; $1->in_rule = rule = BEGINFILE; $1->source_file = source; + check_comment(); $$ = $1; } | LEX_ENDFILE @@ -375,6 +383,7 @@ pattern func_first = false; $1->in_rule = rule = ENDFILE; $1->source_file = source; + check_comment(); $$ = $1; } ; @@ -403,7 +412,10 @@ func_name YYABORT; } | '@' LEX_EVAL - { $$ = $2; } + { + $$ = $2; + at_seen = false; + } ; lex_builtin @@ -1684,12 +1696,24 @@ func_call */ $$ = list_prepend($2, t); + at_seen = false; } ; direct_func_call : FUNC_CALL '(' opt_expression_list r_paren { + NODE *n; + + if (! at_seen) { + n = lookup($1->func_name); + if (n != NULL && n->type != Node_func + && n->type != Node_ext_func && n->type != Node_old_ext_func) { + error_ln($1->source_line, + _("attempt to use non-function `%s' in function call"), + $1->func_name); + } + } param_sanity($3); $1->opcode = Op_func_call; $1->func_body = NULL; @@ -1925,7 +1949,6 @@ static const struct token tokentab[] = { {"dcngettext", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext, 0}, {"default", Op_K_default, LEX_DEFAULT, GAWKX, 0, 0}, {"delete", Op_K_delete, LEX_DELETE, NOT_OLD, 0, 0}, -{"div", Op_builtin, LEX_BUILTIN, GAWKX|A(3), do_div, MPF(div)}, {"do", Op_K_do, LEX_DO, NOT_OLD|BREAK|CONTINUE, 0, 0}, {"else", Op_K_else, LEX_ELSE, 0, 0, 0}, {"eval", Op_symbol, LEX_EVAL, 0, 0, 0}, @@ -1946,6 +1969,7 @@ static const struct token tokentab[] = { {"include", Op_symbol, LEX_INCLUDE, GAWKX, 0, 0}, {"index", Op_builtin, LEX_BUILTIN, A(2), do_index, 0}, {"int", Op_builtin, LEX_BUILTIN, A(1), do_int, MPF(int)}, +{"intdiv", Op_builtin, LEX_BUILTIN, GAWKX|A(3), do_intdiv, MPF(intdiv)}, {"isarray", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_isarray, 0}, {"length", Op_builtin, LEX_LENGTH, A(0)|A(1), do_length, 0}, {"load", Op_symbol, LEX_LOAD, GAWKX, 0, 0}, @@ -1983,7 +2007,6 @@ static const struct token tokentab[] = { {"xor", Op_builtin, LEX_BUILTIN, GAWKX, do_xor, MPF(xor)}, }; -#if MBS_SUPPORT /* Variable containing the current shift state. */ static mbstate_t cur_mbstate; /* Ring buffer containing current characters. */ @@ -1995,10 +2018,6 @@ static int cur_ring_idx; /* This macro means that last nextc() return a singlebyte character or 1st byte of a multibyte character. */ #define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1) -#else /* MBS_SUPPORT */ -/* a dummy */ -#define nextc_is_1stbyte 1 -#endif /* MBS_SUPPORT */ /* getfname --- return name of a builtin function (for pretty printing) */ @@ -2287,11 +2306,9 @@ mk_program() cp = end_block; else cp = list_merge(begin_block, end_block); - /* - * We don't need to clear the comment variables - * since they're not used anymore after this - * function is called. - */ + if (program_comment != NULL) { + (void) list_prepend(cp, program_comment); + } if (comment != NULL) (void) list_append(cp, comment); (void) list_append(cp, ip_atexit); @@ -2339,6 +2356,10 @@ out: /* delete the Op_list, not needed */ tmp = cp->nexti; bcfree(cp); + /* these variables are not used again but zap them anyway. */ + comment = NULL; + function_comment = NULL; + program_comment = NULL; return tmp; #undef begin_block @@ -2365,7 +2386,7 @@ parse_program(INSTRUCTION **pcode) ip_newfile = ip_rec = ip_atexit = ip_beginfile = ip_endfile = NULL; else { ip_endfile = instruction(Op_no_op); - ip_beginfile = instruction(Op_no_op); + main_beginfile = ip_beginfile = instruction(Op_no_op); ip_rec = instruction(Op_get_record); /* target for `next', also ip_newfile */ ip_newfile = bcalloc(Op_newfile, 2, 0); /* target for `nextfile' */ ip_newfile->target_jmp = ip_end; @@ -2395,6 +2416,9 @@ parse_program(INSTRUCTION **pcode) if (ret == 0) /* avoid spurious warning if parser aborted with YYABORT */ check_funcs(); + if (do_posix && ! check_param_names()) + errcount++; + if (args_array == NULL) emalloc(args_array, NODE **, (max_args + 2) * sizeof(NODE *), "parse_program"); else @@ -2900,8 +2924,6 @@ check_bad_char(int c) /* nextc --- get the next input character */ -#if MBS_SUPPORT - static int nextc(bool check_for_bad) { @@ -2972,43 +2994,40 @@ again: } } -#else /* MBS_SUPPORT */ - -int -nextc(bool check_for_bad) -{ - do { - if (lexeof) - return END_FILE; - if (lexptr && lexptr < lexend) { - if (check_for_bad) - check_bad_char(*lexptr); - return ((int) (unsigned char) *lexptr++); - } - } while (get_src_buf()); - return END_SRC; -} - -#endif /* MBS_SUPPORT */ - /* pushback --- push a character back on the input */ static inline void pushback(void) { -#if MBS_SUPPORT if (gawk_mb_cur_max > 1) cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 : cur_ring_idx - 1; -#endif (! lexeof && lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr); } +/* check_comment --- check for block comment */ -/* get_comment --- collect comment text */ +void +check_comment(void) +{ + if (comment != NULL) { + if (first_rule) { + program_comment = comment; + } else + block_comment = comment; + comment = NULL; + } + first_rule = false; +} + +/* + * get_comment --- collect comment text. + * Flag = EOL_COMMENT for end-of-line comments. + * Flag = FULL_COMMENT for self-contained comments. + */ int -get_comment(void) +get_comment(int flag) { int c; int sl; @@ -3020,6 +3039,12 @@ get_comment(void) while ((c = nextc(false)) != '\n' && c != END_FILE) { tokadd(c); } + if (flag == EOL_COMMENT) { + /* comment at end of line. */ + if (c == '\n') + tokadd(c); + break; + } if (c == '\n') { tokadd(c); sourceline++; @@ -3034,6 +3059,7 @@ get_comment(void) break; else if (c != '#') { pushback(); + sourceline--; break; } else tokadd(c); @@ -3043,6 +3069,7 @@ get_comment(void) comment = bcalloc(Op_comment, 1, sl); comment->source_file = source; comment->memory = make_str_node(tokstart, tok - tokstart, 0); + comment->memory->comment_type = flag; return c; } @@ -3094,7 +3121,7 @@ allow_newline(void) if (c == '#') { if (do_pretty_print && ! do_profile) { /* collect comment byte code iff doing pretty print but not profiling. */ - c = get_comment(); + c = get_comment(EOL_COMMENT); } else { while ((c = nextc(false)) != '\n' && c != END_FILE) continue; @@ -3170,7 +3197,7 @@ yylex(void) if (lasttok == LEX_EOF) /* error earlier in current source, must give up !! */ return 0; - c = nextc(true); + c = nextc(! want_regexp); if (c == END_SRC) return 0; if (c == END_FILE) @@ -3212,30 +3239,31 @@ yylex(void) want_regexp = false; tok = tokstart; for (;;) { - c = nextc(true); + c = nextc(false); if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) { case '[': /* one day check for `.' and `=' too */ - if (nextc(true) == ':' || in_brack == 0) + if (nextc(false) == ':' || in_brack == 0) in_brack++; pushback(); break; case ']': - if (tokstart[0] == '[' - && (tok == tokstart + 1 - || (tok == tokstart + 2 - && tokstart[1] == '^'))) + if (tok[-1] == '[' + || (tok[-2] == '[' && tok[-1] == '^')) /* do nothing */; else in_brack--; break; case '\\': - if ((c = nextc(true)) == END_FILE) { + if ((c = nextc(false)) == END_FILE) { pushback(); yyerror(_("unterminated regexp ends with `\\' at end of file")); goto end_regexp; /* kludge */ - } else if (c == '\n') { + } + if (c == '\r') /* allow MS-DOS files. bleah */ + c = nextc(true); + if (c == '\n') { sourceline++; continue; } else { @@ -3288,9 +3316,7 @@ retry: thisline = NULL; tok = tokstart; -#if MBS_SUPPORT if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) -#endif switch (c) { case END_SRC: return 0; @@ -3308,7 +3334,10 @@ retry: * Collect comment byte code iff doing pretty print * but not profiling. */ - c = get_comment(); + if (lasttok == NEWLINE || lasttok == 0) + c = get_comment(FULL_COMMENT); + else + c = get_comment(EOL_COMMENT); if (c == END_FILE) return lasttok = NEWLINE_EOF; @@ -3322,6 +3351,7 @@ retry: return lasttok = NEWLINE; case '@': + at_seen = true; return lasttok = '@'; case '\\': @@ -3345,7 +3375,7 @@ retry: _("use of `\\ #...' line continuation is not portable")); } if (do_pretty_print && ! do_profile) - c = get_comment(); + c = get_comment(EOL_COMMENT); else { while ((c = nextc(false)) != '\n') if (c == END_FILE) @@ -3447,7 +3477,7 @@ retry: return lasttok = '*'; case '/': - if (nextc(true) == '=') { + if (nextc(false) == '=') { pushback(); return lasttok = SLASH_BEFORE_EQUAL; } @@ -3582,6 +3612,8 @@ retry: if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) && c == '\\') { c = nextc(true); + if (c == '\r') /* allow MS-DOS files. bleah */ + c = nextc(true); if (c == '\n') { sourceline++; continue; @@ -4111,9 +4143,9 @@ snode(INSTRUCTION *subn, INSTRUCTION *r) arg = subn->nexti; if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push) arg->nexti->opcode = Op_push_arg; /* argument may be array */ - } else if (r->builtin == do_div + } else if (r->builtin == do_intdiv #ifdef HAVE_MPFR - || r->builtin == MPF(div) + || r->builtin == MPF(intdiv) #endif ) { arg = subn->nexti->lasti->nexti->lasti->nexti; /* 3rd arg list */ @@ -4465,7 +4497,7 @@ install_function(char *fname, INSTRUCTION *fi, INSTRUCTION *plist) int pcount = 0; r = lookup(fname); - if (r != NULL || is_deferred_variable(fname)) { + if (r != NULL) { error_ln(fi->source_line, _("function name `%s' previously defined"), fname); return -1; } @@ -4658,50 +4690,6 @@ param_sanity(INSTRUCTION *arglist) } } -/* deferred variables --- those that are only defined if needed. */ - -/* - * Is there any reason to use a hash table for deferred variables? At the - * moment, there are only 1 to 3 such variables, so it may not be worth - * the overhead. If more modules start using this facility, it should - * probably be converted into a hash table. - */ - -static struct deferred_variable { - NODE *(*load_func)(void); - struct deferred_variable *next; - char name[1]; /* variable-length array */ -} *deferred_variables; - -/* register_deferred_variable --- add a var name and loading function to the list */ - -void -register_deferred_variable(const char *name, NODE *(*load_func)(void)) -{ - struct deferred_variable *dv; - size_t sl = strlen(name); - - emalloc(dv, struct deferred_variable *, sizeof(*dv)+sl, - "register_deferred_variable"); - dv->load_func = load_func; - dv->next = deferred_variables; - memcpy(dv->name, name, sl+1); - deferred_variables = dv; -} - -/* is_deferred_variable --- check if NAME is a deferred variable */ - -static bool -is_deferred_variable(const char *name) -{ - struct deferred_variable *dv; - for (dv = deferred_variables; dv != NULL; dv = dv->next) - if (strcmp(name, dv->name) == 0) - return true; - return false; -} - - /* variable --- make sure NAME is in the symbol table */ NODE * @@ -4713,47 +4701,17 @@ variable(int location, char *name, NODETYPE type) if (r->type == Node_func || r->type == Node_ext_func ) error_ln(location, _("function `%s' called with space between name and `(',\nor used as a variable or an array"), r->vname); - if (r == symbol_table) - symtab_used = true; } else { /* not found */ - struct deferred_variable *dv; - - for (dv = deferred_variables; true; dv = dv->next) { - if (dv == NULL) { - /* - * This is the only case in which we may not free the string. - */ - return install_symbol(name, type); - } - if (strcmp(name, dv->name) == 0) { - r = (*dv->load_func)(); - break; - } - } + return install_symbol(name, type); } efree(name); return r; } -/* process_deferred --- if the program uses SYMTAB, load deferred variables */ - -static void -process_deferred() -{ - struct deferred_variable *dv; - - if (! symtab_used) - return; - - for (dv = deferred_variables; dv != NULL; dv = dv->next) { - (void) dv->load_func(); - } -} - /* make_regnode --- make a regular expression node */ -static NODE * +NODE * make_regnode(int type, NODE *exp) { NODE *n; @@ -5200,7 +5158,11 @@ append_rule(INSTRUCTION *pattern, INSTRUCTION *action) (rp + 1)->lasti = action->lasti; (rp + 2)->first_line = pattern->source_line; (rp + 2)->last_line = lastline; - ip = list_prepend(action, rp); + if (block_comment != NULL) { + ip = list_prepend(list_prepend(action, block_comment), rp); + block_comment = NULL; + } else + ip = list_prepend(action, rp); } else { rp = bcalloc(Op_rule, 3, 0); @@ -5905,13 +5867,26 @@ lookup_builtin(const char *name) { int mid = check_special(name); - if (mid == -1 || tokentab[mid].class != LEX_BUILTIN) + if (mid == -1) + return NULL; + + switch (tokentab[mid].class) { + case LEX_BUILTIN: + case LEX_LENGTH: + break; + default: return NULL; + } + #ifdef HAVE_MPFR if (do_mpfr) return tokentab[mid].ptr2; #endif + /* And another special case... */ + if (tokentab[mid].value == Op_sub_builtin) + return (builtin_func_t) do_sub; + return tokentab[mid].ptr; } @@ -5921,11 +5896,20 @@ void install_builtins(void) { int i, j; + int flags_that_must_be_clear = DEBUG_USE; + + if (do_traditional) + flags_that_must_be_clear |= GAWKX; + + if (do_posix) + flags_that_must_be_clear |= NOT_POSIX; + j = sizeof(tokentab) / sizeof(tokentab[0]); for (i = 0; i < j; i++) { - if ( tokentab[i].class == LEX_BUILTIN - && (tokentab[i].flags & DEBUG_USE) == 0) { + if ( (tokentab[i].class == LEX_BUILTIN + || tokentab[i].class == LEX_LENGTH) + && (tokentab[i].flags & flags_that_must_be_clear) == 0) { (void) install_symbol(tokentab[i].operator, Node_builtin_func); } } |