diff options
Diffstat (limited to 'awkgram.y')
-rw-r--r-- | awkgram.y | 258 |
1 files changed, 197 insertions, 61 deletions
@@ -155,8 +155,6 @@ static inline INSTRUCTION *list_merge(INSTRUCTION *l1, INSTRUCTION *l2); extern double fmod(double x, double y); #define YYSTYPE INSTRUCTION * - -#define is_identchar(c) (isalnum(c) || (c) == '_') %} %token FUNC_CALL NAME REGEXP FILENAME @@ -1406,21 +1404,12 @@ simp_exp | LEX_GETLINE opt_variable input_redir { /* - * In BEGINFILE/ENDFILE, allow `getline var < file' + * In BEGINFILE/ENDFILE, allow `getline [var] < file' */ - if (rule == BEGINFILE || rule == ENDFILE) { - if ($2 != NULL && $3 != NULL) - ; /* all ok */ - else { - if ($2 != NULL) - error_ln($1->source_line, - _("`getline var' invalid inside `%s' rule"), ruletab[rule]); - else - error_ln($1->source_line, - _("`getline' invalid inside `%s' rule"), ruletab[rule]); - } - } + if ((rule == BEGINFILE || rule == ENDFILE) && $3 == NULL) + error_ln($1->source_line, + _("non-redirected `getline' invalid inside `%s' rule"), ruletab[rule]); if (do_lint && rule == END && $3 == NULL) lintwarn_ln($1->source_line, _("non-redirected `getline' undefined inside END action")); @@ -1815,6 +1804,7 @@ struct token { # define GAWKX 0x0400 /* gawk extension */ # define BREAK 0x0800 /* break allowed inside */ # define CONTINUE 0x1000 /* continue allowed inside */ +# define DEBUG_USE 0x2000 /* for use by developers */ NODE *(*ptr)(int); /* function that implements this keyword */ NODE *(*ptr2)(int); /* alternate arbitrary-precision function */ @@ -1853,7 +1843,7 @@ static const struct token tokentab[] = { {"END", Op_rule, LEX_END, 0, 0, 0}, {"ENDFILE", Op_rule, LEX_ENDFILE, GAWKX, 0, 0}, #ifdef ARRAYDEBUG -{"adump", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_adump, 0}, +{"adump", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|DEBUG_USE, do_adump, 0}, #endif {"and", Op_builtin, LEX_BUILTIN, GAWKX, do_and, MPF(and)}, {"asort", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_asort, 0}, @@ -1870,6 +1860,7 @@ static const struct token tokentab[] = { {"dcngettext", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext, 0}, {"default", Op_K_default, LEX_DEFAULT, GAWKX, 0, 0}, {"delete", Op_K_delete, LEX_DELETE, NOT_OLD, 0, 0}, +{"div", Op_builtin, LEX_BUILTIN, GAWKX|A(3), do_div, MPF(div)}, {"do", Op_K_do, LEX_DO, NOT_OLD|BREAK|CONTINUE, 0, 0}, {"else", Op_K_else, LEX_ELSE, 0, 0, 0}, {"eval", Op_symbol, LEX_EVAL, 0, 0, 0}, @@ -1912,7 +1903,7 @@ static const struct token tokentab[] = { {"sqrt", Op_builtin, LEX_BUILTIN, A(1), do_sqrt, MPF(sqrt)}, {"srand", Op_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand, MPF(srand)}, #if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */ -{"stopme", Op_builtin, LEX_BUILTIN, GAWKX|A(0), stopme, 0}, +{"stopme", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|DEBUG_USE, stopme, 0}, #endif {"strftime", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime, 0}, {"strtonum", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum, MPF(strtonum)}, @@ -1954,7 +1945,7 @@ getfname(NODE *(*fptr)(int)) j = sizeof(tokentab) / sizeof(tokentab[0]); /* linear search, no other way to do it */ for (i = 0; i < j; i++) - if (tokentab[i].ptr == fptr) + if (tokentab[i].ptr == fptr || tokentab[i].ptr2 == fptr) return tokentab[i].operator; return NULL; @@ -2334,6 +2325,15 @@ parse_program(INSTRUCTION **pcode) return (ret || errcount); } +/* free_srcfile --- free a SRCFILE struct */ + +void +free_srcfile(SRCFILE *thisfile) +{ + efree(thisfile->src); + efree(thisfile); +} + /* do_add_srcfile --- add one item to srcfiles */ static SRCFILE * @@ -2792,12 +2792,40 @@ tokexpand() return tok; } +/* check_bad_char --- fatal if c isn't allowed in gawk source code */ + +/* + * The error message was inspired by someone who decided to put + * a physical \0 byte into the source code to see what would + * happen and then filed a bug report about it. Sigh. + */ + +static void +check_bad_char(int c) +{ + /* allow escapes. needed for autoconf. bleah. */ + switch (c) { + case '\a': + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + return; + default: + break; + } + + if (iscntrl(c) && ! isspace(c)) + fatal(_("PEBKAC error: invalid character '\\%03o' in source code"), c); +} + /* nextc --- get the next input character */ #if MBS_SUPPORT static int -nextc(void) +nextc(bool check_for_bad) { if (gawk_mb_cur_max > 1) { again: @@ -2848,14 +2876,19 @@ again: 0 : work_ring_idx + 1; cur_char_ring[work_ring_idx] = 0; } + if (check_for_bad) + check_bad_char(*lexptr); return (int) (unsigned char) *lexptr++; } else { do { if (lexeof) return END_FILE; - if (lexptr && lexptr < lexend) - return ((int) (unsigned char) *lexptr++); + if (lexptr && lexptr < lexend) { + if (check_for_bad) + check_bad_char(*lexptr); + return ((int) (unsigned char) *lexptr++); + } } while (get_src_buf()); return END_SRC; } @@ -2864,13 +2897,16 @@ again: #else /* MBS_SUPPORT */ int -nextc() +nextc(bool check_for_bad) { do { if (lexeof) return END_FILE; - if (lexptr && lexptr < lexend) + if (lexptr && lexptr < lexend) { + if (check_for_bad) + check_bad_char(*lexptr); return ((int) (unsigned char) *lexptr++); + } } while (get_src_buf()); return END_SRC; } @@ -2899,13 +2935,13 @@ allow_newline(void) int c; for (;;) { - c = nextc(); + c = nextc(true); if (c == END_FILE) { pushback(); break; } if (c == '#') { - while ((c = nextc()) != '\n' && c != END_FILE) + while ((c = nextc(false)) != '\n' && c != END_FILE) continue; if (c == END_FILE) { pushback(); @@ -2977,7 +3013,7 @@ yylex(void) if (lasttok == LEX_EOF) /* error earlier in current source, must give up !! */ return 0; - c = nextc(); + c = nextc(true); if (c == END_SRC) return 0; if (c == END_FILE) @@ -3019,12 +3055,12 @@ yylex(void) want_regexp = false; tok = tokstart; for (;;) { - c = nextc(); + c = nextc(true); if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) { case '[': /* one day check for `.' and `=' too */ - if (nextc() == ':' || in_brack == 0) + if (nextc(true) == ':' || in_brack == 0) in_brack++; pushback(); break; @@ -3038,7 +3074,7 @@ yylex(void) in_brack--; break; case '\\': - if ((c = nextc()) == END_FILE) { + if ((c = nextc(true)) == END_FILE) { pushback(); yyerror(_("unterminated regexp ends with `\\' at end of file")); goto end_regexp; /* kludge */ @@ -3058,7 +3094,7 @@ end_regexp: yylval = GET_INSTRUCTION(Op_token); yylval->lextok = estrdup(tokstart, tok - tokstart); if (do_lint) { - int peek = nextc(); + int peek = nextc(true); pushback(); if (peek == 'i' || peek == 's') { @@ -3088,7 +3124,7 @@ end_regexp: retry: /* skipping \r is a hack, but windows is just too pervasive. sigh. */ - while ((c = nextc()) == ' ' || c == '\t' || c == '\r') + while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r') continue; lexeme = lexptr ? lexptr - 1 : lexptr; @@ -3110,7 +3146,7 @@ retry: return lasttok = NEWLINE; case '#': /* it's a comment */ - while ((c = nextc()) != '\n') { + while ((c = nextc(false)) != '\n') { if (c == END_FILE) return lasttok = NEWLINE_EOF; } @@ -3130,7 +3166,7 @@ retry: */ if (! do_traditional) { /* strip trailing white-space and/or comment */ - while ((c = nextc()) == ' ' || c == '\t' || c == '\r') + while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r') continue; if (c == '#') { static bool warned = false; @@ -3140,16 +3176,16 @@ retry: lintwarn( _("use of `\\ #...' line continuation is not portable")); } - while ((c = nextc()) != '\n') + while ((c = nextc(false)) != '\n') if (c == END_FILE) break; } pushback(); } #endif /* RELAXED_CONTINUATION */ - c = nextc(); + c = nextc(true); if (c == '\r') /* allow MS-DOS files. bleah */ - c = nextc(); + c = nextc(true); if (c == '\n') { sourceline++; goto retry; @@ -3188,7 +3224,7 @@ retry: case '[': return lasttok = c; case ']': - c = nextc(); + c = nextc(true); pushback(); if (c == '[') { yylval = GET_INSTRUCTION(Op_sub_array); @@ -3200,7 +3236,7 @@ retry: return ']'; case '*': - if ((c = nextc()) == '=') { + if ((c = nextc(true)) == '=') { yylval = GET_INSTRUCTION(Op_assign_times); return lasttok = ASSIGNOP; } else if (do_posix) { @@ -3211,7 +3247,7 @@ retry: /* make ** and **= aliases for ^ and ^= */ static bool did_warn_op = false, did_warn_assgn = false; - if (nextc() == '=') { + if (nextc(true) == '=') { if (! did_warn_assgn) { did_warn_assgn = true; if (do_lint) @@ -3239,7 +3275,7 @@ retry: return lasttok = '*'; case '/': - if (nextc() == '=') { + if (nextc(true) == '=') { pushback(); return lasttok = SLASH_BEFORE_EQUAL; } @@ -3248,7 +3284,7 @@ retry: return lasttok = '/'; case '%': - if (nextc() == '=') { + if (nextc(true) == '=') { yylval = GET_INSTRUCTION(Op_assign_mod); return lasttok = ASSIGNOP; } @@ -3260,7 +3296,7 @@ retry: { static bool did_warn_op = false, did_warn_assgn = false; - if (nextc() == '=') { + if (nextc(true) == '=') { if (do_lint_old && ! did_warn_assgn) { did_warn_assgn = true; warning(_("operator `^=' is not supported in old awk")); @@ -3278,7 +3314,7 @@ retry: } case '+': - if ((c = nextc()) == '=') { + if ((c = nextc(true)) == '=') { yylval = GET_INSTRUCTION(Op_assign_plus); return lasttok = ASSIGNOP; } @@ -3291,7 +3327,7 @@ retry: return lasttok = '+'; case '!': - if ((c = nextc()) == '=') { + if ((c = nextc(true)) == '=') { yylval = GET_INSTRUCTION(Op_notequal); return lasttok = RELOP; } @@ -3304,7 +3340,7 @@ retry: return lasttok = '!'; case '<': - if (nextc() == '=') { + if (nextc(true) == '=') { yylval = GET_INSTRUCTION(Op_leq); return lasttok = RELOP; } @@ -3313,7 +3349,7 @@ retry: return lasttok = '<'; case '=': - if (nextc() == '=') { + if (nextc(true) == '=') { yylval = GET_INSTRUCTION(Op_equal); return lasttok = RELOP; } @@ -3322,7 +3358,7 @@ retry: return lasttok = ASSIGN; case '>': - if ((c = nextc()) == '=') { + if ((c = nextc(true)) == '=') { yylval = GET_INSTRUCTION(Op_geq); return lasttok = RELOP; } else if (c == '>') { @@ -3361,7 +3397,7 @@ retry: case '"': string: esc_seen = false; - while ((c = nextc()) != '"') { + while ((c = nextc(true)) != '"') { if (c == '\n') { pushback(); yyerror(_("unterminated string")); @@ -3369,7 +3405,7 @@ retry: } if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) && c == '\\') { - c = nextc(); + c = nextc(true); if (c == '\n') { sourceline++; continue; @@ -3403,7 +3439,7 @@ retry: return lasttok = YSTRING; case '-': - if ((c = nextc()) == '=') { + if ((c = nextc(true)) == '=') { yylval = GET_INSTRUCTION(Op_assign_minus); return lasttok = ASSIGNOP; } @@ -3416,7 +3452,7 @@ retry: return lasttok = '-'; case '.': - c = nextc(); + c = nextc(true); pushback(); if (! isdigit(c)) return lasttok = '.'; @@ -3444,7 +3480,7 @@ retry: if (do_traditional) goto done; if (tok == tokstart + 2) { - int peek = nextc(); + int peek = nextc(true); if (isxdigit(peek)) { inhex = true; @@ -3472,8 +3508,8 @@ retry: break; } seen_e = true; - if ((c = nextc()) == '-' || c == '+') { - int c2 = nextc(); + if ((c = nextc(true)) == '-' || c == '+') { + int c2 = nextc(true); if (isdigit(c2)) { tokadd(c); @@ -3520,7 +3556,7 @@ retry: } if (gotnumber) break; - c = nextc(); + c = nextc(true); } pushback(); @@ -3569,7 +3605,7 @@ retry: return lasttok = YNUMBER; case '&': - if ((c = nextc()) == '&') { + if ((c = nextc(true)) == '&') { yylval = GET_INSTRUCTION(Op_and); allow_newline(); return lasttok = LEX_AND; @@ -3579,7 +3615,7 @@ retry: return lasttok = '&'; case '|': - if ((c = nextc()) == '|') { + if ((c = nextc(true)) == '|') { yylval = GET_INSTRUCTION(Op_or); allow_newline(); return lasttok = LEX_OR; @@ -3600,7 +3636,7 @@ retry: } } - if (c != '_' && ! isalpha(c)) { + if (c != '_' && ! is_alpha(c)) { yyerror(_("invalid char '%c' in expression"), c); return lasttok = LEX_EOF; } @@ -3620,7 +3656,7 @@ retry: * occasions where the interactions are funny. */ if (! do_traditional && c == '_' && lasttok != '$') { - if ((c = nextc()) == '"') { + if ((c = nextc(true)) == '"') { intlstr = true; goto string; } @@ -3632,7 +3668,7 @@ retry: tok = tokstart; while (c != END_FILE && is_identchar(c)) { tokadd(c); - c = nextc(); + c = nextc(true); } tokadd('\0'); pushback(); @@ -3870,7 +3906,7 @@ snode(INSTRUCTION *subn, INSTRUCTION *r) } #ifdef HAVE_MPFR - /* N.B.: There isn't any special processing for an alternate function below */ + /* N.B.: If necessary, add special processing for alternate builtin, below */ if (do_mpfr && tokentab[idx].ptr2) r->builtin = tokentab[idx].ptr2; else @@ -3899,6 +3935,15 @@ snode(INSTRUCTION *subn, INSTRUCTION *r) arg = subn->nexti; if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push) arg->nexti->opcode = Op_push_arg; /* argument may be array */ + } else if (r->builtin == do_div +#ifdef HAVE_MPFR + || r->builtin == MPF(div) +#endif + ) { + arg = subn->nexti->lasti->nexti->lasti->nexti; /* 3rd arg list */ + ip = arg->lasti; + if (ip->opcode == Op_push) + ip->opcode = Op_push_array; } else if (r->builtin == do_match) { static bool warned = false; @@ -5670,3 +5715,94 @@ one_line_close(int fd) } +/* lookup_builtin --- find a builtin function or return NULL */ + +builtin_func_t +lookup_builtin(const char *name) +{ + int mid = check_special(name); + + if (mid == -1 || tokentab[mid].class != LEX_BUILTIN) + return NULL; +#ifdef HAVE_MPFR + if (do_mpfr) + return tokentab[mid].ptr2; +#endif + + return tokentab[mid].ptr; +} + +/* install_builtins --- add built-in functions to FUNCTAB */ + +void +install_builtins(void) +{ + int i, j; + + j = sizeof(tokentab) / sizeof(tokentab[0]); + for (i = 0; i < j; i++) { + if ( tokentab[i].class == LEX_BUILTIN + && (tokentab[i].flags & DEBUG_USE) == 0) { + (void) install_symbol(tokentab[i].operator, Node_builtin_func); + } + } +} + +/* + * 9/2014: Gawk cannot use <ctype.h> isalpha or isalnum when + * parsing the program since that can let through non-English + * letters. So, we supply our own. !@#$%^&*()-ing locales! + */ + +/* is_alpha --- return true if c is an English letter */ + +/* + * The scene of the murder was grisly to look upon. When the inspector + * arrived, the sergeant turned to him and said, "Another programmer stabbed + * in the back. He never knew what happened." + * + * The inspector replied, "Looks like the MO of isalpha, and his even meaner + * big brother, isalnum. The Locale brothers." The sergeant merely + * shuddered in horror. + */ + +bool +is_alpha(int c) +{ +#ifdef I_DONT_KNOW_WHAT_IM_DOING + return isalpha(c); +#else /* ! I_DONT_KNOW_WHAT_IM_DOING */ + switch (c) { + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + return true; + } + return false; +#endif /* ! I_DONT_KNOW_WHAT_IM_DOING */ +} + +/* is_alnum --- return true for alphanumeric, English only letters */ + +bool +is_alnum(int c) +{ + /* digit test is good for EBCDIC too. so there. */ + return (is_alpha(c) || ('0' <= c && c <= '9')); +} + + +/* is_identchar --- return true if c can be in an identifier */ + +bool +is_identchar(int c) +{ + return (is_alnum(c) || c == '_'); +} |