diff options
Diffstat (limited to 'awkgram.c')
-rw-r--r-- | awkgram.c | 130 |
1 files changed, 88 insertions, 42 deletions
@@ -4199,6 +4199,7 @@ static const struct token tokentab[] = { {"dcngettext", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext, 0}, {"default", Op_K_default, LEX_DEFAULT, GAWKX, 0, 0}, {"delete", Op_K_delete, LEX_DELETE, NOT_OLD, 0, 0}, +{"div", Op_builtin, LEX_BUILTIN, GAWKX|A(3), do_div, MPF(div)}, {"do", Op_K_do, LEX_DO, NOT_OLD|BREAK|CONTINUE, 0, 0}, {"else", Op_K_else, LEX_ELSE, 0, 0, 0}, {"eval", Op_symbol, LEX_EVAL, 0, 0, 0}, @@ -5130,12 +5131,40 @@ tokexpand() return tok; } +/* check_bad_char --- fatal if c isn't allowed in gawk source code */ + +/* + * The error message was inspired by someone who decided to put + * a physical \0 byte into the source code to see what would + * happen and then filed a bug report about it. Sigh. + */ + +static void +check_bad_char(int c) +{ + /* allow escapes. needed for autoconf. bleah. */ + switch (c) { + case '\a': + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + return; + default: + break; + } + + if (iscntrl(c) && ! isspace(c)) + fatal(_("PEBKAC error: invalid character '\\%03o' in source code"), c); +} + /* nextc --- get the next input character */ #if MBS_SUPPORT static int -nextc(void) +nextc(bool check_for_bad) { if (gawk_mb_cur_max > 1) { again: @@ -5186,14 +5215,19 @@ again: 0 : work_ring_idx + 1; cur_char_ring[work_ring_idx] = 0; } + if (check_for_bad) + check_bad_char(*lexptr); return (int) (unsigned char) *lexptr++; } else { do { if (lexeof) return END_FILE; - if (lexptr && lexptr < lexend) - return ((int) (unsigned char) *lexptr++); + if (lexptr && lexptr < lexend) { + if (check_for_bad) + check_bad_char(*lexptr); + return ((int) (unsigned char) *lexptr++); + } } while (get_src_buf()); return END_SRC; } @@ -5202,13 +5236,16 @@ again: #else /* MBS_SUPPORT */ int -nextc() +nextc(bool check_for_bad) { do { if (lexeof) return END_FILE; - if (lexptr && lexptr < lexend) + if (lexptr && lexptr < lexend) { + if (check_for_bad) + check_bad_char(*lexptr); return ((int) (unsigned char) *lexptr++); + } } while (get_src_buf()); return END_SRC; } @@ -5237,13 +5274,13 @@ allow_newline(void) int c; for (;;) { - c = nextc(); + c = nextc(true); if (c == END_FILE) { pushback(); break; } if (c == '#') { - while ((c = nextc()) != '\n' && c != END_FILE) + while ((c = nextc(false)) != '\n' && c != END_FILE) continue; if (c == END_FILE) { pushback(); @@ -5315,7 +5352,7 @@ yylex(void) if (lasttok == LEX_EOF) /* error earlier in current source, must give up !! */ return 0; - c = nextc(); + c = nextc(true); if (c == END_SRC) return 0; if (c == END_FILE) @@ -5357,12 +5394,12 @@ yylex(void) want_regexp = false; tok = tokstart; for (;;) { - c = nextc(); + c = nextc(true); if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) { case '[': /* one day check for `.' and `=' too */ - if (nextc() == ':' || in_brack == 0) + if (nextc(true) == ':' || in_brack == 0) in_brack++; pushback(); break; @@ -5376,7 +5413,7 @@ yylex(void) in_brack--; break; case '\\': - if ((c = nextc()) == END_FILE) { + if ((c = nextc(true)) == END_FILE) { pushback(); yyerror(_("unterminated regexp ends with `\\' at end of file")); goto end_regexp; /* kludge */ @@ -5396,7 +5433,7 @@ end_regexp: yylval = GET_INSTRUCTION(Op_token); yylval->lextok = estrdup(tokstart, tok - tokstart); if (do_lint) { - int peek = nextc(); + int peek = nextc(true); pushback(); if (peek == 'i' || peek == 's') { @@ -5426,7 +5463,7 @@ end_regexp: retry: /* skipping \r is a hack, but windows is just too pervasive. sigh. */ - while ((c = nextc()) == ' ' || c == '\t' || c == '\r') + while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r') continue; lexeme = lexptr ? lexptr - 1 : lexptr; @@ -5448,7 +5485,7 @@ retry: return lasttok = NEWLINE; case '#': /* it's a comment */ - while ((c = nextc()) != '\n') { + while ((c = nextc(false)) != '\n') { if (c == END_FILE) return lasttok = NEWLINE_EOF; } @@ -5468,7 +5505,7 @@ retry: */ if (! do_traditional) { /* strip trailing white-space and/or comment */ - while ((c = nextc()) == ' ' || c == '\t' || c == '\r') + while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r') continue; if (c == '#') { static bool warned = false; @@ -5478,16 +5515,16 @@ retry: lintwarn( _("use of `\\ #...' line continuation is not portable")); } - while ((c = nextc()) != '\n') + while ((c = nextc(false)) != '\n') if (c == END_FILE) break; } pushback(); } #endif /* RELAXED_CONTINUATION */ - c = nextc(); + c = nextc(true); if (c == '\r') /* allow MS-DOS files. bleah */ - c = nextc(); + c = nextc(true); if (c == '\n') { sourceline++; goto retry; @@ -5526,7 +5563,7 @@ retry: case '[': return lasttok = c; case ']': - c = nextc(); + c = nextc(true); pushback(); if (c == '[') { yylval = GET_INSTRUCTION(Op_sub_array); @@ -5538,7 +5575,7 @@ retry: return ']'; case '*': - if ((c = nextc()) == '=') { + if ((c = nextc(true)) == '=') { yylval = GET_INSTRUCTION(Op_assign_times); return lasttok = ASSIGNOP; } else if (do_posix) { @@ -5549,7 +5586,7 @@ retry: /* make ** and **= aliases for ^ and ^= */ static bool did_warn_op = false, did_warn_assgn = false; - if (nextc() == '=') { + if (nextc(true) == '=') { if (! did_warn_assgn) { did_warn_assgn = true; if (do_lint) @@ -5577,7 +5614,7 @@ retry: return lasttok = '*'; case '/': - if (nextc() == '=') { + if (nextc(true) == '=') { pushback(); return lasttok = SLASH_BEFORE_EQUAL; } @@ -5586,7 +5623,7 @@ retry: return lasttok = '/'; case '%': - if (nextc() == '=') { + if (nextc(true) == '=') { yylval = GET_INSTRUCTION(Op_assign_mod); return lasttok = ASSIGNOP; } @@ -5598,7 +5635,7 @@ retry: { static bool did_warn_op = false, did_warn_assgn = false; - if (nextc() == '=') { + if (nextc(true) == '=') { if (do_lint_old && ! did_warn_assgn) { did_warn_assgn = true; warning(_("operator `^=' is not supported in old awk")); @@ -5616,7 +5653,7 @@ retry: } case '+': - if ((c = nextc()) == '=') { + if ((c = nextc(true)) == '=') { yylval = GET_INSTRUCTION(Op_assign_plus); return lasttok = ASSIGNOP; } @@ -5629,7 +5666,7 @@ retry: return lasttok = '+'; case '!': - if ((c = nextc()) == '=') { + if ((c = nextc(true)) == '=') { yylval = GET_INSTRUCTION(Op_notequal); return lasttok = RELOP; } @@ -5642,7 +5679,7 @@ retry: return lasttok = '!'; case '<': - if (nextc() == '=') { + if (nextc(true) == '=') { yylval = GET_INSTRUCTION(Op_leq); return lasttok = RELOP; } @@ -5651,7 +5688,7 @@ retry: return lasttok = '<'; case '=': - if (nextc() == '=') { + if (nextc(true) == '=') { yylval = GET_INSTRUCTION(Op_equal); return lasttok = RELOP; } @@ -5660,7 +5697,7 @@ retry: return lasttok = ASSIGN; case '>': - if ((c = nextc()) == '=') { + if ((c = nextc(true)) == '=') { yylval = GET_INSTRUCTION(Op_geq); return lasttok = RELOP; } else if (c == '>') { @@ -5699,7 +5736,7 @@ retry: case '"': string: esc_seen = false; - while ((c = nextc()) != '"') { + while ((c = nextc(true)) != '"') { if (c == '\n') { pushback(); yyerror(_("unterminated string")); @@ -5707,7 +5744,7 @@ retry: } if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) && c == '\\') { - c = nextc(); + c = nextc(true); if (c == '\n') { sourceline++; continue; @@ -5741,7 +5778,7 @@ retry: return lasttok = YSTRING; case '-': - if ((c = nextc()) == '=') { + if ((c = nextc(true)) == '=') { yylval = GET_INSTRUCTION(Op_assign_minus); return lasttok = ASSIGNOP; } @@ -5754,7 +5791,7 @@ retry: return lasttok = '-'; case '.': - c = nextc(); + c = nextc(true); pushback(); if (! isdigit(c)) return lasttok = '.'; @@ -5782,7 +5819,7 @@ retry: if (do_traditional) goto done; if (tok == tokstart + 2) { - int peek = nextc(); + int peek = nextc(true); if (isxdigit(peek)) { inhex = true; @@ -5810,8 +5847,8 @@ retry: break; } seen_e = true; - if ((c = nextc()) == '-' || c == '+') { - int c2 = nextc(); + if ((c = nextc(true)) == '-' || c == '+') { + int c2 = nextc(true); if (isdigit(c2)) { tokadd(c); @@ -5858,7 +5895,7 @@ retry: } if (gotnumber) break; - c = nextc(); + c = nextc(true); } pushback(); @@ -5907,7 +5944,7 @@ retry: return lasttok = YNUMBER; case '&': - if ((c = nextc()) == '&') { + if ((c = nextc(true)) == '&') { yylval = GET_INSTRUCTION(Op_and); allow_newline(); return lasttok = LEX_AND; @@ -5917,7 +5954,7 @@ retry: return lasttok = '&'; case '|': - if ((c = nextc()) == '|') { + if ((c = nextc(true)) == '|') { yylval = GET_INSTRUCTION(Op_or); allow_newline(); return lasttok = LEX_OR; @@ -5958,7 +5995,7 @@ retry: * occasions where the interactions are funny. */ if (! do_traditional && c == '_' && lasttok != '$') { - if ((c = nextc()) == '"') { + if ((c = nextc(true)) == '"') { intlstr = true; goto string; } @@ -5970,7 +6007,7 @@ retry: tok = tokstart; while (c != END_FILE && is_identchar(c)) { tokadd(c); - c = nextc(); + c = nextc(true); } tokadd('\0'); pushback(); @@ -6208,7 +6245,7 @@ snode(INSTRUCTION *subn, INSTRUCTION *r) } #ifdef HAVE_MPFR - /* N.B.: There isn't any special processing for an alternate function below */ + /* N.B.: If necessary, add special processing for alternate builtin, below */ if (do_mpfr && tokentab[idx].ptr2) r->builtin = tokentab[idx].ptr2; else @@ -6237,6 +6274,15 @@ snode(INSTRUCTION *subn, INSTRUCTION *r) arg = subn->nexti; if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push) arg->nexti->opcode = Op_push_arg; /* argument may be array */ + } else if (r->builtin == do_div +#ifdef HAVE_MPFR + || r->builtin == MPF(div) +#endif + ) { + arg = subn->nexti->lasti->nexti->lasti->nexti; /* 3rd arg list */ + ip = arg->lasti; + if (ip->opcode == Op_push) + ip->opcode = Op_push_array; } else if (r->builtin == do_match) { static bool warned = false; |