aboutsummaryrefslogtreecommitdiffstats
path: root/awkgram.y
diff options
context:
space:
mode:
Diffstat (limited to 'awkgram.y')
-rw-r--r--awkgram.y274
1 files changed, 129 insertions, 145 deletions
diff --git a/awkgram.y b/awkgram.y
index 64ed3c56..d429c9cd 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989, 1991-2014 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991-2015 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
@@ -57,7 +57,6 @@ static int include_source(INSTRUCTION *file);
static int load_library(INSTRUCTION *file);
static void next_sourcefile(void);
static char *tokexpand(void);
-static bool is_deferred_variable(const char *name);
#define instruction(t) bcalloc(t, 1, 0)
@@ -74,13 +73,10 @@ static INSTRUCTION *mk_binary(INSTRUCTION *s1, INSTRUCTION *s2, INSTRUCTION *op)
static INSTRUCTION *mk_boolean(INSTRUCTION *left, INSTRUCTION *right, INSTRUCTION *op);
static INSTRUCTION *mk_assignment(INSTRUCTION *lhs, INSTRUCTION *rhs, INSTRUCTION *op);
static INSTRUCTION *mk_getline(INSTRUCTION *op, INSTRUCTION *opt_var, INSTRUCTION *redir, int redirtype);
-static NODE *make_regnode(int type, NODE *exp);
static int count_expressions(INSTRUCTION **list, bool isarg);
static INSTRUCTION *optimize_assignment(INSTRUCTION *exp);
static void add_lint(INSTRUCTION *list, LINTTYPE linttype);
-static void process_deferred();
-
enum defref { FUNC_DEFINE, FUNC_USE, FUNC_EXT };
static void func_use(const char *name, enum defref how);
static void check_funcs(void);
@@ -88,11 +84,12 @@ static void check_funcs(void);
static ssize_t read_one_line(int fd, void *buffer, size_t count);
static int one_line_close(int fd);
static void split_comment(void);
+static void check_comment(void);
+static bool at_seen = false;
static bool want_source = false;
static bool want_regexp = false; /* lexical scanning kludge */
static char *in_function; /* parsing kludge */
-static bool symtab_used = false; /* program used SYMTAB */
static int rule = 0;
const char *const ruletab[] = {
@@ -146,12 +143,15 @@ static INSTRUCTION *ip_atexit = NULL;
static INSTRUCTION *ip_end;
static INSTRUCTION *ip_endfile;
static INSTRUCTION *ip_beginfile;
+INSTRUCTION *main_beginfile;
static INSTRUCTION *comment = NULL;
static INSTRUCTION *program_comment = NULL;
static INSTRUCTION *function_comment = NULL;
+static INSTRUCTION *block_comment = NULL;
static bool func_first = true;
+static bool first_rule = true;
static inline INSTRUCTION *list_create(INSTRUCTION *x);
static inline INSTRUCTION *list_append(INSTRUCTION *l, INSTRUCTION *x);
@@ -213,8 +213,6 @@ program
| program LEX_EOF
{
next_sourcefile();
- if (sourcefile == srcfiles)
- process_deferred();
}
| program error
{
@@ -231,6 +229,7 @@ rule
: pattern action
{
(void) append_rule($1, $2);
+ first_rule = false;
}
| pattern statement_term
{
@@ -252,11 +251,13 @@ rule
| '@' LEX_INCLUDE source statement_term
{
want_source = false;
+ at_seen = false;
yyerrok;
}
| '@' LEX_LOAD library statement_term
{
want_source = false;
+ at_seen = false;
yyerrok;
}
;
@@ -334,7 +335,11 @@ pattern
($1->nexti + 1)->condpair_left = $1->lasti;
($1->nexti + 1)->condpair_right = $4->lasti;
}
- $$ = list_append(list_merge($1, $4), tp);
+ if (comment != NULL) {
+ $$ = list_append(list_merge(list_prepend($1, comment), $4), tp);
+ comment = NULL;
+ } else
+ $$ = list_append(list_merge($1, $4), tp);
rule = Rule;
}
| LEX_BEGIN
@@ -348,6 +353,7 @@ pattern
$1->in_rule = rule = BEGIN;
$1->source_file = source;
+ check_comment();
$$ = $1;
}
| LEX_END
@@ -361,6 +367,7 @@ pattern
$1->in_rule = rule = END;
$1->source_file = source;
+ check_comment();
$$ = $1;
}
| LEX_BEGINFILE
@@ -368,6 +375,7 @@ pattern
func_first = false;
$1->in_rule = rule = BEGINFILE;
$1->source_file = source;
+ check_comment();
$$ = $1;
}
| LEX_ENDFILE
@@ -375,6 +383,7 @@ pattern
func_first = false;
$1->in_rule = rule = ENDFILE;
$1->source_file = source;
+ check_comment();
$$ = $1;
}
;
@@ -403,7 +412,10 @@ func_name
YYABORT;
}
| '@' LEX_EVAL
- { $$ = $2; }
+ {
+ $$ = $2;
+ at_seen = false;
+ }
;
lex_builtin
@@ -1684,12 +1696,24 @@ func_call
*/
$$ = list_prepend($2, t);
+ at_seen = false;
}
;
direct_func_call
: FUNC_CALL '(' opt_expression_list r_paren
{
+ NODE *n;
+
+ if (! at_seen) {
+ n = lookup($1->func_name);
+ if (n != NULL && n->type != Node_func
+ && n->type != Node_ext_func && n->type != Node_old_ext_func) {
+ error_ln($1->source_line,
+ _("attempt to use non-function `%s' in function call"),
+ $1->func_name);
+ }
+ }
param_sanity($3);
$1->opcode = Op_func_call;
$1->func_body = NULL;
@@ -1925,7 +1949,6 @@ static const struct token tokentab[] = {
{"dcngettext", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext, 0},
{"default", Op_K_default, LEX_DEFAULT, GAWKX, 0, 0},
{"delete", Op_K_delete, LEX_DELETE, NOT_OLD, 0, 0},
-{"div", Op_builtin, LEX_BUILTIN, GAWKX|A(3), do_div, MPF(div)},
{"do", Op_K_do, LEX_DO, NOT_OLD|BREAK|CONTINUE, 0, 0},
{"else", Op_K_else, LEX_ELSE, 0, 0, 0},
{"eval", Op_symbol, LEX_EVAL, 0, 0, 0},
@@ -1946,6 +1969,7 @@ static const struct token tokentab[] = {
{"include", Op_symbol, LEX_INCLUDE, GAWKX, 0, 0},
{"index", Op_builtin, LEX_BUILTIN, A(2), do_index, 0},
{"int", Op_builtin, LEX_BUILTIN, A(1), do_int, MPF(int)},
+{"intdiv", Op_builtin, LEX_BUILTIN, GAWKX|A(3), do_intdiv, MPF(intdiv)},
{"isarray", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_isarray, 0},
{"length", Op_builtin, LEX_LENGTH, A(0)|A(1), do_length, 0},
{"load", Op_symbol, LEX_LOAD, GAWKX, 0, 0},
@@ -1983,7 +2007,6 @@ static const struct token tokentab[] = {
{"xor", Op_builtin, LEX_BUILTIN, GAWKX, do_xor, MPF(xor)},
};
-#if MBS_SUPPORT
/* Variable containing the current shift state. */
static mbstate_t cur_mbstate;
/* Ring buffer containing current characters. */
@@ -1995,10 +2018,6 @@ static int cur_ring_idx;
/* This macro means that last nextc() return a singlebyte character
or 1st byte of a multibyte character. */
#define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1)
-#else /* MBS_SUPPORT */
-/* a dummy */
-#define nextc_is_1stbyte 1
-#endif /* MBS_SUPPORT */
/* getfname --- return name of a builtin function (for pretty printing) */
@@ -2287,11 +2306,9 @@ mk_program()
cp = end_block;
else
cp = list_merge(begin_block, end_block);
- /*
- * We don't need to clear the comment variables
- * since they're not used anymore after this
- * function is called.
- */
+ if (program_comment != NULL) {
+ (void) list_prepend(cp, program_comment);
+ }
if (comment != NULL)
(void) list_append(cp, comment);
(void) list_append(cp, ip_atexit);
@@ -2339,6 +2356,10 @@ out:
/* delete the Op_list, not needed */
tmp = cp->nexti;
bcfree(cp);
+ /* these variables are not used again but zap them anyway. */
+ comment = NULL;
+ function_comment = NULL;
+ program_comment = NULL;
return tmp;
#undef begin_block
@@ -2365,7 +2386,7 @@ parse_program(INSTRUCTION **pcode)
ip_newfile = ip_rec = ip_atexit = ip_beginfile = ip_endfile = NULL;
else {
ip_endfile = instruction(Op_no_op);
- ip_beginfile = instruction(Op_no_op);
+ main_beginfile = ip_beginfile = instruction(Op_no_op);
ip_rec = instruction(Op_get_record); /* target for `next', also ip_newfile */
ip_newfile = bcalloc(Op_newfile, 2, 0); /* target for `nextfile' */
ip_newfile->target_jmp = ip_end;
@@ -2395,6 +2416,9 @@ parse_program(INSTRUCTION **pcode)
if (ret == 0) /* avoid spurious warning if parser aborted with YYABORT */
check_funcs();
+ if (do_posix && ! check_param_names())
+ errcount++;
+
if (args_array == NULL)
emalloc(args_array, NODE **, (max_args + 2) * sizeof(NODE *), "parse_program");
else
@@ -2900,8 +2924,6 @@ check_bad_char(int c)
/* nextc --- get the next input character */
-#if MBS_SUPPORT
-
static int
nextc(bool check_for_bad)
{
@@ -2972,43 +2994,40 @@ again:
}
}
-#else /* MBS_SUPPORT */
-
-int
-nextc(bool check_for_bad)
-{
- do {
- if (lexeof)
- return END_FILE;
- if (lexptr && lexptr < lexend) {
- if (check_for_bad)
- check_bad_char(*lexptr);
- return ((int) (unsigned char) *lexptr++);
- }
- } while (get_src_buf());
- return END_SRC;
-}
-
-#endif /* MBS_SUPPORT */
-
/* pushback --- push a character back on the input */
static inline void
pushback(void)
{
-#if MBS_SUPPORT
if (gawk_mb_cur_max > 1)
cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 :
cur_ring_idx - 1;
-#endif
(! lexeof && lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr);
}
+/* check_comment --- check for block comment */
-/* get_comment --- collect comment text */
+void
+check_comment(void)
+{
+ if (comment != NULL) {
+ if (first_rule) {
+ program_comment = comment;
+ } else
+ block_comment = comment;
+ comment = NULL;
+ }
+ first_rule = false;
+}
+
+/*
+ * get_comment --- collect comment text.
+ * Flag = EOL_COMMENT for end-of-line comments.
+ * Flag = FULL_COMMENT for self-contained comments.
+ */
int
-get_comment(void)
+get_comment(int flag)
{
int c;
int sl;
@@ -3020,6 +3039,12 @@ get_comment(void)
while ((c = nextc(false)) != '\n' && c != END_FILE) {
tokadd(c);
}
+ if (flag == EOL_COMMENT) {
+ /* comment at end of line. */
+ if (c == '\n')
+ tokadd(c);
+ break;
+ }
if (c == '\n') {
tokadd(c);
sourceline++;
@@ -3034,6 +3059,7 @@ get_comment(void)
break;
else if (c != '#') {
pushback();
+ sourceline--;
break;
} else
tokadd(c);
@@ -3043,6 +3069,7 @@ get_comment(void)
comment = bcalloc(Op_comment, 1, sl);
comment->source_file = source;
comment->memory = make_str_node(tokstart, tok - tokstart, 0);
+ comment->memory->comment_type = flag;
return c;
}
@@ -3094,7 +3121,7 @@ allow_newline(void)
if (c == '#') {
if (do_pretty_print && ! do_profile) {
/* collect comment byte code iff doing pretty print but not profiling. */
- c = get_comment();
+ c = get_comment(EOL_COMMENT);
} else {
while ((c = nextc(false)) != '\n' && c != END_FILE)
continue;
@@ -3170,7 +3197,7 @@ yylex(void)
if (lasttok == LEX_EOF) /* error earlier in current source, must give up !! */
return 0;
- c = nextc(true);
+ c = nextc(! want_regexp);
if (c == END_SRC)
return 0;
if (c == END_FILE)
@@ -3212,30 +3239,31 @@ yylex(void)
want_regexp = false;
tok = tokstart;
for (;;) {
- c = nextc(true);
+ c = nextc(false);
if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
case '[':
/* one day check for `.' and `=' too */
- if (nextc(true) == ':' || in_brack == 0)
+ if (nextc(false) == ':' || in_brack == 0)
in_brack++;
pushback();
break;
case ']':
- if (tokstart[0] == '['
- && (tok == tokstart + 1
- || (tok == tokstart + 2
- && tokstart[1] == '^')))
+ if (tok[-1] == '['
+ || (tok[-2] == '[' && tok[-1] == '^'))
/* do nothing */;
else
in_brack--;
break;
case '\\':
- if ((c = nextc(true)) == END_FILE) {
+ if ((c = nextc(false)) == END_FILE) {
pushback();
yyerror(_("unterminated regexp ends with `\\' at end of file"));
goto end_regexp; /* kludge */
- } else if (c == '\n') {
+ }
+ if (c == '\r') /* allow MS-DOS files. bleah */
+ c = nextc(true);
+ if (c == '\n') {
sourceline++;
continue;
} else {
@@ -3288,9 +3316,7 @@ retry:
thisline = NULL;
tok = tokstart;
-#if MBS_SUPPORT
if (gawk_mb_cur_max == 1 || nextc_is_1stbyte)
-#endif
switch (c) {
case END_SRC:
return 0;
@@ -3308,7 +3334,10 @@ retry:
* Collect comment byte code iff doing pretty print
* but not profiling.
*/
- c = get_comment();
+ if (lasttok == NEWLINE || lasttok == 0)
+ c = get_comment(FULL_COMMENT);
+ else
+ c = get_comment(EOL_COMMENT);
if (c == END_FILE)
return lasttok = NEWLINE_EOF;
@@ -3322,6 +3351,7 @@ retry:
return lasttok = NEWLINE;
case '@':
+ at_seen = true;
return lasttok = '@';
case '\\':
@@ -3345,7 +3375,7 @@ retry:
_("use of `\\ #...' line continuation is not portable"));
}
if (do_pretty_print && ! do_profile)
- c = get_comment();
+ c = get_comment(EOL_COMMENT);
else {
while ((c = nextc(false)) != '\n')
if (c == END_FILE)
@@ -3447,7 +3477,7 @@ retry:
return lasttok = '*';
case '/':
- if (nextc(true) == '=') {
+ if (nextc(false) == '=') {
pushback();
return lasttok = SLASH_BEFORE_EQUAL;
}
@@ -3582,6 +3612,8 @@ retry:
if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) &&
c == '\\') {
c = nextc(true);
+ if (c == '\r') /* allow MS-DOS files. bleah */
+ c = nextc(true);
if (c == '\n') {
sourceline++;
continue;
@@ -4111,9 +4143,9 @@ snode(INSTRUCTION *subn, INSTRUCTION *r)
arg = subn->nexti;
if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
arg->nexti->opcode = Op_push_arg; /* argument may be array */
- } else if (r->builtin == do_div
+ } else if (r->builtin == do_intdiv
#ifdef HAVE_MPFR
- || r->builtin == MPF(div)
+ || r->builtin == MPF(intdiv)
#endif
) {
arg = subn->nexti->lasti->nexti->lasti->nexti; /* 3rd arg list */
@@ -4465,7 +4497,7 @@ install_function(char *fname, INSTRUCTION *fi, INSTRUCTION *plist)
int pcount = 0;
r = lookup(fname);
- if (r != NULL || is_deferred_variable(fname)) {
+ if (r != NULL) {
error_ln(fi->source_line, _("function name `%s' previously defined"), fname);
return -1;
}
@@ -4658,50 +4690,6 @@ param_sanity(INSTRUCTION *arglist)
}
}
-/* deferred variables --- those that are only defined if needed. */
-
-/*
- * Is there any reason to use a hash table for deferred variables? At the
- * moment, there are only 1 to 3 such variables, so it may not be worth
- * the overhead. If more modules start using this facility, it should
- * probably be converted into a hash table.
- */
-
-static struct deferred_variable {
- NODE *(*load_func)(void);
- struct deferred_variable *next;
- char name[1]; /* variable-length array */
-} *deferred_variables;
-
-/* register_deferred_variable --- add a var name and loading function to the list */
-
-void
-register_deferred_variable(const char *name, NODE *(*load_func)(void))
-{
- struct deferred_variable *dv;
- size_t sl = strlen(name);
-
- emalloc(dv, struct deferred_variable *, sizeof(*dv)+sl,
- "register_deferred_variable");
- dv->load_func = load_func;
- dv->next = deferred_variables;
- memcpy(dv->name, name, sl+1);
- deferred_variables = dv;
-}
-
-/* is_deferred_variable --- check if NAME is a deferred variable */
-
-static bool
-is_deferred_variable(const char *name)
-{
- struct deferred_variable *dv;
- for (dv = deferred_variables; dv != NULL; dv = dv->next)
- if (strcmp(name, dv->name) == 0)
- return true;
- return false;
-}
-
-
/* variable --- make sure NAME is in the symbol table */
NODE *
@@ -4713,47 +4701,17 @@ variable(int location, char *name, NODETYPE type)
if (r->type == Node_func || r->type == Node_ext_func )
error_ln(location, _("function `%s' called with space between name and `(',\nor used as a variable or an array"),
r->vname);
- if (r == symbol_table)
- symtab_used = true;
} else {
/* not found */
- struct deferred_variable *dv;
-
- for (dv = deferred_variables; true; dv = dv->next) {
- if (dv == NULL) {
- /*
- * This is the only case in which we may not free the string.
- */
- return install_symbol(name, type);
- }
- if (strcmp(name, dv->name) == 0) {
- r = (*dv->load_func)();
- break;
- }
- }
+ return install_symbol(name, type);
}
efree(name);
return r;
}
-/* process_deferred --- if the program uses SYMTAB, load deferred variables */
-
-static void
-process_deferred()
-{
- struct deferred_variable *dv;
-
- if (! symtab_used)
- return;
-
- for (dv = deferred_variables; dv != NULL; dv = dv->next) {
- (void) dv->load_func();
- }
-}
-
/* make_regnode --- make a regular expression node */
-static NODE *
+NODE *
make_regnode(int type, NODE *exp)
{
NODE *n;
@@ -5200,7 +5158,11 @@ append_rule(INSTRUCTION *pattern, INSTRUCTION *action)
(rp + 1)->lasti = action->lasti;
(rp + 2)->first_line = pattern->source_line;
(rp + 2)->last_line = lastline;
- ip = list_prepend(action, rp);
+ if (block_comment != NULL) {
+ ip = list_prepend(list_prepend(action, block_comment), rp);
+ block_comment = NULL;
+ } else
+ ip = list_prepend(action, rp);
} else {
rp = bcalloc(Op_rule, 3, 0);
@@ -5905,13 +5867,26 @@ lookup_builtin(const char *name)
{
int mid = check_special(name);
- if (mid == -1 || tokentab[mid].class != LEX_BUILTIN)
+ if (mid == -1)
+ return NULL;
+
+ switch (tokentab[mid].class) {
+ case LEX_BUILTIN:
+ case LEX_LENGTH:
+ break;
+ default:
return NULL;
+ }
+
#ifdef HAVE_MPFR
if (do_mpfr)
return tokentab[mid].ptr2;
#endif
+ /* And another special case... */
+ if (tokentab[mid].value == Op_sub_builtin)
+ return (builtin_func_t) do_sub;
+
return tokentab[mid].ptr;
}
@@ -5921,11 +5896,20 @@ void
install_builtins(void)
{
int i, j;
+ int flags_that_must_be_clear = DEBUG_USE;
+
+ if (do_traditional)
+ flags_that_must_be_clear |= GAWKX;
+
+ if (do_posix)
+ flags_that_must_be_clear |= NOT_POSIX;
+
j = sizeof(tokentab) / sizeof(tokentab[0]);
for (i = 0; i < j; i++) {
- if ( tokentab[i].class == LEX_BUILTIN
- && (tokentab[i].flags & DEBUG_USE) == 0) {
+ if ( (tokentab[i].class == LEX_BUILTIN
+ || tokentab[i].class == LEX_LENGTH)
+ && (tokentab[i].flags & flags_that_must_be_clear) == 0) {
(void) install_symbol(tokentab[i].operator, Node_builtin_func);
}
}