about summary refs log tree commit diff stats
path: root/awk.y
diff options
context:
space:
mode:
Diffstat (limited to 'awk.y')
-rw-r--r-- awk.y 639
1 files changed, 229 insertions, 410 deletions
diff --git a/awk.y b/awk.y
index 854bbec5..32093e7f 100644
--- a/awk.y
+++ b/awk.y
@@ -1,176 +1,5 @@
/*
- * gawk -- GNU version of awk
- * awk.y --- yacc/bison parser for awk
- *
- * $Log: awk.y,v $
- * Revision 1.35 89/03/31 13:24:41 david
- * GNU license; MSDOS support; YYDEBUG inside #ifdef DEBUG
- *
- * Revision 1.34 89/03/30 20:55:55 david
- * avoid constructing lists in the case of one instance of a rule, statement
- * or BEGIN or END clause
- *
- * Revision 1.33 89/03/29 21:53:26 david
- * wierd: this stuff worked just fine with cc, but I had to add a lot
- * of $$ = $1 lines for it to work with gcc -- I thought that $$ = $1
- * was the default action
- *
- * Revision 1.32 89/03/29 14:16:08 david
- * grammar fix
- * delinting
- * some code movement -- devopen to awk7.c, variable() to here
- * change interface to devopen()
- *
- * Revision 1.31 89/03/24 21:08:13 david
- * STREQN takes care of extra test
- *
- * Revision 1.30 89/03/24 15:52:15 david
- * add getline production to rexp
- * merge HASHNODE with NODE
- *
- * Revision 1.29 89/03/21 11:57:49 david
- * substantial cleanup and code movement from awk1.c
- * this and previous two changes represent a major reworking of the grammar
- * to fix a number of bugs; two general problems were in I/O redirection
- * specifications and in the handling of whitespace -- the general strategies
- * in fixing these problems were to define some more specific grammatical
- * elements (e.g. simp_exp and rexp) and use these in particular places;
- * also got rid of want_concat and want_redirect kludges
- *
- * Revision 1.28 89/03/15 21:58:01 david
- * more grammar changes (explanation to come) plus changes from Arnold:
- * new case stuff added and old removed
- * tolower and toupper added
- * fix vararg stuff
- * add new escape sequences
- * fix bug in reporting unterminated regexps
- * fix to allow -f -
- * /dev/fd/N etc special files added
- *
- * Revision 1.27 89/03/02 21:10:09 david
- * intermediate step in major revision -- description later
- *
- * Revision 1.26 89/01/18 20:39:58 david
- * allow regexp && regexp as pattern and get rid of remaining reduce/reduce conflicts
- *
- * Revision 1.25 89/01/04 21:53:21 david
- * purge obstack remnants
- *
- * Revision 1.24 88/12/15 12:52:58 david
- * changes from Jay to get rid of some reduce/reduce conflicts - some remain
- *
- * Revision 1.23 88/12/07 19:59:25 david
- * changes for incorporating source filename in error messages
- *
- * Revision 1.22 88/11/23 21:37:24 david
- * Arnold: refinements of AWKPATH code
- *
- * Revision 1.21 88/11/22 13:46:45 david
- * Arnold: changes for case-insensitive matching
- *
- * Revision 1.20 88/11/15 10:13:37 david
- * Arnold: allow multiple -f options and search in directories for awk libraries,
- * directories specified by AWKPATH env. variable; cleanupo of comments and
- * #includes
- *
- * Revision 1.19 88/11/14 21:51:30 david
- * Arnold: added error message for BEGIN or END without any action at all;
- * unlink temporary source file right after creation so it goes away on bomb
- *
- * Revision 1.18 88/10/19 22:00:56 david
- * generalize (and correct) what pattern can be in pattern {action}; this
- * introduces quite a few new conflicts that should be checked thoroughly
- * at some point, but they don't seem to do any harm at first glance
- * replace malloc with emalloc
- *
- * Revision 1.17 88/10/17 19:52:01 david
- * Arnold: cleanup, purge FAST
- *
- * Revision 1.16 88/10/13 22:02:16 david
- * cleanup of yyerror and other error messages
- *
- * Revision 1.15 88/10/06 23:24:57 david
- * accept var space ++var
- * accept underscore as first character of a variable name
- *
- * Revision 1.14 88/06/13 18:01:46 david
- * delete \a (change from Arnold)
- *
- * Revision 1.13 88/06/08 00:29:42 david
- * better attempt at keeping track of line numbers
- * change grammar to properly handle newlines after && or ||
- *
- * Revision 1.12 88/06/07 23:39:02 david
- * little delint
- *
- * Revision 1.11 88/06/05 22:17:40 david
- * make_name() becomes make_param() (again!)
- * func_level goes away, param_counter makes entrance
- *
- * Revision 1.10 88/05/30 09:49:02 david
- * obstack_free was being called at end of function definition, freeing
- * memory that might be part of global variables referenced only inside
- * functions; commented out for now, will have to selectively free later.
- * cleanup: regexp now returns a NODE *
- *
- * Revision 1.9 88/05/27 11:04:53 david
- * added print[f] '(' ... ')' (optional parentheses)
- * for some reason want_redirect wasn't getting set for PRINT, so I set it in
- * yylex()
- *
- * Revision 1.8 88/05/26 22:52:14 david
- * fixed cmd | getline
- * added compound patterns (they got lost somewhere along the line)
- * fixed error message in yylex()
- * added null statement
- *
- * Revision 1.7 88/05/13 22:05:29 david
- * moved BEGIN and END block merging here
- * BEGIN, END and function defs. are no longer incorporated into main parse tree
- * fixed command | getline
- * fixed function install and definition
- *
- * Revision 1.6 88/05/09 17:47:50 david
- * Arnold's coded binary search
- *
- * Revision 1.5 88/05/04 12:31:13 david
- * be a bit more careful about types
- * make_for_loop() now returns a NODE *
- * keyword search now uses bsearch() -- need a public domain version of this
- * added back stuff in yylex() that got lost somewhere along the line
- * malloc() tokens in yylex() since they were previously just pointers into
- * current line that got overwritten by the next fgets() -- these need to get
- * freed at some point
- * fixed backslash line continuation interaction with CONCAT
- *
- * Revision 1.4 88/04/14 17:03:51 david
- * reinstalled a fix to do with line continuation
- *
- * Revision 1.3 88/04/14 14:41:01 david
- * Arnold's changes to yylex to read program from a file
- *
- * Revision 1.5 88/03/18 21:00:07 david
- * Baseline -- hoefully all the functionality of the new awk added.
- * Just debugging and tuning to do.
- *
- * Revision 1.4 87/11/19 14:37:20 david
- * added a bunch of ew builtin functions
- * added new rules for getline to provide new functionality
- * minor cleanup of redirection handling
- * generalized make_param into make_name
- *
- * Revision 1.3 87/11/09 21:22:33 david
- * added macinery for user-defined functions (including return)
- * added delete, do-while and system
- * reformatted and revised grammer to improve error-handling
- * changes to yyerror to give improved error messages
- *
- * Revision 1.2 87/10/29 21:33:28 david
- * added test for membership in an array, as in: if ("yes" in answers) ...
- *
- * Revision 1.1 87/10/27 15:23:21 david
- * Initial revision
- *
+ * awk.y --- yacc/bison parser
*/
/*
@@ -198,10 +27,16 @@
#ifdef DEBUG
#define YYDEBUG 12
#endif
-#define YYIMPROVE
#include "awk.h"
+/*
+ * This line is necessary since the Bison parser skeleton uses bcopy.
+ * Systems without memcpy should use -DMEMCPY_MISSING, per the Makefile.
+ * It should not hurt anything if Yacc is being used instead of Bison.
+ */
+#define bcopy(s,d,n) memcpy((d),(s),(n))
+
extern void msg();
extern struct re_pattern_buffer *mk_re_parse();
@@ -223,17 +58,20 @@ static int yylex ();
static void yyerror();
static int want_regexp; /* lexical scanning kludge */
+static int want_assign; /* lexical scanning kludge */
+static int can_return; /* lexical scanning kludge */
+static int io_allowed = 1; /* lexical scanning kludge */
static int lineno = 1; /* for error msgs */
static char *lexptr; /* pointer to next char during parsing */
static char *lexptr_begin; /* keep track of where we were for error msgs */
static int curinfile = -1; /* index into sourcefiles[] */
+static int param_counter;
NODE *variables[HASHSIZE];
extern int errcount;
extern NODE *begin_block;
extern NODE *end_block;
-extern int param_counter;
%}
%union {
@@ -247,28 +85,28 @@ extern int param_counter;
%type <nodeval> function_prologue function_body
%type <nodeval> rexp exp start program rule simp_exp
-%type <nodeval> simp_pattern pattern
+%type <nodeval> pattern
%type <nodeval> action variable param_list
%type <nodeval> rexpression_list opt_rexpression_list
%type <nodeval> expression_list opt_expression_list
%type <nodeval> statements statement if_statement opt_param_list
-%type <nodeval> opt_exp opt_variable regexp p_regexp
+%type <nodeval> opt_exp opt_variable regexp
%type <nodeval> input_redir output_redir
%type <nodetypeval> r_paren comma nls opt_nls print
%type <sval> func_name
-%token <sval> FUNC_CALL NAME REGEXP YSTRING
-%token <lval> ERROR INCDEC
-%token <fval> NUMBER
+%token <sval> FUNC_CALL NAME REGEXP
+%token <lval> ERROR
+%token <nodeval> NUMBER YSTRING
%token <nodetypeval> RELOP APPEND_OP
%token <nodetypeval> ASSIGNOP MATCHOP NEWLINE CONCAT_OP
%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
%token <nodetypeval> LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
-%token <nodetypeval> LEX_GETLINE LEX_SUB LEX_MATCH
+%token <nodetypeval> LEX_GETLINE
%token <nodetypeval> LEX_IN
%token <lval> LEX_AND LEX_OR INCREMENT DECREMENT
-%token <ptrval> LEX_BUILTIN
+%token <ptrval> LEX_BUILTIN LEX_LENGTH
/* these are just yylval numbers */
@@ -278,26 +116,24 @@ extern int param_counter;
%left LEX_OR
%left LEX_AND
%left LEX_GETLINE
-%left NUMBER
-%left FUNC_CALL LEX_SUB LEX_BUILTIN LEX_MATCH
+%nonassoc LEX_IN
+%left FUNC_CALL LEX_BUILTIN LEX_LENGTH
%nonassoc MATCHOP
%nonassoc RELOP '<' '>' '|' APPEND_OP
-%left NAME
-%nonassoc LEX_IN
-%left YSTRING
-%left '(' ')'
%left CONCAT_OP
+%left YSTRING NUMBER
%left '+' '-'
%left '*' '/' '%'
%right '!' UNARY
%right '^'
%left INCREMENT DECREMENT
%left '$'
+%left '(' ')'
%%
start
- : opt_nls program
+ : opt_nls program opt_nls
{ expression_value = $2; }
;
@@ -331,32 +167,36 @@ program
;
rule
- : LEX_BEGIN action
+ : LEX_BEGIN { io_allowed = 0; }
+ action
{
if (begin_block) {
if (begin_block->type != Node_rule_list)
begin_block = node(begin_block, Node_rule_list,
(NODE *)NULL);
append_right (begin_block, node(
- node((NODE *)NULL, Node_rule_node, $2),
+ node((NODE *)NULL, Node_rule_node, $3),
Node_rule_list, (NODE *)NULL) );
} else
- begin_block = node((NODE *)NULL, Node_rule_node, $2);
+ begin_block = node((NODE *)NULL, Node_rule_node, $3);
$$ = NULL;
+ io_allowed = 1;
yyerrok;
}
- | LEX_END action
+ | LEX_END { io_allowed = 0; }
+ action
{
if (end_block) {
if (end_block->type != Node_rule_list)
end_block = node(end_block, Node_rule_list,
(NODE *)NULL);
append_right (end_block, node(
- node((NODE *)NULL, Node_rule_node, $2),
+ node((NODE *)NULL, Node_rule_node, $3),
Node_rule_list, (NODE *)NULL));
} else
- end_block = node((NODE *)NULL, Node_rule_node, $2);
+ end_block = node((NODE *)NULL, Node_rule_node, $3);
$$ = NULL;
+ io_allowed = 1;
yyerrok;
}
| LEX_BEGIN statement_term
@@ -400,46 +240,26 @@ function_prologue
func_name '(' opt_param_list r_paren opt_nls
{
$$ = append_right(make_param($3), $5);
+ can_return = 1;
}
;
function_body
: l_brace statements r_brace
- { $$ = $2; }
+ {
+ $$ = $2;
+ can_return = 0;
+ }
;
-simp_pattern
- : exp
- { $$ = $1; }
- | p_regexp
- { $$ = $1; }
- | p_regexp LEX_AND simp_pattern
- { $$ = node ($1, Node_and, $3); }
- | p_regexp LEX_OR simp_pattern
- { $$ = node ($1, Node_or, $3); }
- | '!' p_regexp %prec UNARY
- { $$ = node ($2, Node_not,(NODE *) NULL); }
- | '(' p_regexp r_paren
- { $$ = $2; }
- ;
-
pattern
- : simp_pattern
+ : exp
{ $$ = $1; }
- | simp_pattern comma simp_pattern
+ | exp comma exp
{ $$ = mkrangenode ( node($1, Node_cond_pair, $3) ); }
;
-p_regexp
- : regexp
- {
- $$ = node(
- node(make_number((AWKNUM)0),Node_field_spec,(NODE*)NULL),
- Node_match, $1);
- }
- ;
-
regexp
/*
* In this rule, want_regexp tells yylex that the next thing
@@ -472,7 +292,7 @@ statements
{ $$ = $1; }
| statements statement
{
- if ($1->type != Node_statement_list)
+ if ($1 == NULL || $1->type != Node_statement_list)
$1 = node($1, Node_statement_list,(NODE *)NULL);
$$ = append_right($1,
node( $2, Node_statement_list, (NODE *)NULL));
@@ -495,6 +315,8 @@ statement_term
statement
: semi opt_nls
{ $$ = NULL; }
+ | l_brace r_brace
+ { $$ = NULL; }
| l_brace statements r_brace
{ $$ = $2; }
| if_statement
@@ -527,12 +349,16 @@ statement
{ $$ = node ($3, $1, $5); }
| print opt_rexpression_list output_redir statement_term
{ $$ = node ($2, $1, $3); }
- | LEX_NEXT statement_term
+ | LEX_NEXT
+ { if (! io_allowed) yyerror("next used in BEGIN or END action"); }
+ statement_term
{ $$ = node ((NODE *)NULL, Node_K_next, (NODE *)NULL); }
| LEX_EXIT opt_exp statement_term
{ $$ = node ($2, Node_K_exit, (NODE *)NULL); }
- | LEX_RETURN opt_exp statement_term
- { $$ = node ($2, Node_K_return, (NODE *)NULL); }
+ | LEX_RETURN
+ { if (! can_return) yyerror("return used outside function context"); }
+ opt_exp statement_term
+ { $$ = node ($3, Node_K_return, (NODE *)NULL); }
| LEX_DELETE NAME '[' expression_list ']' statement_term
{ $$ = node (variable($2), Node_K_delete, $4); }
| exp statement_term
@@ -582,11 +408,11 @@ input_redir
output_redir
: /* empty */
{ $$ = NULL; }
- | '>' simp_exp
+ | '>' exp
{ $$ = node ($2, Node_redirect_output, (NODE *)NULL); }
- | APPEND_OP simp_exp
+ | APPEND_OP exp
{ $$ = node ($2, Node_redirect_append, (NODE *)NULL); }
- | '|' simp_exp
+ | '|' exp
{ $$ = node ($2, Node_redirect_pipe, (NODE *)NULL); }
;
@@ -671,8 +497,10 @@ expression_list
;
/* Expressions, not including the comma operator. */
-exp : variable ASSIGNOP exp
- { $$ = node ($1, $2, $3); }
+exp : variable ASSIGNOP
+ { want_assign = 0; }
+ exp
+ { $$ = node ($1, $2, $4); }
| '(' expression_list r_paren LEX_IN NAME
{ $$ = node (variable($5), Node_in_array, $2); }
| exp '|' LEX_GETLINE opt_variable
@@ -682,16 +510,23 @@ exp : variable ASSIGNOP exp
}
| LEX_GETLINE opt_variable input_redir
{
+ /* "too painful to do right" */
+ /*
+ if (! io_allowed && $3 == NULL)
+ yyerror("non-redirected getline illegal inside BEGIN or END action");
+ */
$$ = node ($2, Node_K_getline, $3);
}
| exp LEX_AND exp
{ $$ = node ($1, Node_and, $3); }
| exp LEX_OR exp
{ $$ = node ($1, Node_or, $3); }
- | exp MATCHOP regexp
- { $$ = node ($1, $2, $3); }
| exp MATCHOP exp
{ $$ = node ($1, $2, $3); }
+ | regexp
+ { $$ = $1; }
+ | '!' regexp %prec UNARY
+ { $$ = node((NODE *) NULL, Node_nomatch, $2); }
| exp LEX_IN NAME
{ $$ = node (variable($3), Node_in_array, $1); }
| exp RELOP exp
@@ -702,25 +537,34 @@ exp : variable ASSIGNOP exp
{ $$ = node ($1, Node_greater, $3); }
| exp '?' exp ':' exp
{ $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));}
- | exp exp %prec CONCAT_OP
- { $$ = node ($1, Node_concat, $2); }
| simp_exp
{ $$ = $1; }
+ | exp exp %prec CONCAT_OP
+ { $$ = node ($1, Node_concat, $2); }
;
rexp
- : variable ASSIGNOP rexp
- { $$ = node ($1, $2, $3); }
+ : variable ASSIGNOP
+ { want_assign = 0; }
+ rexp
+ { $$ = node ($1, $2, $4); }
| rexp LEX_AND rexp
{ $$ = node ($1, Node_and, $3); }
| rexp LEX_OR rexp
{ $$ = node ($1, Node_or, $3); }
| LEX_GETLINE opt_variable input_redir
{
+ /* "too painful to do right" */
+ /*
+ if (! io_allowed && $3 == NULL)
+ yyerror("non-redirected getline illegal inside BEGIN or END action");
+ */
$$ = node ($2, Node_K_getline, $3);
}
- | rexp MATCHOP regexp
- { $$ = node ($1, $2, $3); }
+ | regexp
+ { $$ = $1; }
+ | '!' regexp %prec UNARY
+ { $$ = node((NODE *) NULL, Node_nomatch, $2); }
| rexp MATCHOP rexp
{ $$ = node ($1, $2, $3); }
| rexp LEX_IN NAME
@@ -729,10 +573,10 @@ rexp
{ $$ = node ($1, $2, $3); }
| rexp '?' rexp ':' rexp
{ $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));}
- | rexp rexp %prec CONCAT_OP
- { $$ = node ($1, Node_concat, $2); }
| simp_exp
{ $$ = $1; }
+ | rexp rexp %prec CONCAT_OP
+ { $$ = node ($1, Node_concat, $2); }
;
simp_exp
@@ -742,16 +586,10 @@ simp_exp
{ $$ = $2; }
| LEX_BUILTIN '(' opt_expression_list r_paren
{ $$ = snode ($3, Node_builtin, $1); }
- | LEX_BUILTIN
+ | LEX_LENGTH '(' opt_expression_list r_paren
+ { $$ = snode ($3, Node_builtin, $1); }
+ | LEX_LENGTH
{ $$ = snode ((NODE *)NULL, Node_builtin, $1); }
- | LEX_SUB '(' regexp comma expression_list r_paren
- { $$ = node($5, $1, $3); }
- | LEX_SUB '(' exp comma expression_list r_paren
- { $$ = node($5, $1, $3); }
- | LEX_MATCH '(' exp comma regexp r_paren
- { $$ = node($3, $1, $5); }
- | LEX_MATCH '(' exp comma exp r_paren
- { $$ = node($3, $1, $5); }
| FUNC_CALL '(' opt_expression_list r_paren
{
$$ = node ($3, Node_func_call, make_string($1, strlen($1)));
@@ -767,9 +605,9 @@ simp_exp
| variable
{ $$ = $1; }
| NUMBER
- { $$ = make_number ($1); }
+ { $$ = $1; }
| YSTRING
- { $$ = make_string ($1, -1); }
+ { $$ = $1; }
/* Binary operators in order of decreasing precedence. */
| simp_exp '^' simp_exp
@@ -799,11 +637,11 @@ opt_variable
variable
: NAME
- { $$ = variable ($1); }
+ { want_assign = 1; $$ = variable ($1); }
| NAME '[' expression_list ']'
- { $$ = node (variable($1), Node_subscript, $3); }
+ { want_assign = 1; $$ = node (variable($1), Node_subscript, $3); }
| '$' simp_exp
- { $$ = node ($2, Node_field_spec, (NODE *)NULL); }
+ { want_assign = 1; $$ = node ($2, Node_field_spec, (NODE *)NULL); }
;
l_brace
@@ -840,16 +678,13 @@ struct token {
NODE *(*ptr) (); /* function that implements this keyword */
};
-#ifndef NULL
-#define NULL 0
-#endif
-
extern NODE
*do_exp(), *do_getline(), *do_index(), *do_length(),
*do_sqrt(), *do_log(), *do_sprintf(), *do_substr(),
*do_split(), *do_system(), *do_int(), *do_close(),
*do_atan2(), *do_sin(), *do_cos(), *do_rand(),
- *do_srand(), *do_match(), *do_tolower(), *do_toupper();
+ *do_srand(), *do_match(), *do_tolower(), *do_toupper(),
+ *do_sub(), *do_gsub();
/* Special functions for debugging */
#ifdef DEBUG
@@ -878,14 +713,14 @@ static struct token tokentab[] = {
{ "func", Node_K_function, LEX_FUNCTION, 0, 0 },
{ "function", Node_K_function, LEX_FUNCTION, 0, 0 },
{ "getline", Node_K_getline, LEX_GETLINE, 0, 0 },
- { "gsub", Node_gsub, LEX_SUB, 0, 0 },
+ { "gsub", Node_builtin, LEX_BUILTIN, 0, do_gsub },
{ "if", Node_K_if, LEX_IF, 0, 0 },
{ "in", Node_illegal, LEX_IN, 0, 0 },
{ "index", Node_builtin, LEX_BUILTIN, 0, do_index },
{ "int", Node_builtin, LEX_BUILTIN, 0, do_int },
- { "length", Node_builtin, LEX_BUILTIN, 0, do_length },
+ { "length", Node_builtin, LEX_LENGTH, 0, do_length },
{ "log", Node_builtin, LEX_BUILTIN, 0, do_log },
- { "match", Node_K_match, LEX_MATCH, 0, 0 },
+ { "match", Node_builtin, LEX_BUILTIN, 0, do_match },
{ "next", Node_K_next, LEX_NEXT, 0, 0 },
{ "print", Node_K_print, LEX_PRINT, 0, 0 },
{ "printf", Node_K_printf, LEX_PRINTF, 0, 0 },
@@ -899,14 +734,16 @@ static struct token tokentab[] = {
{ "sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf },
{ "sqrt", Node_builtin, LEX_BUILTIN, 0, do_sqrt },
{ "srand", Node_builtin, LEX_BUILTIN, 0, do_srand },
- { "sub", Node_sub, LEX_SUB, 0, 0 },
+ { "sub", Node_builtin, LEX_BUILTIN, 0, do_sub },
{ "substr", Node_builtin, LEX_BUILTIN, 0, do_substr },
{ "system", Node_builtin, LEX_BUILTIN, 0, do_system },
- { "tolower", Node_builtin, LEX_BUILTIN, 1, do_tolower },
- { "toupper", Node_builtin, LEX_BUILTIN, 1, do_toupper },
+ { "tolower", Node_builtin, LEX_BUILTIN, 0, do_tolower },
+ { "toupper", Node_builtin, LEX_BUILTIN, 0, do_toupper },
{ "while", Node_K_while, LEX_WHILE, 0, 0 },
};
+static char *token_start;
+
/* VARARGS0 */
static void
yyerror(va_alist)
@@ -914,64 +751,40 @@ va_dcl
{
va_list args;
char *mesg;
- char *a1;
register char *ptr, *beg;
- static int list = 0;
char *scan;
errcount++;
va_start(args);
mesg = va_arg(args, char *);
- if (! list)
- a1 = va_arg(args, char *);
va_end(args);
- if (mesg || !list) {
- /* Find the current line in the input file */
- if (!lexptr) {
- beg = "(END OF FILE)";
- ptr = beg + 13;
- } else {
- if (*lexptr == '\n' && lexptr != lexptr_begin)
- --lexptr;
- for (beg = lexptr; beg != lexptr_begin && *beg != '\n'; --beg)
- ;
- /* NL isn't guaranteed */
- for (ptr = lexptr; *ptr && *ptr != '\n'; ptr++)
- ;
- if (beg != lexptr_begin)
- beg++;
- }
- msg("syntax error near line %d:\n%.*s", lineno, ptr - beg, beg);
- scan = beg;
- while (scan <= lexptr)
- if (*scan++ == '\t')
- putc('\t', stderr);
- else
- putc(' ', stderr);
- putc('^', stderr);
- putc(' ', stderr);
- if (mesg) {
- vfprintf(stderr, mesg, args);
- putc('\n', stderr);
- exit(1);
- } else {
- if (a1) {
- fputs("expecting: ", stderr);
- fputs(a1, stderr);
- list = 1;
- return;
- }
- }
- return;
- }
- if (a1) {
- fputs(" or ", stderr);
- fputs(a1, stderr);
- putc('\n', stderr);
- return;
+ /* Find the current line in the input file */
+ if (! lexptr) {
+ beg = "(END OF FILE)";
+ ptr = beg + 13;
+ } else {
+ if (*lexptr == '\n' && lexptr != lexptr_begin)
+ --lexptr;
+ for (beg = lexptr; beg != lexptr_begin && *beg != '\n'; --beg)
+ ;
+ /* NL isn't guaranteed */
+ for (ptr = lexptr; *ptr && *ptr != '\n'; ptr++)
+ ;
+ if (beg != lexptr_begin)
+ beg++;
}
+ msg("syntax error near line %d:\n%.*s", lineno, ptr - beg, beg);
+ scan = beg;
+ while (scan < token_start)
+ if (*scan++ == '\t')
+ putc('\t', stderr);
+ else
+ putc(' ', stderr);
+ putc('^', stderr);
+ putc(' ', stderr);
+ vfprintf(stderr, mesg, args);
putc('\n', stderr);
- list = 0;
+ exit(1);
}
/*
@@ -989,19 +802,17 @@ va_dcl
* zeros. A value of 0 does not mean end of string.
*/
-static int
+int
parse_escape(string_ptr)
char **string_ptr;
{
register int c = *(*string_ptr)++;
register int i;
+ register int count = 0;
switch (c) {
case 'a':
- if (strict)
- goto def;
- else
- return BELL;
+ return BELL;
case 'b':
return '\b';
case 'f':
@@ -1013,15 +824,12 @@ char **string_ptr;
case 't':
return '\t';
case 'v':
- if (strict)
- goto def;
- else
- return '\v';
+ return '\v';
case '\n':
return -2;
case 0:
(*string_ptr)--;
- return 0;
+ return -1;
case '0':
case '1':
case '2':
@@ -1030,25 +838,19 @@ char **string_ptr;
case '5':
case '6':
case '7':
- {
- register int i = c - '0';
- register int count = 0;
-
- while (++count < 3) {
- if ((c = *(*string_ptr)++) >= '0' && c <= '7') {
- i *= 8;
- i += c - '0';
- } else {
- (*string_ptr)--;
- break;
- }
+ i = c - '0';
+ count = 0;
+ while (++count < 3) {
+ if ((c = *(*string_ptr)++) >= '0' && c <= '7') {
+ i *= 8;
+ i += c - '0';
+ } else {
+ (*string_ptr)--;
+ break;
}
- return i;
}
+ return i;
case 'x':
- if (strict)
- goto def;
-
i = 0;
while (1) {
if (isxdigit((c = *(*string_ptr)++))) {
@@ -1065,7 +867,6 @@ char **string_ptr;
}
return i;
default:
- def:
return c;
}
}
@@ -1089,6 +890,7 @@ yylex()
* hacking the grammar. */
int seen_e = 0; /* These are for numbers */
int seen_point = 0;
+ int esc_seen;
extern char **sourcefile;
extern int tempsource, numfiles;
static int file_opened = 0;
@@ -1114,7 +916,7 @@ yylex()
if ((fin = pathopen (sourcefile[++curinfile])) == NULL)
fatal("cannot open `%s' for reading (%s)",
sourcefile[curinfile],
- sys_errlist[errno]);
+ strerror(errno));
*(lexptr = cbuf) = '\0';
/*
* immediately unlink the tempfile so that it will
@@ -1136,19 +938,18 @@ retry:
lexptr = lexptr_begin = cbuf;
if (want_regexp) {
- want_regexp = 0;
+ int in_brack = 0;
- /*
- * there is a potential bug if a regexp is followed by an
- * equal sign: "/foo/=bar" would result in assign_quotient
- * being returned as the next token. Nothing is done about
- * it since it is not valid awk, but maybe something should
- * be done anyway.
- */
-
- tokstart = lexptr;
+ want_regexp = 0;
+ token_start = tokstart = lexptr;
while (c = *lexptr++) {
switch (c) {
+ case '[':
+ in_brack = 1;
+ break;
+ case ']':
+ in_brack = 0;
+ break;
case '\\':
if (*lexptr++ == '\0') {
yyerror("unterminated regexp ends with \\");
@@ -1157,6 +958,9 @@ retry:
goto retry;
break;
case '/': /* end of the regexp */
+ if (in_brack)
+ break;
+
lexptr--;
yylval.sval = tokstart;
return REGEXP;
@@ -1179,7 +983,7 @@ retry:
while (*lexptr == ' ' || *lexptr == '\t')
lexptr++;
- tokstart = lexptr;
+ token_start = tokstart = lexptr;
switch (c = *lexptr++) {
case 0:
@@ -1243,7 +1047,7 @@ retry:
return c;
case '/':
- if (*lexptr == '=') {
+ if (want_assign && *lexptr == '=') {
yylval.nodetypeval = Node_assign_quotient;
lexptr++;
return ASSIGNOP;
@@ -1346,18 +1150,24 @@ retry:
return NEWLINE;
case '"':
+ esc_seen = 0;
while (*lexptr != '\0') {
switch (*lexptr++) {
case '\\':
+ esc_seen = 1;
+ if (*lexptr == '\n')
+ yyerror("newline in string");
if (*lexptr++ != '\0')
break;
/* fall through */
case '\n':
+ lexptr--;
yyerror("unterminated string");
return ERROR;
- case '\"':
- /* Skip the doublequote */
- yylval.sval = tokstart + 1;
+ case '"':
+ yylval.nodeval = make_str_node(tokstart + 1,
+ lexptr-tokstart-2, esc_seen);
+ yylval.nodeval->flags |= PERM;
return YSTRING;
}
}
@@ -1374,23 +1184,9 @@ retry:
lexptr++;
return DECREMENT;
}
+ yylval.nodetypeval = Node_illegal;
+ return c;
- /*
- * It looks like space tab comma and newline are the legal
- * places for a UMINUS. Have we missed any?
- */
- if ((! isdigit(*lexptr) && *lexptr != '.') ||
- (lexptr > lexptr_begin + 1 &&
- ! index(" \t,\n", lexptr[-2]))) {
-
- /*
- * set node type to ILLEGAL because the action should
- * set it to the right thing
- */
- yylval.nodetypeval = Node_illegal;
- return c;
- }
- /* FALL through into number code */
case '0':
case '1':
case '2':
@@ -1403,11 +1199,7 @@ retry:
case '9':
case '.':
/* It's a number */
- if (c == '-')
- namelen = 1;
- else
- namelen = 0;
- for (; (c = tokstart[namelen]) != '\0'; namelen++) {
+ for (namelen = 0; (c = tokstart[namelen]) != '\0'; namelen++) {
switch (c) {
case '.':
if (seen_point)
@@ -1441,7 +1233,12 @@ retry:
got_number:
lexptr = tokstart + namelen;
- yylval.fval = atof(tokstart);
+ /*
+ yylval.nodeval = make_string(tokstart, namelen);
+ (void) force_number(yylval.nodeval);
+ */
+ yylval.nodeval = make_number(atof(tokstart));
+ yylval.nodeval->flags |= PERM;
return NUMBER;
case '&':
@@ -1454,7 +1251,7 @@ got_number:
;
if (c == '\n')
lineno++;
- else if (!isspace(c))
+ else if (! isspace(c))
break;
}
return LEX_AND;
@@ -1471,16 +1268,16 @@ got_number:
;
if (c == '\n')
lineno++;
- else if (!isspace(c))
+ else if (! isspace(c))
break;
}
return LEX_OR;
}
- yylval.nodetypeval = Node_illegal;
- return c;
- }
+ yylval.nodetypeval = Node_illegal;
+ return c;
+ }
- if (c != '_' && !isalpha(c)) {
+ if (c != '_' && ! isalpha(c)) {
yyerror("Invalid char '%c' in expression\n", c);
return ERROR;
}
@@ -1489,7 +1286,7 @@ got_number:
for (namelen = 0; is_identchar(tokstart[namelen]); namelen++)
/* null */ ;
emalloc(tokkey, char *, namelen+1, "yylex");
- (void) strncpy (tokkey, tokstart, namelen);
+ memcpy(tokkey, tokstart, namelen);
tokkey[namelen] = '\0';
/* See if it is a special token. */
@@ -1499,8 +1296,6 @@ got_number:
int i, c;
mid = (low + high) / 2;
-
- compare:
c = *tokstart - tokentab[mid].operator[0];
i = c ? c : strcmp (tokkey, tokentab[mid].operator);
@@ -1512,7 +1307,8 @@ got_number:
lexptr = tokstart + namelen;
if (strict && tokentab[mid].nostrict)
break;
- if (tokentab[mid].class == LEX_BUILTIN)
+ if (tokentab[mid].class == LEX_BUILTIN
+ || tokentab[mid].class == LEX_LENGTH)
yylval.ptrval = tokentab[mid].ptr;
else
yylval.nodetypeval = tokentab[mid].value;
@@ -1548,7 +1344,10 @@ char *file;
char *awkpath, *cp;
char trypath[BUFSIZ];
FILE *fp;
+#ifdef DEBUG
extern int debugging;
+#endif
+ int fd;
if (strcmp (file, "-") == 0)
return (stdin);
@@ -1560,37 +1359,53 @@ char *file;
first = 0;
if ((awkpath = getenv ("AWKPATH")) != NULL && *awkpath)
savepath = awkpath; /* used for restarting */
-#ifdef MSDOS
- else if ((awkpath = getenv ("INIT")) != NULL && *awkpath)
- savepath = awkpath; /* MSC 5.1 users may prefer */
- /* to use INIT */
-#endif
}
awkpath = savepath;
/* some kind of path name, no search */
#ifndef MSDOS
- if (index (file, '/') != NULL)
+ if (strchr (file, '/') != NULL)
#else
- if (index (file, '/') != NULL || index (file, '\\') != NULL
- || index (file, ':') != NULL)
+ if (strchr (file, '/') != NULL || strchr (file, '\\') != NULL
+ || strchr (file, ':') != NULL)
#endif
- return (fdopen(devopen (file, "r"), "r"));
+ return ( (fd = devopen (file, "r")) >= 0 ?
+ fdopen(fd, "r") :
+ NULL);
do {
+ trypath[0] = '\0';
/* this should take into account limits on size of trypath */
for (cp = trypath; *awkpath && *awkpath != ENVSEP; )
*cp++ = *awkpath++;
- *cp++ = '/';
- *cp = '\0'; /* clear left over junk */
- strcat (cp, file);
- if ((fp = fdopen(devopen (trypath, "r"), "r")) != NULL)
+
+ if (cp != trypath) { /* non-null element in path */
+ *cp++ = '/';
+ strcpy (cp, file);
+ } else
+ strcpy (trypath, file);
+#ifdef DEBUG
+ if (debugging)
+ fprintf(stderr, "trying: %s\n", trypath);
+#endif
+ if ((fd = devopen (trypath, "r")) >= 0
+ && (fp = fdopen(fd, "r")) != NULL)
return (fp);
/* no luck, keep going */
- awkpath++; /* skip colon */
+ if(*awkpath == ENVSEP && awkpath[1] != '\0')
+ awkpath++; /* skip colon */
} while (*awkpath);
+#ifdef MSDOS
+ /*
+ * Under DOS (and probably elsewhere) you might have one of the awk
+ * paths defined, WITHOUT the current working directory in it.
+ * Therefore you should try to open the file in the current directory.
+ */
+ return ( (fd = devopen(file, "r")) >= 0 ? fdopen(fd, "r") : NULL);
+#else
return (NULL);
+#endif
}
static NODE *
@@ -1604,7 +1419,7 @@ NODETYPE op;
r = newnode(op);
r->source_line = lineno;
- if (numfiles > 1 && !tempsource)
+ if (numfiles > -1 && ! tempsource)
r->source_file = sourcefile[curinfile];
else
r->source_file = NULL;
@@ -1709,9 +1524,8 @@ NODE *value;
hp->hlength = len;
hp->hvalue = value;
emalloc(hp->hname, char *, len + 1, "install");
- bcopy(name, hp->hname, len);
+ memcpy(hp->hname, name, len);
hp->hname[len] = '\0';
- hp->hvalue->varname = hp->hname;
return hp->hvalue;
}
@@ -1762,21 +1576,26 @@ int hashsize;
}
/*
- * Add new to the rightmost branch of LIST. This uses n^2 time, but doesn't
- * get used enough to make optimizing worth it. . .
+ * Add new to the rightmost branch of LIST. This uses n^2 time, so we make
+ * a simple attempt at optimizing it.
*/
-/* You don't believe me? Profile it yourself! */
static NODE *
append_right(list, new)
NODE *list, *new;
{
register NODE *oldlist;
+ static NODE *savefront = NULL, *savetail = NULL;
oldlist = list;
+ if (savefront == oldlist) {
+ savetail = savetail->rnode = new;
+ return oldlist;
+ } else
+ savefront = oldlist;
while (list->rnode != NULL)
list = list->rnode;
- list->rnode = new;
+ savetail = list->rnode = new;
return oldlist;
}