aboutsummaryrefslogtreecommitdiffstats
path: root/awk.y
diff options
context:
space:
mode:
Diffstat (limited to 'awk.y')
-rw-r--r--awk.y1940
1 files changed, 1222 insertions, 718 deletions
diff --git a/awk.y b/awk.y
index 180505f6..fb0f0d80 100644
--- a/awk.y
+++ b/awk.y
@@ -1,9 +1,124 @@
-
/*
* gawk -- GNU version of awk
* Copyright (C) 1986 Free Software Foundation
* Written by Paul Rubin, August 1986
*
+ * $Log: awk.y,v $
+ * Revision 1.24 88/12/15 12:52:58 david
+ * changes from Jay to get rid of some reduce/reduce conflicts - some remain
+ *
+ * Revision 1.23 88/12/07 19:59:25 david
+ * changes for incorporating source filename in error messages
+ *
+ * Revision 1.22 88/11/23 21:37:24 david
+ * Arnold: refinements of AWKPATH code
+ *
+ * Revision 1.21 88/11/22 13:46:45 david
+ * Arnold: changes for case-insensitive matching
+ *
+ * Revision 1.20 88/11/15 10:13:37 david
+ * Arnold: allow multiple -f options and search in directories for awk libraries,
+ * directories specified by AWKPATH env. variable; cleanupo of comments and
+ * #includes
+ *
+ * Revision 1.19 88/11/14 21:51:30 david
+ * Arnold: added error message for BEGIN or END without any action at all;
+ * unlink temporary source file right after creation so it goes away on bomb
+ *
+ * Revision 1.18 88/10/19 22:00:56 david
+ * generalize (and correct) what pattern can be in pattern {action}; this
+ * introduces quite a few new conflicts that should be checked thoroughly
+ * at some point, but they don't seem to do any harm at first glance
+ * replace malloc with emalloc
+ *
+ * Revision 1.17 88/10/17 19:52:01 david
+ * Arnold: cleanup, purge FAST
+ *
+ * Revision 1.16 88/10/13 22:02:16 david
+ * cleanup of yyerror and other error messages
+ *
+ * Revision 1.15 88/10/06 23:24:57 david
+ * accept var space ++var
+ * accept underscore as first character of a variable name
+ *
+ * Revision 1.14 88/06/13 18:01:46 david
+ * delete \a (change from Arnold)
+ *
+ * Revision 1.13 88/06/08 00:29:42 david
+ * better attempt at keeping track of line numbers
+ * change grammar to properly handle newlines after && or ||
+ *
+ * Revision 1.12 88/06/07 23:39:02 david
+ * little delint
+ *
+ * Revision 1.11 88/06/05 22:17:40 david
+ * make_name() becomes make_param() (again!)
+ * func_level goes away, param_counter makes entrance
+ *
+ * Revision 1.10 88/05/30 09:49:02 david
+ * obstack_free was being called at end of function definition, freeing
+ * memory that might be part of global variables referenced only inside
+ * functions; commented out for now, will have to selectively free later.
+ * cleanup: regexp now returns a NODE *
+ *
+ * Revision 1.9 88/05/27 11:04:53 david
+ * added print[f] '(' ... ')' (optional parentheses)
+ * for some reason want_redirect wasn't getting set for PRINT, so I set it in
+ * yylex()
+ *
+ * Revision 1.8 88/05/26 22:52:14 david
+ * fixed cmd | getline
+ * added compound patterns (they got lost somewhere along the line)
+ * fixed error message in yylex()
+ * added null statement
+ *
+ * Revision 1.7 88/05/13 22:05:29 david
+ * moved BEGIN and END block merging here
+ * BEGIN, END and function defs. are no longer incorporated into main parse tree
+ * fixed command | getline
+ * fixed function install and definition
+ *
+ * Revision 1.6 88/05/09 17:47:50 david
+ * Arnold's coded binary search
+ *
+ * Revision 1.5 88/05/04 12:31:13 david
+ * be a bit more careful about types
+ * make_for_loop() now returns a NODE *
+ * keyword search now uses bsearch() -- need a public domain version of this
+ * added back stuff in yylex() that got lost somewhere along the line
+ * malloc() tokens in yylex() since they were previously just pointers into
+ * current line that got overwritten by the next fgets() -- these need to get
+ * freed at some point
+ * fixed backslash line continuation interaction with CONCAT
+ *
+ * Revision 1.4 88/04/14 17:03:51 david
+ * reinstalled a fix to do with line continuation
+ *
+ * Revision 1.3 88/04/14 14:41:01 david
+ * Arnold's changes to yylex to read program from a file
+ *
+ * Revision 1.5 88/03/18 21:00:07 david
+ * Baseline -- hoefully all the functionality of the new awk added.
+ * Just debugging and tuning to do.
+ *
+ * Revision 1.4 87/11/19 14:37:20 david
+ * added a bunch of ew builtin functions
+ * added new rules for getline to provide new functionality
+ * minor cleanup of redirection handling
+ * generalized make_param into make_name
+ *
+ * Revision 1.3 87/11/09 21:22:33 david
+ * added macinery for user-defined functions (including return)
+ * added delete, do-while and system
+ * reformatted and revised grammer to improve error-handling
+ * changes to yyerror to give improved error messages
+ *
+ * Revision 1.2 87/10/29 21:33:28 david
+ * added test for membership in an array, as in: if ("yes" in answers) ...
+ *
+ * Revision 1.1 87/10/27 15:23:21 david
+ * Initial revision
+ *
*/
/*
@@ -26,191 +141,291 @@ anyone else from sharing it farther. Help stamp out software hoarding!
%{
#define YYDEBUG 12
+#define YYIMPROVE
-#include <stdio.h>
#include "awk.h"
- static int yylex ();
-
-
- /*
- * The following variable is used for a very sickening thing.
- * The awk language uses white space as the string concatenation
- * operator, but having a white space token that would have to appear
- * everywhere in all the grammar rules would be unbearable.
- * It turns out we can return CONCAT_OP exactly when there really
- * is one, just from knowing what kinds of other tokens it can appear
- * between (namely, constants, variables, or close parentheses).
- * This is because concatenation has the lowest priority of all
- * operators. want_concat_token is used to remember that something
- * that could be the left side of a concat has just been returned.
- *
- * If anyone knows a cleaner way to do this (don't look at the Un*x
- * code to find one, though), please suggest it.
- */
- static int want_concat_token;
-
- /* Two more horrible kludges. The same comment applies to these two too */
- static int want_regexp; /* lexical scanning kludge */
- static int want_redirect; /* similarly */
- int lineno = 1; /* JF for error msgs */
+static int yylex ();
+
+/*
+ * The following variable is used for a very sickening thing.
+ * The awk language uses white space as the string concatenation
+ * operator, but having a white space token that would have to appear
+ * everywhere in all the grammar rules would be unbearable.
+ * It turns out we can return CONCAT_OP exactly when there really
+ * is one, just from knowing what kinds of other tokens it can appear
+ * between (namely, constants, variables, or close parentheses).
+ * This is because concatenation has the lowest priority of all
+ * operators. want_concat_token is used to remember that something
+ * that could be the left side of a concat has just been returned.
+ *
+ * If anyone knows a cleaner way to do this (don't look at the Un*x
+ * code to find one, though), please suggest it.
+ */
+static int want_concat_token;
+
+/* Two more horrible kludges. The same comment applies to these two too */
+static int want_regexp; /* lexical scanning kludge */
+static int want_redirect; /* similarly */
+int lineno = 1; /* for error msgs */
/* During parsing of a gawk program, the pointer to the next character
is in this variable. */
- char *lexptr; /* JF moved it up here */
- char *lexptr_begin; /* JF for error msgs */
+char *lexptr; /* moved it up here */
+char *lexptr_begin; /* for error msgs */
+char *func_def;
+extern int errcount;
+extern NODE *begin_block;
+extern NODE *end_block;
+extern struct re_pattern_buffer *mk_re_parse();
+extern int param_counter;
+struct re_pattern_buffer *rp;
%}
%union {
- long lval;
- AWKNUM fval;
- NODE *nodeval;
- NODETYPE nodetypeval;
- char *sval;
- NODE *(*ptrval)();
+ long lval;
+ AWKNUM fval;
+ NODE *nodeval;
+ NODETYPE nodetypeval;
+ char *sval;
+ NODE *(*ptrval)();
}
-%type <nodeval> exp start program rule pattern conditional
-%type <nodeval> action variable redirection expression_list
-%type <nodeval> statements statement if_statement
-%type <nodeval> opt_exp v_exp
-%type <nodetypeval> whitespace
+%type <nodeval> function_prologue function_body
+%type <nodeval> exp sub_exp start program rule pattern expression_list
+%type <nodeval> action variable redirection param_list opt_expression_list
+%type <nodeval> statements statement if_statement opt_param_list
+%type <nodeval> opt_exp opt_variable regexp
+%type <nodetypeval> whitespace r_paren
%token <sval> NAME REGEXP YSTRING
%token <lval> ERROR INCDEC
%token <fval> NUMBER
%token <nodetypeval> ASSIGNOP RELOP MATCHOP NEWLINE REDIRECT_OP CONCAT_OP
-%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE
-%token <nodetypeval> LEX_WHILE LEX_FOR LEX_BREAK LEX_CONTINUE
-%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT
-%token LEX_IN
+%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
+%token <nodetypeval> LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
+%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
+%token <nodetypeval> LEX_GETLINE LEX_SUB LEX_MATCH
+%token <nodetypeval> LEX_IN
%token <lval> LEX_AND LEX_OR INCREMENT DECREMENT
%token <ptrval> LEX_BUILTIN
/* these are just yylval numbers */
-/* %token <lval> CHAR JF this isn't used anymore */
/* Lowest to highest */
+%right ASSIGNOP
+%right '?' ':'
%left LEX_OR
%left LEX_AND
-%right ASSIGNOP
+%left LEX_IN
+%nonassoc MATCHOP
+%nonassoc RELOP
+%nonassoc REDIRECT_OP
%left CONCAT_OP
%left '+' '-'
%left '*' '/' '%'
%right UNARY
-%nonassoc MATCHOP RELOP
+%right '^'
+%left INCREMENT DECREMENT
+%left '$'
%%
-start : optional_newlines program
+start
+ : opt_newlines program
{ expression_value = $2; }
;
-
-program : rule
- { $$ = node ($1, Node_rule_list,(NODE *) NULL); }
+program
+ : rule
+ {
+ if ($1 != NULL)
+ $$ = node ($1, Node_rule_list,(NODE *) NULL);
+ else
+ $$ = NULL;
+ yyerrok;
+ }
| program rule
/* cons the rule onto the tail of list */
- { $$ = append_right ($1, node($2, Node_rule_list,(NODE *) NULL)); }
+ {
+ if ($2 == NULL)
+ $$ = $1;
+ else if ($1 == NULL)
+ $$ = node($2, Node_rule_list,(NODE *) NULL);
+ else
+ $$ = append_right ($1,
+ node($2, Node_rule_list,(NODE *) NULL));
+ yyerrok;
+ }
+ | error { $$ = NULL; }
+ | program error
;
-rule : pattern action NEWLINE optional_newlines
- { $$ = node ($1, Node_rule_node, $2); }
+rule
+ : LEX_BEGIN action
+ {
+ if (begin_block)
+ append_right (begin_block, node(
+ node((NODE *)NULL, Node_rule_node, $2),
+ Node_rule_list, (NODE *)NULL) );
+ else
+ begin_block = node(node((NODE *)NULL,Node_rule_node,$2),
+ Node_rule_list, (NODE *)NULL);
+ $$ = NULL;
+ yyerrok;
+ }
+ | LEX_END action
+ {
+ if (end_block)
+ append_right (end_block, node(
+ node((NODE *)NULL, Node_rule_node, $2),
+ Node_rule_list, (NODE *)NULL));
+ else
+ end_block = node(node((NODE *)NULL, Node_rule_node, $2),
+ Node_rule_list, (NODE *)NULL);
+ $$ = NULL;
+ yyerrok;
+ }
+ | LEX_BEGIN statement_term
+ {
+ msg ("error near line %d: BEGIN blocks must have an action part", lineno);
+ errcount++;
+ yyerrok;
+ }
+ | LEX_END statement_term
+ {
+ msg ("error near line %d: END blocks must have an action part", lineno);
+ errcount++;
+ yyerrok;
+ }
+ | pattern action
+ { $$ = node ($1, Node_rule_node, $2); yyerrok; }
+ | pattern statement_term
+ { if($1) $$ = node ($1, Node_rule_node, (NODE *)NULL); yyerrok; }
+ | function_prologue function_body
+ {
+ func_install($1, $2);
+ $$ = NULL;
+ yyerrok;
+ }
+ ;
+
+function_prologue
+ : LEX_FUNCTION
+ {
+ param_counter = 0;
+ }
+ NAME whitespace '(' opt_param_list r_paren whitespace
+ {
+ $$ = append_right(make_param($3), $6);
+ }
;
+function_body
+ : l_brace statements r_brace statement_term
+ { $$ = $2; }
+ ;
-pattern : /* empty */
+pattern
+ : /* empty */
{ $$ = NULL; }
- | conditional
+ | sub_exp
{ $$ = $1; }
- | conditional ',' conditional
- { $$ = mkrangenode ( node($1, Node_cond_pair, $3) ); } /*jfw*/
- ;
-
-
-conditional :
- LEX_BEGIN
- { $$ = node ((NODE *)NULL, Node_K_BEGIN,(NODE *) NULL); }
- | LEX_END
- { $$ = node ((NODE *)NULL, Node_K_END,(NODE *) NULL); }
- | '!' conditional %prec UNARY
- { $$ = node ($2, Node_not,(NODE *) NULL); }
- | conditional LEX_AND conditional
+ | regexp
+ {
+ $$ = node(
+ node(make_number((AWKNUM)0),Node_field_spec,(NODE*)NULL),
+ Node_match, $1);
+ }
+ | pattern LEX_AND pattern
{ $$ = node ($1, Node_and, $3); }
- | conditional LEX_OR conditional
+ | pattern LEX_OR pattern
{ $$ = node ($1, Node_or, $3); }
- | '(' conditional ')'
- {
- $$ = $2;
- want_concat_token = 0;
- }
+ | '!' pattern %prec UNARY
+ { $$ = node ($2, Node_not,(NODE *) NULL); }
+ | '(' pattern r_paren
+ { $$ = $2; }
+ | pattern ',' pattern
+ { $$ = mkrangenode ( node($1, Node_cond_pair, $3) ); }
+ ;
- /* In these rules, want_regexp tells yylex that the next thing
+regexp
+ /* In this rule, want_regexp tells yylex that the next thing
is a regexp so it should read up to the closing slash. */
-
- | '/'
+ : '/'
{ ++want_regexp; }
REGEXP '/'
{ want_regexp = 0;
- $$ = node (node (make_number ((AWKNUM)0), Node_field_spec, (NODE *)NULL),
- Node_match, (NODE *)make_regexp ($3));
+ rp = mk_re_parse($3);
+ $$ = node((NODE *)NULL, Node_regex, (NODE *)rp);
}
- | exp MATCHOP '/'
- { ++want_regexp; }
- REGEXP '/'
- { want_regexp = 0;
- $$ = node ($1, $2, (NODE *)make_regexp($5));
- }
- | exp RELOP exp
- { $$ = node ($1, $2, $3); }
- | exp /* JF */
- { $$ = $1; }
;
-
-action : /* empty */
- { $$ = NULL; }
- | '{' whitespace statements '}'
- { $$ = $3; }
+action
+ : l_brace r_brace
+ {
+ /* empty actions are different from missing actions */
+ $$ = node ((NODE *) NULL, Node_illegal, (NODE *) NULL);
+ }
+ | l_brace statements r_brace
+ { $$ = $2 ; }
;
-
-statements : /* EMPTY */
- { $$ = NULL; }
- | statement
+statements
+ : statement
{ $$ = node ($1, Node_statement_list, (NODE *)NULL); }
| statements statement
- { $$ = append_right($1, node( $2, Node_statement_list, (NODE *)NULL)); }
+ {
+ $$ = append_right($1,
+ node( $2, Node_statement_list, (NODE *)NULL));
+ yyerrok;
+ }
+ | error
+ { $$ = NULL; }
+ | statements error
;
-statement_term :
- NEWLINE optional_newlines
- { $<nodetypeval>$ = Node_illegal; }
- | ';' optional_newlines
- { $<nodetypeval>$ = Node_illegal; }
+statement_term
+ : NEWLINE opt_newlines
+ { $<nodetypeval>$ = Node_illegal; want_redirect = 0; }
+ | semi_colon opt_newlines
+ { $<nodetypeval>$ = Node_illegal; want_redirect = 0; }
;
-whitespace :
- /* blank */
- { $$ = Node_illegal; }
- | CONCAT_OP
+whitespace
+ : /* blank */
+ { $<nodetypeval>$ = Node_illegal; }
+ | CONCAT_OP
+ { $<nodetypeval>$ = Node_illegal; }
| NEWLINE
+ { $<nodetypeval>$ = Node_illegal; }
| whitespace CONCAT_OP
+ { $<nodetypeval>$ = Node_illegal; }
| whitespace NEWLINE
+ { $<nodetypeval>$ = Node_illegal; }
;
-statement :
- '{' whitespace statements '}' whitespace
- { $$ = $3; }
+
+statement
+ : semi_colon opt_newlines
+ { $$ = NULL; }
+ | l_brace statements r_brace whitespace
+ { $$ = $2; }
| if_statement
{ $$ = $1; }
- | LEX_WHILE '(' conditional ')' whitespace statement
+ | LEX_WHILE '(' exp r_paren whitespace statement
{ $$ = node ($3, Node_K_while, $6); }
- | LEX_FOR '(' opt_exp ';' conditional ';' opt_exp ')' whitespace statement
+ | LEX_DO whitespace statement LEX_WHILE '(' exp r_paren whitespace
+ { $$ = node ($6, Node_K_do, $3); }
+ | LEX_FOR '(' opt_exp semi_colon exp semi_colon opt_exp r_paren whitespace statement
{ $$ = node ($10, Node_K_for, (NODE *)make_for_loop ($3, $5, $7)); }
- | LEX_FOR '(' opt_exp ';' ';' opt_exp ')' whitespace statement
+ | LEX_FOR '(' opt_exp semi_colon semi_colon opt_exp r_paren whitespace statement
{ $$ = node ($9, Node_K_for, (NODE *)make_for_loop ($3, (NODE *)NULL, $6)); }
- | LEX_FOR '(' NAME CONCAT_OP LEX_IN NAME ')' whitespace statement
- { $$ = node ($9, Node_K_arrayfor, (NODE *)make_for_loop(variable($3), (NODE *)NULL, variable($6))); }
+ | LEX_FOR '(' NAME CONCAT_OP LEX_IN NAME r_paren whitespace statement
+ {
+ $$ = node ($9, Node_K_arrayfor,
+ make_for_loop(variable($3),
+ (NODE *)NULL, variable($6)));
+ }
| LEX_BREAK statement_term
/* for break, maybe we'll have to remember where to break to */
{ $$ = node ((NODE *)NULL, Node_K_break, (NODE *)NULL); }
@@ -219,680 +434,969 @@ statement :
{ $$ = node ((NODE *)NULL, Node_K_continue, (NODE *)NULL); }
| LEX_PRINT
{ ++want_redirect; }
- expression_list redirection statement_term
- {
- want_redirect = 0;
- /* $4->lnode = NULL; */
- $$ = node ($3, Node_K_print, $4);
- }
+ opt_expression_list redirection statement_term
+ { $$ = node ($3, Node_K_print, $4); }
+ | LEX_PRINT '(' opt_expression_list r_paren
+ { ++want_redirect; want_concat_token = 0; }
+ redirection statement_term
+ { $$ = node ($3, Node_K_print, $6); }
| LEX_PRINTF
{ ++want_redirect; }
- expression_list redirection statement_term
- {
- want_redirect = 0;
- /* $4->lnode = NULL; */
- $$ = node ($3, Node_K_printf, $4);
- }
- | LEX_PRINTF '(' expression_list ')'
- { ++want_redirect;
- want_concat_token = 0; }
- redirection statement_term
- {
- want_redirect = 0;
- $$ = node ($3, Node_K_printf, $6);
- }
+ opt_expression_list redirection statement_term
+ { $$ = node ($3, Node_K_printf, $4); }
+ | LEX_PRINTF '(' opt_expression_list r_paren
+ { ++want_redirect; want_concat_token = 0; }
+ redirection statement_term
+ { $$ = node ($3, Node_K_printf, $6); }
| LEX_NEXT statement_term
{ $$ = node ((NODE *)NULL, Node_K_next, (NODE *)NULL); }
- | LEX_EXIT statement_term
- { $$ = node ((NODE *)NULL, Node_K_exit, (NODE *)NULL); }
- | LEX_EXIT '(' exp ')' statement_term
- { $$ = node ($3, Node_K_exit, (NODE *)NULL); }
+ | LEX_EXIT opt_exp statement_term
+ { $$ = node ($2, Node_K_exit, (NODE *)NULL); }
+ | LEX_RETURN opt_exp statement_term
+ { $$ = node ($2, Node_K_return, (NODE *)NULL); }
+ | LEX_DELETE NAME '[' expression_list ']' statement_term
+ { $$ = node (variable($2), Node_K_delete, $4); }
| exp statement_term
{ $$ = $1; }
;
-
-if_statement:
- LEX_IF '(' conditional ')' whitespace statement
+if_statement
+ : LEX_IF '(' exp r_paren whitespace statement
{ $$ = node ($3, Node_K_if,
node ($6, Node_if_branches, (NODE *)NULL)); }
- | LEX_IF '(' conditional ')' whitespace statement
+ | LEX_IF '(' exp r_paren whitespace statement
LEX_ELSE whitespace statement
{ $$ = node ($3, Node_K_if,
node ($6, Node_if_branches, $9)); }
;
-optional_newlines :
- /* empty */
- | optional_newlines NEWLINE
+opt_newlines
+ : /* empty */
+ | opt_newlines NEWLINE
{ $<nodetypeval>$ = Node_illegal; }
;
-redirection :
- /* empty */
- { $$ = NULL; /* node (NULL, Node_redirect_nil, NULL); */ }
- /* | REDIRECT_OP NAME
- { $$ = node ($2, $1, NULL); } */
- | REDIRECT_OP exp
- { $$ = node ($2, $1, (NODE *)NULL); }
+redirection
+ : /* empty */
+ { want_redirect = 0; $$ = NULL; }
+ | REDIRECT_OP
+ { want_redirect = 0; }
+ exp
+ { $$ = node ($3, $1, (NODE *)NULL); }
+ ;
+
+opt_param_list
+ : /* empty */
+ { $$ = NULL; }
+ | param_list
+ /* $$ = $1 */
;
+param_list
+ : NAME
+ {
+ $$ = make_param($1);
+ }
+ | param_list ',' NAME
+ {
+ $$ = append_right($1, make_param($3));
+ yyerrok;
+ }
+ | error
+ { $$ = NULL; }
+ | param_list error
+ | param_list ',' error
+ ;
/* optional expression, as in for loop */
-opt_exp :
+opt_exp
+ : /* empty */
{ $$ = NULL; /* node(NULL, Node_builtin, NULL); */ }
| exp
- { $$ = $1; }
;
-expression_list :
- /* empty */
+opt_expression_list
+ : /* empty */
{ $$ = NULL; }
- | exp
+ | expression_list
+ { $$ = $1; }
+ ;
+
+expression_list
+ : exp
{ $$ = node ($1, Node_expression_list, (NODE *)NULL); }
| expression_list ',' exp
- { $$ = append_right($1, node( $3, Node_expression_list, (NODE *)NULL)); }
+ {
+ $$ = append_right($1,
+ node( $3, Node_expression_list, (NODE *)NULL));
+ yyerrok;
+ }
+ | error
+ { $$ = NULL; }
+ | expression_list error
+ | expression_list error exp
+ | expression_list ',' error
;
-
/* Expressions, not including the comma operator. */
-exp : LEX_BUILTIN '(' expression_list ')'
- { $$ = snode ($3, Node_builtin, $1); }
- | LEX_BUILTIN
- { $$ = snode ((NODE *)NULL, Node_builtin, $1); }
- | '(' exp ')'
- { $$ = $2; }
- | '-' exp %prec UNARY
- { $$ = node ($2, Node_unary_minus, (NODE *)NULL); }
- | INCREMENT variable %prec UNARY
- { $$ = node ($2, Node_preincrement, (NODE *)NULL); }
- | DECREMENT variable %prec UNARY
- { $$ = node ($2, Node_predecrement, (NODE *)NULL); }
- | variable INCREMENT %prec UNARY
- { $$ = node ($1, Node_postincrement, (NODE *)NULL); }
- | variable DECREMENT %prec UNARY
- { $$ = node ($1, Node_postdecrement, (NODE *)NULL); }
- | variable
- { $$ = $1; } /* JF was variable($1) */
- | NUMBER
- { $$ = make_number ($1); }
- | YSTRING
- { $$ = make_string ($1, -1); }
+exp : sub_exp
+ | exp LEX_AND whitespace exp
+ { $$ = node ($1, Node_and, $4); }
+ | exp LEX_OR whitespace exp
+ { $$ = node ($1, Node_or, $4); }
+ | '!' exp %prec UNARY
+ { $$ = node ($2, Node_not,(NODE *) NULL); }
+ | '(' exp r_paren
+ { $$ = $2; }
+ ;
+
+sub_exp : LEX_BUILTIN '(' opt_expression_list r_paren
+ { $$ = snode ($3, Node_builtin, $1); }
+ | LEX_BUILTIN
+ { $$ = snode ((NODE *)NULL, Node_builtin, $1); }
+ | exp MATCHOP regexp
+ { $$ = node ($1, $2, $3); }
+ | exp MATCHOP exp
+ { $$ = node ($1, $2, $3); }
+ | exp CONCAT_OP LEX_IN NAME
+ { $$ = node (variable($4), Node_in_array, $1); }
+ | '(' expression_list r_paren CONCAT_OP LEX_IN NAME
+ { $$ = node (variable($6), Node_in_array, $2); }
+ | LEX_SUB '(' regexp ',' expression_list r_paren
+ { $$ = node($5, $1, $3); }
+ | LEX_SUB '(' exp ',' expression_list r_paren
+ { $$ = node($5, $1, $3); }
+ | LEX_MATCH '(' exp ',' regexp r_paren
+ { $$ = node($3, $1, $5); }
+ | LEX_MATCH '(' exp ',' exp r_paren
+ { $$ = node($3, $1, $5); }
+ | LEX_GETLINE
+ {++want_redirect; }
+ opt_variable redirection
+ {
+ $$ = node ($3, Node_K_getline, $4);
+ }
+ | exp '|' LEX_GETLINE opt_variable
+ {
+ $$ = node ($4, Node_K_getline,
+ node ($1, Node_redirect_pipein, (NODE *)NULL));
+ }
+ | exp RELOP exp
+ { $$ = node ($1, $2, $3); }
+ | exp '?' exp ':' exp
+ { $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5)); }
+ | NAME '(' opt_expression_list r_paren
+ {
+ $$ = node ($3, Node_func_call, make_string($1, strlen($1)));
+ }
+ | '-' exp %prec UNARY
+ { $$ = node ($2, Node_unary_minus, (NODE *)NULL); }
+ | '+' exp %prec UNARY
+ { $$ = $2; }
+ | INCREMENT variable
+ { $$ = node ($2, Node_preincrement, (NODE *)NULL); }
+ | DECREMENT variable
+ { $$ = node ($2, Node_predecrement, (NODE *)NULL); }
+ | variable INCREMENT
+ { $$ = node ($1, Node_postincrement, (NODE *)NULL); }
+ | variable DECREMENT
+ { $$ = node ($1, Node_postdecrement, (NODE *)NULL); }
+ | variable
+ { $$ = $1; }
+ | NUMBER
+ { $$ = make_number ($1); }
+ | YSTRING
+ { $$ = make_string ($1, -1); }
/* Binary operators in order of decreasing precedence. */
- | exp '*' exp
- { $$ = node ($1, Node_times, $3); }
- | exp '/' exp
- { $$ = node ($1, Node_quotient, $3); }
- | exp '%' exp
- { $$ = node ($1, Node_mod, $3); }
- | exp '+' exp
- { $$ = node ($1, Node_plus, $3); }
- | exp '-' exp
- { $$ = node ($1, Node_minus, $3); }
+ | exp '^' exp
+ { $$ = node ($1, Node_exp, $3); }
+ | exp '*' exp
+ { $$ = node ($1, Node_times, $3); }
+ | exp '/' exp
+ { $$ = node ($1, Node_quotient, $3); }
+ | exp '%' exp
+ { $$ = node ($1, Node_mod, $3); }
+ | exp '+' exp
+ { $$ = node ($1, Node_plus, $3); }
+ | exp '-' exp
+ { $$ = node ($1, Node_minus, $3); }
/* Empty operator. See yylex for disgusting details. */
- | exp CONCAT_OP exp
- { $$ = node ($1, Node_concat, $3); }
- | variable ASSIGNOP exp
- { $$ = node ($1, $2, $3); }
+ | exp CONCAT_OP exp
+ { $$ = node ($1, Node_concat, $3); }
+ | variable ASSIGNOP exp
+ { $$ = node ($1, $2, $3); }
;
-v_exp : LEX_BUILTIN '(' expression_list ')'
- { $$ = snode ($3, Node_builtin, $1); }
- | LEX_BUILTIN
- { $$ = snode ((NODE *)NULL, Node_builtin, $1); }
- | '(' exp ')'
- { $$ = $2; }
- | '-' exp %prec UNARY
- { $$ = node ($2, Node_unary_minus, (NODE *)NULL); }
- | INCREMENT variable %prec UNARY
- { $$ = node ($2, Node_preincrement, (NODE *)NULL); }
- | DECREMENT variable %prec UNARY
- { $$ = node ($2, Node_predecrement, (NODE *)NULL); }
- | variable INCREMENT %prec UNARY
- { $$ = node ($1, Node_postincrement, (NODE *)NULL); }
- | variable DECREMENT %prec UNARY
- { $$ = node ($1, Node_postdecrement, (NODE *)NULL); }
- | variable
- { $$ = $1; } /* JF was variable($1) */
- | NUMBER
- { $$ = make_number ($1); }
- | YSTRING
- { $$ = make_string ($1, -1); }
+opt_variable
+ : /* empty */
+ { $$ = NULL; }
+ | variable
+ ;
-/* Binary operators in order of decreasing precedence. */
- | v_exp '*' exp
- { $$ = node ($1, Node_times, $3); }
- | v_exp '/' exp
- { $$ = node ($1, Node_quotient, $3); }
- | v_exp '%' exp
- { $$ = node ($1, Node_mod, $3); }
- | v_exp '+' exp
- { $$ = node ($1, Node_plus, $3); }
- | v_exp '-' exp
- { $$ = node ($1, Node_minus, $3); }
- /* Empty operator. See yylex for disgusting details. */
- | v_exp CONCAT_OP exp
- { $$ = node ($1, Node_concat, $3); }
+variable
+ : NAME
+ { $$ = variable ($1); }
+ | NAME '[' expression_list ']'
+ { $$ = node (variable($1), Node_subscript, $3); }
+ | '$' exp
+ { $$ = node ($2, Node_field_spec, (NODE *)NULL); }
+ ;
+
+l_brace
+ : '{' whitespace
;
-variable :
- NAME
- { $$ = variable ($1); }
- | NAME '[' exp ']'
- { $$ = node (variable($1), Node_subscript, $3); }
- | '$' v_exp %prec UNARY
- { $$ = node ($2, Node_field_spec, (NODE *)NULL); }
+r_brace
+ : '}' { yyerrok; }
+ ;
+
+r_paren
+ : ')' { $<nodetypeval>$ = Node_illegal; yyerrok; }
+ ;
+
+semi_colon
+ : ';' { yyerrok; }
;
%%
-
struct token {
- char *operator;
- NODETYPE value;
- int class;
- NODE *(*ptr)();
+ char *operator;
+ NODETYPE value;
+ int class;
+ NODE *(*ptr) ();
};
#define NULL 0
NODE *do_exp(), *do_getline(), *do_index(), *do_length(),
*do_sqrt(), *do_log(), *do_sprintf(), *do_substr(),
- *do_split(), *do_int();
+ *do_split(), *do_system(), *do_int(), *do_close(),
+ *do_atan2(), *do_sin(), *do_cos(), *do_rand(),
+ *do_srand(), *do_match();
- /* Special functions for debugging */
-#ifndef FAST
-NODE *do_prvars(), *do_bp();
+/* Special functions for debugging */
+#ifdef DEBUG
+NODE *do_prvars(), *do_bp();
#endif
/* Tokentab is sorted ascii ascending order, so it can be binary searched. */
-/* (later. Right now its just sort of linear search (SLOW!!) */
static struct token tokentab[] = {
- {"BEGIN", Node_illegal, LEX_BEGIN, 0},
- {"END", Node_illegal, LEX_END, 0},
-#ifndef FAST
- {"bp", Node_builtin, LEX_BUILTIN, do_bp},
+ { "BEGIN", Node_illegal, LEX_BEGIN, 0 },
+ { "END", Node_illegal, LEX_END, 0 },
+ { "atan2", Node_builtin, LEX_BUILTIN, do_atan2 },
+#ifdef DEBUG
+ { "bp", Node_builtin, LEX_BUILTIN, do_bp },
#endif
- {"break", Node_K_break, LEX_BREAK, 0},
- {"continue", Node_K_continue, LEX_CONTINUE, 0},
- {"else", Node_illegal, LEX_ELSE, 0},
- {"exit", Node_K_exit, LEX_EXIT, 0},
- {"exp", Node_builtin, LEX_BUILTIN, do_exp},
- {"for", Node_K_for, LEX_FOR, 0},
- {"getline", Node_builtin, LEX_BUILTIN, do_getline},
- {"if", Node_K_if, LEX_IF, 0},
- {"in", Node_illegal, LEX_IN, 0},
- {"index", Node_builtin, LEX_BUILTIN, do_index},
- {"int", Node_builtin, LEX_BUILTIN, do_int},
- {"length", Node_builtin, LEX_BUILTIN, do_length},
- {"log", Node_builtin, LEX_BUILTIN, do_log},
- {"next", Node_K_next, LEX_NEXT, 0},
- {"print", Node_K_print, LEX_PRINT, 0},
- {"printf", Node_K_printf, LEX_PRINTF, 0},
-#ifndef FAST
- {"prvars", Node_builtin, LEX_BUILTIN, do_prvars},
+ { "break", Node_K_break, LEX_BREAK, 0 },
+ { "close", Node_builtin, LEX_BUILTIN, do_close },
+ { "continue", Node_K_continue, LEX_CONTINUE, 0 },
+ { "cos", Node_builtin, LEX_BUILTIN, do_cos },
+ { "delete", Node_K_delete, LEX_DELETE, 0 },
+ { "do", Node_K_do, LEX_DO, 0 },
+ { "else", Node_illegal, LEX_ELSE, 0 },
+ { "exit", Node_K_exit, LEX_EXIT, 0 },
+ { "exp", Node_builtin, LEX_BUILTIN, do_exp },
+ { "for", Node_K_for, LEX_FOR, 0 },
+ { "func", Node_K_function, LEX_FUNCTION, 0 },
+ { "function", Node_K_function, LEX_FUNCTION, 0 },
+ { "getline", Node_K_getline, LEX_GETLINE, 0 },
+ { "gsub", Node_gsub, LEX_SUB, 0 },
+ { "if", Node_K_if, LEX_IF, 0 },
+ { "in", Node_illegal, LEX_IN, 0 },
+ { "index", Node_builtin, LEX_BUILTIN, do_index },
+ { "int", Node_builtin, LEX_BUILTIN, do_int },
+ { "length", Node_builtin, LEX_BUILTIN, do_length },
+ { "log", Node_builtin, LEX_BUILTIN, do_log },
+ { "match", Node_K_match, LEX_MATCH, 0 },
+ { "next", Node_K_next, LEX_NEXT, 0 },
+ { "print", Node_K_print, LEX_PRINT, 0 },
+ { "printf", Node_K_printf, LEX_PRINTF, 0 },
+#ifdef DEBUG
+ { "prvars", Node_builtin, LEX_BUILTIN, do_prvars },
#endif
- {"split", Node_builtin, LEX_BUILTIN, do_split},
- {"sprintf", Node_builtin, LEX_BUILTIN, do_sprintf},
- {"sqrt", Node_builtin, LEX_BUILTIN, do_sqrt},
- {"substr", Node_builtin, LEX_BUILTIN, do_substr},
- {"while", Node_K_while, LEX_WHILE, 0},
- {NULL, Node_illegal, ERROR, 0}
+ { "rand", Node_builtin, LEX_BUILTIN, do_rand },
+ { "return", Node_K_return, LEX_RETURN, 0 },
+ { "sin", Node_builtin, LEX_BUILTIN, do_sin },
+ { "split", Node_builtin, LEX_BUILTIN, do_split },
+ { "sprintf", Node_builtin, LEX_BUILTIN, do_sprintf },
+ { "sqrt", Node_builtin, LEX_BUILTIN, do_sqrt },
+ { "srand", Node_builtin, LEX_BUILTIN, do_srand },
+ { "sub", Node_sub, LEX_SUB, 0 },
+ { "substr", Node_builtin, LEX_BUILTIN, do_substr },
+ { "system", Node_builtin, LEX_BUILTIN, do_system },
+ { "while", Node_K_while, LEX_WHILE, 0 },
};
-/* Read one token, getting characters through lexptr. */
+/* VARARGS0 */
+yyerror(va_alist)
+va_dcl
+{
+ va_list args;
+ char *mesg;
+ char *a1;
+ register char *ptr, *beg;
+ static int list = 0;
+ char *scan;
+
+ errcount++;
+ va_start(args);
+ mesg = va_arg(args, char *);
+ if (mesg || !list) {
+ /* Find the current line in the input file */
+ if (!lexptr) {
+ beg = "(END OF FILE)";
+ ptr = beg + 13;
+ } else {
+ if (*lexptr == '\n' && lexptr != lexptr_begin)
+ --lexptr;
+ for (beg = lexptr; beg != lexptr_begin && *beg != '\n'; --beg)
+ ;
+ /* NL isn't guaranteed */
+ for (ptr = lexptr; *ptr && *ptr != '\n'; ptr++)
+ ;
+ if (beg != lexptr_begin)
+ beg++;
+ }
+ msg("syntax error near line %d:\n%.*s", lineno, ptr - beg, beg);
+ scan = beg;
+ while (scan <= lexptr)
+ if (*scan++ == '\t')
+ putc('\t', stderr);
+ else
+ putc(' ', stderr);
+ putc('^', stderr);
+ putc(' ', stderr);
+ if (mesg) {
+ vfprintf(stderr, mesg, args);
+ va_end(args);
+ putc('\n', stderr);
+ exit(1);
+ } else {
+ a1 = va_arg(args, char *);
+ if (a1) {
+ fputs("expecting: ", stderr);
+ fputs(a1, stderr);
+ list = 1;
+ va_end(args);
+ return;
+ }
+ }
+ va_end(args);
+ return;
+ }
+ a1 = va_arg(args, char *);
+ if (a1) {
+ fputs(" or ", stderr);
+ fputs(a1, stderr);
+ va_end(args);
+ putc('\n', stderr);
+ return;
+ }
+ putc('\n', stderr);
+ list = 0;
+ va_end(args);
+}
+
+/*
+ * Parse a C escape sequence. STRING_PTR points to a variable containing a
+ * pointer to the string to parse. That pointer is updated past the
+ * characters we use. The value of the escape sequence is returned.
+ *
+ * A negative value means the sequence \ newline was seen, which is supposed to
+ * be equivalent to nothing at all.
+ *
+ * If \ is followed by a null character, we return a negative value and leave
+ * the string pointer pointing at the null character.
+ *
+ * If \ is followed by 000, we return 0 and leave the string pointer after the
+ * zeros. A value of 0 does not mean end of string.
+ */
static int
-yylex ()
+parse_escape(string_ptr)
+char **string_ptr;
{
- register int c;
- register int namelen;
- register char *tokstart;
- register struct token *toktab;
- double atof(); /* JF know what happens if you forget this? */
-
-
- static did_newline = 0; /* JF the grammar insists that actions end
- with newlines. This was easier than hacking
- the grammar. */
- int do_concat;
-
- int seen_e = 0; /* These are for numbers */
- int seen_point = 0;
-
- retry:
-
- if(!lexptr)
- return 0;
-
- if (want_regexp) {
- want_regexp = 0;
- /* there is a potential bug if a regexp is followed by an equal sign:
- "/foo/=bar" would result in assign_quotient being returned as the
- next token. Nothing is done about it since it is not valid awk,
- but maybe something should be done anyway. */
-
- tokstart = lexptr;
- while (c = *lexptr++) {
- switch (c) {
- case '\\':
- if (*lexptr++ == '\0') {
- yyerror ("unterminated regexp ends with \\");
- return ERROR;
+ register int c = *(*string_ptr)++;
+
+ switch (c) {
+ case 'b':
+ return '\b';
+ case 'f':
+ return '\f';
+ case 'n':
+ return '\n';
+ case 'r':
+ return '\r';
+ case 't':
+ return '\t';
+ case 'v':
+ return '\v';
+ case '\n':
+ return -2;
+ case 0:
+ (*string_ptr)--;
+ return 0;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ {
+ register int i = c - '0';
+ register int count = 0;
+
+ while (++count < 3) {
+ if ((c = *(*string_ptr)++) >= '0' && c <= '7') {
+ i *= 8;
+ i += c - '0';
+ } else {
+ (*string_ptr)--;
+ break;
+ }
+ }
+ return i;
+ }
+ default:
+ return c;
}
- break;
- case '/': /* end of the regexp */
- lexptr--;
- yylval.sval = tokstart;
- return REGEXP;
- case '\n':
- case '\0':
- yyerror ("unterminated regexp");
- return ERROR;
- }
- }
- }
- do_concat=want_concat_token;
- want_concat_token=0;
-
- if(*lexptr=='\0') {
- lexptr=0;
- return NEWLINE;
- }
-
- /* if lexptr is at white space between two terminal tokens or parens,
- it is a concatenation operator. */
- if(do_concat && (*lexptr==' ' || *lexptr=='\t')) {
- while (*lexptr == ' ' || *lexptr == '\t')
- lexptr++;
- if (isalnum(*lexptr) || *lexptr == '\"' || *lexptr == '('
- || *lexptr == '.' || *lexptr == '$') /* the '.' is for decimal pt */
- return CONCAT_OP;
- }
-
- while (*lexptr == ' ' || *lexptr == '\t')
- lexptr++;
-
- tokstart = lexptr; /* JF */
-
- switch (c = *lexptr++) {
- case 0:
- return 0;
-
- case '\n':
- lineno++;
- return NEWLINE;
-
- case '#': /* it's a comment */
- while (*lexptr != '\n' && *lexptr != '\0')
- lexptr++;
- goto retry;
-
- case '\\':
- if(*lexptr=='\n') {
- lexptr++;
- goto retry;
- } else break;
- case ')':
- case ']':
- ++want_concat_token;
- /* fall through */
- case '(': /* JF these were above, but I don't see why they should turn on concat. . . &*/
- case '[':
-
- case '{':
- case ',': /* JF */
- case '$':
- case ';':
- /* set node type to ILLEGAL because the action should set it to
- the right thing */
- yylval.nodetypeval = Node_illegal;
- return c;
-
- case '*':
- if(*lexptr=='=') {
- yylval.nodetypeval=Node_assign_times;
- lexptr++;
- return ASSIGNOP;
- }
- yylval.nodetypeval=Node_illegal;
- return c;
-
- case '/':
- if(*lexptr=='=') {
- yylval.nodetypeval=Node_assign_quotient;
- lexptr++;
- return ASSIGNOP;
- }
- yylval.nodetypeval=Node_illegal;
- return c;
-
- case '%':
- if(*lexptr=='=') {
- yylval.nodetypeval=Node_assign_mod;
- lexptr++;
- return ASSIGNOP;
- }
- yylval.nodetypeval=Node_illegal;
- return c;
-
- case '+':
- if(*lexptr=='=') {
- yylval.nodetypeval=Node_assign_plus;
- lexptr++;
- return ASSIGNOP;
- }
- if(*lexptr=='+') {
- yylval.nodetypeval=Node_illegal;
- lexptr++;
- return INCREMENT;
- }
- yylval.nodetypeval=Node_illegal;
- return c;
-
- case '!':
- if(*lexptr=='=') {
- yylval.nodetypeval=Node_notequal;
- lexptr++;
- return RELOP;
- }
- if(*lexptr=='~') {
- yylval.nodetypeval=Node_nomatch;
- lexptr++;
- return MATCHOP;
- }
- yylval.nodetypeval=Node_illegal;
- return c;
-
- case '<':
- if(*lexptr=='=') {
- yylval.nodetypeval=Node_leq;
- lexptr++;
- return RELOP;
- }
- yylval.nodetypeval=Node_less;
- return RELOP;
-
- case '=':
- if(*lexptr=='=') {
- yylval.nodetypeval=Node_equal;
- lexptr++;
- return RELOP;
- }
- yylval.nodetypeval=Node_assign;
- return ASSIGNOP;
-
- case '>':
- if(want_redirect) {
- if (*lexptr == '>') {
- yylval.nodetypeval = Node_redirect_append;
- lexptr++;
- } else
- yylval.nodetypeval = Node_redirect_output;
- return REDIRECT_OP;
- }
- if(*lexptr=='=') {
- yylval.nodetypeval=Node_geq;
- lexptr++;
- return RELOP;
- }
- yylval.nodetypeval=Node_greater;
- return RELOP;
-
- case '~':
- yylval.nodetypeval=Node_match;
- return MATCHOP;
-
- case '}': /* JF added did newline stuff. Easier than hacking the grammar */
- if(did_newline) {
- did_newline=0;
- return c;
- }
- did_newline++;
- --lexptr;
- return NEWLINE;
-
- case '"':
- while (*lexptr != '\0') {
- switch (*lexptr++) {
- case '\\':
- if (*lexptr++ != '\0')
- break;
- /* fall through */
- case '\n':
- yyerror ("unterminated string");
- return ERROR;
- case '\"':
- yylval.sval = tokstart + 1; /* JF Skip the doublequote */
- ++want_concat_token;
- return YSTRING;
- }
- }
- return ERROR; /* JF this was one level up, wrong? */
-
- case '-':
- if(*lexptr=='=') {
- yylval.nodetypeval=Node_assign_minus;
- lexptr++;
- return ASSIGNOP;
- }
- if(*lexptr=='-') {
- yylval.nodetypeval=Node_illegal;
- lexptr++;
- return DECREMENT;
- }
- /* JF I think space tab comma and newline are the legal places for
- a UMINUS. Have I missed any? */
- if((!isdigit(*lexptr) && *lexptr!='.') || (lexptr>lexptr_begin+1 &&
- !index(" \t,\n",lexptr[-2]))) {
- /* set node type to ILLEGAL because the action should set it to
- the right thing */
- yylval.nodetypeval = Node_illegal;
- return c;
- }
- /* FALL through into number code */
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- case '.':
- /* It's a number */
- if(c=='-') namelen=1;
- else namelen=0;
- for (; (c = tokstart[namelen]) != '\0'; namelen++) {
- switch (c) {
- case '.':
- if (seen_point)
- goto got_number;
- ++seen_point;
- break;
- case 'e':
- case 'E':
- if (seen_e)
- goto got_number;
- ++seen_e;
- if (tokstart[namelen+1] == '-' || tokstart[namelen+1] == '+')
- namelen++;
- break;
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- break;
- default:
- goto got_number;
- }
- }
-
-got_number:
- lexptr = tokstart + namelen;
- yylval.fval = atof(tokstart);
- ++want_concat_token;
- return NUMBER;
-
- case '&':
- if(*lexptr=='&') {
- yylval.nodetypeval=Node_and;
- lexptr++;
- return LEX_AND;
- }
- return ERROR;
-
- case '|':
- if(want_redirect) {
- lexptr++;
- yylval.nodetypeval = Node_redirect_pipe;
- return REDIRECT_OP;
- }
- if(*lexptr=='|') {
- yylval.nodetypeval=Node_or;
- lexptr++;
- return LEX_OR;
- }
- return ERROR;
- }
-
- if (!isalpha(c)) {
- yyerror ("Invalid char '%c' in expression\n", c);
- return ERROR;
- }
-
- /* its some type of name-type-thing. Find its length */
- for (namelen = 0; is_identchar(tokstart[namelen]); namelen++)
- ;
-
-
- /* See if it is a special token. */
- for (toktab = tokentab; toktab->operator != NULL; toktab++) {
- if(*tokstart==toktab->operator[0] &&
- !strncmp(tokstart,toktab->operator,namelen) &&
- toktab->operator[namelen]=='\0') {
- lexptr=tokstart+namelen;
- if(toktab->class == LEX_BUILTIN)
- yylval.ptrval = toktab->ptr;
- else
- yylval.nodetypeval = toktab->value;
- return toktab->class;
- }
- }
-
- /* It's a name. See how long it is. */
- yylval.sval = tokstart;
- lexptr = tokstart+namelen;
- ++want_concat_token;
- return NAME;
}
-/*VARARGS1*/
-yyerror (mesg,a1,a2,a3,a4,a5,a6,a7,a8)
- char *mesg;
+/*
+ * Read the input and turn it into tokens. Input is now read from a file
+ * instead of from malloc'ed memory. The main program takes a program
+ * passed as a command line argument and writes it to a temp file. Otherwise
+ * the file name is made available in an external variable.
+ */
+
+int curinfile = -1;
+
+static int
+yylex()
{
- register char *ptr,*beg;
-
- /* Find the current line in the input file */
- if(!lexptr) {
- beg="(END OF FILE)";
- ptr=beg+13;
- } else {
- if (*lexptr == '\n' && lexptr!=lexptr_begin)
- --lexptr;
- for (beg = lexptr;beg!=lexptr_begin && *beg != '\n';--beg)
- ;
- for (ptr = lexptr;*ptr && *ptr != '\n';ptr++) /*jfw: NL isn't guaranteed*/
- ;
- if(beg!=lexptr_begin)
- beg++;
- }
- fprintf (stderr, "Error near line %d, '%.*s'\n",lineno, ptr-beg, beg);
- /* figure out line number, etc. later */
- fprintf (stderr, mesg, a1, a2, a3, a4, a5, a6, a7, a8);
- fprintf (stderr,"\n");
- exit (1);
-}
+ register int c;
+ register int namelen;
+ register char *tokstart;
+ register struct token *tokptr;
+ char *tokkey;
+ extern double atof(); /* know what happens if you forget this? */
+ static did_newline = 0; /* the grammar insists that actions end
+ * with newlines. This was easier than
+ * hacking the grammar. */
+ int do_concat;
+ int seen_e = 0; /* These are for numbers */
+ int seen_point = 0;
+ extern char **sourcefile;
+ extern int tempsource, numfiles;
+ extern FILE *pathopen();
+ static int file_opened = 0;
+ static FILE *fin;
+ static char cbuf[BUFSIZ];
+ int low, mid, high;
+ extern int debugging;
+
+ if (! file_opened) {
+ file_opened = 1;
+#ifdef DEBUG
+ if (debugging) {
+ int i;
+
+ for (i = 0; i <= numfiles; i++)
+ fprintf (stderr, "sourcefile[%d] = %s\n", i,
+ sourcefile[i]);
+ }
+#endif
+ nextfile:
+ if ((fin = pathopen (sourcefile[++curinfile])) == NULL)
+ fatal("cannot open `%s' for reading (%s)",
+ sourcefile[curinfile],
+ sys_errlist[errno]);
+ *(lexptr = cbuf) = '\0';
+ /*
+ * immediately unlink the tempfile so that it will
+ * go away cleanly if we bomb.
+ */
+ if (tempsource && curinfile == 0)
+ (void) unlink (sourcefile[curinfile]);
+ }
+
+retry:
+ if (! *lexptr)
+ if (fgets (cbuf, sizeof cbuf, fin) == NULL) {
+ if (fin != NULL)
+ fclose (fin); /* be neat and clean */
+ if (curinfile < numfiles)
+ goto nextfile;
+ return 0;
+ } else
+ lexptr = lexptr_begin = cbuf;
+
+ if (want_regexp) {
+ want_regexp = 0;
+
+ /*
+ * there is a potential bug if a regexp is followed by an
+ * equal sign: "/foo/=bar" would result in assign_quotient
+ * being returned as the next token. Nothing is done about
+ * it since it is not valid awk, but maybe something should
+ * be done anyway.
+ */
+
+ tokstart = lexptr;
+ while (c = *lexptr++) {
+ switch (c) {
+ case '\\':
+ if (*lexptr++ == '\0') {
+ yyerror("unterminated regexp ends with \\");
+ return ERROR;
+ } else if (lexptr[-1] == '\n')
+ goto retry;
+ break;
+ case '/': /* end of the regexp */
+ lexptr--;
+ yylval.sval = tokstart;
+ return REGEXP;
+ case '\n':
+ lineno++;
+ case '\0':
+ yyerror("unterminated regexp");
+ return ERROR;
+ }
+ }
+ }
+ do_concat = want_concat_token;
+ want_concat_token = 0;
-/* Parse a C escape sequence. STRING_PTR points to a variable
- containing a pointer to the string to parse. That pointer
- is updated past the characters we use. The value of the
- escape sequence is returned.
+ if (*lexptr == '\n') {
+ lexptr++;
+ lineno++;
+ return NEWLINE;
+ }
- A negative value means the sequence \ newline was seen,
- which is supposed to be equivalent to nothing at all.
+ /*
+ * if lexptr is at white space between two terminal tokens or parens,
+ * it is a concatenation operator.
+ */
+ if (do_concat && (*lexptr == ' ' || *lexptr == '\t')) {
+ while (*lexptr == ' ' || *lexptr == '\t')
+ lexptr++;
+ if (isalnum(*lexptr) || *lexptr == '_' || *lexptr == '\"' ||
+ *lexptr == '(' || *lexptr == '.' || *lexptr == '$' ||
+ (*lexptr == '+' && *(lexptr+1) == '+') ||
+ (*lexptr == '-' && *(lexptr+1) == '-'))
+ /* the '.' is for decimal pt */
+ return CONCAT_OP;
+ }
+ while (*lexptr == ' ' || *lexptr == '\t')
+ lexptr++;
+
+ tokstart = lexptr;
+
+ switch (c = *lexptr++) {
+ case 0:
+ return 0;
+
+ case '\n':
+ lineno++;
+ return NEWLINE;
+
+ case '#': /* it's a comment */
+ while (*lexptr != '\n' && *lexptr != '\0')
+ lexptr++;
+ goto retry;
+
+ case '\\':
+ if (*lexptr == '\n') {
+ lineno++;
+ lexptr++;
+ want_concat_token = do_concat;
+ goto retry;
+ } else
+ break;
+ case ')':
+ case ']':
+ ++want_concat_token;
+ /* fall through */
+ case '(':
+ case '[':
+ case '$':
+ case ';':
+ case ':':
+ case '?':
+
+ /*
+ * set node type to ILLEGAL because the action should set it
+ * to the right thing
+ */
+ yylval.nodetypeval = Node_illegal;
+ return c;
+
+ case '{':
+ case ',':
+ while (isspace(*lexptr)) {
+ if (*lexptr == '\n')
+ lineno++;
+ lexptr++;
+ }
+ yylval.nodetypeval = Node_illegal;
+ return c;
+
+ case '*':
+ if (*lexptr == '=') {
+ yylval.nodetypeval = Node_assign_times;
+ lexptr++;
+ return ASSIGNOP;
+ } else if (*lexptr == '*') { /* make ** and **= aliases
+ * for ^ and ^= */
+ if (lexptr[1] == '=') {
+ yylval.nodetypeval = Node_assign_exp;
+ lexptr += 2;
+ return ASSIGNOP;
+ } else {
+ yylval.nodetypeval = Node_illegal;
+ lexptr++;
+ return '^';
+ }
+ }
+ yylval.nodetypeval = Node_illegal;
+ return c;
+
+ case '/':
+ if (*lexptr == '=') {
+ yylval.nodetypeval = Node_assign_quotient;
+ lexptr++;
+ return ASSIGNOP;
+ }
+ yylval.nodetypeval = Node_illegal;
+ return c;
+
+ case '%':
+ if (*lexptr == '=') {
+ yylval.nodetypeval = Node_assign_mod;
+ lexptr++;
+ return ASSIGNOP;
+ }
+ yylval.nodetypeval = Node_illegal;
+ return c;
+
+ case '^':
+ if (*lexptr == '=') {
+ yylval.nodetypeval = Node_assign_exp;
+ lexptr++;
+ return ASSIGNOP;
+ }
+ yylval.nodetypeval = Node_illegal;
+ return c;
+
+ case '+':
+ if (*lexptr == '=') {
+ yylval.nodetypeval = Node_assign_plus;
+ lexptr++;
+ return ASSIGNOP;
+ }
+ if (*lexptr == '+') {
+ yylval.nodetypeval = Node_illegal;
+ lexptr++;
+ return INCREMENT;
+ }
+ yylval.nodetypeval = Node_illegal;
+ return c;
+
+ case '!':
+ if (*lexptr == '=') {
+ yylval.nodetypeval = Node_notequal;
+ lexptr++;
+ return RELOP;
+ }
+ if (*lexptr == '~') {
+ yylval.nodetypeval = Node_nomatch;
+ if (! strict && lexptr[1] == '~') {
+ yylval.nodetypeval = Node_case_nomatch;
+ lexptr++;
+ }
+ lexptr++;
+ return MATCHOP;
+ }
+ yylval.nodetypeval = Node_illegal;
+ return c;
- If \ is followed by a null character, we return a negative
- value and leave the string pointer pointing at the null character.
+ case '<':
+ if (want_redirect) {
+ yylval.nodetypeval = Node_redirect_input;
+ return REDIRECT_OP;
+ }
+ if (*lexptr == '=') {
+ yylval.nodetypeval = Node_leq;
+ lexptr++;
+ return RELOP;
+ }
+ yylval.nodetypeval = Node_less;
+ return RELOP;
+
+ case '=':
+ if (*lexptr == '=') {
+ yylval.nodetypeval = Node_equal;
+ lexptr++;
+ return RELOP;
+ }
+ yylval.nodetypeval = Node_assign;
+ return ASSIGNOP;
+
+ case '>':
+ if (want_redirect) {
+ if (*lexptr == '>') {
+ yylval.nodetypeval = Node_redirect_append;
+ lexptr++;
+ } else
+ yylval.nodetypeval = Node_redirect_output;
+ return REDIRECT_OP;
+ }
+ if (*lexptr == '=') {
+ yylval.nodetypeval = Node_geq;
+ lexptr++;
+ return RELOP;
+ }
+ yylval.nodetypeval = Node_greater;
+ return RELOP;
+
+ case '~':
+ yylval.nodetypeval = Node_match;
+ if (! strict && *lexptr == '~') {
+ yylval.nodetypeval = Node_case_match;
+ lexptr++;
+ }
+ return MATCHOP;
+
+ case '}':
+ /*
+ * Added did newline stuff. Easier than
+ * hacking the grammar
+ */
+ if (did_newline) {
+ did_newline = 0;
+ return c;
+ }
+ did_newline++;
+ --lexptr;
+ return NEWLINE;
+
+ case '"':
+ while (*lexptr != '\0') {
+ switch (*lexptr++) {
+ case '\\':
+ if (*lexptr++ != '\0')
+ break;
+ /* fall through */
+ case '\n':
+ yyerror("unterminated string");
+ return ERROR;
+ case '\"':
+ /* Skip the doublequote */
+ yylval.sval = tokstart + 1;
+ ++want_concat_token;
+ return YSTRING;
+ }
+ }
+ return ERROR;
- If \ is followed by 000, we return 0 and leave the string pointer
- after the zeros. A value of 0 does not mean end of string. */
+ case '-':
+ if (*lexptr == '=') {
+ yylval.nodetypeval = Node_assign_minus;
+ lexptr++;
+ return ASSIGNOP;
+ }
+ if (*lexptr == '-') {
+ yylval.nodetypeval = Node_illegal;
+ lexptr++;
+ return DECREMENT;
+ }
-static int
-parse_escape (string_ptr)
- char **string_ptr;
-{
- register int c = *(*string_ptr)++;
- switch (c)
- {
- case 'a':
- return '\a';
- case 'b':
- return '\b';
- case 'e':
- return 033;
- case 'f':
- return '\f';
- case 'n':
- return '\n';
- case 'r':
- return '\r';
- case 't':
- return '\t';
- case 'v':
- return '\v';
- case '\n':
- return -2;
- case 0:
- (*string_ptr)--;
- return 0;
- case '^':
- c = *(*string_ptr)++;
- if (c == '\\')
- c = parse_escape (string_ptr);
- if (c == '?')
- return 0177;
- return (c & 0200) | (c & 037);
-
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- {
- register int i = c - '0';
- register int count = 0;
- while (++count < 3)
- {
- if ((c = *(*string_ptr)++) >= '0' && c <= '7')
- {
- i *= 8;
- i += c - '0';
- }
- else
- {
- (*string_ptr)--;
+ /*
+ * It looks like space tab comma and newline are the legal
+ * places for a UMINUS. Have we missed any?
+ */
+ if ((! isdigit(*lexptr) && *lexptr != '.') ||
+ (lexptr > lexptr_begin + 1 &&
+ ! index(" \t,\n", lexptr[-2]))) {
+
+ /*
+ * set node type to ILLEGAL because the action should
+ * set it to the right thing
+ */
+ yylval.nodetypeval = Node_illegal;
+ return c;
+ }
+ /* FALL through into number code */
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ case '.':
+ /* It's a number */
+ if (c == '-')
+ namelen = 1;
+ else
+ namelen = 0;
+ for (; (c = tokstart[namelen]) != '\0'; namelen++) {
+ switch (c) {
+ case '.':
+ if (seen_point)
+ goto got_number;
+ ++seen_point;
+ break;
+ case 'e':
+ case 'E':
+ if (seen_e)
+ goto got_number;
+ ++seen_e;
+ if (tokstart[namelen + 1] == '-' || tokstart[namelen + 1] == '+')
+ namelen++;
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ break;
+ default:
+ goto got_number;
+ }
+ }
+
+got_number:
+ lexptr = tokstart + namelen;
+ yylval.fval = atof(tokstart);
+ ++want_concat_token;
+ return NUMBER;
+
+ case '&':
+ if (*lexptr == '&') {
+ yylval.nodetypeval = Node_and;
+ lexptr++;
+ return LEX_AND;
+ }
+ return ERROR;
+
+ case '|':
+ if (*lexptr == '|') {
+ yylval.nodetypeval = Node_or;
+ lexptr++;
+ return LEX_OR;
+ } else if (want_redirect) {
+ yylval.nodetypeval = Node_redirect_pipe;
+ return REDIRECT_OP;
+ } else {
+ yylval.nodetypeval = Node_illegal;
+ return c;
+ }
break;
- }
- }
- return i;
- }
- default:
- return c;
- }
+ }
+
+ if (c != '_' && !isalpha(c)) {
+ yyerror("Invalid char '%c' in expression\n", c);
+ return ERROR;
+ }
+
+ /* it's some type of name-type-thing. Find its length */
+ for (namelen = 0; is_identchar(tokstart[namelen]); namelen++)
+ /* null */ ;
+ emalloc(tokkey, char *, namelen+1, "yylex");
+ strncpy (tokkey, tokstart, namelen);
+ tokkey[namelen] = '\0';
+
+ /* See if it is a special token. */
+ low = 0;
+ high = (sizeof (tokentab) / sizeof (tokentab[0])) - 1;
+ while (low <= high) {
+ int i, c;
+
+ mid = (low + high) / 2;
+
+ compare:
+ c = *tokstart - tokentab[mid].operator[0];
+ i = c ? c : strcmp (tokkey, tokentab[mid].operator);
+
+ if (i < 0) { /* token < mid */
+ high = mid - 1;
+ } else if (i > 0) { /* token > mid */
+ low = mid + 1;
+ } else {
+ lexptr = tokstart + namelen;
+ if (tokentab[mid].class == LEX_BUILTIN)
+ yylval.ptrval = tokentab[mid].ptr;
+ else
+ yylval.nodetypeval = tokentab[mid].value;
+ if (tokentab[mid].class == LEX_PRINT)
+ want_redirect++;
+ return tokentab[mid].class;
+ }
+ }
+
+ /* It's a name. See how long it is. */
+ yylval.sval = tokkey;
+ lexptr = tokstart + namelen;
+ ++want_concat_token;
+ return NAME;
+}
+
+#ifndef DEFPATH
+#define DEFPATH ".:/usr/lib/awk:/usr/local/lib/awk"
+#endif
+
+FILE *
+pathopen (file)
+char *file;
+{
+ static char defpath[] = DEFPATH;
+ static char *savepath;
+ static int first = 1;
+ extern char *getenv ();
+ char *awkpath, *cp;
+ char trypath[BUFSIZ];
+ FILE *fp;
+ extern int debugging;
+
+ if (strict)
+ return (fopen (file, "r"));
+
+ if (first) {
+ first = 0;
+ if ((awkpath = getenv ("AWKPATH")) == NULL || ! *awkpath)
+ awkpath = defpath;
+ savepath = awkpath; /* savepath used for restarting */
+ } else
+ awkpath = savepath;
+
+ if (index (file, '/') != NULL) /* some kind of path name, no search */
+ return (fopen (file, "r"));
+
+ do {
+ for (cp = trypath; *awkpath && *awkpath != ':'; )
+ *cp++ = *awkpath++;
+ *cp++ = '/';
+ *cp = '\0'; /* clear left over junk */
+ strcat (cp, file);
+ if ((fp = fopen (trypath, "r")) != NULL)
+ return (fp);
+
+ /* no luck, keep going */
+ awkpath++; /* skip colon */
+ } while (*awkpath);
+ return (NULL);
}