1 files changed, 1222 insertions, 718 deletions
diff --git a/awk.y b/awk.y
index 180505f6..fb0f0d80 100644
--- a/awk.y
+++ b/awk.y
@@ -1,9 +1,124 @@
-
 /*
  * gawk -- GNU version of awk
  * Copyright (C) 1986 Free Software Foundation
  *   Written by Paul Rubin, August 1986
  *
+ * $Log:	awk.y,v $
+ * Revision 1.24  88/12/15  12:52:58  david
+ * changes from Jay to get rid of some reduce/reduce conflicts - some remain
+ * 
+ * Revision 1.23  88/12/07  19:59:25  david
+ * changes for incorporating source filename in error messages
+ * 
+ * Revision 1.22  88/11/23  21:37:24  david
+ * Arnold: refinements of AWKPATH code
+ * 
+ * Revision 1.21  88/11/22  13:46:45  david
+ * Arnold: changes for case-insensitive matching
+ * 
+ * Revision 1.20  88/11/15  10:13:37  david
+ * Arnold: allow multiple -f options and search in directories for awk libraries,
+ * directories specified by AWKPATH env. variable; cleanup of comments and
+ * #includes
+ * 
+ * Revision 1.19  88/11/14  21:51:30  david
+ * Arnold: added error message for BEGIN or END without any action at all;
+ * unlink temporary source file right after creation so it goes away on bomb
+ * 
+ * Revision 1.18  88/10/19  22:00:56  david
+ * generalize (and correct) what pattern can be in pattern {action}; this
+ * introduces quite a few new conflicts that should be checked thoroughly
+ * at some point, but they don't seem to do any harm at first glance
+ * replace malloc with emalloc
+ * 
+ * Revision 1.17  88/10/17  19:52:01  david
+ * Arnold: cleanup, purge FAST
+ * 
+ * Revision 1.16  88/10/13  22:02:16  david
+ * cleanup of yyerror and other error messages
+ * 
+ * Revision 1.15  88/10/06  23:24:57  david
+ * accept     var space ++var
+ * accept underscore as first character of a variable name
+ * 
+ * Revision 1.14  88/06/13  18:01:46  david
+ * delete \a (change from Arnold)
+ * 
+ * Revision 1.13  88/06/08  00:29:42  david
+ * better attempt at keeping track of line numbers
+ * change grammar to properly handle newlines after && or ||
+ * 
+ * Revision 1.12  88/06/07  23:39:02  david
+ * little delint
+ * 
+ * Revision 1.11  88/06/05  22:17:40  david
+ * make_name() becomes make_param() (again!)
+ * func_level goes away, param_counter makes entrance
+ * 
+ * Revision 1.10  88/05/30  09:49:02  david
+ * obstack_free was being called at end of function definition, freeing
+ * memory that might be part of global variables referenced only inside
+ * functions; commented out for now, will have to selectively free later.
+ * cleanup: regexp now returns a NODE *
+ * 
+ * Revision 1.9  88/05/27  11:04:53  david
+ * added print[f] '(' ... ')'     (optional parentheses)
+ * for some reason want_redirect wasn't getting set for PRINT, so I set it in 
+ * yylex()
+ * 
+ * Revision 1.8  88/05/26  22:52:14  david
+ * fixed cmd | getline
+ * added compound patterns (they got lost somewhere along the line)
+ * fixed error message in yylex()
+ * added null statement 
+ * 
+ * Revision 1.7  88/05/13  22:05:29  david
+ * moved BEGIN and END block merging here
+ * BEGIN, END and function defs. are no longer incorporated into main parse tree
+ * fixed    command | getline
+ * fixed function install and definition
+ * 
+ * Revision 1.6  88/05/09  17:47:50  david
+ * Arnold's coded binary search
+ * 
+ * Revision 1.5  88/05/04  12:31:13  david
+ * be a bit more careful about types
+ * make_for_loop() now returns a NODE *
+ * keyword search now uses bsearch() -- need a public domain version of this
+ * added back stuff in yylex() that got lost somewhere along the line
+ * malloc() tokens in yylex() since they were previously just pointers into
+ *  current line that got overwritten by the next fgets() -- these need to get
+ *  freed at some point
+ * fixed backslash line continuation interaction with CONCAT
+ * 
+ * Revision 1.4  88/04/14  17:03:51  david
+ * reinstalled a fix to do with line continuation
+ * 
+ * Revision 1.3  88/04/14  14:41:01  david
+ * Arnold's changes to yylex to read program from a file
+ * 
+ * Revision 1.5  88/03/18  21:00:07  david
+ * Baseline -- hopefully all the functionality of the new awk added.
+ * Just debugging and tuning to do.
+ * 
+ * Revision 1.4  87/11/19  14:37:20  david
+ * added a bunch of new builtin functions
+ * added new rules for getline to provide new functionality
+ * minor cleanup of redirection handling
+ * generalized make_param into make_name
+ * 
+ * Revision 1.3  87/11/09  21:22:33  david
+ * added machinery for user-defined functions (including return)
+ * added delete, do-while and system
+ * reformatted and revised grammar to improve error-handling
+ * changes to yyerror to give improved error messages
+ * 
+ * Revision 1.2  87/10/29  21:33:28  david
+ * added test for membership in an array, as in:  if ("yes" in answers) ...
+ * 
+ * Revision 1.1  87/10/27  15:23:21  david
+ * Initial revision
+ * 
  */
 
 /*
@@ -26,191 +141,291 @@ anyone else from sharing it farther.  Help stamp out software hoarding!
 
 %{
 #define YYDEBUG 12
+#define YYIMPROVE
 
-#include <stdio.h>
 #include "awk.h"
 
-  static int yylex ();
-
-
-  /*
-   * The following variable is used for a very sickening thing.
-   * The awk language uses white space as the string concatenation
-   * operator, but having a white space token that would have to appear
-   * everywhere in all the grammar rules would be unbearable.
-   * It turns out we can return CONCAT_OP exactly when there really
-   * is one, just from knowing what kinds of other tokens it can appear
-   * between (namely, constants, variables, or close parentheses).
-   * This is because concatenation has the lowest priority of all
-   * operators.  want_concat_token is used to remember that something
-   * that could be the left side of a concat has just been returned.
-   *
-   * If anyone knows a cleaner way to do this (don't look at the Un*x
-   * code to find one, though), please suggest it.
-   */
-  static int want_concat_token;
-
-  /* Two more horrible kludges.  The same comment applies to these two too */
-  static int want_regexp;	/* lexical scanning kludge */
-  static int want_redirect;	/* similarly */
-  int lineno = 1;	/* JF for error msgs */
+static int yylex ();
+
+/*
+ * The following variable is used for a very sickening thing.
+ * The awk language uses white space as the string concatenation
+ * operator, but having a white space token that would have to appear
+ * everywhere in all the grammar rules would be unbearable.
+ * It turns out we can return CONCAT_OP exactly when there really
+ * is one, just from knowing what kinds of other tokens it can appear
+ * between (namely, constants, variables, or close parentheses).
+ * This is because concatenation has the lowest priority of all
+ * operators.  want_concat_token is used to remember that something
+ * that could be the left side of a concat has just been returned.
+ *
+ * If anyone knows a cleaner way to do this (don't look at the Un*x
+ * code to find one, though), please suggest it.
+ */
+static int want_concat_token;
+
+/* Two more horrible kludges.  The same comment applies to these two too */
+static int want_regexp;		/* lexical scanning kludge */
+static int want_redirect;	/* similarly */
+int lineno = 1;			/* for error msgs */
 
 /* During parsing of a gawk program, the pointer to the next character
    is in this variable.  */
-  char *lexptr;		/* JF moved it up here */
-  char *lexptr_begin;	/* JF for error msgs */
+char *lexptr;		/* moved it up here */
+char *lexptr_begin;	/* for error msgs */
+char *func_def;
+extern int errcount;
+extern NODE *begin_block;
+extern NODE *end_block;
+extern struct re_pattern_buffer *mk_re_parse();
+extern int param_counter;
+struct re_pattern_buffer *rp;
 %}
 
 %union {
-  long lval;
-  AWKNUM fval;
-  NODE *nodeval;
-  NODETYPE nodetypeval;
-  char *sval;
-  NODE *(*ptrval)();
+	long lval;
+	AWKNUM fval;
+	NODE *nodeval;
+	NODETYPE nodetypeval;
+	char *sval;
+	NODE *(*ptrval)();
 }
 
-%type <nodeval> exp start program rule pattern conditional
-%type <nodeval>	action variable redirection expression_list
-%type <nodeval>	statements statement if_statement
-%type <nodeval> opt_exp v_exp
-%type <nodetypeval> whitespace
+%type <nodeval> function_prologue function_body
+%type <nodeval> exp sub_exp start program rule pattern expression_list
+%type <nodeval>	action variable redirection param_list opt_expression_list
+%type <nodeval>	statements statement if_statement opt_param_list 
+%type <nodeval> opt_exp opt_variable regexp
+%type <nodetypeval> whitespace r_paren
 
 %token <sval> NAME REGEXP YSTRING
 %token <lval> ERROR INCDEC
 %token <fval> NUMBER
 %token <nodetypeval> ASSIGNOP RELOP MATCHOP NEWLINE REDIRECT_OP CONCAT_OP
-%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE
-%token <nodetypeval> LEX_WHILE LEX_FOR LEX_BREAK LEX_CONTINUE
-%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT
-%token  LEX_IN
+%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
+%token <nodetypeval> LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
+%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
+%token <nodetypeval> LEX_GETLINE LEX_SUB LEX_MATCH
+%token <nodetypeval> LEX_IN
 %token <lval> LEX_AND LEX_OR INCREMENT DECREMENT
 %token <ptrval> LEX_BUILTIN
 
 /* these are just yylval numbers */
-/* %token <lval> CHAR JF this isn't used anymore */
 
 /* Lowest to highest */
+%right ASSIGNOP
+%right '?' ':'
 %left LEX_OR
 %left LEX_AND
-%right ASSIGNOP
+%left LEX_IN
+%nonassoc MATCHOP
+%nonassoc RELOP
+%nonassoc REDIRECT_OP
 %left CONCAT_OP
 %left '+' '-'
 %left '*' '/' '%'
 %right UNARY
-%nonassoc MATCHOP RELOP
+%right '^'
+%left INCREMENT DECREMENT
+%left '$'
 
 %%
 
-start   :  optional_newlines program
+start
+	: opt_newlines program
 		{ expression_value = $2; }
 	;
 
-
-program	: rule
-		{ $$ = node ($1, Node_rule_list,(NODE *) NULL); }
+program
+	: rule
+		{ 
+			if ($1 != NULL)
+				$$ = node ($1, Node_rule_list,(NODE *) NULL);
+			else
+				$$ = NULL;
+			yyerrok;
+		}
 	| program rule
 		/* cons the rule onto the tail of list */
-		{ $$ = append_right ($1, node($2, Node_rule_list,(NODE *) NULL)); }
+		{
+			if ($2 == NULL)
+				$$ = $1;
+			else if ($1 == NULL)
+				$$ = node($2, Node_rule_list,(NODE *) NULL);
+			else
+				$$ = append_right ($1,
+				   node($2, Node_rule_list,(NODE *) NULL));
+			yyerrok;
+		}
+	| error	{ $$ = NULL; }
+	| program error
 	;
 
-rule	: pattern action NEWLINE optional_newlines
-		{ $$ = node ($1, Node_rule_node, $2); }
+rule
+	: LEX_BEGIN action
+	  {
+		if (begin_block)
+			append_right (begin_block, node(
+			    node((NODE *)NULL, Node_rule_node, $2),
+			    Node_rule_list, (NODE *)NULL) );
+		else
+			begin_block = node(node((NODE *)NULL,Node_rule_node,$2),
+			    Node_rule_list, (NODE *)NULL);
+		$$ = NULL;
+		yyerrok;
+	  }
+	| LEX_END action
+	  {
+		if (end_block)
+			append_right (end_block, node(
+			    node((NODE *)NULL, Node_rule_node, $2),
+			    Node_rule_list, (NODE *)NULL));
+		else
+			end_block = node(node((NODE *)NULL, Node_rule_node, $2),
+			    Node_rule_list, (NODE *)NULL);
+		$$ = NULL;
+		yyerrok;
+	  }
+	| LEX_BEGIN statement_term
+	  {
+		msg ("error near line %d: BEGIN blocks must have an action part", lineno);
+		errcount++;
+		yyerrok;
+	  }
+	| LEX_END statement_term
+	  {
+		msg ("error near line %d: END blocks must have an action part", lineno);
+		errcount++;
+		yyerrok;
+	  }
+	| pattern action
+		{ $$ = node ($1, Node_rule_node, $2); yyerrok; }
+	| pattern statement_term
+		{ if($1) $$ = node ($1, Node_rule_node, (NODE *)NULL); yyerrok; }
+	| function_prologue function_body
+		{
+			func_install($1, $2);
+			$$ = NULL;
+			yyerrok;
+		}
+	;
+		
+function_prologue
+	: LEX_FUNCTION 
+		{
+			param_counter = 0;
+		}
+	  NAME whitespace '(' opt_param_list r_paren whitespace
+		{
+			$$ = append_right(make_param($3), $6);
+		}
 	;
 
+function_body
+	: l_brace statements r_brace statement_term
+		{ $$ = $2; }
+	;
 
-pattern	: /* empty */
+pattern
+	: /* empty */
 		{ $$ = NULL; }
-	| conditional
+	| sub_exp
 		{ $$ = $1; }
-	| conditional ',' conditional
-		{ $$ = mkrangenode ( node($1, Node_cond_pair, $3) ); } /*jfw*/
-	;
-
-
-conditional :
-	  LEX_BEGIN
-		{ $$ = node ((NODE *)NULL, Node_K_BEGIN,(NODE *) NULL); }
-	| LEX_END
-		{ $$ = node ((NODE *)NULL, Node_K_END,(NODE *) NULL); }
-	| '!' conditional %prec UNARY
-		{ $$ = node ($2, Node_not,(NODE *) NULL); }
-	| conditional LEX_AND conditional
+	| regexp
+		{ 
+		  $$ = node(
+		       node(make_number((AWKNUM)0),Node_field_spec,(NODE*)NULL),
+		       Node_match, $1);
+		}
+	| pattern LEX_AND pattern
 		{ $$ = node ($1, Node_and, $3); }
-	| conditional LEX_OR conditional
+	| pattern LEX_OR pattern
 		{ $$ = node ($1, Node_or, $3); }
-	| '(' conditional ')'
-		{
-		  $$ = $2;
-		  want_concat_token = 0;
-		}
+	| '!' pattern %prec UNARY
+		{ $$ = node ($2, Node_not,(NODE *) NULL); }
+	| '(' pattern r_paren
+		{ $$ = $2; }
+	| pattern ',' pattern
+		{ $$ = mkrangenode ( node($1, Node_cond_pair, $3) ); }
+	;
 
-	/* In these rules, want_regexp tells yylex that the next thing
+regexp
+	/* In this rule, want_regexp tells yylex that the next thing
 		is a regexp so it should read up to the closing slash. */
-
-	| '/'
+	: '/'
 		{ ++want_regexp; }
 	   REGEXP '/'
 		{ want_regexp = 0;
-		  $$ = node (node (make_number ((AWKNUM)0), Node_field_spec, (NODE *)NULL),
-			     Node_match, (NODE *)make_regexp ($3));
+		  rp = mk_re_parse($3);
+		  $$ = node((NODE *)NULL, Node_regex, (NODE *)rp);
 		}
-	| exp MATCHOP '/'
-		 { ++want_regexp; }
-	   REGEXP '/'
-		 { want_regexp = 0;
-		   $$ = node ($1, $2, (NODE *)make_regexp($5));
-		 }
-	| exp RELOP exp
-		{ $$ = node ($1, $2, $3); }
-	| exp	/* JF */
-		{ $$ = $1; }
 	;
 
-
-action	: /* empty */
-		{ $$ = NULL; }
-	|	'{' whitespace statements '}'
-		{ $$ = $3; }
+action
+	: l_brace r_brace 
+		{
+			/* empty actions are different from missing actions */
+			$$ = node ((NODE *) NULL, Node_illegal, (NODE *) NULL);
+		}
+	| l_brace statements r_brace
+		{ $$ = $2 ; }
 	;
 
-
-statements :			/* EMPTY */
-		{ $$ = NULL; }
-	| statement
+statements
+	: statement
 		{ $$ = node ($1, Node_statement_list, (NODE *)NULL); }
 	| statements statement
-		{ $$ = append_right($1, node( $2, Node_statement_list, (NODE *)NULL)); }
+		{
+	    		$$ = append_right($1,
+				node( $2, Node_statement_list, (NODE *)NULL));
+	    		yyerrok;
+		}
+	| error
+		{ $$ = NULL; }
+	| statements error
 	;
 
-statement_term :
-	NEWLINE optional_newlines
-		{ $<nodetypeval>$ = Node_illegal; }
-	| ';' optional_newlines
-		{ $<nodetypeval>$ = Node_illegal; }
+statement_term
+	: NEWLINE opt_newlines
+		{ $<nodetypeval>$ = Node_illegal; want_redirect = 0; }
+	| semi_colon opt_newlines
+		{ $<nodetypeval>$ = Node_illegal; want_redirect = 0; }
 	;
 
-whitespace :
-		/* blank */
-		{ $$ = Node_illegal; }
-	|  CONCAT_OP
+whitespace
+	: /* blank */
+		{ $<nodetypeval>$ = Node_illegal; }
+	| CONCAT_OP
+		{ $<nodetypeval>$ = Node_illegal; }
 	| NEWLINE
+		{ $<nodetypeval>$ = Node_illegal; }
 	| whitespace CONCAT_OP
+		{ $<nodetypeval>$ = Node_illegal; }
 	| whitespace NEWLINE
+		{ $<nodetypeval>$ = Node_illegal; }
 	;
-statement :
-	'{' whitespace statements '}' whitespace
-		{ $$ = $3; }
+	
+statement
+	: semi_colon opt_newlines
+		{ $$ = NULL; }
+	| l_brace statements r_brace whitespace
+		{ $$ = $2; }
 	| if_statement
 		{ $$ = $1; }
-	| LEX_WHILE '(' conditional ')' whitespace statement
+	| LEX_WHILE '(' exp r_paren whitespace statement
 		{ $$ = node ($3, Node_K_while, $6); }
-	| LEX_FOR '(' opt_exp ';' conditional ';' opt_exp ')' whitespace statement
+	| LEX_DO whitespace statement LEX_WHILE '(' exp r_paren whitespace
+		{ $$ = node ($6, Node_K_do, $3); }
+	| LEX_FOR '(' opt_exp semi_colon exp semi_colon opt_exp r_paren whitespace statement
 		{ $$ = node ($10, Node_K_for, (NODE *)make_for_loop ($3, $5, $7)); }
-	| LEX_FOR '(' opt_exp ';' ';' opt_exp ')' whitespace statement
+	| LEX_FOR '(' opt_exp semi_colon semi_colon opt_exp r_paren whitespace statement
 		{ $$ = node ($9, Node_K_for, (NODE *)make_for_loop ($3, (NODE *)NULL, $6)); }
-	| LEX_FOR '(' NAME CONCAT_OP LEX_IN NAME ')' whitespace statement
-		{ $$ = node ($9, Node_K_arrayfor, (NODE *)make_for_loop(variable($3), (NODE *)NULL, variable($6))); }
+	| LEX_FOR '(' NAME CONCAT_OP LEX_IN NAME r_paren whitespace statement
+		{
+			$$ = node ($9, Node_K_arrayfor,
+				make_for_loop(variable($3),
+					(NODE *)NULL, variable($6)));
+		}
 	| LEX_BREAK statement_term
 	   /* for break, maybe we'll have to remember where to break to */
 		{ $$ = node ((NODE *)NULL, Node_K_break, (NODE *)NULL); }
@@ -219,680 +434,969 @@ statement :
 		{ $$ = node ((NODE *)NULL, Node_K_continue, (NODE *)NULL); }
 	| LEX_PRINT
 		{ ++want_redirect; }
-	    expression_list redirection statement_term
-		{
-		  want_redirect = 0;
-		  /* $4->lnode = NULL; */
-		  $$ = node ($3, Node_K_print, $4);
-		}
+	  opt_expression_list redirection statement_term
+		{ $$ = node ($3, Node_K_print, $4); }
+	| LEX_PRINT '(' opt_expression_list r_paren 
+		{ ++want_redirect; want_concat_token = 0; }
+	  redirection statement_term
+		{ $$ = node ($3, Node_K_print, $6); }
 	| LEX_PRINTF
 		{ ++want_redirect; }
-	    expression_list redirection statement_term
-		{
-		  want_redirect = 0;
-		  /* $4->lnode = NULL; */
-		  $$ = node ($3, Node_K_printf, $4);
-		}
-	| LEX_PRINTF '(' expression_list ')'
-		{ ++want_redirect;
-		  want_concat_token = 0; }
-	    redirection statement_term
-		{
-		  want_redirect = 0;
-		  $$ = node ($3, Node_K_printf, $6);
-		}
+	  opt_expression_list redirection statement_term
+		{ $$ = node ($3, Node_K_printf, $4); }
+	| LEX_PRINTF '(' opt_expression_list r_paren
+		{ ++want_redirect; want_concat_token = 0; }
+	  redirection statement_term
+		{ $$ = node ($3, Node_K_printf, $6); }
 	| LEX_NEXT statement_term
 		{ $$ = node ((NODE *)NULL, Node_K_next, (NODE *)NULL); }
-	| LEX_EXIT statement_term
-		{ $$ = node ((NODE *)NULL, Node_K_exit, (NODE *)NULL); }
-	| LEX_EXIT '(' exp ')' statement_term
-		{ $$ = node ($3, Node_K_exit, (NODE *)NULL); }
+	| LEX_EXIT opt_exp statement_term
+		{ $$ = node ($2, Node_K_exit, (NODE *)NULL); }
+	| LEX_RETURN opt_exp statement_term
+		{ $$ = node ($2, Node_K_return, (NODE *)NULL); }
+	| LEX_DELETE NAME '[' expression_list ']' statement_term
+		{ $$ = node (variable($2), Node_K_delete, $4); }
 	| exp statement_term
 		{ $$ = $1; }
 	;
 
-
-if_statement:
-	LEX_IF '(' conditional ')' whitespace statement
+if_statement
+	: LEX_IF '(' exp r_paren whitespace statement
 		{ $$ = node ($3, Node_K_if,
 				node ($6, Node_if_branches, (NODE *)NULL)); }
-	| LEX_IF '(' conditional ')' whitespace statement
+	| LEX_IF '(' exp r_paren whitespace statement
 	     LEX_ELSE whitespace statement
 		{ $$ = node ($3, Node_K_if,
 				node ($6, Node_if_branches, $9)); }
 	;
 
-optional_newlines :
-	  /* empty */
-	| optional_newlines NEWLINE
+opt_newlines
+	: /* empty */
+	| opt_newlines NEWLINE
 		{ $<nodetypeval>$ = Node_illegal; }
 	;
 
-redirection :
-	  /* empty */
-		{ $$ = NULL; /* node (NULL, Node_redirect_nil, NULL); */ }
-	/* | REDIRECT_OP NAME
-		{ $$ = node ($2, $1, NULL); } */
-	| REDIRECT_OP exp
-		{ $$ = node ($2, $1, (NODE *)NULL); }
+redirection
+	: /* empty */
+		{ want_redirect = 0; $$ = NULL; }
+	| REDIRECT_OP 
+		{ want_redirect = 0; }
+	    exp
+		{ $$ = node ($3, $1, (NODE *)NULL); }
+	;
+
+opt_param_list
+	: /* empty */
+		{ $$ = NULL; }
+	| param_list
+		/* $$ = $1 */
 	;
 
+param_list
+	: NAME
+		{
+			$$ = make_param($1);
+		}
+	| param_list ',' NAME
+		{
+			$$ = append_right($1, make_param($3));
+			yyerrok;
+		}
+	| error
+		{ $$ = NULL; }
+	| param_list error
+	| param_list ',' error
+	;
 
 /* optional expression, as in for loop */
-opt_exp :
+opt_exp
+	: /* empty */
 		{ $$ = NULL; /* node(NULL, Node_builtin, NULL); */ }
 	| exp
-		{ $$ = $1; }
 	;
 
-expression_list :
-	  /* empty */
+opt_expression_list
+	: /* empty */
 		{ $$ = NULL; }
-	| exp
+	| expression_list
+		{ $$ = $1; }
+	;
+
+expression_list
+	: exp
 		{ $$ = node ($1, Node_expression_list, (NODE *)NULL); }
 	| expression_list ',' exp
-		{ $$ = append_right($1, node( $3, Node_expression_list, (NODE *)NULL)); }
+		{
+			$$ = append_right($1,
+				node( $3, Node_expression_list, (NODE *)NULL));
+			yyerrok;
+		}
+	| error
+		{ $$ = NULL; }
+	| expression_list error
+	| expression_list error exp
+	| expression_list ',' error
 	;
 
-
 /* Expressions, not including the comma operator.  */
-exp	:	LEX_BUILTIN '(' expression_list ')'
-			{ $$ = snode ($3, Node_builtin, $1); }
-	|	LEX_BUILTIN
-			{ $$ = snode ((NODE *)NULL, Node_builtin, $1); }
-	|	'(' exp ')'
-			{ $$ = $2; }
-	|	'-' exp    %prec UNARY
-			{ $$ = node ($2, Node_unary_minus, (NODE *)NULL); }
-	|	INCREMENT variable %prec UNARY
-			{ $$ = node ($2, Node_preincrement, (NODE *)NULL); }
-	|	DECREMENT variable %prec UNARY
-			{ $$ = node ($2, Node_predecrement, (NODE *)NULL); }
-	|	variable INCREMENT  %prec UNARY
-			{ $$ = node ($1, Node_postincrement, (NODE *)NULL); }
-	|	variable DECREMENT  %prec UNARY
-			{ $$ = node ($1, Node_postdecrement, (NODE *)NULL); }
-	|	variable
-			{ $$ = $1; }	/* JF was variable($1) */
-	|	NUMBER
-			{ $$ = make_number ($1); }
-	|	YSTRING
-			{ $$ = make_string ($1, -1); }
+exp	: sub_exp
+	| exp LEX_AND whitespace exp
+		{ $$ = node ($1, Node_and, $4); }
+	| exp LEX_OR whitespace exp
+		{ $$ = node ($1, Node_or, $4); }
+	| '!' exp %prec UNARY
+		{ $$ = node ($2, Node_not,(NODE *) NULL); }
+	| '(' exp r_paren
+		{ $$ = $2; }
+	;
+
+sub_exp	: LEX_BUILTIN '(' opt_expression_list r_paren
+		{ $$ = snode ($3, Node_builtin, $1); }
+	| LEX_BUILTIN
+		{ $$ = snode ((NODE *)NULL, Node_builtin, $1); }
+	| exp MATCHOP regexp
+		 { $$ = node ($1, $2, $3); }
+	| exp MATCHOP exp
+		 { $$ = node ($1, $2, $3); }
+	| exp CONCAT_OP LEX_IN NAME
+		{ $$ = node (variable($4), Node_in_array, $1); }
+	| '(' expression_list r_paren CONCAT_OP LEX_IN NAME
+		{ $$ = node (variable($6), Node_in_array, $2); }
+	| LEX_SUB '(' regexp ',' expression_list r_paren 
+		{ $$ = node($5, $1, $3); }
+	| LEX_SUB '(' exp ',' expression_list r_paren 
+		{ $$ = node($5, $1, $3); }
+	| LEX_MATCH '(' exp ',' regexp r_paren
+		{ $$ = node($3, $1, $5); }
+	| LEX_MATCH '(' exp ',' exp r_paren
+		{ $$ = node($3, $1, $5); }
+	| LEX_GETLINE
+		{++want_redirect; }
+	    opt_variable redirection
+		{
+		  $$ = node ($3, Node_K_getline, $4);
+		}
+	| exp '|' LEX_GETLINE opt_variable
+		{
+		  $$ = node ($4, Node_K_getline,
+			 node ($1, Node_redirect_pipein, (NODE *)NULL));
+		}
+	| exp RELOP exp
+		{ $$ = node ($1, $2, $3); }
+	| exp '?' exp ':' exp
+		{ $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5)); }
+	| NAME '(' opt_expression_list r_paren
+		{
+			$$ = node ($3, Node_func_call, make_string($1, strlen($1)));
+		}
+	| '-' exp    %prec UNARY
+		{ $$ = node ($2, Node_unary_minus, (NODE *)NULL); }
+	| '+' exp    %prec UNARY
+		{ $$ = $2; }
+	| INCREMENT variable
+		{ $$ = node ($2, Node_preincrement, (NODE *)NULL); }
+	| DECREMENT variable
+		{ $$ = node ($2, Node_predecrement, (NODE *)NULL); }
+	| variable INCREMENT
+		{ $$ = node ($1, Node_postincrement, (NODE *)NULL); }
+	| variable DECREMENT
+		{ $$ = node ($1, Node_postdecrement, (NODE *)NULL); }
+	| variable
+		{ $$ = $1; }
+	| NUMBER
+		{ $$ = make_number ($1); }
+	| YSTRING
+		{ $$ = make_string ($1, -1); }
 
 /* Binary operators in order of decreasing precedence.  */
-	|	exp '*' exp
-			{ $$ = node ($1, Node_times, $3); }
-	|	exp '/' exp
-			{ $$ = node ($1, Node_quotient, $3); }
-	|	exp '%' exp
-			{ $$ = node ($1, Node_mod, $3); }
-	|	exp '+' exp
-			{ $$ = node ($1, Node_plus, $3); }
-	|	exp '-' exp
-			{ $$ = node ($1, Node_minus, $3); }
+	| exp '^' exp
+		{ $$ = node ($1, Node_exp, $3); }
+	| exp '*' exp
+		{ $$ = node ($1, Node_times, $3); }
+	| exp '/' exp
+		{ $$ = node ($1, Node_quotient, $3); }
+	| exp '%' exp
+		{ $$ = node ($1, Node_mod, $3); }
+	| exp '+' exp
+		{ $$ = node ($1, Node_plus, $3); }
+	| exp '-' exp
+		{ $$ = node ($1, Node_minus, $3); }
 		/* Empty operator.  See yylex for disgusting details. */
-	|	exp CONCAT_OP exp
-			{ $$ = node ($1, Node_concat, $3); }
-	|	variable ASSIGNOP exp
-			{ $$ = node ($1, $2, $3); }
+	| exp CONCAT_OP exp
+		{ $$ = node ($1, Node_concat, $3); }
+	| variable ASSIGNOP exp
+		{ $$ = node ($1, $2, $3); }
 	;
 
-v_exp	:	LEX_BUILTIN '(' expression_list ')'
-			{ $$ = snode ($3, Node_builtin, $1); }
-	|	LEX_BUILTIN
-			{ $$ = snode ((NODE *)NULL, Node_builtin, $1); }
-	|	'(' exp ')'
-			{ $$ = $2; }
-	|	'-' exp    %prec UNARY
-			{ $$ = node ($2, Node_unary_minus, (NODE *)NULL); }
-	|	INCREMENT variable %prec UNARY
-			{ $$ = node ($2, Node_preincrement, (NODE *)NULL); }
-	|	DECREMENT variable %prec UNARY
-			{ $$ = node ($2, Node_predecrement, (NODE *)NULL); }
-	|	variable INCREMENT  %prec UNARY
-			{ $$ = node ($1, Node_postincrement, (NODE *)NULL); }
-	|	variable DECREMENT  %prec UNARY
-			{ $$ = node ($1, Node_postdecrement, (NODE *)NULL); }
-	|	variable
-			{ $$ = $1; }	/* JF was variable($1) */
-	|	NUMBER
-			{ $$ = make_number ($1); }
-	|	YSTRING
-			{ $$ = make_string ($1, -1); }
+opt_variable
+	: /* empty */
+		{ $$ = NULL; }
+	| variable
+	;
 
-/* Binary operators in order of decreasing precedence.  */
-	|	v_exp '*' exp
-			{ $$ = node ($1, Node_times, $3); }
-	|	v_exp '/' exp
-			{ $$ = node ($1, Node_quotient, $3); }
-	|	v_exp '%' exp
-			{ $$ = node ($1, Node_mod, $3); }
-	|	v_exp '+' exp
-			{ $$ = node ($1, Node_plus, $3); }
-	|	v_exp '-' exp
-			{ $$ = node ($1, Node_minus, $3); }
-		/* Empty operator.  See yylex for disgusting details. */
-	|	v_exp CONCAT_OP exp
-			{ $$ = node ($1, Node_concat, $3); }
+variable
+	: NAME
+		{ $$ = variable ($1); }
+	| NAME '[' expression_list ']'
+		{ $$ = node (variable($1), Node_subscript, $3); }
+	| '$' exp
+		{ $$ = node ($2, Node_field_spec, (NODE *)NULL); }
+	;
+
+l_brace
+	: '{' whitespace
 	;
 
-variable :
-	 	NAME
-			{ $$ = variable ($1); }
-	|	NAME '[' exp ']'
-			{ $$ = node (variable($1), Node_subscript, $3); }
-	|	'$' v_exp	  %prec UNARY
-			{ $$ = node ($2, Node_field_spec, (NODE *)NULL); }
+r_brace
+	: '}'	{ yyerrok; }
+	;
+
+r_paren
+	: ')'	{ $<nodetypeval>$ = Node_illegal; yyerrok; }
+	;
+
+semi_colon
+	: ';'	{ yyerrok; }
 	;
 
 %%
-
 
 struct token {
-  char *operator;
-  NODETYPE value;
-  int class;
-  NODE *(*ptr)();
+	char *operator;
+	NODETYPE value;
+	int class;
+	NODE *(*ptr) ();
 };
 
 #define NULL 0
 
 NODE	*do_exp(),	*do_getline(),	*do_index(),	*do_length(),
 	*do_sqrt(),	*do_log(),	*do_sprintf(),	*do_substr(),
-	*do_split(),	*do_int();
+	*do_split(),	*do_system(),	*do_int(),	*do_close(),
+	*do_atan2(),	*do_sin(),	*do_cos(),	*do_rand(),
+	*do_srand(),	*do_match();
 
-	/* Special functions for debugging */
-#ifndef FAST
-NODE	*do_prvars(),	*do_bp();
+/* Special functions for debugging */
+#ifdef DEBUG
+NODE *do_prvars(), *do_bp();
 #endif
 
 /* Tokentab is sorted ascii ascending order, so it can be binary searched. */
-/* (later.  Right now its just sort of linear search (SLOW!!) */
 
 static struct token tokentab[] = {
-  {"BEGIN",	Node_illegal,		LEX_BEGIN,	0},
-  {"END",	Node_illegal,		LEX_END,	0},
-#ifndef FAST
-  {"bp",	Node_builtin,		LEX_BUILTIN,	do_bp},
+	{ "BEGIN",	Node_illegal,		LEX_BEGIN,	0 },
+	{ "END",	Node_illegal,		LEX_END,	0 },
+	{ "atan2",	Node_builtin,		LEX_BUILTIN,	do_atan2 },
+#ifdef DEBUG
+	{ "bp",		Node_builtin,		LEX_BUILTIN,	do_bp },
 #endif
-  {"break",	Node_K_break,		LEX_BREAK,	0},
-  {"continue",	Node_K_continue,	LEX_CONTINUE,	0},
-  {"else",	Node_illegal,		LEX_ELSE,	0},
-  {"exit",	Node_K_exit,		LEX_EXIT,	0},
-  {"exp",	Node_builtin,		LEX_BUILTIN,	do_exp},
-  {"for",	Node_K_for,		LEX_FOR,	0},
-  {"getline",	Node_builtin,		LEX_BUILTIN,	do_getline},
-  {"if",	Node_K_if,		LEX_IF,		0},
-  {"in",	Node_illegal,		LEX_IN,		0},
-  {"index",	Node_builtin,		LEX_BUILTIN,	do_index},
-  {"int",	Node_builtin,		LEX_BUILTIN,	do_int},
-  {"length",	Node_builtin,		LEX_BUILTIN,	do_length},
-  {"log",	Node_builtin,		LEX_BUILTIN,	do_log},
-  {"next",	Node_K_next,		LEX_NEXT,	0},
-  {"print",	Node_K_print,		LEX_PRINT,	0},
-  {"printf",	Node_K_printf,		LEX_PRINTF,	0},
-#ifndef FAST
-  {"prvars",	Node_builtin,		LEX_BUILTIN,	do_prvars},
+	{ "break",	Node_K_break,		LEX_BREAK,	0 },
+	{ "close",	Node_builtin,		LEX_BUILTIN,	do_close },
+	{ "continue",	Node_K_continue,	LEX_CONTINUE,	0 },
+	{ "cos",	Node_builtin,		LEX_BUILTIN,	do_cos },
+	{ "delete",	Node_K_delete,		LEX_DELETE,	0 },
+	{ "do",		Node_K_do,		LEX_DO,		0 },
+	{ "else",	Node_illegal,		LEX_ELSE,	0 },
+	{ "exit",	Node_K_exit,		LEX_EXIT,	0 },
+	{ "exp",	Node_builtin,		LEX_BUILTIN,	do_exp },
+	{ "for",	Node_K_for,		LEX_FOR,	0 },
+	{ "func",	Node_K_function,	LEX_FUNCTION,	0 },
+	{ "function",	Node_K_function,	LEX_FUNCTION,	0 },
+	{ "getline",	Node_K_getline,		LEX_GETLINE,	0 },
+	{ "gsub",	Node_gsub,		LEX_SUB,	0 },
+	{ "if",		Node_K_if,		LEX_IF,		0 },
+	{ "in",		Node_illegal,		LEX_IN,		0 },
+	{ "index",	Node_builtin,		LEX_BUILTIN,	do_index },
+	{ "int",	Node_builtin,		LEX_BUILTIN,	do_int },
+	{ "length",	Node_builtin,		LEX_BUILTIN,	do_length },
+	{ "log",	Node_builtin,		LEX_BUILTIN,	do_log },
+	{ "match",	Node_K_match,		LEX_MATCH,	0 },
+	{ "next",	Node_K_next,		LEX_NEXT,	0 },
+	{ "print",	Node_K_print,		LEX_PRINT,	0 },
+	{ "printf",	Node_K_printf,		LEX_PRINTF,	0 },
+#ifdef DEBUG
+	{ "prvars",	Node_builtin,		LEX_BUILTIN,	do_prvars },
 #endif
-  {"split",	Node_builtin,		LEX_BUILTIN,	do_split},
-  {"sprintf",	Node_builtin,		LEX_BUILTIN,	do_sprintf},
-  {"sqrt",	Node_builtin,		LEX_BUILTIN,	do_sqrt},
-  {"substr",	Node_builtin,		LEX_BUILTIN,	do_substr},
-  {"while",	Node_K_while,		LEX_WHILE,	0},
-  {NULL,	Node_illegal,		ERROR,		0}
+	{ "rand",	Node_builtin,		LEX_BUILTIN,	do_rand },
+	{ "return",	Node_K_return,		LEX_RETURN,	0 },
+	{ "sin",	Node_builtin,		LEX_BUILTIN,	do_sin },
+	{ "split",	Node_builtin,		LEX_BUILTIN,	do_split },
+	{ "sprintf",	Node_builtin,		LEX_BUILTIN,	do_sprintf },
+	{ "sqrt",	Node_builtin,		LEX_BUILTIN,	do_sqrt },
+	{ "srand",	Node_builtin,		LEX_BUILTIN,	do_srand },
+	{ "sub",	Node_sub,		LEX_SUB,	0 },
+	{ "substr",	Node_builtin,		LEX_BUILTIN,	do_substr },
+	{ "system",	Node_builtin,		LEX_BUILTIN,	do_system },
+	{ "while",	Node_K_while,		LEX_WHILE,	0 },
 };
 
-/* Read one token, getting characters through lexptr.  */
+/* VARARGS0 */
+yyerror(va_alist)	/* report a syntax error on stderr: source line, '^' marker, then a message or an "expecting" list */
+va_dcl			/* args: char *mesg (may be NULL); if non-NULL its printf-style args follow, else one char * (may be NULL) */
+{
+	va_list args;
+	char *mesg;
+	char *a1;
+	register char *ptr, *beg;
+	static int list = 0;	/* set after "expecting: ..." is printed; cleared by a later call whose string argument is NULL */
+	char *scan;
+
+	errcount++;
+	va_start(args);
+	mesg = va_arg(args, char *);
+	if (mesg || !list) {
+		/* Find the bounds [beg, ptr) of the source line that contains lexptr */
+		if (!lexptr) {
+			beg = "(END OF FILE)";
+			ptr = beg + 13;	/* 13 == length of "(END OF FILE)" */
+		} else {
+			if (*lexptr == '\n' && lexptr != lexptr_begin)
+				--lexptr;	/* step off the newline so the scans below cover the line it ends; modifies the global */
+			for (beg = lexptr; beg != lexptr_begin && *beg != '\n'; --beg)
+				;
+			/* scan forward to end of line; stop at NUL too, since a trailing NL isn't guaranteed */
+			for (ptr = lexptr; *ptr && *ptr != '\n'; ptr++)
+				;
+			if (beg != lexptr_begin)
+				beg++;	/* backward scan stopped on a '\n'; start just after it */
+		}
+		msg("syntax error near line %d:\n%.*s", lineno, ptr - beg, beg);	/* NOTE(review): %.*s expects an int precision; ptr - beg is a pointer difference */
+		scan = beg;
+		while (scan <= lexptr)	/* one blank per character from beg through lexptr, echoing tabs, before the '^' */
+			if (*scan++ == '\t')
+				putc('\t', stderr);
+			else
+				putc(' ', stderr);
+		putc('^', stderr);
+		putc(' ', stderr);
+		if (mesg) {
+			vfprintf(stderr, mesg, args);
+			va_end(args);
+		        putc('\n', stderr);
+			exit(1);	/* a call with an explicit message terminates the program */
+		} else {
+			a1 = va_arg(args, char *);
+			if (a1) {
+				fputs("expecting: ", stderr);
+				fputs(a1, stderr);
+				list = 1;	/* later NULL-message calls take the " or ..." path below */
+				va_end(args);
+				return;
+			}
+		}
+		va_end(args);
+		return;
+	}
+	a1 = va_arg(args, char *);	/* reached only when mesg is NULL and a list is in progress */
+	if (a1) {
+		fputs(" or ", stderr);
+		fputs(a1, stderr);
+		va_end(args);
+		putc('\n', stderr);
+		return;
+	}
+	putc('\n', stderr);
+	list = 0;	/* NULL string argument ends the list */
+	va_end(args);
+}
+
+/*
+ * Parse a C escape sequence.  STRING_PTR points to a variable containing a
+ * pointer to the string to parse.  That pointer is updated past the
+ * characters we use.  The value of the escape sequence is returned. 
+ *
+ * A negative value means the sequence \ newline was seen, which is supposed to
+ * be equivalent to nothing at all. 
+ *
+ * If \ is followed by a null character, we return 0 and leave the string
+ * pointer pointing at the null character. 
+ *
+ * If \ is followed by 000, we also return 0 but leave the string pointer
+ * after the zeros, so a value of 0 by itself does not mean end of string.
+ */
 
 static int
-yylex ()
+parse_escape(string_ptr)
+char **string_ptr;
 {
-  register int c;
-  register int namelen;
-  register char *tokstart;
-  register struct token *toktab;
-  double atof();	/* JF know what happens if you forget this? */
-
-
-  static did_newline = 0;	/* JF the grammar insists that actions end
-  				   with newlines.  This was easier than hacking
-				   the grammar. */
-  int do_concat;
-
-  int	seen_e = 0;		/* These are for numbers */
-  int	seen_point = 0;
-
-  retry:
-
-  if(!lexptr)
-    return 0;
-
-  if (want_regexp) {
-    want_regexp = 0;
-    /* there is a potential bug if a regexp is followed by an equal sign:
-       "/foo/=bar" would result in assign_quotient being returned as the
-       next token.  Nothing is done about it since it is not valid awk,
-       but maybe something should be done anyway. */
-
-    tokstart = lexptr;
-    while (c = *lexptr++) {
-      switch (c) {
-      case '\\':
-	if (*lexptr++ == '\0') {
-	  yyerror ("unterminated regexp ends with \\");
-	  return ERROR;
+	register int c = *(*string_ptr)++;
+
+	switch (c) {
+	case 'b':
+		return '\b';
+	case 'f':
+		return '\f';
+	case 'n':
+		return '\n';
+	case 'r':
+		return '\r';
+	case 't':
+		return '\t';
+	case 'v':
+		return '\v';
+	case '\n':
+		return -2;
+	case 0:
+		(*string_ptr)--;
+		return 0;
+	case '0':
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+		{
+			register int i = c - '0';
+			register int count = 0;
+
+			while (++count < 3) {
+				if ((c = *(*string_ptr)++) >= '0' && c <= '7') {
+					i *= 8;
+					i += c - '0';
+				} else {
+					(*string_ptr)--;
+					break;
+				}
+			}
+			return i;
+		}
+	default:
+		return c;
 	}
-	break;
-      case '/':			/* end of the regexp */
-	lexptr--;
-	yylval.sval = tokstart;
-	return REGEXP;
-      case '\n':
-      case '\0':
-	yyerror ("unterminated regexp");
-	return ERROR;
-      }
-    }
-  }
-  do_concat=want_concat_token;
-  want_concat_token=0;
-
-  if(*lexptr=='\0') {
-    lexptr=0;
-    return NEWLINE;
-  }
-
-  /* if lexptr is at white space between two terminal tokens or parens,
-     it is a concatenation operator. */
-  if(do_concat && (*lexptr==' ' || *lexptr=='\t')) {
-    while (*lexptr == ' ' || *lexptr == '\t')
-      lexptr++;
-    if (isalnum(*lexptr) || *lexptr == '\"' || *lexptr == '('
-        || *lexptr == '.' || *lexptr == '$') /* the '.' is for decimal pt */
-      return CONCAT_OP;
-  }
-
-  while (*lexptr == ' ' || *lexptr == '\t')
-    lexptr++;
-
-  tokstart = lexptr;	/* JF */
-
-  switch (c = *lexptr++) {
-  case 0:
-    return 0;
-
-  case '\n':
-    lineno++;
-    return NEWLINE;
-
-  case '#':			/* it's a comment */
-    while (*lexptr != '\n' && *lexptr != '\0')
-      lexptr++;
-    goto retry;
-
-  case '\\':
-    if(*lexptr=='\n') {
-      lexptr++;
-      goto retry;
-    } else break;  
-  case ')':
-  case ']':
-    ++want_concat_token;
-    /* fall through */
-  case '(':	/* JF these were above, but I don't see why they should turn on concat. . . &*/
-  case '[':
-
-  case '{':
-  case ',':		/* JF */
-  case '$':
-  case ';':
-    /* set node type to ILLEGAL because the action should set it to
-       the right thing */
-    yylval.nodetypeval = Node_illegal;
-    return c;
-
-  case '*':
-    if(*lexptr=='=') {
-      yylval.nodetypeval=Node_assign_times;
-      lexptr++;
-      return ASSIGNOP;
-    }
-    yylval.nodetypeval=Node_illegal;
-    return c;
-
-  case '/':
-    if(*lexptr=='=') {
-      yylval.nodetypeval=Node_assign_quotient;
-      lexptr++;
-      return ASSIGNOP;
-    }
-    yylval.nodetypeval=Node_illegal;
-    return c;
-
-  case '%':
-    if(*lexptr=='=') {
-      yylval.nodetypeval=Node_assign_mod;
-      lexptr++;
-      return ASSIGNOP;
-    }
-    yylval.nodetypeval=Node_illegal;
-    return c;
-
-  case '+':
-    if(*lexptr=='=') {
-      yylval.nodetypeval=Node_assign_plus;
-      lexptr++;
-      return ASSIGNOP;
-    }
-    if(*lexptr=='+') {
-      yylval.nodetypeval=Node_illegal;
-      lexptr++;
-      return INCREMENT;
-    }
-    yylval.nodetypeval=Node_illegal;
-    return c;
-
-  case '!':
-    if(*lexptr=='=') {
-      yylval.nodetypeval=Node_notequal;
-      lexptr++;
-      return RELOP;
-    }
-    if(*lexptr=='~') {
-      yylval.nodetypeval=Node_nomatch;
-      lexptr++;
-      return MATCHOP;
-    }
-    yylval.nodetypeval=Node_illegal;
-    return c;
-
-  case '<':
-    if(*lexptr=='=') {
-      yylval.nodetypeval=Node_leq;
-      lexptr++;
-      return RELOP;
-    }
-    yylval.nodetypeval=Node_less;
-    return RELOP;
-
-  case '=':
-    if(*lexptr=='=') {
-      yylval.nodetypeval=Node_equal;
-      lexptr++;
-      return RELOP;
-    }
-    yylval.nodetypeval=Node_assign;
-    return ASSIGNOP;
-
-  case '>':
-    if(want_redirect) {
-      if (*lexptr == '>') {
-	yylval.nodetypeval = Node_redirect_append;
-	lexptr++;
-      } else 
-        yylval.nodetypeval = Node_redirect_output;
-      return REDIRECT_OP;
-    }
-    if(*lexptr=='=') {
-      yylval.nodetypeval=Node_geq;
-      lexptr++;
-      return RELOP;
-    }
-    yylval.nodetypeval=Node_greater;
-    return RELOP;
-
-  case '~':
-    yylval.nodetypeval=Node_match;
-    return MATCHOP;
-
-  case '}':		/* JF added did newline stuff.  Easier than hacking the grammar */
-    if(did_newline) {
-      did_newline=0;
-      return c;
-    }
-    did_newline++;
-    --lexptr;
-    return NEWLINE;
-
-  case '"':
-    while (*lexptr != '\0') {
-      switch (*lexptr++) {
-      case '\\':
-	if (*lexptr++ != '\0')
-	  break;
-	/* fall through */
-      case '\n':
-	yyerror ("unterminated string");
-	return ERROR;
-      case '\"':
-	yylval.sval = tokstart + 1;	/* JF Skip the doublequote */
-	++want_concat_token;
-	return YSTRING;
-      }
-    }
-    return ERROR;	/* JF this was one level up, wrong? */
-
-  case '-':
-    if(*lexptr=='=') {
-      yylval.nodetypeval=Node_assign_minus;
-      lexptr++;
-      return ASSIGNOP;
-    }
-    if(*lexptr=='-') {
-      yylval.nodetypeval=Node_illegal;
-      lexptr++;
-      return DECREMENT;
-    }
-    /* JF I think space tab comma and newline are the legal places for
-       a UMINUS.  Have I missed any? */
-    if((!isdigit(*lexptr) && *lexptr!='.') || (lexptr>lexptr_begin+1 &&
- !index(" \t,\n",lexptr[-2]))) {
-    /* set node type to ILLEGAL because the action should set it to
-       the right thing */
-      yylval.nodetypeval = Node_illegal;
-      return c;
-    }
-  	/* FALL through into number code */
-  case '0':
-  case '1':
-  case '2':
-  case '3':
-  case '4':
-  case '5':
-  case '6':
-  case '7':
-  case '8':
-  case '9':
-  case '.':
-    /* It's a number */
-    if(c=='-') namelen=1;
-    else namelen=0;
-    for (; (c = tokstart[namelen]) != '\0'; namelen++) {
-      switch (c) {
-      case '.':
-	if (seen_point)
-	  goto got_number;
-	++seen_point;
-	break;
-      case 'e':
-      case 'E':
-	if (seen_e)
-	  goto got_number;
-	++seen_e;
-	if (tokstart[namelen+1] == '-' || tokstart[namelen+1] == '+')
-	  namelen++;
-	break;
-      case '0': case '1': case '2': case '3': case '4': 
-      case '5': case '6': case '7': case '8': case '9': 
-	break;
-      default:
-	goto got_number;
-      }
-    }
-
-got_number:
-    lexptr = tokstart + namelen;
-    yylval.fval = atof(tokstart);
-    ++want_concat_token;
-    return NUMBER;
-
-  case '&':
-    if(*lexptr=='&') {
-      yylval.nodetypeval=Node_and;
-      lexptr++;
-      return LEX_AND;
-    }
-    return ERROR;
-
-  case '|':
-    if(want_redirect) {
-      lexptr++;
-      yylval.nodetypeval = Node_redirect_pipe;
-      return REDIRECT_OP;
-    }
-    if(*lexptr=='|') {
-      yylval.nodetypeval=Node_or;
-      lexptr++;
-      return LEX_OR;
-    }
-    return ERROR;
-  }
-  
-  if (!isalpha(c)) {
-    yyerror ("Invalid char '%c' in expression\n", c);
-    return ERROR;
-  }
-
-  /* its some type of name-type-thing.  Find its length */
-  for (namelen = 0; is_identchar(tokstart[namelen]); namelen++)
-    ;
-
-
-  /* See if it is a special token.  */
-  for (toktab = tokentab; toktab->operator != NULL; toktab++) {
-    if(*tokstart==toktab->operator[0] &&
-       !strncmp(tokstart,toktab->operator,namelen) &&
-       toktab->operator[namelen]=='\0') {
-      lexptr=tokstart+namelen;
-      if(toktab->class == LEX_BUILTIN)
-        yylval.ptrval = toktab->ptr;
-      else
-        yylval.nodetypeval = toktab->value;
-      return toktab->class;
-    }
-  }
-
-  /* It's a name.  See how long it is.  */
-  yylval.sval = tokstart;
-  lexptr = tokstart+namelen;
-  ++want_concat_token;
-  return NAME;
 }
 
-/*VARARGS1*/
-yyerror (mesg,a1,a2,a3,a4,a5,a6,a7,a8)
-     char *mesg;
+/*
+ * Read the input and turn it into tokens. Input is now read from a file
+ * instead of from malloc'ed memory. The main program takes a program
+ * passed as a command line argument and writes it to a temp file. Otherwise
+ * the file name is made available in an external variable.
+ */
+
+int curinfile = -1;
+
+static int
+yylex()
 {
-  register char *ptr,*beg;
-
-	/* Find the current line in the input file */
-  if(!lexptr) {
-    beg="(END OF FILE)";
-    ptr=beg+13;
-  } else {
-    if (*lexptr == '\n' && lexptr!=lexptr_begin)
-      --lexptr;
-    for (beg = lexptr;beg!=lexptr_begin && *beg != '\n';--beg)
-      ;
-    for (ptr = lexptr;*ptr && *ptr != '\n';ptr++) /*jfw: NL isn't guaranteed*/
-      ;
-    if(beg!=lexptr_begin)
-      beg++;
-  }
-  fprintf (stderr, "Error near line %d,  '%.*s'\n",lineno, ptr-beg, beg);
-  /* figure out line number, etc. later */
-  fprintf (stderr, mesg, a1, a2, a3, a4, a5, a6, a7, a8);
-  fprintf (stderr,"\n");
-  exit (1);
-}
+	register int c;
+	register int namelen;
+	register char *tokstart;
+	register struct token *tokptr;
+	char *tokkey;
+	extern double atof();	/* know what happens if you forget this? */
+	static did_newline = 0;	/* the grammar insists that actions end
+				 * with newlines.  This was easier than
+				 * hacking the grammar. */
+	int do_concat;
+	int seen_e = 0;		/* These are for numbers */
+	int seen_point = 0;
+	extern char **sourcefile;
+	extern int tempsource, numfiles;
+	extern FILE *pathopen();
+	static int file_opened = 0;
+	static FILE *fin;
+	static char cbuf[BUFSIZ];
+	int low, mid, high;
+	extern int debugging;
+
+	if (! file_opened) {
+		file_opened = 1;
+#ifdef DEBUG
+		if (debugging) {
+			int i;
+
+			for (i = 0; i <= numfiles; i++)
+				fprintf (stderr, "sourcefile[%d] = %s\n", i,
+						sourcefile[i]);
+		}
+#endif
+	nextfile:
+		if ((fin = pathopen (sourcefile[++curinfile])) == NULL)
+			fatal("cannot open `%s' for reading (%s)",
+				sourcefile[curinfile],
+				sys_errlist[errno]);
+		*(lexptr = cbuf) = '\0';
+		/*
+		 * immediately unlink the tempfile so that it will
+		 * go away cleanly if we bomb.
+		 */
+		if (tempsource && curinfile == 0)
+			(void) unlink (sourcefile[curinfile]);
+	}
+
+retry:
+	if (! *lexptr)
+		if (fgets (cbuf, sizeof cbuf, fin) == NULL) {
+			if (fin != NULL)
+				fclose (fin);	/* be neat and clean */
+			if (curinfile < numfiles)
+				goto nextfile;
+			return 0;
+		} else
+			lexptr = lexptr_begin = cbuf;
+
+	if (want_regexp) {
+		want_regexp = 0;
+
+		/*
+		 * there is a potential bug if a regexp is followed by an
+		 * equal sign: "/foo/=bar" would result in assign_quotient
+		 * being returned as the next token.  Nothing is done about
+		 * it since it is not valid awk, but maybe something should
+		 * be done anyway. 
+		 */
+
+		tokstart = lexptr;
+		while (c = *lexptr++) {
+			switch (c) {
+			case '\\':
+				if (*lexptr++ == '\0') {
+					yyerror("unterminated regexp ends with \\");
+					return ERROR;
+				} else if (lexptr[-1] == '\n')
+					goto retry;
+				break;
+			case '/':	/* end of the regexp */
+				lexptr--;
+				yylval.sval = tokstart;
+				return REGEXP;
+			case '\n':
+				lineno++;
+			case '\0':
+				yyerror("unterminated regexp");
+				return ERROR;
+			}
+		}
+	}
+	do_concat = want_concat_token;
+	want_concat_token = 0;
 
-/* Parse a C escape sequence.  STRING_PTR points to a variable
-   containing a pointer to the string to parse.  That pointer
-   is updated past the characters we use.  The value of the
-   escape sequence is returned.
+	if (*lexptr == '\n') {
+		lexptr++;
+		lineno++;
+		return NEWLINE;
+	}
 
-   A negative value means the sequence \ newline was seen,
-   which is supposed to be equivalent to nothing at all.
+	/*
+	 * if lexptr is at white space between two terminal tokens or parens,
+	 * it is a concatenation operator. 
+	 */
+	if (do_concat && (*lexptr == ' ' || *lexptr == '\t')) {
+		while (*lexptr == ' ' || *lexptr == '\t')
+			lexptr++;
+		if (isalnum(*lexptr) || *lexptr == '_' || *lexptr == '\"' ||
+		    *lexptr == '(' || *lexptr == '.' || *lexptr == '$' ||
+		    (*lexptr == '+' && *(lexptr+1) == '+') ||
+		    (*lexptr == '-' && *(lexptr+1) == '-'))
+					/* the '.' is for decimal pt */
+			return CONCAT_OP;
+	}
+	while (*lexptr == ' ' || *lexptr == '\t')
+		lexptr++;
+
+	tokstart = lexptr;
+
+	switch (c = *lexptr++) {
+	case 0:
+		return 0;
+
+	case '\n':
+		lineno++;
+		return NEWLINE;
+
+	case '#':		/* it's a comment */
+		while (*lexptr != '\n' && *lexptr != '\0')
+			lexptr++;
+		goto retry;
+
+	case '\\':
+		if (*lexptr == '\n') {
+			lineno++;
+			lexptr++;
+			want_concat_token = do_concat;
+			goto retry;
+		} else
+			break;
+	case ')':
+	case ']':
+		++want_concat_token;
+		/* fall through */
+	case '(':	
+	case '[':
+	case '$':
+	case ';':
+	case ':':
+	case '?':
+
+		/*
+		 * set node type to ILLEGAL because the action should set it
+		 * to the right thing 
+		 */
+		yylval.nodetypeval = Node_illegal;
+		return c;
+
+	case '{':
+	case ',':
+		while (isspace(*lexptr)) {
+			if (*lexptr == '\n')
+				lineno++;
+			lexptr++;
+		}
+		yylval.nodetypeval = Node_illegal;
+		return c;
+
+	case '*':
+		if (*lexptr == '=') {
+			yylval.nodetypeval = Node_assign_times;
+			lexptr++;
+			return ASSIGNOP;
+		} else if (*lexptr == '*') {	/* make ** and **= aliases
+						 * for ^ and ^= */
+			if (lexptr[1] == '=') {
+				yylval.nodetypeval = Node_assign_exp;
+				lexptr += 2;
+				return ASSIGNOP;
+			} else {
+				yylval.nodetypeval = Node_illegal;
+				lexptr++;
+				return '^';
+			}
+		}
+		yylval.nodetypeval = Node_illegal;
+		return c;
+
+	case '/':
+		if (*lexptr == '=') {
+			yylval.nodetypeval = Node_assign_quotient;
+			lexptr++;
+			return ASSIGNOP;
+		}
+		yylval.nodetypeval = Node_illegal;
+		return c;
+
+	case '%':
+		if (*lexptr == '=') {
+			yylval.nodetypeval = Node_assign_mod;
+			lexptr++;
+			return ASSIGNOP;
+		}
+		yylval.nodetypeval = Node_illegal;
+		return c;
+
+	case '^':
+		if (*lexptr == '=') {
+			yylval.nodetypeval = Node_assign_exp;
+			lexptr++;
+			return ASSIGNOP;
+		}
+		yylval.nodetypeval = Node_illegal;
+		return c;
+
+	case '+':
+		if (*lexptr == '=') {
+			yylval.nodetypeval = Node_assign_plus;
+			lexptr++;
+			return ASSIGNOP;
+		}
+		if (*lexptr == '+') {
+			yylval.nodetypeval = Node_illegal;
+			lexptr++;
+			return INCREMENT;
+		}
+		yylval.nodetypeval = Node_illegal;
+		return c;
+
+	case '!':
+		if (*lexptr == '=') {
+			yylval.nodetypeval = Node_notequal;
+			lexptr++;
+			return RELOP;
+		}
+		if (*lexptr == '~') {
+			yylval.nodetypeval = Node_nomatch;
+			if (! strict && lexptr[1] == '~') {
+				yylval.nodetypeval = Node_case_nomatch;
+				lexptr++;
+			}
+			lexptr++;
+			return MATCHOP;
+		}
+		yylval.nodetypeval = Node_illegal;
+		return c;
 
-   If \ is followed by a null character, we return a negative
-   value and leave the string pointer pointing at the null character.
+	case '<':
+		if (want_redirect) {
+			yylval.nodetypeval = Node_redirect_input;
+			return REDIRECT_OP;
+		}
+		if (*lexptr == '=') {
+			yylval.nodetypeval = Node_leq;
+			lexptr++;
+			return RELOP;
+		}
+		yylval.nodetypeval = Node_less;
+		return RELOP;
+
+	case '=':
+		if (*lexptr == '=') {
+			yylval.nodetypeval = Node_equal;
+			lexptr++;
+			return RELOP;
+		}
+		yylval.nodetypeval = Node_assign;
+		return ASSIGNOP;
+
+	case '>':
+		if (want_redirect) {
+			if (*lexptr == '>') {
+				yylval.nodetypeval = Node_redirect_append;
+				lexptr++;
+			} else
+				yylval.nodetypeval = Node_redirect_output;
+			return REDIRECT_OP;
+		}
+		if (*lexptr == '=') {
+			yylval.nodetypeval = Node_geq;
+			lexptr++;
+			return RELOP;
+		}
+		yylval.nodetypeval = Node_greater;
+		return RELOP;
+
+	case '~':
+		yylval.nodetypeval = Node_match;
+		if (! strict && *lexptr == '~') {
+			yylval.nodetypeval = Node_case_match;
+			lexptr++;
+		}
+		return MATCHOP;
+
+	case '}':
+		/*
+		 * Added did newline stuff.  Easier than
+		 * hacking the grammar
+		 */
+		if (did_newline) {
+			did_newline = 0;
+			return c;
+		}
+		did_newline++;
+		--lexptr;
+		return NEWLINE;
+
+	case '"':
+		while (*lexptr != '\0') {
+			switch (*lexptr++) {
+			case '\\':
+				if (*lexptr++ != '\0')
+					break;
+				/* fall through */
+			case '\n':
+				yyerror("unterminated string");
+				return ERROR;
+			case '\"':
+				/* Skip the doublequote */
+				yylval.sval = tokstart + 1;
+				++want_concat_token;
+				return YSTRING;
+			}
+		}
+		return ERROR;
 
-   If \ is followed by 000, we return 0 and leave the string pointer
-   after the zeros.  A value of 0 does not mean end of string.  */
+	case '-':
+		if (*lexptr == '=') {
+			yylval.nodetypeval = Node_assign_minus;
+			lexptr++;
+			return ASSIGNOP;
+		}
+		if (*lexptr == '-') {
+			yylval.nodetypeval = Node_illegal;
+			lexptr++;
+			return DECREMENT;
+		}
 
-static int
-parse_escape (string_ptr)
-     char **string_ptr;
-{
-  register int c = *(*string_ptr)++;
-  switch (c)
-    {
-    case 'a':
-      return '\a';
-    case 'b':
-      return '\b';
-    case 'e':
-      return 033;
-    case 'f':
-      return '\f';
-    case 'n':
-      return '\n';
-    case 'r':
-      return '\r';
-    case 't':
-      return '\t';
-    case 'v':
-      return '\v';
-    case '\n':
-      return -2;
-    case 0:
-      (*string_ptr)--;
-      return 0;
-    case '^':
-      c = *(*string_ptr)++;
-      if (c == '\\')
-	c = parse_escape (string_ptr);
-      if (c == '?')
-	return 0177;
-      return (c & 0200) | (c & 037);
-      
-    case '0':
-    case '1':
-    case '2':
-    case '3':
-    case '4':
-    case '5':
-    case '6':
-    case '7':
-      {
-	register int i = c - '0';
-	register int count = 0;
-	while (++count < 3)
-	  {
-	    if ((c = *(*string_ptr)++) >= '0' && c <= '7')
-	      {
-		i *= 8;
-		i += c - '0';
-	      }
-	    else
-	      {
-		(*string_ptr)--;
+		/*
+		 * It looks like space tab comma and newline are the legal
+		 * places for a UMINUS.  Have we missed any? 
+		 */
+		if ((! isdigit(*lexptr) && *lexptr != '.') ||
+			(lexptr > lexptr_begin + 1 &&
+				    ! index(" \t,\n", lexptr[-2]))) {
+
+			/*
+			 * set node type to ILLEGAL because the action should
+			 * set it to the right thing 
+			 */
+			yylval.nodetypeval = Node_illegal;
+			return c;
+		}
+		/* FALL through into number code */
+	case '0':
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+	case '8':
+	case '9':
+	case '.':
+		/* It's a number */
+		if (c == '-')
+			namelen = 1;
+		else
+			namelen = 0;
+		for (; (c = tokstart[namelen]) != '\0'; namelen++) {
+			switch (c) {
+			case '.':
+				if (seen_point)
+					goto got_number;
+				++seen_point;
+				break;
+			case 'e':
+			case 'E':
+				if (seen_e)
+					goto got_number;
+				++seen_e;
+				if (tokstart[namelen + 1] == '-' || tokstart[namelen + 1] == '+')
+					namelen++;
+				break;
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
+				break;
+			default:
+				goto got_number;
+			}
+		}
+
+got_number:
+		lexptr = tokstart + namelen;
+		yylval.fval = atof(tokstart);
+		++want_concat_token;
+		return NUMBER;
+
+	case '&':
+		if (*lexptr == '&') {
+			yylval.nodetypeval = Node_and;
+			lexptr++;
+			return LEX_AND;
+		}
+		return ERROR;
+
+	case '|':
+		if (*lexptr == '|') {
+			yylval.nodetypeval = Node_or;
+			lexptr++;
+			return LEX_OR;
+		} else if (want_redirect) {
+			yylval.nodetypeval = Node_redirect_pipe;
+			return REDIRECT_OP;
+		} else {
+			yylval.nodetypeval = Node_illegal;
+			return c;
+		}
 		break;
-	      }
-	  }
-	return i;
-      }
-    default:
-      return c;
-    }
+	}
+
+	if (c != '_' && !isalpha(c)) {
+		yyerror("Invalid char '%c' in expression\n", c);
+		return ERROR;
+	}
+
+	/* it's some type of name-type-thing.  Find its length */
+	for (namelen = 0; is_identchar(tokstart[namelen]); namelen++)
+		/* null */ ;
+	emalloc(tokkey, char *, namelen+1, "yylex");
+	strncpy (tokkey, tokstart, namelen);
+	tokkey[namelen] = '\0';
+
+	/* See if it is a special token.  */
+	low = 0;
+	high = (sizeof (tokentab) / sizeof (tokentab[0])) - 1;
+	while (low <= high) {
+		int i, c;
+
+		mid = (low + high) / 2;
+
+	compare:
+		c = *tokstart - tokentab[mid].operator[0];
+		i = c ? c : strcmp (tokkey, tokentab[mid].operator);
+
+		if (i < 0) {		/* token < mid */
+			high = mid - 1;
+		} else if (i > 0) {	/* token > mid */
+			low = mid + 1;
+		} else {
+			lexptr = tokstart + namelen;
+			if (tokentab[mid].class == LEX_BUILTIN)
+				yylval.ptrval = tokentab[mid].ptr;
+			else
+				yylval.nodetypeval = tokentab[mid].value;
+			if (tokentab[mid].class == LEX_PRINT)
+				want_redirect++;
+			return tokentab[mid].class;
+		}
+	}
+
+	/* It's a name.  See how long it is.  */
+	yylval.sval = tokkey;
+	lexptr = tokstart + namelen;
+	++want_concat_token;
+	return NAME;
+}
+
+#ifndef DEFPATH
+#define DEFPATH	".:/usr/lib/awk:/usr/local/lib/awk"
+#endif
+/* pathopen: open FILE for reading, trying each colon-separated AWKPATH directory in turn; returns NULL if every attempt fails */
+FILE *
+pathopen (file)
+char *file;	/* name of the source file to open */
+{
+	static char defpath[] = DEFPATH;	/* search path used when AWKPATH is unset or empty */
+	static char *savepath;	/* path chosen on the first call, reused on later calls */
+	static int first = 1;
+	extern char *getenv ();
+	char *awkpath, *cp;
+	char trypath[BUFSIZ];	/* NOTE(review): directory + '/' + file is copied in with no length check */
+	FILE *fp;
+	extern int debugging;
+
+	if (strict)	/* when strict is set, open FILE exactly as given; no search */
+		return (fopen (file, "r"));
+
+	if (first) {
+		first = 0;
+		if ((awkpath = getenv ("AWKPATH")) == NULL || ! *awkpath)
+			awkpath = defpath;
+		savepath = awkpath;	/* savepath used for restarting */
+	} else
+		awkpath = savepath;
+
+	if (index (file, '/') != NULL)	/* some kind of path name, no search */
+		return (fopen (file, "r"));
+
+	do {
+		for (cp = trypath; *awkpath && *awkpath != ':'; )
+			*cp++ = *awkpath++;	/* copy one directory, stopping at ':' or the end of the path */
+		*cp++ = '/';	/* NOTE(review): an empty component therefore yields "/file" */
+		*cp = '\0';	/* clear left over junk */
+		strcat (cp, file);
+		if ((fp = fopen (trypath, "r")) != NULL)
+			return (fp);
+
+		if (*awkpath == ':')	/* no luck: skip the colon, but never step past the terminating NUL */
+			awkpath++;
+	} while (*awkpath);
+	return (NULL);
 }