1 files changed, 898 insertions, 0 deletions
diff --git a/awk.y b/awk.y
new file mode 100644
index 00000000..180505f6
--- /dev/null
+++ b/awk.y
@@ -0,0 +1,898 @@
+
+/*
+ * gawk -- GNU version of awk
+ * Copyright (C) 1986 Free Software Foundation
+ *   Written by Paul Rubin, August 1986
+ *
+ */
+
+/*
+GAWK is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY.  No author or distributor accepts responsibility to anyone
+for the consequences of using it or for whether it serves any
+particular purpose or works at all, unless he says so in writing.
+Refer to the GAWK General Public License for full details.
+
+Everyone is granted permission to copy, modify and redistribute GAWK,
+but only under the conditions described in the GAWK General Public
+License.  A copy of this license is supposed to have been given to you
+along with GAWK so you can know your rights and responsibilities.  It
+should be in a file named COPYING.  Among other things, the copyright
+notice and this notice must be preserved on all copies.
+
+In other words, go ahead and share GAWK, but don't try to stop
+anyone else from sharing it farther.  Help stamp out software hoarding!
+*/
+
+%{
+#define YYDEBUG 12
+
+#include <stdio.h>
+#include "awk.h"
+
+  static int yylex ();
+
+
+  /*
+   * The following variable is used for a very sickening thing.
+   * The awk language uses white space as the string concatenation
+   * operator, but having a white space token that would have to appear
+   * everywhere in all the grammar rules would be unbearable.
+   * It turns out we can return CONCAT_OP exactly when there really
+   * is one, just from knowing what kinds of other tokens it can appear
+   * between (namely, constants, variables, or close parentheses).
+   * This is because concatenation has the lowest priority of all
+   * operators.  want_concat_token is used to remember that something
+   * that could be the left side of a concat has just been returned.
+   *
+   * If anyone knows a cleaner way to do this (don't look at the Un*x
+   * code to find one, though), please suggest it.
+   */
+  static int want_concat_token;
+
+  /* Two more horrible kludges.  The same comment applies to these two too */
+  static int want_regexp;	/* lexical scanning kludge */
+  static int want_redirect;	/* similarly */
+  int lineno = 1;	/* JF for error msgs */
+
+/* During parsing of a gawk program, the pointer to the next character
+   is in this variable.  */
+  char *lexptr;		/* JF moved it up here */
+  char *lexptr_begin;	/* JF for error msgs */
+%}
+
+%union {
+  long lval;
+  AWKNUM fval;
+  NODE *nodeval;
+  NODETYPE nodetypeval;
+  char *sval;
+  NODE *(*ptrval)();
+}
+
+%type <nodeval> exp start program rule pattern conditional
+%type <nodeval>	action variable redirection expression_list
+%type <nodeval>	statements statement if_statement
+%type <nodeval> opt_exp v_exp
+%type <nodetypeval> whitespace
+
+%token <sval> NAME REGEXP YSTRING
+%token <lval> ERROR INCDEC
+%token <fval> NUMBER
+%token <nodetypeval> ASSIGNOP RELOP MATCHOP NEWLINE REDIRECT_OP CONCAT_OP
+%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE
+%token <nodetypeval> LEX_WHILE LEX_FOR LEX_BREAK LEX_CONTINUE
+%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT
+%token  LEX_IN
+%token <lval> LEX_AND LEX_OR INCREMENT DECREMENT
+%token <ptrval> LEX_BUILTIN
+
+/* these are just yylval numbers */
+/* %token <lval> CHAR JF this isn't used anymore */
+
+/* Lowest to highest */
+%left LEX_OR
+%left LEX_AND
+%right ASSIGNOP
+%left CONCAT_OP
+%left '+' '-'
+%left '*' '/' '%'
+%right UNARY
+%nonassoc MATCHOP RELOP
+
+%%
+
+start   :  optional_newlines program
+		{ expression_value = $2; }
+	;
+
+
+program	: rule
+		{ $$ = node ($1, Node_rule_list,(NODE *) NULL); }
+	| program rule
+		/* cons the rule onto the tail of list */
+		{ $$ = append_right ($1, node($2, Node_rule_list,(NODE *) NULL)); }
+	;
+
+rule	: pattern action NEWLINE optional_newlines
+		{ $$ = node ($1, Node_rule_node, $2); }
+	;
+
+
+pattern	: /* empty */
+		{ $$ = NULL; }
+	| conditional
+		{ $$ = $1; }
+	| conditional ',' conditional
+		{ $$ = mkrangenode ( node($1, Node_cond_pair, $3) ); } /*jfw*/
+	;
+
+
+conditional :
+	  LEX_BEGIN
+		{ $$ = node ((NODE *)NULL, Node_K_BEGIN,(NODE *) NULL); }
+	| LEX_END
+		{ $$ = node ((NODE *)NULL, Node_K_END,(NODE *) NULL); }
+	| '!' conditional %prec UNARY
+		{ $$ = node ($2, Node_not,(NODE *) NULL); }
+	| conditional LEX_AND conditional
+		{ $$ = node ($1, Node_and, $3); }
+	| conditional LEX_OR conditional
+		{ $$ = node ($1, Node_or, $3); }
+	| '(' conditional ')'
+		{
+		  $$ = $2;
+		  want_concat_token = 0;
+		}
+
+	/* In these rules, want_regexp tells yylex that the next thing
+		is a regexp so it should read up to the closing slash. */
+
+	| '/'
+		{ ++want_regexp; }
+	   REGEXP '/'
+		{ want_regexp = 0;
+		  $$ = node (node (make_number ((AWKNUM)0), Node_field_spec, (NODE *)NULL),
+			     Node_match, (NODE *)make_regexp ($3));
+		}
+	| exp MATCHOP '/'
+		 { ++want_regexp; }
+	   REGEXP '/'
+		 { want_regexp = 0;
+		   $$ = node ($1, $2, (NODE *)make_regexp($5));
+		 }
+	| exp RELOP exp
+		{ $$ = node ($1, $2, $3); }
+	| exp	/* JF */
+		{ $$ = $1; }
+	;
+
+
+action	: /* empty */
+		{ $$ = NULL; }
+	|	'{' whitespace statements '}'
+		{ $$ = $3; }
+	;
+
+
+statements :			/* EMPTY */
+		{ $$ = NULL; }
+	| statement
+		{ $$ = node ($1, Node_statement_list, (NODE *)NULL); }
+	| statements statement
+		{ $$ = append_right($1, node( $2, Node_statement_list, (NODE *)NULL)); }
+	;
+
+statement_term :
+	NEWLINE optional_newlines
+		{ $<nodetypeval>$ = Node_illegal; }
+	| ';' optional_newlines
+		{ $<nodetypeval>$ = Node_illegal; }
+	;
+
+whitespace :
+		/* blank */
+		{ $$ = Node_illegal; }
+	|  CONCAT_OP
+	| NEWLINE
+	| whitespace CONCAT_OP
+	| whitespace NEWLINE
+	;
+statement :
+	'{' whitespace statements '}' whitespace
+		{ $$ = $3; }
+	| if_statement
+		{ $$ = $1; }
+	| LEX_WHILE '(' conditional ')' whitespace statement
+		{ $$ = node ($3, Node_K_while, $6); }
+	| LEX_FOR '(' opt_exp ';' conditional ';' opt_exp ')' whitespace statement
+		{ $$ = node ($10, Node_K_for, (NODE *)make_for_loop ($3, $5, $7)); }
+	| LEX_FOR '(' opt_exp ';' ';' opt_exp ')' whitespace statement
+		{ $$ = node ($9, Node_K_for, (NODE *)make_for_loop ($3, (NODE *)NULL, $6)); }
+	| LEX_FOR '(' NAME CONCAT_OP LEX_IN NAME ')' whitespace statement
+		{ $$ = node ($9, Node_K_arrayfor, (NODE *)make_for_loop(variable($3), (NODE *)NULL, variable($6))); }
+	| LEX_BREAK statement_term
+	   /* for break, maybe we'll have to remember where to break to */
+		{ $$ = node ((NODE *)NULL, Node_K_break, (NODE *)NULL); }
+	| LEX_CONTINUE statement_term
+	   /* similarly */
+		{ $$ = node ((NODE *)NULL, Node_K_continue, (NODE *)NULL); }
+	| LEX_PRINT
+		{ ++want_redirect; }
+	    expression_list redirection statement_term
+		{
+		  want_redirect = 0;
+		  /* $4->lnode = NULL; */
+		  $$ = node ($3, Node_K_print, $4);
+		}
+	| LEX_PRINTF
+		{ ++want_redirect; }
+	    expression_list redirection statement_term
+		{
+		  want_redirect = 0;
+		  /* $4->lnode = NULL; */
+		  $$ = node ($3, Node_K_printf, $4);
+		}
+	| LEX_PRINTF '(' expression_list ')'
+		{ ++want_redirect;
+		  want_concat_token = 0; }
+	    redirection statement_term
+		{
+		  want_redirect = 0;
+		  $$ = node ($3, Node_K_printf, $6);
+		}
+	| LEX_NEXT statement_term
+		{ $$ = node ((NODE *)NULL, Node_K_next, (NODE *)NULL); }
+	| LEX_EXIT statement_term
+		{ $$ = node ((NODE *)NULL, Node_K_exit, (NODE *)NULL); }
+	| LEX_EXIT '(' exp ')' statement_term
+		{ $$ = node ($3, Node_K_exit, (NODE *)NULL); }
+	| exp statement_term
+		{ $$ = $1; }
+	;
+
+
+if_statement:
+	LEX_IF '(' conditional ')' whitespace statement
+		{ $$ = node ($3, Node_K_if,
+				node ($6, Node_if_branches, (NODE *)NULL)); }
+	| LEX_IF '(' conditional ')' whitespace statement
+	     LEX_ELSE whitespace statement
+		{ $$ = node ($3, Node_K_if,
+				node ($6, Node_if_branches, $9)); }
+	;
+
+optional_newlines :
+	  /* empty */
+	| optional_newlines NEWLINE
+		{ $<nodetypeval>$ = Node_illegal; }
+	;
+
+redirection :
+	  /* empty */
+		{ $$ = NULL; /* node (NULL, Node_redirect_nil, NULL); */ }
+	/* | REDIRECT_OP NAME
+		{ $$ = node ($2, $1, NULL); } */
+	| REDIRECT_OP exp
+		{ $$ = node ($2, $1, (NODE *)NULL); }
+	;
+
+
+/* optional expression, as in for loop */
+opt_exp :
+		{ $$ = NULL; /* node(NULL, Node_builtin, NULL); */ }
+	| exp
+		{ $$ = $1; }
+	;
+
+expression_list :
+	  /* empty */
+		{ $$ = NULL; }
+	| exp
+		{ $$ = node ($1, Node_expression_list, (NODE *)NULL); }
+	| expression_list ',' exp
+		{ $$ = append_right($1, node( $3, Node_expression_list, (NODE *)NULL)); }
+	;
+
+
+/* Expressions, not including the comma operator.  */
+exp	:	LEX_BUILTIN '(' expression_list ')'
+			{ $$ = snode ($3, Node_builtin, $1); }
+	|	LEX_BUILTIN
+			{ $$ = snode ((NODE *)NULL, Node_builtin, $1); }
+	|	'(' exp ')'
+			{ $$ = $2; }
+	|	'-' exp    %prec UNARY
+			{ $$ = node ($2, Node_unary_minus, (NODE *)NULL); }
+	|	INCREMENT variable %prec UNARY
+			{ $$ = node ($2, Node_preincrement, (NODE *)NULL); }
+	|	DECREMENT variable %prec UNARY
+			{ $$ = node ($2, Node_predecrement, (NODE *)NULL); }
+	|	variable INCREMENT  %prec UNARY
+			{ $$ = node ($1, Node_postincrement, (NODE *)NULL); }
+	|	variable DECREMENT  %prec UNARY
+			{ $$ = node ($1, Node_postdecrement, (NODE *)NULL); }
+	|	variable
+			{ $$ = $1; }	/* JF was variable($1) */
+	|	NUMBER
+			{ $$ = make_number ($1); }
+	|	YSTRING
+			{ $$ = make_string ($1, -1); }
+
+/* Binary operators in order of decreasing precedence.  */
+	|	exp '*' exp
+			{ $$ = node ($1, Node_times, $3); }
+	|	exp '/' exp
+			{ $$ = node ($1, Node_quotient, $3); }
+	|	exp '%' exp
+			{ $$ = node ($1, Node_mod, $3); }
+	|	exp '+' exp
+			{ $$ = node ($1, Node_plus, $3); }
+	|	exp '-' exp
+			{ $$ = node ($1, Node_minus, $3); }
+		/* Empty operator.  See yylex for disgusting details. */
+	|	exp CONCAT_OP exp
+			{ $$ = node ($1, Node_concat, $3); }
+	|	variable ASSIGNOP exp
+			{ $$ = node ($1, $2, $3); }
+	;
+
+v_exp	:	LEX_BUILTIN '(' expression_list ')'
+			{ $$ = snode ($3, Node_builtin, $1); }
+	|	LEX_BUILTIN
+			{ $$ = snode ((NODE *)NULL, Node_builtin, $1); }
+	|	'(' exp ')'
+			{ $$ = $2; }
+	|	'-' exp    %prec UNARY
+			{ $$ = node ($2, Node_unary_minus, (NODE *)NULL); }
+	|	INCREMENT variable %prec UNARY
+			{ $$ = node ($2, Node_preincrement, (NODE *)NULL); }
+	|	DECREMENT variable %prec UNARY
+			{ $$ = node ($2, Node_predecrement, (NODE *)NULL); }
+	|	variable INCREMENT  %prec UNARY
+			{ $$ = node ($1, Node_postincrement, (NODE *)NULL); }
+	|	variable DECREMENT  %prec UNARY
+			{ $$ = node ($1, Node_postdecrement, (NODE *)NULL); }
+	|	variable
+			{ $$ = $1; }	/* JF was variable($1) */
+	|	NUMBER
+			{ $$ = make_number ($1); }
+	|	YSTRING
+			{ $$ = make_string ($1, -1); }
+
+/* Binary operators in order of decreasing precedence.  */
+	|	v_exp '*' exp
+			{ $$ = node ($1, Node_times, $3); }
+	|	v_exp '/' exp
+			{ $$ = node ($1, Node_quotient, $3); }
+	|	v_exp '%' exp
+			{ $$ = node ($1, Node_mod, $3); }
+	|	v_exp '+' exp
+			{ $$ = node ($1, Node_plus, $3); }
+	|	v_exp '-' exp
+			{ $$ = node ($1, Node_minus, $3); }
+		/* Empty operator.  See yylex for disgusting details. */
+	|	v_exp CONCAT_OP exp
+			{ $$ = node ($1, Node_concat, $3); }
+	;
+
+variable :
+	 	NAME
+			{ $$ = variable ($1); }
+	|	NAME '[' exp ']'
+			{ $$ = node (variable($1), Node_subscript, $3); }
+	|	'$' v_exp	  %prec UNARY
+			{ $$ = node ($2, Node_field_spec, (NODE *)NULL); }
+	;
+
+%%
+
+
+struct token {
+  char *operator;
+  NODETYPE value;
+  int class;
+  NODE *(*ptr)();
+};
+
+#define NULL 0
+
+NODE	*do_exp(),	*do_getline(),	*do_index(),	*do_length(),
+	*do_sqrt(),	*do_log(),	*do_sprintf(),	*do_substr(),
+	*do_split(),	*do_int();
+
+	/* Special functions for debugging */
+#ifndef FAST
+NODE	*do_prvars(),	*do_bp();
+#endif
+
+/* Tokentab is sorted ascii ascending order, so it can be binary searched. */
+/* (later.  Right now its just sort of linear search (SLOW!!) */
+
+static struct token tokentab[] = {
+  {"BEGIN",	Node_illegal,		LEX_BEGIN,	0},
+  {"END",	Node_illegal,		LEX_END,	0},
+#ifndef FAST
+  {"bp",	Node_builtin,		LEX_BUILTIN,	do_bp},
+#endif
+  {"break",	Node_K_break,		LEX_BREAK,	0},
+  {"continue",	Node_K_continue,	LEX_CONTINUE,	0},
+  {"else",	Node_illegal,		LEX_ELSE,	0},
+  {"exit",	Node_K_exit,		LEX_EXIT,	0},
+  {"exp",	Node_builtin,		LEX_BUILTIN,	do_exp},
+  {"for",	Node_K_for,		LEX_FOR,	0},
+  {"getline",	Node_builtin,		LEX_BUILTIN,	do_getline},
+  {"if",	Node_K_if,		LEX_IF,		0},
+  {"in",	Node_illegal,		LEX_IN,		0},
+  {"index",	Node_builtin,		LEX_BUILTIN,	do_index},
+  {"int",	Node_builtin,		LEX_BUILTIN,	do_int},
+  {"length",	Node_builtin,		LEX_BUILTIN,	do_length},
+  {"log",	Node_builtin,		LEX_BUILTIN,	do_log},
+  {"next",	Node_K_next,		LEX_NEXT,	0},
+  {"print",	Node_K_print,		LEX_PRINT,	0},
+  {"printf",	Node_K_printf,		LEX_PRINTF,	0},
+#ifndef FAST
+  {"prvars",	Node_builtin,		LEX_BUILTIN,	do_prvars},
+#endif
+  {"split",	Node_builtin,		LEX_BUILTIN,	do_split},
+  {"sprintf",	Node_builtin,		LEX_BUILTIN,	do_sprintf},
+  {"sqrt",	Node_builtin,		LEX_BUILTIN,	do_sqrt},
+  {"substr",	Node_builtin,		LEX_BUILTIN,	do_substr},
+  {"while",	Node_K_while,		LEX_WHILE,	0},
+  {NULL,	Node_illegal,		ERROR,		0}
+};
+
+/* Read one token, getting characters through lexptr.  */
+
+static int
+yylex ()
+{
+  register int c;
+  register int namelen;
+  register char *tokstart;
+  register struct token *toktab;
+  double atof();	/* JF know what happens if you forget this? */
+
+
+  static did_newline = 0;	/* JF the grammar insists that actions end
+  				   with newlines.  This was easier than hacking
+				   the grammar. */
+  int do_concat;
+
+  int	seen_e = 0;		/* These are for numbers */
+  int	seen_point = 0;
+
+  retry:
+
+  if(!lexptr)
+    return 0;
+
+  if (want_regexp) {
+    want_regexp = 0;
+    /* there is a potential bug if a regexp is followed by an equal sign:
+       "/foo/=bar" would result in assign_quotient being returned as the
+       next token.  Nothing is done about it since it is not valid awk,
+       but maybe something should be done anyway. */
+
+    tokstart = lexptr;
+    while (c = *lexptr++) {
+      switch (c) {
+      case '\\':
+	if (*lexptr++ == '\0') {
+	  yyerror ("unterminated regexp ends with \\");
+	  return ERROR;
+	}
+	break;
+      case '/':			/* end of the regexp */
+	lexptr--;
+	yylval.sval = tokstart;
+	return REGEXP;
+      case '\n':
+      case '\0':
+	yyerror ("unterminated regexp");
+	return ERROR;
+      }
+    }
+  }
+  do_concat=want_concat_token;
+  want_concat_token=0;
+
+  if(*lexptr=='\0') {
+    lexptr=0;
+    return NEWLINE;
+  }
+
+  /* if lexptr is at white space between two terminal tokens or parens,
+     it is a concatenation operator. */
+  if(do_concat && (*lexptr==' ' || *lexptr=='\t')) {
+    while (*lexptr == ' ' || *lexptr == '\t')
+      lexptr++;
+    if (isalnum(*lexptr) || *lexptr == '\"' || *lexptr == '('
+        || *lexptr == '.' || *lexptr == '$') /* the '.' is for decimal pt */
+      return CONCAT_OP;
+  }
+
+  while (*lexptr == ' ' || *lexptr == '\t')
+    lexptr++;
+
+  tokstart = lexptr;	/* JF */
+
+  switch (c = *lexptr++) {
+  case 0:
+    return 0;
+
+  case '\n':
+    lineno++;
+    return NEWLINE;
+
+  case '#':			/* it's a comment */
+    while (*lexptr != '\n' && *lexptr != '\0')
+      lexptr++;
+    goto retry;
+
+  case '\\':
+    if(*lexptr=='\n') {
+      lexptr++;
+      goto retry;
+    } else break;  
+  case ')':
+  case ']':
+    ++want_concat_token;
+    /* fall through */
+  case '(':	/* JF these were above, but I don't see why they should turn on concat. . . &*/
+  case '[':
+
+  case '{':
+  case ',':		/* JF */
+  case '$':
+  case ';':
+    /* set node type to ILLEGAL because the action should set it to
+       the right thing */
+    yylval.nodetypeval = Node_illegal;
+    return c;
+
+  case '*':
+    if(*lexptr=='=') {
+      yylval.nodetypeval=Node_assign_times;
+      lexptr++;
+      return ASSIGNOP;
+    }
+    yylval.nodetypeval=Node_illegal;
+    return c;
+
+  case '/':
+    if(*lexptr=='=') {
+      yylval.nodetypeval=Node_assign_quotient;
+      lexptr++;
+      return ASSIGNOP;
+    }
+    yylval.nodetypeval=Node_illegal;
+    return c;
+
+  case '%':
+    if(*lexptr=='=') {
+      yylval.nodetypeval=Node_assign_mod;
+      lexptr++;
+      return ASSIGNOP;
+    }
+    yylval.nodetypeval=Node_illegal;
+    return c;
+
+  case '+':
+    if(*lexptr=='=') {
+      yylval.nodetypeval=Node_assign_plus;
+      lexptr++;
+      return ASSIGNOP;
+    }
+    if(*lexptr=='+') {
+      yylval.nodetypeval=Node_illegal;
+      lexptr++;
+      return INCREMENT;
+    }
+    yylval.nodetypeval=Node_illegal;
+    return c;
+
+  case '!':
+    if(*lexptr=='=') {
+      yylval.nodetypeval=Node_notequal;
+      lexptr++;
+      return RELOP;
+    }
+    if(*lexptr=='~') {
+      yylval.nodetypeval=Node_nomatch;
+      lexptr++;
+      return MATCHOP;
+    }
+    yylval.nodetypeval=Node_illegal;
+    return c;
+
+  case '<':
+    if(*lexptr=='=') {
+      yylval.nodetypeval=Node_leq;
+      lexptr++;
+      return RELOP;
+    }
+    yylval.nodetypeval=Node_less;
+    return RELOP;
+
+  case '=':
+    if(*lexptr=='=') {
+      yylval.nodetypeval=Node_equal;
+      lexptr++;
+      return RELOP;
+    }
+    yylval.nodetypeval=Node_assign;
+    return ASSIGNOP;
+
+  case '>':
+    if(want_redirect) {
+      if (*lexptr == '>') {
+	yylval.nodetypeval = Node_redirect_append;
+	lexptr++;
+      } else 
+        yylval.nodetypeval = Node_redirect_output;
+      return REDIRECT_OP;
+    }
+    if(*lexptr=='=') {
+      yylval.nodetypeval=Node_geq;
+      lexptr++;
+      return RELOP;
+    }
+    yylval.nodetypeval=Node_greater;
+    return RELOP;
+
+  case '~':
+    yylval.nodetypeval=Node_match;
+    return MATCHOP;
+
+  case '}':		/* JF added did newline stuff.  Easier than hacking the grammar */
+    if(did_newline) {
+      did_newline=0;
+      return c;
+    }
+    did_newline++;
+    --lexptr;
+    return NEWLINE;
+
+  case '"':
+    while (*lexptr != '\0') {
+      switch (*lexptr++) {
+      case '\\':
+	if (*lexptr++ != '\0')
+	  break;
+	/* fall through */
+      case '\n':
+	yyerror ("unterminated string");
+	return ERROR;
+      case '\"':
+	yylval.sval = tokstart + 1;	/* JF Skip the doublequote */
+	++want_concat_token;
+	return YSTRING;
+      }
+    }
+    return ERROR;	/* JF this was one level up, wrong? */
+
+  case '-':
+    if(*lexptr=='=') {
+      yylval.nodetypeval=Node_assign_minus;
+      lexptr++;
+      return ASSIGNOP;
+    }
+    if(*lexptr=='-') {
+      yylval.nodetypeval=Node_illegal;
+      lexptr++;
+      return DECREMENT;
+    }
+    /* JF I think space tab comma and newline are the legal places for
+       a UMINUS.  Have I missed any? */
+    if((!isdigit(*lexptr) && *lexptr!='.') || (lexptr>lexptr_begin+1 &&
+ !index(" \t,\n",lexptr[-2]))) {
+    /* set node type to ILLEGAL because the action should set it to
+       the right thing */
+      yylval.nodetypeval = Node_illegal;
+      return c;
+    }
+  	/* FALL through into number code */
+  case '0':
+  case '1':
+  case '2':
+  case '3':
+  case '4':
+  case '5':
+  case '6':
+  case '7':
+  case '8':
+  case '9':
+  case '.':
+    /* It's a number */
+    if(c=='-') namelen=1;
+    else namelen=0;
+    for (; (c = tokstart[namelen]) != '\0'; namelen++) {
+      switch (c) {
+      case '.':
+	if (seen_point)
+	  goto got_number;
+	++seen_point;
+	break;
+      case 'e':
+      case 'E':
+	if (seen_e)
+	  goto got_number;
+	++seen_e;
+	if (tokstart[namelen+1] == '-' || tokstart[namelen+1] == '+')
+	  namelen++;
+	break;
+      case '0': case '1': case '2': case '3': case '4': 
+      case '5': case '6': case '7': case '8': case '9': 
+	break;
+      default:
+	goto got_number;
+      }
+    }
+
+got_number:
+    lexptr = tokstart + namelen;
+    yylval.fval = atof(tokstart);
+    ++want_concat_token;
+    return NUMBER;
+
+  case '&':
+    if(*lexptr=='&') {
+      yylval.nodetypeval=Node_and;
+      lexptr++;
+      return LEX_AND;
+    }
+    return ERROR;
+
+  case '|':
+    if(want_redirect) {
+      lexptr++;
+      yylval.nodetypeval = Node_redirect_pipe;
+      return REDIRECT_OP;
+    }
+    if(*lexptr=='|') {
+      yylval.nodetypeval=Node_or;
+      lexptr++;
+      return LEX_OR;
+    }
+    return ERROR;
+  }
+  
+  if (!isalpha(c)) {
+    yyerror ("Invalid char '%c' in expression\n", c);
+    return ERROR;
+  }
+
+  /* its some type of name-type-thing.  Find its length */
+  for (namelen = 0; is_identchar(tokstart[namelen]); namelen++)
+    ;
+
+
+  /* See if it is a special token.  */
+  for (toktab = tokentab; toktab->operator != NULL; toktab++) {
+    if(*tokstart==toktab->operator[0] &&
+       !strncmp(tokstart,toktab->operator,namelen) &&
+       toktab->operator[namelen]=='\0') {
+      lexptr=tokstart+namelen;
+      if(toktab->class == LEX_BUILTIN)
+        yylval.ptrval = toktab->ptr;
+      else
+        yylval.nodetypeval = toktab->value;
+      return toktab->class;
+    }
+  }
+
+  /* It's a name.  See how long it is.  */
+  yylval.sval = tokstart;
+  lexptr = tokstart+namelen;
+  ++want_concat_token;
+  return NAME;
+}
+
+/*VARARGS1*/
+yyerror (mesg,a1,a2,a3,a4,a5,a6,a7,a8)
+     char *mesg;
+{
+  register char *ptr,*beg;
+
+	/* Find the current line in the input file */
+  if(!lexptr) {
+    beg="(END OF FILE)";
+    ptr=beg+13;
+  } else {
+    if (*lexptr == '\n' && lexptr!=lexptr_begin)
+      --lexptr;
+    for (beg = lexptr;beg!=lexptr_begin && *beg != '\n';--beg)
+      ;
+    for (ptr = lexptr;*ptr && *ptr != '\n';ptr++) /*jfw: NL isn't guaranteed*/
+      ;
+    if(beg!=lexptr_begin)
+      beg++;
+  }
+  fprintf (stderr, "Error near line %d,  '%.*s'\n",lineno, ptr-beg, beg);
+  /* figure out line number, etc. later */
+  fprintf (stderr, mesg, a1, a2, a3, a4, a5, a6, a7, a8);
+  fprintf (stderr,"\n");
+  exit (1);
+}
+
+/* Parse a C escape sequence.  STRING_PTR points to a variable
+   containing a pointer to the string to parse.  That pointer
+   is updated past the characters we use.  The value of the
+   escape sequence is returned.
+
+   A negative value means the sequence \ newline was seen,
+   which is supposed to be equivalent to nothing at all.
+
+   If \ is followed by a null character, we return a negative
+   value and leave the string pointer pointing at the null character.
+
+   If \ is followed by 000, we return 0 and leave the string pointer
+   after the zeros.  A value of 0 does not mean end of string.  */
+
+static int
+parse_escape (string_ptr)
+     char **string_ptr;
+{
+  register int c = *(*string_ptr)++;
+  switch (c)
+    {
+    case 'a':
+      return '\a';
+    case 'b':
+      return '\b';
+    case 'e':
+      return 033;
+    case 'f':
+      return '\f';
+    case 'n':
+      return '\n';
+    case 'r':
+      return '\r';
+    case 't':
+      return '\t';
+    case 'v':
+      return '\v';
+    case '\n':
+      return -2;
+    case 0:
+      (*string_ptr)--;
+      return 0;
+    case '^':
+      c = *(*string_ptr)++;
+      if (c == '\\')
+	c = parse_escape (string_ptr);
+      if (c == '?')
+	return 0177;
+      return (c & 0200) | (c & 037);
+      
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+      {
+	register int i = c - '0';
+	register int count = 0;
+	while (++count < 3)
+	  {
+	    if ((c = *(*string_ptr)++) >= '0' && c <= '7')
+	      {
+		i *= 8;
+		i += c - '0';
+	      }
+	    else
+	      {
+		(*string_ptr)--;
+		break;
+	      }
+	  }
+	return i;
+      }
+    default:
+      return c;
+    }
+}