1 files changed, 101 insertions, 93 deletions
diff --git a/interpret.h b/interpret.h
index bb7cba96..8c9675bb 100644
--- a/interpret.h
+++ b/interpret.h
@@ -7,26 +7,35 @@
  * 
  * This file is part of GAWK, the GNU implementation of the
  * AWK Programming Language.
- * 
+ *
  * GAWK is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 3 of the License, or
  * (at your option) any later version.
- * 
+ *
  * GAWK is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- * 
+ *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
  */
 
+/*
+ * If "r" is a field, valref should normally be > 1, because the field is
+ * created initially with valref 1, and valref should be bumped when it is
+ * pushed onto the stack by Op_field_spec. On the other hand, if we are
+ * assigning to $n, then Op_store_field calls unref(*lhs) before assigning
+ * the new value, so that decrements valref. So if the RHS is a field with
+ * valref 1, that effectively means that this is an assignment like "$n = $n",
+ * so a no-op, other than triggering $0 reconstitution.
+ */
 #define UNFIELD(l, r) \
 { \
 	/* if was a field, turn it into a var */ \
-	if ((r->flags & FIELD) == 0 || r->valref == 1) { \
+	if ((r->flags & MALLOC) != 0 || r->valref == 1) { \
 		l = r; \
 	} else { \
 		l = dupnode(r); \
@@ -96,7 +105,7 @@ top:
 			/* avoid false source indications */
 			source = NULL;
 			sourceline = 0;
-			(void) nextfile(& curfile, true);	/* close input data file */ 
+			(void) nextfile(& curfile, true);	/* close input data file */
 			/*
 			 * This used to be:
 			 *
@@ -141,6 +150,7 @@ top:
 
 		case Op_push:
 		case Op_push_arg:
+		case Op_push_arg_untyped:
 		{
 			NODE *save_symbol;
 			bool isparam = false;
@@ -157,7 +167,7 @@ top:
 					m = m->orig_array;
 				}
 			}
-				
+
 			switch (m->type) {
 			case Node_var:
 				if (do_lint && var_uninitialized(m))
@@ -172,19 +182,23 @@ top:
 
 			case Node_var_new:
 uninitialized_scalar:
-				m->type = Node_var;
-				m->var_value = dupnode(Nnull_string);
+				if (op != Op_push_arg_untyped) {
+					/* convert untyped to scalar */
+					m->type = Node_var;
+					m->var_value = dupnode(Nnull_string);
+				}
 				if (do_lint)
 					lintwarn(isparam ?
 						_("reference to uninitialized argument `%s'") :
 						_("reference to uninitialized variable `%s'"),
 								save_symbol->vname);
-				m = dupnode(Nnull_string);
+				if (op != Op_push_arg_untyped)
+					m = dupnode(Nnull_string);
 				PUSH(m);
 				break;
 
 			case Node_var_array:
-				if (op == Op_push_arg)
+				if (op == Op_push_arg || op == Op_push_arg_untyped)
 					PUSH(m);
 				else
 					fatal(_("attempt to use array `%s' in a scalar context"),
@@ -195,7 +209,7 @@ uninitialized_scalar:
 				cant_happen();
 			}
 		}
-			break;	
+			break;
 
 		case Op_push_param:		/* function argument */
 			m = pc->memory;
@@ -233,7 +247,7 @@ uninitialized_scalar:
 			/* for FUNCTAB, get the name as the element value */
 			if (t1 == func_table) {
 				static bool warned = false;
-				
+
 				if (do_lint && ! warned) {
 					warned = true;
 					lintwarn(_("FUNCTAB is a gawk extension"));
@@ -251,7 +265,7 @@ uninitialized_scalar:
 			/* for SYMTAB, step through to the actual variable */
 			if (t1 == symbol_table) {
 				static bool warned = false;
-				
+
 				if (do_lint && ! warned) {
 					warned = true;
 					lintwarn(_("SYMTAB is a gawk extension"));
@@ -296,7 +310,7 @@ uninitialized_scalar:
 			t1 = POP_ARRAY();
 			if (do_lint && in_array(t1, t2) == NULL) {
 				t2 = force_string(t2);
-				if (pc->do_reference) 
+				if (pc->do_reference)
 					lintwarn(_("reference to uninitialized element `%s[\"%.*s\"]'"),
 						array_vname(t1), (int) t2->stlen, t2->stptr);
 				if (t2->stlen == 0)
@@ -349,12 +363,8 @@ uninitialized_scalar:
 			lhs = r_get_field(t1, (Func_ptr *) 0, true);
 			decr_sp();
 			DEREF(t1);
-			/* only for $0, up ref count */
-			if (*lhs == fields_arr[0]) {
-				r = *lhs;
-				UPREF(r);
-			} else
-				r = dupnode(*lhs);
+			r = *lhs;
+			UPREF(r);
 			PUSH(r);
 			break;
 
@@ -400,7 +410,7 @@ uninitialized_scalar:
 		case Op_jmp_true:
 			r = POP_SCALAR();
 			di = eval_condition(r);
-			DEREF(r);			
+			DEREF(r);
 			if (di)
 				JUMPTO(pc->target_jmp);
 			break;
@@ -436,37 +446,37 @@ uninitialized_scalar:
 			break;
 
 		case Op_equal:
-			r = node_Boolean[cmp_scalars() == 0];
+			r = node_Boolean[cmp_scalars(SCALAR_EQ_NEQ) == 0];
 			UPREF(r);
 			REPLACE(r);
 			break;
 
 		case Op_notequal:
-			r = node_Boolean[cmp_scalars() != 0];
+			r = node_Boolean[cmp_scalars(SCALAR_EQ_NEQ) != 0];
 			UPREF(r);
 			REPLACE(r);
 			break;
 
 		case Op_less:
-			r = node_Boolean[cmp_scalars() < 0];
+			r = node_Boolean[cmp_scalars(SCALAR_RELATIONAL) < 0];
 			UPREF(r);
 			REPLACE(r);
 			break;
 
 		case Op_greater:
-			r = node_Boolean[cmp_scalars() > 0];
+			r = node_Boolean[cmp_scalars(SCALAR_RELATIONAL) > 0];
 			UPREF(r);
 			REPLACE(r);
 			break;
 
 		case Op_leq:
-			r = node_Boolean[cmp_scalars() <= 0];
+			r = node_Boolean[cmp_scalars(SCALAR_RELATIONAL) <= 0];
 			UPREF(r);
 			REPLACE(r);
 			break;
 
 		case Op_geq:
-			r = node_Boolean[cmp_scalars() >= 0];
+			r = node_Boolean[cmp_scalars(SCALAR_RELATIONAL) >= 0];
 			UPREF(r);
 			REPLACE(r);
 			break;
@@ -491,7 +501,7 @@ plus:
 		case Op_minus:
 			t2 = POP_NUMBER();
 			x2 = t2->numbr;
-			DEREF(t2);			
+			DEREF(t2);
 minus:
 			t1 = TOP_NUMBER();
 			r = make_number(t1->numbr - x2);
@@ -541,7 +551,7 @@ quotient:
 			r = make_number(t1->numbr / x2);
 			DEREF(t1);
 			REPLACE(r);
-			break;		
+			break;
 
 		case Op_mod_i:
 			x2 = force_number(pc->memory)->numbr;
@@ -608,6 +618,11 @@ mod:
 			REPLACE(r);
 			break;
 
+		case Op_unary_plus:
+			// Force argument to be numeric
+			t1 = TOP_NUMBER();
+			break;
+
 		case Op_store_sub:
 			/*
 			 * array[sub] assignment optimization,
@@ -660,7 +675,7 @@ mod:
 			 * simple variable assignment optimization,
 			 * see awkgram.y (optimize_assignment)
 			 */
-	
+
 			lhs = get_lhs(pc->memory, false);
 			unref(*lhs);
 			r = pc->initval;	/* constant initializer */
@@ -703,37 +718,39 @@ mod:
 				*lhs = dupnode(t1);
 			}
 
-			if (t1 != t2 && t1->valref == 1 && (t1->flags & (MPFN|MPZN)) == 0) {
+			if (t1 != t2 && t1->valref == 1 && (t1->flags & (MALLOC|MPFN|MPZN)) == MALLOC) {
 				size_t nlen = t1->stlen + t2->stlen;
 
-				erealloc(t1->stptr, char *, nlen + 2, "r_interpret");
+				erealloc(t1->stptr, char *, nlen + 1, "r_interpret");
 				memcpy(t1->stptr + t1->stlen, t2->stptr, t2->stlen);
 				t1->stlen = nlen;
 				t1->stptr[nlen] = '\0';
-				t1->flags &= ~(NUMCUR|NUMBER|MAYBE_NUM|NUMINT|INTIND);
-				t1->flags |= (STRING|STRCUR);
-				t1->stfmt = -1;
+				/* clear flags except WSTRCUR (used below) */
+				t1->flags &= WSTRCUR;
+				/* configure as a string as in make_str_node */
+				t1->flags |= (MALLOC|STRING|STRCUR);
+				t1->stfmt = STFMT_UNUSED;
 
 				if ((t1->flags & WSTRCUR) != 0 && (t2->flags & WSTRCUR) != 0) {
 					size_t wlen = t1->wstlen + t2->wstlen;
 
 					erealloc(t1->wstptr, wchar_t *,
-							sizeof(wchar_t) * (wlen + 2), "r_interpret");
-					memcpy(t1->wstptr + t1->wstlen, t2->wstptr, t2->wstlen);
+							sizeof(wchar_t) * (wlen + 1), "r_interpret");
+					memcpy(t1->wstptr + t1->wstlen, t2->wstptr, t2->wstlen * sizeof(wchar_t));
 					t1->wstlen = wlen;
 					t1->wstptr[wlen] = L'\0';
-					t1->flags |= WSTRCUR;
 				} else
 					free_wstr(*lhs);
 			} else {
-				size_t nlen = t1->stlen + t2->stlen;  
+				size_t nlen = t1->stlen + t2->stlen;
 				char *p;
 
-				emalloc(p, char *, nlen + 2, "r_interpret");
+				emalloc(p, char *, nlen + 1, "r_interpret");
 				memcpy(p, t1->stptr, t1->stlen);
 				memcpy(p + t1->stlen, t2->stptr, t2->stlen);
+				/* N.B. No NUL-termination required, since make_str_node will do it. */
 				unref(*lhs);
-				t1 = *lhs = make_str_node(p, nlen, ALREADY_MALLOCED); 
+				t1 = *lhs = make_str_node(p, nlen, ALREADY_MALLOCED);
 			}
 			DEREF(t2);
 			break;
@@ -748,7 +765,7 @@ mod:
 			break;
 
 		case Op_subscript_assign:
-			/* conditionally execute post-assignment routine for an array element */ 
+			/* conditionally execute post-assignment routine for an array element */
 
 			if (set_idx != NULL) {
 				di = true;
@@ -825,12 +842,11 @@ mod:
 				t2 = TOP_SCALAR();	/* switch expression */
 				t2 = force_string(t2);
 				rp = re_update(m);
-				di = (research(rp, t2->stptr, 0, t2->stlen,
-							avoid_dfa(m, t2->stptr, t2->stlen)) >= 0);
+				di = (research(rp, t2->stptr, 0, t2->stlen, RE_NO_FLAGS) >= 0);
 			} else {
 				t1 = POP_SCALAR();	/* case value */
 				t2 = TOP_SCALAR();	/* switch expression */
-				di = (cmp_nodes(t2, t1) == 0);
+				di = (cmp_nodes(t2, t1, true) == 0);
 				DEREF(t1);
 			}
 
@@ -941,22 +957,30 @@ arrayfor:
 			break;
 
 		case Op_ext_builtin:
-		case Op_old_ext_builtin:
 		{
-			int arg_count = pc->expr_count;
+			size_t arg_count = pc->expr_count;	/* actual number of arguments passed */
+			awk_ext_func_t *f = pc[1].c_func;
+			size_t min_req = f->min_required_args;
+			size_t max_expect = f->max_expected_args;
 			awk_value_t result;
 
+			if (arg_count < min_req)
+				fatal(_("%s: called with %lu arguments, expecting at least %lu"),
+						pc[1].func_name, (unsigned long) arg_count, (unsigned long) min_req);	/* %lu requires unsigned long, not size_t */
+
+			if (do_lint && ! f->suppress_lint && arg_count > max_expect)
+				lintwarn(_("%s: called with %lu arguments, expecting no more than %lu"),
+						pc[1].func_name, (unsigned long) arg_count, (unsigned long) max_expect);	/* same cast as above */
+
 			PUSH_CODE(pc);
-			if (op == Op_ext_builtin)
-				r = awk_value_to_node(pc->extfunc(arg_count, & result));
-			else
-				r = pc->builtin(arg_count);
+			r = awk_value_to_node(pc->extfunc(arg_count, & result, f));
 			(void) POP_CODE();
 			while (arg_count-- > 0) {
 				t1 = POP();
 				if (t1->type == Node_val)
 					DEREF(t1);
 			}
+			free_api_string_copies();
 			PUSH(r);
 		}
 			break;
@@ -984,29 +1008,19 @@ arrayfor:
 				r = POP_STRING();
 				unref(m->re_exp);
 				m->re_exp = r;
+			} else if (m->type == Node_val) {
+				assert((m->flags & REGEX) != 0);
+				UPREF(m);
 			}
 			PUSH(m);
 			break;
-			
+
 		case Op_match_rec:
 			m = pc->memory;
 			t1 = *get_field(0, (Func_ptr *) 0);
 match_re:
 			rp = re_update(m);
-			/*
-			 * Any place where research() is called with a last parameter of
-			 * zero, we need to use the avoid_dfa test. This appears here and
-			 * in the code for Op_K_case.
-			 *
-			 * A new or improved dfa that distinguishes beginning/end of
-			 * string from beginning/end of line will allow us to get rid of
-			 * this hack.
-			 *
-			 * The avoid_dfa() function is in re.c; it is not very smart.
-			 */
-
-			di = research(rp, t1->stptr, 0, t1->stlen,
-								avoid_dfa(m, t1->stptr, t1->stlen));
+			di = research(rp, t1->stptr, 0, t1->stlen, RE_NO_FLAGS);
 			di = (di == -1) ^ (op != Op_nomatch);
 			if (op != Op_match_rec) {
 				decr_sp();
@@ -1061,7 +1075,7 @@ match_re:
 			} else if (f->type == Node_builtin_func) {
 				int arg_count = (pc + 1)->expr_count;
 				builtin_func_t the_func = lookup_builtin(t1->stptr);
-				
+
 				assert(the_func != NULL);
 
 				/* call it */
@@ -1077,8 +1091,7 @@ match_re:
 				PUSH(r);
 				break;
 			} else if (f->type != Node_func) {
-				if (   f->type == Node_ext_func
-				    || f->type == Node_old_ext_func) {
+				if (f->type == Node_ext_func) {
 					/* code copied from below, keep in sync */
 					INSTRUCTION *bc;
 					char *fname = pc->func_name;
@@ -1089,20 +1102,17 @@ match_re:
 
 					bc = f->code_ptr;
 					assert(bc->opcode == Op_symbol);
-					if (f->type == Node_ext_func)
-						npc[0].opcode = Op_ext_builtin;	/* self modifying code */
-					else
-						npc[0].opcode = Op_old_ext_builtin;	/* self modifying code */
+					npc[0].opcode = Op_ext_builtin;	/* self modifying code */
 					npc[0].extfunc = bc->extfunc;
 					npc[0].expr_count = arg_count;		/* actual argument count */
 					npc[1] = pc[1];
 					npc[1].func_name = fname;	/* name of the builtin */
-					npc[1].expr_count = bc->expr_count;	/* defined max # of arguments */
-					ni = npc; 
+					npc[1].c_func = bc->c_func;
+					ni = npc;
 					JUMPTO(ni);
 				} else
 					fatal(_("function called indirectly through `%s' does not exist"),
-							pc->func_name);	
+							pc->func_name);
 			}
 			pc->func_body = f;     /* save for next call */
 
@@ -1118,12 +1128,12 @@ match_re:
 			f = pc->func_body;
 			if (f == NULL) {
 				f = lookup(pc->func_name);
-				if (f == NULL || (f->type != Node_func && f->type != Node_ext_func && f->type != Node_old_ext_func))
+				if (f == NULL || (f->type != Node_func && f->type != Node_ext_func))
 					fatal(_("function `%s' not defined"), pc->func_name);
 				pc->func_body = f;     /* save for next call */
 			}
 
-			if (f->type == Node_ext_func || f->type == Node_old_ext_func) {
+			if (f->type == Node_ext_func) {
 				/* keep in sync with indirect call code */
 				INSTRUCTION *bc;
 				char *fname = pc->func_name;
@@ -1131,15 +1141,12 @@ match_re:
 
 				bc = f->code_ptr;
 				assert(bc->opcode == Op_symbol);
-				if (f->type == Node_ext_func)
-					pc->opcode = Op_ext_builtin;	/* self modifying code */
-				else
-					pc->opcode = Op_old_ext_builtin;	/* self modifying code */
+				pc->opcode = Op_ext_builtin;	/* self modifying code */
 				pc->extfunc = bc->extfunc;
-				pc->expr_count = arg_count;		/* actual argument count */
+				pc->expr_count = arg_count;	/* actual argument count */
 				(pc + 1)->func_name = fname;	/* name of the builtin */
-				(pc + 1)->expr_count = bc->expr_count;	/* defined max # of arguments */
-				ni = pc; 
+				(pc + 1)->c_func = bc->c_func;	/* min and max args */
+				ni = pc;
 				JUMPTO(ni);
 			}
 
@@ -1151,7 +1158,7 @@ match_re:
 			m = POP_SCALAR();       /* return value */
 
 			ni = pop_fcall();
-	
+
 			/* put the return value back on stack */
 			PUSH(m);
 
@@ -1176,7 +1183,7 @@ match_re:
 
 					/* Save execution state so that we can return to it
 					 * from Op_after_beginfile or Op_after_endfile.
-					 */ 
+					 */
 
 					push_exec_state(pc, currule, source, stack_ptr);
 
@@ -1234,8 +1241,8 @@ match_re:
 				execute beginfile block */
 		}
 			break;
-			
-		case Op_get_record:		
+
+		case Op_get_record:
 		{
 			int errcode = 0;
 
@@ -1293,13 +1300,13 @@ match_re:
 					JUMPTO(ni);
 				} else {
 					/* do run ENDFILE block(s) first. */
-					
+
 					/* Execution state to return to in Op_after_endfile. */
 					push_exec_state(ni, currule, source, stack_ptr);
 
 					JUMPTO(pc->target_endfile);
-				}				
-			} /* else 
+				}
+			} /* else
 				Start over with the first rule. */
 
 			/* empty the run-time stack to avoid memory leak */
@@ -1390,7 +1397,7 @@ match_re:
 				/* not already triggered and left expression is true */
 				decr_sp();
 				ip->triggered = true;
-				JUMPTO(ip->target_jmp);	/* evaluate right expression */ 
+				JUMPTO(ip->target_jmp);	/* evaluate right expression */
 			}
 
 			result = ip->triggered || di;
@@ -1416,6 +1423,7 @@ match_re:
 		case Op_K_if:
 		case Op_K_else:
 		case Op_cond_exp:
+		case Op_comment:
 			break;
 
 		default: