aboutsummaryrefslogtreecommitdiffstats
path: root/awkgram.c
diff options
context:
space:
mode:
authorArnold D. Robbins <arnold@skeeve.com>2011-07-15 15:35:34 +0300
committerArnold D. Robbins <arnold@skeeve.com>2011-07-15 15:35:34 +0300
commit46f2db24d9e7f792f60149f5ee89ef4f22e3f4a9 (patch)
tree4522982b751d8643ee05022e60bdac757dfd0956 /awkgram.c
parent84658669a180b3f1e63d20b6ea166f7c5733786b (diff)
downloadegawk-46f2db24d9e7f792f60149f5ee89ef4f22e3f4a9.tar.gz
egawk-46f2db24d9e7f792f60149f5ee89ef4f22e3f4a9.tar.bz2
egawk-46f2db24d9e7f792f60149f5ee89ef4f22e3f4a9.zip
Fix gsub losing white space when working on fields.
Diffstat (limited to 'awkgram.c')
-rw-r--r--awkgram.c153
1 files changed, 83 insertions, 70 deletions
diff --git a/awkgram.c b/awkgram.c
index 4edec572..876821d5 100644
--- a/awkgram.c
+++ b/awkgram.c
@@ -4485,6 +4485,7 @@ struct token {
# define RESX 0x0800 /* Bell Labs Research extension */
# define BREAK 0x1000 /* break allowed inside */
# define CONTINUE 0x2000 /* continue allowed inside */
+
NODE *(*ptr)(int); /* function that implements this keyword */
};
@@ -4542,9 +4543,9 @@ static const struct token tokentab[] = {
{"for", Op_K_for, LEX_FOR, BREAK|CONTINUE, 0},
{"func", Op_func, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0},
{"function",Op_func, LEX_FUNCTION, NOT_OLD, 0},
-{"gensub", Op_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub},
+{"gensub", Op_sub_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), 0},
{"getline", Op_K_getline_redir, LEX_GETLINE, NOT_OLD, 0},
-{"gsub", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub},
+{"gsub", Op_sub_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), 0},
{"if", Op_K_if, LEX_IF, 0, 0},
{"in", Op_symbol, LEX_IN, 0, 0},
{"include", Op_symbol, LEX_INCLUDE, GAWKX, 0},
@@ -4575,7 +4576,7 @@ static const struct token tokentab[] = {
#endif
{"strftime", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime},
{"strtonum", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum},
-{"sub", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub},
+{"sub", Op_sub_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), 0},
{"substr", Op_builtin, LEX_BUILTIN, A(2)|A(3), do_substr},
{"switch", Op_K_switch, LEX_SWITCH, GAWKX|BREAK, 0},
{"system", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system},
@@ -6286,8 +6287,6 @@ snode(INSTRUCTION *subn, INSTRUCTION *r)
assert(nexp > 0);
}
- r->builtin = tokentab[idx].ptr;
-
/* check against how many args. are allowed for this builtin */
args_allowed = tokentab[idx].flags & ARGS;
if (args_allowed && (args_allowed & A(nexp)) == 0) {
@@ -6296,7 +6295,86 @@ snode(INSTRUCTION *subn, INSTRUCTION *r)
return NULL;
}
+ /* special processing for sub, gsub and gensub */
+
+ if (tokentab[idx].value == Op_sub_builtin) {
+ const char *operator = tokentab[idx].operator;
+
+ r->sub_flags = 0;
+
+ arg = subn->nexti; /* first arg list */
+ (void) mk_rexp(arg);
+
+ if (strcmp(operator, "gensub") != 0) {
+ /* sub and gsub */
+
+ if (strcmp(operator, "gsub") == 0)
+ r->sub_flags |= GSUB;
+
+ arg = arg->lasti->nexti; /* 2nd arg list */
+ if (nexp == 2) {
+ INSTRUCTION *expr;
+
+ expr = list_create(instruction(Op_push_i));
+ expr->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
+ (void) mk_expression_list(subn,
+ list_append(expr, instruction(Op_field_spec)));
+ }
+
+ arg = arg->lasti->nexti; /* third arg list */
+ ip = arg->lasti;
+ if (ip->opcode == Op_push_i) {
+ if (do_lint)
+ lintwarn(_("%s: string literal as last arg of substitute has no effect"),
+ operator);
+ r->sub_flags |= LITERAL;
+ } else {
+ if (make_assignable(ip) == NULL)
+ yyerror(_("%s third parameter is not a changeable object"),
+ operator);
+ else
+ ip->do_reference = TRUE;
+ }
+
+ r->expr_count = count_expressions(&subn, FALSE);
+ ip = subn->lasti;
+
+ (void) list_append(subn, r);
+
+ /* add after_assign code */
+ if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) {
+ (void) list_append(subn, instruction(Op_var_assign));
+ subn->lasti->memory = ip->memory;
+ subn->lasti->assign_var = ip->memory->var_assign;
+ r->sub_flags |= AFTER_ASSIGN;
+ } else if (ip->opcode == Op_field_spec_lhs) {
+ (void) list_append(subn, instruction(Op_field_assign));
+ subn->lasti->field_assign = (Func_ptr) 0;
+ ip->target_assign = subn->lasti;
+ r->sub_flags |= AFTER_ASSIGN;
+ }
+ return subn;
+
+ } else {
+ /* gensub */
+
+ r->sub_flags |= GENSUB;
+ if (nexp == 3) {
+ ip = instruction(Op_push_i);
+ ip->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
+ (void) mk_expression_list(subn,
+ list_append(list_create(ip), instruction(Op_field_spec)));
+ }
+
+ r->expr_count = count_expressions(&subn, FALSE);
+ return list_append(subn, r);
+ }
+ }
+
+ r->builtin = tokentab[idx].ptr;
+
/* special case processing for a few builtins */
+
if (r->builtin == do_length) {
if (nexp == 0) {
/* no args. Use $0 */
@@ -6338,71 +6416,6 @@ snode(INSTRUCTION *subn, INSTRUCTION *r)
if (/*ip == arg->nexti && */ ip->opcode == Op_push)
ip->opcode = Op_push_array;
}
- } else if (r->builtin == do_sub || r->builtin == do_gsub) {
- int literal = FALSE;
-
- arg = subn->nexti; /* first arg list */
- (void) mk_rexp(arg);
-
- arg = arg->lasti->nexti; /* 2nd arg list */
- if (nexp == 2) {
- INSTRUCTION *expr;
- expr = list_create(instruction(Op_push_i));
- expr->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
- (void) mk_expression_list(subn,
- list_append(expr, instruction(Op_field_spec)));
- }
-
- arg = arg->lasti->nexti; /* third arg list */
- ip = arg->lasti;
- if (ip->opcode == Op_push_i) {
- if (do_lint)
- lintwarn(_("%s: string literal as last arg of substitute has no effect"),
- (r->builtin == do_sub) ? "sub" : "gsub");
- literal = TRUE;
- } else {
- if (make_assignable(ip) == NULL)
- yyerror(_("%s third parameter is not a changeable object"),
- (r->builtin == do_sub) ? "sub" : "gsub");
- else
- ip->do_reference = TRUE;
- }
-
- /* kludge: This is one of the few cases
- * when we need to know the type of item on stack.
- * In case of string literal as the last argument,
- * pass 4 as # of args (See sub_common code in builtin.c).
- * Other cases like length(array or scalar) seem
- * to work out ok.
- */
-
- r->expr_count = count_expressions(&subn, FALSE) + !!literal;
- ip = subn->lasti;
-
- (void) list_append(subn, r);
-
- /* add after_assign bytecode(s) */
- if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) {
- (void) list_append(subn, instruction(Op_var_assign));
- subn->lasti->memory = ip->memory;
- subn->lasti->assign_var = ip->memory->var_assign;
- } else if (ip->opcode == Op_field_spec_lhs) {
- (void) list_append(subn, instruction(Op_field_assign));
- subn->lasti->field_assign = (Func_ptr) 0;
- ip->target_assign = subn->lasti;
- }
- return subn;
- } else if (r->builtin == do_gensub) {
- if (nexp == 3) {
- arg = subn->nexti->lasti->nexti->lasti->nexti; /* 3rd arg list */
- ip = instruction(Op_push_i);
- ip->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER));
- (void) mk_expression_list(subn,
- list_append(list_create(ip),
- instruction(Op_field_spec)));
- }
- arg = subn->nexti; /* first arg list */
- (void) mk_rexp(arg);
} else if (r->builtin == do_split) {
arg = subn->nexti->lasti->nexti; /* 2nd arg list */
ip = arg->lasti;