diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2011-07-15 15:35:34 +0300 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2011-07-15 15:35:34 +0300 |
commit | 46f2db24d9e7f792f60149f5ee89ef4f22e3f4a9 (patch) | |
tree | 4522982b751d8643ee05022e60bdac757dfd0956 | |
parent | 84658669a180b3f1e63d20b6ea166f7c5733786b (diff) | |
download | egawk-46f2db24d9e7f792f60149f5ee89ef4f22e3f4a9.tar.gz egawk-46f2db24d9e7f792f60149f5ee89ef4f22e3f4a9.tar.bz2 egawk-46f2db24d9e7f792f60149f5ee89ef4f22e3f4a9.zip |
Fix gsub losing white space when working on fields.
-rw-r--r-- | ChangeLog | 18 | ||||
-rw-r--r-- | awk.h | 15 | ||||
-rw-r--r-- | awkgram.c | 153 | ||||
-rw-r--r-- | awkgram.y | 153 | ||||
-rw-r--r-- | builtin.c | 207 | ||||
-rw-r--r-- | debug.c | 32 | ||||
-rw-r--r-- | eval.c | 36 | ||||
-rw-r--r-- | profile.c | 14 | ||||
-rw-r--r-- | test/ChangeLog | 5 | ||||
-rw-r--r-- | test/Makefile.am | 4 | ||||
-rw-r--r-- | test/Makefile.in | 9 | ||||
-rw-r--r-- | test/Maketests | 5 |
12 files changed, 388 insertions, 263 deletions
@@ -1,3 +1,21 @@ +2011-07-15 John Haque <j.eh@mchsi.com> + + * awk.h (Op_sub_builtin): New opcode. + (GSUB, GENSUB, AFTER_ASSIGN, LITERAL): New flags for + Op_sub_builtin. + * awkgram.y (struct tokentab): Change opcode to Op_sub_builtin + for sub, gsub and gensub. + (snode): Update processing of sub, gsub and gensub. + * builtin.c (do_sub, do_gsub, do_gensub): Nuke. + (sub_common): Renamed to do_sub. Relocate gensub argument + handling code from do_gensub to here; Simplify the code a + little bit. + * eval.c (r_interpret): Handle Op_sub_builtin. Avoid field + re-splitting or $0 rebuilding if (g)sub target string is + a field and no substitutions were done. + * pprint (profile.c): Add case for the new opcode. + * print_instruction (debug.c): Ditto. + 2011-07-15 Arnold D. Robbins <arnold@skeeve.com> * awk.h: Typo fix: "loner" --> longer. Thanks to Nelson Beebe. @@ -521,6 +521,7 @@ typedef enum opcodeval { Op_K_nextfile, Op_builtin, + Op_sub_builtin, /* sub, gsub and gensub */ Op_in_array, /* boolean test of membership in array */ /* function call instruction */ @@ -626,6 +627,16 @@ typedef struct exp_instruction { #define target_jmp d.di #define target_break x.xi +/* Op_sub_builtin */ +#define sub_flags d.dl +#define GSUB 0x01 /* builtin is gsub */ +#define GENSUB 0x02 /* builtin is gensub */ +#define AFTER_ASSIGN 0x04 /* (g)sub target is a field or a special var with + * set_XX routine. + */ +#define LITERAL 0x08 /* target is a literal string */ + + /* Op_K_exit */ #define target_end d.di #define target_atexit x.xi @@ -1181,9 +1192,7 @@ extern NODE *do_cos(int nargs); extern NODE *do_rand(int nargs); extern NODE *do_srand(int nargs); extern NODE *do_match(int nargs); -extern NODE *do_gsub(int nargs); -extern NODE *do_sub(int nargs); -extern NODE *do_gensub(int nargs); +extern NODE *do_sub(int nargs, unsigned int flags, int *num_matches); extern NODE *format_tree(const char *, size_t, NODE **, long); extern NODE *do_lshift(int nargs); extern NODE *do_rshift(int nargs); @@ -4485,6 +4485,7 @@ struct token { # define RESX 0x0800 /* Bell Labs Research extension */ # define BREAK 0x1000 /* break allowed inside */ # define CONTINUE 0x2000 /* continue allowed inside */ + NODE *(*ptr)(int); /* function that implements this keyword */ }; @@ -4542,9 +4543,9 @@ static const struct token tokentab[] = { {"for", Op_K_for, LEX_FOR, BREAK|CONTINUE, 0}, {"func", Op_func, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0}, {"function",Op_func, LEX_FUNCTION, NOT_OLD, 0}, -{"gensub", Op_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub}, +{"gensub", Op_sub_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), 0}, {"getline", Op_K_getline_redir, LEX_GETLINE, NOT_OLD, 0}, -{"gsub", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub}, +{"gsub", Op_sub_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), 0}, {"if", Op_K_if, LEX_IF, 0, 0}, {"in", Op_symbol, LEX_IN, 0, 0}, {"include", Op_symbol, LEX_INCLUDE, GAWKX, 0}, @@ -4575,7 +4576,7 @@ static const struct token tokentab[] = { #endif {"strftime", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime}, {"strtonum", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum}, -{"sub", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub}, +{"sub", Op_sub_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), 0}, {"substr", Op_builtin, LEX_BUILTIN, A(2)|A(3), do_substr}, {"switch", Op_K_switch, LEX_SWITCH, GAWKX|BREAK, 0}, {"system", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system}, @@ -6286,8 +6287,6 @@ snode(INSTRUCTION *subn, INSTRUCTION *r) assert(nexp > 0); } - r->builtin = tokentab[idx].ptr; - /* check against how many args. are allowed for this builtin */ args_allowed = tokentab[idx].flags & ARGS; if (args_allowed && (args_allowed & A(nexp)) == 0) { @@ -6296,7 +6295,86 @@ snode(INSTRUCTION *subn, INSTRUCTION *r) return NULL; } + /* special processing for sub, gsub and gensub */ + + if (tokentab[idx].value == Op_sub_builtin) { + const char *operator = tokentab[idx].operator; + + r->sub_flags = 0; + + arg = subn->nexti; /* first arg list */ + (void) mk_rexp(arg); + + if (strcmp(operator, "gensub") != 0) { + /* sub and gsub */ + + if (strcmp(operator, "gsub") == 0) + r->sub_flags |= GSUB; + + arg = arg->lasti->nexti; /* 2nd arg list */ + if (nexp == 2) { + INSTRUCTION *expr; + + expr = list_create(instruction(Op_push_i)); + expr->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER)); + (void) mk_expression_list(subn, + list_append(expr, instruction(Op_field_spec))); + } + + arg = arg->lasti->nexti; /* third arg list */ + ip = arg->lasti; + if (ip->opcode == Op_push_i) { + if (do_lint) + lintwarn(_("%s: string literal as last arg of substitute has no effect"), + operator); + r->sub_flags |= LITERAL; + } else { + if (make_assignable(ip) == NULL) + yyerror(_("%s third parameter is not a changeable object"), + operator); + else + ip->do_reference = TRUE; + } + + r->expr_count = count_expressions(&subn, FALSE); + ip = subn->lasti; + + (void) list_append(subn, r); + + /* add after_assign code */ + if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) { + (void) list_append(subn, instruction(Op_var_assign)); + subn->lasti->memory = ip->memory; + subn->lasti->assign_var = ip->memory->var_assign; + r->sub_flags |= AFTER_ASSIGN; + } else if (ip->opcode == Op_field_spec_lhs) { + (void) list_append(subn, instruction(Op_field_assign)); + subn->lasti->field_assign = (Func_ptr) 0; + ip->target_assign = subn->lasti; + r->sub_flags |= AFTER_ASSIGN; + } + return subn; + + } else { + /* gensub */ + + r->sub_flags |= GENSUB; + if (nexp == 3) { + ip = instruction(Op_push_i); + ip->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER)); + (void) mk_expression_list(subn, + list_append(list_create(ip), instruction(Op_field_spec))); + } + + r->expr_count = count_expressions(&subn, FALSE); + return list_append(subn, r); + } + } + + r->builtin = tokentab[idx].ptr; + /* special case processing for a few builtins */ + if (r->builtin == do_length) { if (nexp == 0) { /* no args. Use $0 */ @@ -6338,71 +6416,6 @@ snode(INSTRUCTION *subn, INSTRUCTION *r) if (/*ip == arg->nexti && */ ip->opcode == Op_push) ip->opcode = Op_push_array; } - } else if (r->builtin == do_sub || r->builtin == do_gsub) { - int literal = FALSE; - - arg = subn->nexti; /* first arg list */ - (void) mk_rexp(arg); - - arg = arg->lasti->nexti; /* 2nd arg list */ - if (nexp == 2) { - INSTRUCTION *expr; - expr = list_create(instruction(Op_push_i)); - expr->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER)); - (void) mk_expression_list(subn, - list_append(expr, instruction(Op_field_spec))); - } - - arg = arg->lasti->nexti; /* third arg list */ - ip = arg->lasti; - if (ip->opcode == Op_push_i) { - if (do_lint) - lintwarn(_("%s: string literal as last arg of substitute has no effect"), - (r->builtin == do_sub) ? "sub" : "gsub"); - literal = TRUE; - } else { - if (make_assignable(ip) == NULL) - yyerror(_("%s third parameter is not a changeable object"), - (r->builtin == do_sub) ? "sub" : "gsub"); - else - ip->do_reference = TRUE; - } - - /* kludge: This is one of the few cases - * when we need to know the type of item on stack. - * In case of string literal as the last argument, - * pass 4 as # of args (See sub_common code in builtin.c). - * Other cases like length(array or scalar) seem - * to work out ok. - */ - - r->expr_count = count_expressions(&subn, FALSE) + !!literal; - ip = subn->lasti; - - (void) list_append(subn, r); - - /* add after_assign bytecode(s) */ - if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) { - (void) list_append(subn, instruction(Op_var_assign)); - subn->lasti->memory = ip->memory; - subn->lasti->assign_var = ip->memory->var_assign; - } else if (ip->opcode == Op_field_spec_lhs) { - (void) list_append(subn, instruction(Op_field_assign)); - subn->lasti->field_assign = (Func_ptr) 0; - ip->target_assign = subn->lasti; - } - return subn; - } else if (r->builtin == do_gensub) { - if (nexp == 3) { - arg = subn->nexti->lasti->nexti->lasti->nexti; /* 3rd arg list */ - ip = instruction(Op_push_i); - ip->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER)); - (void) mk_expression_list(subn, - list_append(list_create(ip), - instruction(Op_field_spec))); - } - arg = subn->nexti; /* first arg list */ - (void) mk_rexp(arg); } else if (r->builtin == do_split) { arg = subn->nexti->lasti->nexti; /* 2nd arg list */ ip = arg->lasti; @@ -1795,6 +1795,7 @@ struct token { # define RESX 0x0800 /* Bell Labs Research extension */ # define BREAK 0x1000 /* break allowed inside */ # define CONTINUE 0x2000 /* continue allowed inside */ + NODE *(*ptr)(int); /* function that implements this keyword */ }; @@ -1852,9 +1853,9 @@ static const struct token tokentab[] = { {"for", Op_K_for, LEX_FOR, BREAK|CONTINUE, 0}, {"func", Op_func, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0}, {"function",Op_func, LEX_FUNCTION, NOT_OLD, 0}, -{"gensub", Op_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub}, +{"gensub", Op_sub_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), 0}, {"getline", Op_K_getline_redir, LEX_GETLINE, NOT_OLD, 0}, -{"gsub", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub}, +{"gsub", Op_sub_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), 0}, {"if", Op_K_if, LEX_IF, 0, 0}, {"in", Op_symbol, LEX_IN, 0, 0}, {"include", Op_symbol, LEX_INCLUDE, GAWKX, 0}, @@ -1885,7 +1886,7 @@ static const struct token tokentab[] = { #endif {"strftime", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime}, {"strtonum", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum}, -{"sub", Op_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub}, +{"sub", Op_sub_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), 0}, {"substr", Op_builtin, LEX_BUILTIN, A(2)|A(3), do_substr}, {"switch", Op_K_switch, LEX_SWITCH, GAWKX|BREAK, 0}, {"system", Op_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system}, @@ -3596,8 +3597,6 @@ snode(INSTRUCTION *subn, INSTRUCTION *r) assert(nexp > 0); } - r->builtin = tokentab[idx].ptr; - /* check against how many args. are allowed for this builtin */ args_allowed = tokentab[idx].flags & ARGS; if (args_allowed && (args_allowed & A(nexp)) == 0) { @@ -3606,7 +3605,86 @@ snode(INSTRUCTION *subn, INSTRUCTION *r) return NULL; } + /* special processing for sub, gsub and gensub */ + + if (tokentab[idx].value == Op_sub_builtin) { + const char *operator = tokentab[idx].operator; + + r->sub_flags = 0; + + arg = subn->nexti; /* first arg list */ + (void) mk_rexp(arg); + + if (strcmp(operator, "gensub") != 0) { + /* sub and gsub */ + + if (strcmp(operator, "gsub") == 0) + r->sub_flags |= GSUB; + + arg = arg->lasti->nexti; /* 2nd arg list */ + if (nexp == 2) { + INSTRUCTION *expr; + + expr = list_create(instruction(Op_push_i)); + expr->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER)); + (void) mk_expression_list(subn, + list_append(expr, instruction(Op_field_spec))); + } + + arg = arg->lasti->nexti; /* third arg list */ + ip = arg->lasti; + if (ip->opcode == Op_push_i) { + if (do_lint) + lintwarn(_("%s: string literal as last arg of substitute has no effect"), + operator); + r->sub_flags |= LITERAL; + } else { + if (make_assignable(ip) == NULL) + yyerror(_("%s third parameter is not a changeable object"), + operator); + else + ip->do_reference = TRUE; + } + + r->expr_count = count_expressions(&subn, FALSE); + ip = subn->lasti; + + (void) list_append(subn, r); + + /* add after_assign code */ + if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) { + (void) list_append(subn, instruction(Op_var_assign)); + subn->lasti->memory = ip->memory; + subn->lasti->assign_var = ip->memory->var_assign; + r->sub_flags |= AFTER_ASSIGN; + } else if (ip->opcode == Op_field_spec_lhs) { + (void) list_append(subn, instruction(Op_field_assign)); + subn->lasti->field_assign = (Func_ptr) 0; + ip->target_assign = subn->lasti; + r->sub_flags |= AFTER_ASSIGN; + } + return subn; + + } else { + /* gensub */ + + r->sub_flags |= GENSUB; + if (nexp == 3) { + ip = instruction(Op_push_i); + ip->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER)); + (void) mk_expression_list(subn, + list_append(list_create(ip), instruction(Op_field_spec))); + } + + r->expr_count = count_expressions(&subn, FALSE); + return list_append(subn, r); + } + } + + r->builtin = tokentab[idx].ptr; + /* special case processing for a few builtins */ + if (r->builtin == do_length) { if (nexp == 0) { /* no args. Use $0 */ @@ -3648,71 +3726,6 @@ snode(INSTRUCTION *subn, INSTRUCTION *r) if (/*ip == arg->nexti && */ ip->opcode == Op_push) ip->opcode = Op_push_array; } - } else if (r->builtin == do_sub || r->builtin == do_gsub) { - int literal = FALSE; - - arg = subn->nexti; /* first arg list */ - (void) mk_rexp(arg); - - arg = arg->lasti->nexti; /* 2nd arg list */ - if (nexp == 2) { - INSTRUCTION *expr; - expr = list_create(instruction(Op_push_i)); - expr->nexti->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER)); - (void) mk_expression_list(subn, - list_append(expr, instruction(Op_field_spec))); - } - - arg = arg->lasti->nexti; /* third arg list */ - ip = arg->lasti; - if (ip->opcode == Op_push_i) { - if (do_lint) - lintwarn(_("%s: string literal as last arg of substitute has no effect"), - (r->builtin == do_sub) ? "sub" : "gsub"); - literal = TRUE; - } else { - if (make_assignable(ip) == NULL) - yyerror(_("%s third parameter is not a changeable object"), - (r->builtin == do_sub) ? "sub" : "gsub"); - else - ip->do_reference = TRUE; - } - - /* kludge: This is one of the few cases - * when we need to know the type of item on stack. - * In case of string literal as the last argument, - * pass 4 as # of args (See sub_common code in builtin.c). - * Other cases like length(array or scalar) seem - * to work out ok. - */ - - r->expr_count = count_expressions(&subn, FALSE) + !!literal; - ip = subn->lasti; - - (void) list_append(subn, r); - - /* add after_assign bytecode(s) */ - if (ip->opcode == Op_push_lhs && ip->memory->type == Node_var && ip->memory->var_assign) { - (void) list_append(subn, instruction(Op_var_assign)); - subn->lasti->memory = ip->memory; - subn->lasti->assign_var = ip->memory->var_assign; - } else if (ip->opcode == Op_field_spec_lhs) { - (void) list_append(subn, instruction(Op_field_assign)); - subn->lasti->field_assign = (Func_ptr) 0; - ip->target_assign = subn->lasti; - } - return subn; - } else if (r->builtin == do_gensub) { - if (nexp == 3) { - arg = subn->nexti->lasti->nexti->lasti->nexti; /* 3rd arg list */ - ip = instruction(Op_push_i); - ip->memory = mk_number((AWKNUM) 0.0, (PERM|NUMCUR|NUMBER)); - (void) mk_expression_list(subn, - list_append(list_create(ip), - instruction(Op_field_spec))); - } - arg = subn->nexti; /* first arg list */ - (void) mk_rexp(arg); } else if (r->builtin == do_split) { arg = subn->nexti->lasti->nexti; /* 2nd arg list */ ip = arg->lasti; @@ -72,7 +72,6 @@ extern NODE **fields_arr; extern int output_is_tty; extern FILE *output_fp; -static NODE *sub_common(int nargs, long how_many, int backdigs); #define POP_TWO_SCALARS(s1, s2) \ s2 = POP_SCALAR(); \ @@ -2319,7 +2318,7 @@ do_match(int nargs) return make_number((AWKNUM) rstart); } -/* sub_common --- the common code (does the work) for sub, gsub, and gensub */ +/* do_sub --- do the work for sub, gsub, and gensub */ /* * Gsub can be tricksy; particularly when handling the case of null strings. @@ -2412,12 +2411,12 @@ do_match(int nargs) * NB: `howmany' conflicts with a SunOS 4.x macro in <sys/param.h>. */ -static NODE * -sub_common(int nargs, long how_many, int backdigs) +NODE * +do_sub(int nargs, unsigned int flags, int *num_matches) { char *scan; char *bp, *cp; - char *buf; + char *buf = NULL; size_t buflen; char *matchend; size_t len; @@ -2434,38 +2433,77 @@ sub_common(int nargs, long how_many, int backdigs) NODE *s; /* subst. pattern */ NODE *t; /* string to make sub. in; $0 if none given */ NODE *tmp; - NODE **lhs; - int global = (how_many == -1); + NODE **lhs = NULL; + long how_many = 1; /* one substitution for sub, also gensub default */ + int global; long current; int lastmatchnonzero; char *mb_indices = NULL; - - tmp = PEEK(2); /* take care of regexp early, in case re_update is fatal */ - rp = re_update(tmp); - /* original string */ - if (nargs == 4) { /* kludge: no of items on stack is really 3, - * See snode(..) in awkgram.y - */ - lhs = NULL; - t = POP_STRING(); + if ((flags & GENSUB) != 0) { + double d; + NODE *t1; + + tmp = PEEK(3); + rp = re_update(tmp); + + t = POP_STRING(); /* original string */ + + t1 = POP_SCALAR(); /* value of global flag */ + if ((t1->flags & (STRCUR|STRING)) != 0) { + if (t1->stlen > 0 && (t1->stptr[0] == 'g' || t1->stptr[0] == 'G')) + how_many = -1; + else { + d = force_number(t1); + + if ((t1->flags & NUMCUR) != 0) + goto set_how_many; + + how_many = 1; + } + } else { + d = force_number(t1); +set_how_many: + if (d < 1) + how_many = 1; + else if (d < LONG_MAX) + how_many = d; + else + how_many = LONG_MAX; + if (d == 0) + warning(_("gensub: third argument of 0 treated as 1")); + } + DEREF(t1); + } else { - lhs = POP_ADDRESS(); - t = force_string(*lhs); + + /* take care of regexp early, in case re_update is fatal */ + + tmp = PEEK(2); + rp = re_update(tmp); + + if ((flags & GSUB) != 0) + how_many = -1; + + /* original string */ + + if ((flags & LITERAL) != 0) + t = POP_STRING(); + else { + lhs = POP_ADDRESS(); + t = force_string(*lhs); + } } + global = (how_many == -1); - s = POP_STRING(); /* replacement text */ + s = POP_STRING(); /* replacement text */ decr_sp(); /* regexp, already updated above */ /* do the search early to avoid work on non-match */ if (research(rp, t->stptr, 0, t->stlen, RE_NEED_START) == -1 || - RESTART(rp, t->stptr) > t->stlen) { - if (lhs == NULL) - DEREF(t); - DEREF(s); - return make_number((AWKNUM) 0.0); - } + RESTART(rp, t->stptr) > t->stlen) + goto done; t->flags |= STRING; @@ -2476,7 +2514,7 @@ sub_common(int nargs, long how_many, int backdigs) repl = s->stptr; replend = repl + s->stlen; repllen = replend - repl; - emalloc(buf, char *, buflen + 2, "sub_common"); + emalloc(buf, char *, buflen + 2, "do_sub"); buf[buflen] = '\0'; buf[buflen + 1] = '\0'; ampersands = 0; @@ -2490,7 +2528,7 @@ sub_common(int nargs, long how_many, int backdigs) * for example. */ if (gawk_mb_cur_max > 1 && repllen > 0) { - emalloc(mb_indices, char *, repllen * sizeof(char), "sub_common"); + emalloc(mb_indices, char *, repllen * sizeof(char), "do_sub"); index_multibyte_buffer(repl, mb_indices, repllen); } @@ -2500,7 +2538,7 @@ sub_common(int nargs, long how_many, int backdigs) repllen--; ampersands++; } else if (*scan == '\\') { - if (backdigs) { /* gensub, behave sanely */ + if (flags & GENSUB) { /* gensub, behave sanely */ if (isdigit((unsigned char) scan[1])) { ampersands++; scan++; @@ -2575,7 +2613,7 @@ sub_common(int nargs, long how_many, int backdigs) && (gawk_mb_cur_max == 1 || (repllen > 0 && mb_indices[scan - repl] == 1)) ) { - if (backdigs) { /* gensub, behave sanely */ + if (flags & GENSUB) { /* gensub, behave sanely */ if (isdigit((unsigned char) scan[1])) { int dig = scan[1] - '0'; if (dig < NUMSUBPATS(rp, t->stptr) && SUBPATSTART(rp, tp->stptr, dig) != -1) { @@ -2619,7 +2657,7 @@ sub_common(int nargs, long how_many, int backdigs) textlen = text + textlen - matchend; text = matchend; - if ((current >= how_many && !global) + if ((current >= how_many && ! global) || ((long) textlen <= 0 && matchstart == matchend) || research(rp, t->stptr, text - t->stptr, textlen, RE_NEED_START) == -1) break; @@ -2628,7 +2666,7 @@ sub_common(int nargs, long how_many, int backdigs) sofar = bp - buf; if (buflen - sofar - textlen - 1) { buflen = sofar + textlen + 2; - erealloc(buf, char *, buflen, "sub_common"); + erealloc(buf, char *, buflen, "do_sub"); bp = buf + sofar; } for (scan = matchend; scan < text + textlen; scan++) @@ -2636,102 +2674,39 @@ sub_common(int nargs, long how_many, int backdigs) *bp = '\0'; textlen = bp - buf; - DEREF(s); - - if (lhs != NULL) { - if (matches > 0) { - unref(*lhs); - *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED); - } else - efree(buf); - } else { - efree(buf); - DEREF(t); - } - if (mb_indices != NULL) efree(mb_indices); - return make_number((AWKNUM) matches); -} - -/* do_gsub --- global substitution */ - -NODE * -do_gsub(int nargs) -{ - return sub_common(nargs, -1, FALSE); -} - -/* do_sub --- single substitution */ - -NODE * -do_sub(int nargs) -{ - return sub_common(nargs, 1, FALSE); -} - -/* do_gensub --- fix up the tree for sub_common for the gensub function */ - -NODE * -do_gensub(int nargs) -{ - NODE *t, *tmp, *target, *ret; - long how_many = 1; /* default is one substitution */ - double d; - - tmp = POP_STRING(); /* target */ - t = POP_SCALAR(); /* value of global flag */ - - /* - * We make copy of the original target string, and pass that - * in to sub_common() as the target to make the substitution in. - * We will then return the result string as the return value of - * this function. - */ - - target = make_string(tmp->stptr, tmp->stlen); - DEREF(tmp); - PUSH_ADDRESS(& target); - - if ((t->flags & (STRCUR|STRING)) != 0) { - if (t->stlen > 0 && (t->stptr[0] == 'g' || t->stptr[0] == 'G')) - how_many = -1; - else { - d = force_number(t); +done: + DEREF(s); - if ((t->flags & NUMCUR) != 0) - goto set_how_many; + *num_matches = matches; + if ((matches == 0 || (flags & LITERAL) != 0) && buf != NULL) + efree(buf); - how_many = 1; + if (flags & GENSUB) { + if (matches > 0) { + /* return the result string */ + DEREF(t); + return make_str_node(buf, textlen, ALREADY_MALLOCED); } - } else { - d = force_number(t); -set_how_many: - if (d < 1) - how_many = 1; - else if (d < LONG_MAX) - how_many = d; - else - how_many = LONG_MAX; - if (d == 0) - warning(_("gensub: third argument of 0 treated as 1")); - } - - DEREF(t); - ret = sub_common(3, how_many, TRUE); - unref(ret); + /* return the original string */ + return t; + } - /* - * Note that we don't care what sub_common() returns, since the - * easiest thing for the programmer is to return the string, even - * if no substitutions were done. - */ + /* For a string literal, must not change the original string. */ + if (flags & LITERAL) + DEREF(t); + else if (matches > 0) { + unref(*lhs); + *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED); + } - return target; + return make_number((AWKNUM) matches); } + /* make_integer - Convert an integer to a number node. */ static NODE * @@ -3740,7 +3740,16 @@ print_instruction(INSTRUCTION *pc, Func_print print_func, FILE *fp, int in_dump) break; case Op_var_assign: - print_func(fp, "[set_%s]\n", pc->memory->vname); + if (pc->assign_var) + print_func(fp, "[set_%s()]", pc->memory->vname); + print_func(fp, "\n"); + break; + + case Op_field_assign: + if (pc->field_assign) + print_func(fp, "[%s]", pc->field_assign == reset_record ? + "reset_record()" : "invalidate_field0()"); + print_func(fp, "\n"); break; case Op_field_spec_lhs: @@ -3830,6 +3839,27 @@ print_instruction(INSTRUCTION *pc, Func_print print_func, FILE *fp, int in_dump) pc->line_range, pc->target_jmp); break; + case Op_sub_builtin: + { + const char *fname = "sub"; + static const struct flagtab values[] = { + { GSUB, "GSUB" }, + { GENSUB, "GENSUB" }, + { AFTER_ASSIGN, "AFTER_ASSIGN" }, + { LITERAL, "LITERAL" }, + { 0, NULL } + }; + + if (pc->sub_flags & GSUB) + fname = "gsub"; + else if (pc->sub_flags & GENSUB) + fname = "gensub"; + print_func(fp, "%s [arg_count = %ld] [sub_flags = %s]\n", + fname, pc->expr_count, + genflags2str(pc->sub_flags, values)); + } + break; + case Op_builtin: { const char *fname = getfname(pc->builtin); @@ -348,6 +348,7 @@ static struct optypetab { { "Op_K_getline", "getline" }, { "Op_K_nextfile", "nextfile" }, { "Op_builtin", NULL }, + { "Op_sub_builtin", NULL }, { "Op_in_array", " in " }, { "Op_func_call", NULL }, { "Op_indirect_func_call", NULL }, @@ -2114,11 +2115,13 @@ post: break; case Op_var_assign: - pc->assign_var(); + if (pc->assign_var) + pc->assign_var(); break; case Op_field_assign: - pc->field_assign(); + if (pc->field_assign) + pc->field_assign(); break; case Op_concat: @@ -2256,7 +2259,34 @@ arrayfor: #endif PUSH(r); break; - + + case Op_sub_builtin: + { + /* sub, gsub and gensub */ + + int matches = 0; + + r = do_sub(pc->expr_count, pc->sub_flags, & matches); + PUSH(r); + + if (matches == 0 && (pc->sub_flags & AFTER_ASSIGN) != 0) { + + /* For sub and gsub, must not execute after_assign code; + * If the target is a FIELD, this means no field re-splitting or + * $0 reconstruction. For a special variable as target, + * set_XX routine is not called. + */ + + ni = pc->nexti; + assert(ni->opcode == Op_field_assign || ni->opcode == Op_var_assign); + if (ni->opcode == Op_field_assign) + ni->field_assign = (Func_ptr) 0; + else + ni->assign_var = (Func_ptr) 0; + } + } + break; + case Op_K_print: do_print(pc->expr_count, pc->redir_type); break; @@ -507,6 +507,20 @@ cleanup: case Op_after_endfile: break; + case Op_sub_builtin: + { + const char *fname = "sub"; + if (pc->sub_flags & GSUB) + fname = "gsub"; + else if (pc->sub_flags & GENSUB) + fname = "gensub"; + tmp = pp_list(pc->expr_count, "()", ", "); + str = pp_concat(fname, tmp, ""); + efree(tmp); + pp_push(Op_sub_builtin, str, CAN_FREE); + } + break; + case Op_builtin: { static char *ext_func = "extension_function()"; diff --git a/test/ChangeLog b/test/ChangeLog index 2ae225be..60c4f525 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,8 @@ +2011-07-15 Arnold D. Robbins <arnold@skeeve.com> + + * Makefile.am (gsubtst7): New test. + * gsubtst7.awk, gsubtst7.in, gsubtst7.ok: New files. + 2011-06-24 Arnold D. Robbins <arnold@skeeve.com> * Makefile.am (EXTRA_DIST): Add ChangeLog.0. diff --git a/test/Makefile.am b/test/Makefile.am index 2d7bf34f..12f64a62 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -318,6 +318,9 @@ EXTRA_DIST = \ gsubtst5.ok \ gsubtst6.awk \ gsubtst6.ok \ + gsubtst7.awk \ + gsubtst7.in \ + gsubtst7.ok \ gtlnbufv.awk \ hex.awk \ hex.ok \ @@ -768,6 +771,7 @@ BASIC_TESTS = \ fordel forref forsimp fsbs fsrs fsspcoln fstabplus funsemnl funsmnam \ funstack getline getline2 getline3 getlnbuf getnr2tb getnr2tm \ gsubasgn gsubtest gsubtst2 gsubtst3 gsubtst4 gsubtst5 gsubtst6 \ + gsubtst7 \ hex hsprint inputred intest intprec iobug1 leaddig leadnl litoct \ longsub longwrds manglprm math membug1 messages minusstr mmap8k \ mtchi18n nasty nasty2 negexp negrange nested nfldstr nfneg \ diff --git a/test/Makefile.in b/test/Makefile.in index 04ea041c..eee32eba 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -503,6 +503,9 @@ EXTRA_DIST = \ gsubtst5.ok \ gsubtst6.awk \ gsubtst6.ok \ + gsubtst7.awk \ + gsubtst7.in \ + gsubtst7.ok \ gtlnbufv.awk \ hex.awk \ hex.ok \ @@ -953,6 +956,7 @@ BASIC_TESTS = \ fordel forref forsimp fsbs fsrs fsspcoln fstabplus funsemnl funsmnam \ funstack getline getline2 getline3 getlnbuf getnr2tb getnr2tm \ gsubasgn gsubtest gsubtst2 gsubtst3 gsubtst4 gsubtst5 gsubtst6 \ + gsubtst7 \ hex hsprint inputred intest intprec iobug1 leaddig leadnl litoct \ longsub longwrds manglprm math membug1 messages minusstr mmap8k \ mtchi18n nasty nasty2 negexp negrange nested nfldstr nfneg \ @@ -2077,6 +2081,11 @@ gsubtst5: @AWKPATH=$(srcdir) $(AWK) -f $@.awk < $(srcdir)/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@ +gsubtst7: + @echo gsubtst7 + @AWKPATH=$(srcdir) $(AWK) -f $@.awk < $(srcdir)/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@ + hex: @echo hex @AWKPATH=$(srcdir) $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/Maketests b/test/Maketests index 9a16eb7c..9f364038 100644 --- a/test/Maketests +++ b/test/Maketests @@ -350,6 +350,11 @@ gsubtst5: @AWKPATH=$(srcdir) $(AWK) -f $@.awk < $(srcdir)/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@ +gsubtst7: + @echo gsubtst7 + @AWKPATH=$(srcdir) $(AWK) -f $@.awk < $(srcdir)/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@ + hex: @echo hex @AWKPATH=$(srcdir) $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ |