diff options
Diffstat (limited to 'builtin.c')
-rw-r--r-- | builtin.c | 119 |
1 files changed, 68 insertions, 51 deletions
@@ -1847,7 +1847,7 @@ do_substr(int nargs) * way to do things. */ memset(& mbs, 0, sizeof(mbs)); - emalloc(substr, char *, (length * gawk_mb_cur_max) + 2, "do_substr"); + emalloc(substr, char *, (length * gawk_mb_cur_max) + 1, "do_substr"); wp = t1->wstptr + indx; for (cp = substr; length > 0; length--) { result = wcrtomb(cp, *wp, & mbs); @@ -2597,7 +2597,7 @@ do_match(int nargs) sprintf(buff, "%d", ii); ilen = strlen(buff); - amt = ilen + subseplen + strlen("length") + 2; + amt = ilen + subseplen + strlen("length") + 1; if (oldamt == 0) { emalloc(buf, char *, amt, "do_match"); @@ -2771,42 +2771,42 @@ do_sub(int nargs, unsigned int flags) int ampersands; int matches = 0; Regexp *rp; - NODE *s; /* subst. pattern */ - NODE *t; /* string to make sub. in; $0 if none given */ + NODE *rep_node; /* replacement text */ + NODE *target; /* string to make sub. in; $0 if none given */ NODE *tmp; NODE **lhs = NULL; long how_many = 1; /* one substitution for sub, also gensub default */ - int global; + bool global; long current; bool lastmatchnonzero; char *mb_indices = NULL; if ((flags & GENSUB) != 0) { double d; - NODE *t1; + NODE *glob_flag; tmp = PEEK(3); rp = re_update(tmp); - t = POP_STRING(); /* original string */ + target = POP_STRING(); /* original string */ - t1 = POP_SCALAR(); /* value of global flag */ - if ((t1->flags & (STRCUR|STRING)) != 0) { - if (t1->stlen > 0 && (t1->stptr[0] == 'g' || t1->stptr[0] == 'G')) + glob_flag = POP_SCALAR(); /* value of global flag */ + if ((glob_flag->flags & (STRCUR|STRING)) != 0) { + if (glob_flag->stlen > 0 && (glob_flag->stptr[0] == 'g' || glob_flag->stptr[0] == 'G')) how_many = -1; else { - (void) force_number(t1); - d = get_number_d(t1); - if ((t1->flags & NUMCUR) != 0) + (void) force_number(glob_flag); + d = get_number_d(glob_flag); + if ((glob_flag->flags & NUMCUR) != 0) goto set_how_many; warning(_("gensub: third argument `%.*s' treated as 1"), - (int) t1->stlen, t1->stptr); + (int) glob_flag->stlen, glob_flag->stptr); how_many = 1; } } else { - (void) force_number(t1); - d = get_number_d(t1); + (void) force_number(glob_flag); + d = get_number_d(glob_flag); set_how_many: if (d < 1) how_many = 1; @@ -2817,10 +2817,8 @@ set_how_many: if (d <= 0) warning(_("gensub: third argument %g treated as 1"), d); } - DEREF(t1); - + DEREF(glob_flag); } else { - /* take care of regexp early, in case re_update is fatal */ tmp = PEEK(2); @@ -2832,30 +2830,30 @@ set_how_many: /* original string */ if ((flags & LITERAL) != 0) - t = POP_STRING(); + target = POP_STRING(); else { lhs = POP_ADDRESS(); - t = force_string(*lhs); + target = force_string(*lhs); } } global = (how_many == -1); - s = POP_STRING(); /* replacement text */ + rep_node = POP_STRING(); /* replacement text */ decr_sp(); /* regexp, already updated above */ /* do the search early to avoid work on non-match */ - if (research(rp, t->stptr, 0, t->stlen, RE_NEED_START) == -1 || - RESTART(rp, t->stptr) > t->stlen) + if (research(rp, target->stptr, 0, target->stlen, RE_NEED_START) == -1 || + RESTART(rp, target->stptr) > target->stlen) goto done; - t->flags |= STRING; + target->flags |= STRING; - text = t->stptr; - textlen = t->stlen; + text = target->stptr; + textlen = target->stlen; - repl = s->stptr; - replend = repl + s->stlen; + repl = rep_node->stptr; + replend = repl + rep_node->stlen; repllen = replend - repl; ampersands = 0; @@ -2873,6 +2871,7 @@ set_how_many: index_multibyte_buffer(repl, mb_indices, repllen); } + /* compute length of replacement string, number of ampersands */ for (scan = repl; scan < replend; scan++) { if ((gawk_mb_cur_max == 1 || (repllen > 0 && mb_indices[scan - repl] == 1)) && (*scan == '&')) { @@ -2917,24 +2916,32 @@ set_how_many: lastmatchnonzero = false; - /* guesstimate how much room to allocate; +2 forces > 0 */ - buflen = textlen + (ampersands + 1) * repllen + 2; - emalloc(buf, char *, buflen + 2, "do_sub"); + /* guesstimate how much room to allocate; +1 forces > 0 */ + buflen = textlen + (ampersands + 1) * repllen + 1; + emalloc(buf, char *, buflen + 1, "do_sub"); buf[buflen] = '\0'; - buf[buflen + 1] = '\0'; bp = buf; for (current = 1;; current++) { matches++; - matchstart = t->stptr + RESTART(rp, t->stptr); - matchend = t->stptr + REEND(rp, t->stptr); + matchstart = target->stptr + RESTART(rp, target->stptr); + matchend = target->stptr + REEND(rp, target->stptr); /* * create the result, copying in parts of the original - * string + * string. note that length of replacement string can + * vary since ampersand is actual text of regexp match. + */ + + /* + * add 1 to len to handle "empty" case where + * matchend == matchstart and we force a match on a single + * char. Use 'matchend - text' instead of 'matchstart - text' + * because we may not actually make any substitution depending + * on the 'global' and 'how_many' values. */ - len = matchstart - text + repllen - + ampersands * (matchend - matchstart); + len = matchend - text + repllen + + ampersands * (matchend - matchstart) + 1; sofar = bp - buf; while (buflen < (sofar + len + 1)) { buflen *= 2; @@ -2981,13 +2988,13 @@ set_how_many: if (flags & GENSUB) { /* gensub, behave sanely */ if (isdigit((unsigned char) scan[1])) { int dig = scan[1] - '0'; - if (dig < NUMSUBPATS(rp, t->stptr) && SUBPATSTART(rp, tp->stptr, dig) != -1) { + if (dig < NUMSUBPATS(rp, target->stptr) && SUBPATSTART(rp, tp->stptr, dig) != -1) { char *start, *end; - start = t->stptr - + SUBPATSTART(rp, t->stptr, dig); - end = t->stptr - + SUBPATEND(rp, t->stptr, dig); + start = target->stptr + + SUBPATSTART(rp, target->stptr, dig); + end = target->stptr + + SUBPATEND(rp, target->stptr, dig); for (cp = start; cp < end; cp++) *bp++ = *cp; @@ -3041,19 +3048,29 @@ set_how_many: textlen = text + textlen - matchend; text = matchend; +#if 0 + if (bp - buf > sofar + len) + fprintf(stderr, "debug: len = %zu, but used %ld\n", len, (long)((bp - buf) - (long)sofar)); +#endif + if ((current >= how_many && ! global) || ((long) textlen <= 0 && matchstart == matchend) - || research(rp, t->stptr, text - t->stptr, textlen, RE_NEED_START) == -1) + || research(rp, target->stptr, text - target->stptr, textlen, RE_NEED_START) == -1) break; } sofar = bp - buf; - if (buflen - sofar - textlen - 1) { - buflen = sofar + textlen + 2; + if (buflen < (sofar + textlen + 1)) { + buflen = sofar + textlen + 1; erealloc(buf, char *, buflen, "do_sub"); bp = buf + sofar; } - for (scan = matchend; scan < text + textlen; scan++) + /* + * Note that text == matchend, since that assignment is made before + * exiting the 'for' loop above. Thus we copy in the rest of the + * original string. + */ + for (scan = text; scan < text + textlen; scan++) *bp++ = *scan; *bp = '\0'; textlen = bp - buf; @@ -3062,7 +3079,7 @@ set_how_many: efree(mb_indices); done: - DEREF(s); + DEREF(rep_node); if ((matches == 0 || (flags & LITERAL) != 0) && buf != NULL) { efree(buf); @@ -3072,18 +3089,18 @@ done: if (flags & GENSUB) { if (matches > 0) { /* return the result string */ - DEREF(t); + DEREF(target); assert(buf != NULL); return make_str_node(buf, textlen, ALREADY_MALLOCED); } /* return the original string */ - return t; + return target; } /* For a string literal, must not change the original string. */ if ((flags & LITERAL) != 0) - DEREF(t); + DEREF(target); else if (matches > 0) { unref(*lhs); *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED); |