about summary refs log tree commit diff stats
path: root/builtin.c
diff options
context:
space:
mode:
author Arnold D. Robbins <arnold@skeeve.com> 2011-10-09 21:22:55 +0200
committer Arnold D. Robbins <arnold@skeeve.com> 2011-10-09 21:22:55 +0200
commit 06f3e569c7c153a5d0f2dc5667822c562e77ca9f (patch)
tree 2a7c5ba3d7fa182edd3cf5d93d0629da22ae9880 /builtin.c
parent 194d9dc454d361ee5c707e71a5e3512fbe3a71cd (diff)
parent 35d9ad9209592594905d1c8f15130d09af111411 (diff)
download egawk-06f3e569c7c153a5d0f2dc5667822c562e77ca9f.tar.gz
egawk-06f3e569c7c153a5d0f2dc5667822c562e77ca9f.tar.bz2
egawk-06f3e569c7c153a5d0f2dc5667822c562e77ca9f.zip
Merge branch 'gawk-4.0-stable'
Diffstat (limited to 'builtin.c')
-rw-r--r-- builtin.c 300
1 files changed, 155 insertions, 145 deletions
diff --git a/builtin.c b/builtin.c
index 724ea6df..53800fc2 100644
--- a/builtin.c
+++ b/builtin.c
@@ -72,7 +72,6 @@ extern NODE **fields_arr;
extern int output_is_tty;
extern FILE *output_fp;
-static NODE *sub_common(int nargs, long how_many, int backdigs);
#define POP_TWO_SCALARS(s1, s2) \
s2 = POP_SCALAR(); \
@@ -105,7 +104,7 @@ efwrite(const void *ptr,
if (fwrite(ptr, size, count, fp) != count)
goto wrerror;
if (flush
- && (output_is_tty
+ && ((fp == stdout && output_is_tty)
|| (rp != NULL && (rp->flag & RED_NOBUF)))) {
fflush(fp);
if (ferror(fp))
@@ -215,7 +214,7 @@ do_fflush(int nargs)
return make_number((AWKNUM) status);
}
-#ifdef MBS_SUPPORT
+#if MBS_SUPPORT
/* strncasecmpmbs --- like strncasecmp (multibyte string version) */
int
@@ -313,7 +312,7 @@ do_index(int nargs)
const char *p1, *p2;
size_t l1, l2;
long ret;
-#ifdef MBS_SUPPORT
+#if MBS_SUPPORT
int do_single_byte = FALSE;
mbstate_t mbs1, mbs2;
@@ -349,7 +348,7 @@ do_index(int nargs)
goto out;
}
-#ifdef MBS_SUPPORT
+#if MBS_SUPPORT
if (gawk_mb_cur_max > 1) {
s1 = force_wstring(s1);
s2 = force_wstring(s2);
@@ -367,7 +366,7 @@ do_index(int nargs)
while (l1 > 0) {
if (l2 > l1)
break;
-#ifdef MBS_SUPPORT
+#if MBS_SUPPORT
if (! do_single_byte && gawk_mb_cur_max > 1) {
const wchar_t *pos;
@@ -390,7 +389,7 @@ do_index(int nargs)
}
l1--;
p1++;
-#ifdef MBS_SUPPORT
+#if MBS_SUPPORT
}
#endif
}
@@ -403,7 +402,7 @@ do_index(int nargs)
ret = 1 + s1->stlen - l1;
break;
}
-#ifdef MBS_SUPPORT
+#if MBS_SUPPORT
if (! do_single_byte && gawk_mb_cur_max > 1) {
const wchar_t *pos;
@@ -501,7 +500,7 @@ do_length(int nargs)
lintwarn(_("length: received non-string argument"));
(void) force_string(tmp);
-#ifdef MBS_SUPPORT
+#if MBS_SUPPORT
if (gawk_mb_cur_max > 1) {
tmp = force_wstring(tmp);
len = tmp->wstlen;
@@ -606,7 +605,7 @@ format_tree(
NODE *arg;
long fw, prec, argnum;
int used_dollar;
- int lj, alt, big, bigbig, small, have_prec, need_format;
+ int lj, alt, big_flag, bigbig_flag, small_flag, have_prec, need_format;
long *cur = NULL;
uintmax_t uval;
int sgn;
@@ -726,7 +725,7 @@ format_tree(
signchar = FALSE;
zero_flag = FALSE;
quote_flag = FALSE;
- lj = alt = big = bigbig = small = FALSE;
+ lj = alt = big_flag = bigbig_flag = small_flag = FALSE;
fill = sp;
cp = cend;
chbuf = lchbuf;
@@ -908,7 +907,7 @@ check_pos:
goto retry;
#endif
case 'l':
- if (big)
+ if (big_flag)
break;
else {
static short warned = FALSE;
@@ -922,10 +921,10 @@ check_pos:
goto out;
}
}
- big = TRUE;
+ big_flag = TRUE;
goto retry;
case 'L':
- if (bigbig)
+ if (bigbig_flag)
break;
else {
static short warned = FALSE;
@@ -939,10 +938,10 @@ check_pos:
goto out;
}
}
- bigbig = TRUE;
+ bigbig_flag = TRUE;
goto retry;
case 'h':
- if (small)
+ if (small_flag)
break;
else {
static short warned = FALSE;
@@ -956,7 +955,7 @@ check_pos:
goto out;
}
}
- small = TRUE;
+ small_flag = TRUE;
goto retry;
case 'c':
need_format = FALSE;
@@ -1008,7 +1007,7 @@ out0:
* used to work? 6/2003.)
*/
cp = arg->stptr;
-#ifdef MBS_SUPPORT
+#if MBS_SUPPORT
/*
* First character can be multiple bytes if
* it's a multibyte character. Grr.
@@ -1539,7 +1538,7 @@ do_substr(int nargs)
if (nargs == 2) { /* third arg. missing */
/* use remainder of string */
length = t1->stlen - indx; /* default to bytes */
-#ifdef MBS_SUPPORT
+#if MBS_SUPPORT
if (gawk_mb_cur_max > 1) {
t1 = force_wstring(t1);
if (t1->wstlen > 0) /* use length of wide char string if we have one */
@@ -1558,7 +1557,7 @@ do_substr(int nargs)
}
/* get total len of input string, for following checks */
-#ifdef MBS_SUPPORT
+#if MBS_SUPPORT
if (gawk_mb_cur_max > 1) {
t1 = force_wstring(t1);
src_len = t1->wstlen;
@@ -1581,7 +1580,7 @@ do_substr(int nargs)
length = src_len - indx;
}
-#ifdef MBS_SUPPORT
+#if MBS_SUPPORT
/* force_wstring() already called */
if (gawk_mb_cur_max == 1 || t1->wstlen == t1->stlen)
/* single byte case */
@@ -1941,7 +1940,7 @@ do_print_rec(int nargs, int redirtype)
fflush(rp->fp);
}
-#ifdef MBS_SUPPORT
+#if MBS_SUPPORT
/* is_wupper --- function version of iswupper for passing function pointers */
@@ -2030,7 +2029,7 @@ do_tolower(int nargs)
if (isupper(*cp))
*cp = tolower(*cp);
}
-#ifdef MBS_SUPPORT
+#if MBS_SUPPORT
else {
force_wstring(t2);
wide_tolower(t2->wstptr, t2->wstlen);
@@ -2064,7 +2063,7 @@ do_toupper(int nargs)
if (islower(*cp))
*cp = toupper(*cp);
}
-#ifdef MBS_SUPPORT
+#if MBS_SUPPORT
else {
force_wstring(t2);
wide_toupper(t2->wstptr, t2->wstlen);
@@ -2220,7 +2219,7 @@ do_match(int nargs)
size_t *wc_indices = NULL;
rlength = REEND(rp, t1->stptr) - RESTART(rp, t1->stptr); /* byte length */
-#ifdef MBS_SUPPORT
+#if MBS_SUPPORT
if (rlength > 0 && gawk_mb_cur_max > 1) {
t1 = str2wstr(t1, & wc_indices);
rlength = wc_indices[rstart + rlength - 1] - wc_indices[rstart] + 1;
@@ -2248,7 +2247,7 @@ do_match(int nargs)
start = t1->stptr + s;
subpat_start = s;
subpat_len = len = SUBPATEND(rp, t1->stptr, ii) - s;
-#ifdef MBS_SUPPORT
+#if MBS_SUPPORT
if (len > 0 && gawk_mb_cur_max > 1) {
subpat_start = wc_indices[s];
subpat_len = wc_indices[s + len - 1] - subpat_start + 1;
@@ -2319,7 +2318,7 @@ do_match(int nargs)
return make_number((AWKNUM) rstart);
}
-/* sub_common --- the common code (does the work) for sub, gsub, and gensub */
+/* do_sub --- do the work for sub, gsub, and gensub */
/*
* Gsub can be tricksy; particularly when handling the case of null strings.
@@ -2412,12 +2411,12 @@ do_match(int nargs)
* NB: `howmany' conflicts with a SunOS 4.x macro in <sys/param.h>.
*/
-static NODE *
-sub_common(int nargs, long how_many, int backdigs)
+NODE *
+do_sub(int nargs, unsigned int flags)
{
char *scan;
char *bp, *cp;
- char *buf;
+ char *buf = NULL;
size_t buflen;
char *matchend;
size_t len;
@@ -2434,38 +2433,77 @@ sub_common(int nargs, long how_many, int backdigs)
NODE *s; /* subst. pattern */
NODE *t; /* string to make sub. in; $0 if none given */
NODE *tmp;
- NODE **lhs;
- int global = (how_many == -1);
+ NODE **lhs = NULL;
+ long how_many = 1; /* one substitution for sub, also gensub default */
+ int global;
long current;
int lastmatchnonzero;
char *mb_indices = NULL;
-
- tmp = PEEK(2); /* take care of regexp early, in case re_update is fatal */
- rp = re_update(tmp);
- /* original string */
- if (nargs == 4) { /* kludge: no of items on stack is really 3,
- * See snode(..) in awkgram.y
- */
- lhs = NULL;
- t = POP_STRING();
+ if ((flags & GENSUB) != 0) {
+ double d;
+ NODE *t1;
+
+ tmp = PEEK(3);
+ rp = re_update(tmp);
+
+ t = POP_STRING(); /* original string */
+
+ t1 = POP_SCALAR(); /* value of global flag */
+ if ((t1->flags & (STRCUR|STRING)) != 0) {
+ if (t1->stlen > 0 && (t1->stptr[0] == 'g' || t1->stptr[0] == 'G'))
+ how_many = -1;
+ else {
+ d = force_number(t1);
+
+ if ((t1->flags & NUMCUR) != 0)
+ goto set_how_many;
+
+ how_many = 1;
+ }
+ } else {
+ d = force_number(t1);
+set_how_many:
+ if (d < 1)
+ how_many = 1;
+ else if (d < LONG_MAX)
+ how_many = d;
+ else
+ how_many = LONG_MAX;
+ if (d == 0)
+ warning(_("gensub: third argument of 0 treated as 1"));
+ }
+ DEREF(t1);
+
} else {
- lhs = POP_ADDRESS();
- t = force_string(*lhs);
+
+ /* take care of regexp early, in case re_update is fatal */
+
+ tmp = PEEK(2);
+ rp = re_update(tmp);
+
+ if ((flags & GSUB) != 0)
+ how_many = -1;
+
+ /* original string */
+
+ if ((flags & LITERAL) != 0)
+ t = POP_STRING();
+ else {
+ lhs = POP_ADDRESS();
+ t = force_string(*lhs);
+ }
}
+ global = (how_many == -1);
- s = POP_STRING(); /* replacement text */
+ s = POP_STRING(); /* replacement text */
decr_sp(); /* regexp, already updated above */
/* do the search early to avoid work on non-match */
if (research(rp, t->stptr, 0, t->stlen, RE_NEED_START) == -1 ||
- RESTART(rp, t->stptr) > t->stlen) {
- if (lhs == NULL)
- DEREF(t);
- DEREF(s);
- return make_number((AWKNUM) 0.0);
- }
+ RESTART(rp, t->stptr) > t->stlen)
+ goto done;
t->flags |= STRING;
@@ -2476,7 +2514,7 @@ sub_common(int nargs, long how_many, int backdigs)
repl = s->stptr;
replend = repl + s->stlen;
repllen = replend - repl;
- emalloc(buf, char *, buflen + 2, "sub_common");
+ emalloc(buf, char *, buflen + 2, "do_sub");
buf[buflen] = '\0';
buf[buflen + 1] = '\0';
ampersands = 0;
@@ -2490,7 +2528,7 @@ sub_common(int nargs, long how_many, int backdigs)
* for example.
*/
if (gawk_mb_cur_max > 1 && repllen > 0) {
- emalloc(mb_indices, char *, repllen * sizeof(char), "sub_common");
+ emalloc(mb_indices, char *, repllen * sizeof(char), "do_sub");
index_multibyte_buffer(repl, mb_indices, repllen);
}
@@ -2500,7 +2538,7 @@ sub_common(int nargs, long how_many, int backdigs)
repllen--;
ampersands++;
} else if (*scan == '\\') {
- if (backdigs) { /* gensub, behave sanely */
+ if (flags & GENSUB) { /* gensub, behave sanely */
if (isdigit((unsigned char) scan[1])) {
ampersands++;
scan++;
@@ -2508,13 +2546,30 @@ sub_common(int nargs, long how_many, int backdigs)
repllen--;
scan++;
}
- } else {
+ } else if (do_posix) {
/* \& --> &, \\ --> \ */
if (scan[1] == '&' || scan[1] == '\\') {
repllen--;
scan++;
} /* else
leave alone, it goes into the output */
+ } else {
+ /* gawk default behavior since 1996 */
+ if (strncmp(scan, "\\\\\\&", 4) == 0) {
+ /* \\\& --> \& */
+ repllen -= 2;
+ scan += 3;
+ } else if (strncmp(scan, "\\\\&", 3) == 0) {
+ /* \\& --> \<string> */
+ ampersands++;
+ repllen--;
+ scan += 2;
+ } else if (scan[1] == '&') {
+ /* \& --> & */
+ repllen--;
+ scan++;
+ } /* else
+ leave alone, it goes into the output */
}
}
}
@@ -2575,7 +2630,7 @@ sub_common(int nargs, long how_many, int backdigs)
&& (gawk_mb_cur_max == 1
|| (repllen > 0 && mb_indices[scan - repl] == 1))
) {
- if (backdigs) { /* gensub, behave sanely */
+ if (flags & GENSUB) { /* gensub, behave sanely */
if (isdigit((unsigned char) scan[1])) {
int dig = scan[1] - '0';
if (dig < NUMSUBPATS(rp, t->stptr) && SUBPATSTART(rp, tp->stptr, dig) != -1) {
@@ -2592,11 +2647,30 @@ sub_common(int nargs, long how_many, int backdigs)
scan++;
} else /* \q for any q --> q */
*bp++ = *++scan;
- } else {
+ } else if (do_posix) {
/* \& --> &, \\ --> \ */
if (scan[1] == '&' || scan[1] == '\\')
scan++;
*bp++ = *scan;
+ } else {
+ /* gawk default behavior since 1996 */
+ if (strncmp(scan, "\\\\\\&", 4) == 0) {
+ /* \\\& --> \& */
+ *bp++ = '\\';
+ *bp++ = '&';
+ scan += 3;
+ } else if (strncmp(scan, "\\\\&", 3) == 0) {
+ /* \\& --> \<string> */
+ *bp++ = '\\';
+ for (cp = matchstart; cp < matchend; cp++)
+ *bp++ = *cp;
+ scan += 2;
+ } else if (scan[1] == '&') {
+ /* \& --> & */
+ *bp++ = '&';
+ scan++;
+ } else
+ *bp++ = *scan;
}
} else
*bp++ = *scan;
@@ -2619,7 +2693,7 @@ sub_common(int nargs, long how_many, int backdigs)
textlen = text + textlen - matchend;
text = matchend;
- if ((current >= how_many && !global)
+ if ((current >= how_many && ! global)
|| ((long) textlen <= 0 && matchstart == matchend)
|| research(rp, t->stptr, text - t->stptr, textlen, RE_NEED_START) == -1)
break;
@@ -2628,7 +2702,7 @@ sub_common(int nargs, long how_many, int backdigs)
sofar = bp - buf;
if (buflen - sofar - textlen - 1) {
buflen = sofar + textlen + 2;
- erealloc(buf, char *, buflen, "sub_common");
+ erealloc(buf, char *, buflen, "do_sub");
bp = buf + sofar;
}
for (scan = matchend; scan < text + textlen; scan++)
@@ -2636,102 +2710,38 @@ sub_common(int nargs, long how_many, int backdigs)
*bp = '\0';
textlen = bp - buf;
- DEREF(s);
-
- if (lhs != NULL) {
- if (matches > 0) {
- unref(*lhs);
- *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED);
- } else
- efree(buf);
- } else {
- efree(buf);
- DEREF(t);
- }
-
if (mb_indices != NULL)
efree(mb_indices);
- return make_number((AWKNUM) matches);
-}
-
-/* do_gsub --- global substitution */
-
-NODE *
-do_gsub(int nargs)
-{
- return sub_common(nargs, -1, FALSE);
-}
-
-/* do_sub --- single substitution */
-
-NODE *
-do_sub(int nargs)
-{
- return sub_common(nargs, 1, FALSE);
-}
-
-/* do_gensub --- fix up the tree for sub_common for the gensub function */
-
-NODE *
-do_gensub(int nargs)
-{
- NODE *t, *tmp, *target, *ret;
- long how_many = 1; /* default is one substitution */
- double d;
-
- tmp = POP_STRING(); /* target */
- t = POP_SCALAR(); /* value of global flag */
-
- /*
- * We make copy of the original target string, and pass that
- * in to sub_common() as the target to make the substitution in.
- * We will then return the result string as the return value of
- * this function.
- */
-
- target = make_string(tmp->stptr, tmp->stlen);
- DEREF(tmp);
- PUSH_ADDRESS(& target);
-
- if ((t->flags & (STRCUR|STRING)) != 0) {
- if (t->stlen > 0 && (t->stptr[0] == 'g' || t->stptr[0] == 'G'))
- how_many = -1;
- else {
- d = force_number(t);
+done:
+ DEREF(s);
- if ((t->flags & NUMCUR) != 0)
- goto set_how_many;
+ if ((matches == 0 || (flags & LITERAL) != 0) && buf != NULL)
+ efree(buf);
- how_many = 1;
+ if (flags & GENSUB) {
+ if (matches > 0) {
+ /* return the result string */
+ DEREF(t);
+ return make_str_node(buf, textlen, ALREADY_MALLOCED);
}
- } else {
- d = force_number(t);
-set_how_many:
- if (d < 1)
- how_many = 1;
- else if (d < LONG_MAX)
- how_many = d;
- else
- how_many = LONG_MAX;
- if (d == 0)
- warning(_("gensub: third argument of 0 treated as 1"));
- }
-
- DEREF(t);
- ret = sub_common(3, how_many, TRUE);
- unref(ret);
+ /* return the original string */
+ return t;
+ }
- /*
- * Note that we don't care what sub_common() returns, since the
- * easiest thing for the programmer is to return the string, even
- * if no substitutions were done.
- */
+ /* For a string literal, must not change the original string. */
+ if (flags & LITERAL)
+ DEREF(t);
+ else if (matches > 0) {
+ unref(*lhs);
+ *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED);
+ }
- return target;
+ return make_number((AWKNUM) matches);
}
+
/* make_integer - Convert an integer to a number node. */
static NODE *
@@ -3285,7 +3295,7 @@ do_bindtextdomain(int nargs)
static size_t
mbc_byte_count(const char *ptr, size_t numchars)
{
-#ifdef MBS_SUPPORT
+#if MBS_SUPPORT
mbstate_t cur_state;
size_t sum = 0;
int mb_len;
@@ -3316,7 +3326,7 @@ mbc_byte_count(const char *ptr, size_t numchars)
static size_t
mbc_char_count(const char *ptr, size_t numbytes)
{
-#ifdef MBS_SUPPORT
+#if MBS_SUPPORT
mbstate_t cur_state;
size_t sum = 0;
int mb_len;