aboutsummaryrefslogtreecommitdiffstats
path: root/builtin.c
diff options
context:
space:
mode:
Diffstat (limited to 'builtin.c')
-rw-r--r--builtin.c628
1 files changed, 439 insertions, 189 deletions
diff --git a/builtin.c b/builtin.c
index 18c01f6e..c3e55596 100644
--- a/builtin.c
+++ b/builtin.c
@@ -2,22 +2,22 @@
* builtin.c - Builtin functions and various utility procedures.
*/
-/*
+/*
* Copyright (C) 1986, 1988, 1989, 1991-2016 the Free Software Foundation, Inc.
- *
+ *
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
- *
+ *
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
- *
+ *
* GAWK is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
@@ -133,10 +133,14 @@ wrerror:
if (fp == stdout && errno == EPIPE)
gawk_exit(EXIT_FATAL);
+
/* otherwise die verbosely */
- fatal(_("%s to \"%s\" failed (%s)"), from,
- rp ? rp->value : _("standard output"),
- errno ? strerror(errno) : _("reason unknown"));
+ if ((rp != NULL) ? is_non_fatal_redirect(rp->value) : is_non_fatal_std(fp))
+ update_ERRNO_int(errno);
+ else
+ fatal(_("%s to \"%s\" failed (%s)"), from,
+ rp ? rp->value : _("standard output"),
+ errno ? strerror(errno) : _("reason unknown"));
}
/* do_exp --- exponential function */
@@ -148,7 +152,7 @@ do_exp(int nargs)
double d, res;
tmp = POP_SCALAR();
- if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0)
+ if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
lintwarn(_("exp: received non-numeric argument"));
d = force_number(tmp)->numbr;
DEREF(tmp);
@@ -193,9 +197,9 @@ do_fflush(int nargs)
/*
* November, 2012.
- * It turns out that circa 2002, when BWK
+ * It turns out that circa 2002, when BWK
* added fflush() and fflush("") to his awk, he made both of
- * them flush everything.
+ * them flush everything.
*
* Now, with our inside agent getting ready to try to get fflush()
* standardized in POSIX, we are going to make our awk consistent
@@ -354,9 +358,9 @@ do_index(int nargs)
POP_TWO_SCALARS(s1, s2);
if (do_lint) {
- if ((s1->flags & (STRING|STRCUR)) == 0)
+ if ((fixtype(s1)->flags & STRING) == 0)
lintwarn(_("index: received non-string first argument"));
- if ((s2->flags & (STRING|STRCUR)) == 0)
+ if ((fixtype(s2)->flags & STRING) == 0)
lintwarn(_("index: received non-string second argument"));
}
@@ -386,7 +390,7 @@ do_index(int nargs)
* If we don't have valid wide character strings, use
* the real bytes.
*/
- do_single_byte = ((s1->wstlen == 0 && s1->stlen > 0)
+ do_single_byte = ((s1->wstlen == 0 && s1->stlen > 0)
|| (s2->wstlen == 0 && s2->stlen > 0));
}
@@ -469,7 +473,7 @@ do_int(int nargs)
double d;
tmp = POP_SCALAR();
- if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0)
+ if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
lintwarn(_("int: received non-numeric argument"));
d = force_number(tmp)->numbr;
d = double_to_int(d);
@@ -484,6 +488,12 @@ do_isarray(int nargs)
{
NODE *tmp;
int ret = 1;
+ static bool warned = false;
+
+ if (do_lint && ! warned) {
+ warned = true;
+ lintwarn(_("`isarray' is deprecated. Use `typeof' instead"));
+ }
tmp = POP();
if (tmp->type != Node_var_array) {
@@ -515,7 +525,7 @@ do_length(int nargs)
/*
* Support for deferred loading of array elements requires that
- * we use the array length interface even though it isn't
+ * we use the array length interface even though it isn't
* necessary for the built-in array types.
*
* 1/2015: The deferred arrays are gone, but this is probably
@@ -528,7 +538,7 @@ do_length(int nargs)
assert(tmp->type == Node_val);
- if (do_lint && (tmp->flags & (STRING|STRCUR)) == 0)
+ if (do_lint && (fixtype(tmp)->flags & STRING) == 0)
lintwarn(_("length: received non-string argument"));
tmp = force_string(tmp);
@@ -557,7 +567,7 @@ do_log(int nargs)
double d, arg;
tmp = POP_SCALAR();
- if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0)
+ if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
lintwarn(_("log: received non-numeric argument"));
arg = force_number(tmp)->numbr;
if (arg < 0.0)
@@ -716,7 +726,7 @@ format_tree(
emalloc(obuf, char *, INITIAL_OUT_SIZE, "format_tree");
obufout = obuf;
osiz = INITIAL_OUT_SIZE;
- ofre = osiz - 2;
+ ofre = osiz - 1;
cur_arg = 1;
@@ -956,7 +966,7 @@ check_pos:
case ' ': /* print ' ' or '-' */
/* 'space' flag is ignored */
/* if '+' already present */
- if (signchar != false)
+ if (signchar != false)
goto check_pos;
/* FALL THROUGH */
case '+': /* print '+' or '-' */
@@ -982,18 +992,18 @@ check_pos:
alt = true;
goto check_pos;
case '\'':
-#if defined(HAVE_LOCALE_H)
+#if defined(HAVE_LOCALE_H)
quote_flag = true;
goto check_pos;
#else
- goto retry;
+ goto retry;
#endif
case 'l':
if (big_flag)
break;
else {
static bool warned = false;
-
+
if (do_lint && ! warned) {
lintwarn(_("`l' is meaningless in awk formats; ignored"));
warned = true;
@@ -1010,7 +1020,7 @@ check_pos:
break;
else {
static bool warned = false;
-
+
if (do_lint && ! warned) {
lintwarn(_("`L' is meaningless in awk formats; ignored"));
warned = true;
@@ -1027,7 +1037,7 @@ check_pos:
break;
else {
static bool warned = false;
-
+
if (do_lint && ! warned) {
lintwarn(_("`h' is meaningless in awk formats; ignored"));
warned = true;
@@ -1043,8 +1053,7 @@ check_pos:
need_format = false;
parse_next_arg();
/* user input that looks numeric is numeric */
- if ((arg->flags & (MAYBE_NUM|NUMBER)) == MAYBE_NUM)
- (void) force_number(arg);
+ fixtype(arg);
if ((arg->flags & NUMBER) != 0) {
uval = get_number_uj(arg);
if (gawk_mb_cur_max > 1) {
@@ -1214,7 +1223,7 @@ out0:
jj = 0; /* keep using current val in loc.grouping[ii] */
else if (loc.grouping[ii+1] == CHAR_MAX)
quote_flag = false;
- else {
+ else {
ii++;
jj = 0;
}
@@ -1329,7 +1338,7 @@ mpf1:
zero_flag = (! lj
&& ((zero_flag && ! have_prec)
|| (fw == 0 && have_prec)));
-
+
(void) mpfr_get_z(mpzval, mf, MPFR_RNDZ); /* convert to GMP integer */
fmt_type = have_prec ? MP_INT_WITH_PREC : MP_INT_WITHOUT_PREC;
zi = mpzval;
@@ -1394,11 +1403,11 @@ mpf1:
PREPEND(ts[k]);
}
}
- if (loc.grouping[ii+1] == 0)
+ if (loc.grouping[ii+1] == 0)
jj = 0; /* keep using current val in loc.grouping[ii] */
- else if (loc.grouping[ii+1] == CHAR_MAX)
+ else if (loc.grouping[ii+1] == CHAR_MAX)
quote_flag = false;
- else {
+ else {
ii++;
jj = 0;
}
@@ -1578,7 +1587,7 @@ mpf1:
bchunk(s0, s1 - s0);
olen_final = obufout - obuf;
if (ofre > 0)
- erealloc(obuf, char *, olen_final + 2, "format_tree");
+ erealloc(obuf, char *, olen_final + 1, "format_tree");
r = make_str_node(obuf, olen_final, ALREADY_MALLOCED);
obuf = NULL;
out:
@@ -1649,7 +1658,7 @@ do_printf(int nargs, int redirtype)
FILE *fp = NULL;
NODE *tmp;
struct redirect *rp = NULL;
- int errflg; /* not used, sigh */
+ int errflg = 0;
NODE *redir_exp = NULL;
if (nargs == 0) {
@@ -1660,7 +1669,7 @@ do_printf(int nargs, int redirtype)
redir_exp = TOP();
if (redir_exp->type != Node_val)
fatal(_("attempt to use array `%s' in a scalar context"), array_vname(redir_exp));
- rp = redirect(redir_exp, redirtype, & errflg);
+ rp = redirect(redir_exp, redirtype, & errflg, true);
DEREF(redir_exp);
decr_sp();
}
@@ -1673,14 +1682,22 @@ do_printf(int nargs, int redirtype)
redir_exp = PEEK(nargs);
if (redir_exp->type != Node_val)
fatal(_("attempt to use array `%s' in a scalar context"), array_vname(redir_exp));
- rp = redirect(redir_exp, redirtype, & errflg);
+ rp = redirect(redir_exp, redirtype, & errflg, true);
if (rp != NULL) {
if ((rp->flag & RED_TWOWAY) != 0 && rp->output.fp == NULL) {
+ if (is_non_fatal_redirect(redir_exp->stptr)) {
+ update_ERRNO_int(EBADF);
+ return;
+ }
(void) close_rp(rp, CLOSE_ALL);
fatal(_("printf: attempt to write to closed write end of two-way pipe"));
}
fp = rp->output.fp;
}
+ else if (errflg) {
+ update_ERRNO_int(errflg);
+ return;
+ }
} else if (do_debug) /* only the debugger can change the default output */
fp = output_fp;
else
@@ -1713,7 +1730,7 @@ do_sqrt(int nargs)
double arg;
tmp = POP_SCALAR();
- if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0)
+ if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
lintwarn(_("sqrt: received non-numeric argument"));
arg = (double) force_number(tmp)->numbr;
DEREF(tmp);
@@ -1854,7 +1871,7 @@ do_substr(int nargs)
* way to do things.
*/
memset(& mbs, 0, sizeof(mbs));
- emalloc(substr, char *, (length * gawk_mb_cur_max) + 2, "do_substr");
+ emalloc(substr, char *, (length * gawk_mb_cur_max) + 1, "do_substr");
wp = t1->wstptr + indx;
for (cp = substr; length > 0; length--) {
result = wcrtomb(cp, *wp, & mbs);
@@ -1903,7 +1920,7 @@ do_strftime(int nargs)
unref(sub);
if (val != NULL) {
- if (do_lint && (val->flags & STRING) == 0)
+ if (do_lint && (fixtype(val)->flags & STRING) == 0)
lintwarn(_("strftime: format value in PROCINFO[\"strftime\"] has numeric type"));
val = force_string(val);
format = val->stptr;
@@ -1917,16 +1934,13 @@ do_strftime(int nargs)
if (nargs == 3) {
t3 = POP_SCALAR();
- if ((t3->flags & (NUMCUR|NUMBER)) != 0)
- do_gmt = (t3->numbr != 0);
- else
- do_gmt = (t3->stlen > 0);
+ do_gmt = boolval(t3);
DEREF(t3);
}
if (nargs >= 2) {
t2 = POP_SCALAR();
- if (do_lint && (t2->flags & (NUMCUR|NUMBER)) == 0)
+ if (do_lint && (fixtype(t2)->flags & NUMBER) == 0)
lintwarn(_("strftime: received non-numeric second argument"));
(void) force_number(t2);
clock_val = get_number_d(t2);
@@ -1952,9 +1966,9 @@ do_strftime(int nargs)
}
tmp = POP_SCALAR();
- if (do_lint && (tmp->flags & (STRING|STRCUR)) == 0)
+ if (do_lint && (fixtype(tmp)->flags & STRING) == 0)
lintwarn(_("strftime: received non-string first argument"));
-
+
t1 = force_string(tmp);
format = t1->stptr;
formatlen = t1->stlen;
@@ -2028,7 +2042,7 @@ do_mktime(int nargs)
char save;
t1 = POP_SCALAR();
- if (do_lint && (t1->flags & (STRING|STRCUR)) == 0)
+ if (do_lint && (fixtype(t1)->flags & STRING) == 0)
lintwarn(_("mktime: received non-string argument"));
t1 = force_string(t1);
@@ -2086,7 +2100,7 @@ do_system(int nargs)
(void) flush_io(); /* so output is synchronous with gawk's */
tmp = POP_SCALAR();
- if (do_lint && (tmp->flags & (STRING|STRCUR)) == 0)
+ if (do_lint && (fixtype(tmp)->flags & STRING) == 0)
lintwarn(_("system: received non-string argument"));
cmd = force_string(tmp)->stptr;
@@ -2114,22 +2128,12 @@ do_system(int nargs)
; /* leave it alone, full 16 bits */
else if (do_traditional)
#ifdef __MINGW32__
- ret = (((unsigned)status) & ~0xC0000000);
+ ret = (((unsigned)status) & ~0xC0000000);
#else
ret = (status / 256.0);
#endif
- else if (WIFEXITED(status))
- ret = WEXITSTATUS(status); /* normal exit */
- else if (WIFSIGNALED(status)) {
- bool coredumped = false;
-#ifdef WCOREDUMP
- coredumped = WCOREDUMP(status);
-#endif
- /* use 256 since exit values are 8 bits */
- ret = WTERMSIG(status) +
- (coredumped ? 512 : 256);
- } else
- ret = 0; /* shouldn't get here */
+ else
+ ret = sanitize_exit_status(status);
}
if ((BINMODE & BINMODE_INPUT) != 0)
@@ -2150,7 +2154,7 @@ void
do_print(int nargs, int redirtype)
{
struct redirect *rp = NULL;
- int errflg; /* not used, sigh */
+ int errflg = 0;
FILE *fp = NULL;
int i;
NODE *redir_exp = NULL;
@@ -2162,14 +2166,22 @@ do_print(int nargs, int redirtype)
redir_exp = PEEK(nargs);
if (redir_exp->type != Node_val)
fatal(_("attempt to use array `%s' in a scalar context"), array_vname(redir_exp));
- rp = redirect(redir_exp, redirtype, & errflg);
+ rp = redirect(redir_exp, redirtype, & errflg, true);
if (rp != NULL) {
if ((rp->flag & RED_TWOWAY) != 0 && rp->output.fp == NULL) {
+ if (is_non_fatal_redirect(redir_exp->stptr)) {
+ update_ERRNO_int(EBADF);
+ return;
+ }
(void) close_rp(rp, CLOSE_ALL);
fatal(_("print: attempt to write to closed write end of two-way pipe"));
}
fp = rp->output.fp;
}
+ else if (errflg) {
+ update_ERRNO_int(errflg);
+ return;
+ }
} else if (do_debug) /* only the debugger can change the default output */
fp = output_fp;
else
@@ -2183,8 +2195,10 @@ do_print(int nargs, int redirtype)
fatal(_("attempt to use array `%s' in a scalar context"), array_vname(tmp));
}
- if ((tmp->flags & STRCUR) == 0 || (tmp->stfmt != -1 && tmp->stfmt != OFMTidx))
- args_array[i] = format_val(OFMT, OFMTidx, tmp);
+ if ( (tmp->flags & STRCUR) == 0
+ || ( tmp->stfmt != STFMT_UNUSED
+ && tmp->stfmt != OFMTidx))
+ args_array[i] = format_val(OFMT, OFMTidx, tmp);
}
if (redir_exp != NULL) {
@@ -2215,21 +2229,25 @@ do_print(int nargs, int redirtype)
/* do_print_rec --- special case printing of $0, for speed */
-void
+void
do_print_rec(int nargs, int redirtype)
{
FILE *fp = NULL;
NODE *f0;
struct redirect *rp = NULL;
- int errflg; /* not used, sigh */
+ int errflg = 0;
NODE *redir_exp = NULL;
assert(nargs == 0);
if (redirtype != 0) {
redir_exp = TOP();
- rp = redirect(redir_exp, redirtype, & errflg);
+ rp = redirect(redir_exp, redirtype, & errflg, true);
if (rp != NULL) {
if ((rp->flag & RED_TWOWAY) != 0 && rp->output.fp == NULL) {
+ if (is_non_fatal_redirect(redir_exp->stptr)) {
+ update_ERRNO_int(EBADF);
+ return;
+ }
(void) close_rp(rp, CLOSE_ALL);
fatal(_("print: attempt to write to closed write end of two-way pipe"));
}
@@ -2240,6 +2258,11 @@ do_print_rec(int nargs, int redirtype)
} else
fp = output_fp;
+ if (errflg) {
+ update_ERRNO_int(errflg);
+ return;
+ }
+
if (fp == NULL)
return;
@@ -2333,7 +2356,7 @@ do_tolower(int nargs)
NODE *t1, *t2;
t1 = POP_SCALAR();
- if (do_lint && (t1->flags & (STRING|STRCUR)) == 0)
+ if (do_lint && (fixtype(t1)->flags & STRING) == 0)
lintwarn(_("tolower: received non-string argument"));
t1 = force_string(t1);
t2 = make_string(t1->stptr, t1->stlen);
@@ -2364,7 +2387,7 @@ do_toupper(int nargs)
NODE *t1, *t2;
t1 = POP_SCALAR();
- if (do_lint && (t1->flags & (STRING|STRCUR)) == 0)
+ if (do_lint && (fixtype(t1)->flags & STRING) == 0)
lintwarn(_("toupper: received non-string argument"));
t1 = force_string(t1);
t2 = make_string(t1->stptr, t1->stlen);
@@ -2397,9 +2420,9 @@ do_atan2(int nargs)
POP_TWO_SCALARS(t1, t2);
if (do_lint) {
- if ((t1->flags & (NUMCUR|NUMBER)) == 0)
+ if ((fixtype(t1)->flags & NUMBER) == 0)
lintwarn(_("atan2: received non-numeric first argument"));
- if ((t2->flags & (NUMCUR|NUMBER)) == 0)
+ if ((fixtype(t2)->flags & NUMBER) == 0)
lintwarn(_("atan2: received non-numeric second argument"));
}
d1 = force_number(t1)->numbr;
@@ -2418,7 +2441,7 @@ do_sin(int nargs)
double d;
tmp = POP_SCALAR();
- if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0)
+ if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
lintwarn(_("sin: received non-numeric argument"));
d = sin((double) force_number(tmp)->numbr);
DEREF(tmp);
@@ -2434,7 +2457,7 @@ do_cos(int nargs)
double d;
tmp = POP_SCALAR();
- if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0)
+ if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
lintwarn(_("cos: received non-numeric argument"));
d = cos((double) force_number(tmp)->numbr);
DEREF(tmp);
@@ -2453,6 +2476,8 @@ static char *const state = (char *const) istate;
NODE *
do_rand(int nargs ATTRIBUTE_UNUSED)
{
+ double tmprand;
+#define RAND_DIVISOR ((double)GAWK_RANDOM_MAX+1.0)
if (firstrand) {
(void) initstate((unsigned) 1, state, SIZEOF_STATE);
/* don't need to srandom(1), initstate() does it for us. */
@@ -2464,7 +2489,66 @@ do_rand(int nargs ATTRIBUTE_UNUSED)
*
* 0 <= n < 1
*/
- return make_number((AWKNUM) (random() % GAWK_RANDOM_MAX) / GAWK_RANDOM_MAX);
+ /*
+ * Date: Wed, 28 Aug 2013 17:52:46 -0700
+ * From: Bob Jewett <jewett@bill.scs.agilent.com>
+ *
+ * Call random() twice to fill in more bits in the value
+ * of the double. Also, there is a bug in random() such
+ * that when the values of successive values are combined
+ * like (rand1*rand2)^2, (rand3*rand4)^2, ... the
+ * resulting time series is not white noise. The
+ * following also seems to fix that bug.
+ *
+ * The add/subtract 0.5 keeps small bits from filling
+ * below 2^-53 in the double, not that anyone should be
+ * looking down there.
+ *
+ * Date: Wed, 25 Sep 2013 10:45:38 -0600 (MDT)
+ * From: "Nelson H. F. Beebe" <beebe@math.utah.edu>
+ * (4) The code is typical of many published fragments for converting
+ * from integer to floating-point, and I discuss the serious pitfalls
+ * in my book, because it leads to platform-dependent behavior at the
+ * end points of the interval [0,1]
+ *
+ * (5) the documentation in the gawk info node says
+ *
+ * `rand()'
+ * Return a random number. The values of `rand()' are uniformly
+ * distributed between zero and one. The value could be zero but is
+ * never one.(1)
+ *
+ * The division by RAND_DIVISOR may not guarantee that 1.0 is never
+ * returned: the programmer forgot the platform-dependent issue of
+ * rounding.
+ *
+ * For points 4 and 5, the safe way is a loop:
+ *
+ * double
+ * rand(void) // return value in [0.0, 1.0)
+ * {
+ * value = internal_rand();
+ *
+ * while (value == 1.0)
+ * value = internal_rand();
+ *
+ * return (value);
+ * }
+ */
+
+ do {
+ long d1, d2;
+ /*
+ * Do the calls in predictable order to avoid
+ * compiler differences in order of evaluation.
+ */
+ d1 = random();
+ d2 = random();
+ tmprand = 0.5 + ( (d1/RAND_DIVISOR + d2) / RAND_DIVISOR );
+ tmprand -= 0.5;
+ } while (tmprand == 1.0);
+
+ return make_number((AWKNUM) tmprand);
}
/* do_srand --- seed the random number generator */
@@ -2487,7 +2571,7 @@ do_srand(int nargs)
srandom((unsigned int) (save_seed = (long) time((time_t *) 0)));
else {
tmp = POP_SCALAR();
- if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0)
+ if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
lintwarn(_("srand: received non-numeric argument"));
srandom((unsigned int) (save_seed = (long) force_number(tmp)->numbr));
DEREF(tmp);
@@ -2525,7 +2609,7 @@ do_match(int nargs)
tre = POP();
rp = re_update(tre);
t1 = POP_STRING();
-
+
rstart = research(rp, t1->stptr, 0, t1->stlen, RE_NEED_START);
if (rstart >= 0) { /* match succeded */
size_t *wc_indices = NULL;
@@ -2538,7 +2622,7 @@ do_match(int nargs)
}
rstart++; /* now it's 1-based indexing */
-
+
/* Build the array only if the caller wants the optional subpatterns */
if (dest != NULL) {
subsepstr = SUBSEP_node->var_value->stptr;
@@ -2554,7 +2638,7 @@ do_match(int nargs)
size_t subpat_len;
NODE **lhs;
NODE *sub;
-
+
start = t1->stptr + s;
subpat_start = s;
subpat_len = len = SUBPATEND(rp, t1->stptr, ii) - s;
@@ -2562,7 +2646,7 @@ do_match(int nargs)
subpat_start = wc_indices[s];
subpat_len = wc_indices[s + len - 1] - subpat_start + 1;
}
-
+
it = make_string(start, len);
it->flags |= MAYBE_NUM; /* user input */
@@ -2577,8 +2661,8 @@ do_match(int nargs)
sprintf(buff, "%d", ii);
ilen = strlen(buff);
- amt = ilen + subseplen + strlen("length") + 2;
-
+ amt = ilen + subseplen + strlen("length") + 1;
+
if (oldamt == 0) {
emalloc(buf, char *, amt, "do_match");
} else if (amt > oldamt) {
@@ -2588,9 +2672,9 @@ do_match(int nargs)
memcpy(buf, buff, ilen);
memcpy(buf + ilen, subsepstr, subseplen);
memcpy(buf + ilen + subseplen, "start", 6);
-
+
slen = ilen + subseplen + 5;
-
+
it = make_number((AWKNUM) subpat_start + 1);
sub = make_string(buf, slen);
lhs = assoc_lookup(dest, sub);
@@ -2599,13 +2683,13 @@ do_match(int nargs)
if (dest->astore != NULL)
(*dest->astore)(dest, sub);
unref(sub);
-
+
memcpy(buf, buff, ilen);
memcpy(buf + ilen, subsepstr, subseplen);
memcpy(buf + ilen + subseplen, "length", 7);
-
+
slen = ilen + subseplen + 6;
-
+
it = make_number((AWKNUM) subpat_len);
sub = make_string(buf, slen);
lhs = assoc_lookup(dest, sub);
@@ -2640,9 +2724,9 @@ do_match(int nargs)
* Gsub can be tricksy; particularly when handling the case of null strings.
* The following awk code was useful in debugging problems. It is too bad
* that it does not readily translate directly into the C code, below.
- *
+ *
* #! /usr/local/bin/mawk -f
- *
+ *
* BEGIN {
* true = 1; false = 0
* print "--->", mygsub("abc", "b+", "FOO")
@@ -2652,7 +2736,7 @@ do_match(int nargs)
* print "--->", mygsub("abc", "c+", "X")
* print "--->", mygsub("abc", "x*$", "X")
* }
- *
+ *
* function mygsub(str, regex, replace, origstr, newstr, eosflag, nonzeroflag)
* {
* origstr = str;
@@ -2690,7 +2774,7 @@ do_match(int nargs)
* }
* if (length(str) > 0)
* newstr = newstr str # rest of string
- *
+ *
* return newstr
* }
*/
@@ -2702,7 +2786,7 @@ do_match(int nargs)
*
* The relevant text is to be found on lines 6394-6407 (pages 166, 167) of the
* 2001 standard:
- *
+ *
* sub(ere, repl[, in ])
* Substitute the string repl in place of the first instance of the
* extended regular expression ERE in string in and return the number of
@@ -2751,56 +2835,48 @@ do_sub(int nargs, unsigned int flags)
int ampersands;
int matches = 0;
Regexp *rp;
- NODE *s; /* subst. pattern */
- NODE *t; /* string to make sub. in; $0 if none given */
+ NODE *rep_node; /* replacement text */
+ NODE *target; /* string to make sub. in; $0 if none given */
NODE *tmp;
NODE **lhs = NULL;
long how_many = 1; /* one substitution for sub, also gensub default */
- int global;
+ bool global;
long current;
bool lastmatchnonzero;
char *mb_indices = NULL;
-
+
if ((flags & GENSUB) != 0) {
double d;
- NODE *t1;
+ NODE *glob_flag;
tmp = PEEK(3);
rp = re_update(tmp);
- t = POP_STRING(); /* original string */
-
- t1 = POP_SCALAR(); /* value of global flag */
- if ((t1->flags & (STRCUR|STRING)) != 0) {
- if (t1->stlen > 0 && (t1->stptr[0] == 'g' || t1->stptr[0] == 'G'))
- how_many = -1;
- else {
- (void) force_number(t1);
- d = get_number_d(t1);
- if ((t1->flags & NUMCUR) != 0)
- goto set_how_many;
+ target = POP_STRING(); /* original string */
- warning(_("gensub: third argument `%.*s' treated as 1"),
- (int) t1->stlen, t1->stptr);
- how_many = 1;
- }
- } else {
- (void) force_number(t1);
- d = get_number_d(t1);
-set_how_many:
+ glob_flag = POP_SCALAR(); /* value of global flag */
+ if ( (glob_flag->flags & STRING) != 0
+ && glob_flag->stlen > 0
+ && (glob_flag->stptr[0] == 'g' || glob_flag->stptr[0] == 'G'))
+ how_many = -1;
+ else {
+ (void) force_number(glob_flag);
+ d = get_number_d(glob_flag);
if (d < 1)
how_many = 1;
else if (d < LONG_MAX)
how_many = d;
else
how_many = LONG_MAX;
- if (d <= 0)
- warning(_("gensub: third argument %g treated as 1"), d);
+ if (d <= 0) {
+ (void) force_string(glob_flag);
+ warning(_("gensub: third argument `%.*s' treated as 1"),
+ (int) glob_flag->stlen,
+ glob_flag->stptr);
+ }
}
- DEREF(t1);
-
+ DEREF(glob_flag);
} else {
-
/* take care of regexp early, in case re_update is fatal */
tmp = PEEK(2);
@@ -2812,30 +2888,30 @@ set_how_many:
/* original string */
if ((flags & LITERAL) != 0)
- t = POP_STRING();
+ target = POP_STRING();
else {
lhs = POP_ADDRESS();
- t = force_string(*lhs);
+ target = force_string(*lhs);
}
}
global = (how_many == -1);
- s = POP_STRING(); /* replacement text */
+ rep_node = POP_STRING(); /* replacement text */
decr_sp(); /* regexp, already updated above */
/* do the search early to avoid work on non-match */
- if (research(rp, t->stptr, 0, t->stlen, RE_NEED_START) == -1 ||
- RESTART(rp, t->stptr) > t->stlen)
+ if (research(rp, target->stptr, 0, target->stlen, RE_NEED_START) == -1 ||
+ RESTART(rp, target->stptr) > target->stlen)
goto done;
- t->flags |= STRING;
+ target->flags |= STRING;
- text = t->stptr;
- textlen = t->stlen;
+ text = target->stptr;
+ textlen = target->stlen;
- repl = s->stptr;
- replend = repl + s->stlen;
+ repl = rep_node->stptr;
+ replend = repl + rep_node->stlen;
repllen = replend - repl;
ampersands = 0;
@@ -2853,6 +2929,7 @@ set_how_many:
index_multibyte_buffer(repl, mb_indices, repllen);
}
+ /* compute length of replacement string, number of ampersands */
for (scan = repl; scan < replend; scan++) {
if ((gawk_mb_cur_max == 1 || (repllen > 0 && mb_indices[scan - repl] == 1))
&& (*scan == '&')) {
@@ -2899,24 +2976,32 @@ set_how_many:
lastmatchnonzero = false;
- /* guesstimate how much room to allocate; +2 forces > 0 */
- buflen = textlen + (ampersands + 1) * repllen + 2;
- emalloc(buf, char *, buflen + 2, "do_sub");
+ /* guesstimate how much room to allocate; +1 forces > 0 */
+ buflen = textlen + (ampersands + 1) * repllen + 1;
+ emalloc(buf, char *, buflen + 1, "do_sub");
buf[buflen] = '\0';
- buf[buflen + 1] = '\0';
bp = buf;
for (current = 1;; current++) {
matches++;
- matchstart = t->stptr + RESTART(rp, t->stptr);
- matchend = t->stptr + REEND(rp, t->stptr);
+ matchstart = target->stptr + RESTART(rp, target->stptr);
+ matchend = target->stptr + REEND(rp, target->stptr);
/*
* create the result, copying in parts of the original
- * string
+ * string. note that length of replacement string can
+ * vary since ampersand is actual text of regexp match.
*/
- len = matchstart - text + repllen
- + ampersands * (matchend - matchstart);
+
+ /*
+ * add 1 to len to handle "empty" case where
+ * matchend == matchstart and we force a match on a single
+ * char. Use 'matchend - text' instead of 'matchstart - text'
+ * because we may not actually make any substitution depending
+ * on the 'global' and 'how_many' values.
+ */
+ len = matchend - text + repllen
+ + ampersands * (matchend - matchstart) + 1;
sofar = bp - buf;
while (buflen < (sofar + len + 1)) {
buflen *= 2;
@@ -2963,13 +3048,13 @@ set_how_many:
if (flags & GENSUB) { /* gensub, behave sanely */
if (isdigit((unsigned char) scan[1])) {
int dig = scan[1] - '0';
- if (dig < NUMSUBPATS(rp, t->stptr) && SUBPATSTART(rp, tp->stptr, dig) != -1) {
+ if (dig < NUMSUBPATS(rp, target->stptr) && SUBPATSTART(rp, tp->stptr, dig) != -1) {
char *start, *end;
-
- start = t->stptr
- + SUBPATSTART(rp, t->stptr, dig);
- end = t->stptr
- + SUBPATEND(rp, t->stptr, dig);
+
+ start = target->stptr
+ + SUBPATSTART(rp, target->stptr, dig);
+ end = target->stptr
+ + SUBPATEND(rp, target->stptr, dig);
for (cp = start; cp < end; cp++)
*bp++ = *cp;
@@ -3025,19 +3110,29 @@ set_how_many:
textlen = text + textlen - matchend;
text = matchend;
+#if 0
+ if (bp - buf > sofar + len)
+ fprintf(stderr, "debug: len = %zu, but used %ld\n", len, (long)((bp - buf) - (long)sofar));
+#endif
+
if ((current >= how_many && ! global)
|| ((long) textlen <= 0 && matchstart == matchend)
- || research(rp, t->stptr, text - t->stptr, textlen, RE_NEED_START) == -1)
+ || research(rp, target->stptr, text - target->stptr, textlen, RE_NEED_START) == -1)
break;
}
sofar = bp - buf;
- if (buflen - sofar - textlen - 1) {
- buflen = sofar + textlen + 2;
+ if (buflen < (sofar + textlen + 1)) {
+ buflen = sofar + textlen + 1;
erealloc(buf, char *, buflen, "do_sub");
bp = buf + sofar;
}
- for (scan = matchend; scan < text + textlen; scan++)
+ /*
+ * Note that text == matchend, since that assignment is made before
+ * exiting the 'for' loop above. Thus we copy in the rest of the
+ * original string.
+ */
+ for (scan = text; scan < text + textlen; scan++)
*bp++ = *scan;
*bp = '\0';
textlen = bp - buf;
@@ -3046,31 +3141,31 @@ set_how_many:
efree(mb_indices);
done:
- DEREF(s);
+ DEREF(rep_node);
if ((matches == 0 || (flags & LITERAL) != 0) && buf != NULL) {
- efree(buf);
+ efree(buf);
buf = NULL;
}
if (flags & GENSUB) {
if (matches > 0) {
/* return the result string */
- DEREF(t);
+ DEREF(target);
assert(buf != NULL);
- return make_str_node(buf, textlen, ALREADY_MALLOCED);
+ return make_str_node(buf, textlen, ALREADY_MALLOCED);
}
/* return the original string */
- return t;
+ return target;
}
/* For a string literal, must not change the original string. */
if ((flags & LITERAL) != 0)
- DEREF(t);
+ DEREF(target);
else if (matches > 0) {
unref(*lhs);
- *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED);
+ *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED);
}
return make_number((AWKNUM) matches);
@@ -3238,16 +3333,18 @@ do_lshift(int nargs)
POP_TWO_SCALARS(s1, s2);
if (do_lint) {
- if ((s1->flags & (NUMCUR|NUMBER)) == 0)
+ if ((fixtype(s1)->flags & NUMBER) == 0)
lintwarn(_("lshift: received non-numeric first argument"));
- if ((s2->flags & (NUMCUR|NUMBER)) == 0)
+ if ((fixtype(s2)->flags & NUMBER) == 0)
lintwarn(_("lshift: received non-numeric second argument"));
}
+
val = force_number(s1)->numbr;
shift = force_number(s2)->numbr;
+ if (val < 0 || shift < 0)
+ fatal(_("lshift(%f, %f): negative values are not allowed"), val, shift);
+
if (do_lint) {
- if (val < 0 || shift < 0)
- lintwarn(_("lshift(%f, %f): negative values will give strange results"), val, shift);
if (double_to_int(val) != val || double_to_int(shift) != shift)
lintwarn(_("lshift(%f, %f): fractional values will be truncated"), val, shift);
if (shift >= sizeof(uintmax_t) * CHAR_BIT)
@@ -3275,16 +3372,18 @@ do_rshift(int nargs)
POP_TWO_SCALARS(s1, s2);
if (do_lint) {
- if ((s1->flags & (NUMCUR|NUMBER)) == 0)
+ if ((fixtype(s1)->flags & NUMBER) == 0)
lintwarn(_("rshift: received non-numeric first argument"));
- if ((s2->flags & (NUMCUR|NUMBER)) == 0)
+ if ((fixtype(s2)->flags & NUMBER) == 0)
lintwarn(_("rshift: received non-numeric second argument"));
}
+
val = force_number(s1)->numbr;
shift = force_number(s2)->numbr;
+ if (val < 0 || shift < 0)
+ fatal(_("rshift(%f, %f): negative values are not allowed"), val, shift);
+
if (do_lint) {
- if (val < 0 || shift < 0)
- lintwarn(_("rshift(%f, %f): negative values will give strange results"), val, shift);
if (double_to_int(val) != val || double_to_int(shift) != shift)
lintwarn(_("rshift(%f, %f): fractional values will be truncated"), val, shift);
if (shift >= sizeof(uintmax_t) * CHAR_BIT)
@@ -3317,12 +3416,12 @@ do_and(int nargs)
for (i = 1; nargs > 0; nargs--, i++) {
s1 = POP_SCALAR();
- if (do_lint && (s1->flags & (NUMCUR|NUMBER)) == 0)
+ if (do_lint && (fixtype(s1)->flags & NUMBER) == 0)
lintwarn(_("and: argument %d is non-numeric"), i);
val = force_number(s1)->numbr;
- if (do_lint && val < 0)
- lintwarn(_("and: argument %d negative value %g will give strange results"), i, val);
+ if (val < 0)
+ fatal(_("and: argument %d negative value %g is not allowed"), i, val);
uval = (uintmax_t) val;
res &= uval;
@@ -3349,12 +3448,12 @@ do_or(int nargs)
for (i = 1; nargs > 0; nargs--, i++) {
s1 = POP_SCALAR();
- if (do_lint && (s1->flags & (NUMCUR|NUMBER)) == 0)
+ if (do_lint && (fixtype(s1)->flags & NUMBER) == 0)
lintwarn(_("or: argument %d is non-numeric"), i);
val = force_number(s1)->numbr;
- if (do_lint && val < 0)
- lintwarn(_("or: argument %d negative value %g will give strange results"), i, val);
+ if (val < 0)
+ fatal(_("or: argument %d negative value %g is not allowed"), i, val);
uval = (uintmax_t) val;
res |= uval;
@@ -3381,12 +3480,12 @@ do_xor(int nargs)
res = 0; /* silence compiler warning */
for (i = 1; nargs > 0; nargs--, i++) {
s1 = POP_SCALAR();
- if (do_lint && (s1->flags & (NUMCUR|NUMBER)) == 0)
+ if (do_lint && (fixtype(s1)->flags & NUMBER) == 0)
lintwarn(_("xor: argument %d is non-numeric"), i);
val = force_number(s1)->numbr;
- if (do_lint && val < 0)
- lintwarn(_("xor: argument %d negative value %g will give strange results"), i, val);
+ if (val < 0)
+ fatal(_("xor: argument %d negative value %g is not allowed"), i, val);
uval = (uintmax_t) val;
if (i == 1)
@@ -3410,17 +3509,16 @@ do_compl(int nargs)
uintmax_t uval;
tmp = POP_SCALAR();
- if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0)
+ if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
lintwarn(_("compl: received non-numeric argument"));
d = force_number(tmp)->numbr;
DEREF(tmp);
- if (do_lint) {
- if (d < 0)
- lintwarn(_("compl(%f): negative value will give strange results"), d);
- if (double_to_int(d) != d)
- lintwarn(_("compl(%f): fractional value will be truncated"), d);
- }
+ if (d < 0)
+ fatal(_("compl(%f): negative value is not allowed"), d);
+
+ if (do_lint && double_to_int(d) != d)
+ lintwarn(_("compl(%f): fractional value will be truncated"), d);
uval = (uintmax_t) d;
uval = ~ uval;
@@ -3435,11 +3533,11 @@ do_strtonum(int nargs)
NODE *tmp;
AWKNUM d;
- tmp = POP_SCALAR();
- if ((tmp->flags & (NUMBER|NUMCUR)) != 0)
- d = (AWKNUM) force_number(tmp)->numbr;
+ tmp = fixtype(POP_SCALAR());
+ if ((tmp->flags & NUMBER) != 0)
+ d = (AWKNUM) tmp->numbr;
else if (get_numbase(tmp->stptr, use_lc_numeric) != 10)
- d = nondec2awknum(tmp->stptr, tmp->stlen);
+ d = nondec2awknum(tmp->stptr, tmp->stlen, NULL);
else
d = (AWKNUM) force_number(tmp)->numbr;
@@ -3456,7 +3554,7 @@ do_strtonum(int nargs)
*/
AWKNUM
-nondec2awknum(char *str, size_t len)
+nondec2awknum(char *str, size_t len, char **endptr)
{
AWKNUM retval = 0.0;
char save;
@@ -3468,8 +3566,11 @@ nondec2awknum(char *str, size_t len)
* User called strtonum("0x") or some such,
* so just quit early.
*/
- if (len <= 2)
+ if (len <= 2) {
+ if (endptr)
+ *endptr = start;
return (AWKNUM) 0.0;
+ }
for (str += 2, len -= 2; len > 0; len--, str++) {
switch (*str) {
@@ -3502,14 +3603,21 @@ nondec2awknum(char *str, size_t len)
val = *str - 'A' + 10;
break;
default:
+ if (endptr)
+ *endptr = str;
goto done;
}
retval = (retval * 16) + val;
}
+ if (endptr)
+ *endptr = str;
} else if (*str == '0') {
for (; len > 0; len--) {
- if (! isdigit((unsigned char) *str))
+ if (! isdigit((unsigned char) *str)) {
+ if (endptr)
+ *endptr = str;
goto done;
+ }
else if (*str == '8' || *str == '9') {
str = start;
goto decimal;
@@ -3517,11 +3625,13 @@ nondec2awknum(char *str, size_t len)
retval = (retval * 8) + (*str - '0');
str++;
}
+ if (endptr)
+ *endptr = str;
} else {
decimal:
save = str[len];
str[len] = '\0';
- retval = strtod(str, NULL);
+ retval = strtod(str, endptr);
str[len] = save;
}
done:
@@ -3757,6 +3867,125 @@ do_bindtextdomain(int nargs)
return make_string(the_result, strlen(the_result));
}
+/* do_intdiv --- do integer division, return quotient and remainder in dest array */
+
+/*
+ * We define the semantics as:
+ * 	numerator = int(numerator)
+ *	denominator = int(denominator)
+ *	quotient = int(numerator / denominator)
+ *	remainder = int(numerator % denominator)
+ *
+ * The arguments are popped in reverse order: the destination array,
+ * then the denominator, then the numerator.  The array is cleared and
+ * then receives the elements "quotient" and "remainder".  A third
+ * argument that is not an array, or a denominator that truncates to
+ * zero, is a fatal error.  The function value is always the number 0.
+ */
+
+NODE *
+do_intdiv(int nargs)
+{
+	NODE *numerator, *denominator, *result;
+	double num, denom, quotient, remainder;
+	NODE *sub, **lhs;
+
+	result = POP_PARAM();
+	if (result->type != Node_var_array)
+		fatal(_("intdiv: third argument is not an array"));
+	assoc_clear(result);
+
+	denominator = POP_SCALAR();
+	numerator = POP_SCALAR();
+
+	if (do_lint) {
+		if ((fixtype(numerator)->flags & NUMBER) == 0)
+			lintwarn(_("intdiv: received non-numeric first argument"));
+		if ((fixtype(denominator)->flags & NUMBER) == 0)
+			lintwarn(_("intdiv: received non-numeric second argument"));
+	}
+
+	(void) force_number(numerator);
+	(void) force_number(denominator);
+	num = double_to_int(get_number_d(numerator));
+	denom = double_to_int(get_number_d(denominator));
+
+	if (denom == 0.0)
+		fatal(_("intdiv: division by zero attempted"));
+
+	quotient = double_to_int(num / denom);
+	/*
+	 * FIXME: This code is duplicated, factor it out to a
+	 * separate function.
+	 */
+#ifdef HAVE_FMOD
+	remainder = fmod(num, denom);
+#else	/* ! HAVE_FMOD */
+	(void) modf(num / denom, & remainder);
+	remainder = num - remainder * denom;
+#endif	/* ! HAVE_FMOD */
+	remainder = double_to_int(remainder);
+
+	sub = make_string("quotient", 8);
+	lhs = assoc_lookup(result, sub);
+	unref(*lhs);
+	*lhs = make_number((AWKNUM) quotient);
+	/* release our reference to the subscript node; it is not needed any more */
+	unref(sub);
+
+	sub = make_string("remainder", 9);
+	lhs = assoc_lookup(result, sub);
+	unref(*lhs);
+	*lhs = make_number((AWKNUM) remainder);
+	unref(sub);
+
+	/* drop the references obtained from POP_SCALAR(), as the other builtins do */
+	DEREF(denominator);
+	DEREF(numerator);
+
+	return make_number((AWKNUM) 0.0);
+}
+
+/* do_typeof --- return a string with the type of the arg */
+
+/*
+ * The result is one of "array", "untyped", "string", "number", "strnum",
+ * "unassigned", or "unknown".  "unknown" is returned, after a warning,
+ * when a scalar carries a combination of STRING, NUMBER and MAYBE_NUM
+ * that is not handled here.  Any other node type is a fatal error.
+ */
+
+NODE *
+do_typeof(int nargs)
+{
+	NODE *arg = POP();
+	char *type_name = "unknown";
+	bool must_deref = true;
+	bool bad_flags = false;
+
+	if (arg->type == Node_var_array) {
+		/* Node_var_array is never UPREF'ed */
+		type_name = "array";
+		must_deref = false;
+	} else if (arg->type == Node_var_new) {
+		type_name = "untyped";
+		must_deref = false;
+	} else if (arg->type == Node_val || arg->type == Node_var) {
+		switch (arg->flags & (STRING|NUMBER|MAYBE_NUM)) {
+		case NUMBER:
+			type_name = "number";
+			break;
+		case STRING:
+			type_name = "string";
+			break;
+		case STRING|MAYBE_NUM:
+			type_name = "strnum";
+			break;
+		case NUMBER|STRING:
+			/* only the shared null string may legitimately be both */
+			if (arg == Nnull_string)
+				type_name = "unassigned";
+			else
+				bad_flags = true;
+			break;
+		default:
+			bad_flags = true;
+			break;
+		}
+
+		if (bad_flags)
+			warning(_("typeof detected invalid flags combination `%s'; please file a bug report."), flags2str(arg->flags));
+	} else {
+		fatal(_("typeof: unknown argument type `%s'"),
+				nodetype2str(arg->type));
+	}
+
+	if (must_deref)
+		DEREF(arg);
+
+	return make_string(type_name, strlen(type_name));
+}
/* mbc_byte_count --- return number of bytes for corresponding numchars multibyte characters */
@@ -3813,3 +4042,24 @@ mbc_char_count(const char *ptr, size_t numbytes)
return sum;
}
+
+/* sanitize_exit_status --- convert a 16 bit Unix exit status into something reasonable */
+
+/*
+ * A normal exit yields the exit value itself.  Death by signal yields
+ * the signal number plus 256, or plus 512 if WCOREDUMP is available
+ * and reports a core dump; exit values are 8 bits, so these results
+ * cannot collide with a normal exit value.  Anything else yields 0.
+ */
+
+int
+sanitize_exit_status(int status)
+{
+	bool dumped_core = false;
+
+	if (WIFEXITED(status))
+		return WEXITSTATUS(status);	/* normal exit */
+
+	if (! WIFSIGNALED(status))
+		return 0;	/* shouldn't get here */
+
+#ifdef WCOREDUMP
+	dumped_core = WCOREDUMP(status);
+#endif
+	/* use 256 since exit values are 8 bits */
+	return WTERMSIG(status) + (dumped_core ? 512 : 256);
+}