diff options
Diffstat (limited to 'builtin.c')
-rw-r--r-- | builtin.c | 628 |
1 files changed, 439 insertions, 189 deletions
@@ -2,22 +2,22 @@ * builtin.c - Builtin functions and various utility procedures. */ -/* +/* * Copyright (C) 1986, 1988, 1989, 1991-2016 the Free Software Foundation, Inc. - * + * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. - * + * * GAWK is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. - * + * * GAWK is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA @@ -133,10 +133,14 @@ wrerror: if (fp == stdout && errno == EPIPE) gawk_exit(EXIT_FATAL); + /* otherwise die verbosely */ - fatal(_("%s to \"%s\" failed (%s)"), from, - rp ? rp->value : _("standard output"), - errno ? strerror(errno) : _("reason unknown")); + if ((rp != NULL) ? is_non_fatal_redirect(rp->value) : is_non_fatal_std(fp)) + update_ERRNO_int(errno); + else + fatal(_("%s to \"%s\" failed (%s)"), from, + rp ? rp->value : _("standard output"), + errno ? strerror(errno) : _("reason unknown")); } /* do_exp --- exponential function */ @@ -148,7 +152,7 @@ do_exp(int nargs) double d, res; tmp = POP_SCALAR(); - if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0) + if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0) lintwarn(_("exp: received non-numeric argument")); d = force_number(tmp)->numbr; DEREF(tmp); @@ -193,9 +197,9 @@ do_fflush(int nargs) /* * November, 2012. 
- * It turns out that circa 2002, when BWK + * It turns out that circa 2002, when BWK * added fflush() and fflush("") to his awk, he made both of - * them flush everything. + * them flush everything. * * Now, with our inside agent getting ready to try to get fflush() * standardized in POSIX, we are going to make our awk consistent @@ -354,9 +358,9 @@ do_index(int nargs) POP_TWO_SCALARS(s1, s2); if (do_lint) { - if ((s1->flags & (STRING|STRCUR)) == 0) + if ((fixtype(s1)->flags & STRING) == 0) lintwarn(_("index: received non-string first argument")); - if ((s2->flags & (STRING|STRCUR)) == 0) + if ((fixtype(s2)->flags & STRING) == 0) lintwarn(_("index: received non-string second argument")); } @@ -386,7 +390,7 @@ do_index(int nargs) * If we don't have valid wide character strings, use * the real bytes. */ - do_single_byte = ((s1->wstlen == 0 && s1->stlen > 0) + do_single_byte = ((s1->wstlen == 0 && s1->stlen > 0) || (s2->wstlen == 0 && s2->stlen > 0)); } @@ -469,7 +473,7 @@ do_int(int nargs) double d; tmp = POP_SCALAR(); - if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0) + if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0) lintwarn(_("int: received non-numeric argument")); d = force_number(tmp)->numbr; d = double_to_int(d); @@ -484,6 +488,12 @@ do_isarray(int nargs) { NODE *tmp; int ret = 1; + static bool warned = false; + + if (do_lint && ! warned) { + warned = true; + lintwarn(_("`isarray' is deprecated. Use `typeof' instead")); + } tmp = POP(); if (tmp->type != Node_var_array) { @@ -515,7 +525,7 @@ do_length(int nargs) /* * Support for deferred loading of array elements requires that - * we use the array length interface even though it isn't + * we use the array length interface even though it isn't * necessary for the built-in array types. 
* * 1/2015: The deferred arrays are gone, but this is probably @@ -528,7 +538,7 @@ do_length(int nargs) assert(tmp->type == Node_val); - if (do_lint && (tmp->flags & (STRING|STRCUR)) == 0) + if (do_lint && (fixtype(tmp)->flags & STRING) == 0) lintwarn(_("length: received non-string argument")); tmp = force_string(tmp); @@ -557,7 +567,7 @@ do_log(int nargs) double d, arg; tmp = POP_SCALAR(); - if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0) + if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0) lintwarn(_("log: received non-numeric argument")); arg = force_number(tmp)->numbr; if (arg < 0.0) @@ -716,7 +726,7 @@ format_tree( emalloc(obuf, char *, INITIAL_OUT_SIZE, "format_tree"); obufout = obuf; osiz = INITIAL_OUT_SIZE; - ofre = osiz - 2; + ofre = osiz - 1; cur_arg = 1; @@ -956,7 +966,7 @@ check_pos: case ' ': /* print ' ' or '-' */ /* 'space' flag is ignored */ /* if '+' already present */ - if (signchar != false) + if (signchar != false) goto check_pos; /* FALL THROUGH */ case '+': /* print '+' or '-' */ @@ -982,18 +992,18 @@ check_pos: alt = true; goto check_pos; case '\'': -#if defined(HAVE_LOCALE_H) +#if defined(HAVE_LOCALE_H) quote_flag = true; goto check_pos; #else - goto retry; + goto retry; #endif case 'l': if (big_flag) break; else { static bool warned = false; - + if (do_lint && ! warned) { lintwarn(_("`l' is meaningless in awk formats; ignored")); warned = true; @@ -1010,7 +1020,7 @@ check_pos: break; else { static bool warned = false; - + if (do_lint && ! warned) { lintwarn(_("`L' is meaningless in awk formats; ignored")); warned = true; @@ -1027,7 +1037,7 @@ check_pos: break; else { static bool warned = false; - + if (do_lint && ! 
warned) { lintwarn(_("`h' is meaningless in awk formats; ignored")); warned = true; @@ -1043,8 +1053,7 @@ check_pos: need_format = false; parse_next_arg(); /* user input that looks numeric is numeric */ - if ((arg->flags & (MAYBE_NUM|NUMBER)) == MAYBE_NUM) - (void) force_number(arg); + fixtype(arg); if ((arg->flags & NUMBER) != 0) { uval = get_number_uj(arg); if (gawk_mb_cur_max > 1) { @@ -1214,7 +1223,7 @@ out0: jj = 0; /* keep using current val in loc.grouping[ii] */ else if (loc.grouping[ii+1] == CHAR_MAX) quote_flag = false; - else { + else { ii++; jj = 0; } @@ -1329,7 +1338,7 @@ mpf1: zero_flag = (! lj && ((zero_flag && ! have_prec) || (fw == 0 && have_prec))); - + (void) mpfr_get_z(mpzval, mf, MPFR_RNDZ); /* convert to GMP integer */ fmt_type = have_prec ? MP_INT_WITH_PREC : MP_INT_WITHOUT_PREC; zi = mpzval; @@ -1394,11 +1403,11 @@ mpf1: PREPEND(ts[k]); } } - if (loc.grouping[ii+1] == 0) + if (loc.grouping[ii+1] == 0) jj = 0; /* keep using current val in loc.grouping[ii] */ - else if (loc.grouping[ii+1] == CHAR_MAX) + else if (loc.grouping[ii+1] == CHAR_MAX) quote_flag = false; - else { + else { ii++; jj = 0; } @@ -1578,7 +1587,7 @@ mpf1: bchunk(s0, s1 - s0); olen_final = obufout - obuf; if (ofre > 0) - erealloc(obuf, char *, olen_final + 2, "format_tree"); + erealloc(obuf, char *, olen_final + 1, "format_tree"); r = make_str_node(obuf, olen_final, ALREADY_MALLOCED); obuf = NULL; out: @@ -1649,7 +1658,7 @@ do_printf(int nargs, int redirtype) FILE *fp = NULL; NODE *tmp; struct redirect *rp = NULL; - int errflg; /* not used, sigh */ + int errflg = 0; NODE *redir_exp = NULL; if (nargs == 0) { @@ -1660,7 +1669,7 @@ do_printf(int nargs, int redirtype) redir_exp = TOP(); if (redir_exp->type != Node_val) fatal(_("attempt to use array `%s' in a scalar context"), array_vname(redir_exp)); - rp = redirect(redir_exp, redirtype, & errflg); + rp = redirect(redir_exp, redirtype, & errflg, true); DEREF(redir_exp); decr_sp(); } @@ -1673,14 +1682,22 @@ do_printf(int nargs, int 
redirtype) redir_exp = PEEK(nargs); if (redir_exp->type != Node_val) fatal(_("attempt to use array `%s' in a scalar context"), array_vname(redir_exp)); - rp = redirect(redir_exp, redirtype, & errflg); + rp = redirect(redir_exp, redirtype, & errflg, true); if (rp != NULL) { if ((rp->flag & RED_TWOWAY) != 0 && rp->output.fp == NULL) { + if (is_non_fatal_redirect(redir_exp->stptr)) { + update_ERRNO_int(EBADF); + return; + } (void) close_rp(rp, CLOSE_ALL); fatal(_("printf: attempt to write to closed write end of two-way pipe")); } fp = rp->output.fp; } + else if (errflg) { + update_ERRNO_int(errflg); + return; + } } else if (do_debug) /* only the debugger can change the default output */ fp = output_fp; else @@ -1713,7 +1730,7 @@ do_sqrt(int nargs) double arg; tmp = POP_SCALAR(); - if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0) + if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0) lintwarn(_("sqrt: received non-numeric argument")); arg = (double) force_number(tmp)->numbr; DEREF(tmp); @@ -1854,7 +1871,7 @@ do_substr(int nargs) * way to do things. 
*/ memset(& mbs, 0, sizeof(mbs)); - emalloc(substr, char *, (length * gawk_mb_cur_max) + 2, "do_substr"); + emalloc(substr, char *, (length * gawk_mb_cur_max) + 1, "do_substr"); wp = t1->wstptr + indx; for (cp = substr; length > 0; length--) { result = wcrtomb(cp, *wp, & mbs); @@ -1903,7 +1920,7 @@ do_strftime(int nargs) unref(sub); if (val != NULL) { - if (do_lint && (val->flags & STRING) == 0) + if (do_lint && (fixtype(val)->flags & STRING) == 0) lintwarn(_("strftime: format value in PROCINFO[\"strftime\"] has numeric type")); val = force_string(val); format = val->stptr; @@ -1917,16 +1934,13 @@ do_strftime(int nargs) if (nargs == 3) { t3 = POP_SCALAR(); - if ((t3->flags & (NUMCUR|NUMBER)) != 0) - do_gmt = (t3->numbr != 0); - else - do_gmt = (t3->stlen > 0); + do_gmt = boolval(t3); DEREF(t3); } if (nargs >= 2) { t2 = POP_SCALAR(); - if (do_lint && (t2->flags & (NUMCUR|NUMBER)) == 0) + if (do_lint && (fixtype(t2)->flags & NUMBER) == 0) lintwarn(_("strftime: received non-numeric second argument")); (void) force_number(t2); clock_val = get_number_d(t2); @@ -1952,9 +1966,9 @@ do_strftime(int nargs) } tmp = POP_SCALAR(); - if (do_lint && (tmp->flags & (STRING|STRCUR)) == 0) + if (do_lint && (fixtype(tmp)->flags & STRING) == 0) lintwarn(_("strftime: received non-string first argument")); - + t1 = force_string(tmp); format = t1->stptr; formatlen = t1->stlen; @@ -2028,7 +2042,7 @@ do_mktime(int nargs) char save; t1 = POP_SCALAR(); - if (do_lint && (t1->flags & (STRING|STRCUR)) == 0) + if (do_lint && (fixtype(t1)->flags & STRING) == 0) lintwarn(_("mktime: received non-string argument")); t1 = force_string(t1); @@ -2086,7 +2100,7 @@ do_system(int nargs) (void) flush_io(); /* so output is synchronous with gawk's */ tmp = POP_SCALAR(); - if (do_lint && (tmp->flags & (STRING|STRCUR)) == 0) + if (do_lint && (fixtype(tmp)->flags & STRING) == 0) lintwarn(_("system: received non-string argument")); cmd = force_string(tmp)->stptr; @@ -2114,22 +2128,12 @@ do_system(int nargs) ; /* 
leave it alone, full 16 bits */ else if (do_traditional) #ifdef __MINGW32__ - ret = (((unsigned)status) & ~0xC0000000); + ret = (((unsigned)status) & ~0xC0000000); #else ret = (status / 256.0); #endif - else if (WIFEXITED(status)) - ret = WEXITSTATUS(status); /* normal exit */ - else if (WIFSIGNALED(status)) { - bool coredumped = false; -#ifdef WCOREDUMP - coredumped = WCOREDUMP(status); -#endif - /* use 256 since exit values are 8 bits */ - ret = WTERMSIG(status) + - (coredumped ? 512 : 256); - } else - ret = 0; /* shouldn't get here */ + else + ret = sanitize_exit_status(status); } if ((BINMODE & BINMODE_INPUT) != 0) @@ -2150,7 +2154,7 @@ void do_print(int nargs, int redirtype) { struct redirect *rp = NULL; - int errflg; /* not used, sigh */ + int errflg = 0; FILE *fp = NULL; int i; NODE *redir_exp = NULL; @@ -2162,14 +2166,22 @@ do_print(int nargs, int redirtype) redir_exp = PEEK(nargs); if (redir_exp->type != Node_val) fatal(_("attempt to use array `%s' in a scalar context"), array_vname(redir_exp)); - rp = redirect(redir_exp, redirtype, & errflg); + rp = redirect(redir_exp, redirtype, & errflg, true); if (rp != NULL) { if ((rp->flag & RED_TWOWAY) != 0 && rp->output.fp == NULL) { + if (is_non_fatal_redirect(redir_exp->stptr)) { + update_ERRNO_int(EBADF); + return; + } (void) close_rp(rp, CLOSE_ALL); fatal(_("print: attempt to write to closed write end of two-way pipe")); } fp = rp->output.fp; } + else if (errflg) { + update_ERRNO_int(errflg); + return; + } } else if (do_debug) /* only the debugger can change the default output */ fp = output_fp; else @@ -2183,8 +2195,10 @@ do_print(int nargs, int redirtype) fatal(_("attempt to use array `%s' in a scalar context"), array_vname(tmp)); } - if ((tmp->flags & STRCUR) == 0 || (tmp->stfmt != -1 && tmp->stfmt != OFMTidx)) - args_array[i] = format_val(OFMT, OFMTidx, tmp); + if ( (tmp->flags & STRCUR) == 0 + || ( tmp->stfmt != STFMT_UNUSED + && tmp->stfmt != OFMTidx)) + args_array[i] = format_val(OFMT, OFMTidx, tmp); } 
if (redir_exp != NULL) { @@ -2215,21 +2229,25 @@ do_print(int nargs, int redirtype) /* do_print_rec --- special case printing of $0, for speed */ -void +void do_print_rec(int nargs, int redirtype) { FILE *fp = NULL; NODE *f0; struct redirect *rp = NULL; - int errflg; /* not used, sigh */ + int errflg = 0; NODE *redir_exp = NULL; assert(nargs == 0); if (redirtype != 0) { redir_exp = TOP(); - rp = redirect(redir_exp, redirtype, & errflg); + rp = redirect(redir_exp, redirtype, & errflg, true); if (rp != NULL) { if ((rp->flag & RED_TWOWAY) != 0 && rp->output.fp == NULL) { + if (is_non_fatal_redirect(redir_exp->stptr)) { + update_ERRNO_int(EBADF); + return; + } (void) close_rp(rp, CLOSE_ALL); fatal(_("print: attempt to write to closed write end of two-way pipe")); } @@ -2240,6 +2258,11 @@ do_print_rec(int nargs, int redirtype) } else fp = output_fp; + if (errflg) { + update_ERRNO_int(errflg); + return; + } + if (fp == NULL) return; @@ -2333,7 +2356,7 @@ do_tolower(int nargs) NODE *t1, *t2; t1 = POP_SCALAR(); - if (do_lint && (t1->flags & (STRING|STRCUR)) == 0) + if (do_lint && (fixtype(t1)->flags & STRING) == 0) lintwarn(_("tolower: received non-string argument")); t1 = force_string(t1); t2 = make_string(t1->stptr, t1->stlen); @@ -2364,7 +2387,7 @@ do_toupper(int nargs) NODE *t1, *t2; t1 = POP_SCALAR(); - if (do_lint && (t1->flags & (STRING|STRCUR)) == 0) + if (do_lint && (fixtype(t1)->flags & STRING) == 0) lintwarn(_("toupper: received non-string argument")); t1 = force_string(t1); t2 = make_string(t1->stptr, t1->stlen); @@ -2397,9 +2420,9 @@ do_atan2(int nargs) POP_TWO_SCALARS(t1, t2); if (do_lint) { - if ((t1->flags & (NUMCUR|NUMBER)) == 0) + if ((fixtype(t1)->flags & NUMBER) == 0) lintwarn(_("atan2: received non-numeric first argument")); - if ((t2->flags & (NUMCUR|NUMBER)) == 0) + if ((fixtype(t2)->flags & NUMBER) == 0) lintwarn(_("atan2: received non-numeric second argument")); } d1 = force_number(t1)->numbr; @@ -2418,7 +2441,7 @@ do_sin(int nargs) double d; tmp = 
POP_SCALAR(); - if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0) + if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0) lintwarn(_("sin: received non-numeric argument")); d = sin((double) force_number(tmp)->numbr); DEREF(tmp); @@ -2434,7 +2457,7 @@ do_cos(int nargs) double d; tmp = POP_SCALAR(); - if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0) + if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0) lintwarn(_("cos: received non-numeric argument")); d = cos((double) force_number(tmp)->numbr); DEREF(tmp); @@ -2453,6 +2476,8 @@ static char *const state = (char *const) istate; NODE * do_rand(int nargs ATTRIBUTE_UNUSED) { + double tmprand; +#define RAND_DIVISOR ((double)GAWK_RANDOM_MAX+1.0) if (firstrand) { (void) initstate((unsigned) 1, state, SIZEOF_STATE); /* don't need to srandom(1), initstate() does it for us. */ @@ -2464,7 +2489,66 @@ do_rand(int nargs ATTRIBUTE_UNUSED) * * 0 <= n < 1 */ - return make_number((AWKNUM) (random() % GAWK_RANDOM_MAX) / GAWK_RANDOM_MAX); + /* + * Date: Wed, 28 Aug 2013 17:52:46 -0700 + * From: Bob Jewett <jewett@bill.scs.agilent.com> + * + * Call random() twice to fill in more bits in the value + * of the double. Also, there is a bug in random() such + * that when the values of successive values are combined + * like (rand1*rand2)^2, (rand3*rand4)^2, ... the + * resulting time series is not white noise. The + * following also seems to fix that bug. + * + * The add/subtract 0.5 keeps small bits from filling + * below 2^-53 in the double, not that anyone should be + * looking down there. + * + * Date: Wed, 25 Sep 2013 10:45:38 -0600 (MDT) + * From: "Nelson H. F. 
Beebe" <beebe@math.utah.edu> + * (4) The code is typical of many published fragments for converting + * from integer to floating-point, and I discuss the serious pitfalls + * in my book, because it leads to platform-dependent behavior at the + * end points of the interval [0,1] + * + * (5) the documentation in the gawk info node says + * + * `rand()' + * Return a random number. The values of `rand()' are uniformly + * distributed between zero and one. The value could be zero but is + * never one.(1) + * + * The division by RAND_DIVISOR may not guarantee that 1.0 is never + * returned: the programmer forgot the platform-dependent issue of + * rounding. + * + * For points 4 and 5, the safe way is a loop: + * + * double + * rand(void) // return value in [0.0, 1.0) + * { + * value = internal_rand(); + * + * while (value == 1.0) + * value = internal_rand(); + * + * return (value); + * } + */ + + do { + long d1, d2; + /* + * Do the calls in predictable order to avoid + * compiler differences in order of evaluation. 
+ */ + d1 = random(); + d2 = random(); + tmprand = 0.5 + ( (d1/RAND_DIVISOR + d2) / RAND_DIVISOR ); + tmprand -= 0.5; + } while (tmprand == 1.0); + + return make_number((AWKNUM) tmprand); } /* do_srand --- seed the random number generator */ @@ -2487,7 +2571,7 @@ do_srand(int nargs) srandom((unsigned int) (save_seed = (long) time((time_t *) 0))); else { tmp = POP_SCALAR(); - if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0) + if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0) lintwarn(_("srand: received non-numeric argument")); srandom((unsigned int) (save_seed = (long) force_number(tmp)->numbr)); DEREF(tmp); @@ -2525,7 +2609,7 @@ do_match(int nargs) tre = POP(); rp = re_update(tre); t1 = POP_STRING(); - + rstart = research(rp, t1->stptr, 0, t1->stlen, RE_NEED_START); if (rstart >= 0) { /* match succeded */ size_t *wc_indices = NULL; @@ -2538,7 +2622,7 @@ do_match(int nargs) } rstart++; /* now it's 1-based indexing */ - + /* Build the array only if the caller wants the optional subpatterns */ if (dest != NULL) { subsepstr = SUBSEP_node->var_value->stptr; @@ -2554,7 +2638,7 @@ do_match(int nargs) size_t subpat_len; NODE **lhs; NODE *sub; - + start = t1->stptr + s; subpat_start = s; subpat_len = len = SUBPATEND(rp, t1->stptr, ii) - s; @@ -2562,7 +2646,7 @@ do_match(int nargs) subpat_start = wc_indices[s]; subpat_len = wc_indices[s + len - 1] - subpat_start + 1; } - + it = make_string(start, len); it->flags |= MAYBE_NUM; /* user input */ @@ -2577,8 +2661,8 @@ do_match(int nargs) sprintf(buff, "%d", ii); ilen = strlen(buff); - amt = ilen + subseplen + strlen("length") + 2; - + amt = ilen + subseplen + strlen("length") + 1; + if (oldamt == 0) { emalloc(buf, char *, amt, "do_match"); } else if (amt > oldamt) { @@ -2588,9 +2672,9 @@ do_match(int nargs) memcpy(buf, buff, ilen); memcpy(buf + ilen, subsepstr, subseplen); memcpy(buf + ilen + subseplen, "start", 6); - + slen = ilen + subseplen + 5; - + it = make_number((AWKNUM) subpat_start + 1); sub = make_string(buf, 
slen); lhs = assoc_lookup(dest, sub); @@ -2599,13 +2683,13 @@ do_match(int nargs) if (dest->astore != NULL) (*dest->astore)(dest, sub); unref(sub); - + memcpy(buf, buff, ilen); memcpy(buf + ilen, subsepstr, subseplen); memcpy(buf + ilen + subseplen, "length", 7); - + slen = ilen + subseplen + 6; - + it = make_number((AWKNUM) subpat_len); sub = make_string(buf, slen); lhs = assoc_lookup(dest, sub); @@ -2640,9 +2724,9 @@ do_match(int nargs) * Gsub can be tricksy; particularly when handling the case of null strings. * The following awk code was useful in debugging problems. It is too bad * that it does not readily translate directly into the C code, below. - * + * * #! /usr/local/bin/mawk -f - * + * * BEGIN { * true = 1; false = 0 * print "--->", mygsub("abc", "b+", "FOO") @@ -2652,7 +2736,7 @@ do_match(int nargs) * print "--->", mygsub("abc", "c+", "X") * print "--->", mygsub("abc", "x*$", "X") * } - * + * * function mygsub(str, regex, replace, origstr, newstr, eosflag, nonzeroflag) * { * origstr = str; @@ -2690,7 +2774,7 @@ do_match(int nargs) * } * if (length(str) > 0) * newstr = newstr str # rest of string - * + * * return newstr * } */ @@ -2702,7 +2786,7 @@ do_match(int nargs) * * The relevant text is to be found on lines 6394-6407 (pages 166, 167) of the * 2001 standard: - * + * * sub(ere, repl[, in ]) * Substitute the string repl in place of the first instance of the * extended regular expression ERE in string in and return the number of @@ -2751,56 +2835,48 @@ do_sub(int nargs, unsigned int flags) int ampersands; int matches = 0; Regexp *rp; - NODE *s; /* subst. pattern */ - NODE *t; /* string to make sub. in; $0 if none given */ + NODE *rep_node; /* replacement text */ + NODE *target; /* string to make sub. 
in; $0 if none given */ NODE *tmp; NODE **lhs = NULL; long how_many = 1; /* one substitution for sub, also gensub default */ - int global; + bool global; long current; bool lastmatchnonzero; char *mb_indices = NULL; - + if ((flags & GENSUB) != 0) { double d; - NODE *t1; + NODE *glob_flag; tmp = PEEK(3); rp = re_update(tmp); - t = POP_STRING(); /* original string */ - - t1 = POP_SCALAR(); /* value of global flag */ - if ((t1->flags & (STRCUR|STRING)) != 0) { - if (t1->stlen > 0 && (t1->stptr[0] == 'g' || t1->stptr[0] == 'G')) - how_many = -1; - else { - (void) force_number(t1); - d = get_number_d(t1); - if ((t1->flags & NUMCUR) != 0) - goto set_how_many; + target = POP_STRING(); /* original string */ - warning(_("gensub: third argument `%.*s' treated as 1"), - (int) t1->stlen, t1->stptr); - how_many = 1; - } - } else { - (void) force_number(t1); - d = get_number_d(t1); -set_how_many: + glob_flag = POP_SCALAR(); /* value of global flag */ + if ( (glob_flag->flags & STRING) != 0 + && glob_flag->stlen > 0 + && (glob_flag->stptr[0] == 'g' || glob_flag->stptr[0] == 'G')) + how_many = -1; + else { + (void) force_number(glob_flag); + d = get_number_d(glob_flag); if (d < 1) how_many = 1; else if (d < LONG_MAX) how_many = d; else how_many = LONG_MAX; - if (d <= 0) - warning(_("gensub: third argument %g treated as 1"), d); + if (d <= 0) { + (void) force_string(glob_flag); + warning(_("gensub: third argument `%.*s' treated as 1"), + (int) glob_flag->stlen, + glob_flag->stptr); + } } - DEREF(t1); - + DEREF(glob_flag); } else { - /* take care of regexp early, in case re_update is fatal */ tmp = PEEK(2); @@ -2812,30 +2888,30 @@ set_how_many: /* original string */ if ((flags & LITERAL) != 0) - t = POP_STRING(); + target = POP_STRING(); else { lhs = POP_ADDRESS(); - t = force_string(*lhs); + target = force_string(*lhs); } } global = (how_many == -1); - s = POP_STRING(); /* replacement text */ + rep_node = POP_STRING(); /* replacement text */ decr_sp(); /* regexp, already updated 
above */ /* do the search early to avoid work on non-match */ - if (research(rp, t->stptr, 0, t->stlen, RE_NEED_START) == -1 || - RESTART(rp, t->stptr) > t->stlen) + if (research(rp, target->stptr, 0, target->stlen, RE_NEED_START) == -1 || + RESTART(rp, target->stptr) > target->stlen) goto done; - t->flags |= STRING; + target->flags |= STRING; - text = t->stptr; - textlen = t->stlen; + text = target->stptr; + textlen = target->stlen; - repl = s->stptr; - replend = repl + s->stlen; + repl = rep_node->stptr; + replend = repl + rep_node->stlen; repllen = replend - repl; ampersands = 0; @@ -2853,6 +2929,7 @@ set_how_many: index_multibyte_buffer(repl, mb_indices, repllen); } + /* compute length of replacement string, number of ampersands */ for (scan = repl; scan < replend; scan++) { if ((gawk_mb_cur_max == 1 || (repllen > 0 && mb_indices[scan - repl] == 1)) && (*scan == '&')) { @@ -2899,24 +2976,32 @@ set_how_many: lastmatchnonzero = false; - /* guesstimate how much room to allocate; +2 forces > 0 */ - buflen = textlen + (ampersands + 1) * repllen + 2; - emalloc(buf, char *, buflen + 2, "do_sub"); + /* guesstimate how much room to allocate; +1 forces > 0 */ + buflen = textlen + (ampersands + 1) * repllen + 1; + emalloc(buf, char *, buflen + 1, "do_sub"); buf[buflen] = '\0'; - buf[buflen + 1] = '\0'; bp = buf; for (current = 1;; current++) { matches++; - matchstart = t->stptr + RESTART(rp, t->stptr); - matchend = t->stptr + REEND(rp, t->stptr); + matchstart = target->stptr + RESTART(rp, target->stptr); + matchend = target->stptr + REEND(rp, target->stptr); /* * create the result, copying in parts of the original - * string + * string. note that length of replacement string can + * vary since ampersand is actual text of regexp match. */ - len = matchstart - text + repllen - + ampersands * (matchend - matchstart); + + /* + * add 1 to len to handle "empty" case where + * matchend == matchstart and we force a match on a single + * char. 
Use 'matchend - text' instead of 'matchstart - text' + * because we may not actually make any substitution depending + * on the 'global' and 'how_many' values. + */ + len = matchend - text + repllen + + ampersands * (matchend - matchstart) + 1; sofar = bp - buf; while (buflen < (sofar + len + 1)) { buflen *= 2; @@ -2963,13 +3048,13 @@ set_how_many: if (flags & GENSUB) { /* gensub, behave sanely */ if (isdigit((unsigned char) scan[1])) { int dig = scan[1] - '0'; - if (dig < NUMSUBPATS(rp, t->stptr) && SUBPATSTART(rp, tp->stptr, dig) != -1) { + if (dig < NUMSUBPATS(rp, target->stptr) && SUBPATSTART(rp, tp->stptr, dig) != -1) { char *start, *end; - - start = t->stptr - + SUBPATSTART(rp, t->stptr, dig); - end = t->stptr - + SUBPATEND(rp, t->stptr, dig); + + start = target->stptr + + SUBPATSTART(rp, target->stptr, dig); + end = target->stptr + + SUBPATEND(rp, target->stptr, dig); for (cp = start; cp < end; cp++) *bp++ = *cp; @@ -3025,19 +3110,29 @@ set_how_many: textlen = text + textlen - matchend; text = matchend; +#if 0 + if (bp - buf > sofar + len) + fprintf(stderr, "debug: len = %zu, but used %ld\n", len, (long)((bp - buf) - (long)sofar)); +#endif + if ((current >= how_many && ! global) || ((long) textlen <= 0 && matchstart == matchend) - || research(rp, t->stptr, text - t->stptr, textlen, RE_NEED_START) == -1) + || research(rp, target->stptr, text - target->stptr, textlen, RE_NEED_START) == -1) break; } sofar = bp - buf; - if (buflen - sofar - textlen - 1) { - buflen = sofar + textlen + 2; + if (buflen < (sofar + textlen + 1)) { + buflen = sofar + textlen + 1; erealloc(buf, char *, buflen, "do_sub"); bp = buf + sofar; } - for (scan = matchend; scan < text + textlen; scan++) + /* + * Note that text == matchend, since that assignment is made before + * exiting the 'for' loop above. Thus we copy in the rest of the + * original string. 
+ */ + for (scan = text; scan < text + textlen; scan++) *bp++ = *scan; *bp = '\0'; textlen = bp - buf; @@ -3046,31 +3141,31 @@ set_how_many: efree(mb_indices); done: - DEREF(s); + DEREF(rep_node); if ((matches == 0 || (flags & LITERAL) != 0) && buf != NULL) { - efree(buf); + efree(buf); buf = NULL; } if (flags & GENSUB) { if (matches > 0) { /* return the result string */ - DEREF(t); + DEREF(target); assert(buf != NULL); - return make_str_node(buf, textlen, ALREADY_MALLOCED); + return make_str_node(buf, textlen, ALREADY_MALLOCED); } /* return the original string */ - return t; + return target; } /* For a string literal, must not change the original string. */ if ((flags & LITERAL) != 0) - DEREF(t); + DEREF(target); else if (matches > 0) { unref(*lhs); - *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED); + *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED); } return make_number((AWKNUM) matches); @@ -3238,16 +3333,18 @@ do_lshift(int nargs) POP_TWO_SCALARS(s1, s2); if (do_lint) { - if ((s1->flags & (NUMCUR|NUMBER)) == 0) + if ((fixtype(s1)->flags & NUMBER) == 0) lintwarn(_("lshift: received non-numeric first argument")); - if ((s2->flags & (NUMCUR|NUMBER)) == 0) + if ((fixtype(s2)->flags & NUMBER) == 0) lintwarn(_("lshift: received non-numeric second argument")); } + val = force_number(s1)->numbr; shift = force_number(s2)->numbr; + if (val < 0 || shift < 0) + fatal(_("lshift(%f, %f): negative values are not allowed"), val, shift); + if (do_lint) { - if (val < 0 || shift < 0) - lintwarn(_("lshift(%f, %f): negative values will give strange results"), val, shift); if (double_to_int(val) != val || double_to_int(shift) != shift) lintwarn(_("lshift(%f, %f): fractional values will be truncated"), val, shift); if (shift >= sizeof(uintmax_t) * CHAR_BIT) @@ -3275,16 +3372,18 @@ do_rshift(int nargs) POP_TWO_SCALARS(s1, s2); if (do_lint) { - if ((s1->flags & (NUMCUR|NUMBER)) == 0) + if ((fixtype(s1)->flags & NUMBER) == 0) lintwarn(_("rshift: received non-numeric first 
argument")); - if ((s2->flags & (NUMCUR|NUMBER)) == 0) + if ((fixtype(s2)->flags & NUMBER) == 0) lintwarn(_("rshift: received non-numeric second argument")); } + val = force_number(s1)->numbr; shift = force_number(s2)->numbr; + if (val < 0 || shift < 0) + fatal(_("rshift(%f, %f): negative values are not allowed"), val, shift); + if (do_lint) { - if (val < 0 || shift < 0) - lintwarn(_("rshift(%f, %f): negative values will give strange results"), val, shift); if (double_to_int(val) != val || double_to_int(shift) != shift) lintwarn(_("rshift(%f, %f): fractional values will be truncated"), val, shift); if (shift >= sizeof(uintmax_t) * CHAR_BIT) @@ -3317,12 +3416,12 @@ do_and(int nargs) for (i = 1; nargs > 0; nargs--, i++) { s1 = POP_SCALAR(); - if (do_lint && (s1->flags & (NUMCUR|NUMBER)) == 0) + if (do_lint && (fixtype(s1)->flags & NUMBER) == 0) lintwarn(_("and: argument %d is non-numeric"), i); val = force_number(s1)->numbr; - if (do_lint && val < 0) - lintwarn(_("and: argument %d negative value %g will give strange results"), i, val); + if (val < 0) + fatal(_("and: argument %d negative value %g is not allowed"), i, val); uval = (uintmax_t) val; res &= uval; @@ -3349,12 +3448,12 @@ do_or(int nargs) for (i = 1; nargs > 0; nargs--, i++) { s1 = POP_SCALAR(); - if (do_lint && (s1->flags & (NUMCUR|NUMBER)) == 0) + if (do_lint && (fixtype(s1)->flags & NUMBER) == 0) lintwarn(_("or: argument %d is non-numeric"), i); val = force_number(s1)->numbr; - if (do_lint && val < 0) - lintwarn(_("or: argument %d negative value %g will give strange results"), i, val); + if (val < 0) + fatal(_("or: argument %d negative value %g is not allowed"), i, val); uval = (uintmax_t) val; res |= uval; @@ -3381,12 +3480,12 @@ do_xor(int nargs) res = 0; /* silence compiler warning */ for (i = 1; nargs > 0; nargs--, i++) { s1 = POP_SCALAR(); - if (do_lint && (s1->flags & (NUMCUR|NUMBER)) == 0) + if (do_lint && (fixtype(s1)->flags & NUMBER) == 0) lintwarn(_("xor: argument %d is non-numeric"), i); val = 
force_number(s1)->numbr; - if (do_lint && val < 0) - lintwarn(_("xor: argument %d negative value %g will give strange results"), i, val); + if (val < 0) + fatal(_("xor: argument %d negative value %g is not allowed"), i, val); uval = (uintmax_t) val; if (i == 1) @@ -3410,17 +3509,16 @@ do_compl(int nargs) uintmax_t uval; tmp = POP_SCALAR(); - if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0) + if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0) lintwarn(_("compl: received non-numeric argument")); d = force_number(tmp)->numbr; DEREF(tmp); - if (do_lint) { - if (d < 0) - lintwarn(_("compl(%f): negative value will give strange results"), d); - if (double_to_int(d) != d) - lintwarn(_("compl(%f): fractional value will be truncated"), d); - } + if (d < 0) + fatal(_("compl(%f): negative value is not allowed"), d); + + if (do_lint && double_to_int(d) != d) + lintwarn(_("compl(%f): fractional value will be truncated"), d); uval = (uintmax_t) d; uval = ~ uval; @@ -3435,11 +3533,11 @@ do_strtonum(int nargs) NODE *tmp; AWKNUM d; - tmp = POP_SCALAR(); - if ((tmp->flags & (NUMBER|NUMCUR)) != 0) - d = (AWKNUM) force_number(tmp)->numbr; + tmp = fixtype(POP_SCALAR()); + if ((tmp->flags & NUMBER) != 0) + d = (AWKNUM) tmp->numbr; else if (get_numbase(tmp->stptr, use_lc_numeric) != 10) - d = nondec2awknum(tmp->stptr, tmp->stlen); + d = nondec2awknum(tmp->stptr, tmp->stlen, NULL); else d = (AWKNUM) force_number(tmp)->numbr; @@ -3456,7 +3554,7 @@ do_strtonum(int nargs) */ AWKNUM -nondec2awknum(char *str, size_t len) +nondec2awknum(char *str, size_t len, char **endptr) { AWKNUM retval = 0.0; char save; @@ -3468,8 +3566,11 @@ nondec2awknum(char *str, size_t len) * User called strtonum("0x") or some such, * so just quit early. 
*/ - if (len <= 2) + if (len <= 2) { + if (endptr) + *endptr = start; return (AWKNUM) 0.0; + } for (str += 2, len -= 2; len > 0; len--, str++) { switch (*str) { @@ -3502,14 +3603,21 @@ nondec2awknum(char *str, size_t len) val = *str - 'A' + 10; break; default: + if (endptr) + *endptr = str; goto done; } retval = (retval * 16) + val; } + if (endptr) + *endptr = str; } else if (*str == '0') { for (; len > 0; len--) { - if (! isdigit((unsigned char) *str)) + if (! isdigit((unsigned char) *str)) { + if (endptr) + *endptr = str; goto done; + } else if (*str == '8' || *str == '9') { str = start; goto decimal; @@ -3517,11 +3625,13 @@ nondec2awknum(char *str, size_t len) retval = (retval * 8) + (*str - '0'); str++; } + if (endptr) + *endptr = str; } else { decimal: save = str[len]; str[len] = '\0'; - retval = strtod(str, NULL); + retval = strtod(str, endptr); str[len] = save; } done: @@ -3757,6 +3867,125 @@ do_bindtextdomain(int nargs) return make_string(the_result, strlen(the_result)); } +/* do_intdiv --- do integer division, return quotient and remainder in dest array */ + +/* + * We define the semantics as: + * numerator = int(numerator) + * denominator = int(denominator) + * quotient = int(numerator / denominator) + * remainder = int(numerator % denominator) + */ + +NODE * +do_intdiv(int nargs) +{ + NODE *numerator, *denominator, *result; + double num, denom, quotient, remainder; + NODE *sub, **lhs; + + result = POP_PARAM(); + if (result->type != Node_var_array) + fatal(_("intdiv: third argument is not an array")); + assoc_clear(result); + + denominator = POP_SCALAR(); + numerator = POP_SCALAR(); + + if (do_lint) { + if ((fixtype(numerator)->flags & NUMBER) == 0) + lintwarn(_("intdiv: received non-numeric first argument")); + if ((fixtype(denominator)->flags & NUMBER) == 0) + lintwarn(_("intdiv: received non-numeric second argument")); + } + + (void) force_number(numerator); + (void) force_number(denominator); + num = double_to_int(get_number_d(numerator)); + denom = 
double_to_int(get_number_d(denominator)); + + if (denom == 0.0) + fatal(_("intdiv: division by zero attempted")); + + quotient = double_to_int(num / denom); + /* + * FIXME: This code is duplicated, factor it out to a + * separate function. + */ +#ifdef HAVE_FMOD + remainder = fmod(num, denom); +#else /* ! HAVE_FMOD */ + (void) modf(num / denom, & remainder); + remainder = num - remainder * denom; +#endif /* ! HAVE_FMOD */ + remainder = double_to_int(remainder); + + sub = make_string("quotient", 8); + lhs = assoc_lookup(result, sub); + unref(*lhs); + *lhs = make_number((AWKNUM) quotient); + + sub = make_string("remainder", 9); + lhs = assoc_lookup(result, sub); + unref(*lhs); + *lhs = make_number((AWKNUM) remainder); + + return make_number((AWKNUM) 0.0); +} + +/* do_typeof --- return a string with the type of the arg */ + +NODE * +do_typeof(int nargs) +{ + NODE *arg; + char *res = "unknown"; + bool deref = true; + + arg = POP(); + switch (arg->type) { + case Node_var_array: + /* Node_var_array is never UPREF'ed */ + res = "array"; + deref = false; + break; + case Node_val: + case Node_var: + switch (arg->flags & (STRING|NUMBER|MAYBE_NUM)) { + case STRING: + res = "string"; + break; + case NUMBER: + res = "number"; + break; + case STRING|MAYBE_NUM: + res = "strnum"; + break; + case NUMBER|STRING: + if (arg == Nnull_string) { + res = "unassigned"; + break; + } + /* fall through */ + default: + warning(_("typeof detected invalid flags combination `%s'; please file a bug report."), flags2str(arg->flags)); + break; + } + break; + case Node_var_new: + res = "untyped"; + deref = false; + break; + default: + fatal(_("typeof: unknown argument type `%s'"), + nodetype2str(arg->type)); + break; + } + + if (deref) + DEREF(arg); + return make_string(res, strlen(res)); +} /* mbc_byte_count --- return number of bytes for corresponding numchars multibyte characters */ @@ -3813,3 +4042,24 @@ mbc_char_count(const char *ptr, size_t numbytes) return sum; } + +/* sanitize_exit_status --- 
convert a 16 bit Unix exit status into something reasonable */ + +int sanitize_exit_status(int status) +{ + int ret = 0; + + if (WIFEXITED(status)) + ret = WEXITSTATUS(status); /* normal exit */ + else if (WIFSIGNALED(status)) { + bool coredumped = false; +#ifdef WCOREDUMP + coredumped = WCOREDUMP(status); +#endif + /* use 256 since exit values are 8 bits */ + ret = WTERMSIG(status) + (coredumped ? 512 : 256); + } else + ret = 0; /* shouldn't get here */ + + return ret; +} |