diff options
Diffstat (limited to 'builtin.c')
-rw-r--r-- | builtin.c | 351 |
1 files changed, 284 insertions, 67 deletions
@@ -129,10 +129,14 @@ wrerror: if (fp == stdout && errno == EPIPE) gawk_exit(EXIT_FATAL); + /* otherwise die verbosely */ - fatal(_("%s to \"%s\" failed (%s)"), from, - rp ? rp->value : _("standard output"), - errno ? strerror(errno) : _("reason unknown")); + if ((rp != NULL) ? is_non_fatal_redirect(rp->value) : is_non_fatal_std(fp)) + update_ERRNO_int(errno); + else + fatal(_("%s to \"%s\" failed (%s)"), from, + rp ? rp->value : _("standard output"), + errno ? strerror(errno) : _("reason unknown")); } /* do_exp --- exponential function */ @@ -477,6 +481,12 @@ do_isarray(int nargs) { NODE *tmp; int ret = 1; + static bool warned = false; + + if (do_lint && ! warned) { + warned = true; + lintwarn(_("`isarray' is deprecated. Use `typeof' instead")); + } tmp = POP(); if (tmp->type != Node_var_array) { @@ -1639,7 +1649,7 @@ do_printf(int nargs, int redirtype) FILE *fp = NULL; NODE *tmp; struct redirect *rp = NULL; - int errflg; /* not used, sigh */ + int errflg = 0; NODE *redir_exp = NULL; if (nargs == 0) { @@ -1650,7 +1660,7 @@ do_printf(int nargs, int redirtype) redir_exp = TOP(); if (redir_exp->type != Node_val) fatal(_("attempt to use array `%s' in a scalar context"), array_vname(redir_exp)); - rp = redirect(redir_exp, redirtype, & errflg); + rp = redirect(redir_exp, redirtype, & errflg, true); DEREF(redir_exp); decr_sp(); } @@ -1663,9 +1673,13 @@ do_printf(int nargs, int redirtype) redir_exp = PEEK(nargs); if (redir_exp->type != Node_val) fatal(_("attempt to use array `%s' in a scalar context"), array_vname(redir_exp)); - rp = redirect(redir_exp, redirtype, & errflg); + rp = redirect(redir_exp, redirtype, & errflg, true); if (rp != NULL) fp = rp->output.fp; + else if (errflg) { + update_ERRNO_int(errflg); + return; + } } else if (do_debug) /* only the debugger can change the default output */ fp = output_fp; else @@ -1839,7 +1853,7 @@ do_substr(int nargs) * way to do things. */ memset(& mbs, 0, sizeof(mbs)); - emalloc(substr, char *, (length * gawk_mb_cur_max) + 2, "do_substr"); + emalloc(substr, char *, (length * gawk_mb_cur_max) + 1, "do_substr"); wp = t1->wstptr + indx; for (cp = substr; length > 0; length--) { result = wcrtomb(cp, *wp, & mbs); @@ -2098,7 +2112,7 @@ void do_print(int nargs, int redirtype) { struct redirect *rp = NULL; - int errflg; /* not used, sigh */ + int errflg = 0; FILE *fp = NULL; int i; NODE *redir_exp = NULL; @@ -2110,9 +2124,13 @@ do_print(int nargs, int redirtype) redir_exp = PEEK(nargs); if (redir_exp->type != Node_val) fatal(_("attempt to use array `%s' in a scalar context"), array_vname(redir_exp)); - rp = redirect(redir_exp, redirtype, & errflg); + rp = redirect(redir_exp, redirtype, & errflg, true); if (rp != NULL) fp = rp->output.fp; + else if (errflg) { + update_ERRNO_int(errflg); + return; + } } else if (do_debug) /* only the debugger can change the default output */ fp = output_fp; else @@ -2126,7 +2144,9 @@ do_print(int nargs, int redirtype) fatal(_("attempt to use array `%s' in a scalar context"), array_vname(tmp)); } - if ((tmp->flags & (NUMBER|STRING)) == NUMBER) { + if (tmp->type == Node_typedregex) + args_array[i] = force_string(tmp); + else if ((tmp->flags & (NUMBER|STRING)) == NUMBER) { if (OFMTidx == CONVFMTidx) args_array[i] = force_string(tmp); else @@ -2168,13 +2188,13 @@ do_print_rec(int nargs, int redirtype) FILE *fp = NULL; NODE *f0; struct redirect *rp = NULL; - int errflg; /* not used, sigh */ + int errflg = 0; NODE *redir_exp = NULL; assert(nargs == 0); if (redirtype != 0) { redir_exp = TOP(); - rp = redirect(redir_exp, redirtype, & errflg); + rp = redirect(redir_exp, redirtype, & errflg, true); if (rp != NULL) fp = rp->output.fp; DEREF(redir_exp); @@ -2182,6 +2202,11 @@ do_print_rec(int nargs, int redirtype) } else fp = output_fp; + if (errflg) { + update_ERRNO_int(errflg); + return; + } + if (fp == NULL) return; @@ -2395,6 +2420,8 @@ static char *const state = (char *const) istate; NODE * do_rand(int nargs ATTRIBUTE_UNUSED) { + double tmprand; +#define RAND_DIVISOR ((double)GAWK_RANDOM_MAX+1.0) if (firstrand) { (void) initstate((unsigned) 1, state, SIZEOF_STATE); /* don't need to srandom(1), initstate() does it for us. */ @@ -2406,7 +2433,66 @@ do_rand(int nargs ATTRIBUTE_UNUSED) * * 0 <= n < 1 */ - return make_number((AWKNUM) (random() % GAWK_RANDOM_MAX) / GAWK_RANDOM_MAX); + /* + * Date: Wed, 28 Aug 2013 17:52:46 -0700 + * From: Bob Jewett <jewett@bill.scs.agilent.com> + * + * Call random() twice to fill in more bits in the value + * of the double. Also, there is a bug in random() such + * that when the values of successive values are combined + * like (rand1*rand2)^2, (rand3*rand4)^2, ... the + * resulting time series is not white noise. The + * following also seems to fix that bug. + * + * The add/subtract 0.5 keeps small bits from filling + * below 2^-53 in the double, not that anyone should be + * looking down there. + * + * Date: Wed, 25 Sep 2013 10:45:38 -0600 (MDT) + * From: "Nelson H. F. Beebe" <beebe@math.utah.edu> + * (4) The code is typical of many published fragments for converting + * from integer to floating-point, and I discuss the serious pitfalls + * in my book, because it leads to platform-dependent behavior at the + * end points of the interval [0,1] + * + * (5) the documentation in the gawk info node says + * + * `rand()' + * Return a random number. The values of `rand()' are uniformly + * distributed between zero and one. The value could be zero but is + * never one.(1) + * + * The division by RAND_DIVISOR may not guarantee that 1.0 is never + * returned: the programmer forgot the platform-dependent issue of + * rounding. + * + * For points 4 and 5, the safe way is a loop: + * + * double + * rand(void) // return value in [0.0, 1.0) + * { + * value = internal_rand(); + * + * while (value == 1.0) + * value = internal_rand(); + * + * return (value); + * } + */ + + do { + long d1, d2; + /* + * Do the calls in predictable order to avoid + * compiler differences in order of evaluation. + */ + d1 = random(); + d2 = random(); + tmprand = 0.5 + ( (d1/RAND_DIVISOR + d2) / RAND_DIVISOR ); + tmprand -= 0.5; + } while (tmprand == 1.0); + + return make_number((AWKNUM) tmprand); } /* do_srand --- seed the random number generator */ @@ -2519,7 +2605,7 @@ do_match(int nargs) sprintf(buff, "%d", ii); ilen = strlen(buff); - amt = ilen + subseplen + strlen("length") + 2; + amt = ilen + subseplen + strlen("length") + 1; if (oldamt == 0) { emalloc(buf, char *, amt, "do_match"); @@ -2693,42 +2779,42 @@ do_sub(int nargs, unsigned int flags) int ampersands; int matches = 0; Regexp *rp; - NODE *s; /* subst. pattern */ - NODE *t; /* string to make sub. in; $0 if none given */ + NODE *rep_node; /* replacement text */ + NODE *target; /* string to make sub. in; $0 if none given */ NODE *tmp; NODE **lhs = NULL; long how_many = 1; /* one substitution for sub, also gensub default */ - int global; + bool global; long current; bool lastmatchnonzero; char *mb_indices = NULL; if ((flags & GENSUB) != 0) { double d; - NODE *t1; + NODE *glob_flag; tmp = PEEK(3); rp = re_update(tmp); - t = POP_STRING(); /* original string */ + target = POP_STRING(); /* original string */ - t1 = POP_SCALAR(); /* value of global flag */ - if ((t1->flags & (STRCUR|STRING)) != 0) { - if (t1->stlen > 0 && (t1->stptr[0] == 'g' || t1->stptr[0] == 'G')) + glob_flag = POP_SCALAR(); /* value of global flag */ + if ((glob_flag->flags & (STRCUR|STRING)) != 0) { + if (glob_flag->stlen > 0 && (glob_flag->stptr[0] == 'g' || glob_flag->stptr[0] == 'G')) how_many = -1; else { - (void) force_number(t1); - d = get_number_d(t1); - if ((t1->flags & NUMCUR) != 0) + (void) force_number(glob_flag); + d = get_number_d(glob_flag); + if ((glob_flag->flags & NUMCUR) != 0) goto set_how_many; warning(_("gensub: third argument `%.*s' treated as 1"), - (int) t1->stlen, t1->stptr); + (int) glob_flag->stlen, glob_flag->stptr); how_many = 1; } } else { - (void) force_number(t1); - d = get_number_d(t1); + (void) force_number(glob_flag); + d = get_number_d(glob_flag); set_how_many: if (d < 1) how_many = 1; @@ -2739,10 +2825,8 @@ set_how_many: if (d <= 0) warning(_("gensub: third argument %g treated as 1"), d); } - DEREF(t1); - + DEREF(glob_flag); } else { - /* take care of regexp early, in case re_update is fatal */ tmp = PEEK(2); @@ -2754,30 +2838,30 @@ set_how_many: /* original string */ if ((flags & LITERAL) != 0) - t = POP_STRING(); + target = POP_STRING(); else { lhs = POP_ADDRESS(); - t = force_string(*lhs); + target = force_string(*lhs); } } global = (how_many == -1); - s = POP_STRING(); /* replacement text */ + rep_node = POP_STRING(); /* replacement text */ decr_sp(); /* regexp, already updated above */ /* do the search early to avoid work on non-match */ - if (research(rp, t->stptr, 0, t->stlen, RE_NEED_START) == -1 || - RESTART(rp, t->stptr) > t->stlen) + if (research(rp, target->stptr, 0, target->stlen, RE_NEED_START) == -1 || + RESTART(rp, target->stptr) > target->stlen) goto done; - t->flags |= STRING; + target->flags |= STRING; - text = t->stptr; - textlen = t->stlen; + text = target->stptr; + textlen = target->stlen; - repl = s->stptr; - replend = repl + s->stlen; + repl = rep_node->stptr; + replend = repl + rep_node->stlen; repllen = replend - repl; ampersands = 0; @@ -2795,6 +2879,7 @@ set_how_many: index_multibyte_buffer(repl, mb_indices, repllen); } + /* compute length of replacement string, number of ampersands */ for (scan = repl; scan < replend; scan++) { if ((gawk_mb_cur_max == 1 || (repllen > 0 && mb_indices[scan - repl] == 1)) && (*scan == '&')) { @@ -2839,24 +2924,32 @@ set_how_many: lastmatchnonzero = false; - /* guesstimate how much room to allocate; +2 forces > 0 */ - buflen = textlen + (ampersands + 1) * repllen + 2; - emalloc(buf, char *, buflen + 2, "do_sub"); + /* guesstimate how much room to allocate; +1 forces > 0 */ + buflen = textlen + (ampersands + 1) * repllen + 1; + emalloc(buf, char *, buflen + 1, "do_sub"); buf[buflen] = '\0'; - buf[buflen + 1] = '\0'; bp = buf; for (current = 1;; current++) { matches++; - matchstart = t->stptr + RESTART(rp, t->stptr); - matchend = t->stptr + REEND(rp, t->stptr); + matchstart = target->stptr + RESTART(rp, target->stptr); + matchend = target->stptr + REEND(rp, target->stptr); /* * create the result, copying in parts of the original - * string + * string. note that length of replacement string can + * vary since ampersand is actual text of regexp match. */ - len = matchstart - text + repllen - + ampersands * (matchend - matchstart); + + /* + * add 1 to len to handle "empty" case where + * matchend == matchstart and we force a match on a single + * char. Use 'matchend - text' instead of 'matchstart - text' + * because we may not actually make any substitution depending + * on the 'global' and 'how_many' values. + */ + len = matchend - text + repllen + + ampersands * (matchend - matchstart) + 1; sofar = bp - buf; while (buflen < (sofar + len + 1)) { buflen *= 2; @@ -2903,13 +2996,13 @@ set_how_many: if (flags & GENSUB) { /* gensub, behave sanely */ if (isdigit((unsigned char) scan[1])) { int dig = scan[1] - '0'; - if (dig < NUMSUBPATS(rp, t->stptr) && SUBPATSTART(rp, tp->stptr, dig) != -1) { + if (dig < NUMSUBPATS(rp, target->stptr) && SUBPATSTART(rp, tp->stptr, dig) != -1) { char *start, *end; - start = t->stptr - + SUBPATSTART(rp, t->stptr, dig); - end = t->stptr - + SUBPATEND(rp, t->stptr, dig); + start = target->stptr + + SUBPATSTART(rp, target->stptr, dig); + end = target->stptr + + SUBPATEND(rp, target->stptr, dig); for (cp = start; cp < end; cp++) *bp++ = *cp; @@ -2963,19 +3056,29 @@ set_how_many: textlen = text + textlen - matchend; text = matchend; +#if 0 + if (bp - buf > sofar + len) + fprintf(stderr, "debug: len = %zu, but used %ld\n", len, (long)((bp - buf) - (long)sofar)); +#endif + if ((current >= how_many && ! global) || ((long) textlen <= 0 && matchstart == matchend) - || research(rp, t->stptr, text - t->stptr, textlen, RE_NEED_START) == -1) + || research(rp, target->stptr, text - target->stptr, textlen, RE_NEED_START) == -1) break; } sofar = bp - buf; - if (buflen - sofar - textlen - 1) { - buflen = sofar + textlen + 2; + if (buflen < (sofar + textlen + 1)) { + buflen = sofar + textlen + 1; erealloc(buf, char *, buflen, "do_sub"); bp = buf + sofar; } - for (scan = matchend; scan < text + textlen; scan++) + /* + * Note that text == matchend, since that assignment is made before + * exiting the 'for' loop above. Thus we copy in the rest of the + * original string. + */ + for (scan = text; scan < text + textlen; scan++) *bp++ = *scan; *bp = '\0'; textlen = bp - buf; @@ -2984,7 +3087,7 @@ set_how_many: efree(mb_indices); done: - DEREF(s); + DEREF(rep_node); if ((matches == 0 || (flags & LITERAL) != 0) && buf != NULL) { efree(buf); @@ -2994,18 +3097,18 @@ done: if (flags & GENSUB) { if (matches > 0) { /* return the result string */ - DEREF(t); + DEREF(target); assert(buf != NULL); return make_str_node(buf, textlen, ALREADY_MALLOCED); } /* return the original string */ - return t; + return target; } /* For a string literal, must not change the original string. */ if ((flags & LITERAL) != 0) - DEREF(t); + DEREF(target); else if (matches > 0) { unref(*lhs); *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED); @@ -3044,7 +3147,8 @@ call_sub(const char *name, int nargs) * push replace * push $0 */ - regex = make_regnode(Node_regex, regex); + if (regex->type != Node_typedregex) + regex = make_regnode(Node_regex, regex); PUSH(regex); PUSH(replace); lhs = r_get_field(zero, (Func_ptr *) 0, true); @@ -3068,7 +3172,8 @@ call_sub(const char *name, int nargs) * nargs++ * } */ - regex = make_regnode(Node_regex, regex); + if (regex->type != Node_typedregex) + regex = make_regnode(Node_regex, regex); PUSH(regex); PUSH(replace); PUSH(glob_flag); @@ -3105,7 +3210,8 @@ call_match(int nargs) /* Don't need to pop the string just to push it back ... */ - regex = make_regnode(Node_regex, regex); + if (regex->type != Node_typedregex) + regex = make_regnode(Node_regex, regex); PUSH(regex); if (array) @@ -3133,7 +3239,8 @@ call_split_func(const char *name, int nargs) if (nargs >= 3) { regex = POP_STRING(); - regex = make_regnode(Node_regex, regex); + if (regex->type != Node_typedregex) + regex = make_regnode(Node_regex, regex); } else { if (name[0] == 's') { regex = make_regnode(Node_regex, FS_node->var_value); @@ -3694,6 +3801,116 @@ do_bindtextdomain(int nargs) return make_string(the_result, strlen(the_result)); } +/* do_intdiv --- do integer division, return quotient and remainder in dest array */ + +/* + * We define the semantics as: + * numerator = int(numerator) + * denominator = int(denonmator) + * quotient = int(numerator / denomator) + * remainder = int(numerator % denomator) + */ + +NODE * +do_intdiv(int nargs) +{ + NODE *numerator, *denominator, *result; + double num, denom, quotient, remainder; + NODE *sub, **lhs; + + result = POP_PARAM(); + if (result->type != Node_var_array) + fatal(_("intdiv: third argument is not an array")); + assoc_clear(result); + + denominator = POP_SCALAR(); + numerator = POP_SCALAR(); + + if (do_lint) { + if ((numerator->flags & (NUMCUR|NUMBER)) == 0) + lintwarn(_("intdiv: received non-numeric first argument")); + if ((denominator->flags & (NUMCUR|NUMBER)) == 0) + lintwarn(_("intdiv: received non-numeric second argument")); + } + + (void) force_number(numerator); + (void) force_number(denominator); + num = double_to_int(get_number_d(numerator)); + denom = double_to_int(get_number_d(denominator)); + + if (denom == 0.0) + fatal(_("intdiv: division by zero attempted")); + + quotient = double_to_int(num / denom); + /* + * FIXME: This code is duplicated, factor it out to a + * separate function. + */ +#ifdef HAVE_FMOD + remainder = fmod(num, denom); +#else /* ! HAVE_FMOD */ + (void) modf(num / denom, & remainder); + remainder = num - remainder * denom; +#endif /* ! HAVE_FMOD */ + remainder = double_to_int(remainder); + + sub = make_string("quotient", 8); + lhs = assoc_lookup(result, sub); + unref(*lhs); + *lhs = make_number((AWKNUM) quotient); + + sub = make_string("remainder", 9); + lhs = assoc_lookup(result, sub); + unref(*lhs); + *lhs = make_number((AWKNUM) remainder); + + return make_number((AWKNUM) 0.0); +} + +/* do_typeof --- return a string with the type of the arg */ + +NODE * +do_typeof(int nargs) +{ + NODE *arg; + char *res = "unknown"; + bool deref = true; + + arg = POP(); + switch (arg->type) { + case Node_var_array: + /* Node_var_array is never UPREF'ed */ + res = "array"; + deref = false; + break; + case Node_typedregex: + res = "regexp"; + break; + case Node_val: + case Node_var: + if (arg == Nnull_string) + res = "unassigned"; + else if ((arg->flags & STRING) != 0) { + res = "string"; + if ((arg->flags & MAYBE_NUM) != 0) + res = "strnum"; + } else if ((arg->flags & NUMBER) != 0) + res = "number"; + break; + case Node_var_new: + res = "untyped"; + deref = false; + break; + default: + fatal(_("typeof: unknown argument type `%s'"), + nodetype2str(arg->type)); + break; + } + + if (deref) + DEREF(arg); + return make_string(res, strlen(res)); +} /* mbc_byte_count --- return number of bytes for corresponding numchars multibyte characters */ |