aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArnold D. Robbins <arnold@skeeve.com>2011-04-29 10:35:31 +0300
committerArnold D. Robbins <arnold@skeeve.com>2011-04-29 10:35:31 +0300
commit70a498e0df48c71699797024fcb4c8154599ea5d (patch)
tree544ce76df279e47303dd1deb532986b655bd2853
parent04746bc5c0301fac55badc956225f486607dffd9 (diff)
downloadegawk-70a498e0df48c71699797024fcb4c8154599ea5d.tar.gz
egawk-70a498e0df48c71699797024fcb4c8154599ea5d.tar.bz2
egawk-70a498e0df48c71699797024fcb4c8154599ea5d.zip
Bug fixes from John and Pat.
-rw-r--r--ChangeLog16
-rw-r--r--Makefile.am1
-rw-r--r--Makefile.in1
-rw-r--r--array.c49
-rw-r--r--awkprintf.h1019
-rw-r--r--builtin.c1005
-rw-r--r--debug.c22
7 files changed, 1037 insertions, 1076 deletions
diff --git a/ChangeLog b/ChangeLog
index cad2f3a7..3295d46d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+Fri Apr 29 10:27:18 2011 Arnold D. Robbins <arnold@skeeve.com>
+
+ * array.c (awk_hash): Remove code for VAXC, it's no longer
+ needed. Per Pat Rankin.
+
+Fri Apr 29 10:15:24 2011 John Haque <j.eh@mchsi.com>
+
+ * builtin.c: Relocate all codes from awkprintf.h. Restore
+ format_tree.
+ * debug.c (do_print_f): Adjust appropriately. Install fatal trap
+ for format_tree.
+ * Makefile.am (base_sources): Remove awkprintf.h.
+
+ * array.c (assoc_list): Avoid possible crash; Remove unneeded
+ initialization of pre_func.
+
Wed Apr 27 22:31:23 2011 Arnold D. Robbins <arnold@skeeve.com>
* awk.h (ahash_dupnode): Merged into dupnode in node.c, change uses.
diff --git a/Makefile.am b/Makefile.am
index 71136366..c89f6423 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -83,7 +83,6 @@ base_sources = \
awk.h \
awkgram.y \
builtin.c \
- awkprintf.h \
custom.h \
dfa.c \
dfa.h \
diff --git a/Makefile.in b/Makefile.in
index f8b51282..6348d56b 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -359,7 +359,6 @@ base_sources = \
awk.h \
awkgram.y \
builtin.c \
- awkprintf.h \
custom.h \
dfa.c \
dfa.h \
diff --git a/array.c b/array.c
index 3c960fbc..7f644ba7 100644
--- a/array.c
+++ b/array.c
@@ -333,17 +333,18 @@ static unsigned long
awk_hash(const char *s, size_t len, unsigned long hsize, size_t *code)
{
unsigned long h = 0;
+ unsigned long htmp;
/*
+ * Ozan Yigit's original sdbm hash, copied from Margo Seltzers
+ * db package.
+ *
* This is INCREDIBLY ugly, but fast. We break the string up into
* 8 byte units. On the first time through the loop we get the
* "leftover bytes" (strlen % 8). On every other iteration, we
* perform 8 HASHC's so we handle all 8 bytes. Essentially, this
* saves us 7 cmp & branch instructions. If this routine is
* heavily used enough, it's worth the ugly coding.
- *
- * Ozan Yigit's original sdbm hash, copied from Margo Seltzers
- * db package.
*/
/*
@@ -358,45 +359,11 @@ awk_hash(const char *s, size_t len, unsigned long hsize, size_t *code)
#define HASHC htmp = (h << 6); \
h = *s++ + htmp + (htmp << 10) - h ; \
htmp &= 0xFFFFFFFF; \
- h &= 0xFFFFFFFF;
-
- unsigned long htmp;
+ h &= 0xFFFFFFFF
h = 0;
-#if defined(VAXC)
- /*
- * This was an implementation of "Duff's Device", but it has been
- * redone, separating the switch for extra iterations from the
- * loop. This is necessary because the DEC VAX-C compiler is
- * STOOPID.
- */
- switch (len & (8 - 1)) {
- case 7: HASHC;
- case 6: HASHC;
- case 5: HASHC;
- case 4: HASHC;
- case 3: HASHC;
- case 2: HASHC;
- case 1: HASHC;
- default: break;
- }
-
- if (len > (8 - 1)) {
- size_t loop = len >> 3;
- do {
- HASHC;
- HASHC;
- HASHC;
- HASHC;
- HASHC;
- HASHC;
- HASHC;
- HASHC;
- } while (--loop);
- }
-#else /* ! VAXC */
- /* "Duff's Device" for those who can handle it */
+ /* "Duff's Device" */
if (len > 0) {
size_t loop = (len + 8 - 1) >> 3;
@@ -414,7 +381,7 @@ awk_hash(const char *s, size_t len, unsigned long hsize, size_t *code)
} while (--loop);
}
}
-#endif /* ! VAXC */
+
if (code != NULL)
*code = h;
@@ -1594,6 +1561,7 @@ assoc_list(NODE *array, NODE *sort_str, SORT_CTXT sort_ctxt)
fatal(_("sort comparison function `%s' is not defined"), sort_str->stptr);
cmp_func = sort_user_func;
+ /* pre_func is still NULL */
/* make function call instructions */
code = bcalloc(Op_func_call, 2, 0);
@@ -1626,7 +1594,6 @@ assoc_list(NODE *array, NODE *sort_str, SORT_CTXT sort_ctxt)
return list;
/* special pre-processing of list items */
- pre_func = sort_funcs[qi].pre_func;
if (pre_func)
pre_func(list, num_elems);
diff --git a/awkprintf.h b/awkprintf.h
deleted file mode 100644
index 47ee377d..00000000
--- a/awkprintf.h
+++ /dev/null
@@ -1,1019 +0,0 @@
-/*
- * awkprintf.h -- Formatting code for gawk, used in debug.c and builtin.c.
- */
-
-/*
- * Copyright (C) 1986, 1988, 1989, 1991-2011 the Free Software Foundation, Inc.
- *
- * This file is part of GAWK, the GNU implementation of the
- * AWK Programming Language.
- *
- * GAWK is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3 of the License, or
- * (at your option) any later version.
- *
- * GAWK is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
- */
-
-#include "floatmagic.h"
-
-#define DEFAULT_G_PRECISION 6
-
-#ifdef GFMT_WORKAROUND
-/* semi-temporary hack, mostly to gracefully handle VMS */
-static void sgfmt(char *buf, const char *format, int alt,
- int fwidth, int precision, double value);
-#endif /* GFMT_WORKAROUND */
-static size_t mbc_byte_count(const char *ptr, size_t numchars);
-static size_t mbc_char_count(const char *ptr, size_t numbytes);
-
-/*
- * r_format_arg() formats arguments of sprintf,
- * and accordingly to a fmt_string providing a format like in
- * printf family from C library. Returns a string node which value
- * is a formatted string. Called by sprintf function.
- *
- * It is one of the uglier parts of gawk. Thanks to Michal Jaegermann
- * for taming this beast and making it compatible with ANSI C.
- */
-
-r_format_arg(
- const char *fmt_string,
- size_t n0,
- NODE **the_args,
- long num_args)
-{
-/* copy 'l' bytes from 's' to 'obufout' checking for space in the process */
-/* difference of pointers should be of ptrdiff_t type, but let us be kind */
-#define bchunk(s, l) if (l) { \
- while ((l) > ofre) { \
- size_t olen = obufout - obuf; \
- erealloc(obuf, char *, osiz * 2, "format_tree"); \
- ofre += osiz; \
- osiz *= 2; \
- obufout = obuf + olen; \
- } \
- memcpy(obufout, s, (size_t) (l)); \
- obufout += (l); \
- ofre -= (l); \
-}
-
-/* copy one byte from 's' to 'obufout' checking for space in the process */
-#define bchunk_one(s) { \
- if (ofre < 1) { \
- size_t olen = obufout - obuf; \
- erealloc(obuf, char *, osiz * 2, "format_tree"); \
- ofre += osiz; \
- osiz *= 2; \
- obufout = obuf + olen; \
- } \
- *obufout++ = *s; \
- --ofre; \
-}
-
-/* Is there space for something L big in the buffer? */
-#define chksize(l) if ((l) >= ofre) { \
- size_t olen = obufout - obuf; \
- size_t delta = osiz+l-ofre; \
- erealloc(obuf, char *, osiz + delta, "format_tree"); \
- obufout = obuf + olen; \
- ofre += delta; \
- osiz += delta; \
-}
-
- size_t cur_arg = 0;
- NODE *r = NULL;
- int i;
- int toofew = FALSE;
- char *obuf, *obufout;
- size_t osiz, ofre;
- const char *chbuf;
- const char *s0, *s1;
- int cs1;
- NODE *arg;
- long fw, prec, argnum;
- int used_dollar;
- int lj, alt, big, bigbig, small, have_prec, need_format;
- long *cur = NULL;
- uintmax_t uval;
- int sgn;
- int base = 0;
- /*
- * Although this is an array, the elements serve two different
- * purposes. The first element is the general buffer meant
- * to hold the entire result string. The second one is a
- * temporary buffer for large floating point values. They
- * could just as easily be separate variables, and the
- * code might arguably be clearer.
- */
- struct {
- char *buf;
- size_t bufsize;
- char stackbuf[30];
- } cpbufs[2];
-#define cpbuf cpbufs[0].buf
- char *cend = &cpbufs[0].stackbuf[sizeof(cpbufs[0].stackbuf)];
- char *cp;
- const char *fill;
- AWKNUM tmpval;
- char signchar = FALSE;
- size_t len;
- int zero_flag = FALSE;
- int quote_flag = FALSE;
- int ii, jj;
- char *chp;
- size_t copy_count, char_count;
- static const char sp[] = " ";
- static const char zero_string[] = "0";
- static const char lchbuf[] = "0123456789abcdef";
- static const char Uchbuf[] = "0123456789ABCDEF";
-
-#define INITIAL_OUT_SIZE 512
- emalloc(obuf, char *, INITIAL_OUT_SIZE, "format_tree");
- obufout = obuf;
- osiz = INITIAL_OUT_SIZE;
- ofre = osiz - 2;
-
- cur_arg = 1;
-
- {
- size_t k;
- for (k = 0; k < sizeof(cpbufs)/sizeof(cpbufs[0]); k++) {
- cpbufs[k].bufsize = sizeof(cpbufs[k].stackbuf);
- cpbufs[k].buf = cpbufs[k].stackbuf;
- }
- }
-
- /*
- * The point of this goop is to grow the buffer
- * holding the converted number, so that large
- * values don't overflow a fixed length buffer.
- */
-#define PREPEND(CH) do { \
- if (cp == cpbufs[0].buf) { \
- char *prev = cpbufs[0].buf; \
- emalloc(cpbufs[0].buf, char *, 2*cpbufs[0].bufsize, \
- "format_tree"); \
- memcpy((cp = cpbufs[0].buf+cpbufs[0].bufsize), prev, \
- cpbufs[0].bufsize); \
- cpbufs[0].bufsize *= 2; \
- if (prev != cpbufs[0].stackbuf) \
- efree(prev); \
- cend = cpbufs[0].buf+cpbufs[0].bufsize; \
- } \
- *--cp = (CH); \
-} while(0)
-
- /*
- * Check first for use of `count$'.
- * If plain argument retrieval was used earlier, choke.
- * Otherwise, return the requested argument.
- * If not `count$' now, but it was used earlier, choke.
- * If this format is more than total number of args, choke.
- * Otherwise, return the current argument.
- */
-#define parse_next_arg() { \
- if (argnum > 0) { \
- if (cur_arg > 1) { \
- fmt_msg(_("fatal: must use `count$' on all formats or none")); \
- goto out; \
- } \
- arg = the_args[argnum]; \
- } else if (used_dollar) { \
- fmt_msg(_("fatal: must use `count$' on all formats or none")); \
- arg = 0; /* shutup the compiler */ \
- goto out; \
- } else if (cur_arg >= num_args) { \
- arg = 0; /* shutup the compiler */ \
- toofew = TRUE; \
- break; \
- } else { \
- arg = the_args[cur_arg]; \
- cur_arg++; \
- } \
-}
-
- need_format = FALSE;
- used_dollar = FALSE;
-
- s0 = s1 = fmt_string;
- while (n0-- > 0) {
- if (*s1 != '%') {
- s1++;
- continue;
- }
- need_format = TRUE;
- bchunk(s0, s1 - s0);
- s0 = s1;
- cur = &fw;
- fw = 0;
- prec = 0;
- argnum = 0;
- have_prec = FALSE;
- signchar = FALSE;
- zero_flag = FALSE;
- quote_flag = FALSE;
- lj = alt = big = bigbig = small = FALSE;
- fill = sp;
- cp = cend;
- chbuf = lchbuf;
- s1++;
-
-retry:
- if (n0-- == 0) /* ran out early! */
- break;
-
- switch (cs1 = *s1++) {
- case (-1): /* dummy case to allow for checking */
-check_pos:
- if (cur != &fw)
- break; /* reject as a valid format */
- goto retry;
- case '%':
- need_format = FALSE;
- /*
- * 29 Oct. 2002:
- * The C99 standard pages 274 and 279 seem to imply that
- * since there's no arg converted, the field width doesn't
- * apply. The code already was that way, but this
- * comment documents it, at least in the code.
- */
- if (do_lint) {
- const char *msg = NULL;
-
- if (fw && ! have_prec)
- msg = _("field width is ignored for `%%' specifier");
- else if (fw == 0 && have_prec)
- msg = _("precision is ignored for `%%' specifier");
- else if (fw && have_prec)
- msg = _("field width and precision are ignored for `%%' specifier");
-
- if (msg != NULL)
- lintwarn("%s", msg);
- }
- bchunk_one("%");
- s0 = s1;
- break;
-
- case '0':
- /*
- * Only turn on zero_flag if we haven't seen
- * the field width or precision yet. Otherwise,
- * screws up floating point formatting.
- */
- if (cur == & fw)
- zero_flag = TRUE;
- if (lj)
- goto retry;
- /* FALL through */
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- if (cur == NULL)
- break;
- if (prec >= 0)
- *cur = cs1 - '0';
- /*
- * with a negative precision *cur is already set
- * to -1, so it will remain negative, but we have
- * to "eat" precision digits in any case
- */
- while (n0 > 0 && *s1 >= '0' && *s1 <= '9') {
- --n0;
- *cur = *cur * 10 + *s1++ - '0';
- }
- if (prec < 0) /* negative precision is discarded */
- have_prec = FALSE;
- if (cur == &prec)
- cur = NULL;
- if (n0 == 0) /* badly formatted control string */
- continue;
- goto retry;
- case '$':
- if (do_traditional) {
- fmt_msg(_("fatal: `$' is not permitted in awk formats"));
- goto out;
- }
-
- if (cur == &fw) {
- argnum = fw;
- fw = 0;
- used_dollar = TRUE;
- if (argnum <= 0) {
- fmt_msg(_("fatal: arg count with `$' must be > 0"));
- goto out;
- }
- if (argnum >= num_args) {
- fmt_msg(_("fatal: arg count %ld greater than total number of supplied arguments"), argnum);
- goto out;
- }
- } else {
- fmt_msg(_("fatal: `$' not permitted after period in format"));
- goto out;
- }
-
- goto retry;
- case '*':
- if (cur == NULL)
- break;
- if (! do_traditional && isdigit((unsigned char) *s1)) {
- int val = 0;
-
- for (; n0 > 0 && *s1 && isdigit((unsigned char) *s1); s1++, n0--) {
- val *= 10;
- val += *s1 - '0';
- }
- if (*s1 != '$') {
- fmt_msg(_("fatal: no `$' supplied for positional field width or precision"));
- goto out;
- } else {
- s1++;
- n0--;
- }
- if (val >= num_args) {
- toofew = TRUE;
- break;
- }
- arg = the_args[val];
- } else {
- parse_next_arg();
- }
- *cur = force_number(arg);
- if (*cur < 0 && cur == &fw) {
- *cur = -*cur;
- lj++;
- }
- if (cur == &prec) {
- if (*cur >= 0)
- have_prec = TRUE;
- else
- have_prec = FALSE;
- cur = NULL;
- }
- goto retry;
- case ' ': /* print ' ' or '-' */
- /* 'space' flag is ignored */
- /* if '+' already present */
- if (signchar != FALSE)
- goto check_pos;
- /* FALL THROUGH */
- case '+': /* print '+' or '-' */
- signchar = cs1;
- goto check_pos;
- case '-':
- if (prec < 0)
- break;
- if (cur == &prec) {
- prec = -1;
- goto retry;
- }
- fill = sp; /* if left justified then other */
- lj++; /* filling is ignored */
- goto check_pos;
- case '.':
- if (cur != &fw)
- break;
- cur = &prec;
- have_prec = TRUE;
- goto retry;
- case '#':
- alt = TRUE;
- goto check_pos;
- case '\'':
-#if defined(HAVE_LOCALE_H)
- /* allow quote_flag if there is a thousands separator. */
- if (loc.thousands_sep[0] != '\0')
- quote_flag = TRUE;
- goto check_pos;
-#else
- goto retry;
-#endif
- case 'l':
- if (big)
- break;
- else {
- static short warned = FALSE;
-
- if (do_lint && ! warned) {
- lintwarn(_("`l' is meaningless in awk formats; ignored"));
- warned = TRUE;
- }
- if (do_posix) {
- fmt_msg(_("fatal: `l' is not permitted in POSIX awk formats"));
- goto out;
- }
- }
- big = TRUE;
- goto retry;
- case 'L':
- if (bigbig)
- break;
- else {
- static short warned = FALSE;
-
- if (do_lint && ! warned) {
- lintwarn(_("`L' is meaningless in awk formats; ignored"));
- warned = TRUE;
- }
- if (do_posix) {
- fmt_msg(_("fatal: `L' is not permitted in POSIX awk formats"));
- goto out;
- }
- }
- bigbig = TRUE;
- goto retry;
- case 'h':
- if (small)
- break;
- else {
- static short warned = FALSE;
-
- if (do_lint && ! warned) {
- lintwarn(_("`h' is meaningless in awk formats; ignored"));
- warned = TRUE;
- }
- if (do_posix) {
- fmt_msg(_("fatal: `h' is not permitted in POSIX awk formats"));
- goto out;
- }
- }
- small = TRUE;
- goto retry;
- case 'c':
- need_format = FALSE;
- parse_next_arg();
- /* user input that looks numeric is numeric */
- if ((arg->flags & (MAYBE_NUM|NUMBER)) == MAYBE_NUM)
- (void) force_number(arg);
- if (arg->flags & NUMBER) {
- uval = (uintmax_t) arg->numbr;
-#if MBS_SUPPORT
- if (gawk_mb_cur_max > 1) {
- char buf[100];
- wchar_t wc;
- mbstate_t mbs;
- size_t count;
-
- memset(& mbs, 0, sizeof(mbs));
- wc = uval;
-
- count = wcrtomb(buf, wc, & mbs);
- if (count == 0
- || count == (size_t)-1
- || count == (size_t)-2)
- goto out0;
-
- memcpy(cpbuf, buf, count);
- prec = count;
- cp = cpbuf;
- goto pr_tail;
- }
-out0:
- ;
- /* else,
- fall through */
-#endif
- if (do_lint && uval > 255) {
- lintwarn("[s]printf: value %g is too big for %%c format",
- arg->numbr);
- }
- cpbuf[0] = uval;
- prec = 1;
- cp = cpbuf;
- goto pr_tail;
- }
- /*
- * As per POSIX, only output first character of a
- * string value. Thus, we ignore any provided
- * precision, forcing it to 1. (Didn't this
- * used to work? 6/2003.)
- */
- cp = arg->stptr;
-#ifdef MBS_SUPPORT
- /*
- * First character can be multiple bytes if
- * it's a multibyte character. Grr.
- */
- if (gawk_mb_cur_max > 1) {
- mbstate_t state;
- size_t count;
-
- memset(& state, 0, sizeof(state));
- count = mbrlen(cp, arg->stlen, & state);
- if (count == 0
- || count == (size_t)-1
- || count == (size_t)-2)
- goto out2;
- prec = count;
- goto pr_tail;
- }
-out2:
- ;
-#endif
- prec = 1;
- goto pr_tail;
- case 's':
- need_format = FALSE;
- parse_next_arg();
- arg = force_string(arg);
- if (fw == 0 && ! have_prec)
- prec = arg->stlen;
- else {
- char_count = mbc_char_count(arg->stptr, arg->stlen);
- if (! have_prec || prec > char_count)
- prec = char_count;
- }
- cp = arg->stptr;
- goto pr_tail;
- case 'd':
- case 'i':
- need_format = FALSE;
- parse_next_arg();
- tmpval = force_number(arg);
- /*
- * Check for Nan or Inf.
- */
- if (isnan(tmpval) || isinf(tmpval))
- goto out_of_range;
- else
- tmpval = double_to_int(tmpval);
-
- /*
- * ``The result of converting a zero value with a
- * precision of zero is no characters.''
- */
- if (have_prec && prec == 0 && tmpval == 0)
- goto pr_tail;
-
- if (tmpval < 0) {
- tmpval = -tmpval;
- sgn = TRUE;
- } else {
- if (tmpval == -0.0)
- /* avoid printing -0 */
- tmpval = 0.0;
- sgn = FALSE;
- }
- /*
- * Use snprintf return value to tell if there
- * is enough room in the buffer or not.
- */
- while ((i = snprintf(cpbufs[1].buf,
- cpbufs[1].bufsize, "%.0f",
- tmpval)) >=
- cpbufs[1].bufsize) {
- if (cpbufs[1].buf == cpbufs[1].stackbuf)
- cpbufs[1].buf = NULL;
- if (i > 0) {
- cpbufs[1].bufsize += ((i > cpbufs[1].bufsize) ?
- i : cpbufs[1].bufsize);
- }
- else
- cpbufs[1].bufsize *= 2;
- assert(cpbufs[1].bufsize > 0);
- erealloc(cpbufs[1].buf, char *,
- cpbufs[1].bufsize, "format_tree");
- }
- if (i < 1)
- goto out_of_range;
- chp = &cpbufs[1].buf[i-1];
- ii = jj = 0;
- do {
- PREPEND(*chp);
- chp--; i--;
-#if defined(HAVE_LOCALE_H)
- if (quote_flag && loc.grouping[ii] && ++jj == loc.grouping[ii]) {
- if (i) /* only add if more digits coming */
- PREPEND(loc.thousands_sep[0]); /* XXX - assumption it's one char */
- if (loc.grouping[ii+1] == 0)
- jj = 0; /* keep using current val in loc.grouping[ii] */
- else if (loc.grouping[ii+1] == CHAR_MAX)
- quote_flag = FALSE;
- else {
- ii++;
- jj = 0;
- }
- }
-#endif
- } while (i > 0);
-
- /* add more output digits to match the precision */
- if (have_prec) {
- while (cend - cp < prec)
- PREPEND('0');
- }
-
- if (sgn)
- PREPEND('-');
- else if (signchar)
- PREPEND(signchar);
- /*
- * When to fill with zeroes is of course not simple.
- * First: No zero fill if left-justifying.
- * Next: There seem to be two cases:
- * A '0' without a precision, e.g. %06d
- * A precision with no field width, e.g. %.10d
- * Any other case, we don't want to fill with zeroes.
- */
- if (! lj
- && ((zero_flag && ! have_prec)
- || (fw == 0 && have_prec)))
- fill = zero_string;
- if (prec > fw)
- fw = prec;
- prec = cend - cp;
- if (fw > prec && ! lj && fill != sp
- && (*cp == '-' || signchar)) {
- bchunk_one(cp);
- cp++;
- prec--;
- fw--;
- }
- goto pr_tail;
- case 'X':
- chbuf = Uchbuf; /* FALL THROUGH */
- case 'x':
- base += 6; /* FALL THROUGH */
- case 'u':
- base += 2; /* FALL THROUGH */
- case 'o':
- base += 8;
- need_format = FALSE;
- parse_next_arg();
- tmpval = force_number(arg);
-
- /*
- * ``The result of converting a zero value with a
- * precision of zero is no characters.''
- *
- * If I remember the ANSI C standard, though,
- * it says that for octal conversions
- * the precision is artificially increased
- * to add an extra 0 if # is supplied.
- * Indeed, in C,
- * printf("%#.0o\n", 0);
- * prints a single 0.
- */
- if (! alt && have_prec && prec == 0 && tmpval == 0)
- goto pr_tail;
-
- if (tmpval < 0) {
- uval = (uintmax_t) (intmax_t) tmpval;
- if ((AWKNUM)(intmax_t)uval !=
- double_to_int(tmpval))
- goto out_of_range;
- } else {
- uval = (uintmax_t) tmpval;
- if ((AWKNUM)uval != double_to_int(tmpval))
- goto out_of_range;
- }
- /*
- * When to fill with zeroes is of course not simple.
- * First: No zero fill if left-justifying.
- * Next: There seem to be two cases:
- * A '0' without a precision, e.g. %06d
- * A precision with no field width, e.g. %.10d
- * Any other case, we don't want to fill with zeroes.
- */
- if (! lj
- && ((zero_flag && ! have_prec)
- || (fw == 0 && have_prec)))
- fill = zero_string;
- ii = jj = 0;
- do {
- PREPEND(chbuf[uval % base]);
- uval /= base;
-#if defined(HAVE_LOCALE_H)
- if (base == 10 && quote_flag && loc.grouping[ii] && ++jj == loc.grouping[ii]) {
- if (uval) /* only add if more digits coming */
- PREPEND(loc.thousands_sep[0]); /* XXX --- assumption it's one char */
- if (loc.grouping[ii+1] == 0)
- jj = 0; /* keep using current val in loc.grouping[ii] */
- else if (loc.grouping[ii+1] == CHAR_MAX)
- quote_flag = FALSE;
- else {
- ii++;
- jj = 0;
- }
- }
-#endif
- } while (uval > 0);
-
- /* add more output digits to match the precision */
- if (have_prec) {
- while (cend - cp < prec)
- PREPEND('0');
- }
-
- if (alt && tmpval != 0) {
- if (base == 16) {
- PREPEND(cs1);
- PREPEND('0');
- if (fill != sp) {
- bchunk(cp, 2);
- cp += 2;
- fw -= 2;
- }
- } else if (base == 8)
- PREPEND('0');
- }
- base = 0;
- if (prec > fw)
- fw = prec;
- prec = cend - cp;
- pr_tail:
- if (! lj) {
- while (fw > prec) {
- bchunk_one(fill);
- fw--;
- }
- }
- copy_count = prec;
- if (fw == 0 && ! have_prec)
- ;
- else if (gawk_mb_cur_max > 1 && (cs1 == 's' || cs1 == 'c')) {
- assert(cp == arg->stptr || cp == cpbuf);
- copy_count = mbc_byte_count(arg->stptr, prec);
- }
- bchunk(cp, copy_count);
- while (fw > prec) {
- bchunk_one(fill);
- fw--;
- }
- s0 = s1;
- break;
-
- out_of_range:
- /* out of range - emergency use of %g format */
- if (do_lint)
- lintwarn(_("[s]printf: value %g is out of range for `%%%c' format"),
- (double) tmpval, cs1);
- cs1 = 'g';
- goto format_float;
-
- case 'F':
-#if ! defined(PRINTF_HAS_F_FORMAT) || PRINTF_HAS_F_FORMAT != 1
- cs1 = 'f';
- /* FALL THROUGH */
-#endif
- case 'g':
- case 'G':
- case 'e':
- case 'f':
- case 'E':
- need_format = FALSE;
- parse_next_arg();
- tmpval = force_number(arg);
- format_float:
- if (! have_prec)
- prec = DEFAULT_G_PRECISION;
- chksize(fw + prec + 9); /* 9 == slop */
-#ifdef VAXCRTL
- /* pre-ANSI library doesn't handle '0' flag
- correctly in many cases; reject it */
- if (zero_flag
- && (lj || (signchar && signchar != '+')))
- zero_flag = FALSE;
-#endif
- cp = cpbuf;
- *cp++ = '%';
- if (lj)
- *cp++ = '-';
- if (signchar)
- *cp++ = signchar;
- if (alt)
- *cp++ = '#';
- if (zero_flag)
- *cp++ = '0';
- if (quote_flag)
- *cp++ = '\'';
- strcpy(cp, "*.*");
- cp += 3;
- *cp++ = cs1;
- *cp = '\0';
-#ifndef GFMT_WORKAROUND
-#if defined(LC_NUMERIC)
- if (quote_flag && ! use_lc_numeric)
- setlocale(LC_NUMERIC, "");
-#endif
- {
- int n;
- while ((n = snprintf(obufout, ofre, cpbuf,
- (int) fw, (int) prec,
- (double) tmpval)) >= ofre)
- chksize(n)
- }
-#if defined(LC_NUMERIC)
- if (quote_flag && ! use_lc_numeric)
- setlocale(LC_NUMERIC, "C");
-#endif
-#else /* GFMT_WORKAROUND */
- if (cs1 == 'g' || cs1 == 'G')
- sgfmt(obufout, cpbuf, (int) alt,
- (int) fw, (int) prec, (double) tmpval);
- else {
- int n;
- while ((n = snprintf(obufout, ofre, cpbuf,
- (int) fw, (int) prec,
- (double) tmpval)) >= ofre)
- chksize(n)
- }
-#endif /* GFMT_WORKAROUND */
- len = strlen(obufout);
- ofre -= len;
- obufout += len;
- s0 = s1;
- break;
- default:
- if (do_lint && isalpha(cs1))
- lintwarn(_("ignoring unknown format specifier character `%c': no argument converted"), cs1);
- break;
- }
- if (toofew) {
- fmt_msg("%s\n\t`%s'\n\t%*s%s",
- _("fatal: not enough arguments to satisfy format string"),
- fmt_string, (int) (s1 - fmt_string - 1), "",
- _("^ ran out for this one"));
- goto out;
- }
- }
- if (do_lint) {
- if (need_format)
- lintwarn(
- _("[s]printf: format specifier does not have control letter"));
- if (cur_arg < num_args)
- lintwarn(
- _("too many arguments supplied for format string"));
- }
- bchunk(s0, s1 - s0);
- r = make_str_node(obuf, obufout - obuf, ALREADY_MALLOCED);
- obuf = NULL;
-out:
- {
- size_t k;
- size_t count = sizeof(cpbufs)/sizeof(cpbufs[0]);
- for (k = 0; k < count; k++) {
- if (cpbufs[k].buf != cpbufs[k].stackbuf)
- efree(cpbufs[k].buf);
- }
- if (obuf != NULL)
- efree(obuf);
- }
- return r;
-}
-
-
-#ifdef GFMT_WORKAROUND
-/*
- * printf's %g format [can't rely on gcvt()]
- * caveat: don't use as argument to *printf()!
- * 'format' string HAS to be of "<flags>*.*g" kind, or we bomb!
- */
-static void
-sgfmt(char *buf, /* return buffer; assumed big enough to hold result */
- const char *format,
- int alt, /* use alternate form flag */
- int fwidth, /* field width in a format */
- int prec, /* indicates desired significant digits, not decimal places */
- double g) /* value to format */
-{
- char dform[40];
- char *gpos;
- char *d, *e, *p;
- int again = FALSE;
-
- strncpy(dform, format, sizeof dform - 1);
- dform[sizeof dform - 1] = '\0';
- gpos = strrchr(dform, '.');
-
- if (g == 0.0 && ! alt) { /* easy special case */
- *gpos++ = 'd';
- *gpos = '\0';
- (void) sprintf(buf, dform, fwidth, 0);
- return;
- }
-
- /* advance to location of 'g' in the format */
- while (*gpos && *gpos != 'g' && *gpos != 'G')
- gpos++;
-
- if (prec <= 0) /* negative precision is ignored */
- prec = (prec < 0 ? DEFAULT_G_PRECISION : 1);
-
- if (*gpos == 'G')
- again = TRUE;
- /* start with 'e' format (it'll provide nice exponent) */
- *gpos = 'e';
- prec--;
- (void) sprintf(buf, dform, fwidth, prec, g);
- if ((e = strrchr(buf, 'e')) != NULL) { /* find exponent */
- int expn = atoi(e+1); /* fetch exponent */
- if (expn >= -4 && expn <= prec) { /* per K&R2, B1.2 */
- /* switch to 'f' format and re-do */
- *gpos = 'f';
- prec -= expn; /* decimal precision */
- (void) sprintf(buf, dform, fwidth, prec, g);
- e = buf + strlen(buf);
- while (*--e == ' ')
- continue;
- e++;
- }
- else if (again)
- *gpos = 'E';
-
- /* if 'alt' in force, then trailing zeros are not removed */
- if (! alt && (d = strrchr(buf, '.')) != NULL) {
- /* throw away an excess of precision */
- for (p = e; p > d && *--p == '0'; )
- prec--;
- if (d == p)
- prec--;
- if (prec < 0)
- prec = 0;
- /* and do that once again */
- again = TRUE;
- }
- if (again)
- (void) sprintf(buf, dform, fwidth, prec, g);
- }
-}
-#endif /* GFMT_WORKAROUND */
-
-
-/* mbc_byte_count --- return number of bytes for corresponding numchars multibyte characters */
-
-static size_t
-mbc_byte_count(const char *ptr, size_t numchars)
-{
-#ifdef MBS_SUPPORT
- mbstate_t cur_state;
- size_t sum = 0;
- int mb_len;
-
- memset(& cur_state, 0, sizeof(cur_state));
-
- assert(gawk_mb_cur_max > 1);
- mb_len = mbrlen(ptr, numchars * gawk_mb_cur_max, &cur_state);
- if (mb_len <= 0)
- return numchars; /* no valid m.b. char */
-
- for (; numchars > 0; numchars--) {
- mb_len = mbrlen(ptr, numchars * gawk_mb_cur_max, &cur_state);
- if (mb_len <= 0)
- break;
- sum += mb_len;
- ptr += mb_len;
- }
-
- return sum;
-#else
- return numchars;
-#endif
-}
-
-/* mbc_char_count --- return number of m.b. chars in string, up to numbytes bytes */
-
-static size_t
-mbc_char_count(const char *ptr, size_t numbytes)
-{
-#ifdef MBS_SUPPORT
- mbstate_t cur_state;
- size_t sum = 0;
- int mb_len;
-
- if (gawk_mb_cur_max == 1)
- return numbytes;
-
- memset(& cur_state, 0, sizeof(cur_state));
-
- mb_len = mbrlen(ptr, numbytes * gawk_mb_cur_max, &cur_state);
- if (mb_len <= 0)
- return numbytes; /* no valid m.b. char */
-
- for (; numbytes > 0; numbytes--) {
- mb_len = mbrlen(ptr, numbytes * gawk_mb_cur_max, &cur_state);
- if (mb_len <= 0)
- break;
- sum++;
- ptr += mb_len;
- }
-
- return sum;
-#else
- return numbytes;
-#endif
-}
diff --git a/builtin.c b/builtin.c
index d4dfb20d..322672c2 100644
--- a/builtin.c
+++ b/builtin.c
@@ -30,6 +30,7 @@
#endif
#include <math.h>
#include "random.h"
+#include "floatmagic.h"
#ifndef CHAR_BIT
# define CHAR_BIT 8
@@ -54,6 +55,16 @@
#define SIZE_MAX ((size_t) -1)
#endif
+#define DEFAULT_G_PRECISION 6
+
+#ifdef GFMT_WORKAROUND
+/* semi-temporary hack, mostly to gracefully handle VMS */
+static void sgfmt(char *buf, const char *format, int alt,
+ int fwidth, int precision, double value);
+#endif /* GFMT_WORKAROUND */
+static size_t mbc_byte_count(const char *ptr, size_t numchars);
+static size_t mbc_char_count(const char *ptr, size_t numbytes);
+
/* Can declare these, since we always use the random shipped with gawk */
extern char *initstate(unsigned long seed, char *state, long n);
extern char *setstate(char *state);
@@ -84,12 +95,6 @@ if ((s1)->type == Node_var_array) \
static void efwrite(const void *ptr, size_t size, size_t count, FILE *fp,
const char *from, struct redirect *rp, int flush);
-#define r_format_arg NODE * format_tree
-#define fmt_msg msg
-#include "awkprintf.h"
-#undef fmt_msg
-#undef r_format_arg
-
/* efwrite --- like fwrite, but with error checking */
static void
@@ -538,6 +543,852 @@ do_log(int nargs)
return make_number((AWKNUM) d);
}
+
+/*
+ * format_tree() formats arguments of sprintf,
+ * and accordingly to a fmt_string providing a format like in
+ * printf family from C library. Returns a string node which value
+ * is a formatted string. Called by sprintf function.
+ *
+ * It is one of the uglier parts of gawk. Thanks to Michal Jaegermann
+ * for taming this beast and making it compatible with ANSI C.
+ */
+
+NODE *
+format_tree(
+ const char *fmt_string,
+ size_t n0,
+ NODE **the_args,
+ long num_args)
+{
+/* copy 'l' bytes from 's' to 'obufout' checking for space in the process */
+/* difference of pointers should be of ptrdiff_t type, but let us be kind */
+#define bchunk(s, l) if (l) { \
+ while ((l) > ofre) { \
+ size_t olen = obufout - obuf; \
+ erealloc(obuf, char *, osiz * 2, "format_tree"); \
+ ofre += osiz; \
+ osiz *= 2; \
+ obufout = obuf + olen; \
+ } \
+ memcpy(obufout, s, (size_t) (l)); \
+ obufout += (l); \
+ ofre -= (l); \
+}
+
+/* copy one byte from 's' to 'obufout' checking for space in the process */
+#define bchunk_one(s) { \
+ if (ofre < 1) { \
+ size_t olen = obufout - obuf; \
+ erealloc(obuf, char *, osiz * 2, "format_tree"); \
+ ofre += osiz; \
+ osiz *= 2; \
+ obufout = obuf + olen; \
+ } \
+ *obufout++ = *s; \
+ --ofre; \
+}
+
+/* Is there space for something L big in the buffer? */
+#define chksize(l) if ((l) >= ofre) { \
+ size_t olen = obufout - obuf; \
+ size_t delta = osiz+l-ofre; \
+ erealloc(obuf, char *, osiz + delta, "format_tree"); \
+ obufout = obuf + olen; \
+ ofre += delta; \
+ osiz += delta; \
+}
+
+ size_t cur_arg = 0;
+ NODE *r = NULL;
+ int i;
+ int toofew = FALSE;
+ char *obuf, *obufout;
+ size_t osiz, ofre;
+ const char *chbuf;
+ const char *s0, *s1;
+ int cs1;
+ NODE *arg;
+ long fw, prec, argnum;
+ int used_dollar;
+ int lj, alt, big, bigbig, small, have_prec, need_format;
+ long *cur = NULL;
+ uintmax_t uval;
+ int sgn;
+ int base = 0;
+ /*
+ * Although this is an array, the elements serve two different
+ * purposes. The first element is the general buffer meant
+ * to hold the entire result string. The second one is a
+ * temporary buffer for large floating point values. They
+ * could just as easily be separate variables, and the
+ * code might arguably be clearer.
+ */
+ struct {
+ char *buf;
+ size_t bufsize;
+ char stackbuf[30];
+ } cpbufs[2];
+#define cpbuf cpbufs[0].buf
+ char *cend = &cpbufs[0].stackbuf[sizeof(cpbufs[0].stackbuf)];
+ char *cp;
+ const char *fill;
+ AWKNUM tmpval;
+ char signchar = FALSE;
+ size_t len;
+ int zero_flag = FALSE;
+ int quote_flag = FALSE;
+ int ii, jj;
+ char *chp;
+ size_t copy_count, char_count;
+ static const char sp[] = " ";
+ static const char zero_string[] = "0";
+ static const char lchbuf[] = "0123456789abcdef";
+ static const char Uchbuf[] = "0123456789ABCDEF";
+
+#define INITIAL_OUT_SIZE 512
+ emalloc(obuf, char *, INITIAL_OUT_SIZE, "format_tree");
+ obufout = obuf;
+ osiz = INITIAL_OUT_SIZE;
+ ofre = osiz - 2;
+
+ cur_arg = 1;
+
+ {
+ size_t k;
+ for (k = 0; k < sizeof(cpbufs)/sizeof(cpbufs[0]); k++) {
+ cpbufs[k].bufsize = sizeof(cpbufs[k].stackbuf);
+ cpbufs[k].buf = cpbufs[k].stackbuf;
+ }
+ }
+
+ /*
+ * The point of this goop is to grow the buffer
+ * holding the converted number, so that large
+ * values don't overflow a fixed length buffer.
+ */
+#define PREPEND(CH) do { \
+ if (cp == cpbufs[0].buf) { \
+ char *prev = cpbufs[0].buf; \
+ emalloc(cpbufs[0].buf, char *, 2*cpbufs[0].bufsize, \
+ "format_tree"); \
+ memcpy((cp = cpbufs[0].buf+cpbufs[0].bufsize), prev, \
+ cpbufs[0].bufsize); \
+ cpbufs[0].bufsize *= 2; \
+ if (prev != cpbufs[0].stackbuf) \
+ efree(prev); \
+ cend = cpbufs[0].buf+cpbufs[0].bufsize; \
+ } \
+ *--cp = (CH); \
+} while(0)
+
+ /*
+ * Check first for use of `count$'.
+ * If plain argument retrieval was used earlier, choke.
+ * Otherwise, return the requested argument.
+ * If not `count$' now, but it was used earlier, choke.
+ * If this format is more than total number of args, choke.
+ * Otherwise, return the current argument.
+ */
+#define parse_next_arg() { \
+ if (argnum > 0) { \
+ if (cur_arg > 1) { \
+ msg(_("fatal: must use `count$' on all formats or none")); \
+ goto out; \
+ } \
+ arg = the_args[argnum]; \
+ } else if (used_dollar) { \
+ msg(_("fatal: must use `count$' on all formats or none")); \
+ arg = 0; /* shutup the compiler */ \
+ goto out; \
+ } else if (cur_arg >= num_args) { \
+ arg = 0; /* shutup the compiler */ \
+ toofew = TRUE; \
+ break; \
+ } else { \
+ arg = the_args[cur_arg]; \
+ cur_arg++; \
+ } \
+}
+
+ need_format = FALSE;
+ used_dollar = FALSE;
+
+ s0 = s1 = fmt_string;
+ while (n0-- > 0) {
+ if (*s1 != '%') {
+ s1++;
+ continue;
+ }
+ need_format = TRUE;
+ bchunk(s0, s1 - s0);
+ s0 = s1;
+ cur = &fw;
+ fw = 0;
+ prec = 0;
+ argnum = 0;
+ have_prec = FALSE;
+ signchar = FALSE;
+ zero_flag = FALSE;
+ quote_flag = FALSE;
+ lj = alt = big = bigbig = small = FALSE;
+ fill = sp;
+ cp = cend;
+ chbuf = lchbuf;
+ s1++;
+
+retry:
+ if (n0-- == 0) /* ran out early! */
+ break;
+
+ switch (cs1 = *s1++) {
+ case (-1): /* dummy case to allow for checking */
+check_pos:
+ if (cur != &fw)
+ break; /* reject as a valid format */
+ goto retry;
+ case '%':
+ need_format = FALSE;
+ /*
+ * 29 Oct. 2002:
+ * The C99 standard pages 274 and 279 seem to imply that
+ * since there's no arg converted, the field width doesn't
+ * apply. The code already was that way, but this
+ * comment documents it, at least in the code.
+ */
+ if (do_lint) {
+ const char *msg = NULL;
+
+ if (fw && ! have_prec)
+ msg = _("field width is ignored for `%%' specifier");
+ else if (fw == 0 && have_prec)
+ msg = _("precision is ignored for `%%' specifier");
+ else if (fw && have_prec)
+ msg = _("field width and precision are ignored for `%%' specifier");
+
+ if (msg != NULL)
+ lintwarn("%s", msg);
+ }
+ bchunk_one("%");
+ s0 = s1;
+ break;
+
+ case '0':
+ /*
+ * Only turn on zero_flag if we haven't seen
+ * the field width or precision yet. Otherwise,
+ * screws up floating point formatting.
+ */
+ if (cur == & fw)
+ zero_flag = TRUE;
+ if (lj)
+ goto retry;
+ /* FALL through */
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (cur == NULL)
+ break;
+ if (prec >= 0)
+ *cur = cs1 - '0';
+ /*
+ * with a negative precision *cur is already set
+ * to -1, so it will remain negative, but we have
+ * to "eat" precision digits in any case
+ */
+ while (n0 > 0 && *s1 >= '0' && *s1 <= '9') {
+ --n0;
+ *cur = *cur * 10 + *s1++ - '0';
+ }
+ if (prec < 0) /* negative precision is discarded */
+ have_prec = FALSE;
+ if (cur == &prec)
+ cur = NULL;
+ if (n0 == 0) /* badly formatted control string */
+ continue;
+ goto retry;
+ case '$':
+ if (do_traditional) {
+ msg(_("fatal: `$' is not permitted in awk formats"));
+ goto out;
+ }
+
+ if (cur == &fw) {
+ argnum = fw;
+ fw = 0;
+ used_dollar = TRUE;
+ if (argnum <= 0) {
+ msg(_("fatal: arg count with `$' must be > 0"));
+ goto out;
+ }
+ if (argnum >= num_args) {
+ msg(_("fatal: arg count %ld greater than total number of supplied arguments"), argnum);
+ goto out;
+ }
+ } else {
+ msg(_("fatal: `$' not permitted after period in format"));
+ goto out;
+ }
+
+ goto retry;
+ case '*':
+ if (cur == NULL)
+ break;
+ if (! do_traditional && isdigit((unsigned char) *s1)) {
+ int val = 0;
+
+ for (; n0 > 0 && *s1 && isdigit((unsigned char) *s1); s1++, n0--) {
+ val *= 10;
+ val += *s1 - '0';
+ }
+ if (*s1 != '$') {
+ msg(_("fatal: no `$' supplied for positional field width or precision"));
+ goto out;
+ } else {
+ s1++;
+ n0--;
+ }
+ if (val >= num_args) {
+ toofew = TRUE;
+ break;
+ }
+ arg = the_args[val];
+ } else {
+ parse_next_arg();
+ }
+ *cur = force_number(arg);
+ if (*cur < 0 && cur == &fw) {
+ *cur = -*cur;
+ lj++;
+ }
+ if (cur == &prec) {
+ if (*cur >= 0)
+ have_prec = TRUE;
+ else
+ have_prec = FALSE;
+ cur = NULL;
+ }
+ goto retry;
+ case ' ': /* print ' ' or '-' */
+ /* 'space' flag is ignored */
+ /* if '+' already present */
+ if (signchar != FALSE)
+ goto check_pos;
+ /* FALL THROUGH */
+ case '+': /* print '+' or '-' */
+ signchar = cs1;
+ goto check_pos;
+ case '-':
+ if (prec < 0)
+ break;
+ if (cur == &prec) {
+ prec = -1;
+ goto retry;
+ }
+ fill = sp; /* if left justified then other */
+ lj++; /* filling is ignored */
+ goto check_pos;
+ case '.':
+ if (cur != &fw)
+ break;
+ cur = &prec;
+ have_prec = TRUE;
+ goto retry;
+ case '#':
+ alt = TRUE;
+ goto check_pos;
+ case '\'':
+#if defined(HAVE_LOCALE_H)
+ /* allow quote_flag if there is a thousands separator. */
+ if (loc.thousands_sep[0] != '\0')
+ quote_flag = TRUE;
+ goto check_pos;
+#else
+ goto retry;
+#endif
+ case 'l':
+ if (big)
+ break;
+ else {
+ static short warned = FALSE;
+
+ if (do_lint && ! warned) {
+ lintwarn(_("`l' is meaningless in awk formats; ignored"));
+ warned = TRUE;
+ }
+ if (do_posix) {
+ msg(_("fatal: `l' is not permitted in POSIX awk formats"));
+ goto out;
+ }
+ }
+ big = TRUE;
+ goto retry;
+ case 'L':
+ if (bigbig)
+ break;
+ else {
+ static short warned = FALSE;
+
+ if (do_lint && ! warned) {
+ lintwarn(_("`L' is meaningless in awk formats; ignored"));
+ warned = TRUE;
+ }
+ if (do_posix) {
+ msg(_("fatal: `L' is not permitted in POSIX awk formats"));
+ goto out;
+ }
+ }
+ bigbig = TRUE;
+ goto retry;
+ case 'h':
+ if (small)
+ break;
+ else {
+ static short warned = FALSE;
+
+ if (do_lint && ! warned) {
+ lintwarn(_("`h' is meaningless in awk formats; ignored"));
+ warned = TRUE;
+ }
+ if (do_posix) {
+ msg(_("fatal: `h' is not permitted in POSIX awk formats"));
+ goto out;
+ }
+ }
+ small = TRUE;
+ goto retry;
+ case 'c':
+ need_format = FALSE;
+ parse_next_arg();
+ /* user input that looks numeric is numeric */
+ if ((arg->flags & (MAYBE_NUM|NUMBER)) == MAYBE_NUM)
+ (void) force_number(arg);
+ if (arg->flags & NUMBER) {
+ uval = (uintmax_t) arg->numbr;
+#if MBS_SUPPORT
+ if (gawk_mb_cur_max > 1) {
+ char buf[100];
+ wchar_t wc;
+ mbstate_t mbs;
+ size_t count;
+
+ memset(& mbs, 0, sizeof(mbs));
+ wc = uval;
+
+ count = wcrtomb(buf, wc, & mbs);
+ if (count == 0
+ || count == (size_t)-1
+ || count == (size_t)-2)
+ goto out0;
+
+ memcpy(cpbuf, buf, count);
+ prec = count;
+ cp = cpbuf;
+ goto pr_tail;
+ }
+out0:
+ ;
+ /* else,
+ fall through */
+#endif
+ if (do_lint && uval > 255) {
+ lintwarn("[s]printf: value %g is too big for %%c format",
+ arg->numbr);
+ }
+ cpbuf[0] = uval;
+ prec = 1;
+ cp = cpbuf;
+ goto pr_tail;
+ }
+ /*
+ * As per POSIX, only output first character of a
+ * string value. Thus, we ignore any provided
+ * precision, forcing it to 1. (Didn't this
+ * used to work? 6/2003.)
+ */
+ cp = arg->stptr;
+#ifdef MBS_SUPPORT
+ /*
+ * First character can be multiple bytes if
+ * it's a multibyte character. Grr.
+ */
+ if (gawk_mb_cur_max > 1) {
+ mbstate_t state;
+ size_t count;
+
+ memset(& state, 0, sizeof(state));
+ count = mbrlen(cp, arg->stlen, & state);
+ if (count == 0
+ || count == (size_t)-1
+ || count == (size_t)-2)
+ goto out2;
+ prec = count;
+ goto pr_tail;
+ }
+out2:
+ ;
+#endif
+ prec = 1;
+ goto pr_tail;
+ case 's':
+ need_format = FALSE;
+ parse_next_arg();
+ arg = force_string(arg);
+ if (fw == 0 && ! have_prec)
+ prec = arg->stlen;
+ else {
+ char_count = mbc_char_count(arg->stptr, arg->stlen);
+ if (! have_prec || prec > char_count)
+ prec = char_count;
+ }
+ cp = arg->stptr;
+ goto pr_tail;
+ case 'd':
+ case 'i':
+ need_format = FALSE;
+ parse_next_arg();
+ tmpval = force_number(arg);
+ /*
+ * Check for Nan or Inf.
+ */
+ if (isnan(tmpval) || isinf(tmpval))
+ goto out_of_range;
+ else
+ tmpval = double_to_int(tmpval);
+
+ /*
+ * ``The result of converting a zero value with a
+ * precision of zero is no characters.''
+ */
+ if (have_prec && prec == 0 && tmpval == 0)
+ goto pr_tail;
+
+ if (tmpval < 0) {
+ tmpval = -tmpval;
+ sgn = TRUE;
+ } else {
+ if (tmpval == -0.0)
+ /* avoid printing -0 */
+ tmpval = 0.0;
+ sgn = FALSE;
+ }
+ /*
+ * Use snprintf return value to tell if there
+ * is enough room in the buffer or not.
+ */
+ while ((i = snprintf(cpbufs[1].buf,
+ cpbufs[1].bufsize, "%.0f",
+ tmpval)) >=
+ cpbufs[1].bufsize) {
+ if (cpbufs[1].buf == cpbufs[1].stackbuf)
+ cpbufs[1].buf = NULL;
+ if (i > 0) {
+ cpbufs[1].bufsize += ((i > cpbufs[1].bufsize) ?
+ i : cpbufs[1].bufsize);
+ }
+ else
+ cpbufs[1].bufsize *= 2;
+ assert(cpbufs[1].bufsize > 0);
+ erealloc(cpbufs[1].buf, char *,
+ cpbufs[1].bufsize, "format_tree");
+ }
+ if (i < 1)
+ goto out_of_range;
+ chp = &cpbufs[1].buf[i-1];
+ ii = jj = 0;
+ do {
+ PREPEND(*chp);
+ chp--; i--;
+#if defined(HAVE_LOCALE_H)
+ if (quote_flag && loc.grouping[ii] && ++jj == loc.grouping[ii]) {
+ if (i) /* only add if more digits coming */
+ PREPEND(loc.thousands_sep[0]); /* XXX - assumption it's one char */
+ if (loc.grouping[ii+1] == 0)
+ jj = 0; /* keep using current val in loc.grouping[ii] */
+ else if (loc.grouping[ii+1] == CHAR_MAX)
+ quote_flag = FALSE;
+ else {
+ ii++;
+ jj = 0;
+ }
+ }
+#endif
+ } while (i > 0);
+
+ /* add more output digits to match the precision */
+ if (have_prec) {
+ while (cend - cp < prec)
+ PREPEND('0');
+ }
+
+ if (sgn)
+ PREPEND('-');
+ else if (signchar)
+ PREPEND(signchar);
+ /*
+ * When to fill with zeroes is of course not simple.
+ * First: No zero fill if left-justifying.
+ * Next: There seem to be two cases:
+ * A '0' without a precision, e.g. %06d
+ * A precision with no field width, e.g. %.10d
+ * Any other case, we don't want to fill with zeroes.
+ */
+ if (! lj
+ && ((zero_flag && ! have_prec)
+ || (fw == 0 && have_prec)))
+ fill = zero_string;
+ if (prec > fw)
+ fw = prec;
+ prec = cend - cp;
+ if (fw > prec && ! lj && fill != sp
+ && (*cp == '-' || signchar)) {
+ bchunk_one(cp);
+ cp++;
+ prec--;
+ fw--;
+ }
+ goto pr_tail;
+ case 'X':
+ chbuf = Uchbuf; /* FALL THROUGH */
+ case 'x':
+ base += 6; /* FALL THROUGH */
+ case 'u':
+ base += 2; /* FALL THROUGH */
+ case 'o':
+ base += 8;
+ need_format = FALSE;
+ parse_next_arg();
+ tmpval = force_number(arg);
+
+ /*
+ * ``The result of converting a zero value with a
+ * precision of zero is no characters.''
+ *
+ * If I remember the ANSI C standard, though,
+ * it says that for octal conversions
+ * the precision is artificially increased
+ * to add an extra 0 if # is supplied.
+ * Indeed, in C,
+ * printf("%#.0o\n", 0);
+ * prints a single 0.
+ */
+ if (! alt && have_prec && prec == 0 && tmpval == 0)
+ goto pr_tail;
+
+ if (tmpval < 0) {
+ uval = (uintmax_t) (intmax_t) tmpval;
+ if ((AWKNUM)(intmax_t)uval !=
+ double_to_int(tmpval))
+ goto out_of_range;
+ } else {
+ uval = (uintmax_t) tmpval;
+ if ((AWKNUM)uval != double_to_int(tmpval))
+ goto out_of_range;
+ }
+ /*
+ * When to fill with zeroes is of course not simple.
+ * First: No zero fill if left-justifying.
+ * Next: There seem to be two cases:
+ * A '0' without a precision, e.g. %06d
+ * A precision with no field width, e.g. %.10d
+ * Any other case, we don't want to fill with zeroes.
+ */
+ if (! lj
+ && ((zero_flag && ! have_prec)
+ || (fw == 0 && have_prec)))
+ fill = zero_string;
+ ii = jj = 0;
+ do {
+ PREPEND(chbuf[uval % base]);
+ uval /= base;
+#if defined(HAVE_LOCALE_H)
+ if (base == 10 && quote_flag && loc.grouping[ii] && ++jj == loc.grouping[ii]) {
+ if (uval) /* only add if more digits coming */
+ PREPEND(loc.thousands_sep[0]); /* XXX --- assumption it's one char */
+ if (loc.grouping[ii+1] == 0)
+ jj = 0; /* keep using current val in loc.grouping[ii] */
+ else if (loc.grouping[ii+1] == CHAR_MAX)
+ quote_flag = FALSE;
+ else {
+ ii++;
+ jj = 0;
+ }
+ }
+#endif
+ } while (uval > 0);
+
+ /* add more output digits to match the precision */
+ if (have_prec) {
+ while (cend - cp < prec)
+ PREPEND('0');
+ }
+
+ if (alt && tmpval != 0) {
+ if (base == 16) {
+ PREPEND(cs1);
+ PREPEND('0');
+ if (fill != sp) {
+ bchunk(cp, 2);
+ cp += 2;
+ fw -= 2;
+ }
+ } else if (base == 8)
+ PREPEND('0');
+ }
+ base = 0;
+ if (prec > fw)
+ fw = prec;
+ prec = cend - cp;
+ pr_tail:
+ if (! lj) {
+ while (fw > prec) {
+ bchunk_one(fill);
+ fw--;
+ }
+ }
+ copy_count = prec;
+ if (fw == 0 && ! have_prec)
+ ;
+ else if (gawk_mb_cur_max > 1 && (cs1 == 's' || cs1 == 'c')) {
+ assert(cp == arg->stptr || cp == cpbuf);
+ copy_count = mbc_byte_count(arg->stptr, prec);
+ }
+ bchunk(cp, copy_count);
+ while (fw > prec) {
+ bchunk_one(fill);
+ fw--;
+ }
+ s0 = s1;
+ break;
+
+ out_of_range:
+ /* out of range - emergency use of %g format */
+ if (do_lint)
+ lintwarn(_("[s]printf: value %g is out of range for `%%%c' format"),
+ (double) tmpval, cs1);
+ cs1 = 'g';
+ goto format_float;
+
+ case 'F':
+#if ! defined(PRINTF_HAS_F_FORMAT) || PRINTF_HAS_F_FORMAT != 1
+ cs1 = 'f';
+ /* FALL THROUGH */
+#endif
+ case 'g':
+ case 'G':
+ case 'e':
+ case 'f':
+ case 'E':
+ need_format = FALSE;
+ parse_next_arg();
+ tmpval = force_number(arg);
+ format_float:
+ if (! have_prec)
+ prec = DEFAULT_G_PRECISION;
+ chksize(fw + prec + 9); /* 9 == slop */
+#ifdef VAXCRTL
+ /* pre-ANSI library doesn't handle '0' flag
+ correctly in many cases; reject it */
+ if (zero_flag
+ && (lj || (signchar && signchar != '+')))
+ zero_flag = FALSE;
+#endif
+ cp = cpbuf;
+ *cp++ = '%';
+ if (lj)
+ *cp++ = '-';
+ if (signchar)
+ *cp++ = signchar;
+ if (alt)
+ *cp++ = '#';
+ if (zero_flag)
+ *cp++ = '0';
+ if (quote_flag)
+ *cp++ = '\'';
+ strcpy(cp, "*.*");
+ cp += 3;
+ *cp++ = cs1;
+ *cp = '\0';
+#ifndef GFMT_WORKAROUND
+#if defined(LC_NUMERIC)
+ if (quote_flag && ! use_lc_numeric)
+ setlocale(LC_NUMERIC, "");
+#endif
+ {
+ int n;
+ while ((n = snprintf(obufout, ofre, cpbuf,
+ (int) fw, (int) prec,
+ (double) tmpval)) >= ofre)
+ chksize(n)
+ }
+#if defined(LC_NUMERIC)
+ if (quote_flag && ! use_lc_numeric)
+ setlocale(LC_NUMERIC, "C");
+#endif
+#else /* GFMT_WORKAROUND */
+ if (cs1 == 'g' || cs1 == 'G')
+ sgfmt(obufout, cpbuf, (int) alt,
+ (int) fw, (int) prec, (double) tmpval);
+ else {
+ int n;
+ while ((n = snprintf(obufout, ofre, cpbuf,
+ (int) fw, (int) prec,
+ (double) tmpval)) >= ofre)
+ chksize(n)
+ }
+#endif /* GFMT_WORKAROUND */
+ len = strlen(obufout);
+ ofre -= len;
+ obufout += len;
+ s0 = s1;
+ break;
+ default:
+ if (do_lint && isalpha(cs1))
+ lintwarn(_("ignoring unknown format specifier character `%c': no argument converted"), cs1);
+ break;
+ }
+ if (toofew) {
+ msg("%s\n\t`%s'\n\t%*s%s",
+ _("fatal: not enough arguments to satisfy format string"),
+ fmt_string, (int) (s1 - fmt_string - 1), "",
+ _("^ ran out for this one"));
+ goto out;
+ }
+ }
+ if (do_lint) {
+ if (need_format)
+ lintwarn(
+ _("[s]printf: format specifier does not have control letter"));
+ if (cur_arg < num_args)
+ lintwarn(
+ _("too many arguments supplied for format string"));
+ }
+ bchunk(s0, s1 - s0);
+ r = make_str_node(obuf, obufout - obuf, ALREADY_MALLOCED);
+ obuf = NULL;
+out:
+ {
+ size_t k;
+ size_t count = sizeof(cpbufs)/sizeof(cpbufs[0]);
+ for (k = 0; k < count; k++) {
+ if (cpbufs[k].buf != cpbufs[k].stackbuf)
+ efree(cpbufs[k].buf);
+ }
+ if (obuf != NULL)
+ efree(obuf);
+ }
+ if (r == NULL)
+ gawk_exit(EXIT_FATAL);
+ return r;
+}
+
+
/* printf_common --- common code for sprintf and printf */
static NODE *
@@ -1907,6 +2758,83 @@ set_how_many:
}
+#ifdef GFMT_WORKAROUND
+/*
+ * printf's %g format [can't rely on gcvt()]
+ * caveat: don't use as argument to *printf()!
+ * 'format' string HAS to be of "<flags>*.*g" kind, or we bomb!
+ */
+static void
+sgfmt(char *buf, /* return buffer; assumed big enough to hold result */
+ const char *format,
+ int alt, /* use alternate form flag */
+ int fwidth, /* field width in a format */
+ int prec, /* indicates desired significant digits, not decimal places */
+ double g) /* value to format */
+{
+ char dform[40];
+ char *gpos;
+ char *d, *e, *p;
+ int again = FALSE;
+
+ strncpy(dform, format, sizeof dform - 1);
+ dform[sizeof dform - 1] = '\0';
+ gpos = strrchr(dform, '.');
+
+ if (g == 0.0 && ! alt) { /* easy special case */
+ *gpos++ = 'd';
+ *gpos = '\0';
+ (void) sprintf(buf, dform, fwidth, 0);
+ return;
+ }
+
+ /* advance to location of 'g' in the format */
+ while (*gpos && *gpos != 'g' && *gpos != 'G')
+ gpos++;
+
+ if (prec <= 0) /* negative precision is ignored */
+ prec = (prec < 0 ? DEFAULT_G_PRECISION : 1);
+
+ if (*gpos == 'G')
+ again = TRUE;
+ /* start with 'e' format (it'll provide nice exponent) */
+ *gpos = 'e';
+ prec--;
+ (void) sprintf(buf, dform, fwidth, prec, g);
+ if ((e = strrchr(buf, 'e')) != NULL) { /* find exponent */
+ int expn = atoi(e+1); /* fetch exponent */
+ if (expn >= -4 && expn <= prec) { /* per K&R2, B1.2 */
+ /* switch to 'f' format and re-do */
+ *gpos = 'f';
+ prec -= expn; /* decimal precision */
+ (void) sprintf(buf, dform, fwidth, prec, g);
+ e = buf + strlen(buf);
+ while (*--e == ' ')
+ continue;
+ e++;
+ }
+ else if (again)
+ *gpos = 'E';
+
+ /* if 'alt' in force, then trailing zeros are not removed */
+ if (! alt && (d = strrchr(buf, '.')) != NULL) {
+ /* throw away an excess of precision */
+ for (p = e; p > d && *--p == '0'; )
+ prec--;
+ if (d == p)
+ prec--;
+ if (prec < 0)
+ prec = 0;
+ /* and do that once again */
+ again = TRUE;
+ }
+ if (again)
+ (void) sprintf(buf, dform, fwidth, prec, g);
+ }
+}
+#endif /* GFMT_WORKAROUND */
+
+
/* make_integer - Convert an integer to a number node. */
static NODE *
@@ -2453,3 +3381,68 @@ do_bindtextdomain(int nargs)
return make_string(the_result, strlen(the_result));
}
+
+
+/* mbc_byte_count --- return number of bytes for corresponding numchars multibyte characters */
+
+static size_t
+mbc_byte_count(const char *ptr, size_t numchars)
+{
+#ifdef MBS_SUPPORT
+ mbstate_t cur_state;
+ size_t sum = 0;
+ int mb_len;
+
+ memset(& cur_state, 0, sizeof(cur_state));
+
+ assert(gawk_mb_cur_max > 1);
+ mb_len = mbrlen(ptr, numchars * gawk_mb_cur_max, &cur_state);
+ if (mb_len <= 0)
+ return numchars; /* no valid m.b. char */
+
+ for (; numchars > 0; numchars--) {
+ mb_len = mbrlen(ptr, numchars * gawk_mb_cur_max, &cur_state);
+ if (mb_len <= 0)
+ break;
+ sum += mb_len;
+ ptr += mb_len;
+ }
+
+ return sum;
+#else
+ return numchars;
+#endif
+}
+
+/* mbc_char_count --- return number of m.b. chars in string, up to numbytes bytes */
+
+static size_t
+mbc_char_count(const char *ptr, size_t numbytes)
+{
+#ifdef MBS_SUPPORT
+ mbstate_t cur_state;
+ size_t sum = 0;
+ int mb_len;
+
+ if (gawk_mb_cur_max == 1)
+ return numbytes;
+
+ memset(& cur_state, 0, sizeof(cur_state));
+
+ mb_len = mbrlen(ptr, numbytes * gawk_mb_cur_max, &cur_state);
+ if (mb_len <= 0)
+ return numbytes; /* no valid m.b. char */
+
+ for (; numbytes > 0; numbytes--) {
+ mb_len = mbrlen(ptr, numbytes * gawk_mb_cur_max, &cur_state);
+ if (mb_len <= 0)
+ break;
+ sum++;
+ ptr += mb_len;
+ }
+
+ return sum;
+#else
+ return numbytes;
+#endif
+}
diff --git a/debug.c b/debug.c
index bcb030f7..ee1b311d 100644
--- a/debug.c
+++ b/debug.c
@@ -346,11 +346,6 @@ static struct command_source *cmd_src = NULL;
} \
} while (FALSE)
-#define r_format_arg static NODE * format_arg
-#define fmt_msg d_error
-#include "awkprintf.h"
-#undef fmt_msg
-#undef r_format_arg
/* g_readline -- read a line of text; the interface is like 'readline' but
* without any command-line editing; used when not compiled with
@@ -4865,8 +4860,9 @@ do_print_f(CMDARG *arg, int cmd ATTRIBUTE_UNUSED)
int i;
CMDARG *a;
NODE **tmp;
- NODE *r;
char *name;
+ NODE *r;
+ volatile jmp_buf fatal_tag_stack;
/* count maximum required size for tmp */
for (a = arg; a != NULL ; a = a->next)
@@ -4944,8 +4940,18 @@ do_print_f(CMDARG *arg, int cmd ATTRIBUTE_UNUSED)
}
force_string(tmp[0]);
- r = format_arg(tmp[0]->stptr, tmp[0]->stlen, tmp, i);
- if (r != NULL) {
+
+ PUSH_BINDING(fatal_tag_stack, fatal_tag, fatal_tag_valid);
+ if (setjmp(fatal_tag) == 0)
+ r = format_tree(tmp[0]->stptr, tmp[0]->stlen, tmp, i);
+ else {
+ /* fatal error, restore exit_val of program */
+ exit_val = EXIT_SUCCESS;
+ r = NULL;
+ }
+ POP_BINDING(fatal_tag_stack, fatal_tag, fatal_tag_valid);
+
+ if (r != NULL) {
(void) fwrite(r->stptr, sizeof(char), r->stlen, out_fp);
unref(r);
}