diff options
Diffstat (limited to 'builtin.c')
-rw-r--r-- | builtin.c | 195 |
1 files changed, 164 insertions, 31 deletions
@@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1986, 1988, 1989, 1991-2007 the Free Software Foundation, Inc. + * Copyright (C) 1986, 1988, 1989, 1991-2009 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. @@ -30,6 +30,7 @@ #endif #include <math.h> #include "random.h" +#include "floatmagic.h" #ifndef CHAR_BIT # define CHAR_BIT 8 @@ -91,6 +92,8 @@ static void sgfmt P((char *buf, const char *format, int alt, static void efwrite P((const void *ptr, size_t size, size_t count, FILE *fp, const char *from, struct redirect *rp, int flush)); +static size_t mbc_byte_count P((const char *ptr, size_t numchars)); +static size_t mbc_char_count P((const char *ptr, size_t numbytes)); /* efwrite --- like fwrite, but with error checking */ @@ -298,6 +301,7 @@ do_index(NODE *tree) register const char *p1, *p2; register size_t l1, l2; long ret; + int do_single_byte = FALSE; s1 = tree_eval(tree->lnode); s2 = tree_eval(tree->rnode->lnode); @@ -325,18 +329,28 @@ do_index(NODE *tree) goto out; } +#ifdef MBS_SUPPORT + if (gawk_mb_cur_max > 1) { + s1 = force_wstring(s1); + s2 = force_wstring(s2); + /* + * If we don't have valid wide character strings, use + * the real bytes. + */ + do_single_byte = ((s1->wstlen == 0 && s1->stlen > 0) + || (s2->wstlen == 0 && s2->stlen > 0)); + } +#endif + /* IGNORECASE will already be false if posix */ if (IGNORECASE) { while (l1 > 0) { if (l2 > l1) break; #ifdef MBS_SUPPORT - if (gawk_mb_cur_max > 1) { + if (! do_single_byte && gawk_mb_cur_max > 1) { const wchar_t *pos; - s1 = force_wstring(s1); - s2 = force_wstring(s2); - pos = wcasestrstr(s1->wstptr, s1->wstlen, s2->wstptr, s2->wstlen); if (pos == NULL) ret = 0; @@ -370,12 +384,9 @@ do_index(NODE *tree) break; } #ifdef MBS_SUPPORT - if (gawk_mb_cur_max > 1) { + if (! do_single_byte && gawk_mb_cur_max > 1) { const wchar_t *pos; - s1 = force_wstring(s1); - s2 = force_wstring(s2); - pos = wstrstr(s1->wstptr, s1->wstlen, s2->wstptr, s2->wstlen); if (pos == NULL) ret = 0; @@ -434,10 +445,15 @@ do_length(NODE *tree) { NODE *tmp; size_t len; + NODE *n; + + n = tree->lnode; + if (n->type == Node_param_list) + n = stack_ptr[n->param_cnt]; - if (tree->lnode->type == Node_var_array - || tree->lnode->type == Node_array_ref) { - NODE *array_var = tree->lnode; + if (n->type == Node_var_array + || n->type == Node_array_ref) { + NODE *array_var = n; static short warned = FALSE; if (array_var->type == Node_array_ref) @@ -453,7 +469,10 @@ do_length(NODE *tree) return tmp_number((AWKNUM) array_var->table_size); } else { normal: - tmp = tree_eval(tree->lnode); + if (do_lint && n->type == Node_var_new) + lintwarn(_("length: untyped argument will be forced to scalar")); + + tmp = tree_eval(n); if (do_lint && (tmp->flags & (STRING|STRCUR)) == 0) lintwarn(_("length: received non-string argument")); tmp = force_string(tmp); @@ -461,6 +480,12 @@ normal: if (gawk_mb_cur_max > 1) { tmp = force_wstring(tmp); len = tmp->wstlen; + /* + * If the bytes don't make a valid wide character + * string, fall back to the bytes themselves. + */ + if (len == 0 && tmp->stlen > 0) + len = tmp->stlen; } else #endif len = tmp->stlen; @@ -595,6 +620,7 @@ format_tree( int quote_flag = FALSE; int ii, jj; char *chp; + size_t copy_count, char_count; static const char sp[] = " "; static const char zero_string[] = "0"; static const char lchbuf[] = "0123456789abcdef"; @@ -750,6 +776,19 @@ check_pos: * apply. The code already was that way, but this * comment documents it, at least in the code. */ + if (do_lint) { + const char *msg = NULL; + + if (fw && ! have_prec) + msg = _("field width is ignored for `%%%%' specifier"); + else if (fw == 0 && have_prec) + msg = _("precision is ignored for `%%%%' specifier"); + else if (fw && have_prec) + msg = _("field width and precision are ignored for `%%%%' specifier"); + + if (msg != NULL) + lintwarn(msg); + } bchunk_one("%"); s0 = s1; break; @@ -938,6 +977,10 @@ check_pos: #else uval = (uintmax_t) arg->numbr; #endif + if (do_lint && uval > 255) { + lintwarn("[s]printf: value %g is too big for %%c format", + arg->numbr); + } cpbuf[0] = uval; prec = 1; cp = cpbuf; @@ -958,8 +1001,13 @@ check_pos: fill = zero_string; parse_next_arg(); arg = force_string(arg); - if (! have_prec || prec > arg->stlen) + if (fw == 0 && ! have_prec) prec = arg->stlen; + else { + char_count = mbc_char_count(arg->stptr, arg->stlen); + if (! have_prec || prec > char_count) + prec = char_count; + } cp = arg->stptr; goto pr_tail; case 'd': @@ -968,11 +1016,9 @@ check_pos: parse_next_arg(); tmpval = force_number(arg); /* - * Check for Nan or Inf (without using isfinite(), - * since that may not be available on all platforms) + * Check for Nan or Inf. */ - if ((tmpval != tmpval) || - ((2*tmpval == tmpval) && (tmpval != 0))) + if (isnan(tmpval) || isinf(tmpval)) goto out_of_range; else tmpval = double_to_int(tmpval); @@ -1168,7 +1214,15 @@ check_pos: fw--; } } - bchunk(cp, (int) prec); + copy_count = prec; + if (fw == 0 && ! have_prec) + ; + else if (gawk_mb_cur_max > 1 && (cs1 == 's' || cs1 == 'c')) { + assert(cp == arg->stptr || cp == cpbuf); + copy_count = mbc_byte_count(arg->stptr, + cs1 == 's' ? arg->stlen : 1); + } + bchunk(cp, copy_count); while (fw > prec) { bchunk_one(fill); fw--; @@ -1258,6 +1312,8 @@ check_pos: s0 = s1; break; default: + if (do_lint && ISALPHA(cs1)) + lintwarn(_("ignoring unknown format specifier character `%c': no argument converted"), cs1); break; } if (toofew) { @@ -1420,7 +1476,14 @@ do_substr(NODE *tree) if (tree->rnode->rnode == NULL) { /* third arg. missing */ /* use remainder of string */ - length = t1->stlen - indx; + length = t1->stlen - indx; /* default to bytes */ +#ifdef MBS_SUPPORT + if (gawk_mb_cur_max > 1) { + t1 = force_wstring(t1); + if (t1->wstlen > 0) /* use length of wide char string if we have one */ + length = t1->wstlen - indx; + } +#endif d_length = length; /* set here in case used in diagnostics, below */ } else { t3 = tree_eval(tree->rnode->rnode->lnode); @@ -2027,14 +2090,17 @@ do_cos(NODE *tree) /* do_rand --- do the rand function */ static int firstrand = TRUE; -static char state[256]; +/* Some systems require this array to be integer aligned. Sigh. */ +#define SIZEOF_STATE 256 +static uint32_t istate[SIZEOF_STATE/sizeof(uint32_t)]; +static char *const state = (char *const) istate; /* ARGSUSED */ NODE * do_rand(NODE *tree ATTRIBUTE_UNUSED) { if (firstrand) { - (void) initstate((unsigned) 1, state, sizeof state); + (void) initstate((unsigned) 1, state, SIZEOF_STATE); /* don't need to srandom(1), initstate() does it for us. */ firstrand = FALSE; setstate(state); @@ -2057,7 +2123,7 @@ do_srand(NODE *tree) long ret = save_seed; /* SVR4 awk srand returns previous seed */ if (firstrand) { - (void) initstate((unsigned) 1, state, sizeof state); + (void) initstate((unsigned) 1, state, SIZEOF_STATE); /* don't need to srandom(1), we're changing the seed below */ firstrand = FALSE; (void) setstate(state); @@ -2147,6 +2213,7 @@ do_match(NODE *tree) #endif it = make_string(start, len); + it->flags |= MAYBE_NUM; /* user input */ /* * assoc_lookup() does free_temp() on 2nd arg. */ @@ -2479,15 +2546,17 @@ sub_common(NODE *tree, long how_many, int backdigs) if (backdigs) { /* gensub, behave sanely */ if (ISDIGIT(scan[1])) { int dig = scan[1] - '0'; - char *start, *end; + if (dig < NUMSUBPATS(rp, t->stptr) && SUBPATSTART(rp, tp->stptr, dig) != -1) { + char *start, *end; - start = t->stptr - + SUBPATSTART(rp, t->stptr, dig); - end = t->stptr - + SUBPATEND(rp, t->stptr, dig); - - for (cp = start; cp < end; cp++) - *bp++ = *cp; + start = t->stptr + + SUBPATSTART(rp, t->stptr, dig); + end = t->stptr + + SUBPATEND(rp, t->stptr, dig); + + for (cp = start; cp < end; cp++) + *bp++ = *cp; + } scan++; } else /* \q for any q --> q */ *bp++ = *++scan; @@ -3279,3 +3348,67 @@ do_bindtextdomain(NODE *tree) return tmp_string(the_result, strlen(the_result)); } + +/* mbc_byte_count --- return number of bytes for corresponding numchars multibyte characters */ + +static size_t +mbc_byte_count(const char *ptr, size_t numchars) +{ +#ifdef MBS_SUPPORT + mbstate_t cur_state; + size_t sum = 0; + int mb_len; + + memset(& cur_state, 0, sizeof(cur_state)); + + assert(gawk_mb_cur_max > 1); + mb_len = mbrlen(ptr, numchars * gawk_mb_cur_max, &cur_state); + if (mb_len <= 0) + return numchars; /* no valid m.b. char */ + + for (; numchars > 0; numchars--) { + mb_len = mbrlen(ptr, numchars * gawk_mb_cur_max, &cur_state); + if (mb_len <= 0) + break; + sum += mb_len; + ptr += mb_len; + } + + return sum; +#else + return numchars; +#endif +} + +/* mbc_char_count --- return number of m.b. chars in string, up to numbytes bytes */ + +static size_t +mbc_char_count(const char *ptr, size_t numbytes) +{ +#ifdef MBS_SUPPORT + mbstate_t cur_state; + size_t sum = 0; + int mb_len; + + memset(& cur_state, 0, sizeof(cur_state)); + + if (gawk_mb_cur_max == 1) + return numbytes; + + mb_len = mbrlen(ptr, numbytes * gawk_mb_cur_max, &cur_state); + if (mb_len <= 0) + return numbytes; /* no valid m.b. char */ + + for (; numbytes > 0; numbytes--) { + mb_len = mbrlen(ptr, numbytes * gawk_mb_cur_max, &cur_state); + if (mb_len <= 0) + break; + sum++; + ptr += mb_len; + } + + return sum; +#else + return numbytes; +#endif +} |