diff options
Diffstat (limited to 'node.c')
-rw-r--r-- | node.c | 340 |
1 files changed, 136 insertions, 204 deletions
@@ -3,7 +3,8 @@ */ /* - * Copyright (C) 1986, 1988, 1989, 1991-2001, 2003-2010 the Free Software Foundation, Inc. + * Copyright (C) 1986, 1988, 1989, 1991-2001, 2003-2010, + * the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. @@ -26,31 +27,23 @@ #include "awk.h" #include "math.h" -static int is_ieee_magic_val P((const char *val)); -static AWKNUM get_ieee_magic_val P((const char *val)); +static int is_ieee_magic_val(const char *val); +static AWKNUM get_ieee_magic_val(const char *val); -/* r_force_number --- force a value to be numeric */ +/* force_number --- force a value to be numeric */ AWKNUM -r_force_number(register NODE *n) +r_force_number(NODE *n) { - register char *cp; - register char *cpend; + char *cp; + char *cpend; char save; char *ptr; unsigned int newflags; extern double strtod(); -#ifdef GAWKDEBUG - if (n == NULL) - cant_happen(); - if (n->type != Node_val) - cant_happen(); - if (n->flags == 0) - cant_happen(); if (n->flags & NUMCUR) return n->numbr; -#endif /* all the conditionals are an attempt to avoid the expensive strtod */ @@ -59,8 +52,6 @@ r_force_number(register NODE *n) n->numbr = 0.0; if (n->stlen == 0) { - if (0 && do_lint) - lintwarn(_("can't convert string to float")); return 0.0; } @@ -74,8 +65,6 @@ r_force_number(register NODE *n) */ if (! do_posix) { if (isalpha(*cp)) { - if (0 && do_lint) - lintwarn(_("can't convert string to float")); return 0.0; } else if (n->stlen == 4 && is_ieee_magic_val(n->stptr)) { if (n->flags & MAYBE_NUM) @@ -95,14 +84,12 @@ r_force_number(register NODE *n) while (cp < cpend && isspace(*cp)) cp++; - /* FIXME: Simplify this condition! */ - if ( cp == cpend - || (! do_posix - && (isalpha(*cp) + if ( cp == cpend /* only spaces, or */ + || (! do_posix /* not POSIXLY paranoid and */ + && (isalpha(*cp) /* letter, or */ + /* CANNOT do non-decimal and saw 0x */ || (! do_non_decimal_data && cp[0] == '0' && (cp[1] == 'x' || cp[1] == 'X'))))) { - if (0 && do_lint) - lintwarn(_("can't convert string to float")); return 0.0; } @@ -112,13 +99,12 @@ r_force_number(register NODE *n) } else newflags = 0; - if (cpend - cp == 1) { - if (isdigit(*cp)) { + if (cpend - cp == 1) { /* only one character */ + if (isdigit(*cp)) { /* it's a digit! */ n->numbr = (AWKNUM)(*cp - '0'); n->flags |= newflags; n->flags |= NUMCUR; - } else if (0 && do_lint) - lintwarn(_("can't convert string to float")); + } return n->numbr; } @@ -142,13 +128,10 @@ r_force_number(register NODE *n) ptr++; *cpend = save; finish: - /* the >= should be ==, but for SunOS 3.5 strtod() */ - if (errno == 0 && ptr >= cpend) { + if (errno == 0 && ptr == cpend) { n->flags |= newflags; n->flags |= NUMCUR; } else { - if (0 && do_lint && ptr < cpend) - lintwarn(_("can't convert string to float")); errno = 0; } @@ -157,9 +140,9 @@ finish: /* - * the following lookup table is used as an optimization in force_string + * The following lookup table is used as an optimization in force_string; * (more complicated) variations on this theme didn't seem to pay off, but - * systematic testing might be in order at some point + * systematic testing might be in order at some point. */ static const char *values[] = { "0", @@ -178,10 +161,10 @@ static const char *values[] = { /* format_val --- format a numeric value based on format */ NODE * -format_val(const char *format, int index, register NODE *s) +format_val(const char *format, int index, NODE *s) { char buf[BUFSIZ]; - register char *sp = buf; + char *sp = buf; double val; char *orig, *trans, save; @@ -193,7 +176,7 @@ format_val(const char *format, int index, register NODE *s) trans = dgettext(TEXTDOMAIN, orig); s->stptr[s->stlen] = save; - return tmp_string(trans, strlen(trans)); + return make_string(trans, strlen(trans)); } /* @@ -226,38 +209,37 @@ format_val(const char *format, int index, register NODE *s) * and just always format the value ourselves. */ - NODE *dummy, *r; + NODE *dummy[2], *r; unsigned short oflags; extern NODE **fmt_list; /* declared in eval.c */ /* create dummy node for a sole use of format_tree */ - getnode(dummy); - dummy->type = Node_expression_list; - dummy->lnode = s; - dummy->rnode = NULL; + dummy[1] = s; oflags = s->flags; - s->flags |= PERM; /* prevent from freeing by format_tree() */ if (val == s->numbr) { /* integral value, but outside range of %ld, use %.0f */ r = format_tree("%.0f", 4, dummy, 2); s->stfmt = -1; } else { r = format_tree(format, fmt_list[index]->stlen, dummy, 2); + assert(r != NULL); s->stfmt = (char) index; } s->flags = oflags; s->stlen = r->stlen; if ((s->flags & STRCUR) != 0) - free(s->stptr); + efree(s->stptr); s->stptr = r->stptr; - freenode(r); /* Do not free_temp(r)! We want */ - freenode(dummy); /* to keep s->stptr == r->stpr. */ + freenode(r); /* Do not unref(r)! We want to keep s->stptr == r->stpr. */ goto no_malloc; } else { - /* integral value */ - /* force conversion to long only once */ - register long num = (long) val; + /* + * integral value + * force conversion to long only once + */ + long num = (long) val; + if (num < NVAL && num >= 0) { sp = (char *) values[num]; s->stlen = 1; @@ -267,74 +249,50 @@ format_val(const char *format, int index, register NODE *s) } s->stfmt = -1; } + if (s->stptr != NULL) + efree(s->stptr); emalloc(s->stptr, char *, s->stlen + 2, "format_val"); memcpy(s->stptr, sp, s->stlen+1); no_malloc: - s->stref = 1; s->flags |= STRCUR; free_wstr(s); return s; } -/* r_force_string --- force a value to be a string */ +/* force_string --- force a value to be a string */ NODE * -r_force_string(register NODE *s) +r_force_string(NODE *s) { - NODE *ret; -#ifdef GAWKDEBUG - if (s == NULL) - cant_happen(); - if (s->type != Node_val) - cant_happen(); - if (s->stref <= 0) - cant_happen(); if ((s->flags & STRCUR) != 0 - && (s->stfmt == -1 || s->stfmt == CONVFMTidx)) + && (s->stfmt == -1 || s->stfmt == CONVFMTidx) + ) return s; -#endif - - ret = format_val(CONVFMT, CONVFMTidx, s); - return ret; + return format_val(CONVFMT, CONVFMTidx, s); } -/* - * dupnode: - * Duplicate a node. (For strings, "duplicate" means crank up the - * reference count.) - */ +/* dupnode --- duplicate a node */ NODE * -r_dupnode(NODE *n) +dupnode(NODE *n) { - register NODE *r; + NODE *r; + + assert(n->type == Node_val); -#ifndef DUPNODE_MACRO - if ((n->flags & TEMP) != 0) { - n->flags &= ~TEMP; - n->flags |= MALLOC; - return n; - } if ((n->flags & PERM) != 0) return n; -#endif - if ((n->flags & (MALLOC|STRCUR)) == (MALLOC|STRCUR)) { - if (n->stref < LONG_MAX) - n->stref++; - else - n->flags |= PERM; - return n; - } else if ((n->flags & MALLOC) != 0 && n->type == Node_ahash) { - if (n->ahname_ref < LONG_MAX) - n->ahname_ref++; - else - n->flags |= PERM; + + if ((n->flags & MALLOC) != 0) { + n->valref++; return n; } + getnode(r); *r = *n; - r->flags &= ~(PERM|TEMP|FIELD); + r->flags &= ~FIELD; r->flags |= MALLOC; + r->valref = 1; #ifdef MBS_SUPPORT /* * DON'T call free_wstr(r) here! @@ -344,74 +302,52 @@ r_dupnode(NODE *n) r->wstptr = NULL; r->wstlen = 0; #endif /* defined MBS_SUPPORT */ - if (n->type == Node_val && (n->flags & STRCUR) != 0) { - r->stref = 1; - emalloc(r->stptr, char *, r->stlen + 2, "dupnode"); - memcpy(r->stptr, n->stptr, r->stlen); - r->stptr[r->stlen] = '\0'; + + if ((n->flags & STRCUR) != 0) { + emalloc(r->stptr, char *, n->stlen + 2, "dupnode"); + memcpy(r->stptr, n->stptr, n->stlen); + r->stptr[n->stlen] = '\0'; #if defined MBS_SUPPORT if ((n->flags & WSTRCUR) != 0) { r->wstlen = n->wstlen; - emalloc(r->wstptr, wchar_t *, sizeof(wchar_t) * (r->wstlen + 2), "dupnode"); - memcpy(r->wstptr, n->wstptr, r->wstlen * sizeof(wchar_t)); - r->wstptr[r->wstlen] = L'\0'; + emalloc(r->wstptr, wchar_t *, sizeof(wchar_t) * (n->wstlen + 2), "dupnode"); + memcpy(r->wstptr, n->wstptr, n->wstlen * sizeof(wchar_t)); + r->wstptr[n->wstlen] = L'\0'; r->flags |= WSTRCUR; } #endif /* defined MBS_SUPPORT */ - } else if (n->type == Node_ahash && (n->flags & MALLOC) != 0) { - r->ahname_ref = 1; - emalloc(r->ahname_str, char *, r->ahname_len + 2, "dupnode"); - memcpy(r->ahname_str, n->ahname_str, r->ahname_len); - r->ahname_str[r->ahname_len] = '\0'; } + return r; } -/* copy_node --- force a brand new copy of a node to be allocated */ - -NODE * -copynode(NODE *old) -{ - NODE *new; - int saveflags; - - assert(old != NULL); - saveflags = old->flags; - old->flags &= ~(MALLOC|PERM); - new = dupnode(old); - old->flags = saveflags; - return new; -} - /* mk_number --- allocate a node with defined number */ NODE * mk_number(AWKNUM x, unsigned int flags) { - register NODE *r; + NODE *r; getnode(r); r->type = Node_val; r->numbr = x; + r->valref = 1; r->flags = flags; -#ifdef GAWKDEBUG - r->stref = 1; r->stptr = NULL; r->stlen = 0; free_wstr(r); -#endif /* GAWKDEBUG */ return r; } /* make_str_node --- make a string node */ NODE * -make_str_node(char *s, unsigned long len, int flags) +r_make_str_node(char *s, unsigned long len, int flags) { - register NODE *r; - + NODE *r; getnode(r); r->type = Node_val; + r->numbr = 0; r->flags = (STRING|STRCUR|MALLOC); #ifdef MBS_SUPPORT r->wstptr = NULL; @@ -421,16 +357,16 @@ make_str_node(char *s, unsigned long len, int flags) if (flags & ALREADY_MALLOCED) r->stptr = s; else { - emalloc(r->stptr, char *, len + 2, s); + emalloc(r->stptr, char *, len + 2, "make_str_node"); memcpy(r->stptr, s, len); } r->stptr[len] = '\0'; - + if ((flags & SCAN) != 0) { /* scan for escape sequences */ const char *pf; - register char *ptm; - register int c; - register const char *end; + char *ptm; + int c; + const char *end; #ifdef MBS_SUPPORT mbstate_t cur_state; @@ -472,27 +408,16 @@ make_str_node(char *s, unsigned long len, int flags) len = ptm - r->stptr; erealloc(r->stptr, char *, len + 1, "make_str_node"); r->stptr[len] = '\0'; + r->flags &= ~MALLOC; r->flags |= PERM; } r->stlen = len; - r->stref = 1; + r->valref = 1; r->stfmt = -1; return r; } -/* tmp_string --- allocate a temporary string */ - -NODE * -tmp_string(char *s, size_t len) -{ - register NODE *r; - - r = make_string(s, len); - r->flags |= TEMP; - return r; -} - /* more_nodes --- allocate more nodes */ #define NODECHUNK 100 @@ -502,7 +427,7 @@ NODE *nextfree = NULL; NODE * more_nodes() { - register NODE *np; + NODE *np; /* get more nodes and initialize list */ emalloc(nextfree, NODE *, NODECHUNK * sizeof(NODE), "more_nodes"); @@ -517,62 +442,29 @@ more_nodes() return np; } -#ifdef MEMDEBUG -#undef freenode -/* freenode --- release a node back to the pool */ - -void -freenode(NODE *it) -{ -#ifdef MPROF - it->stref = 0; - free((char *) it); -#else /* not MPROF */ -#ifndef NO_PROFILING - it->exec_count = 0; -#endif - /* add it to head of freelist */ - it->nextp = nextfree; - nextfree = it; -#endif /* not MPROF */ -} -#endif /* GAWKDEBUG */ - /* unref --- remove reference to a particular node */ void -unref(register NODE *tmp) +unref(NODE *tmp) { if (tmp == NULL) return; if ((tmp->flags & PERM) != 0) return; - tmp->flags &= ~TEMP; + if ((tmp->flags & MALLOC) != 0) { - if (tmp->type == Node_ahash) { - if (tmp->ahname_ref > 1) { - tmp->ahname_ref--; - return; - } - free(tmp->ahname_str); - } else if ((tmp->flags & STRCUR) != 0) { - if (tmp->stref > 1) { - tmp->stref--; - return; - } - free(tmp->stptr); - free_wstr(tmp); - } - freenode(tmp); - return; - } - if ((tmp->flags & FIELD) != 0) { - free_wstr(tmp); - freenode(tmp); - return; + if (tmp->valref > 1) { + tmp->valref--; + return; + } + if (tmp->flags & STRCUR) + efree(tmp->stptr); } + free_wstr(tmp); + freenode(tmp); } + /* * parse_escape: * @@ -589,20 +481,21 @@ unref(register NODE *tmp) * If \ is followed by 000, we return 0 and leave the string pointer after the * zeros. A value of 0 does not mean end of string. * - * Posix doesn't allow \x. + * POSIX doesn't allow \x. */ int parse_escape(const char **string_ptr) { - register int c = *(*string_ptr)++; - register int i; - register int count; + int c = *(*string_ptr)++; + int i; + int count; int j; const char *start; if (do_lint_old) { switch (c) { + case 'a': case 'b': case 'f': case 'r': @@ -613,7 +506,7 @@ parse_escape(const char **string_ptr) switch (c) { case 'a': - return BELL; + return '\a'; case 'b': return '\b'; case 'f': @@ -755,9 +648,19 @@ str2wstr(NODE *n, size_t **ptr) char *sp; mbstate_t mbs; wchar_t wc, *wsp; + static short warned = FALSE; assert((n->flags & (STRING|STRCUR)) != 0); + /* + * Don't convert global null string or global null field + * variables to a wide string. They are both zero-length anyway. + * This also avoids future double-free errors while releasing + * shallow copies, eg. *tmp = *Null_field; free_wstr(tmp); + */ + if (n == Nnull_string || n == Null_field) + return n; + if ((n->flags & WSTRCUR) != 0) { if (ptr == NULL) return n; @@ -771,11 +674,11 @@ str2wstr(NODE *n, size_t **ptr) * code trades space for time. We allocate * an array of wchar_t that is n->stlen long. * This is needed in the worst case anyway, where - * each input bytes maps to one wchar_t. The + * each input byte maps to one wchar_t. The * advantage is that we only have to convert the string * once, instead of twice, once to find out how many - * wide characters, and then again to actually fill - * the info in. If there's a lot left over, we can + * wide characters, and then again to actually fill in + * the info. If there's a lot left over, we can * realloc the wide string down in size. */ @@ -799,11 +702,41 @@ str2wstr(NODE *n, size_t **ptr) src_count = n->stlen; memset(& mbs, 0, sizeof(mbs)); for (i = 0; src_count > 0; i++) { - count = mbrtowc(& wc, sp, src_count, & mbs); + /* + * 9/2010: Check the current byte; if it's a valid character, + * then it doesn't start a multibyte sequence. This brings a + * big speed up. Thanks to Ulrich Drepper for the tip. + */ + if ( isprint(*sp) + || isgraph(*sp) + || iscntrl(*sp) + || *sp == '\0' ) { + count = 1; + wc = *sp; + } else + count = mbrtowc(& wc, sp, src_count, & mbs); switch (count) { case (size_t) -2: case (size_t) -1: - goto done; + /* + * Just skip the bad byte and keep going, so that + * we get a more-or-less full string, instead of + * stopping early. This is particularly important + * for match() where we need to build the indices. + */ + sp++; + src_count--; + /* + * mbrtowc(3) says the state of mbs becomes undefined + * after a bad character, so reset it. + */ + memset(& mbs, 0, sizeof(mbs)); + /* And warn the user something's wrong */ + if (do_lint && ! warned) { + warned = TRUE; + lintwarn(_("Invalid multibyte data detected. There may be a mismatch between your data and your locale.")); + } + break; case 0: count = 1; @@ -820,9 +753,8 @@ str2wstr(NODE *n, size_t **ptr) } } -done: *wsp = L'\0'; - n->wstlen = i; + n->wstlen = wsp - n->wstptr; n->flags |= WSTRCUR; #define ARBITRARY_AMOUNT_TO_GIVE_BACK 100 if (n->stlen - n->wstlen > ARBITRARY_AMOUNT_TO_GIVE_BACK) @@ -840,7 +772,7 @@ free_wstr(NODE *n) if ((n->flags & WSTRCUR) != 0) { assert(n->wstptr != NULL); - free(n->wstptr); + efree(n->wstptr); } n->wstptr = NULL; n->wstlen = 0; |