diff options
author | Andrew J. Schorr <aschorr@telemetry-investments.com> | 2016-07-06 21:31:22 -0400 |
---|---|---|
committer | Andrew J. Schorr <aschorr@telemetry-investments.com> | 2016-07-06 21:31:22 -0400 |
commit | ce342a04922797cb53557178c54d32c4efafda16 (patch) | |
tree | d92372e30a992d950705e1f234bea5ac6405bd67 | |
parent | f8424b236fabb881cc977b9e8e2e7c8debf56da0 (diff) | |
download | egawk-ce342a04922797cb53557178c54d32c4efafda16.tar.gz egawk-ce342a04922797cb53557178c54d32c4efafda16.tar.bz2 egawk-ce342a04922797cb53557178c54d32c4efafda16.zip |
Document string termination in header files and remove no-longer-needed string termination logic in various places.
-rw-r--r-- | ChangeLog | 34 | ||||
-rw-r--r-- | awk.h | 8 | ||||
-rw-r--r-- | builtin.c | 16 | ||||
-rw-r--r-- | eval.c | 12 | ||||
-rw-r--r-- | gawkapi.c | 2 | ||||
-rw-r--r-- | gawkapi.h | 6 | ||||
-rw-r--r-- | int_array.c | 4 | ||||
-rw-r--r-- | interpret.h | 5 | ||||
-rw-r--r-- | io.c | 1 | ||||
-rw-r--r-- | mpfr.c | 5 | ||||
-rw-r--r-- | node.c | 19 | ||||
-rw-r--r-- | profile.c | 1 |
12 files changed, 55 insertions, 58 deletions
@@ -1,5 +1,39 @@ 2016-07-06 Andrew J. Schorr <aschorr@telemetry-investments.com> + * awk.h: Modify stptr comment to indicate that all strings are now + NUL-terminated. + * builtin.c (do_mktime): Remove unnecessary logic to terminate + the string with '\0' temporarily. + (do_system): Ditto. + (nondec2awknum): Add a comment about termination. + * eval.c (posix_compare): Remove logic to terminate strings temporarily. + (set_ORS): No need to terminate ORS, since the string node is already + terminated. What gave us the right to modify that node anyway? + (fmt_index): Remove code to terminate string. This seems to have been + invalid anyway, since we don't own that memory. + (set_TEXTDOMAIN): Do not terminate TEXTDOMAIN string, since the node + is already terminated. We didn't have the right to modify that node + anyway. + * gawkapi.c (node_to_awk_value): Add assert checks to confirm that the + string is NUL-terminated. + * gawkapi.h: Modify awk_string comment to indicate that strings are + always terminated with '\0'. + * int_array.c (is_integer): Remove unnecessary logic to terminate string + with '\0' temporarily. + * interpret.h (Op_push_i): Ditto. + * io.c (nextfile): Remove string termination. We didn't own that memory + anyway. + * mpfr.c (force_mpnum): Remove unnecessary logic to terminate the + string with '\0' temporarily. + * node.c (r_force_number): Remove NUL termination around strtod call, + since we already know that there is either a white space or '\0' + character there. Either one will stop strtod. + (get_ieee_magic_val): Ditto. + * profile.c (pp_number): No need to terminate string returned by + r_format_val. + +2016-07-06 Andrew J. Schorr <aschorr@telemetry-investments.com> + * interpret.h (Op_field_spec): Now that all $n field values are NUL-terminated, there is no reason to call dupnode for $n where n > 0. 
This saves malloc and copying overhead, thereby more than offsetting the @@ -473,13 +473,7 @@ typedef struct exp_node { #define re_cnt flags /* Node_val */ -/* - * Note that the string in stptr may not be NUL-terminated, but it is - * guaranteed to have at least one extra byte that may be temporarily set - * to '\0'. This is helpful when calling functions such as strtod that require - * a NUL-terminated argument. In particular, field values $n for n > 0 and - * n < NF will not have a NUL terminator, since they point into the $0 buffer. - */ +/* Note that the string in stptr will always be NUL-terminated. */ #define stptr sub.val.sp #define stlen sub.val.slen #define valref sub.val.sref @@ -2035,16 +2035,12 @@ do_mktime(int nargs) int month, day, hour, minute, second, count; int dst = -1; /* default is unknown */ time_t then_stamp; - char save; t1 = POP_SCALAR(); if (do_lint && (fixtype(t1)->flags & STRING) == 0) lintwarn(_("mktime: received non-string argument")); t1 = force_string(t1); - save = t1->stptr[t1->stlen]; - t1->stptr[t1->stlen] = '\0'; - count = sscanf(t1->stptr, "%ld %d %d %d %d %d %d", & year, & month, & day, & hour, & minute, & second, @@ -2058,7 +2054,6 @@ do_mktime(int nargs) || (month < 1 || month > 12) )) lintwarn(_("mktime: at least one of the values is out of the default range")); - t1->stptr[t1->stlen] = save; DEREF(t1); if (count < 6 @@ -2088,7 +2083,6 @@ do_system(int nargs) NODE *tmp; AWKNUM ret = 0; /* floating point on purpose, compat Unix awk */ char *cmd; - char save; int status; if (do_sandbox) @@ -2101,10 +2095,6 @@ do_system(int nargs) cmd = force_string(tmp)->stptr; if (cmd && *cmd) { - /* insure arg to system is zero-terminated */ - save = cmd[tmp->stlen]; - cmd[tmp->stlen] = '\0'; - os_restore_mode(fileno(stdin)); #ifdef SIGPIPE signal(SIGPIPE, SIG_DFL); @@ -2148,7 +2138,6 @@ do_system(int nargs) signal(SIGPIPE, SIG_IGN); #endif - cmd[tmp->stlen] = save; } DEREF(tmp); return make_number((AWKNUM) ret); @@ -3632,6 +3621,11 @@ 
nondec2awknum(char *str, size_t len, char **endptr) *endptr = str; } else { decimal: + /* + * Terminating is probably unnecessary, since the caller always + * passes a string ending with '\0' or white space, but it + * seems safest to leave this to avoid future problems. + */ save = str[len]; str[len] = '\0'; retval = strtod(str, endptr); @@ -493,15 +493,8 @@ static int posix_compare(NODE *s1, NODE *s2) { int ret = 0; - char save1, save2; size_t l = 0; - save1 = s1->stptr[s1->stlen]; - s1->stptr[s1->stlen] = '\0'; - - save2 = s2->stptr[s2->stlen]; - s2->stptr[s2->stlen] = '\0'; - if (gawk_mb_cur_max == 1) { if (strlen(s1->stptr) == s1->stlen && strlen(s2->stptr) == s2->stlen) ret = strcoll(s1->stptr, s2->stptr); @@ -563,8 +556,6 @@ posix_compare(NODE *s1, NODE *s2) } #endif - s1->stptr[s1->stlen] = save1; - s2->stptr[s2->stlen] = save2; return ret; } @@ -824,7 +815,6 @@ set_ORS() ORS_node->var_value = force_string(ORS_node->var_value); ORS = ORS_node->var_value->stptr; ORSlen = ORS_node->var_value->stlen; - ORS[ORSlen] = '\0'; } /* fmt_ok --- is the conversion format a valid one? */ @@ -887,7 +877,6 @@ fmt_index(NODE *n) ix++; } /* not found */ - n->stptr[n->stlen] = '\0'; if (do_lint && ! fmt_ok(n)) lintwarn(_("bad `%sFMT' specification `%s'"), n == CONVFMT_node->var_value ? "CONV" @@ -972,7 +961,6 @@ set_TEXTDOMAIN() tmp = TEXTDOMAIN_node->var_value = force_string(TEXTDOMAIN_node->var_value); TEXTDOMAIN = tmp->stptr; len = tmp->stlen; - TEXTDOMAIN[len] = '\0'; /* * Note: don't call textdomain(); this value is for * the awk program, not for gawk itself. 
@@ -440,6 +440,7 @@ node_to_awk_value(NODE *node, awk_value_t *val, awk_valtype_t wanted) (void) force_string(node); val->str_value.str = node->stptr; val->str_value.len = node->stlen; + assert(val->str_value.str[val->str_value.len] == '\0'); ret = awk_true; break; @@ -468,6 +469,7 @@ node_to_awk_value(NODE *node, awk_value_t *val, awk_valtype_t wanted) val->val_type = AWK_STRING; val->str_value.str = node->stptr; val->str_value.len = node->stlen; + assert(val->str_value.str[val->str_value.len] == '\0'); ret = awk_true; } else val->val_type = AWK_UNDEFINED; @@ -279,11 +279,7 @@ enum { * be multibyte encoded in the current locale's encoding and character * set. Gawk will convert internally to wide characters if necessary. * - * Note that the string may not be terminated with a '\0' character. - * In particular, this happens for field values $n where n > 0 and n < NF, - * since the string points directly into the $0 buffer. All other strings, - * including those created by extensions, should be NUL-terminated. In general - * though, extension code should not assume that the string is NUL-terminated! + * Note that the string will always be terminated with a '\0' character. */ typedef struct awk_string { char *str; /* data */ diff --git a/int_array.c b/int_array.c index e7913dea..93e96d1f 100644 --- a/int_array.c +++ b/int_array.c @@ -128,7 +128,6 @@ is_integer(NODE *symbol, NODE *subs) /* must be a STRING */ char *cp = subs->stptr, *cpend, *ptr; - char save; size_t len = subs->stlen; if (len == 0 || (! isdigit((unsigned char) *cp) && *cp != '-')) @@ -151,12 +150,9 @@ is_integer(NODE *symbol, NODE *subs) } cpend = cp + len; - save = *cpend; - *cpend = '\0'; errno = 0; l = strtol(cp, & ptr, 10); - *cpend = save; if (errno != 0 || ptr != cpend) return NULL; diff --git a/interpret.h b/interpret.h index 106367f7..9d7b423e 100644 --- a/interpret.h +++ b/interpret.h @@ -135,13 +135,10 @@ top: case Op_push_i: m = pc->memory; if (! 
do_traditional && (m->flags & INTLSTR) != 0) { - char *orig, *trans, save; + char *orig, *trans; - save = m->stptr[m->stlen]; - m->stptr[m->stlen] = '\0'; orig = m->stptr; trans = dgettext(TEXTDOMAIN, orig); - m->stptr[m->stlen] = save; m = make_string(trans, strlen(trans)); } else UPREF(m); @@ -480,7 +480,6 @@ nextfile(IOBUF **curfile, bool skipping) if (arg == NULL || arg->stlen == 0) continue; arg = force_string(arg); - arg->stptr[arg->stlen] = '\0'; if (! do_traditional) { unref(ARGIND_node->var_value); ARGIND_node->var_value = make_number((AWKNUM) i); @@ -275,7 +275,6 @@ static int force_mpnum(NODE *n, int do_nondec, int use_locale) { char *cp, *cpend, *ptr, *cp1; - char save; int tval, base = 10; if (n->stlen == 0) { @@ -292,9 +291,6 @@ force_mpnum(NODE *n, int do_nondec, int use_locale) return false; } - save = *cpend; - *cpend = '\0'; - if (*cp == '+' || *cp == '-') cp1 = cp + 1; else @@ -329,7 +325,6 @@ done: /* trailing space is OK for NUMBER */ while (ptr < cpend && isspace((unsigned char) *ptr)) ptr++; - *cpend = save; if (errno == 0 && ptr == cpend) return true; errno = 0; @@ -59,7 +59,6 @@ r_force_number(NODE *n) { char *cp; char *cpend; - char save; char *ptr; extern double strtod(); @@ -133,10 +132,13 @@ r_force_number(NODE *n) /* nondec2awknum() saves and restores the byte after the string itself */ n->numbr = nondec2awknum(cp, cpend - cp, &ptr); } else { - save = *cpend; - *cpend = '\0'; + /* + * There is no need to set *cpend to '\0' because it is either + * pointing to white space or the '\0' at the end of the string. + * In either case, strtod should terminate on that character + * or earlier due to non-numeric characters. 
+ */ n->numbr = (AWKNUM) strtod((const char *) cp, &ptr); - *cpend = save; } if (errno == 0) { @@ -941,13 +943,14 @@ get_ieee_magic_val(char *val) static bool first = true; static AWKNUM inf; static AWKNUM nan; - char save; char *ptr; - save = val[4]; - val[4] = '\0'; + /* + * There is no need to set val[4] to '\0' because it is either white + * space or the NUL character at the end of the string. Either way, + * strtod should terminate on that character. + */ AWKNUM v = strtod(val, &ptr); - val[4] = save; if (val == ptr) { /* Older strtod implementations don't support inf or nan. */ if (first) { @@ -1541,7 +1541,6 @@ pp_number(NODE *n) s = r_format_val("%.6g", 0, s); - s->stptr[s->stlen] = '\0'; str = s->stptr; freenode(s); |