diff options
-rw-r--r-- | ChangeLog | 52 | ||||
-rw-r--r-- | awk.h | 2 | ||||
-rw-r--r-- | awkgram.c | 4 | ||||
-rw-r--r-- | awkgram.y | 4 | ||||
-rw-r--r-- | builtin.c | 61 | ||||
-rw-r--r-- | interpret.h | 17 | ||||
-rw-r--r-- | mpfr.c | 6 | ||||
-rw-r--r-- | node.c | 20 | ||||
-rw-r--r-- | str_array.c | 7 | ||||
-rw-r--r-- | test/ChangeLog | 5 | ||||
-rw-r--r-- | test/Makefile.am | 5 | ||||
-rw-r--r-- | test/Makefile.in | 10 | ||||
-rw-r--r-- | test/Maketests | 5 | ||||
-rw-r--r-- | test/strftfld.awk | 3 | ||||
-rw-r--r-- | test/strftfld.in | 1 | ||||
-rw-r--r-- | test/strftfld.ok | 1 |
16 files changed, 165 insertions, 38 deletions
@@ -12,6 +12,58 @@ 2017-01-26 Andrew J. Schorr <aschorr@telemetry-investments.com> + * builtin.c (do_dcgettext): First argument also needs protection + from string overrun. + (do_dcngettext): Need to terminate string1 and string2 also, + and replace strlen(the_result), which could overrun. + (do_bindtextdomain): Terminate both string args, and eliminate + saved_end boolean which is redundant with (t2 != NULL). + +2017-01-26 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * interpret.h (Op_arrayfor_init): Protect against string overrun + on sorting method. + (Op_indirect_func_call): Terminate function name. + +2017-01-26 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * str_array.c (env_remove): Terminate string before calling unsetenv. + +2017-01-26 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * node.c (is_hex): Add a new argument pointing to the end of the string + so we can check for string overrun. + (r_force_number): Pass string end to is_hex. + +2017-01-26 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * awk.h (get_numbase): Add string length argument so we can operate + on unterminated strings. + * awkgram.y: Call get_numbase with string length, and fix off-by-one + error in length passed to nondec2awknum: should be strlen(tokstart)-1 + based on surrounding code. + * builtin.c (do_strtonum): Pass string length to get_numbase. + (nondec2awknum): Check string length before accessing characters. + * mpfr.c (force_mpnum): Pass string length to get_numbase. + * node.c (r_force_number): Pass string length to get_numbase. + (get_numbase): Add string length argument and honor it. + +2017-01-26 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * builtin.c (do_strftime): If format argument is passed, we need + to terminate it in case it's a field variable. + +2017-01-26 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * node.c (r_format_val): Before we free s->stptr, make sure that it + was malloced. + (wstr2str): Add comment explaining why it's safe to free n->stptr + without doing any checks. + * mpfr.c (mpg_format_val): Ditto. And no need to reset the STRCUR flag + that we just checked. + +2017-01-26 Andrew J. Schorr <aschorr@telemetry-investments.com> + * awk.h (enum block_id): Remove BLOCK_INVALID, since it serves no useful purpose and seems to slow things down a bit. * node.c (nextfree): Remove first invalid entry. @@ -1679,7 +1679,7 @@ extern Regexp *re_update(NODE *t); extern void resyntax(int syntax); extern void resetup(void); extern int reisstring(const char *text, size_t len, Regexp *re, const char *buf); -extern int get_numbase(const char *str, bool use_locale); +extern int get_numbase(const char *str, size_t len, bool use_locale); extern bool using_utf8(void); /* symbol.c */ @@ -6414,7 +6414,7 @@ retry: base = 10; if (! do_traditional) { - base = get_numbase(tokstart, false); + base = get_numbase(tokstart, strlen(tokstart)-1, false); if (do_lint) { if (base == 8) lintwarn("numeric constant `%.*s' treated as octal", @@ -6450,7 +6450,7 @@ retry: } #endif if (base != 10) - d = nondec2awknum(tokstart, strlen(tokstart), NULL); + d = nondec2awknum(tokstart, strlen(tokstart)-1, NULL); else d = atof(tokstart); yylval->memory = make_profile_number(d, tokstart, strlen(tokstart) - 1); @@ -3994,7 +3994,7 @@ retry: base = 10; if (! do_traditional) { - base = get_numbase(tokstart, false); + base = get_numbase(tokstart, strlen(tokstart)-1, false); if (do_lint) { if (base == 8) lintwarn("numeric constant `%.*s' treated as octal", @@ -4030,7 +4030,7 @@ retry: } #endif if (base != 10) - d = nondec2awknum(tokstart, strlen(tokstart), NULL); + d = nondec2awknum(tokstart, strlen(tokstart)-1, NULL); else d = atof(tokstart); yylval->memory = make_profile_number(d, tokstart, strlen(tokstart) - 1); @@ -1907,6 +1907,7 @@ do_strftime(int nargs) int do_gmt; NODE *val = NULL; NODE *sub = NULL; + char save; static const time_t time_t_min = TYPE_MINIMUM(time_t); static const time_t time_t_max = TYPE_MAXIMUM(time_t); @@ -1980,6 +1981,8 @@ do_strftime(int nargs) DEREF(t1); return make_string("", 0); } + save = format[formatlen]; + t1->stptr[formatlen] = '\0'; } if (do_gmt) @@ -1987,8 +1990,10 @@ do_strftime(int nargs) else tm = localtime(& fclock); - if (tm == NULL) - return make_string("", 0); + if (tm == NULL) { + ret = make_string("", 0); + goto done; + } bufp = buf; bufsize = sizeof(buf); @@ -2014,8 +2019,11 @@ do_strftime(int nargs) ret = make_string(bufp, buflen); if (bufp != buf) efree(bufp); - if (t1) +done: + if (t1) { + t1->stptr[formatlen] = save; DEREF(t1); + } return ret; } @@ -3550,7 +3558,7 @@ do_strtonum(int nargs) tmp = fixtype(POP_SCALAR()); if ((tmp->flags & NUMBER) != 0) d = (AWKNUM) tmp->numbr; - else if (get_numbase(tmp->stptr, use_lc_numeric) != 10) + else if (get_numbase(tmp->stptr, tmp->stlen, use_lc_numeric) != 10) d = nondec2awknum(tmp->stptr, tmp->stlen, NULL); else d = (AWKNUM) force_number(tmp)->numbr; @@ -3575,7 +3583,7 @@ nondec2awknum(char *str, size_t len, char **endptr) short val; char *start = str; - if (*str == '0' && (str[1] == 'x' || str[1] == 'X')) { + if (len >= 2 && *str == '0' && (str[1] == 'x' || str[1] == 'X')) { /* * User called strtonum("0x") or some such, * so just quit early. @@ -3625,7 +3633,7 @@ nondec2awknum(char *str, size_t len, char **endptr) } if (endptr) *endptr = str; - } else if (*str == '0') { + } else if (len >= 1 && *str == '0') { for (; len > 0; len--) { if (! isdigit((unsigned char) *str)) { if (endptr) @@ -3743,7 +3751,7 @@ do_dcgettext(int nargs) #if ENABLE_NLS && defined(LC_MESSAGES) && HAVE_DCGETTEXT int lc_cat; char *domain; - char save; + char save, save1; bool saved_end = false; if (nargs == 3) { /* third argument */ @@ -3774,9 +3782,12 @@ do_dcgettext(int nargs) t1 = POP_STRING(); /* first argument */ string = t1->stptr; + save1 = string[t1->stlen]; + string[t1->stlen] = '\0'; #if ENABLE_NLS && defined(LC_MESSAGES) && HAVE_DCGETTEXT the_result = dcgettext(domain, string, lc_cat); + string[t1->stlen] = save1; if (saved_end) domain[t2->stlen] = save; if (t2 != NULL) @@ -3797,11 +3808,12 @@ do_dcngettext(int nargs) unsigned long number; AWKNUM d; char *the_result; + size_t reslen; #if ENABLE_NLS && defined(LC_MESSAGES) && HAVE_DCGETTEXT int lc_cat; char *domain; - char save; + char save, save1, save2; bool saved_end = false; if (nargs == 5) { /* fifth argument */ @@ -3843,17 +3855,31 @@ do_dcngettext(int nargs) #if ENABLE_NLS && defined(LC_MESSAGES) && HAVE_DCGETTEXT + save1 = string1[t1->stlen]; + string1[t1->stlen] = '\0'; + save2 = string2[t2->stlen]; + string2[t2->stlen] = '\0'; the_result = dcngettext(domain, string1, string2, number, lc_cat); + reslen = strlen(the_result); + string1[t1->stlen] = save1; + string2[t2->stlen] = save2; if (saved_end) domain[t3->stlen] = save; if (t3 != NULL) DEREF(t3); #else - the_result = (number == 1 ? string1 : string2); + if (number == 1) { + the_result = string1; + reslen = t1->stlen; + } + else { + the_result = string2; + reslen = t2->stlen; + } #endif DEREF(t1); DEREF(t2); - return make_string(the_result, strlen(the_result)); + return make_string(the_result, reslen); } /* do_bindtextdomain --- set the directory for a text domain */ @@ -3878,29 +3904,32 @@ do_bindtextdomain(int nargs) /* set defaults */ directory = NULL; domain = TEXTDOMAIN; - char save; - bool saved_end = false; + char save, save1; if (nargs == 2) { /* second argument */ t2 = POP_STRING(); domain = (const char *) t2->stptr; save = t2->stptr[t2->stlen]; t2->stptr[t2->stlen] = '\0'; - saved_end = true; } /* first argument */ t1 = POP_STRING(); - if (t1->stlen > 0) + if (t1->stlen > 0) { directory = (const char *) t1->stptr; + save1 = t1->stptr[t1->stlen]; + t1->stptr[t1->stlen] = '\0'; + } the_result = bindtextdomain(domain, directory); + if (directory) + t1->stptr[t1->stlen] = save1; DEREF(t1); - if (saved_end) + if (t2 != NULL) { t2->stptr[t2->stlen] = save; - if (t2 != NULL) DEREF(t2); + } return make_string(the_result, strlen(the_result)); } diff --git a/interpret.h b/interpret.h index 8c9675bb..191e1efb 100644 --- a/interpret.h +++ b/interpret.h @@ -886,6 +886,8 @@ mod: size_t num_elems = 0; static NODE *sorted_in = NULL; const char *how_to_sort = "@unsorted"; + char save; + bool saved_end = false; /* get the array */ array = POP_ARRAY(); @@ -908,11 +910,17 @@ mod: if (sort_str != NULL) { sort_str = force_string(sort_str); - if (sort_str->stlen > 0) + if (sort_str->stlen > 0) { how_to_sort = sort_str->stptr; + save = sort_str->stptr[sort_str->stlen]; + sort_str->stptr[sort_str->stlen] = '\0'; + saved_end = true; + } } list = assoc_list(array, how_to_sort, SORTED_IN); + if (saved_end) + sort_str->stptr[sort_str->stlen] = save; arrayfor: getnode(r); @@ -1049,6 +1057,7 @@ match_re: { NODE *f = NULL; int arg_count; + char save; arg_count = (pc + 1)->expr_count; t1 = PEEK(arg_count); /* indirect var */ @@ -1057,12 +1066,15 @@ match_re: fatal(_("indirect function call requires a simple scalar value")); t1 = force_string(t1); + save = t1->stptr[t1->stlen]; + t1->stptr[t1->stlen] = '\0'; if (t1->stlen > 0) { /* retrieve function definition node */ f = pc->func_body; if (f != NULL && strcmp(f->vname, t1->stptr) == 0) { /* indirect var hasn't been reassigned */ + t1->stptr[t1->stlen] = save; ni = setup_frame(pc); JUMPTO(ni); /* Op_func */ } @@ -1087,10 +1099,12 @@ match_re: r = call_split_func(t1->stptr, arg_count); else r = the_func(arg_count); + t1->stptr[t1->stlen] = save; PUSH(r); break; } else if (f->type != Node_func) { + t1->stptr[t1->stlen] = save; if (f->type == Node_ext_func) { /* code copied from below, keep in sync */ INSTRUCTION *bc; @@ -1115,6 +1129,7 @@ match_re: pc->func_name); } pc->func_body = f; /* save for next call */ + t1->stptr[t1->stlen] = save; ni = setup_frame(pc); JUMPTO(ni); /* Op_func */ @@ -303,7 +303,7 @@ force_mpnum(NODE *n, int do_nondec, int use_locale) cp1 = cp; if (do_nondec) - base = get_numbase(cp1, use_locale); + base = get_numbase(cp1, cpend - cp1, use_locale); if (! mpg_maybe_float(cp1, use_locale)) { mpg_zero(n); @@ -381,12 +381,10 @@ mpg_format_val(const char *format, int index, NODE *s) } s->flags = oflags; s->stlen = r->stlen; - if ((s->flags & STRCUR) != 0) + if ((s->flags & (MALLOC|STRCUR)) == (MALLOC|STRCUR)) efree(s->stptr); s->stptr = r->stptr; freenode(r); /* Do not unref(r)! We want to keep s->stptr == r->stpr. */ - - s->flags |= STRCUR; free_wstr(s); return s; } @@ -41,12 +41,13 @@ int (*cmp_numbers)(const NODE *, const NODE *) = cmp_awknums; /* is_hex --- return true if a string looks like a hex value */ static bool -is_hex(const char *str) +is_hex(const char *str, const char *cpend) { + /* on entry, we know the string length is >= 1 */ if (*str == '-' || *str == '+') str++; - if (str[0] == '0' && (str[1] == 'x' || str[1] == 'X')) + if (str + 1 < cpend && str[0] == '0' && (str[1] == 'x' || str[1] == 'X')) return true; return false; @@ -113,7 +114,7 @@ r_force_number(NODE *n) if ( (! do_posix /* not POSIXLY paranoid and */ && (is_alpha((unsigned char) *cp) /* letter, or */ /* CANNOT do non-decimal and saw 0x */ - || (! do_non_decimal_data && is_hex(cp))))) { + || (! do_non_decimal_data && is_hex(cp, cpend))))) { goto badnum; } @@ -129,7 +130,7 @@ r_force_number(NODE *n) errno = 0; if (do_non_decimal_data /* main.c assures false if do_posix */ - && ! do_traditional && get_numbase(cp, true) != 10) { + && ! do_traditional && get_numbase(cp, cpend - cp, true) != 10) { /* nondec2awknum() saves and restores the byte after the string itself */ n->numbr = nondec2awknum(cp, cpend - cp, &ptr); } else { @@ -248,7 +249,7 @@ r_format_val(const char *format, int index, NODE *s) } s->flags = oflags; s->stlen = r->stlen; - if ((s->flags & STRCUR) != 0) + if ((s->flags & (MALLOC|STRCUR)) == (MALLOC|STRCUR)) efree(s->stptr); s->stptr = r->stptr; freenode(r); /* Do not unref(r)! We want to keep s->stptr == r->stpr. */ @@ -273,7 +274,7 @@ r_format_val(const char *format, int index, NODE *s) s->flags |= STRING; } } - if ((s->flags & STRCUR) != 0) + if ((s->flags & (MALLOC|STRCUR)) == (MALLOC|STRCUR)) efree(s->stptr); emalloc(s->stptr, char *, s->stlen + 1, "format_val"); memcpy(s->stptr, sp, s->stlen + 1); @@ -631,7 +632,7 @@ parse_escape(const char **string_ptr) /* get_numbase --- return the base to use for the number in 's' */ int -get_numbase(const char *s, bool use_locale) +get_numbase(const char *s, size_t len, bool use_locale) { int dec_point = '.'; const char *str = s; @@ -645,7 +646,7 @@ get_numbase(const char *s, bool use_locale) dec_point = loc.decimal_point[0]; /* XXX --- assumes one char */ #endif - if (str[0] != '0') + if (len < 2 || str[0] != '0') return 10; /* leading 0x or 0X */ @@ -658,7 +659,7 @@ get_numbase(const char *s, bool use_locale) * * These beasts can have trailing whitespace. Deal with that too. */ - for (; *str != '\0'; str++) { + for (; len > 0; len--, str++) { if (*str == 'e' || *str == 'E' || *str == dec_point) return 10; else if (! isdigit((unsigned char) *str)) @@ -844,6 +845,7 @@ wstr2str(NODE *n) } *cp = '\0'; + /* N.B. caller just created n with make_string, so this free is safe */ efree(n->stptr); n->stptr = newval; n->stlen = cp - newval; diff --git a/str_array.c b/str_array.c index d832380d..c559a39a 100644 --- a/str_array.c +++ b/str_array.c @@ -773,9 +773,14 @@ static NODE ** env_remove(NODE *symbol, NODE *subs) { NODE **val = str_remove(symbol, subs); + char save; - if (val != NULL) + if (val != NULL) { + save = subs->stptr[subs->stlen]; + subs->stptr[subs->stlen] = '\0'; (void) unsetenv(subs->stptr); + subs->stptr[subs->stlen] = save; + } return val; } diff --git a/test/ChangeLog b/test/ChangeLog index 3566cce7..4aa98610 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -3,6 +3,11 @@ * Makefile.am (gensub3): New test. * gensub3.awk, gensub3.in, gensub3.ok: New files. +2017-01-26 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * Makefile.am (strftfld): New test. + * strftfld.awk, strftfld.in, strftfld.ok: New files. + 2017-01-15 Andrew J. Schorr <aschorr@telemetry-investments.com> * Makefile.am (concat5): New test. diff --git a/test/Makefile.am b/test/Makefile.am index 9f79df8f..dfedcd64 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -1026,6 +1026,9 @@ EXTRA_DIST = \ strftime.awk \ strftlng.awk \ strftlng.ok \ + strftfld.awk \ + strftfld.in \ + strftfld.ok \ strnum1.awk \ strnum1.ok \ strnum2.awk \ @@ -1231,7 +1234,7 @@ GAWK_EXT_TESTS = \ rebuf regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin rsstart1 \ rsstart2 rsstart3 rstest6 shadow shadowbuiltin \ sortfor sortfor2 sortu split_after_fpat \ - splitarg4 strftime \ + splitarg4 strftime strftfld \ strtonum strtonum1 switch2 symtab1 symtab2 symtab3 symtab4 symtab5 symtab6 \ symtab7 symtab8 symtab9 symtab10 \ typedregex1 typedregex2 typedregex3 typeof1 typeof2 typeof3 typeof4 \ diff --git a/test/Makefile.in b/test/Makefile.in index 1cd8bf15..af75a6a9 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -1284,6 +1284,9 @@ EXTRA_DIST = \ strftime.awk \ strftlng.awk \ strftlng.ok \ + strftfld.awk \ + strftfld.in \ + strftfld.ok \ strnum1.awk \ strnum1.ok \ strnum2.awk \ @@ -1488,7 +1491,7 @@ GAWK_EXT_TESTS = \ rebuf regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin rsstart1 \ rsstart2 rsstart3 rstest6 shadow shadowbuiltin \ sortfor sortfor2 sortu split_after_fpat \ - splitarg4 strftime \ + splitarg4 strftime strftfld \ strtonum strtonum1 switch2 symtab1 symtab2 symtab3 symtab4 symtab5 symtab6 \ symtab7 symtab8 symtab9 symtab10 \ typedregex1 typedregex2 typedregex3 typeof1 typeof2 typeof3 typeof4 \ @@ -4251,6 +4254,11 @@ splitarg4: @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +strftfld: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + strtonum: @echo $@ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/Maketests b/test/Maketests index 30211d66..9036ff99 100644 --- a/test/Maketests +++ b/test/Maketests @@ -1447,6 +1447,11 @@ splitarg4: @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +strftfld: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + strtonum: @echo $@ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/strftfld.awk b/test/strftfld.awk new file mode 100644 index 00000000..26f75a5a --- /dev/null +++ b/test/strftfld.awk @@ -0,0 +1,3 @@ +{ + print split(strftime($1), f) +} diff --git a/test/strftfld.in b/test/strftfld.in new file mode 100644 index 00000000..c1175143 --- /dev/null +++ b/test/strftfld.in @@ -0,0 +1 @@ +%F %T diff --git a/test/strftfld.ok b/test/strftfld.ok new file mode 100644 index 00000000..d00491fd --- /dev/null +++ b/test/strftfld.ok @@ -0,0 +1 @@ +1 |