about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 6
-rw-r--r-- awkgram.c 90
-rw-r--r-- awkgram.y 90
3 files changed, 104 insertions, 82 deletions
diff --git a/ChangeLog b/ChangeLog
index b385cffb..35e08c91 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2014-07-03 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awkgram.y (nextc): Add bool check_for_bad parameter to check
+ for bad characters in the source program.
+ (yylex): Adjust calls.
+
2014-06-22 Paul Eggert <eggert@penguin.cs.ucla.edu>
Bring in from GNULIB:
diff --git a/awkgram.c b/awkgram.c
index b36816c9..94ca1313 100644
--- a/awkgram.c
+++ b/awkgram.c
@@ -5135,7 +5135,7 @@ tokexpand()
#if MBS_SUPPORT
static int
-nextc(void)
+nextc(bool check_for_bad)
{
if (gawk_mb_cur_max > 1) {
again:
@@ -5186,14 +5186,19 @@ again:
0 : work_ring_idx + 1;
cur_char_ring[work_ring_idx] = 0;
}
+ if (check_for_bad && iscntrl(*lexptr) && ! isspace(*lexptr))
+ fatal(_("PEBKAC error: invalid character '\\%03o' in source code"), *lexptr);
return (int) (unsigned char) *lexptr++;
} else {
do {
if (lexeof)
return END_FILE;
- if (lexptr && lexptr < lexend)
- return ((int) (unsigned char) *lexptr++);
+ if (lexptr && lexptr < lexend) {
+ if (check_for_bad && iscntrl(*lexptr) && ! isspace(*lexptr))
+ fatal(_("PEBKAC error: invalid character '\\%03o' in source code"), *lexptr);
+ return ((int) (unsigned char) *lexptr++);
+ }
} while (get_src_buf());
return END_SRC;
}
@@ -5202,13 +5207,16 @@ again:
#else /* MBS_SUPPORT */
int
-nextc()
+nextc(bool check_for_bad)
{
do {
if (lexeof)
return END_FILE;
- if (lexptr && lexptr < lexend)
+ if (lexptr && lexptr < lexend) {
+ if (check_for_bad && iscntrl(*lexptr) && ! isspace(*lexptr))
+ fatal(_("PEBKAC error: invalid character '\\%03o' in source code"), *lexptr);
return ((int) (unsigned char) *lexptr++);
+ }
} while (get_src_buf());
return END_SRC;
}
@@ -5237,13 +5245,13 @@ allow_newline(void)
int c;
for (;;) {
- c = nextc();
+ c = nextc(true);
if (c == END_FILE) {
pushback();
break;
}
if (c == '#') {
- while ((c = nextc()) != '\n' && c != END_FILE)
+ while ((c = nextc(false)) != '\n' && c != END_FILE)
continue;
if (c == END_FILE) {
pushback();
@@ -5315,7 +5323,7 @@ yylex(void)
if (lasttok == LEX_EOF) /* error earlier in current source, must give up !! */
return 0;
- c = nextc();
+ c = nextc(true);
if (c == END_SRC)
return 0;
if (c == END_FILE)
@@ -5357,12 +5365,12 @@ yylex(void)
want_regexp = false;
tok = tokstart;
for (;;) {
- c = nextc();
+ c = nextc(true);
if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
case '[':
/* one day check for `.' and `=' too */
- if (nextc() == ':' || in_brack == 0)
+ if (nextc(true) == ':' || in_brack == 0)
in_brack++;
pushback();
break;
@@ -5376,7 +5384,7 @@ yylex(void)
in_brack--;
break;
case '\\':
- if ((c = nextc()) == END_FILE) {
+ if ((c = nextc(true)) == END_FILE) {
pushback();
yyerror(_("unterminated regexp ends with `\\' at end of file"));
goto end_regexp; /* kludge */
@@ -5396,7 +5404,7 @@ end_regexp:
yylval = GET_INSTRUCTION(Op_token);
yylval->lextok = estrdup(tokstart, tok - tokstart);
if (do_lint) {
- int peek = nextc();
+ int peek = nextc(true);
pushback();
if (peek == 'i' || peek == 's') {
@@ -5426,7 +5434,7 @@ end_regexp:
retry:
/* skipping \r is a hack, but windows is just too pervasive. sigh. */
- while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
+ while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r')
continue;
lexeme = lexptr ? lexptr - 1 : lexptr;
@@ -5448,7 +5456,7 @@ retry:
return lasttok = NEWLINE;
case '#': /* it's a comment */
- while ((c = nextc()) != '\n') {
+ while ((c = nextc(false)) != '\n') {
if (c == END_FILE)
return lasttok = NEWLINE_EOF;
}
@@ -5468,7 +5476,7 @@ retry:
*/
if (! do_traditional) {
/* strip trailing white-space and/or comment */
- while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
+ while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r')
continue;
if (c == '#') {
static bool warned = false;
@@ -5478,16 +5486,16 @@ retry:
lintwarn(
_("use of `\\ #...' line continuation is not portable"));
}
- while ((c = nextc()) != '\n')
+ while ((c = nextc(false)) != '\n')
if (c == END_FILE)
break;
}
pushback();
}
#endif /* RELAXED_CONTINUATION */
- c = nextc();
+ c = nextc(true);
if (c == '\r') /* allow MS-DOS files. bleah */
- c = nextc();
+ c = nextc(true);
if (c == '\n') {
sourceline++;
goto retry;
@@ -5526,7 +5534,7 @@ retry:
case '[':
return lasttok = c;
case ']':
- c = nextc();
+ c = nextc(true);
pushback();
if (c == '[') {
yylval = GET_INSTRUCTION(Op_sub_array);
@@ -5538,7 +5546,7 @@ retry:
return ']';
case '*':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_assign_times);
return lasttok = ASSIGNOP;
} else if (do_posix) {
@@ -5549,7 +5557,7 @@ retry:
/* make ** and **= aliases for ^ and ^= */
static bool did_warn_op = false, did_warn_assgn = false;
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
if (! did_warn_assgn) {
did_warn_assgn = true;
if (do_lint)
@@ -5577,7 +5585,7 @@ retry:
return lasttok = '*';
case '/':
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
pushback();
return lasttok = SLASH_BEFORE_EQUAL;
}
@@ -5586,7 +5594,7 @@ retry:
return lasttok = '/';
case '%':
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
yylval = GET_INSTRUCTION(Op_assign_mod);
return lasttok = ASSIGNOP;
}
@@ -5598,7 +5606,7 @@ retry:
{
static bool did_warn_op = false, did_warn_assgn = false;
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
if (do_lint_old && ! did_warn_assgn) {
did_warn_assgn = true;
warning(_("operator `^=' is not supported in old awk"));
@@ -5616,7 +5624,7 @@ retry:
}
case '+':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_assign_plus);
return lasttok = ASSIGNOP;
}
@@ -5629,7 +5637,7 @@ retry:
return lasttok = '+';
case '!':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_notequal);
return lasttok = RELOP;
}
@@ -5642,7 +5650,7 @@ retry:
return lasttok = '!';
case '<':
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
yylval = GET_INSTRUCTION(Op_leq);
return lasttok = RELOP;
}
@@ -5651,7 +5659,7 @@ retry:
return lasttok = '<';
case '=':
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
yylval = GET_INSTRUCTION(Op_equal);
return lasttok = RELOP;
}
@@ -5660,7 +5668,7 @@ retry:
return lasttok = ASSIGN;
case '>':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_geq);
return lasttok = RELOP;
} else if (c == '>') {
@@ -5699,7 +5707,7 @@ retry:
case '"':
string:
esc_seen = false;
- while ((c = nextc()) != '"') {
+ while ((c = nextc(true)) != '"') {
if (c == '\n') {
pushback();
yyerror(_("unterminated string"));
@@ -5707,7 +5715,7 @@ retry:
}
if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) &&
c == '\\') {
- c = nextc();
+ c = nextc(true);
if (c == '\n') {
sourceline++;
continue;
@@ -5741,7 +5749,7 @@ retry:
return lasttok = YSTRING;
case '-':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_assign_minus);
return lasttok = ASSIGNOP;
}
@@ -5754,7 +5762,7 @@ retry:
return lasttok = '-';
case '.':
- c = nextc();
+ c = nextc(true);
pushback();
if (! isdigit(c))
return lasttok = '.';
@@ -5782,7 +5790,7 @@ retry:
if (do_traditional)
goto done;
if (tok == tokstart + 2) {
- int peek = nextc();
+ int peek = nextc(true);
if (isxdigit(peek)) {
inhex = true;
@@ -5810,8 +5818,8 @@ retry:
break;
}
seen_e = true;
- if ((c = nextc()) == '-' || c == '+') {
- int c2 = nextc();
+ if ((c = nextc(true)) == '-' || c == '+') {
+ int c2 = nextc(true);
if (isdigit(c2)) {
tokadd(c);
@@ -5858,7 +5866,7 @@ retry:
}
if (gotnumber)
break;
- c = nextc();
+ c = nextc(true);
}
pushback();
@@ -5907,7 +5915,7 @@ retry:
return lasttok = YNUMBER;
case '&':
- if ((c = nextc()) == '&') {
+ if ((c = nextc(true)) == '&') {
yylval = GET_INSTRUCTION(Op_and);
allow_newline();
return lasttok = LEX_AND;
@@ -5917,7 +5925,7 @@ retry:
return lasttok = '&';
case '|':
- if ((c = nextc()) == '|') {
+ if ((c = nextc(true)) == '|') {
yylval = GET_INSTRUCTION(Op_or);
allow_newline();
return lasttok = LEX_OR;
@@ -5958,7 +5966,7 @@ retry:
* occasions where the interactions are funny.
*/
if (! do_traditional && c == '_' && lasttok != '$') {
- if ((c = nextc()) == '"') {
+ if ((c = nextc(true)) == '"') {
intlstr = true;
goto string;
}
@@ -5970,7 +5978,7 @@ retry:
tok = tokstart;
while (c != END_FILE && is_identchar(c)) {
tokadd(c);
- c = nextc();
+ c = nextc(true);
}
tokadd('\0');
pushback();
diff --git a/awkgram.y b/awkgram.y
index 906e6c8b..b512584c 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -2796,7 +2796,7 @@ tokexpand()
#if MBS_SUPPORT
static int
-nextc(void)
+nextc(bool check_for_bad)
{
if (gawk_mb_cur_max > 1) {
again:
@@ -2847,14 +2847,19 @@ again:
0 : work_ring_idx + 1;
cur_char_ring[work_ring_idx] = 0;
}
+ if (check_for_bad && iscntrl(*lexptr) && ! isspace(*lexptr))
+ fatal(_("PEBKAC error: invalid character '\\%03o' in source code"), *lexptr);
return (int) (unsigned char) *lexptr++;
} else {
do {
if (lexeof)
return END_FILE;
- if (lexptr && lexptr < lexend)
- return ((int) (unsigned char) *lexptr++);
+ if (lexptr && lexptr < lexend) {
+ if (check_for_bad && iscntrl(*lexptr) && ! isspace(*lexptr))
+ fatal(_("PEBKAC error: invalid character '\\%03o' in source code"), *lexptr);
+ return ((int) (unsigned char) *lexptr++);
+ }
} while (get_src_buf());
return END_SRC;
}
@@ -2863,13 +2868,16 @@ again:
#else /* MBS_SUPPORT */
int
-nextc()
+nextc(bool check_for_bad)
{
do {
if (lexeof)
return END_FILE;
- if (lexptr && lexptr < lexend)
+ if (lexptr && lexptr < lexend) {
+ if (check_for_bad && iscntrl(*lexptr) && ! isspace(*lexptr))
+ fatal(_("PEBKAC error: invalid character '\\%03o' in source code"), *lexptr);
return ((int) (unsigned char) *lexptr++);
+ }
} while (get_src_buf());
return END_SRC;
}
@@ -2898,13 +2906,13 @@ allow_newline(void)
int c;
for (;;) {
- c = nextc();
+ c = nextc(true);
if (c == END_FILE) {
pushback();
break;
}
if (c == '#') {
- while ((c = nextc()) != '\n' && c != END_FILE)
+ while ((c = nextc(false)) != '\n' && c != END_FILE)
continue;
if (c == END_FILE) {
pushback();
@@ -2976,7 +2984,7 @@ yylex(void)
if (lasttok == LEX_EOF) /* error earlier in current source, must give up !! */
return 0;
- c = nextc();
+ c = nextc(true);
if (c == END_SRC)
return 0;
if (c == END_FILE)
@@ -3018,12 +3026,12 @@ yylex(void)
want_regexp = false;
tok = tokstart;
for (;;) {
- c = nextc();
+ c = nextc(true);
if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
case '[':
/* one day check for `.' and `=' too */
- if (nextc() == ':' || in_brack == 0)
+ if (nextc(true) == ':' || in_brack == 0)
in_brack++;
pushback();
break;
@@ -3037,7 +3045,7 @@ yylex(void)
in_brack--;
break;
case '\\':
- if ((c = nextc()) == END_FILE) {
+ if ((c = nextc(true)) == END_FILE) {
pushback();
yyerror(_("unterminated regexp ends with `\\' at end of file"));
goto end_regexp; /* kludge */
@@ -3057,7 +3065,7 @@ end_regexp:
yylval = GET_INSTRUCTION(Op_token);
yylval->lextok = estrdup(tokstart, tok - tokstart);
if (do_lint) {
- int peek = nextc();
+ int peek = nextc(true);
pushback();
if (peek == 'i' || peek == 's') {
@@ -3087,7 +3095,7 @@ end_regexp:
retry:
/* skipping \r is a hack, but windows is just too pervasive. sigh. */
- while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
+ while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r')
continue;
lexeme = lexptr ? lexptr - 1 : lexptr;
@@ -3109,7 +3117,7 @@ retry:
return lasttok = NEWLINE;
case '#': /* it's a comment */
- while ((c = nextc()) != '\n') {
+ while ((c = nextc(false)) != '\n') {
if (c == END_FILE)
return lasttok = NEWLINE_EOF;
}
@@ -3129,7 +3137,7 @@ retry:
*/
if (! do_traditional) {
/* strip trailing white-space and/or comment */
- while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
+ while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r')
continue;
if (c == '#') {
static bool warned = false;
@@ -3139,16 +3147,16 @@ retry:
lintwarn(
_("use of `\\ #...' line continuation is not portable"));
}
- while ((c = nextc()) != '\n')
+ while ((c = nextc(false)) != '\n')
if (c == END_FILE)
break;
}
pushback();
}
#endif /* RELAXED_CONTINUATION */
- c = nextc();
+ c = nextc(true);
if (c == '\r') /* allow MS-DOS files. bleah */
- c = nextc();
+ c = nextc(true);
if (c == '\n') {
sourceline++;
goto retry;
@@ -3187,7 +3195,7 @@ retry:
case '[':
return lasttok = c;
case ']':
- c = nextc();
+ c = nextc(true);
pushback();
if (c == '[') {
yylval = GET_INSTRUCTION(Op_sub_array);
@@ -3199,7 +3207,7 @@ retry:
return ']';
case '*':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_assign_times);
return lasttok = ASSIGNOP;
} else if (do_posix) {
@@ -3210,7 +3218,7 @@ retry:
/* make ** and **= aliases for ^ and ^= */
static bool did_warn_op = false, did_warn_assgn = false;
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
if (! did_warn_assgn) {
did_warn_assgn = true;
if (do_lint)
@@ -3238,7 +3246,7 @@ retry:
return lasttok = '*';
case '/':
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
pushback();
return lasttok = SLASH_BEFORE_EQUAL;
}
@@ -3247,7 +3255,7 @@ retry:
return lasttok = '/';
case '%':
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
yylval = GET_INSTRUCTION(Op_assign_mod);
return lasttok = ASSIGNOP;
}
@@ -3259,7 +3267,7 @@ retry:
{
static bool did_warn_op = false, did_warn_assgn = false;
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
if (do_lint_old && ! did_warn_assgn) {
did_warn_assgn = true;
warning(_("operator `^=' is not supported in old awk"));
@@ -3277,7 +3285,7 @@ retry:
}
case '+':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_assign_plus);
return lasttok = ASSIGNOP;
}
@@ -3290,7 +3298,7 @@ retry:
return lasttok = '+';
case '!':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_notequal);
return lasttok = RELOP;
}
@@ -3303,7 +3311,7 @@ retry:
return lasttok = '!';
case '<':
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
yylval = GET_INSTRUCTION(Op_leq);
return lasttok = RELOP;
}
@@ -3312,7 +3320,7 @@ retry:
return lasttok = '<';
case '=':
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
yylval = GET_INSTRUCTION(Op_equal);
return lasttok = RELOP;
}
@@ -3321,7 +3329,7 @@ retry:
return lasttok = ASSIGN;
case '>':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_geq);
return lasttok = RELOP;
} else if (c == '>') {
@@ -3360,7 +3368,7 @@ retry:
case '"':
string:
esc_seen = false;
- while ((c = nextc()) != '"') {
+ while ((c = nextc(true)) != '"') {
if (c == '\n') {
pushback();
yyerror(_("unterminated string"));
@@ -3368,7 +3376,7 @@ retry:
}
if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) &&
c == '\\') {
- c = nextc();
+ c = nextc(true);
if (c == '\n') {
sourceline++;
continue;
@@ -3402,7 +3410,7 @@ retry:
return lasttok = YSTRING;
case '-':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_assign_minus);
return lasttok = ASSIGNOP;
}
@@ -3415,7 +3423,7 @@ retry:
return lasttok = '-';
case '.':
- c = nextc();
+ c = nextc(true);
pushback();
if (! isdigit(c))
return lasttok = '.';
@@ -3443,7 +3451,7 @@ retry:
if (do_traditional)
goto done;
if (tok == tokstart + 2) {
- int peek = nextc();
+ int peek = nextc(true);
if (isxdigit(peek)) {
inhex = true;
@@ -3471,8 +3479,8 @@ retry:
break;
}
seen_e = true;
- if ((c = nextc()) == '-' || c == '+') {
- int c2 = nextc();
+ if ((c = nextc(true)) == '-' || c == '+') {
+ int c2 = nextc(true);
if (isdigit(c2)) {
tokadd(c);
@@ -3519,7 +3527,7 @@ retry:
}
if (gotnumber)
break;
- c = nextc();
+ c = nextc(true);
}
pushback();
@@ -3568,7 +3576,7 @@ retry:
return lasttok = YNUMBER;
case '&':
- if ((c = nextc()) == '&') {
+ if ((c = nextc(true)) == '&') {
yylval = GET_INSTRUCTION(Op_and);
allow_newline();
return lasttok = LEX_AND;
@@ -3578,7 +3586,7 @@ retry:
return lasttok = '&';
case '|':
- if ((c = nextc()) == '|') {
+ if ((c = nextc(true)) == '|') {
yylval = GET_INSTRUCTION(Op_or);
allow_newline();
return lasttok = LEX_OR;
@@ -3619,7 +3627,7 @@ retry:
* occasions where the interactions are funny.
*/
if (! do_traditional && c == '_' && lasttok != '$') {
- if ((c = nextc()) == '"') {
+ if ((c = nextc(true)) == '"') {
intlstr = true;
goto string;
}
@@ -3631,7 +3639,7 @@ retry:
tok = tokstart;
while (c != END_FILE && is_identchar(c)) {
tokadd(c);
- c = nextc();
+ c = nextc(true);
}
tokadd('\0');
pushback();