aboutsummaryrefslogtreecommitdiffstats
path: root/awkgram.y
diff options
context:
space:
mode:
Diffstat (limited to 'awkgram.y')
-rw-r--r--awkgram.y258
1 files changed, 197 insertions, 61 deletions
diff --git a/awkgram.y b/awkgram.y
index 14121f55..28025578 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -155,8 +155,6 @@ static inline INSTRUCTION *list_merge(INSTRUCTION *l1, INSTRUCTION *l2);
extern double fmod(double x, double y);
#define YYSTYPE INSTRUCTION *
-
-#define is_identchar(c) (isalnum(c) || (c) == '_')
%}
%token FUNC_CALL NAME REGEXP FILENAME
@@ -1406,21 +1404,12 @@ simp_exp
| LEX_GETLINE opt_variable input_redir
{
/*
- * In BEGINFILE/ENDFILE, allow `getline var < file'
+ * In BEGINFILE/ENDFILE, allow `getline [var] < file'
*/
- if (rule == BEGINFILE || rule == ENDFILE) {
- if ($2 != NULL && $3 != NULL)
- ; /* all ok */
- else {
- if ($2 != NULL)
- error_ln($1->source_line,
- _("`getline var' invalid inside `%s' rule"), ruletab[rule]);
- else
- error_ln($1->source_line,
- _("`getline' invalid inside `%s' rule"), ruletab[rule]);
- }
- }
+ if ((rule == BEGINFILE || rule == ENDFILE) && $3 == NULL)
+ error_ln($1->source_line,
+ _("non-redirected `getline' invalid inside `%s' rule"), ruletab[rule]);
if (do_lint && rule == END && $3 == NULL)
lintwarn_ln($1->source_line,
_("non-redirected `getline' undefined inside END action"));
@@ -1815,6 +1804,7 @@ struct token {
# define GAWKX 0x0400 /* gawk extension */
# define BREAK 0x0800 /* break allowed inside */
# define CONTINUE 0x1000 /* continue allowed inside */
+# define DEBUG_USE 0x2000 /* for use by developers */
NODE *(*ptr)(int); /* function that implements this keyword */
NODE *(*ptr2)(int); /* alternate arbitrary-precision function */
@@ -1853,7 +1843,7 @@ static const struct token tokentab[] = {
{"END", Op_rule, LEX_END, 0, 0, 0},
{"ENDFILE", Op_rule, LEX_ENDFILE, GAWKX, 0, 0},
#ifdef ARRAYDEBUG
-{"adump", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_adump, 0},
+{"adump", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|DEBUG_USE, do_adump, 0},
#endif
{"and", Op_builtin, LEX_BUILTIN, GAWKX, do_and, MPF(and)},
{"asort", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_asort, 0},
@@ -1870,6 +1860,7 @@ static const struct token tokentab[] = {
{"dcngettext", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext, 0},
{"default", Op_K_default, LEX_DEFAULT, GAWKX, 0, 0},
{"delete", Op_K_delete, LEX_DELETE, NOT_OLD, 0, 0},
+{"div", Op_builtin, LEX_BUILTIN, GAWKX|A(3), do_div, MPF(div)},
{"do", Op_K_do, LEX_DO, NOT_OLD|BREAK|CONTINUE, 0, 0},
{"else", Op_K_else, LEX_ELSE, 0, 0, 0},
{"eval", Op_symbol, LEX_EVAL, 0, 0, 0},
@@ -1912,7 +1903,7 @@ static const struct token tokentab[] = {
{"sqrt", Op_builtin, LEX_BUILTIN, A(1), do_sqrt, MPF(sqrt)},
{"srand", Op_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand, MPF(srand)},
#if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */
-{"stopme", Op_builtin, LEX_BUILTIN, GAWKX|A(0), stopme, 0},
+{"stopme", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|DEBUG_USE, stopme, 0},
#endif
{"strftime", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime, 0},
{"strtonum", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum, MPF(strtonum)},
@@ -1954,7 +1945,7 @@ getfname(NODE *(*fptr)(int))
j = sizeof(tokentab) / sizeof(tokentab[0]);
/* linear search, no other way to do it */
for (i = 0; i < j; i++)
- if (tokentab[i].ptr == fptr)
+ if (tokentab[i].ptr == fptr || tokentab[i].ptr2 == fptr)
return tokentab[i].operator;
return NULL;
@@ -2334,6 +2325,15 @@ parse_program(INSTRUCTION **pcode)
return (ret || errcount);
}
+/* free_srcfile --- free a SRCFILE struct */
+
+void
+free_srcfile(SRCFILE *thisfile)
+{
+ efree(thisfile->src);
+ efree(thisfile);
+}
+
/* do_add_srcfile --- add one item to srcfiles */
static SRCFILE *
@@ -2792,12 +2792,40 @@ tokexpand()
return tok;
}
+/* check_bad_char --- fatal if c isn't allowed in gawk source code */
+
+/*
+ * The error message was inspired by someone who decided to put
+ * a physical \0 byte into the source code to see what would
+ * happen and then filed a bug report about it. Sigh.
+ */
+
+static void
+check_bad_char(int c)
+{
+ /* allow escapes. needed for autoconf. bleah. */
+ switch (c) {
+ case '\a':
+ case '\b':
+ case '\f':
+ case '\n':
+ case '\r':
+ case '\t':
+ return;
+ default:
+ break;
+ }
+
+ if (iscntrl(c) && ! isspace(c))
+ fatal(_("PEBKAC error: invalid character '\\%03o' in source code"), c);
+}
+
/* nextc --- get the next input character */
#if MBS_SUPPORT
static int
-nextc(void)
+nextc(bool check_for_bad)
{
if (gawk_mb_cur_max > 1) {
again:
@@ -2848,14 +2876,19 @@ again:
0 : work_ring_idx + 1;
cur_char_ring[work_ring_idx] = 0;
}
+ if (check_for_bad)
+ check_bad_char(*lexptr);
return (int) (unsigned char) *lexptr++;
} else {
do {
if (lexeof)
return END_FILE;
- if (lexptr && lexptr < lexend)
- return ((int) (unsigned char) *lexptr++);
+ if (lexptr && lexptr < lexend) {
+ if (check_for_bad)
+ check_bad_char(*lexptr);
+ return ((int) (unsigned char) *lexptr++);
+ }
} while (get_src_buf());
return END_SRC;
}
@@ -2864,13 +2897,16 @@ again:
#else /* MBS_SUPPORT */
int
-nextc()
+nextc(bool check_for_bad)
{
do {
if (lexeof)
return END_FILE;
- if (lexptr && lexptr < lexend)
+ if (lexptr && lexptr < lexend) {
+ if (check_for_bad)
+ check_bad_char(*lexptr);
return ((int) (unsigned char) *lexptr++);
+ }
} while (get_src_buf());
return END_SRC;
}
@@ -2899,13 +2935,13 @@ allow_newline(void)
int c;
for (;;) {
- c = nextc();
+ c = nextc(true);
if (c == END_FILE) {
pushback();
break;
}
if (c == '#') {
- while ((c = nextc()) != '\n' && c != END_FILE)
+ while ((c = nextc(false)) != '\n' && c != END_FILE)
continue;
if (c == END_FILE) {
pushback();
@@ -2977,7 +3013,7 @@ yylex(void)
if (lasttok == LEX_EOF) /* error earlier in current source, must give up !! */
return 0;
- c = nextc();
+ c = nextc(true);
if (c == END_SRC)
return 0;
if (c == END_FILE)
@@ -3019,12 +3055,12 @@ yylex(void)
want_regexp = false;
tok = tokstart;
for (;;) {
- c = nextc();
+ c = nextc(true);
if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
case '[':
/* one day check for `.' and `=' too */
- if (nextc() == ':' || in_brack == 0)
+ if (nextc(true) == ':' || in_brack == 0)
in_brack++;
pushback();
break;
@@ -3038,7 +3074,7 @@ yylex(void)
in_brack--;
break;
case '\\':
- if ((c = nextc()) == END_FILE) {
+ if ((c = nextc(true)) == END_FILE) {
pushback();
yyerror(_("unterminated regexp ends with `\\' at end of file"));
goto end_regexp; /* kludge */
@@ -3058,7 +3094,7 @@ end_regexp:
yylval = GET_INSTRUCTION(Op_token);
yylval->lextok = estrdup(tokstart, tok - tokstart);
if (do_lint) {
- int peek = nextc();
+ int peek = nextc(true);
pushback();
if (peek == 'i' || peek == 's') {
@@ -3088,7 +3124,7 @@ end_regexp:
retry:
/* skipping \r is a hack, but windows is just too pervasive. sigh. */
- while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
+ while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r')
continue;
lexeme = lexptr ? lexptr - 1 : lexptr;
@@ -3110,7 +3146,7 @@ retry:
return lasttok = NEWLINE;
case '#': /* it's a comment */
- while ((c = nextc()) != '\n') {
+ while ((c = nextc(false)) != '\n') {
if (c == END_FILE)
return lasttok = NEWLINE_EOF;
}
@@ -3130,7 +3166,7 @@ retry:
*/
if (! do_traditional) {
/* strip trailing white-space and/or comment */
- while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
+ while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r')
continue;
if (c == '#') {
static bool warned = false;
@@ -3140,16 +3176,16 @@ retry:
lintwarn(
_("use of `\\ #...' line continuation is not portable"));
}
- while ((c = nextc()) != '\n')
+ while ((c = nextc(false)) != '\n')
if (c == END_FILE)
break;
}
pushback();
}
#endif /* RELAXED_CONTINUATION */
- c = nextc();
+ c = nextc(true);
if (c == '\r') /* allow MS-DOS files. bleah */
- c = nextc();
+ c = nextc(true);
if (c == '\n') {
sourceline++;
goto retry;
@@ -3188,7 +3224,7 @@ retry:
case '[':
return lasttok = c;
case ']':
- c = nextc();
+ c = nextc(true);
pushback();
if (c == '[') {
yylval = GET_INSTRUCTION(Op_sub_array);
@@ -3200,7 +3236,7 @@ retry:
return ']';
case '*':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_assign_times);
return lasttok = ASSIGNOP;
} else if (do_posix) {
@@ -3211,7 +3247,7 @@ retry:
/* make ** and **= aliases for ^ and ^= */
static bool did_warn_op = false, did_warn_assgn = false;
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
if (! did_warn_assgn) {
did_warn_assgn = true;
if (do_lint)
@@ -3239,7 +3275,7 @@ retry:
return lasttok = '*';
case '/':
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
pushback();
return lasttok = SLASH_BEFORE_EQUAL;
}
@@ -3248,7 +3284,7 @@ retry:
return lasttok = '/';
case '%':
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
yylval = GET_INSTRUCTION(Op_assign_mod);
return lasttok = ASSIGNOP;
}
@@ -3260,7 +3296,7 @@ retry:
{
static bool did_warn_op = false, did_warn_assgn = false;
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
if (do_lint_old && ! did_warn_assgn) {
did_warn_assgn = true;
warning(_("operator `^=' is not supported in old awk"));
@@ -3278,7 +3314,7 @@ retry:
}
case '+':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_assign_plus);
return lasttok = ASSIGNOP;
}
@@ -3291,7 +3327,7 @@ retry:
return lasttok = '+';
case '!':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_notequal);
return lasttok = RELOP;
}
@@ -3304,7 +3340,7 @@ retry:
return lasttok = '!';
case '<':
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
yylval = GET_INSTRUCTION(Op_leq);
return lasttok = RELOP;
}
@@ -3313,7 +3349,7 @@ retry:
return lasttok = '<';
case '=':
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
yylval = GET_INSTRUCTION(Op_equal);
return lasttok = RELOP;
}
@@ -3322,7 +3358,7 @@ retry:
return lasttok = ASSIGN;
case '>':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_geq);
return lasttok = RELOP;
} else if (c == '>') {
@@ -3361,7 +3397,7 @@ retry:
case '"':
string:
esc_seen = false;
- while ((c = nextc()) != '"') {
+ while ((c = nextc(true)) != '"') {
if (c == '\n') {
pushback();
yyerror(_("unterminated string"));
@@ -3369,7 +3405,7 @@ retry:
}
if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) &&
c == '\\') {
- c = nextc();
+ c = nextc(true);
if (c == '\n') {
sourceline++;
continue;
@@ -3403,7 +3439,7 @@ retry:
return lasttok = YSTRING;
case '-':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_assign_minus);
return lasttok = ASSIGNOP;
}
@@ -3416,7 +3452,7 @@ retry:
return lasttok = '-';
case '.':
- c = nextc();
+ c = nextc(true);
pushback();
if (! isdigit(c))
return lasttok = '.';
@@ -3444,7 +3480,7 @@ retry:
if (do_traditional)
goto done;
if (tok == tokstart + 2) {
- int peek = nextc();
+ int peek = nextc(true);
if (isxdigit(peek)) {
inhex = true;
@@ -3472,8 +3508,8 @@ retry:
break;
}
seen_e = true;
- if ((c = nextc()) == '-' || c == '+') {
- int c2 = nextc();
+ if ((c = nextc(true)) == '-' || c == '+') {
+ int c2 = nextc(true);
if (isdigit(c2)) {
tokadd(c);
@@ -3520,7 +3556,7 @@ retry:
}
if (gotnumber)
break;
- c = nextc();
+ c = nextc(true);
}
pushback();
@@ -3569,7 +3605,7 @@ retry:
return lasttok = YNUMBER;
case '&':
- if ((c = nextc()) == '&') {
+ if ((c = nextc(true)) == '&') {
yylval = GET_INSTRUCTION(Op_and);
allow_newline();
return lasttok = LEX_AND;
@@ -3579,7 +3615,7 @@ retry:
return lasttok = '&';
case '|':
- if ((c = nextc()) == '|') {
+ if ((c = nextc(true)) == '|') {
yylval = GET_INSTRUCTION(Op_or);
allow_newline();
return lasttok = LEX_OR;
@@ -3600,7 +3636,7 @@ retry:
}
}
- if (c != '_' && ! isalpha(c)) {
+ if (c != '_' && ! is_alpha(c)) {
yyerror(_("invalid char '%c' in expression"), c);
return lasttok = LEX_EOF;
}
@@ -3620,7 +3656,7 @@ retry:
* occasions where the interactions are funny.
*/
if (! do_traditional && c == '_' && lasttok != '$') {
- if ((c = nextc()) == '"') {
+ if ((c = nextc(true)) == '"') {
intlstr = true;
goto string;
}
@@ -3632,7 +3668,7 @@ retry:
tok = tokstart;
while (c != END_FILE && is_identchar(c)) {
tokadd(c);
- c = nextc();
+ c = nextc(true);
}
tokadd('\0');
pushback();
@@ -3870,7 +3906,7 @@ snode(INSTRUCTION *subn, INSTRUCTION *r)
}
#ifdef HAVE_MPFR
- /* N.B.: There isn't any special processing for an alternate function below */
+ /* N.B.: If necessary, add special processing for alternate builtin, below */
if (do_mpfr && tokentab[idx].ptr2)
r->builtin = tokentab[idx].ptr2;
else
@@ -3899,6 +3935,15 @@ snode(INSTRUCTION *subn, INSTRUCTION *r)
arg = subn->nexti;
if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
arg->nexti->opcode = Op_push_arg; /* argument may be array */
+ } else if (r->builtin == do_div
+#ifdef HAVE_MPFR
+ || r->builtin == MPF(div)
+#endif
+ ) {
+ arg = subn->nexti->lasti->nexti->lasti->nexti; /* 3rd arg list */
+ ip = arg->lasti;
+ if (ip->opcode == Op_push)
+ ip->opcode = Op_push_array;
} else if (r->builtin == do_match) {
static bool warned = false;
@@ -5670,3 +5715,94 @@ one_line_close(int fd)
}
+/* lookup_builtin --- find a builtin function or return NULL */
+
+builtin_func_t
+lookup_builtin(const char *name)
+{
+ int mid = check_special(name);
+
+ if (mid == -1 || tokentab[mid].class != LEX_BUILTIN)
+ return NULL;
+#ifdef HAVE_MPFR
+ if (do_mpfr)
+ return tokentab[mid].ptr2;
+#endif
+
+ return tokentab[mid].ptr;
+}
+
+/* install_builtins --- add built-in functions to FUNCTAB */
+
+void
+install_builtins(void)
+{
+ int i, j;
+
+ j = sizeof(tokentab) / sizeof(tokentab[0]);
+ for (i = 0; i < j; i++) {
+ if ( tokentab[i].class == LEX_BUILTIN
+ && (tokentab[i].flags & DEBUG_USE) == 0) {
+ (void) install_symbol(tokentab[i].operator, Node_builtin_func);
+ }
+ }
+}
+
+/*
+ * 9/2014: Gawk cannot use <ctype.h> isalpha or isalnum when
+ * parsing the program since that can let through non-English
+ * letters. So, we supply our own. !@#$%^&*()-ing locales!
+ */
+
+/* is_alpha --- return true if c is an English letter */
+
+/*
+ * The scene of the murder was grisly to look upon. When the inspector
+ * arrived, the sergeant turned to him and said, "Another programmer stabbed
+ * in the back. He never knew what happened."
+ *
+ * The inspector replied, "Looks like the MO of isalpha, and his even meaner
+ * big brother, isalnum. The Locale brothers." The sergeant merely
+ * shuddered in horror.
+ */
+
+bool
+is_alpha(int c)
+{
+#ifdef I_DONT_KNOW_WHAT_IM_DOING
+ return isalpha(c);
+#else /* ! I_DONT_KNOW_WHAT_IM_DOING */
+ switch (c) {
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+ case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+ case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+ case 'y': case 'z':
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+ case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+ case 'Y': case 'Z':
+ return true;
+ }
+ return false;
+#endif /* ! I_DONT_KNOW_WHAT_IM_DOING */
+}
+
+/* is_alnum --- return true for alphanumeric, English only letters */
+
+bool
+is_alnum(int c)
+{
+ /* digit test is good for EBCDIC too. so there. */
+ return (is_alpha(c) || ('0' <= c && c <= '9'));
+}
+
+
+/* is_identchar --- return true if c can be in an identifier */
+
+bool
+is_identchar(int c)
+{
+ return (is_alnum(c) || c == '_');
+}