aboutsummaryrefslogtreecommitdiffstats
path: root/awkgram.y
diff options
context:
space:
mode:
Diffstat (limited to 'awkgram.y')
-rw-r--r--awkgram.y336
1 files changed, 273 insertions, 63 deletions
diff --git a/awkgram.y b/awkgram.y
index 649ccac4..63de4e90 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989, 1991-2001 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991-2002 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
@@ -56,6 +56,7 @@ static void parms_shadow P((const char *fname, NODE *func));
static int isnoeffect P((NODETYPE t));
static int isassignable P((NODE *n));
static void dumpintlstr P((char *str, size_t len));
+static void dumpintlstr2 P((char *str1, size_t len1, char *str2, size_t len2));
static void count_args P((NODE *n));
enum defref { FUNC_DEFINE, FUNC_USE };
@@ -110,6 +111,7 @@ extern NODE *end_block;
%type <nodeval> rexpression_list opt_rexpression_list
%type <nodeval> expression_list opt_expression_list
%type <nodeval> statements statement if_statement opt_param_list
+%type <nodeval> simple_stmt opt_simple_stmt
%type <nodeval> opt_exp opt_variable regexp
%type <nodeval> input_redir output_redir
%type <nodetypeval> print
@@ -406,24 +408,36 @@ statement
* Check that the body is a `delete a[i]' statement,
* and that both the loop var and array names match.
*/
- if ($8 != NULL && $8->type == Node_K_delete
- && $8->rnode != NULL
- && ($8->rnode->type == Node_var || $8->rnode->type == Node_param_list)
- && strcmp($3, $8->rnode->var_value->vname) == 0
- && strcmp($5, $8->lnode->vname) == 0) {
- $8->type = Node_K_delete_loop;
- $$ = $8;
+ if ($8 != NULL && $8->type == Node_K_delete) {
+ NODE *arr, *sub;
+
+ assert($8->rnode->type == Node_expression_list);
+ arr = $8->lnode; /* array var */
+ sub = $8->rnode->lnode; /* index var */
+
+ if ( (arr->type == Node_var
+ || arr->type == Node_var_array
+ || arr->type == Node_param_list)
+ && (sub->type == Node_var || sub->type == Node_param_list)
+ && strcmp($3, sub->vname) == 0
+ && strcmp($5, arr->vname) == 0) {
+ $8->type = Node_K_delete_loop;
+ $$ = $8;
+ }
+ else
+ goto regular_loop;
} else {
+ regular_loop:
$$ = node($8, Node_K_arrayfor,
make_for_loop(variable($3, CAN_FREE, Node_var),
(NODE *) NULL, variable($5, CAN_FREE, Node_var_array)));
}
}
- | LEX_FOR '(' opt_exp semi opt_nls exp semi opt_nls opt_exp r_paren opt_nls statement
+ | LEX_FOR '(' opt_simple_stmt semi opt_nls exp semi opt_nls opt_simple_stmt r_paren opt_nls statement
{
$$ = node($12, Node_K_for, (NODE *) make_for_loop($3, $6, $9));
}
- | LEX_FOR '(' opt_exp semi opt_nls semi opt_nls opt_exp r_paren opt_nls statement
+ | LEX_FOR '(' opt_simple_stmt semi opt_nls semi opt_nls opt_simple_stmt r_paren opt_nls statement
{
$$ = node($11, Node_K_for,
(NODE *) make_for_loop($3, (NODE *) NULL, $8));
@@ -434,34 +448,6 @@ statement
| LEX_CONTINUE statement_term
/* similarly */
{ $$ = node((NODE *) NULL, Node_K_continue, (NODE *) NULL); }
- | print '(' expression_list r_paren output_redir statement_term
- {
- $$ = node($3, $1, $5);
- if ($$->type == Node_K_printf)
- count_args($$)
- }
- | print opt_rexpression_list output_redir statement_term
- {
- if ($1 == Node_K_print && $2 == NULL) {
- static int warned = FALSE;
-
- $2 = node(node(make_number(0.0),
- Node_field_spec,
- (NODE *) NULL),
- Node_expression_list,
- (NODE *) NULL);
-
- if (do_lint && ! io_allowed && ! warned) {
- warned = TRUE;
- lintwarn(
- _("plain `print' in BEGIN or END rule should probably be `print \"\"'"));
- }
- }
-
- $$ = node($2, $1, $3);
- if ($$->type == Node_K_printf)
- count_args($$)
- }
| LEX_NEXT statement_term
{ NODETYPE type;
@@ -498,9 +484,49 @@ statement
}
opt_exp statement_term
{ $$ = node($3, Node_K_return, (NODE *) NULL); }
- | LEX_DELETE NAME '[' expression_list ']' statement_term
+ | simple_stmt statement_term
+ ;
+
+ /*
+ * A simple_stmt exists to satisfy a constraint in the POSIX
+ * grammar allowing them to occur as the 1st and 3rd parts
+ * in a `for (...;...;...)' loop. This is a historical oddity
+ * inherited from Unix awk, not at all documented in the AK&W
+ * awk book. We support it, as this was reported as a bug.
+ * We don't bother to document it though. So there.
+ */
+simple_stmt
+ : print '(' expression_list r_paren output_redir
+ {
+ $$ = node($3, $1, $5);
+ if ($$->type == Node_K_printf)
+ count_args($$);
+ }
+ | print opt_rexpression_list output_redir
+ {
+ if ($1 == Node_K_print && $2 == NULL) {
+ static int warned = FALSE;
+
+ $2 = node(node(make_number(0.0),
+ Node_field_spec,
+ (NODE *) NULL),
+ Node_expression_list,
+ (NODE *) NULL);
+
+ if (do_lint && ! io_allowed && ! warned) {
+ warned = TRUE;
+ lintwarn(
+ _("plain `print' in BEGIN or END rule should probably be `print \"\"'"));
+ }
+ }
+
+ $$ = node($2, $1, $3);
+ if ($$->type == Node_K_printf)
+ count_args($$);
+ }
+ | LEX_DELETE NAME '[' expression_list ']'
{ $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, $4); }
- | LEX_DELETE NAME statement_term
+ | LEX_DELETE NAME
{
if (do_lint)
lintwarn(_("`delete array' is a gawk extension"));
@@ -514,7 +540,14 @@ statement
}
$$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL);
}
- | exp statement_term
+ | exp
+ { $$ = $1; }
+ ;
+
+opt_simple_stmt
+ : /* empty */
+ { $$ = NULL; }
+ | simple_stmt
{ $$ = $1; }
;
@@ -819,7 +852,7 @@ non_post_simp_exp
| '-' simp_exp %prec UNARY
{
- if ($2->type == Node_val) {
+ if ($2->type == Node_val && ($2->flags & (STR|STRING)) == 0) {
$2->numbr = -(force_number($2));
$$ = $2;
} else
@@ -919,6 +952,7 @@ static struct token tokentab[] = {
{"continue", Node_K_continue, LEX_CONTINUE, 0, 0},
{"cos", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos},
{"dcgettext", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_dcgettext},
+{"dcngettext", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext},
{"delete", Node_K_delete, LEX_DELETE, NOT_OLD, 0},
{"do", Node_K_do, LEX_DO, NOT_OLD, 0},
{"else", Node_illegal, LEX_ELSE, 0, 0},
@@ -969,6 +1003,20 @@ static struct token tokentab[] = {
{"xor", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_xor},
};
+#ifdef MBS_SUPPORT
+/* Variable containing the current shift state. */
+static mbstate_t cur_mbstate;
+/* Ring buffer containing current characters. */
+#define MAX_CHAR_IN_RING_BUFFER 8
+#define RING_BUFFER_SIZE (MAX_CHAR_IN_RING_BUFFER * MB_LEN_MAX)
+static char cur_char_ring[RING_BUFFER_SIZE];
+/* Index for ring buffers. */
+static int cur_ring_idx;
+/* This macro is true when the last nextc() returned a single-byte character
+ or the first byte of a multibyte character. */
+#define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1)
+#endif /* MBS_SUPPORT */
+
/* getfname --- return name of a builtin function (for pretty printing) */
const char *
@@ -1006,7 +1054,8 @@ static void
const char *mesg = NULL;
register char *bp, *cp;
char *scan;
- char buf[120];
+ char *buf;
+ int count;
static char end_of_file_line[] = "(END OF FILE)";
errcount++;
@@ -1033,11 +1082,24 @@ static void
bp = thisline + strlen(thisline);
}
msg("%.*s", (int) (bp - thisline), thisline);
+
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+ va_start(args, m);
+ if (mesg == NULL)
+ mesg = m;
+#else
+ va_start(args);
+ if (mesg == NULL)
+ mesg = va_arg(args, char *);
+#endif
+ count = (bp - thisline) + strlen(mesg) + 2 + 1;
+ emalloc(buf, char *, count, "yyerror");
+
bp = buf;
- cp = buf + sizeof(buf) - 24; /* 24 more than longest msg. input */
+
if (lexptr != NULL) {
scan = thisline;
- while (bp < cp && scan < lexeme)
+ while (scan < lexeme)
if (*scan++ == '\t')
*bp++ = '\t';
else
@@ -1045,18 +1107,10 @@ static void
*bp++ = '^';
*bp++ = ' ';
}
-#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
- va_start(args, m);
- if (mesg == NULL)
- mesg = m;
-#else
- va_start(args);
- if (mesg == NULL)
- mesg = va_arg(args, char *);
-#endif
strcpy(bp, mesg);
err("", buf, args);
va_end(args);
+ free(buf);
}
/* get_src_buf --- read the next buffer of source program */
@@ -1237,6 +1291,75 @@ tokexpand()
/* nextc --- get the next input character */
+#ifdef MBS_SUPPORT
+
+static int
+nextc() /* next source byte (as unsigned char), or EOF if get_src_buf() fails */
+{
+ if (MB_CUR_MAX > 1) { /* multibyte locale: track character boundaries */
+ /* Advance the ring index, wrapping at the end of the buffer. */
+ cur_ring_idx = (cur_ring_idx == RING_BUFFER_SIZE - 1)? 0 :
+ cur_ring_idx + 1;
+
+ /* A zero entry means this byte has not been classified yet. */
+ if (cur_char_ring[cur_ring_idx] == 0) {
+ /* Classify it by probing successively longer prefixes at lexptr. */
+ int idx, work_ring_idx = cur_ring_idx;
+ mbstate_t tmp_state;
+ size_t mbclen;
+
+ if (!lexptr || lexptr >= lexend)
+ if (!get_src_buf()) {
+ return EOF;
+ }
+
+ for (idx = 0 ; lexptr + idx < lexend ; idx++) {
+ tmp_state = cur_mbstate; /* each probe restarts from the saved state */
+ mbclen = mbrlen(lexptr, idx + 1, &tmp_state);
+
+ if (mbclen == 1 || mbclen == (size_t)-1 || mbclen == 0) {
+ /* It is a singlebyte character, an invalid multibyte
+ sequence, or a NUL byte. We treat it as a singlebyte
+ character. */
+ cur_char_ring[work_ring_idx] = 1;
+ break;
+ } else if (mbclen == (size_t)-2) {
+ /* Incomplete so far: record the prefix length and keep probing. */
+ cur_char_ring[work_ring_idx] = idx + 1;
+ } else {
+ /* mbclen > 1: a complete multibyte character of mbclen bytes */
+ cur_char_ring[work_ring_idx] = mbclen;
+ break;
+ }
+ work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)?
+ 0 : work_ring_idx + 1;
+ }
+ cur_mbstate = tmp_state; /* commit the shift state of the last probe */
+
+ /* Zero the following slot so that a later call classifies it afresh. */
+ work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)?
+ 0 : work_ring_idx + 1;
+ cur_char_ring[work_ring_idx] = 0;
+ }
+
+ return (int) (unsigned char) *lexptr++;
+ }
+ else { /* MB_CUR_MAX <= 1: plain byte-at-a-time fetch */
+ int c;
+
+ if (lexptr && lexptr < lexend)
+ c = (int) (unsigned char) *lexptr++;
+ else if (get_src_buf()) /* buffer exhausted: try to refill it */
+ c = (int) (unsigned char) *lexptr++;
+ else
+ c = EOF;
+
+ return c;
+ }
+}
+
+#else /* MBS_SUPPORT */
+
#if GAWKDEBUG
int
nextc()
@@ -1259,10 +1382,29 @@ nextc()
)
#endif
+#endif /* MBS_SUPPORT */
+
/* pushback --- push a character back on the input */
+#ifdef MBS_SUPPORT
+
+static void
+pushback() /* move lexptr back one byte so the next nextc() re-reads it */
+{
+ if (MB_CUR_MAX > 1) { /* multibyte locale: keep the ring index in step */
+ cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 :
+ cur_ring_idx - 1; /* step back one slot, wrapping at 0 */
+ (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr); /* never backs up past lexptr_begin; NOTE(review): the ring index above moves regardless */
+ } else
+ (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr); /* same expression as the non-MBS pushback() macro */
+}
+
+#else
+
#define pushback() (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr)
+#endif /* MBS_SUPPORT */
+
/* allow_newline --- allow newline after &&, ||, ? and : */
static void
@@ -1317,7 +1459,7 @@ yylex()
return 0;
}
pushback();
-#ifdef OS2
+#if defined OS2 || defined __EMX__
/*
* added for OS/2's extproc feature of cmd.exe
* (like #! in BSD sh)
@@ -1352,6 +1494,9 @@ yylex()
tok = tokstart;
for (;;) {
c = nextc();
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX == 1 || nextc_is_1stbyte)
+#endif
switch (c) {
case '[':
/* one day check for `.' and `=' too */
@@ -1409,6 +1554,9 @@ retry:
tok = tokstart;
yylval.nodetypeval = Node_illegal;
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX == 1 || nextc_is_1stbyte)
+#endif
switch (c) {
case EOF:
if (lasttok != NEWLINE) {
@@ -1650,6 +1798,9 @@ retry:
yyerror(_("unterminated string"));
exit(1);
}
+#ifdef MBS_SUPPORT
+ if (MB_CUR_MAX == 1 || nextc_is_1stbyte)
+#endif
if (c == '\\') {
c = nextc();
if (c == '\n') {
@@ -1779,9 +1930,15 @@ retry:
eof_warned = TRUE;
}
tokadd('\0');
- if (! do_traditional && isnondecimal(tokstart))
+ if (! do_traditional && isnondecimal(tokstart)) {
+ static short warned = FALSE;
+ if (do_lint && ! warned) {
+ warned = TRUE;
+ lintwarn("numeric constant `%.*s' treated as octal or hexadecimal",
+ strlen(tokstart)-1, tokstart);
+ }
yylval.nodeval = make_number(nondec2awknum(tokstart, strlen(tokstart)));
- else
+ } else
yylval.nodeval = make_number(atof(tokstart));
yylval.nodeval->flags |= PERM;
return lasttok = YNUMBER;
@@ -1816,7 +1973,21 @@ retry:
exit(1);
}
- if (! do_traditional && c == '_') {
+ /*
+ * Lots of fog here. Consider:
+ *
+ * print "xyzzy"$_"foo"
+ *
+ * Without the check for `lasttok != '$'', this is parsed as
+ *
+ * print "xyzzy" $(_"foo")
+ *
+ * With the check, it is "correctly" parsed as three
+ * string concatenations. Sigh. This seems to be
+ * "more correct", but this is definitely one of those
+ * occasions where the interactions are funny.
+ */
+ if (! do_traditional && c == '_' && lasttok != '$') {
if ((c = nextc()) == '"') {
intlstr = TRUE;
goto string;
@@ -1953,7 +2124,7 @@ snode(NODE *subn, NODETYPE op, int idx)
/* traverse expression list to see how many args. given */
for (n = subn; n != NULL; n = n->rnode) {
nexp++;
- if (nexp > 3)
+ if (nexp > 5)
break;
}
@@ -2047,9 +2218,22 @@ snode(NODE *subn, NODETYPE op, int idx)
/* don't dump it, the lexer already did */
else
dumpintlstr(str->stptr, str->stlen);
+ } else if (do_intl /* --gen-po */
+ && r->proc == do_dcngettext /* dcngettext(...) */
+ && subn->lnode->type == Node_val /* 1st arg is constant */
+ && (subn->lnode->flags & STR) != 0 /* it's a string constant */
+ && subn->rnode->lnode->type == Node_val /* 2nd arg is constant too */
+ && (subn->rnode->lnode->flags & STR) != 0) { /* it's a string constant */
+ /* ala xgettext, dcngettext("some string", "some plural" ...) dumps the string */
+ NODE *str1 = subn->lnode;
+ NODE *str2 = subn->rnode->lnode;
+
+ if (((str1->flags | str2->flags) & INTLSTR) != 0)
+ warning(_("use of dcngettext(_\"...\") is incorrect: remove leading underscore"));
+ else
+ dumpintlstr2(str1->stptr, str1->stlen, str2->stptr, str2->stlen);
}
-
r->subnode = subn;
if (r->proc == do_sprintf) {
count_args(r);
@@ -2726,7 +2910,7 @@ mk_rexp(NODE *exp)
return exp;
getnode(n);
- n->type = Node_regex;
+ n->type = Node_dynregex;
n->re_exp = exp;
n->re_text = NULL;
n->re_reg = NULL;
@@ -2828,7 +3012,7 @@ isassignable(register NODE *n)
NODE *
stopme(NODE *tree)
{
- return tmp_number((AWKNUM) 0.0);
+ return 0;
}
/* dumpintlstr --- write out an initial .po file entry for the string */
@@ -2848,10 +3032,36 @@ dumpintlstr(char *str, size_t len)
}
printf("msgid ");
- fflush(stdout);
pp_string_fp(stdout, str, len, '"', TRUE);
putchar('\n');
printf("msgstr \"\"\n\n");
+ fflush(stdout);
+}
+
+/* dumpintlstr2 --- write a .po template entry (msgid + msgid_plural) to stdout */
+
+static void
+dumpintlstr2(char *str1, size_t len1, char *str2, size_t len2) /* str1/len1: singular; str2/len2: plural */
+{
+ char *cp;
+
+ /* See the GNU gettext distribution for details on the file format */
+
+ if (source != NULL) {
+ /* ala the gettext sources, remove leading `./'s */
+ for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
+ continue;
+ printf("#: %s:%d\n", cp, sourceline); /* source-location reference line */
+ }
+
+ printf("msgid ");
+ pp_string_fp(stdout, str1, len1, '"', TRUE); /* singular form, written with '"' as the delimiter */
+ putchar('\n');
+ printf("msgid_plural ");
+ pp_string_fp(stdout, str2, len2, '"', TRUE); /* plural form, same quoting */
+ putchar('\n');
+ printf("msgstr[0] \"\"\nmsgstr[1] \"\"\n\n"); /* two empty translation slots, then a blank separator line */
+ fflush(stdout); /* flush once the whole entry has been written */
+}
/* count_args --- count the number of printf arguments */