diff options
Diffstat (limited to 'awkgram.y')
-rw-r--r-- | awkgram.y | 130 |
1 files changed, 122 insertions, 8 deletions
@@ -164,7 +164,7 @@ extern double fmod(double x, double y);
 %}
 
 %token FUNC_CALL NAME REGEXP FILENAME
-%token YNUMBER YSTRING
+%token YNUMBER YSTRING HARD_REGEXP
 %token RELOP IO_OUT IO_IN
 %token ASSIGNOP ASSIGN MATCHOP CONCAT_OP
 %token SUBSCRIPT
@@ -192,7 +192,7 @@ extern double fmod(double x, double y);
 %left MATCHOP
 %nonassoc RELOP '<' '>' IO_IN IO_OUT
 %left CONCAT_OP
-%left YSTRING YNUMBER
+%left YSTRING YNUMBER HARD_REGEXP
 %left '+' '-'
 %left '*' '/' '%'
 %right '!' UNARY
@@ -491,6 +491,33 @@ regexp
 		}
 	;
 
+hard_regexp
+	: HARD_REGEXP
+		{
+		  NODE *n, *exp;
+		  char *re;
+		  size_t len;
+
+		  re = $1->lextok;
+		  $1->lextok = NULL;
+		  len = strlen(re);
+
+		  exp = make_str_node(re, len, ALREADY_MALLOCED);
+		  n = make_regnode(Node_hardregex, exp);
+		  if (n == NULL) {
+			unref(exp);
+			YYABORT;
+		  }
+		  $$ = $1;
+#if 0
+		  /* Don't set this, on purpose */
+		  /* $$->opcode = Op_match_rec; */
+#else
+		  $$->opcode = Op_push_re;
+#endif
+		  $$->memory = n;
+		}
+
 a_slash
 	: '/'
 	  { bcfree($1); }
@@ -1186,6 +1213,15 @@ case_value
 	  { $$ = $1; }
 	| regexp
 	  {
+		if ($1->memory->type == Node_regex)
+			$1->opcode = Op_push_re;
+		else
+			$1->opcode = Op_push;
+		$$ = $1;
+	  }
+	| hard_regexp
+	  {
+		assert($1->memory->type == Node_hardregex);
 		$1->opcode = Op_push_re;
 		$$ = $1;
 	  }
@@ -1331,6 +1367,48 @@ expression_list
 	  }
 	;
 
+opt_fcall_expression_list
+	: /* empty */
+	  { $$ = NULL; }
+	| fcall_expression_list
+	  { $$ = $1; }
+	;
+
+fcall_expression_list
+	: fcall_exp
+	  {	$$ = mk_expression_list(NULL, $1); }
+	| fcall_expression_list comma fcall_exp
+	  {
+		$$ = mk_expression_list($1, $3);
+		yyerrok;
+	  }
+	| error
+	  { $$ = NULL; }
+	| fcall_expression_list error
+	  {
+		/*
+		 * Returning the expression list instead of NULL lets
+		 * snode get a list of arguments that it can count.
+		 */
+		$$ = $1;
+	  }
+	| fcall_expression_list error fcall_exp
+	  {
+		/* Ditto */
+		$$ = mk_expression_list($1, $3);
+	  }
+	| fcall_expression_list comma error
+	  {
+		/* Ditto */
+		$$ = $1;
+	  }
+	;
+
+fcall_exp
+	: exp { $$ = $1; }
+	| hard_regexp { $$ = list_create($1); }
+	;
+
 /* Expressions, not including the comma operator.  */
 exp
 	: variable assign_operator exp %prec ASSIGNOP
@@ -1340,10 +1418,27 @@ exp
 				_("regular expression on right of assignment"));
 		  $$ = mk_assignment($1, $3, $2);
 		}
+	| variable ASSIGN hard_regexp %prec ASSIGNOP
+		{
+		  $$ = mk_assignment($1, list_create($3), $2);
+		}
 	| exp LEX_AND exp
 	  {	$$ = mk_boolean($1, $3, $2); }
 	| exp LEX_OR exp
 	  {	$$ = mk_boolean($1, $3, $2); }
+	| exp MATCHOP hard_regexp
+		{
+		  if ($1->lasti->opcode == Op_match_rec)
+			warning_ln($2->source_line,
+				_("regular expression on left of `~' or `!~' operator"));
+
+		  assert($3->opcode == Op_push_re
+			&& $3->memory->type == Node_hardregex);
+		  /* RHS is @/.../ */
+		  $2->memory = $3->memory;
+		  bcfree($3);
+		  $$ = list_append($1, $2);
+		}
 	| exp MATCHOP exp
 		{
 		  if ($1->lasti->opcode == Op_match_rec)
@@ -1351,6 +1446,7 @@ exp
 				_("regular expression on left of `~' or `!~' operator"));
 
 		  if ($3->lasti == $3->nexti && $3->nexti->opcode == Op_match_rec) {
+			/* RHS is /.../ */
 			$2->memory = $3->nexti->memory;
 			bcfree($3->nexti);	/* Op_match_rec */
 			bcfree($3);		/* Op_list */
@@ -1586,13 +1682,13 @@ non_post_simp_exp
 		}
 	| '(' exp r_paren
 	  { $$ = $2; }
-	| LEX_BUILTIN '(' opt_expression_list r_paren
+	| LEX_BUILTIN '(' opt_fcall_expression_list r_paren
 		{
 		  $$ = snode($3, $1);
 		  if ($$ == NULL)
 			YYABORT;
 		}
-	| LEX_LENGTH '(' opt_expression_list r_paren
+	| LEX_LENGTH '(' opt_fcall_expression_list r_paren
 		{
 		  $$ = snode($3, $1);
 		  if ($$ == NULL)
@@ -1701,7 +1797,7 @@ func_call
 	;
 
 direct_func_call
-	: FUNC_CALL '(' opt_expression_list r_paren
+	: FUNC_CALL '(' opt_fcall_expression_list r_paren
 		{
 		  NODE *n;
 
@@ -2003,6 +2099,7 @@ static const struct token tokentab[] = {
 {"systime",	Op_builtin,	 LEX_BUILTIN,	GAWKX|A(0),	do_systime,	0},
 {"tolower",	Op_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1),	do_tolower,	0},
 {"toupper",	Op_builtin,	 LEX_BUILTIN,	NOT_OLD|A(1),	do_toupper,	0},
+{"typeof",	Op_builtin,	 LEX_BUILTIN,	GAWKX|A(1),	do_typeof,	0},
 {"while",	Op_K_while,	 LEX_WHILE,	BREAK|CONTINUE,	0,	0},
 {"xor",		Op_builtin,	 LEX_BUILTIN,	GAWKX,	do_xor,	MPF(xor)},
 };
@@ -3183,6 +3280,7 @@ yylex(void)
 	bool inhex = false;
 	bool intlstr = false;
 	AWKNUM d;
+	bool collecting_hard_regexp = false;
 
 #define GET_INSTRUCTION(op) bcalloc(op, 1, sourceline)
 
@@ -3217,6 +3315,7 @@ yylex(void)
 	lexeme = lexptr;
 	thisline = NULL;
 
+collect_regexp:
 	if (want_regexp) {
 		int in_brack = 0;	/* count brackets, [[:alnum:]] allowed */
 		/*
@@ -3293,7 +3392,13 @@ end_regexp:
 						peek);
 					}
 				}
-				return lasttok = REGEXP;
+				if (collecting_hard_regexp) {
+					collecting_hard_regexp = false;
+					lasttok = HARD_REGEXP;
+				} else
+					lasttok = REGEXP;
+
+				return lasttok;
 			case '\n':
 				pushback();
 				yyerror(_("unterminated regexp"));
@@ -3351,6 +3456,13 @@ retry:
 		return lasttok = NEWLINE;
 
 	case '@':
+		c = nextc(true);
+		if (c == '/') {
+			want_regexp = true;
+			collecting_hard_regexp = true;
+			goto collect_regexp;
+		}
+		pushback();
 		at_seen = true;
 		return lasttok = '@';
 
@@ -4139,7 +4251,7 @@ snode(INSTRUCTION *subn, INSTRUCTION *r)
 			if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
 				arg->nexti->opcode = Op_push_arg; /* argument may be array */
 		}
-	} else if (r->builtin == do_isarray) {
+	} else if (r->builtin == do_isarray || r->builtin == do_typeof) {
 		arg = subn->nexti;
 		if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
 			arg->nexti->opcode = Op_push_arg;	/* argument may be array */
@@ -4721,7 +4833,7 @@ make_regnode(int type, NODE *exp)
 	n->type = type;
 	n->re_cnt = 1;
 
-	if (type == Node_regex) {
+	if (type == Node_regex || type == Node_hardregex) {
 		n->re_reg = make_regexp(exp->stptr, exp->stlen, false, true, false);
 		if (n->re_reg == NULL) {
 			freenode(n);
@@ -4744,6 +4856,8 @@ mk_rexp(INSTRUCTION *list)
 	ip = list->nexti;
 	if (ip == list->lasti && ip->opcode == Op_match_rec)
 		ip->opcode = Op_push_re;
+	else if (ip == list->lasti && ip->opcode == Op_push_re)
+		; /* do nothing --- @/.../ */
 	else {
 		ip = instruction(Op_push_re);
 		ip->memory = make_regnode(Node_dynregex, NULL);