diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2016-11-15 21:03:57 +0200 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2016-11-15 21:03:57 +0200 |
commit | b37675aa79213f2665abb2bbb4db90560642bdee (patch) | |
tree | 74a00854546a7a174b0722277def07fbfbc0e4cd | |
parent | 84a7c376d6322a6e2429af79358091d10d94c004 (diff) | |
download | egawk-b37675aa79213f2665abb2bbb4db90560642bdee.tar.gz egawk-b37675aa79213f2665abb2bbb4db90560642bdee.tar.bz2 egawk-b37675aa79213f2665abb2bbb4db90560642bdee.zip |
First steps reworking code away from node type.
-rw-r--r-- | ChangeLog | 25 | ||||
-rw-r--r-- | awk.h | 15 | ||||
-rw-r--r-- | awkgram.c | 23 | ||||
-rw-r--r-- | awkgram.y | 23 | ||||
-rw-r--r-- | builtin.c | 26 | ||||
-rw-r--r-- | debug.c | 13 | ||||
-rw-r--r-- | eval.c | 7 | ||||
-rw-r--r-- | interpret.h | 5 | ||||
-rw-r--r-- | profile.c | 28 | ||||
-rw-r--r-- | re.c | 9 |
10 files changed, 100 insertions, 74 deletions
@@ -1,3 +1,28 @@ +2016-11-15 Arnold D. Robbins <arnold@skeeve.com> + + Start reworking typed regexes. + + * awk.h (Node_typedregex): Nuked. + [REGEX]: New flag. + (tre_reg): New member in val part of NODE union. + (force_string, force_number, fixtype): Remove use of Node_typedregex. + * awkgram.y (grammar): Use REGEX flag instead of node type. + (valinfo): Ditto. + (make_regnode): Adjust creation based on node type. + * builtin.c (do_length, do_print, call_sub, call_match, + call_split_func, do_typeof): Adjust code. + * debug.c (watchpoint_triggered, initialize_watch_item, + print_memory): Adjust code. + * eval.c (nodetypes): Remove Node_typedregex. + (flags2str): Add REGEX. + (setup_frame): Adjust code after removal of Node_typedregex. + * interpret.h (r_interpret): Adjust code after removal + of Node_typedregex. + * profile.c (pp_typed_regex): Renamed from pp_strong_regex. + (pp_string_or_typed_regex): Renamed from pp_string_or_strong_regex. + (pprint): Adjust code after removal of Node_typedregex. + * re.c (re_update): Adjust code after removal of Node_typedregex. 
+ 2016-11-04 Eli Zaretskii <eliz@gnu.org> * builtin.c (efwrite) [__MINGW32__]: Call w32_maybe_set_errno if @@ -267,7 +267,6 @@ typedef enum nodevals { Node_val, /* node is a value - type in flags */ Node_regex, /* a regexp, text, compiled, flags, etc */ Node_dynregex, /* a dynamic regexp */ - Node_typedregex, /* like Node_regex, but is a real type */ /* symbol table values */ Node_var, /* scalar variable, lnode is value */ @@ -385,6 +384,7 @@ typedef struct exp_node { int idx; wchar_t *wsp; size_t wslen; + Regexp *preg; } val; } sub; NODETYPE type; @@ -461,6 +461,7 @@ typedef struct exp_node { * See cint_array.c */ # define XARRAY 0x20000 # define NUMCONSTSTR 0x40000 /* have string value for numeric constant */ +# define REGEX 0x80000 /* this is a typed regex */ } NODE; #define vname sub.nodep.name @@ -508,6 +509,7 @@ typedef struct exp_node { #else #define numbr sub.val.fltnum #endif +#define tre_regs sub.val.preg /* * If stfmt is set to STFMT_UNUSED, it means that the string representation @@ -1818,9 +1820,6 @@ dupnode(NODE *n) static inline NODE * force_string(NODE *s) { - if (s->type == Node_typedregex) - return dupnode(s->re_exp); - if ((s->flags & STRCUR) != 0 && (s->stfmt == STFMT_UNUSED || s->stfmt == CONVFMTidx) ) @@ -1847,9 +1846,6 @@ unref(NODE *r) static inline NODE * force_number(NODE *n) { - if (n->type == Node_typedregex) - return Nnull_string; - return (n->flags & NUMCUR) != 0 ? n : str2number(n); } @@ -1866,15 +1862,12 @@ force_number(NODE *n) * It is safe to assume that the return value will be the same NODE, * since force_number on a MAYBE_NUM should always return the same NODE, * and force_string on an INTIND should as well. - * - * There is no way to handle a Node_typedregex correctly, so we ignore - * that case. 
*/ static inline NODE * fixtype(NODE *n) { - assert(n->type == Node_val || n->type == Node_typedregex); + assert(n->type == Node_val); if (n->type == Node_val) { if ((n->flags & MAYBE_NUM) != 0) return force_number(n); @@ -2283,7 +2283,7 @@ yyreduce: len = strlen(re); exp = make_str_node(re, len, ALREADY_MALLOCED); - n = make_regnode(Node_typedregex, exp); + n = make_regnode(Node_val, exp); if (n == NULL) { unref(exp); YYABORT; @@ -3154,7 +3154,7 @@ regular_print: case 80: #line 1278 "awkgram.y" /* yacc.c:1646 */ { - assert((yyvsp[0])->memory->type == Node_typedregex); + assert(((yyvsp[0])->memory->flags & REGEX) == REGEX); (yyvsp[0])->opcode = Op_push_re; (yyval) = (yyvsp[0]); } @@ -3481,7 +3481,7 @@ regular_print: _("regular expression on left of `~' or `!~' operator")); assert((yyvsp[0])->opcode == Op_push_re - && (yyvsp[0])->memory->type == Node_typedregex); + && ((yyvsp[0])->memory->flags & REGEX) != 0); /* RHS is @/.../ */ (yyvsp[-1])->memory = (yyvsp[0])->memory; bcfree((yyvsp[0])); @@ -5836,6 +5836,7 @@ yylex(void) lexeme = lexptr; thisline = NULL; + collect_regexp: if (want_regexp) { int in_brack = 0; /* count brackets, [[:alnum:]] allowed */ @@ -7051,8 +7052,8 @@ valinfo(NODE *n, Func_print print_func, FILE *fp) { if (n == Nnull_string) print_func(fp, "uninitialized scalar\n"); - else if (n->type == Node_typedregex) - print_func(fp, "@/%.*s/\n", n->re_exp->stlen, n->re_exp->stptr); + else if ((n->flags & REGEX) != 0) + print_func(fp, "@/%.*s/\n", n->stlen, n->stptr); else if ((n->flags & STRING) != 0) { pp_string_fp(print_func, fp, n->stptr, n->stlen, '"', false); print_func(fp, "\n"); @@ -7425,9 +7426,9 @@ make_regnode(int type, NODE *exp) getnode(n); memset(n, 0, sizeof(NODE)); n->type = type; - n->re_cnt = 1; - if (type == Node_regex || type == Node_typedregex) { + if (type == Node_regex) { + n->re_cnt = 1; n->re_reg = make_regexp(exp->stptr, exp->stlen, false, true, false); if (n->re_reg == NULL) { freenode(n); @@ -7436,6 +7437,14 @@ make_regnode(int 
type, NODE *exp) n->re_exp = exp; n->re_flags = CONSTANT; n->valref = 1; + } else if (type == Node_val) { + exp->tre_regs = make_regexp(exp->stptr, exp->stlen, false, true, false); + exp->flags |= REGEX|MALLOC|STRCUR|NUMCUR; + exp->numbr = 0; + exp->flags &= ~(STRING|NUMBER); + exp->valref = 1; + unref(n); + n = exp; } return n; } @@ -525,7 +525,7 @@ typed_regexp len = strlen(re); exp = make_str_node(re, len, ALREADY_MALLOCED); - n = make_regnode(Node_typedregex, exp); + n = make_regnode(Node_val, exp); if (n == NULL) { unref(exp); YYABORT; @@ -1276,7 +1276,7 @@ case_value } | typed_regexp { - assert($1->memory->type == Node_typedregex); + assert(($1->memory->flags & REGEX) == REGEX); $1->opcode = Op_push_re; $$ = $1; } @@ -1491,7 +1491,7 @@ exp _("regular expression on left of `~' or `!~' operator")); assert($3->opcode == Op_push_re - && $3->memory->type == Node_typedregex); + && ($3->memory->flags & REGEX) != 0); /* RHS is @/.../ */ $2->memory = $3->memory; bcfree($3); @@ -3416,6 +3416,7 @@ yylex(void) lexeme = lexptr; thisline = NULL; + collect_regexp: if (want_regexp) { int in_brack = 0; /* count brackets, [[:alnum:]] allowed */ @@ -4631,8 +4632,8 @@ valinfo(NODE *n, Func_print print_func, FILE *fp) { if (n == Nnull_string) print_func(fp, "uninitialized scalar\n"); - else if (n->type == Node_typedregex) - print_func(fp, "@/%.*s/\n", n->re_exp->stlen, n->re_exp->stptr); + else if ((n->flags & REGEX) != 0) + print_func(fp, "@/%.*s/\n", n->stlen, n->stptr); else if ((n->flags & STRING) != 0) { pp_string_fp(print_func, fp, n->stptr, n->stlen, '"', false); print_func(fp, "\n"); @@ -5005,9 +5006,9 @@ make_regnode(int type, NODE *exp) getnode(n); memset(n, 0, sizeof(NODE)); n->type = type; - n->re_cnt = 1; - if (type == Node_regex || type == Node_typedregex) { + if (type == Node_regex) { + n->re_cnt = 1; n->re_reg = make_regexp(exp->stptr, exp->stlen, false, true, false); if (n->re_reg == NULL) { freenode(n); @@ -5016,6 +5017,14 @@ make_regnode(int type, NODE *exp) 
n->re_exp = exp; n->re_flags = CONSTANT; n->valref = 1; + } else if (type == Node_val) { + exp->tre_regs = make_regexp(exp->stptr, exp->stlen, false, true, false); + exp->flags |= REGEX|MALLOC|STRCUR|NUMCUR; + exp->numbr = 0; + exp->flags &= ~(STRING|NUMBER); + exp->valref = 1; + unref(n); + n = exp; } return n; } @@ -536,7 +536,7 @@ do_length(int nargs) return make_number(size); } - assert(tmp->type == Node_val || tmp->type == Node_typedregex); + assert(tmp->type == Node_val); if (do_lint && (fixtype(tmp)->flags & STRING) == 0) lintwarn(_("length: received non-string argument")); @@ -2195,11 +2195,9 @@ do_print(int nargs, int redirtype) fatal(_("attempt to use array `%s' in a scalar context"), array_vname(tmp)); } - if (tmp->type == Node_typedregex) - args_array[i] = force_string(tmp); - else if ( (tmp->flags & STRCUR) == 0 - || ( tmp->stfmt != STFMT_UNUSED - && tmp->stfmt != OFMTidx)) + if ( (tmp->flags & STRCUR) == 0 + || ( tmp->stfmt != STFMT_UNUSED + && tmp->stfmt != OFMTidx)) args_array[i] = format_val(OFMT, OFMTidx, tmp); } @@ -3203,7 +3201,7 @@ call_sub(const char *name, int nargs) * push replace * push $0 */ - if (regex->type != Node_typedregex) + if ((regex->flags & REGEX) == 0) regex = make_regnode(Node_regex, regex); PUSH(regex); PUSH(replace); @@ -3228,7 +3226,7 @@ call_sub(const char *name, int nargs) * nargs++ * } */ - if (regex->type != Node_typedregex) + if ((regex->flags & REGEX) == 0) regex = make_regnode(Node_regex, regex); PUSH(regex); PUSH(replace); @@ -3266,7 +3264,7 @@ call_match(int nargs) /* Don't need to pop the string just to push it back ... 
*/ - if (regex->type != Node_typedregex) + if ((regex->flags & REGEX) == 0) regex = make_regnode(Node_regex, regex); PUSH(regex); @@ -3295,7 +3293,7 @@ call_split_func(const char *name, int nargs) if (nargs >= 3) { regex = POP_STRING(); - if (regex->type != Node_typedregex) + if ((regex->flags & REGEX) == 0) regex = make_regnode(Node_regex, regex); } else { if (name[0] == 's') { @@ -3955,12 +3953,9 @@ do_typeof(int nargs) res = "array"; deref = false; break; - case Node_typedregex: - res = "regexp"; - break; case Node_val: case Node_var: - switch (arg->flags & (STRING|NUMBER|MAYBE_NUM)) { + switch (arg->flags & (STRING|NUMBER|MAYBE_NUM|REGEX)) { case STRING: res = "string"; break; @@ -3970,6 +3965,9 @@ do_typeof(int nargs) case STRING|MAYBE_NUM: res = "strnum"; break; + case REGEX: + res = "regexp"; + break; case NUMBER|STRING: if (arg == Nnull_string) { res = "unassigned"; @@ -1736,8 +1736,6 @@ watchpoint_triggered(struct list_item *w) /* new != NULL */ if (t2->type == Node_val) w->cur_value = dupnode(t2); - else if (t2->type == Node_typedregex) - w->cur_value = dupnode(t2); else { w->flags |= CUR_IS_ARRAY; w->cur_size = (t2->type == Node_var_array) ? 
assoc_length(t2) : 0; @@ -1750,7 +1748,6 @@ watchpoint_triggered(struct list_item *w) w->flags |= CUR_IS_ARRAY; w->cur_size = assoc_length(t2); } else - /* works for Node_typedregex too */ w->cur_value = dupnode(t2); } @@ -1793,7 +1790,7 @@ initialize_watch_item(struct list_item *w) } else if (symbol->type == Node_var_array) { w->flags |= CUR_IS_ARRAY; w->cur_size = assoc_length(symbol); - } else if (symbol->type == Node_typedregex) { + } else if (symbol->type == Node_val && (symbol->flags & REGEX) != 0) { w->cur_value = dupnode(symbol); } /* else can't happen */ @@ -3708,14 +3705,14 @@ print_memory(NODE *m, NODE *func, Func_print print_func, FILE *fp) print_func(fp, "%g", m->numbr); } else if ((m->flags & STRING) != 0) pp_string_fp(print_func, fp, m->stptr, m->stlen, '"', false); - else + else if ((m->flags & REGEX) != 0) { + print_func(fp, "@"); + pp_string_fp(print_func, fp, m->stptr, m->stlen, '/', false); + } else print_func(fp, "-?-"); print_func(fp, " [%s]", flags2str(m->flags)); break; - case Node_typedregex: - print_func(fp, "@"); - /* fall through */ case Node_regex: pp_string_fp(print_func, fp, m->re_exp->stptr, m->re_exp->stlen, '/', false); break; @@ -236,7 +236,6 @@ static const char *const nodetypes[] = { "Node_val", "Node_regex", "Node_dynregex", - "Node_typedregex", "Node_var", "Node_var_array", "Node_var_new", @@ -451,6 +450,7 @@ flags2str(int flagval) { HALFHAT, "HALFHAT" }, { XARRAY, "XARRAY" }, { NUMCONSTSTR, "NUMCONSTSTR" }, + { REGEX, "REGEX" }, { 0, NULL }, }; @@ -1330,11 +1330,6 @@ setup_frame(INSTRUCTION *pc) r->var_value = m; break; - case Node_typedregex: - r->type = Node_var; - r->var_value = m; - break; - default: cant_happen(); } diff --git a/interpret.h b/interpret.h index 9d3c1087..9b737617 100644 --- a/interpret.h +++ b/interpret.h @@ -268,7 +268,7 @@ uninitialized_scalar: r = r->var_value; } - if (r->type == Node_val || r->type == Node_typedregex) + if (r->type == Node_val) UPREF(r); PUSH(r); break; @@ -991,7 +991,8 @@ arrayfor: r 
= POP_STRING(); unref(m->re_exp); m->re_exp = r; - } else if (m->type == Node_typedregex) { + } else if (m->type == Node_val) { + assert((m->flags & REGEX) != 0); UPREF(m); } PUSH(m); @@ -32,8 +32,8 @@ static void parenthesize(int type, NODE *left, NODE *right); static char *pp_list(int nargs, const char *paren, const char *delim); static char *pp_group3(const char *s1, const char *s2, const char *s3); static char *pp_concat(int nargs); -static char *pp_string_or_strong_regex(const char *in_str, size_t len, int delim, bool strong_regex); -static char *pp_strong_regex(const char *in_str, size_t len, int delim); +static char *pp_string_or_typed_regex(const char *in_str, size_t len, int delim, bool typed_regex); +static char *pp_typed_regex(const char *in_str, size_t len, int delim); static bool is_binary(int type); static bool is_scalar(int type); static int prec_level(int type); @@ -640,7 +640,7 @@ cleanup: break; case Op_push_re: - if (pc->memory->type != Node_regex && pc->memory->type != Node_typedregex) + if (pc->memory->type != Node_regex && (pc->memory->flags & REGEX) == 0) break; /* else fall through */ @@ -650,7 +650,7 @@ cleanup: if (pc->memory->type == Node_regex) str = pp_string(re->stptr, re->stlen, '/'); else - str = pp_strong_regex(re->stptr, re->stlen, '/'); + str = pp_typed_regex(re->stptr, re->stlen, '/'); pp_push(pc->opcode, str, CAN_FREE); } break; @@ -672,9 +672,9 @@ cleanup: txt = t2->pp_str; str = pp_group3(txt, op2str(pc->opcode), restr); pp_free(t2); - } else if (m->type == Node_typedregex) { + } else if (m->type == Node_val && (m->flags & REGEX) != 0) { NODE *re = m->re_exp; - restr = pp_strong_regex(re->stptr, re->stlen, '/'); + restr = pp_typed_regex(re->stptr, re->stlen, '/'); str = pp_group3(txt, op2str(pc->opcode), restr); efree(restr); } else { @@ -1416,21 +1416,21 @@ parenthesize(int type, NODE *left, NODE *right) char * pp_string(const char *in_str, size_t len, int delim) { - return pp_string_or_strong_regex(in_str, len, delim, 
false); + return pp_string_or_typed_regex(in_str, len, delim, false); } -/* pp_strong_regex --- pretty format a hard regex constant */ +/* pp_typed_regex --- pretty format a hard regex constant */ static char * -pp_strong_regex(const char *in_str, size_t len, int delim) +pp_typed_regex(const char *in_str, size_t len, int delim) { - return pp_string_or_strong_regex(in_str, len, delim, true); + return pp_string_or_typed_regex(in_str, len, delim, true); } -/* pp_string_or_strong_regex --- pretty format a string, regex, or hard regex constant */ +/* pp_string_or_typed_regex --- pretty format a string, regex, or typed regex constant */ char * -pp_string_or_strong_regex(const char *in_str, size_t len, int delim, bool strong_regex) +pp_string_or_typed_regex(const char *in_str, size_t len, int delim, bool typed_regex) { static char str_escapes[] = "\a\b\f\n\r\t\v\\"; static char str_printables[] = "abfnrtv\\"; @@ -1464,12 +1464,12 @@ pp_string_or_strong_regex(const char *in_str, size_t len, int delim, bool strong } ofre -= (l) /* initial size; 3 for delim + terminating null, 1 for @ */ - osiz = len + 3 + 1 + (strong_regex == true); + osiz = len + 3 + 1 + (typed_regex == true); emalloc(obuf, char *, osiz, "pp_string"); obufout = obuf; ofre = osiz - 1; - if (strong_regex) + if (typed_regex) *obufout++ = '@'; *obufout++ = delim; @@ -345,18 +345,17 @@ re_update(NODE *t) { NODE *t1; + if (t->type == Node_val && (t->flags & REGEX) != 0) + return t->tre_regs; + if ((t->re_flags & CASE) == IGNORECASE) { /* regex was compiled with settings matching IGNORECASE */ if ((t->re_flags & CONSTANT) != 0) { /* it's a constant, so just return it as is */ - assert(t->type == Node_regex || t->type == Node_typedregex); + assert(t->type == Node_regex); return t->re_reg; } t1 = t->re_exp; - if (t1->type == Node_typedregex) { - assert((t1->re_flags & CONSTANT) != 0); - return t1->re_reg; - } if (t->re_text != NULL) { /* if contents haven't changed, just return it */ if (cmp_nodes(t->re_text, t1, 
true) == 0) |