diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2010-07-16 11:58:26 +0300 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2010-07-16 11:58:26 +0300 |
commit | 765c7494b3dac62207e6cd57fb839997e237f292 (patch) | |
tree | f7da12ffdb85d9f82671cb3122775b2ce73f7ad9 /field.c | |
parent | cce5115e21db1702e0617afdca36633e7e2c9eae (diff) | |
download | egawk-765c7494b3dac62207e6cd57fb839997e237f292.tar.gz egawk-765c7494b3dac62207e6cd57fb839997e237f292.tar.bz2 egawk-765c7494b3dac62207e6cd57fb839997e237f292.zip |
Moving to 2.13.2.
Diffstat (limited to 'field.c')
-rw-r--r-- | field.c | 668 |
1 files changed, 429 insertions, 239 deletions
@@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1986, 1988, 1989 the Free Software Foundation, Inc. + * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Progamming Language. @@ -25,44 +25,69 @@ #include "awk.h" -extern void assoc_clear(); -extern int a_get_three(); -extern int get_rs(); - -static char *get_fs(); -static int re_split(); -static int parse_fields(); -static void set_element(); - -char *line_buf = NULL; /* holds current input line */ +static int (*parse_field) P((int, char **, int, char *, + Regexp *, void (*)(), NODE *)); +static void rebuild_record P((void)); +static int re_parse_field P((int, char **, int, char *, + Regexp *, void (*)(), NODE *)); +static int def_parse_field P((int, char **, int, char *, + Regexp *, void (*)(), NODE *)); +static int sc_parse_field P((int, char **, int, char *, + Regexp *, void (*)(), NODE *)); +static int fw_parse_field P((int, char **, int, char *, + Regexp *, void (*)(), NODE *)); +static void set_element P((int, char *, int, NODE *)); + +static Regexp *FS_regexp = NULL; static char *parse_extent; /* marks where to restart parse of record */ static int parse_high_water=0; /* field number that we have parsed so far */ -static char f_empty[] = ""; -static char *save_fs = " "; /* save current value of FS when line is read, +static int nf_high_water = 0; /* size of fields_arr */ +static char f_empty[] = "\0"; +static int resave_fs; +static NODE *save_FS; +static char *save_fs; /* save current value of FS when line is read, * to be used in deferred parsing */ - NODE **fields_arr; /* array of pointers to the field nodes */ -NODE node0; /* node for $0 which never gets free'd */ -int node0_valid = 1; /* $(>0) has not been changed yet */ +int field0_valid = 1; /* $(>0) has not been changed yet */ +NODE *field0; +static NODE **nodes; /* permanent repository of field nodes */ +static int *FIELDWIDTHS = NULL; void init_fields() { emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields"); - node0.type = Node_val; - node0.stref = 0; - node0.stptr = ""; - node0.flags = (STR|PERM); /* never free buf */ - fields_arr[0] = &node0; + emalloc(nodes, NODE **, sizeof(NODE *), "init_fields"); + emalloc(field0, NODE *, sizeof(NODE), "init_fields"); + field0->type = Node_val; + field0->stref = 0; + field0->stptr = ""; + field0->flags = (STRING|STR|PERM); /* never free buf */ + fields_arr[0] = field0; + save_FS = dupnode(FS_node->var_value); + save_fs = save_FS->stptr; } -/* - * Danger! Must only be called for fields we know have just been blanked, or - * fields we know don't exist yet. - */ +static void +grow_fields_arr(num) +int num; +{ + register int t; + register NODE *n; + + erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "set_field"); + erealloc(nodes, NODE **, (num+1) * sizeof(NODE *), "set_field"); + for (t = nf_high_water+1; t <= num; t++) { + getnode(n); + n->type = Node_val; + nodes[t] = n; + fields_arr[t] = nodes[t]; + } + nf_high_water = num; +} /*ARGSUSED*/ static void @@ -72,21 +97,16 @@ char *str; int len; NODE *dummy; /* not used -- just to make interface same as set_element */ { - NODE *n; - int t; - static int nf_high_water = 0; - - if (num > nf_high_water) { - erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "set_field"); - nf_high_water = num; - } - /* fill in fields that don't exist */ - for (t = parse_high_water + 1; t < num; t++) - fields_arr[t] = Nnull_string; - n = make_string(str, len); - (void) force_number(n); + register NODE *n; + register int t; + + if (num > nf_high_water) + grow_fields_arr(num); + n = nodes[num]; + n->stptr = str; + n->stlen = len; + n->flags = (PERM|STR|STRING|MAYBE_NUM); fields_arr[num] = n; - parse_high_water = num; } /* Someone assigned a value to $(something). Fix up $0 to be right */ @@ -104,17 +124,17 @@ rebuild_record() tlen = 0; ofs = force_string(OFS_node->var_value); ofslen = ofs->stlen; - ptr = &fields_arr[parse_high_water]; + ptr = &fields_arr[NF]; while (ptr > &fields_arr[0]) { tmp = force_string(*ptr); tlen += tmp->stlen; ptr--; } - tlen += (parse_high_water - 1) * ofslen; - emalloc(ops, char *, tlen + 1, "fix_fields"); + tlen += (NF - 1) * ofslen; + emalloc(ops, char *, tlen + 2, "fix_fields"); cops = ops; ops[0] = '\0'; - for (ptr = &fields_arr[1]; ptr <= &fields_arr[parse_high_water]; ptr++) { + for (ptr = &fields_arr[1]; ptr <= &fields_arr[NF]; ptr++) { tmp = *ptr; if (tmp->stlen == 1) *cops++ = tmp->stptr[0]; @@ -122,7 +142,7 @@ rebuild_record() memcpy(cops, tmp->stptr, tmp->stlen); cops += tmp->stlen; } - if (ptr != &fields_arr[parse_high_water]) { + if (ptr != &fields_arr[NF]) { if (ofslen == 1) *cops++ = ofs->stptr[0]; else if (ofslen != 0) { @@ -131,11 +151,10 @@ rebuild_record() } } } - tmp = make_string(ops, tlen); - free(ops); - deref = fields_arr[0]; - do_deref(); + tmp = make_str_node(ops, tlen, ALREADY_MALLOCED); + unref(fields_arr[0]); fields_arr[0] = tmp; + field0_valid = 1; } /* @@ -143,36 +162,231 @@ rebuild_record() * or to NF. At that point, parse only as much as necessary. */ void -set_record(buf, cnt) +set_record(buf, cnt, freeold) char *buf; int cnt; +int freeold; { register int i; - assign_number(&NF_node->var_value, (AWKNUM)-1); + NF = -1; for (i = 1; i <= parse_high_water; i++) { - deref = fields_arr[i]; - do_deref(); + unref(fields_arr[i]); } parse_high_water = 0; - node0_valid = 1; - if (buf == line_buf) { - deref = fields_arr[0]; - do_deref(); - save_fs = get_fs(); - node0.type = Node_val; - node0.stptr = buf; - node0.stlen = cnt; - node0.stref = 1; - node0.flags = (STR|PERM); /* never free buf */ - fields_arr[0] = &node0; + if (freeold) { + unref(fields_arr[0]); + if (resave_fs) { + resave_fs = 0; + unref(save_FS); + save_FS = dupnode(FS_node->var_value); + save_fs = save_FS->stptr; + } + field0->stptr = buf; + field0->stlen = cnt; + field0->stref = 1; + field0->flags = (STRING|STR|PERM|MAYBE_NUM); + fields_arr[0] = field0; + } + fields_arr[0]->flags |= MAYBE_NUM; + field0_valid = 1; +} + +void +reset_record() +{ + (void) force_string(fields_arr[0]); + set_record(fields_arr[0]->stptr, fields_arr[0]->stlen, 0); +} + +void +set_NF() +{ + NF = (int) force_number(NF_node->var_value); + field0_valid = 0; +} + +/* + * this is called both from get_field() and from do_split() + * via (*parse_field)(). This variation is for when FS is a regular + * expression -- either user-defined or because RS=="" and FS==" " + */ +static int +re_parse_field(up_to, buf, len, fs, rp, set, n) +int up_to; /* parse only up to this field number */ +char **buf; /* on input: string to parse; on output: point to start next */ +int len; +register char *fs; +Regexp *rp; +void (*set) (); /* routine to set the value of the parsed field */ +NODE *n; +{ + register char *scan = *buf; + register int nf = parse_high_water; + register char *field; + register char *end = scan + len; + char *cp; + + if (up_to == HUGE) + nf = 0; + if (len == 0) + return nf; + + cp = FS_node->var_value->stptr; + if (*RS == 0 && *cp == ' ' && *(cp+1) == '\0') { + while (scan < end + && (*scan == '\n' || *scan == ' ' || *scan == '\t')) + scan++; + } + field = scan; + while (scan < end + && research(rp, scan, (int)(end - scan), 1) != -1 + && nf < up_to) { + if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */ + scan++; + if (scan == end) { + (*set)(++nf, field, scan - field, n); + up_to = nf; + break; + } + continue; + } + (*set)(++nf, field, RESTART(rp, scan), n); + scan += REEND(rp, scan); + field = scan; + } + if (nf != up_to && *RS != 0 && scan < end) { + (*set)(++nf, scan, (int)(end - scan), n); + scan = end; + } + *buf = scan; + return (nf); +} + +/* + * this is called both from get_field() and from do_split() + * via (*parse_field)(). This variation is for when FS is a single space + * character. + */ +static int +def_parse_field(up_to, buf, len, fs, rp, set, n) +int up_to; /* parse only up to this field number */ +char **buf; /* on input: string to parse; on output: point to start next */ +int len; +register char *fs; +Regexp *rp; +void (*set) (); /* routine to set the value of the parsed field */ +NODE *n; +{ + register char *scan = *buf; + register int nf = parse_high_water; + register char *field; + register char *end = scan + len; + + if (up_to == HUGE) + nf = 0; + if (len == 0) + return nf; + + *end = ' '; /* sentinel character */ + for (; nf < up_to; scan++) { + /* + * special case: fs is single space, strip leading whitespace + */ + while (scan < end && (*scan == ' ' || *scan == '\t')) + scan++; + if (scan >= end) + break; + field = scan; + while (*scan != ' ' && *scan != '\t') + scan++; + (*set)(++nf, field, (int)(scan - field), n); + if (scan == end) + break; + } + *buf = scan; + return nf; +} + +/* + * this is called both from get_field() and from do_split() + * via (*pase_field)(). This variation is for when FS is a single character + * other than space. + */ +static int +sc_parse_field(up_to, buf, len, fs, rp, set, n) +int up_to; /* parse only up to this field number */ +char **buf; /* on input: string to parse; on output: point to start next */ +int len; +register char *fs; +Regexp *rp; +void (*set) (); /* routine to set the value of the parsed field */ +NODE *n; +{ + register char *scan = *buf; + register char fschar = *fs; + register int nf = parse_high_water; + register char *field; + register char *end = scan + len; + + if (up_to == HUGE) + nf = 0; + if (len == 0) + return nf; + *end = fschar; /* sentinel character */ + for (; nf < up_to; scan++) { + field = scan; + while (*scan++ != fschar) + ; + scan--; + (*set)(++nf, field, (int)(scan - field), n); + if (scan == end) + break; } + *buf = scan; + return nf; +} + +/* + * this is called both from get_field() and from do_split() + * via (*pase_field)(). This variation is for when FS is a single character + * other than space. + */ +static int +fw_parse_field(up_to, buf, len, fs, rp, set, n) +int up_to; /* parse only up to this field number */ +char **buf; /* on input: string to parse; on output: point to start next */ +int len; +register char *fs; +Regexp *rp; +void (*set) (); /* routine to set the value of the parsed field */ +NODE *n; +{ + register char *scan = *buf; + register int nf = parse_high_water; + register char *end = scan + len; + + if (up_to == HUGE) + nf = 0; + if (len == 0) + return nf; + for (; nf < up_to && (len = FIELDWIDTHS[nf+1]) != -1; ) { + if (len > end - scan) + len = end - scan; + (*set)(++nf, scan, len, n); + scan += len; + } + if (len == -1) + *buf = end; + else + *buf = scan; + return nf; } NODE ** get_field(num, assign) -int num; -int assign; /* this field is on the LHS of an assign */ +register int num; +Func_ptr *assign; /* this field is on the LHS of an assign */ { int n; @@ -180,40 +394,52 @@ int assign; /* this field is on the LHS of an assign */ * if requesting whole line but some other field has been altered, * then the whole line must be rebuilt */ - if (num == 0 && (node0_valid == 0 || assign)) { - /* first, parse remainder of input record */ - if (NF_node->var_value->numbr == -1) { - if (parse_high_water == 0) - parse_extent = node0.stptr; - n = parse_fields(HUGE-1, &parse_extent, - node0.stlen - (parse_extent - node0.stptr), - save_fs, set_field, (NODE *)NULL); - assign_number(&NF_node->var_value, (AWKNUM)n); - } - if (node0_valid == 0) + if (num == 0) { + if (!field0_valid) { + /* first, parse remainder of input record */ + if (NF == -1) { + NF = (*parse_field)(HUGE-1, &parse_extent, + fields_arr[0]->stlen - + (parse_extent - fields_arr[0]->stptr), + save_fs, FS_regexp, set_field, + (NODE *)NULL); + parse_high_water = NF; + } rebuild_record(); + } + if (assign) + *assign = reset_record; return &fields_arr[0]; } - if (num > 0 && assign) - node0_valid = 0; + + /* assert(num > 0); */ + + if (assign) + field0_valid = 0; if (num <= parse_high_water) /* we have already parsed this field */ return &fields_arr[num]; - if (parse_high_water == 0 && num > 0) /* starting at the beginning */ + if (parse_high_water == 0) /* starting at the beginning */ parse_extent = fields_arr[0]->stptr; /* * parse up to num fields, calling set_field() for each, and saving * in parse_extent the point where the parse left off */ - n = parse_fields(num, &parse_extent, + n = (*parse_field)(num, &parse_extent, fields_arr[0]->stlen - (parse_extent-fields_arr[0]->stptr), - save_fs, set_field, (NODE *)NULL); + save_fs, FS_regexp, set_field, (NODE *)NULL); + parse_high_water = n; if (num == HUGE-1) num = n; - if (n < num) { /* requested field number beyond end of record; - * set_field will just extend the number of fields, - * with empty fields - */ - set_field(num, f_empty, 0, (NODE *) NULL); + if (n < num) { /* requested field number beyond end of record; */ + register int i; + + if (num > nf_high_water) + grow_fields_arr(num); + + /* fill in fields that don't exist */ + for (i = n + 1; i <= num; i++) + fields_arr[i] = Nnull_string; + /* * if this field is onthe LHS of an assignment, then we want to * set NF to this value, below @@ -229,196 +455,160 @@ int assign; /* this field is on the LHS of an assign */ * only gets set if the field is assigned to -- in this case n has * been set to num above */ - if (*parse_extent == '\0') - assign_number(&NF_node->var_value, (AWKNUM)n); + if (parse_extent == fields_arr[0]->stptr + fields_arr[0]->stlen) + NF = n; return &fields_arr[num]; } -/* - * this is called both from get_field() and from do_split() - */ -static int -parse_fields(up_to, buf, len, fs, set, n) -int up_to; /* parse only up to this field number */ -char **buf; /* on input: string to parse; on output: point to start next */ +static void +set_element(num, s, len, n) +int num; +char *s; int len; -register char *fs; -void (*set) (); /* routine to set the value of the parsed field */ NODE *n; { - char *s = *buf; - register char *field; - register char *scan; - register char *end = s + len; - int NF = parse_high_water; - char rs = get_rs(); + register NODE *it; - - if (up_to == HUGE) - NF = 0; - if (*fs && *(fs + 1) != '\0') { /* fs is a regexp */ - struct re_registers reregs; - - scan = s; - if (rs == 0 && STREQ(FS_node->var_value->stptr, " ")) { - while ((*scan == '\n' || *scan == ' ' || *scan == '\t') - && scan < end) - scan++; - } - s = scan; - while (scan < end - && re_split(scan, (int)(end - scan), fs, &reregs) != -1 - && NF < up_to) { - if (reregs.end[0] == 0) { /* null match */ - scan++; - if (scan == end) { - (*set)(++NF, s, scan - s, n); - up_to = NF; - break; - } - continue; - } - (*set)(++NF, s, scan - s + reregs.start[0], n); - scan += reregs.end[0]; - s = scan; - } - if (NF != up_to && scan <= end) { - if (!(rs == 0 && scan == end)) { - (*set)(++NF, scan, (int)(end - scan), n); - scan = end; - } - } - *buf = scan; - return (NF); - } - for (scan = s; scan < end && NF < up_to; scan++) { - /* - * special case: fs is single space, strip leading - * whitespace - */ - if (*fs == ' ') { - while ((*scan == ' ' || *scan == '\t') && scan < end) - scan++; - if (scan >= end) - break; - } - field = scan; - if (*fs == ' ') - while (*scan != ' ' && *scan != '\t' && scan < end) - scan++; - else { - while (*scan != *fs && scan < end) - scan++; - if (rs && scan == end-1 && *scan == *fs) { - (*set)(++NF, field, (int)(scan - field), n); - field = scan; - } - } - (*set)(++NF, field, (int)(scan - field), n); - if (scan == end) - break; - } - *buf = scan; - return NF; -} - -static int -re_split(buf, len, fs, reregsp) -char *buf, *fs; -int len; -struct re_registers *reregsp; -{ - typedef struct re_pattern_buffer RPAT; - static RPAT *rp; - static char *last_fs = NULL; - - if ((last_fs != NULL && !STREQ(fs, last_fs)) - || (rp && ! strict && ((IGNORECASE_node->var_value->numbr != 0) - ^ (rp->translate != NULL)))) - { - /* fs has changed or IGNORECASE has changed */ - free(rp->buffer); - free(rp->fastmap); - free((char *) rp); - free(last_fs); - last_fs = NULL; - } - if (last_fs == NULL) { /* first time */ - emalloc(rp, RPAT *, sizeof(RPAT), "re_split"); - memset((char *) rp, 0, sizeof(RPAT)); - emalloc(rp->buffer, char *, 8, "re_split"); - rp->allocated = 8; - emalloc(rp->fastmap, char *, 256, "re_split"); - emalloc(last_fs, char *, strlen(fs) + 1, "re_split"); - (void) strcpy(last_fs, fs); - if (! strict && IGNORECASE_node->var_value->numbr != 0.0) - rp->translate = casetable; - else - rp->translate = NULL; - if (re_compile_pattern(fs, strlen(fs), rp) != NULL) - fatal("illegal regular expression for FS: `%s'", fs); - } - return re_search(rp, buf, len, 0, len, reregsp); + it = make_string(s, len); + it->flags |= MAYBE_NUM; + *assoc_lookup(n, tmp_number((AWKNUM) (num))) = it; } NODE * do_split(tree) NODE *tree; { - NODE *t1, *t2, *t3; - register char *splitc; + NODE *t1, *t2, *t3, *tmp; + register char *splitc = ""; char *s; - NODE *n; + int (*parseit)(); + Regexp *rp = NULL; - if (a_get_three(tree, &t1, &t2, &t3) < 3) - splitc = get_fs(); - else - splitc = force_string(t3)->stptr; + t1 = tree_eval(tree->lnode); + t2 = tree->rnode->lnode; + t3 = tree->rnode->rnode->lnode; + + (void) force_string(t1); - n = t2; if (t2->type == Node_param_list) - n = stack_ptr[t2->param_cnt]; - if (n->type != Node_var && n->type != Node_var_array) + t2 = stack_ptr[t2->param_cnt]; + if (t2->type != Node_var && t2->type != Node_var_array) fatal("second argument of split is not a variable"); - assoc_clear(n); - - tree = force_string(t1); + assoc_clear(t2); + + if (t3->re_flags & FS_DFLT) { + parseit = parse_field; + splitc = FS; + rp = FS_regexp; + } else { + tmp = force_string(tree_eval(t3->re_exp)); + if (tmp->stlen == 1) { + if (tmp->stptr[0] == ' ') { + parseit = def_parse_field; + } else { + parseit = sc_parse_field; + splitc = tmp->stptr; + } + } else { + parseit = re_parse_field; + rp = re_update(t3); + } + free_temp(tmp); + } - s = tree->stptr; - return tmp_number((AWKNUM) - parse_fields(HUGE, &s, tree->stlen, splitc, set_element, n)); + s = t1->stptr; + tmp = tmp_number((AWKNUM) (*parseit)(HUGE, &s, t1->stlen, + splitc, rp, set_element, t2)); + free_temp(t1); + return tmp; } -static char * -get_fs() +void +set_FS() { register NODE *tmp; static char buf[10]; + if (FS_regexp) { + refree(FS_regexp); + FS_regexp = NULL; + } + parse_field = def_parse_field; tmp = force_string(FS_node->var_value); - if (get_rs() == 0) { + FS = tmp->stptr; + if (*RS == 0) { + parse_field = re_parse_field; + FS = buf; if (tmp->stlen == 1) { if (tmp->stptr[0] == ' ') (void) strcpy(buf, "[ \n]+"); - else + else if (tmp->stptr[0] != '\n') sprintf(buf, "[%c\n]", tmp->stptr[0]); + else { + parse_field = sc_parse_field; + FS = tmp->stptr; + } } else if (tmp->stlen == 0) { buf[0] = '\n'; buf[1] = '\0'; + parse_field = sc_parse_field; } else - return tmp->stptr; - return buf; + FS = tmp->stptr; + } else { + if (tmp->stlen > 1) + parse_field = re_parse_field; + else if (*FS != ' ' && tmp->stlen == 1) + parse_field = sc_parse_field; } - return tmp->stptr; + if (parse_field == re_parse_field) { + tmp = tmp_string(FS, strlen(FS)); + FS_regexp = make_regexp(tmp, 0, 1); + free_temp(tmp); + } else + FS_regexp = NULL; + resave_fs = 1; } -static void -set_element(num, s, len, n) -int num; -char *s; -int len; -NODE *n; +void +set_RS() { - *assoc_lookup(n, tmp_number((AWKNUM) (num))) = make_string(s, len); + (void) force_string(RS_node->var_value); + RS = RS_node->var_value->stptr; + set_FS(); +} + +void +set_FIELDWIDTHS() +{ + register char *scan; + char *end; + register int i; + static int fw_alloc = 1; + static int warned = 0; + + if (do_lint && ! warned) { + warned = 1; + warning("use of FIELDWIDTHS is a gawk extension"); + } + if (strict) /* quick and dirty, does the trick */ + return; + + parse_field = fw_parse_field; + scan = force_string(FIELDWIDTHS_node->var_value)->stptr; + end = scan + 1; + if (FIELDWIDTHS == NULL) + emalloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS"); + FIELDWIDTHS[0] = 0; + for (i = 1; ; i++) { + if (i >= fw_alloc) { + fw_alloc *= 2; + erealloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS"); + } + FIELDWIDTHS[i] = (int) strtol(scan, &end, 10); + if (end == scan) + break; + scan = end; + } + FIELDWIDTHS[i] = -1; } |