diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2010-07-15 23:12:49 +0300 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2010-07-15 23:12:49 +0300 |
commit | 3697ec5ca140f686643d204a54181a5ddbf9a799 (patch) | |
tree | 592873e8614475012ddd5f4e6d0482acadbfc9e2 /field.c | |
parent | f3d9dd233ac07f764a554528c85be3768a1d1ddb (diff) | |
download | egawk-3697ec5ca140f686643d204a54181a5ddbf9a799.tar.gz egawk-3697ec5ca140f686643d204a54181a5ddbf9a799.tar.bz2 egawk-3697ec5ca140f686643d204a54181a5ddbf9a799.zip |
Moved to gawk 2.11.
Diffstat (limited to 'field.c')
-rw-r--r-- | field.c | 412 |
1 files changed, 412 insertions, 0 deletions
diff --git a/field.c b/field.c new file mode 100644 index 00000000..205b534b --- /dev/null +++ b/field.c @@ -0,0 +1,412 @@ +/* + * field.c - routines for dealing with fields and record parsing + */ + +/* + * Copyright (C) 1986, 1988, 1989 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Progamming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 1, or (at your option) + * any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GAWK; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "awk.h" + +extern void assoc_clear(); +extern int a_get_three(); +extern int get_rs(); + +static char *get_fs(); +static int re_split(); +static int parse_fields(); +static void set_element(); + +char *line_buf = NULL; /* holds current input line */ + +static char *parse_extent; /* marks where to restart parse of record */ +static int parse_high_water=0; /* field number that we have parsed so far */ +static char f_empty[] = ""; +static char *save_fs = " "; /* save current value of FS when line is read, + * to be used in deferred parsing + */ + + +NODE **fields_arr; /* array of pointers to the field nodes */ +NODE node0; /* node for $0 which never gets free'd */ +int node0_valid = 1; /* $(>0) has not been changed yet */ + +void +init_fields() +{ + emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields"); + node0.type = Node_val; + node0.stref = 0; + node0.stptr = ""; + node0.flags = (STR|PERM); /* never free buf */ + fields_arr[0] = &node0; +} + +/* + * Danger! Must only be called for fields we know have just been blanked, or + * fields we know don't exist yet. + */ + +/*ARGSUSED*/ +static void +set_field(num, str, len, dummy) +int num; +char *str; +int len; +NODE *dummy; /* not used -- just to make interface same as set_element */ +{ + NODE *n; + int t; + static int nf_high_water = 0; + + if (num > nf_high_water) { + erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "set_field"); + nf_high_water = num; + } + /* fill in fields that don't exist */ + for (t = parse_high_water + 1; t < num; t++) + fields_arr[t] = Nnull_string; + n = make_string(str, len); + (void) force_number(n); + fields_arr[num] = n; + parse_high_water = num; +} + +/* Someone assigned a value to $(something). Fix up $0 to be right */ +static void +rebuild_record() +{ + register int tlen; + register NODE *tmp; + NODE *ofs; + char *ops; + register char *cops; + register NODE **ptr; + register int ofslen; + + tlen = 0; + ofs = force_string(OFS_node->var_value); + ofslen = ofs->stlen; + ptr = &fields_arr[parse_high_water]; + while (ptr > &fields_arr[0]) { + tmp = force_string(*ptr); + tlen += tmp->stlen; + ptr--; + } + tlen += (parse_high_water - 1) * ofslen; + emalloc(ops, char *, tlen + 1, "fix_fields"); + cops = ops; + ops[0] = '\0'; + for (ptr = &fields_arr[1]; ptr <= &fields_arr[parse_high_water]; ptr++) { + tmp = *ptr; + if (tmp->stlen == 1) + *cops++ = tmp->stptr[0]; + else if (tmp->stlen != 0) { + memcpy(cops, tmp->stptr, tmp->stlen); + cops += tmp->stlen; + } + if (ptr != &fields_arr[parse_high_water]) { + if (ofslen == 1) + *cops++ = ofs->stptr[0]; + else if (ofslen != 0) { + memcpy(cops, ofs->stptr, ofslen); + cops += ofslen; + } + } + } + tmp = make_string(ops, tlen); + free(ops); + deref = fields_arr[0]; + do_deref(); + fields_arr[0] = tmp; +} + +/* + * setup $0, but defer parsing rest of line until reference is made to $(>0) + * or to NF. At that point, parse only as much as necessary. + */ +void +set_record(buf, cnt) +char *buf; +int cnt; +{ + register int i; + + assign_number(&NF_node->var_value, (AWKNUM)-1); + for (i = 1; i <= parse_high_water; i++) { + deref = fields_arr[i]; + do_deref(); + } + parse_high_water = 0; + node0_valid = 1; + if (buf == line_buf) { + deref = fields_arr[0]; + do_deref(); + save_fs = get_fs(); + node0.type = Node_val; + node0.stptr = buf; + node0.stlen = cnt; + node0.stref = 1; + node0.flags = (STR|PERM); /* never free buf */ + fields_arr[0] = &node0; + } +} + +NODE ** +get_field(num, assign) +int num; +int assign; /* this field is on the LHS of an assign */ +{ + int n; + + /* + * if requesting whole line but some other field has been altered, + * then the whole line must be rebuilt + */ + if (num == 0 && (node0_valid == 0 || assign)) { + /* first, parse remainder of input record */ + if (NF_node->var_value->numbr == -1) { + if (parse_high_water == 0) + parse_extent = node0.stptr; + n = parse_fields(HUGE-1, &parse_extent, + node0.stlen - (parse_extent - node0.stptr), + save_fs, set_field, (NODE *)NULL); + assign_number(&NF_node->var_value, (AWKNUM)n); + } + if (node0_valid == 0) + rebuild_record(); + return &fields_arr[0]; + } + if (num > 0 && assign) + node0_valid = 0; + if (num <= parse_high_water) /* we have already parsed this field */ + return &fields_arr[num]; + if (parse_high_water == 0 && num > 0) /* starting at the beginning */ + parse_extent = fields_arr[0]->stptr; + /* + * parse up to num fields, calling set_field() for each, and saving + * in parse_extent the point where the parse left off + */ + n = parse_fields(num, &parse_extent, + fields_arr[0]->stlen - (parse_extent-fields_arr[0]->stptr), + save_fs, set_field, (NODE *)NULL); + if (num == HUGE-1) + num = n; + if (n < num) { /* requested field number beyond end of record; + * set_field will just extend the number of fields, + * with empty fields + */ + set_field(num, f_empty, 0, (NODE *) NULL); + /* + * if this field is onthe LHS of an assignment, then we want to + * set NF to this value, below + */ + if (assign) + n = num; + } + /* + * if we reached the end of the record, set NF to the number of fields + * so far. Note that num might actually refer to a field that + * is beyond the end of the record, but we won't set NF to that value at + * this point, since this is only a reference to the field and NF + * only gets set if the field is assigned to -- in this case n has + * been set to num above + */ + if (*parse_extent == '\0') + assign_number(&NF_node->var_value, (AWKNUM)n); + + return &fields_arr[num]; +} + +/* + * this is called both from get_field() and from do_split() + */ +static int +parse_fields(up_to, buf, len, fs, set, n) +int up_to; /* parse only up to this field number */ +char **buf; /* on input: string to parse; on output: point to start next */ +int len; +register char *fs; +void (*set) (); /* routine to set the value of the parsed field */ +NODE *n; +{ + char *s = *buf; + register char *field; + register char *scan; + register char *end = s + len; + int NF = parse_high_water; + char rs = get_rs(); + + + if (up_to == HUGE) + NF = 0; + if (*fs && *(fs + 1) != '\0') { /* fs is a regexp */ + struct re_registers reregs; + + scan = s; + if (rs == 0 && STREQ(FS_node->var_value->stptr, " ")) { + while ((*scan == '\n' || *scan == ' ' || *scan == '\t') + && scan < end) + scan++; + } + while (re_split(scan, (int)(end - scan), fs, &reregs) != -1 && + NF < up_to) { + (*set)(++NF, scan, reregs.start[0], n); + scan += reregs.end[0]; + } + if (NF != up_to && scan <= end) { + if (!(rs == 0 && scan == end)) { + (*set)(++NF, scan, (int)(end - scan), n); + scan = end; + } + } + *buf = scan; + return (NF); + } + for (scan = s; scan < end && NF < up_to; scan++) { + /* + * special case: fs is single space, strip leading + * whitespace + */ + if (*fs == ' ') { + while ((*scan == ' ' || *scan == '\t') && scan < end) + scan++; + if (scan >= end) + break; + } + field = scan; + if (*fs == ' ') + while (*scan != ' ' && *scan != '\t' && scan < end) + scan++; + else { + while (*scan != *fs && scan < end) + scan++; + if (rs && scan == end-1 && *scan == *fs) { + (*set)(++NF, field, (int)(scan - field), n); + field = scan; + } + } + (*set)(++NF, field, (int)(scan - field), n); + if (scan == end) + break; + } + *buf = scan; + return NF; +} + +static int +re_split(buf, len, fs, reregsp) +char *buf, *fs; +int len; +struct re_registers *reregsp; +{ + typedef struct re_pattern_buffer RPAT; + static RPAT *rp; + static char *last_fs = NULL; + + if ((last_fs != NULL && !STREQ(fs, last_fs)) + || (rp && ! strict && ((IGNORECASE_node->var_value->numbr != 0) + ^ (rp->translate != NULL)))) + { + /* fs has changed or IGNORECASE has changed */ + free(rp->buffer); + free(rp->fastmap); + free((char *) rp); + free(last_fs); + last_fs = NULL; + } + if (last_fs == NULL) { /* first time */ + emalloc(rp, RPAT *, sizeof(RPAT), "re_split"); + memset((char *) rp, 0, sizeof(RPAT)); + emalloc(rp->buffer, char *, 8, "re_split"); + rp->allocated = 8; + emalloc(rp->fastmap, char *, 256, "re_split"); + emalloc(last_fs, char *, strlen(fs) + 1, "re_split"); + (void) strcpy(last_fs, fs); + if (! strict && IGNORECASE_node->var_value->numbr != 0.0) + rp->translate = casetable; + else + rp->translate = NULL; + if (re_compile_pattern(fs, strlen(fs), rp) != NULL) + fatal("illegal regular expression for FS: `%s'", fs); + } + return re_search(rp, buf, len, 0, len, reregsp); +} + +NODE * +do_split(tree) +NODE *tree; +{ + NODE *t1, *t2, *t3; + register char *splitc; + char *s; + NODE *n; + + if (a_get_three(tree, &t1, &t2, &t3) < 3) + splitc = get_fs(); + else + splitc = force_string(t3)->stptr; + + n = t2; + if (t2->type == Node_param_list) + n = stack_ptr[t2->param_cnt]; + if (n->type != Node_var && n->type != Node_var_array) + fatal("second argument of split is not a variable"); + assoc_clear(n); + + tree = force_string(t1); + + s = tree->stptr; + return tmp_number((AWKNUM) + parse_fields(HUGE, &s, tree->stlen, splitc, set_element, n)); +} + +static char * +get_fs() +{ + register NODE *tmp; + static char buf[10]; + + tmp = force_string(FS_node->var_value); + if (get_rs() == 0) { + if (tmp->stlen == 1) { + if (tmp->stptr[0] == ' ') + (void) strcpy(buf, "[ \n]+"); + else + sprintf(buf, "[%c\n]", tmp->stptr[0]); + } else if (tmp->stlen == 0) { + buf[0] = '\n'; + buf[1] = '\0'; + } else + return tmp->stptr; + return buf; + } + return tmp->stptr; +} + +static void +set_element(num, s, len, n) +int num; +char *s; +int len; +NODE *n; +{ + *assoc_lookup(n, tmp_number((AWKNUM) (num))) = make_string(s, len); +} |