about | summary | refs | log | tree | commit | diff | stats
path: root/field.c
diff options
context:
space:
mode:
Diffstat (limited to 'field.c')
-rw-r--r-- field.c 139
1 file changed, 31 insertions, 108 deletions
diff --git a/field.c b/field.c
index 5f5b2b65..4f24d5f3 100644
--- a/field.c
+++ b/field.c
@@ -2,22 +2,22 @@
* field.c - routines for dealing with fields and record parsing
*/
-/*
+/*
* Copyright (C) 1986, 1988, 1989, 1991-2016 the Free Software Foundation, Inc.
- *
+ *
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
- *
+ *
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
- *
+ *
* GAWK is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
@@ -44,8 +44,6 @@ static long re_parse_field(long, char **, int, NODE *,
Regexp *, Setfunc, NODE *, NODE *, bool);
static long def_parse_field(long, char **, int, NODE *,
Regexp *, Setfunc, NODE *, NODE *, bool);
-static long posix_def_parse_field(long, char **, int, NODE *,
- Regexp *, Setfunc, NODE *, NODE *, bool);
static long null_parse_field(long, char **, int, NODE *,
Regexp *, Setfunc, NODE *, NODE *, bool);
static long sc_parse_field(long, char **, int, NODE *,
@@ -163,7 +161,7 @@ rebuild_record()
tlen += (NF - 1) * OFSlen;
if ((long) tlen < 0)
tlen = 0;
- emalloc(ops, char *, tlen + 2, "rebuild_record");
+ emalloc(ops, char *, tlen + 1, "rebuild_record");
cops = ops;
ops[0] = '\0';
for (i = 1; i <= NF; i++) {
@@ -218,7 +216,7 @@ rebuild_record()
}
} else {
*n = *r;
- n->flags &= ~(MALLOC|STRING);
+ n->flags &= ~MALLOC;
}
n->stptr = cops;
@@ -278,7 +276,7 @@ set_record(const char *buf, int cnt)
memcpy(databuf, buf, cnt);
/*
- * Add terminating '\0' so that C library routines
+ * Add terminating '\0' so that C library routines
* will know when to stop.
*/
databuf[cnt] = '\0';
@@ -290,7 +288,7 @@ set_record(const char *buf, int cnt)
n->stlen = cnt;
n->valref = 1;
n->type = Node_val;
- n->stfmt = -1;
+ n->stfmt = STFMT_UNUSED;
n->flags = (STRING|STRCUR|MAYBE_NUM|FIELD);
fields_arr[0] = n;
@@ -341,7 +339,7 @@ set_NF()
assert(NF != -1);
(void) force_number(NF_node->var_value);
- nf = get_number_si(NF_node->var_value);
+ nf = get_number_si(NF_node->var_value);
if (nf < 0)
fatal(_("NF set to negative value"));
NF = nf;
@@ -409,7 +407,7 @@ re_parse_field(long up_to, /* parse only up to this field number */
sep = scan;
while (scan < end && (*scan == ' ' || *scan == '\t' || *scan == '\n'))
scan++;
- if (sep_arr != NULL && sep < scan)
+ if (sep_arr != NULL && sep < scan)
set_element(nf, sep, (long)(scan - sep), sep_arr);
}
@@ -441,8 +439,8 @@ re_parse_field(long up_to, /* parse only up to this field number */
}
(*set)(++nf, field,
(long)(scan + RESTART(rp, scan) - field), n);
- if (sep_arr != NULL)
- set_element(nf, scan + RESTART(rp, scan),
+ if (sep_arr != NULL)
+ set_element(nf, scan + RESTART(rp, scan),
(long) (REEND(rp, scan) - RESTART(rp, scan)), sep_arr);
scan += REEND(rp, scan);
field = scan;
@@ -506,7 +504,7 @@ def_parse_field(long up_to, /* parse only up to this field number */
sep = scan;
for (; nf < up_to; scan++) {
/*
- * special case: fs is single space, strip leading whitespace
+ * special case: fs is single space, strip leading whitespace
*/
while (scan < end && (*scan == ' ' || *scan == '\t' || *scan == '\n'))
scan++;
@@ -538,75 +536,6 @@ def_parse_field(long up_to, /* parse only up to this field number */
}
/*
- * posix_def_parse_field --- default field parsing.
- *
- * This is called both from get_field() and from do_split()
- * via (*parse_field)(). This variation is for when FS is a single space
- * character. The only difference between this and def_parse_field()
- * is that this one does not allow newlines to separate fields.
- */
-
-static long
-posix_def_parse_field(long up_to, /* parse only up to this field number */
- char **buf, /* on input: string to parse; on output: point to start next */
- int len,
- NODE *fs,
- Regexp *rp ATTRIBUTE_UNUSED,
- Setfunc set, /* routine to set the value of the parsed field */
- NODE *n,
- NODE *dummy ATTRIBUTE_UNUSED, /* sep_arr not needed here: hence dummy */
- bool in_middle ATTRIBUTE_UNUSED)
-{
- char *scan = *buf;
- long nf = parse_high_water;
- char *field;
- char *end = scan + len;
- char sav;
-
- if (up_to == UNLIMITED)
- nf = 0;
- if (len == 0)
- return nf;
-
- /*
- * Nasty special case. If FS set to "", return whole record
- * as first field. This is not worth a separate function.
- */
- if (fs->stlen == 0) {
- (*set)(++nf, *buf, len, n);
- *buf += len;
- return nf;
- }
-
- /* before doing anything save the char at *end */
- sav = *end;
- /* because it will be destroyed now: */
-
- *end = ' '; /* sentinel character */
- for (; nf < up_to; scan++) {
- /*
- * special case: fs is single space, strip leading whitespace
- */
- while (scan < end && (*scan == ' ' || *scan == '\t'))
- scan++;
- if (scan >= end)
- break;
- field = scan;
- while (*scan != ' ' && *scan != '\t')
- scan++;
- (*set)(++nf, field, (long)(scan - field), n);
- if (scan == end)
- break;
- }
-
- /* everything done, restore original char at *end */
- *end = sav;
-
- *buf = scan;
- return nf;
-}
-
-/*
* null_parse_field --- each character is a separate field
*
* This is called both from get_field() and from do_split()
@@ -857,11 +786,11 @@ get_field(long requested, Func_ptr *assign)
/*
* Keep things uniform. Also, mere intention of assigning something
* to $n should not make $0 invalid. Makes sense to invalidate $0
- * after the actual assignment is performed. Not a real issue in
+ * after the actual assignment is performed. Not a real issue in
* the interpreter otherwise, but causes problem in the
* debugger when watching or printing fields.
*/
-
+
if (assign != NULL)
*assign = invalidate_field0; /* $0 needs reconstruction */
#endif
@@ -977,12 +906,12 @@ do_split(int nargs)
if (sep_arr != NULL) {
if (sep_arr == arr)
- fatal(_("split: cannot use the same array for second and fourth args"));
+ fatal(_("split: cannot use the same array for second and fourth args"));
/* This checks need to be done before clearing any of the arrays */
for (tmp = sep_arr->parent_array; tmp != NULL; tmp = tmp->parent_array)
if (tmp == arr)
- fatal(_("split: cannot use a subarray of second arg for fourth arg"));
+ fatal(_("split: cannot use a subarray of second arg for fourth arg"));
for (tmp = arr->parent_array; tmp != NULL; tmp = tmp->parent_array)
if (tmp == sep_arr)
fatal(_("split: cannot use a subarray of fourth arg for second arg"));
@@ -1020,10 +949,7 @@ do_split(int nargs)
}
} else if (fs->stlen == 1 && (sep->re_flags & CONSTANT) == 0) {
if (fs->stptr[0] == ' ') {
- if (do_posix)
- parseit = posix_def_parse_field;
- else
- parseit = def_parse_field;
+ parseit = def_parse_field;
} else
parseit = sc_parse_field;
} else {
@@ -1071,7 +997,7 @@ do_patsplit(int nargs)
if (sep_arr != NULL) {
if (sep_arr == arr)
- fatal(_("patsplit: cannot use the same array for second and fourth args"));
+ fatal(_("patsplit: cannot use the same array for second and fourth args"));
/* These checks need to be done before clearing any of the arrays */
for (tmp = sep_arr->parent_array; tmp != NULL; tmp = tmp->parent_array)
@@ -1138,7 +1064,7 @@ set_FIELDWIDTHS()
FIELDWIDTHS[0] = 0;
for (i = 1; ; i++) {
unsigned long int tmp;
- if (i + 2 >= fw_alloc) {
+ if (i + 1 >= fw_alloc) {
fw_alloc *= 2;
erealloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS");
}
@@ -1165,7 +1091,7 @@ set_FIELDWIDTHS()
|| (*end != '\0' && ! is_blank(*end))
|| !(0 < tmp && tmp <= INT_MAX)
) {
- fatal_error = true;
+ fatal_error = true;
break;
}
FIELDWIDTHS[i] = tmp;
@@ -1236,7 +1162,7 @@ set_FS()
* FS_regexp will be NULL with a non-null FS_re_yes_case.
* refree() handles null argument; no need for `if (FS_regexp != NULL)' below.
* Please do not remerge.
- */
+ */
refree(FS_re_yes_case);
refree(FS_re_no_case);
FS_re_yes_case = FS_re_no_case = FS_regexp = NULL;
@@ -1278,10 +1204,7 @@ choose_fs_function:
}
}
} else {
- if (do_posix)
- parse_field = posix_def_parse_field;
- else
- parse_field = def_parse_field;
+ parse_field = def_parse_field;
if (fs->stlen == 1) {
if (fs->stptr[0] == ' ')
@@ -1482,19 +1405,19 @@ incr_scan(char **scanp, size_t len, mbstate_t *mbs)
* BEGIN {
* false = 0
* true = 1
- *
+ *
* fpat[1] = "([^,]*)|(\"[^\"]+\")"
* fpat[2] = fpat[1]
* fpat[3] = fpat[1]
* fpat[4] = "aa+"
* fpat[5] = fpat[4]
- *
+ *
* data[1] = "Robbins,,Arnold,"
* data[2] = "Smith,,\"1234 A Pretty Place, NE\",Sometown,NY,12345-6789,USA"
* data[3] = "Robbins,Arnold,\"1234 A Pretty Place, NE\",Sometown,NY,12345-6789,USA"
* data[4] = "bbbaaacccdddaaaaaqqqq"
* data[5] = "bbbaaacccdddaaaaaqqqqa" # should get trailing qqqa
- *
+ *
* for (i = 1; i in data; i++) {
* printf("Splitting: <%s>\n", data[i])
* n = mypatsplit(data[i], fields, fpat[i], seps)
@@ -1505,7 +1428,7 @@ incr_scan(char **scanp, size_t len, mbstate_t *mbs)
* printf("seps[%s] = <%s>\n", j, seps[j])
* }
* }
- *
+ *
* function mypatsplit(string, array, pattern, seps,
* eosflag, non_empty, nf) # locals
* {
@@ -1513,7 +1436,7 @@ incr_scan(char **scanp, size_t len, mbstate_t *mbs)
* delete seps
* if (length(string) == 0)
* return 0
- *
+ *
* eosflag = non_empty = false
* nf = 0
* while (match(string, pattern)) {
@@ -1564,7 +1487,7 @@ incr_scan(char **scanp, size_t len, mbstate_t *mbs)
* }
* if (length(string) > 0)
* seps[nf] = string
- *
+ *
* return length(array)
* }
*/
@@ -1637,7 +1560,7 @@ fpat_parse_field(long up_to, /* parse only up to this field number */
* last match was non-empty, and at the
* current character we get a zero length match,
* which we don't want, so skip over it
- */
+ */
non_empty = false;
if (sep_arr != NULL) {
need_to_set_sep = false;