aboutsummaryrefslogtreecommitdiffstats
path: root/field.c
diff options
context:
space:
mode:
authorAndrew J. Schorr <aschorr@telemetry-investments.com>2016-07-08 15:26:00 -0400
committerAndrew J. Schorr <aschorr@telemetry-investments.com>2016-07-08 15:26:00 -0400
commitc86137f472fdf876c2c223c8d99f673f477b7554 (patch)
tree7ed0e172eaa8d4f831a2d59287547f63f1e71dee /field.c
parent4a0f74139fb702a14c2e6782fb1965245e4f9d2f (diff)
downloadegawk-c86137f472fdf876c2c223c8d99f673f477b7554.tar.gz
egawk-c86137f472fdf876c2c223c8d99f673f477b7554.tar.bz2
egawk-c86137f472fdf876c2c223c8d99f673f477b7554.zip
Optimization: support unterminated field strings inside gawk, but make terminated copies for the API.
Diffstat (limited to 'field.c')
-rw-r--r--field.c135
1 files changed, 34 insertions, 101 deletions
diff --git a/field.c b/field.c
index 3307aced..1b3751d6 100644
--- a/field.c
+++ b/field.c
@@ -56,7 +56,6 @@ static void set_element(long num, char * str, long len, NODE *arr);
static void grow_fields_arr(long num);
static void set_field(long num, char *str, long len, NODE *dummy);
static void purge_record(void);
-static void allocate_databuf(size_t, bool);
static char *parse_extent; /* marks where to restart parse of record */
static long parse_high_water = 0; /* field number that we have parsed so far */
@@ -117,15 +116,6 @@ grow_fields_arr(long num)
nf_high_water = num;
}
-static struct {
- char *p; /* buffer for $0 and field copies */
- size_t size; /* buffer size */
- char *space; /*
- * Pointer to free space in databuf.p for making
- * NUL-terminated copies of $1 thru $NF
- */
-} databuf;
-
/* set_field --- set the value of a particular field */
/*ARGSUSED*/
@@ -140,18 +130,7 @@ set_field(long num,
if (num > nf_high_water)
grow_fields_arr(num);
n = fields_arr[num];
- /*
- * Make a NUL-terminated copy. It is tempting to do this only if
- * str[len] != '\0', but the parse methods cannot be relied upon to
- * avoid altering the contents of the record during parsing. For
- * example, def_parse_field changes the final NUL to a space. In
- * principle, the method could change other characters, so it does
- * not seem safe to rely upon the value of str[len].
- */
- memcpy(databuf.space, str, len);
- databuf.space[len] = '\0';
- n->stptr = databuf.space;
- databuf.space += len+1;
+ n->stptr = str;
n->stlen = len;
n->flags = (STRCUR|STRING|MAYBE_NUM); /* do not set MALLOC */
}
@@ -207,8 +186,6 @@ rebuild_record()
}
}
tmp = make_str_node(ops, tlen, ALREADY_MALLOCED);
- allocate_databuf(tlen, false);
- databuf.space = databuf.p;
/*
* Since we are about to unref fields_arr[0], we want to find
@@ -223,7 +200,7 @@ rebuild_record()
* the new $0 buffer, although that's how previous versions did
* it. It seems faster to leave the malloc'ed fields in place.
*/
- if ((r->flags & MALLOC) == 0) {
+ if (r->stlen > 0 && (r->flags & MALLOC) == 0) {
NODE *n;
getnode(n);
@@ -246,16 +223,7 @@ rebuild_record()
r->flags |= MALLOC;
}
- if (cops[n->stlen] == '\0')
- /* should be the case for $NF */
- n->stptr = cops;
- else {
- /* make a NUL-terminated copy */
- memcpy(databuf.space, cops, n->stlen);
- databuf.space[n->stlen] = '\0';
- n->stptr = databuf.space;
- databuf.space += n->stlen+1;
- }
+ n->stptr = cops;
unref(r);
fields_arr[i] = n;
assert((n->flags & WSTRCUR) == 0);
@@ -273,57 +241,6 @@ rebuild_record()
field0_valid = true;
}
-static void
-allocate_databuf(size_t reclen, bool need_zero)
-{
- size_t needed;
-#define INITIAL_SIZE 512
-#define MAX_SIZE ((size_t) ~0) /* maximally portable ... */
-
- /* buffer management: */
- if (databuf.size == 0) { /* first time */
- emalloc(databuf.p, char *, INITIAL_SIZE, "set_record");
- databuf.size = INITIAL_SIZE;
-
- }
- /*
- * Make sure there's enough room. We need space for $0 plus a NUL
- * terminator plus room for NUL-terminated copies of $1 through $NF.
- * We use reclen as an upper bound for NF, assuming at least 1 byte
- * for a field and its field separator (or fixed-width column). So our
- * total requirement is reclen + 1 + 2*reclen -> 3*reclen + 1.
- * It is tempting to skip the copy if the field value is already
- * terminated with a NUL; this should normally be the case for $NF.
- * Unfortunately, the parse methods often alter the string while
- * parsing, typically changing the final NUL to a sentinel. So when
- * set_field is called, the value of the character after the string
- * in question may not be the actual value once parsing is complete.
- * To be safe, it is prudent to copy all of the fields.
- */
- needed = 2*reclen; /* for copying $1..$NF */
- if (need_zero)
- needed += reclen + 1; /* for $0 plus '\0' */
-#ifdef GAWKDEBUG
- /* malloc precise size so we can check for overruns with valgrind */
- if (needed == 0)
- needed = 1; /* erealloc requires non-zero bytes */
- databuf.size = needed;
- erealloc(databuf.p, char *, databuf.size, "set_record");
-#else
- if (needed > databuf.size) {
- do {
- if (databuf.size > MAX_SIZE/2)
- fatal(_("input record too large"));
- databuf.size *= 2;
- } while (needed > databuf.size);
- erealloc(databuf.p, char *, databuf.size, "set_record");
- }
-#endif
-
-#undef INITIAL_SIZE
-#undef MAX_SIZE
-}
-
/*
* set_record:
* setup $0, but defer parsing rest of line until reference is made to $(>0)
@@ -338,19 +255,42 @@ void
set_record(const char *buf, int cnt)
{
NODE *n;
+ static char *databuf;
+ static unsigned long databuf_size;
+#define INITIAL_SIZE 512
+#define MAX_SIZE ((unsigned long) ~0) /* maximally portable ... */
purge_record();
- allocate_databuf(cnt, true);
+ /* buffer management: */
+ if (databuf_size == 0) { /* first time */
+ emalloc(databuf, char *, INITIAL_SIZE, "set_record");
+ databuf_size = INITIAL_SIZE;
+ memset(databuf, '\0', INITIAL_SIZE);
+
+ }
+ /*
+ * Make sure there's enough room. Since we sometimes need
+ * to place a sentinel at the end, we make sure
+ * databuf_size is > cnt after allocation.
+ */
+ if (cnt >= databuf_size) {
+ do {
+ if (databuf_size > MAX_SIZE/2)
+ fatal(_("input record too large"));
+ databuf_size *= 2;
+ } while (cnt >= databuf_size);
+ erealloc(databuf, char *, databuf_size, "set_record");
+ memset(databuf, '\0', databuf_size);
+ }
/* copy the data */
- memcpy(databuf.p, buf, cnt);
+ memcpy(databuf, buf, cnt);
/*
* Add terminating '\0' so that C library routines
* will know when to stop.
*/
- databuf.p[cnt] = '\0';
- databuf.space = databuf.p + cnt + 1;
+ databuf[cnt] = '\0';
/* manage field 0: */
#ifndef NDEBUG
@@ -359,13 +299,16 @@ set_record(const char *buf, int cnt)
#endif
unref(fields_arr[0]);
getnode(n);
- n->stptr = databuf.p;
+ n->stptr = databuf;
n->stlen = cnt;
n->valref = 1;
n->type = Node_val;
n->stfmt = STFMT_UNUSED;
n->flags = (STRING|STRCUR|MAYBE_NUM); /* do not set MALLOC */
fields_arr[0] = n;
+
+#undef INITIAL_SIZE
+#undef MAX_SIZE
}
/* reset_record --- start over again with current $0 */
@@ -375,16 +318,6 @@ reset_record()
{
fields_arr[0] = force_string(fields_arr[0]);
purge_record();
- if ((fields_arr[0]->flags & MALLOC) != 0) {
- allocate_databuf(fields_arr[0]->stlen, false);
- databuf.space = databuf.p;
- }
- else {
- allocate_databuf(fields_arr[0]->stlen, true);
- /* may have been realloced, so set stptr */
- fields_arr[0]->stptr = databuf.p;
- databuf.space = databuf.p + fields_arr[0]->stlen + 1;
- }
}
static void