diff options
-rw-r--r-- | extension/ChangeLog | 5 | ||||
-rw-r--r-- | extension/bindarr.c | 22 | ||||
-rw-r--r-- | extension/fileop.c | 394 | ||||
-rw-r--r-- | extension/record.awk | 252 | ||||
-rw-r--r-- | extension/spec_array.c | 40 | ||||
-rwxr-xr-x | extension/steps | 2 | ||||
-rwxr-xr-x | extension/testrecord.sh | 19 |
7 files changed, 723 insertions, 11 deletions
diff --git a/extension/ChangeLog b/extension/ChangeLog index 6fdd40e5..678e2702 100644 --- a/extension/ChangeLog +++ b/extension/ChangeLog @@ -1,3 +1,8 @@ +2011-05-03 John Haque <j.eh@mchsi.com> + + * fileop.c, record.awk, testrecord.sh: New files. + * steps: Updated. + 2011-05-02 John Haque <j.eh@mchsi.com> * bindarr.c, dbarray.awk, testdbarray.awk: New files. diff --git a/extension/bindarr.c b/extension/bindarr.c index f500b748..9f8e090c 100644 --- a/extension/bindarr.c +++ b/extension/bindarr.c @@ -64,7 +64,8 @@ static afunc_t bind_array_func[] = { }; enum { INIT, FINI, COUNT, EXISTS, LOOKUP, - STORE, DELETE, CLEAR, FETCHALL }; + STORE, DELETE, CLEAR, FETCHALL +}; static const char *const bfn[] = { "init", "fini", "count", "exists", "lookup", @@ -121,8 +122,9 @@ static NODE ** bind_array_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED) { NODE *xn = symbol->xarray; + (void) xn->aclear(xn, NULL); (void) array_func_call(symbol, NULL, CLEAR); - return xn->aclear(xn, NULL); + return NULL; } /* bind_array_remove --- if subs is already in the table, remove it. */ @@ -131,8 +133,9 @@ static NODE ** bind_array_remove(NODE *symbol, NODE *subs) { NODE *xn = symbol->xarray; + (void) xn->aremove(xn, subs); (void) array_func_call(symbol, subs, DELETE); - return xn->aremove(xn, subs); + return NULL; } /* bind_array_store --- update the value for the SUBS */ @@ -181,10 +184,12 @@ array_func_call(NODE *symbol, NODE *arg1, int fi) force_number(retval); ret = get_number_si(retval); unref(retval); - if (ret < 0) - fatal(ERRNO_node->var_value->stlen > 0 ? - _("%s"), ERRNO_node->var_value->stptr : - _("unknown reason")); + if (ret < 0) { + if (ERRNO_node->var_value->stlen > 0) + fatal(_("%s"), ERRNO_node->var_value->stptr); + else + fatal(_("unknown reason")); + } return ret; } @@ -199,6 +204,9 @@ do_bind_array(int nargs) char *aname; symbol = get_array_argument(0, FALSE); + if (symbol->array_funcs == bind_array_func) + fatal(_("bind_array: array `%s' already bound"), array_vname(symbol)); + assoc_clear(symbol); emalloc(aq, array_t *, sizeof(array_t), "do_bind_array"); diff --git a/extension/fileop.c b/extension/fileop.c new file mode 100644 index 00000000..947c683b --- /dev/null +++ b/extension/fileop.c @@ -0,0 +1,394 @@ +/* + * fileop.c -- Builtin functions for binary I/O and other interfaces to + * the filesystem. + */ + +/* + * Copyright (C) 2012 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "awk.h" + +int plugin_is_GPL_compatible; + +typedef struct file_struct { + struct file_struct *next; + FILE *fp; + int flags; + char path[1]; +} file_t; + +static file_t *files; +static file_t *file_open(const char *builtin_name, int nargs, int do_open); +static int mode2flags(const char *mode); + +/* do_fread --- read from file */ + +static NODE * +do_fread(int nargs) +{ + NODE *arg; + size_t rlen, count; + file_t *f; + char *rbuf; + + f = file_open("fread", nargs, TRUE); + + arg = get_scalar_argument(2, FALSE); + force_number(arg); + rlen = get_number_ui(arg); + + emalloc(rbuf, char *, rlen + 2, "do_fread"); + if ((count = fread(rbuf, 1, rlen, f->fp)) < rlen) { + if (! feof(f->fp)) + update_ERRNO(); + } + return make_str_node(rbuf, count, ALREADY_MALLOCED); +} + +/* do_fwrite --- write to file */ + +static NODE * +do_fwrite(int nargs) +{ + NODE *arg; + file_t *f; + size_t count = 0; + + f = file_open("fwrite", nargs, TRUE); + + arg = get_scalar_argument(2, FALSE); + force_string(arg); + if (arg->stlen > 0) { + count = fwrite(arg->stptr, 1, arg->stlen, f->fp); + if (count < arg->stlen) + update_ERRNO(); + } + return make_number(count); +} + +/* do_fseek --- set the file position indicator */ + +static NODE * +do_fseek(int nargs) +{ + NODE *arg; + long offset; + file_t *f; + int whence = 0, ret = 0; + + f = file_open("fseek", nargs, TRUE); + + arg = get_scalar_argument(2, FALSE); + force_number(arg); + offset = get_number_si(arg); + + arg = get_scalar_argument(3, FALSE); + force_string(arg); + if (strcasecmp(arg->stptr, "SEEK_SET") == 0) + whence = SEEK_SET; + else if (strcasecmp(arg->stptr, "SEEK_CUR") == 0) + whence = SEEK_CUR; + else if (strcasecmp(arg->stptr, "SEEK_END") == 0) + whence = SEEK_END; + else + fatal(_("fseek: `%.*s' is not a valid 4th argument"), + (int) arg->stlen, arg->stptr); + + if (fseek(f->fp, offset, whence) < 0) { + update_ERRNO(); + ret = -1; + } + return make_number(ret); +} + +/* do_ftruncate --- truncate the file to a specified length */ + +static NODE * +do_ftruncate(int nargs) +{ + NODE *arg; + file_t *f; + off_t len; + int ret = 0; + + f = file_open("ftruncate", nargs, TRUE); + arg = get_scalar_argument(2, FALSE); + force_number(arg); + len = (off_t) get_number_si(arg); + if (ftruncate(fileno(f->fp), len) < 0) { + update_ERRNO(); + ret = -1; + } + return make_number(ret); +} + +/* do_unlink --- delete the name from the filesystem */ + +static NODE * +do_unlink(int nargs) +{ + NODE *file; + int ret = 0; + + file = get_scalar_argument(0, FALSE); + force_string(file); + if (file->stlen == 0) + fatal(_("unlink: filename has empty string value")); + if (unlink(file->stptr) < 0) { + update_ERRNO(); + ret = -1; + } + return make_number(ret); +} + +/* do_flush --- flush buffered data to file */ + +static NODE * +do_flush(int nargs) +{ + file_t *f; + int status = -1; + + f = file_open("flush", nargs, FALSE); + if (f != NULL) { + status = fflush(f->fp); + if (status != 0) + update_ERRNO(); + } + return make_number(status); +} + +/* do_fclose --- close an open file */ + +static NODE * +do_fclose(int nargs) +{ + file_t *f; + int status = -1; + + f = file_open("fclose", nargs, FALSE); + if (f != NULL) { + status = fclose(f->fp); + if (status != 0) + update_ERRNO(); + assert(files == f); + files = f->next; + efree(f); + } + return make_number(status); +} + +/* do_filesize --- return the size of the file */ + +static NODE * +do_filesize(int nargs) +{ + NODE *file; + struct stat sbuf; + AWKNUM d = -1.0; + + file = get_scalar_argument(0, FALSE); + force_string(file); + if (file->stlen == 0) + fatal(_("filesize: filename has empty string value")); + + if (stat(file->stptr, & sbuf) < 0) { + update_ERRNO(); + goto ferror; + } + if ((sbuf.st_mode & S_IFMT) != S_IFREG) { + errno = EINVAL; + update_ERRNO(); + goto ferror; + } + d = sbuf.st_size; + +ferror: + return make_number(d); +} + +/* do_file_exists --- check if path exists in the filesystem */ + +static NODE * +do_file_exists(int nargs) +{ + NODE *file; + struct stat sbuf; + int ret = 1; + + file = get_scalar_argument(0, FALSE); + force_string(file); + if (file->stlen == 0) + fatal(_("file_exists: filename has empty string value")); + + if (stat(file->stptr, & sbuf) < 0) { + if (errno != ENOENT) + update_ERRNO(); + ret = 0; + } + return make_number(ret); +} + + +/* file_open --- open a file or find an already opened file */ + +static file_t * +file_open(const char *builtin_name, int nargs, int do_open) +{ + NODE *file, *mode; + file_t *f, *prev; + FILE *fp; + int flags; + char *path; + + if (nargs < 2) + cant_happen(); + + file = get_scalar_argument(0, FALSE); + force_string(file); + mode = get_scalar_argument(1, TRUE); + force_string(mode); + + if (file->stlen == 0) + fatal(_("%s: filename has empty string value"), builtin_name); + if (mode->stlen == 0) + fatal(_("%s: mode has empty string value"), builtin_name); + + flags = mode2flags(mode->stptr); + if (flags < 0) + fatal(_("%s: invalid mode `%.*s'"), builtin_name, + (int) mode->stlen, mode->stptr); + + path = file->stptr; + for (prev = NULL, f = files; f != NULL; prev = f, f = f->next) { + if (strcmp(f->path, path) == 0 && f->flags == flags) { + /* Move to the head of the list */ + if (prev != NULL) { + prev->next = f->next; + f->next = files; + files = f; + } + return f; + } + } + + if (! do_open) { + if (do_lint) + lintwarn(_("%s: `%.*s' is not an open file"), + builtin_name, (int) file->stlen, file->stptr); + return NULL; + } + + fp = fopen(path, mode->stptr); + if (fp == NULL) + fatal(_("%s: cannot open file `%.*s'"), + builtin_name, (int) file->stlen, file->stptr); + + os_close_on_exec(fileno(fp), path, "", ""); + + emalloc(f, file_t *, sizeof(file_t) + file->stlen + 1, "file_open"); + memcpy(f->path, path, file->stlen + 1); + f->fp = fp; + f->flags = flags; + f->next = files; + files = f; + return f; +} + + +/* + * mode2flags --- convert a string mode to an integer flag; + * modified from str2mode in io.c. + */ + +static int +mode2flags(const char *mode) +{ + int ret = -1; + const char *second; + + if (mode == NULL || mode[0] == '\0') + return -1; + + second = & mode[1]; + + if (*second == 'b') + second++; + + switch(mode[0]) { + case 'r': + ret = O_RDONLY; + if (*second == '+' || *second == 'w') + ret = O_RDWR; + break; + + case 'w': + ret = O_WRONLY|O_CREAT|O_TRUNC; + if (*second == '+' || *second == 'r') + ret = O_RDWR|O_CREAT|O_TRUNC; + break; + + case 'a': + ret = O_WRONLY|O_APPEND|O_CREAT; + if (*second == '+') + ret = O_RDWR|O_APPEND|O_CREAT; + break; + + default: + ret = -1; + } + if (ret != -1 && strchr(mode, 'b') != NULL) + ret |= O_BINARY; + return ret; +} + + +/* dlload --- load new builtins in this library */ + +NODE * +dlload(NODE *tree, void *dl) +{ + make_builtin("fseek", do_fseek, 4); + make_builtin("fread", do_fread, 3); + make_builtin("fwrite", do_fwrite, 3); + make_builtin("flush", do_flush, 2); + make_builtin("filesize", do_filesize, 1); + make_builtin("file_exists", do_file_exists, 1); + make_builtin("fclose", do_fclose, 2); + make_builtin("ftruncate", do_ftruncate, 3); + make_builtin("unlink", do_unlink, 1); + return make_number((AWKNUM) 0); +} + + +/* dlunload --- routine called when exiting */ + +void +dlunload() +{ + file_t *f; + for (f = files; f != NULL; f = f->next) { + if (f->fp != NULL) { + fclose(f->fp); + f->fp = NULL; + } + } +} diff --git a/extension/record.awk b/extension/record.awk new file mode 100644 index 00000000..18a3ce48 --- /dev/null +++ b/extension/record.awk @@ -0,0 +1,252 @@ +# record.awk -- represent fixed-length records in a file as an array. +# Each element in the array corresponds to a record in the file. +# The records are numbered starting from 1, and each record read in +# from the file is cached. If opened using mode "r+", +# changes to the array are reflected in the file immediately i.e. +# writing to an element writes the data into the file. +# +# Usage: +# record(r, path [, reclen [, mode]]) +# r -- array to bind +# path -- filename +# reclen -- length of each record +# mode -- "r" for reading (default), "r+" for reading and writing +# +# With reclen <= 0, entire file is treated as one record #1. +# +# record(r, "data.in", 80, "r+") +# r[10] = r[1] +# for (i = 1; i in r; i++) +# print r[i] +# delete r[1] +# +# See Also: testrecord.sh +# +# +# TODO: +# * implement deferred writing +# * limit memory usage for read cache +# * use fixed size buffer when deleting a record +# + +BEGIN { + extension("fileop.so") + extension("bindarr.so") +} + +# _record_count --- return the number of records in file + +function _record_count(symbol, rd) +{ + if (! ("rectot" in rd)) + rd["rectot"] = ("reclen" in rd) ? + int(filesize(rd["path"]) / rd["reclen"]) : 1 + return rd["rectot"] +} + +# _record_exists --- check if record exists + +function _record_exists(symbol, rd, recnum, + path, mode, reclen, rectot) +{ + path = rd["path"] + reclen = ("reclen" in rd) ? rd["reclen"] : filesize(path) + mode = rd["mode"] + rectot = _record_count(symbol, rd) + + recnum = int(recnum) + if (recnum <= 0 || recnum > rectot) + return 0 + + if (! (recnum in symbol)) { + fseek(path, mode, (recnum - 1) * reclen, "SEEK_SET") + symbol[recnum] = fread(path, mode, reclen) + } + return 0 +} + +# _record_lookup --- lookup a record + +function _record_lookup(symbol, rd, recnum, + path, mode, reclen, rectot) +{ + path = rd["path"] + reclen = ("reclen" in rd) ? rd["reclen"] : filesize(path) + mode = rd["mode"] + rectot = _record_count(symbol, rd) + + recnum = int(recnum) + if (recnum <= 0 || recnum > rectot) { + ERRNO = sprintf("record: %s: reference to non-existent record #%d", path, recnum) + return -1 + } + + if (! (recnum in symbol)) { + fseek(path, mode, (recnum - 1) * reclen, "SEEK_SET") + symbol[recnum] = fread(path, mode, reclen) + } + return 0 +} + +# _record_clear --- remove all records + +function _record_clear(symbol, rd, + path, mode) +{ + path = rd["path"] + mode = rd["mode"] + if (mode == "r") { + ERRNO = sprintf("record: cannot delete record from file `%s' opened only for reading", path) + return -1 + } + ftruncate(path, mode, 0) + delete rd["reclen"] + return 0 +} + +# _record_delete --- delete a record from the file + +function _record_delete(symbol, rd, recnum, + path, mode, reclen, rectot) +{ + path = rd["path"] + reclen = ("reclen" in rd) ? rd["reclen"] : filesize(path) + mode = rd["mode"] + + if (mode == "r") { + ERRNO = sprintf("record: cannot delete record from file `%s' opened only for reading", path) + return -1 + } + + recnum = int(recnum) + if (! ("reclen" in rd)) { + # entire file is record #1 + ftruncate(path, mode, 0) + delete rd["reclen"] + return 0 + } + + sz = filesize(path) + rectot = int(sz / reclen) + + recstart = (recnum - 1) * reclen + off = sz - (recstart + reclen) + + fseek(path, mode, -off, "SEEK_END") + tmp = fread(path, mode, off) + fseek(path, mode, recstart, "SEEK_SET") + if (fwrite(path, mode, tmp) != length(tmp)) + return -1 + flush(path, mode) + ftruncate(path, mode, sz - reclen) + + rd["rectot"] = rectot - 1 + for (i = recnum + 1; i <= rectot; i++) { + if (i in symbol) { + symbol[i - 1] = symbol[i] + delete symbol[i] + } + } + return 0 +} + +# _record_store --- write a record to file + +function _record_store(symbol, rd, recnum, + path, mode, reclen, val) +{ + path = rd["path"] + reclen = ("reclen" in rd) ? rd["reclen"] : filesize(path) + mode = rd["mode"] + + if (mode == "r") { + ERRNO = sprintf("record: cannot write to file `%s' opened only for reading", path) + return -1 + } + + recnum = int(recnum) + val = symbol[recnum] + if (! ("reclen" in rd)) { + # the entire file is record #1 + if (reclen != 0) + ftruncate(path, mode, 0) + } else if (length(val) != reclen) { + ERRNO = sprintf("record: %s: invalid length for record #%d", path, recnum) + return -1 + } + + fseek(path, mode, (recnum - 1) * reclen, "SEEK_SET") + if (fwrite(path, mode, val) != length(val)) + return -1 + flush(path, mode) + return 0 +} + +# _record_fetchall --- retrieve all the records + +function _record_fetchall(symbol, rd, + path, mode, reclen, rectot, recnum) +{ + path = rd["path"] + reclen = ("reclen" in rd) ? rd["reclen"] : filesize(path) + mode = rd["mode"] + rectot = _record_count(symbol, rd) + + if (rd["loaded"]) + return 0 + for (recnum = 1; recnum <= rectot; recnum++) { + if (! (recnum in symbol)) { + fseek(path, mode, (recnum - 1) * reclen, "SEEK_SET") + symbol[recnum] = fread(path, mode, reclen) + } + } + rd["loaded"] = 1 + return 0 +} + +# _record_init --- initialization routine + +function _record_init(symbol, rd) +{ + if (! file_exists(rd["path"])) { + ERRNO = sprintf("record: cannot open file `%s' for reading", rd["path"]) + return -1 + } + return 0 +} + +# _record_fini --- cleanup routine + +function _record_fini(symbol, rd) +{ + fclose(rd["path"], rd["mode"]) +} + +# record --- bind an array to a file with fixed-length records + +function record(array, path, reclen, mode, rd) +{ + if (path == "") { + print "fatal: record: empty string value for filename" > "/dev/stderr" + exit(1) + } + + # register our array routines + rd["init"] = "_record_init" + rd["fini"] = "_record_fini" + rd["count"] = "_record_count" + rd["exists"] = "_record_exists" + rd["lookup"] = "_record_lookup" + rd["delete"] = "_record_delete" + rd["store"] = "_record_store" + rd["clear"] = "_record_clear" + rd["fetchall"] = "_record_fetchall" + + rd["path"] = path + if (reclen > 0) + rd["reclen"] = reclen + rd["mode"] = mode == "r+" ? "r+" : "r" + + delete array + bind_array(array, rd) +} diff --git a/extension/spec_array.c b/extension/spec_array.c index b2713002..78b24018 100644 --- a/extension/spec_array.c +++ b/extension/spec_array.c @@ -130,13 +130,43 @@ deferred_array_##F(NODE *symbol, NODE *subs) \ DEF_ARR(exists) DEF_ARR(lookup) -DEF_ARR(clear) -DEF_ARR(remove) DEF_ARR(list) DEF_ARR(copy) #undef DEF_ARR +/* deferred_array_remove --- remove the index from the array */ + +static NODE ** +deferred_array_remove(NODE *symbol, NODE *subs) +{ + array_t *av = (array_t *) symbol->xarray; + + (void) SUPER(aremove)(symbol, subs); + if (av) { + symbol->xarray = NULL; + (*av->load_func)(symbol, av->data); + symbol->xarray = (NODE *) av; + } + return NULL; +} + +/* deferred_array_clear --- flush all the values in symbol[] */ + +static NODE ** +deferred_array_clear(NODE *symbol, NODE *subs) +{ + array_t *av = (array_t *) symbol->xarray; + + (void) SUPER(aclear)(symbol, subs); + if (av) { + symbol->xarray = NULL; + (*av->load_func)(symbol, av->data); + symbol->xarray = (NODE *) av; + } + return NULL; +} + /* * dyn_array --- array with triggers for reading and writing @@ -272,12 +302,13 @@ dyn_array_remove(NODE *symbol, NODE *subs) { array_t *av = (array_t *) symbol->xarray; + (void) SUPER(aremove)(symbol, subs); if (av && av->store_func) { symbol->xarray = NULL; (*av->store_func)(symbol, subs, NULL, av->data); symbol->xarray = (NODE *) av; } - return SUPER(aremove)(symbol, subs); + return NULL; } /* dyn_array_clear --- flush all the values in symbol[] */ @@ -287,12 +318,13 @@ dyn_array_clear(NODE *symbol, NODE *subs) { array_t *av = (array_t *) symbol->xarray; + (void) SUPER(aclear)(symbol, subs); if (av && av->store_func) { symbol->xarray = NULL; (*av->store_func)(symbol, NULL, NULL, av->data); symbol->xarray = (NODE *) av; } - return SUPER(aclear)(symbol, subs); + return NULL; } /* dyn_array_list --- return a list of items in symbol[] */ diff --git a/extension/steps b/extension/steps index 1abab9d2..9e0cc00e 100755 --- a/extension/steps +++ b/extension/steps @@ -16,6 +16,7 @@ gcc -fPIC -shared -Wall -DHAVE_CONFIG_H -c -O -g -I.. rwarray.c gcc -fPIC -shared -Wall -DHAVE_CONFIG_H -c -O -g -I.. spec_array.c gcc -fPIC -shared -Wall -DHAVE_CONFIG_H -c -O -g -I.. sparr.c gcc -fPIC -shared -Wall -DHAVE_CONFIG_H -c -O -g -I.. bindarr.c +gcc -fPIC -shared -Wall -DHAVE_CONFIG_H -c -O -g -I.. fileop.c ld -o dl.so -shared dl.o ld -o filefuncs.so -shared filefuncs.o ld -o fork.so -shared fork.o @@ -26,3 +27,4 @@ ld -o testarg.so -shared testarg.o ld -o rwarray.so -shared rwarray.o ld -o sparr.so -shared sparr.o spec_array.o ld -o bindarr.so -shared bindarr.o +ld -o fileop.so -shared fileop.o diff --git a/extension/testrecord.sh b/extension/testrecord.sh new file mode 100755 index 00000000..61d1ba76 --- /dev/null +++ b/extension/testrecord.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +AWK=../gawk +$AWK 'BEGIN { OFS = ORS = ""; for (j = 1; j <= 4; j++) for (i = 1; i <= 16; i++) print j}' > _rec.in +for i in 1 2 3 4 5 +do +$AWK -f record.awk -vinfile='_rec.in' -e 'BEGIN { +reclen = 16 +record(r, infile, reclen, "r+") +FIELDWIDTHS="8 4 4" +for (i = 1; i in r; i++) { + $0 = r[i] + print $1 +} +delete r[1] +unbind_array(r) +print "--" }' +done +rm -f _rec.in |