diff options
-rw-r--r-- | ChangeLog | 55 | ||||
-rw-r--r-- | awk.h | 6 | ||||
-rw-r--r-- | extension/ChangeLog | 7 | ||||
-rw-r--r-- | extension/Makefile.am | 8 | ||||
-rw-r--r-- | extension/Makefile.in | 33 | ||||
-rw-r--r-- | extension/readdir_test.c | 336 | ||||
-rw-r--r-- | field.c | 121 | ||||
-rw-r--r-- | gawkapi.h | 17 | ||||
-rw-r--r-- | io.c | 23 | ||||
-rw-r--r-- | main.c | 17 | ||||
-rw-r--r-- | test/ChangeLog | 7 | ||||
-rw-r--r-- | test/Makefile.am | 8 | ||||
-rw-r--r-- | test/Makefile.in | 8 |
13 files changed, 595 insertions, 51 deletions
@@ -1,3 +1,58 @@ +2017-03-06 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * field.c (normal_parse_field): Renamed from save_parse_field to reflect + better its purpose. Added a comment to explain more clearly what's + going on. + (set_record, set_parser): Rename save_parse_field to normal_parse_field. + +2017-03-06 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * gawkapi.h (awk_input_buf_t): Remove field_width array and instead + add it as a 6th argument to the get_record function. This should + not break existing code, since it's fine to ignore the additional + argument. Document the behavior of the field_width argument. + * io.c (inrec): Pass pointer to field_width array to get_a_record, + and then hand it off to set_record. + (do_getline_redir): If not reading into a variable, pass pointer to + field_width array to get_a_record and then hand it off to set_record. + (do_getline): Ditto. + (get_a_record): Add a 4th field_width argument to pass through to + the API get_record method. + +2017-03-05 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * awk.h (set_record): Add a new argument containing a field-width + array returned by an API parser. + (field_sep_type): Add new enum value Using_API. + (current_field_sep_str): Declare new function. + * field.c (save_parse_field): New static variable to save the + parse_field value in cases where it's overridden by API parsing. + (api_fw): New static variable to hold pointer to API parser fieldwidth + array. + (set_record): Add new field-width array argument. If present, API + parsing will override the default parsing mechanism. + (api_parse_field): New field parser using field widths supplied by the + API. This is very similar to the existing fw_parse_field function. + (get_field): Fix typo in comment. + (set_parser): New function to set default parser and check whether + there's an API parser override in effect. Update PROCINFO["FS"] if + something has changed. 
+ (set_FIELDWIDTHS): Use set_parser and stop updating PROCINFO["FS"]. + (set_FS): Ditto. + (set_FPAT): Ditto. + (current_field_sep): Return Using_API when using the API field parsing + widths. + (current_field_sep_str): New function to return the proper string + value for PROCINFO["FS"]. + * gawkapi.h (awk_input_buf_t): Add field_width array to enable the + parser get_record function to supply field widths to override the + default gawk field parsing mechanism. + * io.c (inrec): Pass iop->public.field_width to set_record as the + 3rd argument to enable API field parsing overrides. + (do_getline_redir, do_getline): Ditto. + * main.c (load_procinfo): Use new current_field_sep_str function + instead of switching on the return value from current_field_sep. + 2017-02-23 Arnold D. Robbins <arnold@skeeve.com> * awk.h (boolval): Return bool instead of int. @@ -1510,7 +1510,7 @@ extern NODE *get_actual_argument(NODE *, int, bool); #endif /* field.c */ extern void init_fields(void); -extern void set_record(const char *buf, int cnt); +extern void set_record(const char *buf, int cnt, const int *); extern void reset_record(void); extern void rebuild_record(void); extern void set_NF(void); @@ -1527,9 +1527,11 @@ extern void update_PROCINFO_num(const char *subscript, AWKNUM val); typedef enum { Using_FS, Using_FIELDWIDTHS, - Using_FPAT + Using_FPAT, + Using_API } field_sep_type; extern field_sep_type current_field_sep(void); +extern const char *current_field_sep_str(void); /* gawkapi.c: */ extern gawk_api_t api_impl; diff --git a/extension/ChangeLog b/extension/ChangeLog index f1622b69..5d9a194e 100644 --- a/extension/ChangeLog +++ b/extension/ChangeLog @@ -10,6 +10,13 @@ installed, automake cannot use the final destination directory to determine -rpath by itself. The value doesn't matter. +2017-03-06 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * readdir_test.c: Test extension using new get_record field_width + parsing feature. 
+ * Makefile.am (noinst_LTLIBRARIES): Add readdir_test.la. + (readdir_test_la_*): Configure building of new extension library. + 2017-01-21 Eli Zaretskii <eliz@gnu.org> * testext.c (getuid) [__MINGW32__]: New function, mirrors what diff --git a/extension/Makefile.am b/extension/Makefile.am index 185bc795..6ea16f5d 100644 --- a/extension/Makefile.am +++ b/extension/Makefile.am @@ -48,6 +48,7 @@ pkgextension_LTLIBRARIES = \ time.la noinst_LTLIBRARIES = \ + readdir_test.la \ testext.la MY_MODULE_FLAGS = -module -avoid-version -no-undefined @@ -106,6 +107,13 @@ testext_la_SOURCES = testext.c testext_la_LDFLAGS = $(MY_MODULE_FLAGS) -rpath /foo testext_la_LIBADD = $(MY_LIBS) +# N.B. Because we are not installing readdir_test, we must specify -rpath in +# LDFLAGS to get automake to build a shared library, since it needs +# an installation path. +readdir_test_la_SOURCES = readdir_test.c +readdir_test_la_LDFLAGS = $(MY_MODULE_FLAGS) -rpath /foo +readdir_test_la_LIBADD = $(MY_LIBS) + install-data-hook: for i in $(pkgextension_LTLIBRARIES) ; do \ $(RM) $(DESTDIR)$(pkgextensiondir)/$$i ; \ diff --git a/extension/Makefile.in b/extension/Makefile.in index 6557693a..c0e2676b 100644 --- a/extension/Makefile.in +++ b/extension/Makefile.in @@ -199,6 +199,13 @@ readdir_la_OBJECTS = $(am_readdir_la_OBJECTS) readdir_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(readdir_la_LDFLAGS) $(LDFLAGS) -o $@ +readdir_test_la_DEPENDENCIES = $(am__DEPENDENCIES_1) +am_readdir_test_la_OBJECTS = readdir_test.lo +readdir_test_la_OBJECTS = $(am_readdir_test_la_OBJECTS) +readdir_test_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(readdir_test_la_LDFLAGS) $(LDFLAGS) \ + -o $@ readfile_la_DEPENDENCIES = $(am__DEPENDENCIES_1) am_readfile_la_OBJECTS = readfile.lo readfile_la_OBJECTS = $(am_readfile_la_OBJECTS) @@ -271,14 +278,16 @@ am__v_CCLD_0 = 
@echo " CCLD " $@; am__v_CCLD_1 = SOURCES = $(filefuncs_la_SOURCES) $(fnmatch_la_SOURCES) \ $(fork_la_SOURCES) $(inplace_la_SOURCES) $(ordchr_la_SOURCES) \ - $(readdir_la_SOURCES) $(readfile_la_SOURCES) \ - $(revoutput_la_SOURCES) $(revtwoway_la_SOURCES) \ - $(rwarray_la_SOURCES) $(testext_la_SOURCES) $(time_la_SOURCES) + $(readdir_la_SOURCES) $(readdir_test_la_SOURCES) \ + $(readfile_la_SOURCES) $(revoutput_la_SOURCES) \ + $(revtwoway_la_SOURCES) $(rwarray_la_SOURCES) \ + $(testext_la_SOURCES) $(time_la_SOURCES) DIST_SOURCES = $(filefuncs_la_SOURCES) $(fnmatch_la_SOURCES) \ $(fork_la_SOURCES) $(inplace_la_SOURCES) $(ordchr_la_SOURCES) \ - $(readdir_la_SOURCES) $(readfile_la_SOURCES) \ - $(revoutput_la_SOURCES) $(revtwoway_la_SOURCES) \ - $(rwarray_la_SOURCES) $(testext_la_SOURCES) $(time_la_SOURCES) + $(readdir_la_SOURCES) $(readdir_test_la_SOURCES) \ + $(readfile_la_SOURCES) $(revoutput_la_SOURCES) \ + $(revtwoway_la_SOURCES) $(rwarray_la_SOURCES) \ + $(testext_la_SOURCES) $(time_la_SOURCES) RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \ ctags-recursive dvi-recursive html-recursive info-recursive \ install-data-recursive install-dvi-recursive \ @@ -520,6 +529,7 @@ pkgextension_LTLIBRARIES = \ time.la noinst_LTLIBRARIES = \ + readdir_test.la \ testext.la MY_MODULE_FLAGS = -module -avoid-version -no-undefined @@ -567,6 +577,13 @@ time_la_LIBADD = $(MY_LIBS) testext_la_SOURCES = testext.c testext_la_LDFLAGS = $(MY_MODULE_FLAGS) -rpath /foo testext_la_LIBADD = $(MY_LIBS) + +# N.B. Because we are not installing readdir_test, we must specify -rpath in +# LDFLAGS to get automake to build a shared library, since it needs +# an installation path. 
+readdir_test_la_SOURCES = readdir_test.c +readdir_test_la_LDFLAGS = $(MY_MODULE_FLAGS) -rpath /foo +readdir_test_la_LIBADD = $(MY_LIBS) EXTRA_DIST = build-aux/config.rpath \ ChangeLog \ ChangeLog.0 \ @@ -702,6 +719,9 @@ ordchr.la: $(ordchr_la_OBJECTS) $(ordchr_la_DEPENDENCIES) $(EXTRA_ordchr_la_DEPE readdir.la: $(readdir_la_OBJECTS) $(readdir_la_DEPENDENCIES) $(EXTRA_readdir_la_DEPENDENCIES) $(AM_V_CCLD)$(readdir_la_LINK) -rpath $(pkgextensiondir) $(readdir_la_OBJECTS) $(readdir_la_LIBADD) $(LIBS) +readdir_test.la: $(readdir_test_la_OBJECTS) $(readdir_test_la_DEPENDENCIES) $(EXTRA_readdir_test_la_DEPENDENCIES) + $(AM_V_CCLD)$(readdir_test_la_LINK) $(readdir_test_la_OBJECTS) $(readdir_test_la_LIBADD) $(LIBS) + readfile.la: $(readfile_la_OBJECTS) $(readfile_la_DEPENDENCIES) $(EXTRA_readfile_la_DEPENDENCIES) $(AM_V_CCLD)$(readfile_la_LINK) -rpath $(pkgextensiondir) $(readfile_la_OBJECTS) $(readfile_la_LIBADD) $(LIBS) @@ -733,6 +753,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/inplace.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordchr.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/readdir.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/readdir_test.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/readfile.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/revoutput.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/revtwoway.Plo@am__quote@ diff --git a/extension/readdir_test.c b/extension/readdir_test.c new file mode 100644 index 00000000..cc23081e --- /dev/null +++ b/extension/readdir_test.c @@ -0,0 +1,336 @@ +/* + * readdir_test.c --- Provide an input parser to read directories + * + * Arnold Robbins + * arnold@skeeve.com + * Written 7/2012 + * + * Andrew Schorr and Arnold Robbins: further fixes 8/2012. + * Simplified 11/2012. + */ + +/* + * Copyright (C) 2012-2014 the Free Software Foundation, Inc.
+ * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#define _BSD_SOURCE +#include <stdio.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <sys/types.h> +#include <sys/stat.h> + +#ifdef HAVE_LIMITS_H +#include <limits.h> +#endif + +#ifdef HAVE_DIRENT_H +#include <dirent.h> +#else +#error Cannot compile the dirent extension on this system! 
+#endif + +#ifdef __MINGW32__ +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#endif + +#include "gawkapi.h" + +#include "gawkdirfd.h" + +#include "gettext.h" +#define _(msgid) gettext(msgid) +#define N_(msgid) msgid + +#ifndef PATH_MAX +#define PATH_MAX 1024 /* a good guess */ +#endif + +static const gawk_api_t *api; /* for convenience macros to work */ +static awk_ext_id_t *ext_id; +static const char *ext_version = "readdir extension: version 1.0"; + +static awk_bool_t init_readdir(void); +static awk_bool_t (*init_func)(void) = init_readdir; + +int plugin_is_GPL_compatible; + +/* data type for the opaque pointer: */ + +typedef struct open_directory { + DIR *dp; + char *buf; + int field_width[7]; +} open_directory_t; + +/* ftype --- return type of file as a single character string */ + +static const char * +ftype(struct dirent *entry, const char *dirname) +{ +#ifdef DT_BLK + (void) dirname; /* silence warnings */ + switch (entry->d_type) { + case DT_BLK: return "b"; + case DT_CHR: return "c"; + case DT_DIR: return "d"; + case DT_FIFO: return "p"; + case DT_LNK: return "l"; + case DT_REG: return "f"; + case DT_SOCK: return "s"; + default: + case DT_UNKNOWN: return "u"; + } +#else + char fname[PATH_MAX]; + struct stat sbuf; + + strcpy(fname, dirname); + strcat(fname, "/"); + strcat(fname, entry->d_name); + if (stat(fname, &sbuf) == 0) { + if (S_ISBLK(sbuf.st_mode)) + return "b"; + if (S_ISCHR(sbuf.st_mode)) + return "c"; + if (S_ISDIR(sbuf.st_mode)) + return "d"; + if (S_ISFIFO(sbuf.st_mode)) + return "p"; + if (S_ISREG(sbuf.st_mode)) + return "f"; +#ifdef S_ISLNK + if (S_ISLNK(sbuf.st_mode)) + return "l"; +#endif +#ifdef S_ISSOCK + if (S_ISSOCK(sbuf.st_mode)) + return "s"; +#endif + } + return "u"; +#endif +} + +/* get_inode --- get the inode of a file */ +static long long +get_inode(struct dirent *entry, const char *dirname) +{ +#ifdef __MINGW32__ + char fname[PATH_MAX]; + HANDLE fh; + BY_HANDLE_FILE_INFORMATION info; + + sprintf(fname, "%s\\%s", dirname, 
entry->d_name); + fh = CreateFile(fname, 0, 0, NULL, OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS, NULL); + if (fh == INVALID_HANDLE_VALUE) + return 0; + if (GetFileInformationByHandle(fh, &info)) { + long long inode = info.nFileIndexHigh; + + inode <<= 32; + inode += info.nFileIndexLow; + return inode; + } + return 0; +#else + (void) dirname; /* silence warnings */ + return entry->d_ino; +#endif +} + +/* dir_get_record --- get one record at a time out of a directory */ + +static int +dir_get_record(char **out, awk_input_buf_t *iobuf, int *errcode, + char **rt_start, size_t *rt_len, const int **field_width) +{ + DIR *dp; + struct dirent *dirent; + int len, flen; + open_directory_t *the_dir; + const char *ftstr; + unsigned long long ino; + + /* + * The caller sets *errcode to 0, so we should set it only if an + * error occurs. + */ + + if (out == NULL || iobuf == NULL || iobuf->opaque == NULL) + return EOF; + + the_dir = (open_directory_t *) iobuf->opaque; + dp = the_dir->dp; + + /* + * Initialize errno, since readdir does not set it to zero on EOF. 
+ */ + errno = 0; + dirent = readdir(dp); + if (dirent == NULL) { + *errcode = errno; /* in case there was an error */ + return EOF; + } + + ino = get_inode (dirent, iobuf->name); + +#if __MINGW32__ + len = sprintf(the_dir->buf, "%I64u", ino); +#else + len = sprintf(the_dir->buf, "%llu", ino); +#endif + the_dir->field_width[1] = len; + len += (flen = sprintf(the_dir->buf + len, "/%s", dirent->d_name)); + the_dir->field_width[3] = flen-1; + + ftstr = ftype(dirent, iobuf->name); + len += (flen = sprintf(the_dir->buf + len, "/%s", ftstr)); + the_dir->field_width[5] = flen-1; + + *out = the_dir->buf; + + *rt_start = NULL; + *rt_len = 0; /* set RT to "" */ + if (field_width) + *field_width = the_dir->field_width; + return len; +} + +/* dir_close --- close up when done */ + +static void +dir_close(awk_input_buf_t *iobuf) +{ + open_directory_t *the_dir; + + if (iobuf == NULL || iobuf->opaque == NULL) + return; + + the_dir = (open_directory_t *) iobuf->opaque; + + closedir(the_dir->dp); + gawk_free(the_dir->buf); + gawk_free(the_dir); + + iobuf->fd = -1; +} + +/* dir_can_take_file --- return true if we want the file */ + +static awk_bool_t +dir_can_take_file(const awk_input_buf_t *iobuf) +{ + if (iobuf == NULL) + return awk_false; + + return (iobuf->fd != INVALID_HANDLE && S_ISDIR(iobuf->sbuf.st_mode)); +} + +/* + * dir_take_control_of --- set up input parser. + * We can assume that dir_can_take_file just returned true, + * and no state has changed since then. 
+ */ + +static awk_bool_t +dir_take_control_of(awk_input_buf_t *iobuf) +{ + DIR *dp; + open_directory_t *the_dir; + size_t size; + + errno = 0; +#ifdef HAVE_FDOPENDIR + dp = fdopendir(iobuf->fd); +#else + dp = opendir(iobuf->name); + if (dp != NULL) + iobuf->fd = dirfd(dp); +#endif + if (dp == NULL) { + warning(ext_id, _("dir_take_control_of: opendir/fdopendir failed: %s"), + strerror(errno)); + update_ERRNO_int(errno); + return awk_false; + } + + emalloc(the_dir, open_directory_t *, sizeof(open_directory_t), "dir_take_control_of"); + the_dir->dp = dp; + /* pre-populate the field_width array with constant values: */ + the_dir->field_width[0] = 0; /* no leading space */ + the_dir->field_width[2] = 1; /* single slash sign separator*/ + the_dir->field_width[4] = 1; /* single slash sign separator*/ + the_dir->field_width[6] = -1; /* terminate it after 3 fields */ + size = sizeof(struct dirent) + 21 /* max digits in inode */ + 2 /* slashes */; + emalloc(the_dir->buf, char *, size, "dir_take_control_of"); + + iobuf->opaque = the_dir; + iobuf->get_record = dir_get_record; + iobuf->close_func = dir_close; + + return awk_true; +} + +static awk_input_parser_t readdir_parser = { + "readdir", + dir_can_take_file, + dir_take_control_of, + NULL +}; + +#ifdef TEST_DUPLICATE +static awk_input_parser_t readdir_parser2 = { + "readdir2", + dir_can_take_file, + dir_take_control_of, + NULL +}; +#endif + +/* init_readdir --- set things up */ + +static awk_bool_t +init_readdir() +{ + register_input_parser(& readdir_parser); +#ifdef TEST_DUPLICATE + register_input_parser(& readdir_parser2); +#endif + + return awk_true; +} + +static awk_ext_func_t func_table[] = { + { NULL, NULL, 0, 0, awk_false, NULL } +}; + +/* define the dl_load function using the boilerplate macro */ + +dl_load_func(func_table, readdir, "") @@ -40,6 +40,13 @@ typedef void (* Setfunc)(long, char *, long, NODE *); static long (*parse_field)(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); +/* + *
N.B. The normal_parse_field function pointer contains the parse_field value + * that should be used except when API field parsing is overriding the default + * field parsing mechanism. + */ +static long (*normal_parse_field)(long, char **, int, NODE *, + Regexp *, Setfunc, NODE *, NODE *, bool); static long re_parse_field(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); static long def_parse_field(long, char **, int, NODE *, @@ -50,6 +57,9 @@ static long sc_parse_field(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); static long fw_parse_field(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); +static long api_parse_field(long, char **, int, NODE *, + Regexp *, Setfunc, NODE *, NODE *, bool); +static const int *api_fw = NULL; static long fpat_parse_field(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool); static void set_element(long num, char * str, long len, NODE *arr); @@ -252,7 +262,7 @@ rebuild_record() * but better correct than fast. */ void -set_record(const char *buf, int cnt) +set_record(const char *buf, int cnt, const int *fw) { NODE *n; static char *databuf; @@ -306,6 +316,20 @@ set_record(const char *buf, int cnt) n->stfmt = STFMT_UNUSED; n->flags = (STRING|STRCUR|USER_INPUT); /* do not set MALLOC */ fields_arr[0] = n; + if (fw != api_fw) { + if ((api_fw = fw) != NULL) { + if (parse_field != api_parse_field) { + parse_field = api_parse_field; + update_PROCINFO_str("FS", "API"); + } + } + else { + if (parse_field != normal_parse_field) { + parse_field = normal_parse_field; + update_PROCINFO_str("FS", current_field_sep_str()); + } + } + } #undef INITIAL_SIZE #undef MAX_SIZE @@ -760,6 +784,49 @@ fw_parse_field(long up_to, /* parse only up to this field number */ return nf; } +/* + * api_parse_field --- field parsing using field widths returned by API parser. + * + * This is called from get_field() via (*parse_field)(). 
+ */ +static long +api_parse_field(long up_to, /* parse only up to this field number */ + char **buf, /* on input: string to parse; on output: point to start next */ + int len, + NODE *fs ATTRIBUTE_UNUSED, + Regexp *rp ATTRIBUTE_UNUSED, + Setfunc set, /* routine to set the value of the parsed field */ + NODE *n, + NODE *dummy ATTRIBUTE_UNUSED, /* sep_arr not needed here: hence dummy */ + bool in_middle ATTRIBUTE_UNUSED) +{ + char *scan = *buf; + long nf = parse_high_water; + char *end = scan + len; + int skiplen; + + if (up_to == UNLIMITED) + nf = 0; + if (len == 0) + return nf; + while (nf < up_to) { + if (((skiplen = api_fw[2*nf]) < 0) || + ((len = api_fw[2*nf+1]) < 0)) { + *buf = end; + return nf; + } + if (skiplen > end - scan) + skiplen = end - scan; + scan += skiplen; + if (len > end - scan) + len = end - scan; + (*set)(++nf, scan, (long) len, n); + scan += len; + } + *buf = scan; + return nf; +} + /* invalidate_field0 --- $0 needs reconstruction */ void @@ -845,7 +912,7 @@ get_field(long requested, Func_ptr *assign) if (parse_extent == fields_arr[0]->stptr + fields_arr[0]->stlen) NF = parse_high_water; else if (parse_field == fpat_parse_field) { - /* FPAT parsing is wierd, isolate the special cases */ + /* FPAT parsing is weird, isolate the special cases */ char *rec_start = fields_arr[0]->stptr; char *rec_end = fields_arr[0]->stptr + fields_arr[0]->stlen; @@ -1057,6 +1124,18 @@ do_patsplit(int nargs) return tmp; } +/* set_parser: update the current (non-API) parser */ + +static void +set_parser(long (*func)(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool)) +{ + normal_parse_field = func; + if (parse_field != api_parse_field && parse_field != func) { + parse_field = func; + update_PROCINFO_str("FS", current_field_sep_str()); + } +} + /* set_FIELDWIDTHS --- handle an assignment to FIELDWIDTHS */ void @@ -1084,7 +1163,7 @@ set_FIELDWIDTHS() if (fields_arr != NULL) (void) get_field(UNLIMITED - 1, 0); - parse_field = fw_parse_field; + 
set_parser(fw_parse_field); tmp = force_string(FIELDWIDTHS_node->var_value); scan = tmp->stptr; @@ -1134,7 +1213,6 @@ set_FIELDWIDTHS() } FIELDWIDTHS[i+1] = -1; - update_PROCINFO_str("FS", "FIELDWIDTHS"); if (fatal_error) fatal(_("invalid FIELDWIDTHS value, near `%s'"), scan); @@ -1205,7 +1283,7 @@ choose_fs_function: if (! do_traditional && fs->stlen == 0) { static bool warned = false; - parse_field = null_parse_field; + set_parser(null_parse_field); if (do_lint && ! warned) { warned = true; @@ -1214,10 +1292,10 @@ choose_fs_function: } else if (fs->stlen > 1) { if (do_lint_old) warning(_("old awk does not support regexps as value of `FS'")); - parse_field = re_parse_field; + set_parser(re_parse_field); } else if (RS_is_null) { /* we know that fs->stlen <= 1 */ - parse_field = sc_parse_field; + set_parser(sc_parse_field); if (fs->stlen == 1) { if (fs->stptr[0] == ' ') { default_FS = true; @@ -1233,7 +1311,7 @@ choose_fs_function: } } } else { - parse_field = def_parse_field; + set_parser(def_parse_field); if (fs->stlen == 1) { if (fs->stptr[0] == ' ') @@ -1242,7 +1320,7 @@ choose_fs_function: /* same special case */ strcpy(buf, "[\\\\]"); else - parse_field = sc_parse_field; + set_parser(sc_parse_field); } } if (remake_re) { @@ -1254,7 +1332,7 @@ choose_fs_function: FS_re_yes_case = make_regexp(buf, strlen(buf), false, true, true); FS_re_no_case = make_regexp(buf, strlen(buf), true, true, true); FS_regexp = (IGNORECASE ? 
FS_re_no_case : FS_re_yes_case); - parse_field = re_parse_field; + set_parser(re_parse_field); } else if (parse_field == re_parse_field) { FS_re_yes_case = make_regexp(fs->stptr, fs->stlen, false, true, true); FS_re_no_case = make_regexp(fs->stptr, fs->stlen, true, true, true); @@ -1270,8 +1348,6 @@ choose_fs_function: */ if (fs->stlen == 1 && parse_field == re_parse_field) FS_regexp = FS_re_yes_case; - - update_PROCINFO_str("FS", "FS"); } /* current_field_sep --- return what field separator is */ @@ -1283,10 +1359,27 @@ current_field_sep() return Using_FIELDWIDTHS; else if (parse_field == fpat_parse_field) return Using_FPAT; + else if (parse_field == api_parse_field) + return Using_API; else return Using_FS; } +/* current_field_sep_str --- return the PROCINFO["FS"] string for the current field separator */ + +const char * +current_field_sep_str() +{ + if (parse_field == fw_parse_field) + return "FIELDWIDTHS"; + else if (parse_field == fpat_parse_field) + return "FPAT"; + else if (parse_field == api_parse_field) + return "API"; + else + return "FS"; +} + /* update_PROCINFO_str --- update PROCINFO[sub] with string value */ void @@ -1373,7 +1466,7 @@ set_FPAT() set_fpat_function: fpat = force_string(FPAT_node->var_value); - parse_field = fpat_parse_field; + set_parser(fpat_parse_field); if (remake_re) { refree(FPAT_re_yes_case); @@ -1384,8 +1477,6 @@ set_fpat_function: FPAT_re_no_case = make_regexp(fpat->stptr, fpat->stlen, true, true, true); FPAT_regexp = (IGNORECASE ? FPAT_re_no_case : FPAT_re_yes_case); } - - update_PROCINFO_str("FS", "FPAT"); } /* @@ -146,9 +146,24 @@ typedef struct awk_input { * than zero, gawk will automatically update the ERRNO variable based * on the value of *errcode (e.g., setting *errcode = errno should do * the right thing). + * + * If field_width is non-NULL, then its value will be initialized + * to NULL, and the function may set it to point to an array of + * integers supplying field width information to override the default + * gawk field parsing mechanism.
The field_width array should have + * at least 2*NF+1 elements, and the value of field_width[2*NF] + * must be negative. The first entry field_width[0] should contain + * the number of bytes to skip before $1; field_width[1] contains + * the number of bytes in $1. Note that these values are specified + * in bytes, not (potentially multi-byte) characters! And note that this + * array will not be copied by gawk; it must persist at least until the + * next call to get_record or close_func. Note that field_width will + * be NULL when getline is assigning the results to a variable, thus + * field parsing is not needed. */ int (*get_record)(char **out, struct awk_input *iobuf, int *errcode, - char **rt_start, size_t *rt_len); + char **rt_start, size_t *rt_len, + const int **field_width); /* * No argument prototype on read_func to allow for older systems @@ -287,7 +287,7 @@ static RECVALUE rsrescan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state); static RECVALUE (*matchrec)(IOBUF *iop, struct recmatch *recm, SCANSTATE *state) = rs1scan; -static int get_a_record(char **out, IOBUF *iop, int *errcode); +static int get_a_record(char **out, IOBUF *iop, int *errcode, const int **field_width); static void free_rp(struct redirect *rp); @@ -590,13 +590,14 @@ inrec(IOBUF *iop, int *errcode) char *begin; int cnt; bool retval = true; + const int *field_width = NULL; if (at_eof(iop) && no_data_left(iop)) cnt = EOF; else if ((iop->flag & IOP_CLOSED) != 0) cnt = EOF; else - cnt = get_a_record(& begin, iop, errcode); + cnt = get_a_record(& begin, iop, errcode, & field_width); /* Note that get_a_record may return -2 when I/O would block */ if (cnt < 0) { @@ -604,7 +605,7 @@ inrec(IOBUF *iop, int *errcode) } else { INCREMENT_REC(NR); INCREMENT_REC(FNR); - set_record(begin, cnt); + set_record(begin, cnt, field_width); if (*errcode > 0) retval = false; } @@ -2618,6 +2619,7 @@ do_getline_redir(int into_variable, enum redirval redirtype) NODE *redir_exp = NULL; NODE **lhs = NULL; int 
redir_error = 0; + const int *field_width = NULL; if (into_variable) lhs = POP_ADDRESS(); @@ -2646,7 +2648,7 @@ do_getline_redir(int into_variable, enum redirval redirtype) return make_number((AWKNUM) 0.0); errcode = 0; - cnt = get_a_record(& s, iop, & errcode); + cnt = get_a_record(& s, iop, & errcode, (lhs ? NULL : & field_width)); if (errcode != 0) { if (! do_traditional && (errcode != -1)) update_ERRNO_int(errcode); @@ -2668,7 +2670,7 @@ do_getline_redir(int into_variable, enum redirval redirtype) } if (lhs == NULL) /* no optional var. */ - set_record(s, cnt); + set_record(s, cnt, field_width); else { /* assignment to variable */ unref(*lhs); *lhs = make_string(s, cnt); @@ -2686,6 +2688,7 @@ do_getline(int into_variable, IOBUF *iop) int cnt = EOF; char *s = NULL; int errcode; + const int *field_width = NULL; if (iop == NULL) { /* end of input */ if (into_variable) @@ -2694,7 +2697,7 @@ do_getline(int into_variable, IOBUF *iop) } errcode = 0; - cnt = get_a_record(& s, iop, & errcode); + cnt = get_a_record(& s, iop, & errcode, (into_variable ? NULL : & field_width)); if (errcode != 0) { if (! do_traditional && (errcode != -1)) update_ERRNO_int(errcode); @@ -2709,7 +2712,7 @@ do_getline(int into_variable, IOBUF *iop) INCREMENT_REC(FNR); if (! into_variable) /* no optional var. 
*/ - set_record(s, cnt); + set_record(s, cnt, field_width); else { /* assignment to variable */ NODE **lhs; lhs = POP_ADDRESS(); @@ -3653,7 +3656,8 @@ errno_io_retry(void) static int get_a_record(char **out, /* pointer to pointer to data */ IOBUF *iop, /* input IOP */ - int *errcode) /* pointer to error variable */ + int *errcode, /* pointer to error variable */ + const int **field_width)/* pointer to pointer to field_width array */ { struct recmatch recm; SCANSTATE state; @@ -3672,7 +3676,8 @@ get_a_record(char **out, /* pointer to pointer to data */ char *rt_start; size_t rt_len; int rc = iop->public.get_record(out, &iop->public, errcode, - &rt_start, &rt_len); + &rt_start, &rt_len, + field_width); if (rc == EOF) iop->flag |= IOP_AT_EOF; else { @@ -1005,22 +1005,7 @@ load_procinfo() value = getegid(); update_PROCINFO_num("egid", value); - switch (current_field_sep()) { - case Using_FIELDWIDTHS: - update_PROCINFO_str("FS", "FIELDWIDTHS"); - break; - case Using_FPAT: - update_PROCINFO_str("FS", "FPAT"); - break; - case Using_FS: - update_PROCINFO_str("FS", "FS"); - break; - default: - fatal(_("unknown value for field spec: %d\n"), - current_field_sep()); - break; - } - + update_PROCINFO_str("FS", current_field_sep_str()); #if defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0 for (i = 0; i < ngroups; i++) { diff --git a/test/ChangeLog b/test/ChangeLog index b0ca5a97..acd2bf72 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,10 @@ +2017-03-06 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * Makefile.am (readdir_test): New test to check whether get_record + field_width parsing is working by comparing the results from the + readdir and readdir_test extensions. + (SHLIB_TESTS): Add readdir_test. + 2017-02-21 Andrew J. Schorr <aschorr@telemetry-investments.com> * Makefile.am (mktime): New test. 
diff --git a/test/Makefile.am b/test/Makefile.am index 65336b6c..c41e9dd9 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -1263,7 +1263,7 @@ LOCALE_CHARSET_TESTS = \ SHLIB_TESTS = \ apiterm fnmatch filefuncs fork fork2 fts functab4 getfile inplace1 inplace2 inplace3 \ - ordchr ordchr2 readdir readfile readfile2 revout revtwoway rwarray testext time + ordchr ordchr2 readdir readdir_test readfile readfile2 revout revtwoway rwarray testext time # List of the tests which should be run with --lint option: NEED_LINT = \ @@ -2193,6 +2193,12 @@ readdir: -v dirlist=_dirlist -v longlist=_longlist > $@.ok @-$(CMP) $@.ok _$@ && rm -f $@.ok _$@ _dirlist _longlist +readdir_test: + @echo $@ + @$(AWK) -lreaddir -F/ '{printf "[%s] [%s] [%s] [%s]\n", $$1, $$2, $$3, $$4}' "$(top_srcdir)" > $@.ok + @$(AWK) -lreaddir_test '{printf "[%s] [%s] [%s] [%s]\n", $$1, $$2, $$3, $$4}' "$(top_srcdir)" > _$@ + @-$(CMP) $@.ok _$@ && rm -f $@.ok _$@ + fts: @case `uname` in \ IRIX) \ diff --git a/test/Makefile.in b/test/Makefile.in index 1a61996f..c07bc6d7 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -1516,7 +1516,7 @@ LOCALE_CHARSET_TESTS = \ SHLIB_TESTS = \ apiterm fnmatch filefuncs fork fork2 fts functab4 getfile inplace1 inplace2 inplace3 \ - ordchr ordchr2 readdir readfile readfile2 revout revtwoway rwarray testext time + ordchr ordchr2 readdir readdir_test readfile readfile2 revout revtwoway rwarray testext time # List of the tests which should be run with --lint option: @@ -2632,6 +2632,12 @@ readdir: -v dirlist=_dirlist -v longlist=_longlist > $@.ok @-$(CMP) $@.ok _$@ && rm -f $@.ok _$@ _dirlist _longlist +readdir_test: + @echo $@ + @$(AWK) -lreaddir -F/ '{printf "[%s] [%s] [%s] [%s]\n", $$1, $$2, $$3, $$4}' "$(top_srcdir)" > $@.ok + @$(AWK) -lreaddir_test '{printf "[%s] [%s] [%s] [%s]\n", $$1, $$2, $$3, $$4}' "$(top_srcdir)" > _$@ + @-$(CMP) $@.ok _$@ && rm -f $@.ok _$@ + fts: @case `uname` in \ IRIX) \ |