aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog78
-rw-r--r--awk.h6
-rw-r--r--extension/ChangeLog14
-rw-r--r--extension/Makefile.am8
-rw-r--r--extension/Makefile.in33
-rw-r--r--extension/readdir_test.c337
-rw-r--r--field.c122
-rw-r--r--gawkapi.h28
-rw-r--r--io.c24
-rw-r--r--main.c17
-rw-r--r--test/ChangeLog7
-rw-r--r--test/Makefile.am8
-rw-r--r--test/Makefile.in8
13 files changed, 638 insertions, 52 deletions
diff --git a/ChangeLog b/ChangeLog
index 8290a5f8..176142c9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -27,6 +27,84 @@
* configure.ac: Some cleanups.
+2017-03-09 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * gawkapi.h (awk_input_field_info_t): Define new structure to contain
+ API field parsing info.
+ (awk_input_buf_t): Update get_record prototype to use an array of
+ awk_input_field_info_t instead of integers.
+ * awk.h (set_record): Change 3rd argument from 'const int *' to
+ 'const awk_input_field_info_t *'.
+ * field.c (api_fw): Now points to an array of awk_input_field_info_t
+ instead of integers.
+ (set_record): Change 3rd argument to point to an array of
+ awk_input_field_info_t.
+ (api_parse_field): Update parsing logic to use awk_input_field_info_t
+ structures instead of an array of integers.
+ * io.c (inrec, do_getline_redir, do_getline): Change field_width type
+ from 'const int *' to 'const awk_input_field_info_t *'.
+ (get_a_record): Change field_width argument type from 'const int **'
+ to 'const awk_input_field_info_t **'.
+
+2017-03-09 Arnold D. Robbins <arnold@skeeve.com>
+
+ * field.c: Minor style edits.
+
+2017-03-06 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * field.c (normal_parse_field): Renamed from save_parse_field to reflect
+ better its purpose. Added a comment to explain more clearly what's
+ going on.
+ (set_record, set_parser): Rename save_parse_field to normal_parse_field.
+
+2017-03-06 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * gawkapi.h (awk_input_buf_t): Remove field_width array and instead
+ add it as a 6th argument to the get_record function. This should
+ not break existing code, since it's fine to ignore the additional
+ argument. Document the behavior of the field_width argument.
+ * io.c (inrec): Pass pointer to field_width array to get_a_record,
+ and then hand it off to set_record.
+ (do_getline_redir): If not reading into a variable, pass pointer to
+ field_width array to get_a_record and then hand it off to set_record.
+ (do_getline): Ditto.
+ (get_a_record): Add a 4th field_width argument to pass through to
+ the API get_record method.
+
+2017-03-05 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * awk.h (set_record): Add a new argument containing a field-width
+ array returned by an API parser.
+ (field_sep_type): Add new enum value Using_API.
+ (current_field_sep_str): Declare new function.
+ * field.c (save_parse_field): New static variable to save the
+ parse_field value in cases where it's overridden by API parsing.
+ (api_fw): New static variable to hold pointer to API parser fieldwidth
+ array.
+ (set_record): Add new field-width array argument. If present, API
+ parsing will override the default parsing mechanism.
+ (api_parse_field): New field parser using field widths supplied by the
+ API. This is very similar to the existing fw_parse_field function.
+ (get_field): Fix typo in comment.
+ (set_parser): New function to set default parser and check whether
+ there's an API parser override in effect. Update PROCINFO["FS"] if
+ something has changed.
+ (set_FIELDWIDTHS): Use set_parser and stop updating PROCINFO["FS"].
+ (set_FS): Ditto.
+ (set_FPAT): Ditto.
+ (current_field_sep): Return Using_API when using the API field parsing
+ widths.
+ (current_field_sep_str): New function to return the proper string
+ value for PROCINFO["FS"].
+ * gawkapi.h (awk_input_buf_t): Add field_width array to enable the
+ parser get_record function to supply field widths to override the
+ default gawk field parsing mechanism.
+ * io.c (inrec): Pass iop->public.field_width to set_record as the
+ 3rd argument to enable API field parsing overrides.
+ (do_getline_redir, do_getline): Ditto.
+ * main.c (load_procinfo): Use new current_field_sep_str function
+ instead of switching on the return value from current_field_sep.
+
2017-02-23 Arnold D. Robbins <arnold@skeeve.com>
* awk.h (boolval): Return bool instead of int.
diff --git a/awk.h b/awk.h
index 163ad362..1935534c 100644
--- a/awk.h
+++ b/awk.h
@@ -1510,7 +1510,7 @@ extern NODE *get_actual_argument(NODE *, int, bool);
#endif
/* field.c */
extern void init_fields(void);
-extern void set_record(const char *buf, int cnt);
+extern void set_record(const char *buf, int cnt, const awk_input_field_info_t *);
extern void reset_record(void);
extern void rebuild_record(void);
extern void set_NF(void);
@@ -1527,9 +1527,11 @@ extern void update_PROCINFO_num(const char *subscript, AWKNUM val);
typedef enum {
Using_FS,
Using_FIELDWIDTHS,
- Using_FPAT
+ Using_FPAT,
+ Using_API
} field_sep_type;
extern field_sep_type current_field_sep(void);
+extern const char *current_field_sep_str(void);
/* gawkapi.c: */
extern gawk_api_t api_impl;
diff --git a/extension/ChangeLog b/extension/ChangeLog
index f1622b69..bb99f9d8 100644
--- a/extension/ChangeLog
+++ b/extension/ChangeLog
@@ -1,3 +1,10 @@
+2017-03-09 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * readdir_test.c (open_directory_t): Update field_width type from an
+ array of integers to an array of awk_input_field_info_t.
+ (dir_get_record): Ditto.
+ (dir_take_control_of): Ditto.
+
2017-03-07 Andrew J. Schorr <aschorr@telemetry-investments.com>
* Makefile.am (pkgextension_LTLIBRARIES): Remove testext.la, since it
@@ -10,6 +17,13 @@
installed, automake cannot use the final destination directory to
determine -rpath by itself. The value doesn't matter.
+2017-03-06 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * readdir_test.c: Test extension using new get_record field_width
+ parsing feature.
+ * Makefile.am (noinst_LTLIBRARIES): Add readdir_test.la.
+ (readdir_test_la_*): Configure building of new extension library.
+
2017-01-21 Eli Zaretskii <eliz@gnu.org>
* testext.c (getuid) [__MINGW32__]: New function, mirrors what
diff --git a/extension/Makefile.am b/extension/Makefile.am
index 185bc795..6ea16f5d 100644
--- a/extension/Makefile.am
+++ b/extension/Makefile.am
@@ -48,6 +48,7 @@ pkgextension_LTLIBRARIES = \
time.la
noinst_LTLIBRARIES = \
+ readdir_test.la \
testext.la
MY_MODULE_FLAGS = -module -avoid-version -no-undefined
@@ -106,6 +107,13 @@ testext_la_SOURCES = testext.c
testext_la_LDFLAGS = $(MY_MODULE_FLAGS) -rpath /foo
testext_la_LIBADD = $(MY_LIBS)
+# N.B. Because we are not installing readdir_test, we must specify -rpath in
+# LDFLAGS to get automake to build a shared library, since it needs
+# an installation path.
+readdir_test_la_SOURCES = readdir_test.c
+readdir_test_la_LDFLAGS = $(MY_MODULE_FLAGS) -rpath /foo
+readdir_test_la_LIBADD = $(MY_LIBS)
+
install-data-hook:
for i in $(pkgextension_LTLIBRARIES) ; do \
$(RM) $(DESTDIR)$(pkgextensiondir)/$$i ; \
diff --git a/extension/Makefile.in b/extension/Makefile.in
index 6557693a..c0e2676b 100644
--- a/extension/Makefile.in
+++ b/extension/Makefile.in
@@ -199,6 +199,13 @@ readdir_la_OBJECTS = $(am_readdir_la_OBJECTS)
readdir_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(readdir_la_LDFLAGS) $(LDFLAGS) -o $@
+readdir_test_la_DEPENDENCIES = $(am__DEPENDENCIES_1)
+am_readdir_test_la_OBJECTS = readdir_test.lo
+readdir_test_la_OBJECTS = $(am_readdir_test_la_OBJECTS)
+readdir_test_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \
+ $(AM_CFLAGS) $(CFLAGS) $(readdir_test_la_LDFLAGS) $(LDFLAGS) \
+ -o $@
readfile_la_DEPENDENCIES = $(am__DEPENDENCIES_1)
am_readfile_la_OBJECTS = readfile.lo
readfile_la_OBJECTS = $(am_readfile_la_OBJECTS)
@@ -271,14 +278,16 @@ am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
SOURCES = $(filefuncs_la_SOURCES) $(fnmatch_la_SOURCES) \
$(fork_la_SOURCES) $(inplace_la_SOURCES) $(ordchr_la_SOURCES) \
- $(readdir_la_SOURCES) $(readfile_la_SOURCES) \
- $(revoutput_la_SOURCES) $(revtwoway_la_SOURCES) \
- $(rwarray_la_SOURCES) $(testext_la_SOURCES) $(time_la_SOURCES)
+ $(readdir_la_SOURCES) $(readdir_test_la_SOURCES) \
+ $(readfile_la_SOURCES) $(revoutput_la_SOURCES) \
+ $(revtwoway_la_SOURCES) $(rwarray_la_SOURCES) \
+ $(testext_la_SOURCES) $(time_la_SOURCES)
DIST_SOURCES = $(filefuncs_la_SOURCES) $(fnmatch_la_SOURCES) \
$(fork_la_SOURCES) $(inplace_la_SOURCES) $(ordchr_la_SOURCES) \
- $(readdir_la_SOURCES) $(readfile_la_SOURCES) \
- $(revoutput_la_SOURCES) $(revtwoway_la_SOURCES) \
- $(rwarray_la_SOURCES) $(testext_la_SOURCES) $(time_la_SOURCES)
+ $(readdir_la_SOURCES) $(readdir_test_la_SOURCES) \
+ $(readfile_la_SOURCES) $(revoutput_la_SOURCES) \
+ $(revtwoway_la_SOURCES) $(rwarray_la_SOURCES) \
+ $(testext_la_SOURCES) $(time_la_SOURCES)
RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
ctags-recursive dvi-recursive html-recursive info-recursive \
install-data-recursive install-dvi-recursive \
@@ -520,6 +529,7 @@ pkgextension_LTLIBRARIES = \
time.la
noinst_LTLIBRARIES = \
+ readdir_test.la \
testext.la
MY_MODULE_FLAGS = -module -avoid-version -no-undefined
@@ -567,6 +577,13 @@ time_la_LIBADD = $(MY_LIBS)
testext_la_SOURCES = testext.c
testext_la_LDFLAGS = $(MY_MODULE_FLAGS) -rpath /foo
testext_la_LIBADD = $(MY_LIBS)
+
+# N.B. Because we are not installing readdir_test, we must specify -rpath in
+# LDFLAGS to get automake to build a shared library, since it needs
+# an installation path.
+readdir_test_la_SOURCES = readdir_test.c
+readdir_test_la_LDFLAGS = $(MY_MODULE_FLAGS) -rpath /foo
+readdir_test_la_LIBADD = $(MY_LIBS)
EXTRA_DIST = build-aux/config.rpath \
ChangeLog \
ChangeLog.0 \
@@ -702,6 +719,9 @@ ordchr.la: $(ordchr_la_OBJECTS) $(ordchr_la_DEPENDENCIES) $(EXTRA_ordchr_la_DEPE
readdir.la: $(readdir_la_OBJECTS) $(readdir_la_DEPENDENCIES) $(EXTRA_readdir_la_DEPENDENCIES)
$(AM_V_CCLD)$(readdir_la_LINK) -rpath $(pkgextensiondir) $(readdir_la_OBJECTS) $(readdir_la_LIBADD) $(LIBS)
+readdir_test.la: $(readdir_test_la_OBJECTS) $(readdir_test_la_DEPENDENCIES) $(EXTRA_readdir_test_la_DEPENDENCIES)
+ $(AM_V_CCLD)$(readdir_test_la_LINK) $(readdir_test_la_OBJECTS) $(readdir_test_la_LIBADD) $(LIBS)
+
readfile.la: $(readfile_la_OBJECTS) $(readfile_la_DEPENDENCIES) $(EXTRA_readfile_la_DEPENDENCIES)
$(AM_V_CCLD)$(readfile_la_LINK) -rpath $(pkgextensiondir) $(readfile_la_OBJECTS) $(readfile_la_LIBADD) $(LIBS)
@@ -733,6 +753,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/inplace.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordchr.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/readdir.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/readdir_test.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/readfile.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/revoutput.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/revtwoway.Plo@am__quote@
diff --git a/extension/readdir_test.c b/extension/readdir_test.c
new file mode 100644
index 00000000..e023b67c
--- /dev/null
+++ b/extension/readdir_test.c
@@ -0,0 +1,337 @@
+/*
+ * readdir_test.c --- Test variant of the readdir input parser that supplies field widths through the API
+ *
+ * Arnold Robbins
+ * arnold@skeeve.com
+ * Written 7/2012
+ *
+ * Andrew Schorr and Arnold Robbins: further fixes 8/2012.
+ * Simplified 11/2012.
+ */
+
+/*
+ * Copyright (C) 2012-2014 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#define _BSD_SOURCE
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#ifdef HAVE_LIMITS_H
+#include <limits.h>
+#endif
+
+#ifdef HAVE_DIRENT_H
+#include <dirent.h>
+#else
+#error Cannot compile the dirent extension on this system!
+#endif
+
+#ifdef __MINGW32__
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
+
+#include "gawkapi.h"
+
+#include "gawkdirfd.h"
+
+#include "gettext.h"
+#define _(msgid) gettext(msgid)
+#define N_(msgid) msgid
+
+#ifndef PATH_MAX
+#define PATH_MAX 1024 /* a good guess */
+#endif
+
+static const gawk_api_t *api; /* for convenience macros to work */
+static awk_ext_id_t *ext_id;
+static const char *ext_version = "readdir extension: version 1.0";
+
+static awk_bool_t init_readdir(void);
+static awk_bool_t (*init_func)(void) = init_readdir;
+
+int plugin_is_GPL_compatible;
+
+/* data type for the opaque pointer: per-directory state kept in iobuf->opaque */
+
+typedef struct open_directory {
+ DIR *dp; /* directory stream being read */
+ char *buf; /* holds the "inode/name/type" record most recently built by dir_get_record */
+ awk_input_field_info_t field_width[4]; /* skip/len for the 3 fields, plus a terminating entry */
+} open_directory_t;
+
+/*
+ * ftype --- return type of file as a single character string
+ *
+ * entry is the directory entry to classify and dirname is the directory
+ * it was read from.  The result is a pointer to a constant string:
+ * "b", "c", "d", "p", "l", "f" or "s" for the recognized types and "u"
+ * when the type is unknown or cannot be determined.
+ *
+ * When the system supplies d_type (DT_BLK is defined) the answer comes
+ * straight from the entry.  Otherwise the full path is built and passed
+ * to stat(); if that path would not fit in the local buffer the type is
+ * reported as "u" rather than overrunning the buffer.
+ */
+
+static const char *
+ftype(struct dirent *entry, const char *dirname)
+{
+#ifdef DT_BLK
+	(void) dirname;		/* silence warnings */
+	switch (entry->d_type) {
+	case DT_BLK:	return "b";
+	case DT_CHR:	return "c";
+	case DT_DIR:	return "d";
+	case DT_FIFO:	return "p";
+	case DT_LNK:	return "l";
+	case DT_REG:	return "f";
+	case DT_SOCK:	return "s";
+	default:
+	case DT_UNKNOWN: return "u";
+	}
+#else
+	char fname[PATH_MAX];
+	struct stat sbuf;
+
+	/* dirname + "/" + d_name + NUL must fit in fname */
+	if (strlen(dirname) + strlen(entry->d_name) + 2 > sizeof(fname))
+		return "u";
+
+	strcpy(fname, dirname);
+	strcat(fname, "/");
+	strcat(fname, entry->d_name);
+	if (stat(fname, &sbuf) == 0) {
+		if (S_ISBLK(sbuf.st_mode))
+			return "b";
+		if (S_ISCHR(sbuf.st_mode))
+			return "c";
+		if (S_ISDIR(sbuf.st_mode))
+			return "d";
+		if (S_ISFIFO(sbuf.st_mode))
+			return "p";
+		if (S_ISREG(sbuf.st_mode))
+			return "f";
+#ifdef S_ISLNK
+		if (S_ISLNK(sbuf.st_mode))
+			return "l";
+#endif
+#ifdef S_ISSOCK
+		if (S_ISSOCK(sbuf.st_mode))
+			return "s";
+#endif
+	}
+	return "u";
+#endif
+}
+
+/*
+ * get_inode --- get the inode of a file
+ *
+ * On MinGW the value is assembled from the file index reported by
+ * GetFileInformationByHandle(); 0 is returned if the path does not fit
+ * in the local buffer, the file cannot be opened, or the query fails.
+ * The handle opened for the query is always closed before returning.
+ * Elsewhere the d_ino member of the directory entry is returned.
+ */
+static long long
+get_inode(struct dirent *entry, const char *dirname)
+{
+#ifdef __MINGW32__
+	char fname[PATH_MAX];
+	HANDLE fh;
+	BY_HANDLE_FILE_INFORMATION info;
+	long long inode = 0;
+
+	/* dirname + "\" + d_name + NUL must fit in fname */
+	if (strlen(dirname) + strlen(entry->d_name) + 2 > sizeof(fname))
+		return 0;
+
+	sprintf(fname, "%s\\%s", dirname, entry->d_name);
+	fh = CreateFile(fname, 0, 0, NULL, OPEN_EXISTING,
+			FILE_FLAG_BACKUP_SEMANTICS, NULL);
+	if (fh == INVALID_HANDLE_VALUE)
+		return 0;
+	if (GetFileInformationByHandle(fh, &info)) {
+		inode = info.nFileIndexHigh;
+		inode <<= 32;
+		inode += info.nFileIndexLow;
+	}
+	/* release the handle on every path; one is opened per directory entry */
+	CloseHandle(fh);
+	return inode;
+#else
+	(void) dirname;		/* silence warnings */
+	return entry->d_ino;
+#endif
+}
+
+/*
+ * dir_get_record --- get one record at a time out of a directory
+ *
+ * Builds "inode/name/type" in the per-directory buffer, stores the byte
+ * length of each of the three fields in the field_width array and, when
+ * the caller asked for it, hands that array back through *field_width.
+ * Returns the record length, or EOF at end of directory or on error.
+ */
+
+static int
+dir_get_record(char **out, awk_input_buf_t *iobuf, int *errcode,
+		char **rt_start, size_t *rt_len,
+		const awk_input_field_info_t **field_width)
+{
+	open_directory_t *state;
+	struct dirent *entry;
+	const char *type_str;
+	unsigned long long inode;
+	int total, piece;
+
+	/*
+	 * *errcode arrives as 0 from the caller; it is touched only
+	 * when something goes wrong.
+	 */
+
+	if (out == NULL || iobuf == NULL || iobuf->opaque == NULL)
+		return EOF;
+
+	state = (open_directory_t *) iobuf->opaque;
+
+	/* readdir leaves errno alone at EOF, so clear it first */
+	errno = 0;
+	entry = readdir(state->dp);
+	if (entry == NULL) {
+		*errcode = errno;	/* nonzero only if readdir failed */
+		return EOF;
+	}
+
+	inode = get_inode (entry, iobuf->name);
+
+	/* field 1: the inode number */
+#if __MINGW32__
+	total = sprintf(state->buf, "%I64u", inode);
+#else
+	total = sprintf(state->buf, "%llu", inode);
+#endif
+	state->field_width[0].len = total;
+
+	/* field 2: the entry name; the leading '/' is not part of the field */
+	piece = sprintf(state->buf + total, "/%s", entry->d_name);
+	total += piece;
+	state->field_width[1].len = piece-1;
+
+	/* field 3: the file type letter, again preceded by '/' */
+	type_str = ftype(entry, iobuf->name);
+	piece = sprintf(state->buf + total, "/%s", type_str);
+	total += piece;
+	state->field_width[2].len = piece-1;
+
+	*out = state->buf;
+
+	*rt_start = NULL;
+	*rt_len = 0;	/* set RT to "" */
+	if (field_width != NULL)
+		*field_width = state->field_width;
+	return total;
+}
+
+/*
+ * dir_close --- close up when done
+ *
+ * Closes the directory stream, releases the record buffer and the
+ * per-directory state, and marks the descriptor in iobuf as closed.
+ */
+
+static void
+dir_close(awk_input_buf_t *iobuf)
+{
+	open_directory_t *state;
+
+	/* nothing to release without an input buffer carrying our state */
+	if (iobuf == NULL || iobuf->opaque == NULL)
+		return;
+
+	state = (open_directory_t *) iobuf->opaque;
+	closedir(state->dp);
+	gawk_free(state->buf);
+	gawk_free(state);
+
+	iobuf->fd = -1;
+}
+
+/*
+ * dir_can_take_file --- return true if we want the file, i.e. iobuf
+ * has a valid descriptor and its stat data says it is a directory
+ */
+
+static awk_bool_t
+dir_can_take_file(const awk_input_buf_t *iobuf)
+{
+	return (iobuf != NULL
+		&& iobuf->fd != INVALID_HANDLE
+		&& S_ISDIR(iobuf->sbuf.st_mode));
+}
+
+/*
+ * dir_take_control_of --- set up input parser.
+ * We can assume that dir_can_take_file just returned true,
+ * and no state has changed since then.
+ *
+ * Opens the directory stream, allocates the per-directory state and
+ * record buffer, and installs the get_record and close_func callbacks.
+ * Returns awk_false (after warning and updating ERRNO) if the directory
+ * cannot be opened.
+ */
+
+static awk_bool_t
+dir_take_control_of(awk_input_buf_t *iobuf)
+{
+	DIR *dirp;
+	open_directory_t *state;
+	size_t bufsize;
+
+	errno = 0;
+#ifdef HAVE_FDOPENDIR
+	dirp = fdopendir(iobuf->fd);
+#else
+	dirp = opendir(iobuf->name);
+	if (dirp != NULL)
+		iobuf->fd = dirfd(dirp);
+#endif
+	if (dirp == NULL) {
+		warning(ext_id, _("dir_take_control_of: opendir/fdopendir failed: %s"),
+				strerror(errno));
+		update_ERRNO_int(errno);
+		return awk_false;
+	}
+
+	emalloc(state, open_directory_t *, sizeof(open_directory_t), "dir_take_control_of");
+	state->dp = dirp;
+
+	/* the skip values are the same for every record, so set them once */
+	state->field_width[0].skip = 0;		/* no leading space */
+	state->field_width[1].skip = 1;		/* single '/' separator */
+	state->field_width[2].skip = 1;		/* single '/' separator */
+	state->field_width[3].skip = -1;	/* terminate after 3 fields */
+
+	bufsize = sizeof(struct dirent) + 21 /* max digits in inode */ + 2 /* slashes */;
+	emalloc(state->buf, char *, bufsize, "dir_take_control_of");
+
+	iobuf->opaque = state;
+	iobuf->get_record = dir_get_record;
+	iobuf->close_func = dir_close;
+
+	return awk_true;
+}
+
+static awk_input_parser_t readdir_parser = {
+ "readdir", /* parser name */
+ dir_can_take_file, /* decides whether this parser wants the file */
+ dir_take_control_of, /* installs get_record and close_func on the input buffer */
+ NULL
+};
+
+#ifdef TEST_DUPLICATE
+static awk_input_parser_t readdir_parser2 = {
+ "readdir2", /* second parser with the same callbacks; only built with TEST_DUPLICATE */
+ dir_can_take_file,
+ dir_take_control_of,
+ NULL
+};
+#endif
+
+/*
+ * init_readdir --- set things up: register the directory input parser
+ * (and a second copy when TEST_DUPLICATE is defined)
+ */
+
+static awk_bool_t
+init_readdir(void)
+{
+	register_input_parser(& readdir_parser);
+#ifdef TEST_DUPLICATE
+	register_input_parser(& readdir_parser2);
+#endif
+
+	return awk_true;
+}
+
+static awk_ext_func_t func_table[] = {
+ { NULL, NULL, 0, 0, awk_false, NULL } /* only entry has a NULL name: no awk-callable functions are listed */
+};
+
+/* define the dl_load function using the boilerplate macro; init_func above points at init_readdir */
+
+dl_load_func(func_table, readdir, "")
diff --git a/field.c b/field.c
index 0799fb1b..5ef4d74b 100644
--- a/field.c
+++ b/field.c
@@ -40,6 +40,13 @@ typedef void (* Setfunc)(long, char *, long, NODE *);
static long (*parse_field)(long, char **, int, NODE *,
Regexp *, Setfunc, NODE *, NODE *, bool);
+/*
+ * N.B. The normal_parse_field function pointer contains the parse_field value
+ * that should be used except when API field parsing is overriding the default
+ * field parsing mechanism.
+ */
+static long (*normal_parse_field)(long, char **, int, NODE *,
+ Regexp *, Setfunc, NODE *, NODE *, bool);
static long re_parse_field(long, char **, int, NODE *,
Regexp *, Setfunc, NODE *, NODE *, bool);
static long def_parse_field(long, char **, int, NODE *,
@@ -50,6 +57,9 @@ static long sc_parse_field(long, char **, int, NODE *,
Regexp *, Setfunc, NODE *, NODE *, bool);
static long fw_parse_field(long, char **, int, NODE *,
Regexp *, Setfunc, NODE *, NODE *, bool);
+static long api_parse_field(long, char **, int, NODE *,
+ Regexp *, Setfunc, NODE *, NODE *, bool);
+static const awk_input_field_info_t *api_fw = NULL;
static long fpat_parse_field(long, char **, int, NODE *,
Regexp *, Setfunc, NODE *, NODE *, bool);
static void set_element(long num, char * str, long len, NODE *arr);
@@ -252,7 +262,7 @@ rebuild_record()
* but better correct than fast.
*/
void
-set_record(const char *buf, int cnt)
+set_record(const char *buf, int cnt, const awk_input_field_info_t *fw)
{
NODE *n;
static char *databuf;
@@ -306,6 +316,17 @@ set_record(const char *buf, int cnt)
n->stfmt = STFMT_UNUSED;
n->flags = (STRING|STRCUR|USER_INPUT); /* do not set MALLOC */
fields_arr[0] = n;
+ if (fw != api_fw) {
+ if ((api_fw = fw) != NULL) {
+ if (parse_field != api_parse_field) {
+ parse_field = api_parse_field;
+ update_PROCINFO_str("FS", "API");
+ }
+ } else if (parse_field != normal_parse_field) {
+ parse_field = normal_parse_field;
+ update_PROCINFO_str("FS", current_field_sep_str());
+ }
+ }
#undef INITIAL_SIZE
#undef MAX_SIZE
@@ -760,6 +781,51 @@ fw_parse_field(long up_to, /* parse only up to this field number */
return nf;
}
+/*
+ * api_parse_field --- field parsing using field widths returned by API parser.
+ *
+ * This is called from get_field() via (*parse_field)().
+ *
+ * Field positions come from the api_fw array: for each field, skip bytes
+ * are passed over and the next len bytes are handed to (*set)().  Both
+ * values are clamped so that nothing beyond the end of the record is
+ * touched.  An entry with a negative skip ends parsing and moves *buf
+ * to the end of the record.  Parsing resumes after parse_high_water
+ * unless up_to is UNLIMITED, in which case counting starts from zero.
+ * The return value is the number of the last field set.
+ */
+
+static long
+api_parse_field(long up_to, /* parse only up to this field number */
+ char **buf, /* on input: string to parse; on output: point to start next */
+ int len,
+ NODE *fs ATTRIBUTE_UNUSED,
+ Regexp *rp ATTRIBUTE_UNUSED,
+ Setfunc set, /* routine to set the value of the parsed field */
+ NODE *n,
+ NODE *dummy ATTRIBUTE_UNUSED, /* sep_arr not needed here: hence dummy */
+ bool in_middle ATTRIBUTE_UNUSED)
+{
+ char *scan = *buf;
+ long nf = parse_high_water;
+ char *end = scan + len;
+ int skiplen;
+ size_t flen;
+
+ if (up_to == UNLIMITED)
+ nf = 0;
+ if (len == 0) /* empty input: nothing to parse */
+ return nf;
+ while (nf < up_to) {
+ if ((skiplen = api_fw[nf].skip) < 0) { /* negative skip marks the end of the array */
+ *buf = end;
+ return nf;
+ }
+ if (skiplen > end - scan) /* do not skip past the end of the record */
+ skiplen = end - scan;
+ scan += skiplen;
+ flen = api_fw[nf].len;
+ if (flen > end - scan) /* truncate a field that would run off the end */
+ flen = end - scan;
+ (*set)(++nf, scan, (long) flen, n); /* field numbers passed to set are 1-based */
+ scan += flen;
+ }
+ *buf = scan;
+ return nf;
+}
+
/* invalidate_field0 --- $0 needs reconstruction */
void
@@ -845,7 +911,7 @@ get_field(long requested, Func_ptr *assign)
if (parse_extent == fields_arr[0]->stptr + fields_arr[0]->stlen)
NF = parse_high_water;
else if (parse_field == fpat_parse_field) {
- /* FPAT parsing is wierd, isolate the special cases */
+ /* FPAT parsing is weird, isolate the special cases */
char *rec_start = fields_arr[0]->stptr;
char *rec_end = fields_arr[0]->stptr + fields_arr[0]->stlen;
@@ -1057,6 +1123,18 @@ do_patsplit(int nargs)
return tmp;
}
+/*
+ * set_parser --- update the current (non-API) parser
+ *
+ * func is always remembered as the normal parser.  It also becomes the
+ * active parser, with PROCINFO["FS"] refreshed, unless API parsing is
+ * currently in effect or func is already the active parser.
+ */
+
+static void
+set_parser(long (*func)(long, char **, int, NODE *, Regexp *, Setfunc, NODE *, NODE *, bool))
+{
+	normal_parse_field = func;
+
+	/* leave an API override alone; nothing to do if func is already active */
+	if (parse_field == api_parse_field || parse_field == func)
+		return;
+
+	parse_field = func;
+	update_PROCINFO_str("FS", current_field_sep_str());
+}
+
/* set_FIELDWIDTHS --- handle an assignment to FIELDWIDTHS */
void
@@ -1084,7 +1162,7 @@ set_FIELDWIDTHS()
if (fields_arr != NULL)
(void) get_field(UNLIMITED - 1, 0);
- parse_field = fw_parse_field;
+ set_parser(fw_parse_field);
tmp = force_string(FIELDWIDTHS_node->var_value);
scan = tmp->stptr;
@@ -1134,7 +1212,6 @@ set_FIELDWIDTHS()
}
FIELDWIDTHS[i+1] = -1;
- update_PROCINFO_str("FS", "FIELDWIDTHS");
if (fatal_error)
fatal(_("invalid FIELDWIDTHS value, near `%s'"),
scan);
@@ -1205,7 +1282,7 @@ choose_fs_function:
if (! do_traditional && fs->stlen == 0) {
static bool warned = false;
- parse_field = null_parse_field;
+ set_parser(null_parse_field);
if (do_lint && ! warned) {
warned = true;
@@ -1214,10 +1291,10 @@ choose_fs_function:
} else if (fs->stlen > 1) {
if (do_lint_old)
warning(_("old awk does not support regexps as value of `FS'"));
- parse_field = re_parse_field;
+ set_parser(re_parse_field);
} else if (RS_is_null) {
/* we know that fs->stlen <= 1 */
- parse_field = sc_parse_field;
+ set_parser(sc_parse_field);
if (fs->stlen == 1) {
if (fs->stptr[0] == ' ') {
default_FS = true;
@@ -1233,7 +1310,7 @@ choose_fs_function:
}
}
} else {
- parse_field = def_parse_field;
+ set_parser(def_parse_field);
if (fs->stlen == 1) {
if (fs->stptr[0] == ' ')
@@ -1242,7 +1319,7 @@ choose_fs_function:
/* same special case */
strcpy(buf, "[\\\\]");
else
- parse_field = sc_parse_field;
+ set_parser(sc_parse_field);
}
}
if (remake_re) {
@@ -1254,7 +1331,7 @@ choose_fs_function:
FS_re_yes_case = make_regexp(buf, strlen(buf), false, true, true);
FS_re_no_case = make_regexp(buf, strlen(buf), true, true, true);
FS_regexp = (IGNORECASE ? FS_re_no_case : FS_re_yes_case);
- parse_field = re_parse_field;
+ set_parser(re_parse_field);
} else if (parse_field == re_parse_field) {
FS_re_yes_case = make_regexp(fs->stptr, fs->stlen, false, true, true);
FS_re_no_case = make_regexp(fs->stptr, fs->stlen, true, true, true);
@@ -1270,11 +1347,9 @@ choose_fs_function:
*/
if (fs->stlen == 1 && parse_field == re_parse_field)
FS_regexp = FS_re_yes_case;
-
- update_PROCINFO_str("FS", "FS");
}
-/* current_field_sep --- return what field separator is */
+/* current_field_sep --- return the field separator type */
field_sep_type
current_field_sep()
@@ -1283,10 +1358,27 @@ current_field_sep()
return Using_FIELDWIDTHS;
else if (parse_field == fpat_parse_field)
return Using_FPAT;
+ else if (parse_field == api_parse_field)
+ return Using_API;
else
return Using_FS;
}
+/*
+ * current_field_sep_str --- return the field separator type as a string:
+ * "FIELDWIDTHS", "FPAT" or "API" for the matching active parser, and
+ * "FS" for any other parser
+ */
+
+const char *
+current_field_sep_str()
+{
+	const char *name = "FS";	/* default when no special parser is active */
+
+	if (parse_field == fw_parse_field)
+		name = "FIELDWIDTHS";
+	else if (parse_field == fpat_parse_field)
+		name = "FPAT";
+	else if (parse_field == api_parse_field)
+		name = "API";
+
+	return name;
+}
+
/* update_PROCINFO_str --- update PROCINFO[sub] with string value */
void
@@ -1373,7 +1465,7 @@ set_FPAT()
set_fpat_function:
fpat = force_string(FPAT_node->var_value);
- parse_field = fpat_parse_field;
+ set_parser(fpat_parse_field);
if (remake_re) {
refree(FPAT_re_yes_case);
@@ -1384,8 +1476,6 @@ set_fpat_function:
FPAT_re_no_case = make_regexp(fpat->stptr, fpat->stlen, true, true, true);
FPAT_regexp = (IGNORECASE ? FPAT_re_no_case : FPAT_re_yes_case);
}
-
- update_PROCINFO_str("FS", "FPAT");
}
/*
diff --git a/gawkapi.h b/gawkapi.h
index 5071adce..e744a0fc 100644
--- a/gawkapi.h
+++ b/gawkapi.h
@@ -117,6 +117,19 @@ typedef enum awk_bool {
awk_true
} awk_bool_t; /* we don't use <stdbool.h> on purpose */
+/*
+ * If the input parser would like to specify the field positions in the input
+ * record, it may populate an array of awk_input_field_info_t structures
+ * to indicate the location of each field. The 0th array element contains
+ * the information about field $1, and the NFth element should set skip
+ * to a negative value. For both skip and len, the value should be in
+ * bytes, not (potentially multi-byte) characters.
+ *
+ * The skip of each element is counted from the end of the previous field
+ * (from the start of the record for $1). The len of the terminating
+ * element is not examined.
+ */
+typedef struct {
+ int skip; /* # of bytes to skip before field starts; negative marks the end of the array */
+ size_t len; /* # of bytes in field */
+} awk_input_field_info_t;
+
/* The information about input files that input parsers need to know: */
typedef struct awk_input {
const char *name; /* filename */
@@ -146,9 +159,22 @@ typedef struct awk_input {
* than zero, gawk will automatically update the ERRNO variable based
* on the value of *errcode (e.g., setting *errcode = errno should do
* the right thing).
+ *
+ * If field_width is non-NULL, then its value will be initialized
+ * to NULL, and the function may set it to point to an array of
+ * structures supplying field width information to override the default
+ * gawk field parsing mechanism. The field_width array should have
+ * at least NF+1 elements, and the value of field_width[NF].skip
+ * must be negative. Note that these values are specified
+ * in bytes, not (potentially multi-byte) characters! And note that this
+ * array will not be copied by gawk; it must persist at least until the
+ * next call to get_record or close_func. Note that field_width will
+ * be NULL when getline is assigning the results to a variable, thus
+ * field parsing is not needed.
*/
int (*get_record)(char **out, struct awk_input *iobuf, int *errcode,
- char **rt_start, size_t *rt_len);
+ char **rt_start, size_t *rt_len,
+ const awk_input_field_info_t **field_width);
/*
* No argument prototype on read_func to allow for older systems
diff --git a/io.c b/io.c
index 0da27575..d1033fcd 100644
--- a/io.c
+++ b/io.c
@@ -287,7 +287,7 @@ static RECVALUE rsrescan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state);
static RECVALUE (*matchrec)(IOBUF *iop, struct recmatch *recm, SCANSTATE *state) = rs1scan;
-static int get_a_record(char **out, IOBUF *iop, int *errcode);
+static int get_a_record(char **out, IOBUF *iop, int *errcode, const awk_input_field_info_t **field_width);
static void free_rp(struct redirect *rp);
@@ -590,13 +590,14 @@ inrec(IOBUF *iop, int *errcode)
char *begin;
int cnt;
bool retval = true;
+ const awk_input_field_info_t *field_width = NULL;
if (at_eof(iop) && no_data_left(iop))
cnt = EOF;
else if ((iop->flag & IOP_CLOSED) != 0)
cnt = EOF;
else
- cnt = get_a_record(& begin, iop, errcode);
+ cnt = get_a_record(& begin, iop, errcode, & field_width);
/* Note that get_a_record may return -2 when I/O would block */
if (cnt < 0) {
@@ -604,7 +605,7 @@ inrec(IOBUF *iop, int *errcode)
} else {
INCREMENT_REC(NR);
INCREMENT_REC(FNR);
- set_record(begin, cnt);
+ set_record(begin, cnt, field_width);
if (*errcode > 0)
retval = false;
}
@@ -2637,6 +2638,7 @@ do_getline_redir(int into_variable, enum redirval redirtype)
NODE *redir_exp = NULL;
NODE **lhs = NULL;
int redir_error = 0;
+ const awk_input_field_info_t *field_width = NULL;
if (into_variable)
lhs = POP_ADDRESS();
@@ -2665,7 +2667,7 @@ do_getline_redir(int into_variable, enum redirval redirtype)
return make_number((AWKNUM) 0.0);
errcode = 0;
- cnt = get_a_record(& s, iop, & errcode);
+ cnt = get_a_record(& s, iop, & errcode, (lhs ? NULL : & field_width));
if (errcode != 0) {
if (! do_traditional && (errcode != -1))
update_ERRNO_int(errcode);
@@ -2687,7 +2689,7 @@ do_getline_redir(int into_variable, enum redirval redirtype)
}
if (lhs == NULL) /* no optional var. */
- set_record(s, cnt);
+ set_record(s, cnt, field_width);
else { /* assignment to variable */
unref(*lhs);
*lhs = make_string(s, cnt);
@@ -2705,6 +2707,7 @@ do_getline(int into_variable, IOBUF *iop)
int cnt = EOF;
char *s = NULL;
int errcode;
+ const awk_input_field_info_t *field_width = NULL;
if (iop == NULL) { /* end of input */
if (into_variable)
@@ -2713,7 +2716,7 @@ do_getline(int into_variable, IOBUF *iop)
}
errcode = 0;
- cnt = get_a_record(& s, iop, & errcode);
+ cnt = get_a_record(& s, iop, & errcode, (into_variable ? NULL : & field_width));
if (errcode != 0) {
if (! do_traditional && (errcode != -1))
update_ERRNO_int(errcode);
@@ -2728,7 +2731,7 @@ do_getline(int into_variable, IOBUF *iop)
INCREMENT_REC(FNR);
if (! into_variable) /* no optional var. */
- set_record(s, cnt);
+ set_record(s, cnt, field_width);
else { /* assignment to variable */
NODE **lhs;
lhs = POP_ADDRESS();
@@ -3672,7 +3675,9 @@ errno_io_retry(void)
static int
get_a_record(char **out, /* pointer to pointer to data */
IOBUF *iop, /* input IOP */
- int *errcode) /* pointer to error variable */
+ int *errcode, /* pointer to error variable */
+ const awk_input_field_info_t **field_width)
+ /* pointer to pointer to field_width array */
{
struct recmatch recm;
SCANSTATE state;
@@ -3691,7 +3696,8 @@ get_a_record(char **out, /* pointer to pointer to data */
char *rt_start;
size_t rt_len;
int rc = iop->public.get_record(out, &iop->public, errcode,
- &rt_start, &rt_len);
+ &rt_start, &rt_len,
+ field_width);
if (rc == EOF)
iop->flag |= IOP_AT_EOF;
else {
diff --git a/main.c b/main.c
index 4f578d3e..f348171c 100644
--- a/main.c
+++ b/main.c
@@ -1004,22 +1004,7 @@ load_procinfo()
value = getegid();
update_PROCINFO_num("egid", value);
- switch (current_field_sep()) {
- case Using_FIELDWIDTHS:
- update_PROCINFO_str("FS", "FIELDWIDTHS");
- break;
- case Using_FPAT:
- update_PROCINFO_str("FS", "FPAT");
- break;
- case Using_FS:
- update_PROCINFO_str("FS", "FS");
- break;
- default:
- fatal(_("unknown value for field spec: %d\n"),
- current_field_sep());
- break;
- }
-
+ update_PROCINFO_str("FS", current_field_sep_str());
#if defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0
for (i = 0; i < ngroups; i++) {
diff --git a/test/ChangeLog b/test/ChangeLog
index df0ed8fa..ad1b35b1 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -5,6 +5,13 @@
$(srcdir) is the current directory.
* argarray.ok: Replace argarray.in with argarray.input.
+2017-03-06 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * Makefile.am (readdir_test): New test to check whether get_record
+ field_width parsing is working by comparing the results from the
+ readdir and readdir_test extensions.
+ (SHLIB_TESTS): Add readdir_test.
+
2017-02-21 Andrew J. Schorr <aschorr@telemetry-investments.com>
* Makefile.am (mktime): New test.
diff --git a/test/Makefile.am b/test/Makefile.am
index a356d63b..855958f1 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -1263,7 +1263,7 @@ LOCALE_CHARSET_TESTS = \
SHLIB_TESTS = \
apiterm fnmatch filefuncs fork fork2 fts functab4 getfile inplace1 inplace2 inplace3 \
- ordchr ordchr2 readdir readfile readfile2 revout revtwoway rwarray testext time
+ ordchr ordchr2 readdir readdir_test readfile readfile2 revout revtwoway rwarray testext time
# List of the tests which should be run with --lint option:
NEED_LINT = \
@@ -2187,6 +2187,12 @@ readdir:
-v dirlist=_dirlist -v longlist=_longlist > $@.ok
@-$(CMP) $@.ok _$@ && rm -f $@.ok _$@ _dirlist _longlist
+readdir_test:
+ @echo $@
+ @$(AWK) -lreaddir -F/ '{printf "[%s] [%s] [%s] [%s]\n", $$1, $$2, $$3, $$4}' "$(top_srcdir)" > $@.ok
+ @$(AWK) -lreaddir_test '{printf "[%s] [%s] [%s] [%s]\n", $$1, $$2, $$3, $$4}' "$(top_srcdir)" > _$@
+ @-$(CMP) $@.ok _$@ && rm -f $@.ok _$@
+
fts:
@case `uname` in \
IRIX) \
diff --git a/test/Makefile.in b/test/Makefile.in
index 8719840b..c23156df 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -1516,7 +1516,7 @@ LOCALE_CHARSET_TESTS = \
SHLIB_TESTS = \
apiterm fnmatch filefuncs fork fork2 fts functab4 getfile inplace1 inplace2 inplace3 \
- ordchr ordchr2 readdir readfile readfile2 revout revtwoway rwarray testext time
+ ordchr ordchr2 readdir readdir_test readfile readfile2 revout revtwoway rwarray testext time
# List of the tests which should be run with --lint option:
@@ -2626,6 +2626,12 @@ readdir:
-v dirlist=_dirlist -v longlist=_longlist > $@.ok
@-$(CMP) $@.ok _$@ && rm -f $@.ok _$@ _dirlist _longlist
+readdir_test:
+ @echo $@
+ @$(AWK) -lreaddir -F/ '{printf "[%s] [%s] [%s] [%s]\n", $$1, $$2, $$3, $$4}' "$(top_srcdir)" > $@.ok
+ @$(AWK) -lreaddir_test '{printf "[%s] [%s] [%s] [%s]\n", $$1, $$2, $$3, $$4}' "$(top_srcdir)" > _$@
+ @-$(CMP) $@.ok _$@ && rm -f $@.ok _$@
+
fts:
@case `uname` in \
IRIX) \