diff options
Diffstat (limited to 'doc')
-rw-r--r-- | doc/CMakeLists.txt | 95 | ||||
-rw-r--r-- | doc/ChangeLog | 189 | ||||
-rw-r--r-- | doc/Makefile.am | 14 | ||||
-rw-r--r-- | doc/Makefile.in | 30 | ||||
-rw-r--r-- | doc/awkcard.in | 17 | ||||
-rw-r--r-- | doc/gawk.1 | 117 | ||||
-rw-r--r-- | doc/gawk.info | 2137 | ||||
-rw-r--r-- | doc/gawk.texi | 850 | ||||
-rw-r--r-- | doc/gawktexi.in | 850 |
9 files changed, 3343 insertions, 956 deletions
diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt new file mode 100644 index 00000000..e12f5de0 --- /dev/null +++ b/doc/CMakeLists.txt @@ -0,0 +1,95 @@ +# +# doc/CMakeLists.txt --- CMake input file for gawk +# +# Copyright (C) 2013 +# the Free Software Foundation, Inc. +# +# This file is part of GAWK, the GNU implementation of the +# AWK Programming Language. +# +# GAWK is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GAWK is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +# + +## process this file with CMake to produce Makefile + +MACRO(DocDependency outfile) + add_dependencies(doc ${outfile}) + add_custom_target( + ${outfile} + DEPENDS ${ARGN} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMAND ${CMAKE_SOURCE_DIR}/cmake/docmaker ${outfile} ${ARGN} + ) +ENDMACRO(DocDependency) + +find_program(TEXI2DVI_CONVERTER texi2dvi) +if (TEXI2DVI_CONVERTER) + add_custom_target(doc) + DocDependency(gawk.texi gawktexi.in rflashlight.eps api-figure1.fig api-figure2.fig api-figure3.fig general-program.fig process-flow.fig) + DocDependency(rflashlight.eps) + DocDependency(api-figure1.fig) + DocDependency(api-figure2.fig) + DocDependency(api-figure3.fig) + DocDependency(general-program.fig) + DocDependency(process-flow.fig) + DocDependency(gawk.dvi gawk.texi) + DocDependency(gawk.info gawk.texi) + DocDependency(gawkinet.dvi gawkinet.texi) + DocDependency(gawkinet.info gawkinet.texi) + 
DocDependency(gawkinet.texi statist.eps) + DocDependency(gawk.1.ps gawk.1) + DocDependency(igawk.1.ps igawk.1) + find_program(DVIPS_CONVERTER dvips) + if (DVIPS_CONVERTER) + DocDependency(gawk.ps gawk.dvi) + DocDependency(gawkinet.ps gawkinet.dvi) + find_program(PS2PDF_CONVERTER ps2pdf) + if (PS2PDF_CONVERTER) + DocDependency(gawk.1.pdf gawk.1.ps) + DocDependency(igawk.1.pdf igawk.1.ps) + DocDependency(gawk.pdf gawk.ps) + DocDependency(gawkinet.pdf gawkinet.ps) + install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gawk.1.pdf DESTINATION doc) + install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/igawk.1.pdf DESTINATION doc) + install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gawk.info DESTINATION doc) + install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gawk.pdf DESTINATION doc) + install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gawkinet.info DESTINATION doc) + install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gawkinet.pdf DESTINATION doc) + + set(CARDSRC macros cardfonts colors awkcard.tr) + set(CARDSRC_N macros cardfonts no.colors awkcard.tr) + set(CARDFILES ${CARDSRC} ad.block awkcard.in setter.outline) + DocDependency(awkcard.tr awkcard.in) + DocDependency(awkcard.nc ${CARDFILES}) + DocDependency(awkcard.ps ${CARDFILES}) + DocDependency(awkcard.pdf awkcard.ps) + install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/awkcard.pdf DESTINATION doc) + + else() + message(WARNING "Found no ps2pdf tool; no doc will be generated") + install(CODE "MESSAGE(\"doc generated only in .ps files\")") + endif() + else() + message(WARNING "Found no dvips tool; no doc will be generated") + install(CODE "MESSAGE(\"doc generated only in .dvi files and man pages in .ps files\")") + endif() +else() + message(WARNING "Found no texi2dvi tool; no doc will be generated") + add_custom_command( + TARGET doc + COMMAND echo no doc generated because of missing texi2dvi + ) +endif() + diff --git a/doc/ChangeLog b/doc/ChangeLog index bf5f47f2..a8c33d4d 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -9,6 +9,8 @@ dlward134@gmail.com. 
Added an example of use of rewind(), also per suggestion from David Ward. * gawktexi.in: Update info about Texinfo versions. + * gawktexi.in (Limitations): Fix Heisenberg Physics example and + spelling of Heisenberg's name. Thanks to Hermann Peifer. 2016-02-14 Arnold D. Robbins <arnold@skeeve.com> @@ -18,6 +20,14 @@ Use @sup for superscripts where possible. * texinfo.tex: Updated. +2016-02-05 Arnold D. Robbins <arnold@skeeve.com> + + * gawk.texi: Document that optimization in now the default, + there are new -s/--no-optimize options and that + pretty-printing and profiling disable optimization. + * gawk.1: Ditto. + * awkcard.in: Ditto. + 2016-02-03 Andrew J. Schorr <aschorr@telemetry-investments.com> * gawktexi.in (Command-Line Options): Change wording of -M description @@ -50,17 +60,39 @@ * ChangeLog: Remove spurious whitespace. + Unrelated: + + * gawk.1: Restore text on PROCINFO["RETRY"] and fix up the + formatting while we're at it. Thanks to Andrew Schorr for + pointing out the problem. + 2016-01-13 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in (Array Sorting Functions): Add an example of using a function name with asort(). Response to bug report Stephane Goujet <stephane.goujet@wanadoo.fr>. +2016-01-06 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: Finish documenting that --pretty-print + doesn't run the program. Thanks to Antonio + Giovanni Colombo for the report and patch. + +2016-01-03 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: Document that GNU/Linux on Alpha is no + longer supported. + 2015-12-27 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in: Fix some @c endfile. Thanks to Antonio Giovanni Colombo for the report and patch. +2015-12-20 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: Add PROCINFO["NONFATAL"] to the list for PROCINFO. + * gawk.1: Ditto. + 2015-12-18 Arnold D. Robbins <arnold@skeeve.com> * gawk.1: Update description of PROCINFO, and sort it properly. @@ -74,6 +106,11 @@ 2015-11-15 Arnold D. 
Robbins <arnold@skeeve.com> * gawktexi.in: Minor edits. + * gawk.1: Revise \x to maximum of two digits. + +2015-11-04 Arnold D. Robbins <arnold@skeeve.com> + + * Makefile.am (pdf-local): Remove igawk.1.pdf. Ooops. 2015-10-30 Arnold D. Robbins <arnold@skeeve.com> @@ -86,6 +123,10 @@ * gawk.1: Put commas outside quoting in regexps to avoid confusion. Thanks to Mike Frysinger <vapier@gentoo.org>. +2015-10-16 Arnold D. Robbins <arnold@skeeve.com> + + * awkcard.in: Fix tbl complaint. + 2015-10-07 Arnold D. Robbins <arnold@skeeve.com> * texinfo.tex: Updated to a working version. @@ -117,11 +158,26 @@ * gawktexi.in: Typo fixes in Appendix A. Thanks to Antonio Colombo. +2015-07-30 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: Small typo fix; thanks to Antonio Colombo + for noticing. + 2015-07-01 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in: Update info on Quiktrim awk; thanks to Antonio Colombo for the pointer. +2015-06-30 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in (Limitations): Document that sometimes the + debugger can affect the program being run. + Thanks to Hermann Peifer for the test case. + +2015-06-26 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: Update description of values returned by typeof. + 2015-06-19 Arnold D. Robbins <arnold@skeeve.com> * gawkinet.info: Fix an old arnold@gnu.org. @@ -147,6 +203,13 @@ * gawktexi.in: Add another pithy quote from Chet Ramey. Currently commented out. +2015-05-31 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: Revised description of default field parsing + for POSIX. Newline is now a separator also. Thanks to + Michael Klement <michael.klement@usa.net> for pointing this out. + * gawk.1: Updated too. + 2015-05-30 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in (Bitwise Functions): Update results of testbits.awk. @@ -171,15 +234,33 @@ * gawktexi.in: Fix description of nextfile within a function. Sigh. +2015-05-15 Andrew J. 
Schorr <aschorr@telemetry-investments.com> + + * gawktexi.in (Undocumented): Describe the new PROCINFO["argv"] array. + 2015-05-14 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in (Bugs): Add that email should be in plain text and not in HTML. Sigh. +2015-05-11 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: Add doc on conversions for strongly typed + regexp variables. + +2015-05-03 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: Add initial documentation for strongly typed + regexps and for `typeof'. + 2015-04-29 Arnold D. Robbins <arnold@skeeve.com> * 4.1.2: Release tar ball made. +2015-04-16 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in (Undocumented): More info added. + 2015-04-08 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in: Update feature history section. @@ -201,6 +282,11 @@ * gawktexi.in: Fix a figure caption. Thanks to Antonio Colombo for pointing this out. + * gawktexi.in: Additional typo fix, also thanks to Antonio. + +2015-04-02 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in, gawk.1, awkcard.in: Name change: div() --> intdiv(). 2015-03-31 Arnold D. Robbins <arnold@skeeve.com> @@ -208,10 +294,17 @@ indirectly. Small additional fix relating to rand(). Thanks to Antonio Colombo. +2015-03-27 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: Minor edits. + 2015-03-24 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in: Minor fixes from Antonio Colombo and new exercise in chapter 16. + * gawk.1: Minor edits. + * gawktexi.in: Edits in material on errno and retryable and get_file + API. 2015-03-17 Andrew J. Schorr <aschorr@telemetry-investments.com> @@ -225,6 +318,12 @@ Thanks to Nicholas Mills <nlmills@clemson.edu> for pointing out the issue. +2015-03-08 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: Briefly describe that nonfatal I/O overrides + GAWK_SOCK_RETRIES, in the env var part and in the nonfatal I/O + part. + 2015-03-01 Arnold D. 
Robbins <arnold@skeeve.com> * gawktexi.in: Change quotes to @dfn for pseudorandom. @@ -281,6 +380,7 @@ 2015-02-08 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in: O'Reilly fixes. + Make non-fatal i/o use "NONFATAL". 2015-02-06 Arnold D. Robbins <arnold@skeeve.com> @@ -289,6 +389,7 @@ 2015-02-04 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in: O'Reilly fixes. + * gawktexi.in: Update various version-related bits of info. 2015-02-02 Arnold D. Robbins <arnold@skeeve.com> @@ -318,7 +419,7 @@ 2015-01-25 Arnold D. Robbins <arnold@skeeve.com> - * gawktexi.in: Fix a bad URL. + * gawktexi.in: Fix a bad URL. And another one. More O'Reilly fixes. 2015-01-23 Arnold D. Robbins <arnold@skeeve.com> @@ -340,12 +441,40 @@ * gawkinet.texi: Fix capitalization in document title. * gawktexi.in: Here we go again: Starting on more O'Reilly fixes. +2014-12-27 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: Add info that nonfatal I/O works with stdout and + stderr. Revise version info and what was added when. + +2015-01-05 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * gawktexi.in: Improve get_file documentation. + +2015-01-05 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * gawktexi.in: Replace "Retrying I/O" with "Retrying Input", since this + feature pertains to input, not output. + +2015-01-04 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * gawktexi.in: Document the get_file API function. + +2015-01-04 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * gawk.1: Document new features PROCINFO["errno"] and + PROCINFO["input", "RETRY"], and new getline return value of -2. + * gawktexi.in: Ditto. + 2014-12-26 Antonio Giovanni Colombo <azc100@gmail.com> * gawktexi.in (Glossary): Really sort the items. 2014-12-24 Arnold D. Robbins <arnold@skeeve.com> + * gawktexi.in: Start documenting nonfatal output. + +2014-12-24 Arnold D. Robbins <arnold@skeeve.com> + * gawktexi.in: Add one more paragraph to new foreword. 
* gawktexi.in: Fix exponentiation in TeX mode. Thanks to Marco Curreli by way of Antonio Giovanni Colombo. @@ -377,6 +506,11 @@ * gawktexi.in: Various minor fixes and updates. +2014-11-23 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: Update that TZ env. var can influnce mktime + in running program. Thanks to Hermann Peifer. + 2014-11-19 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in: Update that RFC 4180 documents CSV data. @@ -390,6 +524,11 @@ * gawktexi.in: Comment out that I need an owner for awk.info. I may have found one or two people. +2014-10-29 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * gawktexi.in: Document new extras directory containing shell startup + files to manipulate AWKPATH and AWKLIBPATH environment variables. + 2014-10-28 Arnold D. Robbins <arnold@skeeve.com> * gawk.1: Clarification that debugger reads stdin. @@ -403,6 +542,7 @@ 2014-10-25 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in: Minor typo fixes. + Fix discussion of \x, per note from Antonio Colombo. 2014-10-17 Arnold D. Robbins <arnold@skeeve.com> @@ -444,10 +584,25 @@ * gawktexi.in: Pretty much done! + Unrelated: + + * gawktexi.in: Fix braino in awk version of div function. + Thanks to Katie Wasserman for the catch. + 2014-10-01 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in: More fixes after reading through the MS. + Unrelated: + + * gawktexi.in: Add Katie Wasserman's program to compute + the digits of PI. + + Unrelated: + + * gawktexi.in: Document the differences between profiling + and pretty printing. + 2014-09-30 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in: More fixes after reading through the MS. @@ -543,6 +698,10 @@ exercises. Remove use of LC_ALL in an example; doesn't seem to be needed anymore. + Unrelated: + + * gawktexi.in: Document that MirBSD is no longer supported. + 2014-08-25 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in: Exercises are excluded from print edition. 
@@ -576,6 +735,10 @@ * gawktexi.in: Starting on reviewer comments. Update acknowledgements. +2014-08-12 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: Cause div.awk to get into the example files. + 2014-08-06 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in: Misc minor additions. @@ -590,6 +753,18 @@ * gawktexi.in: Fix doc for API get_record - errcode needs to be greater than zero. +2014-07-24 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in (Numeric Functions): For `div()', clarify + truncation is towards zero. Thanks to Michal Jaegermann + for pointing out the need to clarify this. + +2014-07-10 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in (Numeric Functions): Document new `div()' function. + (Arbitrary Precision Integers): Document raison d'etre for div(). + * gawk.1, awkcard.in: Document `div()'. + 2014-07-04 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in (Bracket Expressions): Add a note about how to @@ -600,6 +775,11 @@ * gawktexi.in: Update permissions on copyright page per latest maintain.texi. Add GPL to print version of book. +2014-06-24 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: Document that --pretty-print no longer runs the + program. Remove mention of GAWK_NO_PP_RUN env var. + 2014-06-22 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in: Typo fixes and minor corrections. @@ -896,7 +1076,7 @@ 2013-12-26 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in: More minor additions / fixes. - (Bugs): Add John Malmberg for VMS. + (Bugs): Add John Malmberg for VMS. Other minor edits. 2013-12-25 Arnold D. Robbins <arnold@skeeve.com> @@ -970,6 +1150,11 @@ * gawktexi.in (FN, FFN, DF,DDF, PVERSION, CTL): Remove macros. They have no alternate versions and are just in the way. +2013-08-15 Arnold D. Robbins <arnold@skeeve.com> + + * gawk.1: Document that ENVIRON updates affect the environment. + * gawktexi.in: Ditto. + 2013-06-27 Arnold D. 
Robbins <arnold@skeeve.com> * texinfo.tex: Update from Karl, fixes a formating problem. diff --git a/doc/Makefile.am b/doc/Makefile.am index 5c585f1c..5eefaadf 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -26,7 +26,7 @@ info_TEXINFOS = gawk.texi gawkinet.texi -man_MANS = gawk.1 igawk.1 +man_MANS = gawk.1 EXTRA_DIST = ChangeLog ChangeLog.0 README.card ad.block setter.outline \ awkcard.in awkforai.txt texinfo.tex cardfonts \ @@ -50,7 +50,7 @@ EXTRA_DIST = ChangeLog ChangeLog.0 README.card ad.block setter.outline \ bc_notes # Get rid of generated files when cleaning -CLEANFILES = *.ps *.html *.dvi *~ awkcard.nc awkcard.tr gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf igawk.1.pdf +CLEANFILES = *.ps *.html *.dvi *~ awkcard.nc awkcard.tr gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf MAKEINFO = @MAKEINFO@ --no-split --force @@ -76,9 +76,9 @@ AWKCARD = awkcard.ps gawk.texi: $(srcdir)/gawktexi.in $(srcdir)/sidebar.awk awk -f $(srcdir)/sidebar.awk < $(srcdir)/gawktexi.in > gawk.texi -postscript: gawk.ps gawkinet.ps gawk.1.ps igawk.1.ps $(AWKCARD) +postscript: gawk.ps gawkinet.ps gawk.1.ps $(AWKCARD) -pdf-local: postscript gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf igawk.1.pdf +pdf-local: postscript gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf gawk.ps: gawk.dvi TEXINPUTS=$(srcdir): dvips -o gawk.ps gawk.dvi @@ -92,12 +92,6 @@ gawk.1.ps: gawk.1 gawk.1.pdf: gawk.1.ps ps2pdf gawk.1.ps gawk.1.pdf -igawk.1.ps: igawk.1 - -groff -man $(srcdir)/igawk.1 > igawk.1.ps - -igawk.1.pdf: igawk.1.ps - ps2pdf igawk.1.ps igawk.1.pdf - awkcard.tr: awkcard.in sed 's:SRCDIR:$(srcdir):' < $(srcdir)/awkcard.in > awkcard.tr diff --git a/doc/Makefile.in b/doc/Makefile.in index 577b4778..2fa1fac0 100644 --- a/doc/Makefile.in +++ b/doc/Makefile.in @@ -116,14 +116,14 @@ ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/arch.m4 \ $(top_srcdir)/m4/codeset.m4 $(top_srcdir)/m4/gettext.m4 \ $(top_srcdir)/m4/iconv.m4 $(top_srcdir)/m4/intlmacosx.m4 \ - 
$(top_srcdir)/m4/isc-posix.m4 $(top_srcdir)/m4/lcmessage.m4 \ - $(top_srcdir)/m4/lib-ld.m4 $(top_srcdir)/m4/lib-link.m4 \ - $(top_srcdir)/m4/lib-prefix.m4 $(top_srcdir)/m4/libsigsegv.m4 \ - $(top_srcdir)/m4/longlong.m4 $(top_srcdir)/m4/mpfr.m4 \ - $(top_srcdir)/m4/nls.m4 $(top_srcdir)/m4/noreturn.m4 \ - $(top_srcdir)/m4/po.m4 $(top_srcdir)/m4/progtest.m4 \ - $(top_srcdir)/m4/readline.m4 $(top_srcdir)/m4/socket.m4 \ - $(top_srcdir)/m4/ulonglong.m4 $(top_srcdir)/configure.ac + $(top_srcdir)/m4/lcmessage.m4 $(top_srcdir)/m4/lib-ld.m4 \ + $(top_srcdir)/m4/lib-link.m4 $(top_srcdir)/m4/lib-prefix.m4 \ + $(top_srcdir)/m4/libsigsegv.m4 $(top_srcdir)/m4/longlong.m4 \ + $(top_srcdir)/m4/mpfr.m4 $(top_srcdir)/m4/nls.m4 \ + $(top_srcdir)/m4/noreturn.m4 $(top_srcdir)/m4/po.m4 \ + $(top_srcdir)/m4/progtest.m4 $(top_srcdir)/m4/readline.m4 \ + $(top_srcdir)/m4/socket.m4 $(top_srcdir)/m4/ulonglong.m4 \ + $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) @@ -353,7 +353,7 @@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ info_TEXINFOS = gawk.texi gawkinet.texi -man_MANS = gawk.1 igawk.1 +man_MANS = gawk.1 EXTRA_DIST = ChangeLog ChangeLog.0 README.card ad.block setter.outline \ awkcard.in awkforai.txt texinfo.tex cardfonts \ api-figure1.eps api-figure1.fig api-figure1.pdf \ @@ -377,7 +377,7 @@ EXTRA_DIST = ChangeLog ChangeLog.0 README.card ad.block setter.outline \ # Get rid of generated files when cleaning -CLEANFILES = *.ps *.html *.dvi *~ awkcard.nc awkcard.tr gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf igawk.1.pdf +CLEANFILES = *.ps *.html *.dvi *~ awkcard.nc awkcard.tr gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf TROFF = groff -t -Tps -U SEDME = sed -e "s/^level0 restore/level0 restore flashme 100 72 moveto (Copyright `date '+%m-%d-%y %T'`, FSF, Inc. 
(all)) show/" \ -e "s/^\/level0 save def/\/level0 save def 30 -48 translate/" @@ -881,9 +881,9 @@ uninstall-man: uninstall-man1 gawk.texi: $(srcdir)/gawktexi.in $(srcdir)/sidebar.awk awk -f $(srcdir)/sidebar.awk < $(srcdir)/gawktexi.in > gawk.texi -postscript: gawk.ps gawkinet.ps gawk.1.ps igawk.1.ps $(AWKCARD) +postscript: gawk.ps gawkinet.ps gawk.1.ps $(AWKCARD) -pdf-local: postscript gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf igawk.1.pdf +pdf-local: postscript gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf gawk.ps: gawk.dvi TEXINPUTS=$(srcdir): dvips -o gawk.ps gawk.dvi @@ -897,12 +897,6 @@ gawk.1.ps: gawk.1 gawk.1.pdf: gawk.1.ps ps2pdf gawk.1.ps gawk.1.pdf -igawk.1.ps: igawk.1 - -groff -man $(srcdir)/igawk.1 > igawk.1.ps - -igawk.1.pdf: igawk.1.ps - ps2pdf igawk.1.ps igawk.1.pdf - awkcard.tr: awkcard.in sed 's:SRCDIR:$(srcdir):' < $(srcdir)/awkcard.in > awkcard.tr diff --git a/doc/awkcard.in b/doc/awkcard.in index 05c03502..06085343 100644 --- a/doc/awkcard.in +++ b/doc/awkcard.in @@ -244,7 +244,7 @@ Overridden by \*(FC\-\^\-posix\*(FR. .TI "\*(FC\-c\*(FR, \*(FC\-\^\-traditional\*(FR Disable \*(GK-specific extensions. .TI "\*(FC\-C\*(FR, \*(FC\-\^\-copyright\*(FR -Print the short version of the GNU +Print the short GNU copyright information on \*(FCstdout\*(FR. .TI "\*(FC\-d\*(FR[\*(FIfile\*(FR], \*(FC\-\^\-dump-variables\*(FR[\*(FC=\*(FIfile\*(FR] Print a sorted list of global variables, @@ -292,7 +292,7 @@ Force use of the locale's decimal point character when parsing input data. Output a pretty printed version of the program to \*(FIfile\*(FR (default: \*(FCawkprof.out\*(FR). .TI "\*(FC\-O\*(FR, \*(FC\-\^\-optimize\*(FR -Enable some internal optimizations. +Enable internal optimizations (default is on). .TI "\*(FC\-p\*(FR[\*(FC\*(FIfile\*(FR], \*(FC\-\^\-profile\*(FR[\*(FC=\*(FIfile\*(FR] Send profiling data to \*(FIfile\*(FR (default: \*(FCawkprof.out\*(FR). 
@@ -300,6 +300,9 @@ The profile contains execution counts in the left margin of each statement in the program. .TI "\*(FC\-P\*(FR, \*(FC\-\^\-posix\*(FR Disable common and GNU extensions.\*(CB +.TI "\*(FC\-r\*(FR, \*(FC\-\^\-re\-interval\*(FR +Enable \*(FIinterval expressions\*(FR. +(Needed with \*(FC\-c\*(FR.) .in -4n .EB "\s+2\f(HBCOMMAND LINE ARGUMENTS (\*(GK\f(HB)\*(FR\s0" @@ -311,9 +314,8 @@ Disable common and GNU extensions.\*(CB .ES .fi .in +4n -.TI "\*(FC\-r\*(FR, \*(FC\-\^\-re\-interval\*(FR -Enable \*(FIinterval expressions\*(FR. -(Needed with \*(FC\-c\*(FR.) +.TI "\*(FC\-s\*(FR, \*(FC\-\^\-no\-optimize\*(FR +Disable internal optimizations. .TI "\*(FC\-S\*(FR, \*(FC\-\^\-sandbox\*(FR Disable the \*(FCsystem()\*(FR function, input redirection with \*(FCgetline\*(FR, @@ -1606,11 +1608,14 @@ may be used in place of .fi .TS expand; -l lw(2i). +l lw(1.9i). \*(CD\*(FCatan2(\*(FIy\*(FC, \*(FIx\*(FC)\*(FR The arctangent of \*(FIy/x\fP in radians. \*(FCcos(\*(FIexpr\*(FC)\*(FR The cosine of \*(FIexpr\fP, which is in radians. \*(FCexp(\*(FIexpr\*(FC)\*(FR The exponential function (\*(FIe \*(FC^ \*(FIx\*(FR). \*(FCint(\*(FIexpr\*(FC)\*(FR Truncate to integer. +\*(CB\*(FCintdiv(\*(FIn\*(FR\*(FC,\*(FI d\*(FR\*(FC,\*(FI r\*(FR\*(FC)\*(FR T{ +Return result of integer division in \*(FIr\*(FR.\*(CD +T} \*(FClog(\*(FIexpr\*(FC)\*(FR The natural logarithm function (base \*(FIe\^\*(FR). \*(FCrand()\fP A random number \*(FIN\fP such that 0 \(<= \*(FIN\fP < 1. \*(FCsin(\*(FIexpr\*(FC)\*(FR The sine of \*(FIexpr\fP, which is in radians. @@ -13,7 +13,7 @@ . if \w'\(rq' .ds rq "\(rq . \} .\} -.TH GAWK 1 "Dec 17 2015" "Free Software Foundation" "Utility Commands" +.TH GAWK 1 "Feb 04 2016" "Free Software Foundation" "Utility Commands" .SH NAME gawk \- pattern scanning and processing language .SH SYNOPSIS @@ -405,17 +405,20 @@ is provided, uses a file named .B awkprof.out in the current directory. +Implies +.BR \-\^\-no\-optimize . 
.TP .PD 0 .B \-O .TP .PD .B \-\^\-optimize -Enable optimizations upon the internal representation of the program. +Enable +.IR gawk 's +default optimizations upon the internal representation of the program. Currently, this includes simple constant-folding, and tail call -elimination for recursive functions. The -.I gawk -maintainer hopes to add additional optimizations over time. +elimination for recursive functions. +This option is on by default. .TP .PD 0 \fB\-p\fR[\fIprof-file\fR] @@ -428,6 +431,8 @@ The default is .BR awkprof.out . The profile contains execution counts of each statement in the program in the left margin and function call counts for each user-defined function. +Implies +.BR \-\^\-no\-optimize . .TP .PD 0 .B \-P @@ -444,11 +449,6 @@ mode, with the following additional restrictions: escape sequences are not recognized. .TP \(bu -Only space and tab act as field separators when -.B FS -is set to a single space, newline does not. -.TP -\(bu You cannot continue lines after .B ? and @@ -493,6 +493,15 @@ They are enabled by default, but this option remains for use with .BR \-\^\-traditional . .TP .PD 0 +.B \-s +.TP +.PD +.B \-\^\-no\-optimize +Disable +.IR gawk 's +default optimizations upon the internal representation of the program. +.TP +.PD 0 .BI \-S .TP .PD @@ -785,9 +794,6 @@ In the special case that .B FS is a single space, fields are separated by runs of spaces and/or tabs and/or newlines. -(But see the section -.BR "POSIX COMPATIBILITY" , -below). .BR NOTE : The value of .B IGNORECASE @@ -918,11 +924,17 @@ An array containing the values of the current environment. The array is indexed by the environment variables, each element being the value of that variable (e.g., \fBENVIRON["HOME"]\fP might be \fB"/home/arnold"\fR). 
-Changing this array does not affect the environment seen by programs which +.sp +In POSIX mode, +changing this array does not affect the environment seen by programs which .I gawk spawns via redirection or the .B system() function. +Otherwise, +.I gawk +updates its real environment so that programs it spawns see +the changes. .TP .B ERRNO If a system error occurs either doing a redirection for @@ -936,6 +948,15 @@ then will contain a string describing the error. The value is subject to translation in non-English locales. +If the string in +.B ERRNO +corresponds to a system error in the +.IR errno (3) +variable, then the numeric value can be found in +.B PROCINFO["errno"]. +For non-system errors, +.B PROCINFO["errno"] +will be zero. .TP .B FIELDWIDTHS A whitespace separated list of field widths. When set, @@ -1093,6 +1114,13 @@ The value of the .IR getegid (2) system call. .TP +\fBPROCINFO["errno"]\fP +The value of +.IR errno (3) +when +.BR ERRNO +is set to the associated error message. +.TP \fBPROCINFO["euid"]\fP The value of the .IR geteuid (2) @@ -1124,7 +1152,7 @@ knows about the identifiers after it has finished parsing the program; they are updated while the program runs. For each identifier, the value of the element is one of the following: .RS -.TP +.TP \w'\fB"extension"\fR'u+1n \fB"array"\fR The identifier is an array. .TP @@ -1207,6 +1235,14 @@ change .IR gawk 's behavior: .TP +\fBPROCINFO["NONFATAL"]\fR +If this exists, then I/O errors for all output redirections become nonfatal. +.TP +\fBPROCINFO["\fIoutput_command\fB", "NONFATAL"]\fR +Make output errors for +.I output_name +be nonfatal. +.TP \fBPROCINFO["\fIcommand\fB", "pty"]\fR Use a pseudo-tty for two-way communication with .I command @@ -1220,6 +1256,23 @@ where is a redirection string or a filename. A value of zero or less than zero means no timeout. 
.TP +\fBPROCINFO["\fIinput\^\fB", "RETRY"]\fR +If an I/O error that may be retried occurs when reading data from +.IR input , +and this array entry exists, then +.B getline +will return \-2 instead of following the default behavior of returning \-1 +and configuring +.IR input +to return no further data. +An I/O error that may be retried is one where +.IR errno (3) +has the value EAGAIN, EWOULDBLOCK, EINTR, or ETIMEDOUT. +This may be useful in conjunction with +\fBPROCINFO["\fIinput\^\fB", "READ_TIMEOUT"]\fR +or situations where a file descriptor has been configured to behave in a +non-blocking fashion. +.TP \fBPROCINFO["sorted_in"]\fP If this element exists in .BR PROCINFO , @@ -1240,7 +1293,9 @@ Supported values are \fB"@val_num_desc"\fR, and \fB"@unsorted"\fR. -The value can also be the name of any comparison function defined +The value can also be the name (as a +.IR string ) +of any comparison function defined as follows: .sp .in +5m @@ -1544,9 +1599,9 @@ Vertical tab. The character represented by the string of hexadecimal digits following the .BR \ex . -As in ISO C, all following hexadecimal digits are considered part of +Up to two +following hexadecimal digits are considered part of the escape sequence. -(This feature should tell us something about language design by committee.) E.g., \fB"\ex1B"\fR is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character. .TP .BI \e ddd @@ -2290,6 +2345,13 @@ below.) The .B getline command returns 1 on success, 0 on end of file, and \-1 on an error. +If the +.IR errno (3) +value indicates that the I/O operation may be retried, +and \fBPROCINFO["\fIinput\^\fP", "RETRY"]\fR +is set, then \-2 will be returned instead of \-1, and further calls to +.B getline +may be attempted. Upon an error, .B ERRNO is set to a string describing the problem. @@ -2642,6 +2704,23 @@ The exponential function. .BI int( expr ) Truncate to integer. .TP +.BI intdiv( num ", " denom ", " result ) +Truncate +.I num +and +.I denom +to integers. 
Return the quotient of +.I num +divided by +.I denom +in \fIresult\fB["quotient"]\fR +and the remainder in +in \fIresult\fB["remainder"]\fR. +This is a +.I gawk +extension, primarily of value when working with +arbitrarily large integers. +.TP .BI log( expr ) The natural logarithm function. .TP @@ -3986,7 +4065,7 @@ We thank him. .SH COPYING PERMISSIONS Copyright \(co 1989, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2001, 2002, 2003, 2004, 2005, 2007, 2009, -2010, 2011, 2012, 2013, 2014 +2010, 2011, 2012, 2013, 2014, 2015, 2016 Free Software Foundation, Inc. .PP Permission is granted to make and distribute verbatim copies of diff --git a/doc/gawk.info b/doc/gawk.info index b5b45471..cf6a1356 100644 --- a/doc/gawk.info +++ b/doc/gawk.info @@ -175,6 +175,7 @@ in (a) below. A copy of the license is included in the section entitled * Computed Regexps:: Using Dynamic Regexps. * GNU Regexp Operators:: Operators specific to GNU software. * Case-sensitivity:: How to do case-insensitive matching. +* Strong Regexp Constants:: Strongly typed regexp constants. * Regexp Summary:: Regular expressions summary. * Records:: Controlling how data is split into records. @@ -217,6 +218,7 @@ in (a) below. A copy of the license is included in the section entitled 'getline'. * Getline Summary:: Summary of 'getline' Variants. * Read Timeout:: Reading input with a timeout. +* Retrying Input:: Retrying input after certain errors. * Command-line directories:: What happens if you put a directory on the command line. * Input Summary:: Input summary. @@ -246,6 +248,7 @@ in (a) below. A copy of the license is included in the section entitled * Special Caveats:: Things to watch out for. * Close Files And Pipes:: Closing Input and Output Files and Pipes. +* Nonfatal:: Enabling Nonfatal Output. * Output Summary:: Output summary. * Output Exercises:: Exercises. * Values:: Constants, Variables, and Regular @@ -557,6 +560,7 @@ in (a) below. 
A copy of the license is included in the section entitled * Array Functions:: Functions for working with arrays. * Flattening Arrays:: How to flatten arrays. * Creating Arrays:: How to create and populate arrays. +* Redirection API:: How to access and manipulate redirections. * Extension API Variables:: Variables provided by the API. * Extension Versioning:: API Version information. * Extension API Informational Variables:: Variables providing information about @@ -615,6 +619,7 @@ in (a) below. A copy of the license is included in the section entitled * Unix Installation:: Installing 'gawk' under various versions of Unix. * Quick Installation:: Compiling 'gawk' under Unix. +* Shell Startup Files:: Shell convenience functions. * Additional Configuration Options:: Other compile-time options. * Configuration Philosophy:: How it's all supposed to work. * Non-Unix Installation:: Installation on Other Operating @@ -2625,30 +2630,33 @@ The following list describes options mandated by the POSIX standard: '-o'[FILE] '--pretty-print'['='FILE] - Enable pretty-printing of 'awk' programs. By default, the output - program is created in a file named 'awkprof.out' (*note - Profiling::). The optional FILE argument allows you to specify a - different file name for the output. No space is allowed between - the '-o' and FILE, if FILE is supplied. + Enable pretty-printing of 'awk' programs. Implies '--no-optimize'. + By default, the output program is created in a file named + 'awkprof.out' (*note Profiling::). The optional FILE argument + allows you to specify a different file name for the output. No + space is allowed between the '-o' and FILE, if FILE is supplied. - NOTE: Due to the way 'gawk' has evolved, with this option your - program still executes. This will change in the next major - release, such that 'gawk' will only pretty-print the program - and not run it. + NOTE: In the past, this option would also execute your + program. This is no longer the case. 
'-O' '--optimize' - Enable some optimizations on the internal representation of the - program. At the moment, this includes just simple constant - folding. + Enable 'gawk''s default optimizations on the internal + representation of the program. At the moment, this includes simple + constant folding and tail recursion elimination in function calls. + + These optimizations are enabled by default. This option remains + primarily for backwards compatibility. However, it may be used to + cancel the effect of an earlier '-s' option (see later in this + list). '-p'[FILE] '--profile'['='FILE] - Enable profiling of 'awk' programs (*note Profiling::). By - default, profiles are created in a file named 'awkprof.out'. The - optional FILE argument allows you to specify a different file name - for the profile file. No space is allowed between the '-p' and - FILE, if FILE is supplied. + Enable profiling of 'awk' programs (*note Profiling::). Implies + '--no-optimize'. By default, profiles are created in a file named + 'awkprof.out'. The optional FILE argument allows you to specify a + different file name for the profile file. No space is allowed + between the '-p' and FILE, if FILE is supplied. The profile contains execution counts for each statement in the program in the left margin, and function call counts for each @@ -2662,9 +2670,6 @@ The following list describes options mandated by the POSIX standard: in 'gawk' that are disabled by this option. Also, the following additional restrictions apply: - * Newlines do not act as whitespace to separate fields when 'FS' - is equal to a single space (*note Fields::). - * Newlines are not allowed after '?' or ':' (*note Conditional Exp::). @@ -2685,6 +2690,11 @@ The following list describes options mandated by the POSIX standard: remains (both for backward compatibility and for use in combination with '--traditional'). +'-s' +'--no-optimize' + Disable 'gawk''s default optimizations on the internal + representation of the program. 
+ '-S' '--sandbox' Disable the 'system()' function, input redirections with 'getline', @@ -2933,6 +2943,9 @@ Since '.' is included at the beginning, 'gawk' searches first in the current directory and then in '/usr/local/share/awk'. In practice, this means that you will rarely need to change the value of 'AWKPATH'. + *Note Shell Startup Files::, for information on functions that help +to manipulate the 'AWKPATH' variable. + 'gawk' places the value of the search path that it used into 'ENVIRON["AWKPATH"]'. This provides access to the actual search path value from within an 'awk' program. @@ -2972,6 +2985,9 @@ empty value, 'gawk' uses a default path; this is typically '/usr/local/lib/gawk', although it can vary depending upon how 'gawk' was built. + *Note Shell Startup Files::, for information on functions that help +to manipulate the 'AWKLIBPATH' variable. + 'gawk' places the value of the search path that it used into 'ENVIRON["AWKLIBPATH"]'. This provides access to the actual search path value from within an 'awk' program. @@ -2998,7 +3014,8 @@ used by regular users: 'GAWK_SOCK_RETRIES' Controls the number of times 'gawk' attempts to retry a two-way TCP/IP (socket) connection before giving up. *Note TCP/IP - Networking::. + Networking::. Note that when nonfatal I/O is enabled (*note + Nonfatal::), 'gawk' only tries to open a TCP/IP socket once. 'POSIXLY_CORRECT' Causes 'gawk' to switch to POSIX-compatibility mode, disabling all @@ -3043,13 +3060,6 @@ change. The variables are: supposed to be differences, but occasionally theory and practice don't coordinate with each other.) -'GAWK_NO_PP_RUN' - When 'gawk' is invoked with the '--pretty-print' option, it will - not run the program if this environment variable exists. - - CAUTION: This variable will not survive into the next major - release. - 'GAWK_STACKSIZE' This specifies the amount by which 'gawk' should grow its internal evaluation stack, when needed. 
@@ -3314,6 +3324,7 @@ you specify more complicated classes of strings. * Computed Regexps:: Using Dynamic Regexps. * GNU Regexp Operators:: Operators specific to GNU software. * Case-sensitivity:: How to do case-insensitive matching. +* Strong Regexp Constants:: Strongly typed regexp constants. * Regexp Summary:: Regular expressions summary. @@ -3440,15 +3451,18 @@ sequences apply to both string constants and regexp constants: '\xHH...' The hexadecimal value HH, where HH stands for a sequence of - hexadecimal digits ('0'-'9', and either 'A'-'F' or 'a'-'f'). Like - the same construct in ISO C, the escape sequence continues until - the first nonhexadecimal digit is seen. (c.e.) However, using - more than two hexadecimal digits produces undefined results. (The - '\x' escape sequence is not allowed in POSIX 'awk'.) - - CAUTION: The next major release of 'gawk' will change, such - that a maximum of two hexadecimal digits following the '\x' - will be used. + hexadecimal digits ('0'-'9', and either 'A'-'F' or 'a'-'f'). A + maximum of two digits are allowed after the '\x'. Any further + hexadecimal digits are treated as simple letters or numbers. + (c.e.) (The '\x' escape sequence is not allowed in POSIX awk.) + + CAUTION: In ISO C, the escape sequence continues until the + first nonhexadecimal digit is seen. For many years, 'gawk' + would continue incorporating hexadecimal digits into the value + until a non-hexadecimal digit or the end of the string was + encountered. However, using more than two hexadecimal digits + produced undefined results. As of version 4.2, only two + digits are processed. '\/' A literal slash (necessary for regexp constants only). This @@ -4025,7 +4039,7 @@ No options default. 
-File: gawk.info, Node: Case-sensitivity, Next: Regexp Summary, Prev: GNU Regexp Operators, Up: Regexp +File: gawk.info, Node: Case-sensitivity, Next: Strong Regexp Constants, Prev: GNU Regexp Operators, Up: Regexp 3.8 Case Sensitivity in Matching ================================ @@ -4099,10 +4113,77 @@ and we don't recommend it. that 'gawk' does the right thing. -File: gawk.info, Node: Regexp Summary, Prev: Case-sensitivity, Up: Regexp +File: gawk.info, Node: Strong Regexp Constants, Next: Regexp Summary, Prev: Case-sensitivity, Up: Regexp -3.9 Summary -=========== +3.9 Strongly Typed Regexp Constants +=================================== + +This minor node describes a 'gawk'-specific feature. + + Regexp constants ('/.../') hold a strange position in the 'awk' +language. In most contexts, they act like an expression: '$0 ~ /.../'. +In other contexts, they denote only a regexp to be matched. In no case +are they really a "first class citizen" of the language. That is, you +cannot define a scalar variable whose type is "regexp" in the same sense +that you can define a variable to be a number or a string: + + num = 42 Numeric variable + str = "hi" String variable + re = /foo/ Wrong! re is the result of $0 ~ /foo/ + + For a number of more advanced use cases (described later on in this +Info file), it would be nice to have regexp constants that are "strongly +typed"; in other words, that denote a regexp useful for matching, and +not an expression. + + 'gawk' provides this feature. A strongly typed regexp constant looks +almost like a regular regexp constant, except that it is preceded by an +'@' sign: + + re = @/foo/ Regexp variable + + Strongly typed regexp constants _cannot_ be used everywhere that a +regular regexp constant can, because this would make the language even +more confusing. Instead, you may use them only in certain contexts: + + * On the righthand side of the '~' and '!~' operators: 'some_var ~ + @/foo/' (*note Regexp Usage::). 
+ + * In the 'case' part of a 'switch' statement (*note Switch + Statement::). + + * As an argument to one of the built-in functions that accept regexp + constants: 'gensub()', 'gsub()', 'match()', 'patsplit()', + 'split()', and 'sub()' (*note String Functions::). + + * As a parameter in a call to a user-defined function (*note + User-defined::). + + * On the righthand side of an assignment to a variable: 'some_var = + @/foo/'. In this case, the type of 'some_var' is regexp. + Additionally, 'some_var' can be used with '~' and '!~', passed to + one of the built-in functions listed above, or passed as a + parameter to a user-defined function. + + You may use the 'typeof()' built-in function (*note Type Functions::) +to determine if a variable or function parameter is a regexp variable. + + The true power of this feature comes from the ability to create +variables that have regexp type. Such variables can be passed on to +user-defined functions, without the confusing aspects of computed +regular expressions created from strings or string constants. They may +also be passed through indirect function calls (*note Indirect Calls::) +onto the built-in functions that accept regexp constants. + + When used in numeric conversions, strongly typed regexp variables +convert to zero. When used in string conversions, they convert to the +string value of the original regexp text. + + +File: gawk.info, Node: Regexp Summary, Prev: Strong Regexp Constants, Up: Regexp + +3.10 Summary +============ * Regular expressions describe sets of strings to be matched. In 'awk', regular expression constants are written enclosed between @@ -4135,6 +4216,9 @@ File: gawk.info, Node: Regexp Summary, Prev: Case-sensitivity, Up: Regexp sensitivity of regexp matching. In other 'awk' versions, use 'tolower()' or 'toupper()'. + * Strongly typed regexp constants ('@/.../') enable certain advanced + use cases to be described later on in the Info file. 
+ File: gawk.info, Node: Reading Files, Next: Printing, Prev: Regexp, Up: Top @@ -4173,6 +4257,7 @@ be named on the 'awk' command line (*note Getline::). * Getline:: Reading files under explicit program control using the 'getline' function. * Read Timeout:: Reading input with a timeout. +* Retrying Input:: Retrying input after certain errors. * Command-line directories:: What happens if you put a directory on the command line. * Input Summary:: Input summary. @@ -4434,7 +4519,7 @@ When 'awk' reads an input record, the record is automatically "parsed" or separated by the 'awk' utility into chunks called "fields". By default, fields are separated by "whitespace", like words in a line. Whitespace in 'awk' means any string of one or more spaces, TABs, or -newlines;(1) other characters that are considered whitespace by other +newlines; other characters that are considered whitespace by other languages (such as formfeed, vertical tab, etc.) are _not_ considered whitespace by 'awk'. @@ -4486,11 +4571,6 @@ record: -| Julie F -| Samuel A - ---------- Footnotes ---------- - - (1) In POSIX 'awk', newlines are not considered whitespace for -separating fields. - File: gawk.info, Node: Nonconstant Fields, Next: Changing Fields, Prev: Fields, Up: Reading Files @@ -5436,6 +5516,11 @@ record, such as a file that cannot be opened, then 'getline' returns -1. In this case, 'gawk' sets the variable 'ERRNO' to a string describing the error that occurred. + If 'ERRNO' indicates that the I/O operation may be retried, and +'PROCINFO["INPUT", "RETRY"]' is set, then 'getline' returns -2 instead +of -1, and further calls to 'getline' may be attempted. *Note Retrying +Input:: for further information about this feature. + In the following examples, COMMAND stands for a string value that represents a shell command. 
@@ -5871,7 +5956,7 @@ VAR Table 4.1: 'getline' variants and what they set -File: gawk.info, Node: Read Timeout, Next: Command-line directories, Prev: Getline, Up: Reading Files +File: gawk.info, Node: Read Timeout, Next: Retrying Input, Prev: Getline, Up: Reading Files 4.10 Reading Input with a Timeout ================================= @@ -5950,7 +6035,8 @@ per-command or per-connection basis. 'gawk' considers a timeout event to be an error even though the attempt to read from the underlying device may succeed in a later attempt. This is a limitation, and it also means that you cannot use -this to multiplex input from two or more sources. +this to multiplex input from two or more sources. *Note Retrying +Input:: for a way to enable later I/O attempts to succeed. Assigning a timeout value prevents read operations from blocking indefinitely. But bear in mind that there are other ways 'gawk' can @@ -5964,9 +6050,36 @@ can block indefinitely until some other process opens it for writing. (1) This assumes that standard input is the keyboard. -File: gawk.info, Node: Command-line directories, Next: Input Summary, Prev: Read Timeout, Up: Reading Files +File: gawk.info, Node: Retrying Input, Next: Command-line directories, Prev: Read Timeout, Up: Reading Files + +4.11 Retrying Reads After Certain Input Errors +============================================== + +This minor node describes a feature that is specific to 'gawk'. + + When 'gawk' encounters an error while reading input, by default +'getline' returns -1, and subsequent attempts to read from that file +result in an end-of-file indication. However, you may optionally +instruct 'gawk' to allow I/O to be retried when certain errors are +encountered by setting a special element in the 'PROCINFO' array (*note +Auto-set::): + + PROCINFO["INPUT_NAME", "RETRY"] = 1 + + When this element exists, 'gawk' checks the value of the system (C +language) 'errno' variable when an I/O error occurs. 
If 'errno' +indicates a subsequent I/O attempt may succeed, 'getline' instead +returns -2 and further calls to 'getline' may succeed. This applies to +the 'errno' values 'EAGAIN', 'EWOULDBLOCK', 'EINTR', or 'ETIMEDOUT'. + + This feature is useful in conjunction with 'PROCINFO["INPUT_NAME", +"READ_TIMEOUT"]' or situations where a file descriptor has been +configured to behave in a non-blocking fashion. + + +File: gawk.info, Node: Command-line directories, Next: Input Summary, Prev: Retrying Input, Up: Reading Files -4.11 Directories on the Command Line +4.12 Directories on the Command Line ==================================== According to the POSIX standard, files named on the 'awk' command line @@ -5989,7 +6102,7 @@ usable data from an 'awk' program. File: gawk.info, Node: Input Summary, Next: Input Exercises, Prev: Command-line directories, Up: Reading Files -4.12 Summary +4.13 Summary ============ * Input is split into records based on the value of 'RS'. The @@ -6061,7 +6174,7 @@ File: gawk.info, Node: Input Summary, Next: Input Exercises, Prev: Command-li File: gawk.info, Node: Input Exercises, Prev: Input Summary, Up: Reading Files -4.13 Exercises +4.14 Exercises ============== 1. Using the 'FIELDWIDTHS' variable (*note Constant Size::), write a @@ -6111,6 +6224,7 @@ function. 'gawk' allows access to inherited file descriptors. * Close Files And Pipes:: Closing Input and Output Files and Pipes. +* Nonfatal:: Enabling Nonfatal Output. * Output Summary:: Output summary. * Output Exercises:: Exercises. @@ -7018,7 +7132,7 @@ that 'gawk' provides: behavior. -File: gawk.info, Node: Close Files And Pipes, Next: Output Summary, Prev: Special Files, Up: Printing +File: gawk.info, Node: Close Files And Pipes, Next: Nonfatal, Prev: Special Files, Up: Printing 5.9 Closing Input and Output Redirections ========================================= @@ -7186,9 +7300,68 @@ call. See the system manual pages for information on how to decode this value. 
-File: gawk.info, Node: Output Summary, Next: Output Exercises, Prev: Close Files And Pipes, Up: Printing +File: gawk.info, Node: Nonfatal, Next: Output Summary, Prev: Close Files And Pipes, Up: Printing + +5.10 Enabling Nonfatal Output +============================= + +This minor node describes a 'gawk'-specific feature. + + In standard 'awk', output with 'print' or 'printf' to a nonexistent +file, or some other I/O error (such as filling up the disk) is a fatal +error. + + $ gawk 'BEGIN { print "hi" > "/no/such/file" }' + error-> gawk: cmd. line:1: fatal: can't redirect to `/no/such/file' (No such file or directory) + + 'gawk' makes it possible to detect that an error has occurred, +allowing you to possibly recover from the error, or at least print an +error message of your choosing before exiting. You can do this in one +of two ways: + + * For all output files, by assigning any value to + 'PROCINFO["NONFATAL"]'. + + * On a per-file basis, by assigning any value to 'PROCINFO[FILENAME, + "NONFATAL"]'. Here, FILENAME is the name of the file to which you + wish output to be nonfatal. -5.10 Summary + Once you have enabled nonfatal output, you must check 'ERRNO' after +every relevant 'print' or 'printf' statement to see if something went +wrong. It is also a good idea to initialize 'ERRNO' to zero before +attempting the output. For example: + + $ gawk ' + > BEGIN { + > PROCINFO["NONFATAL"] = 1 + > ERRNO = 0 + > print "hi" > "/no/such/file" + > if (ERRNO) { + > print("Output failed:", ERRNO) > "/dev/stderr" + > exit 1 + > } + > }' + error-> Output failed: No such file or directory + + Here, 'gawk' did not produce a fatal error; instead it let the 'awk' +program code detect the problem and handle it. + + This mechanism works also for standard output and standard error. +For standard output, you may use 'PROCINFO["-", "NONFATAL"]' or +'PROCINFO["/dev/stdout", "NONFATAL"]'. For standard error, use +'PROCINFO["/dev/stderr", "NONFATAL"]'. 
+ + When attempting to open a TCP/IP socket (*note TCP/IP Networking::), +'gawk' tries multiple times. The 'GAWK_SOCK_RETRIES' environment +variable (*note Other Environment Variables::) allows you to override +'gawk''s builtin default number of attempts. However, once nonfatal I/O +is enabled for a given socket, 'gawk' only retries once, relying on +'awk'-level code to notice that there was a problem. + + +File: gawk.info, Node: Output Summary, Next: Output Exercises, Prev: Nonfatal, Up: Printing + +5.11 Summary ============ * The 'print' statement prints comma-separated expressions. Each @@ -7210,10 +7383,15 @@ File: gawk.info, Node: Output Summary, Next: Output Exercises, Prev: Close Fi For coprocesses, it is possible to close only one direction of the communications. + * Normally errors with 'print' or 'printf' are fatal. 'gawk' lets + you make output errors be nonfatal either for all files or on a + per-file basis. You must then check for errors after every + relevant output statement. + File: gawk.info, Node: Output Exercises, Prev: Output Summary, Up: Printing -5.11 Exercises +5.12 Exercises ============== 1. Rewrite the program: @@ -10133,7 +10311,7 @@ each variable.) The default value is '" "', a string consisting of a single space. As a special exception, this value means that any sequence of - spaces, TABs, and/or newlines is a single separator.(1) It also + spaces, TABs, and/or newlines is a single separator. It also causes spaces, TABs, and newlines at the beginning and end of a record to be ignored. @@ -10230,10 +10408,6 @@ each variable.) Internationalization::). The default value of 'TEXTDOMAIN' is '"messages"'. - ---------- Footnotes ---------- - - (1) In POSIX 'awk', newline does not count as whitespace. - File: gawk.info, Node: Auto-set, Next: ARGC and ARGV, Prev: User-modified, Up: Built-in Variables @@ -10297,10 +10471,24 @@ they are not special: An associative array containing the values of the environment. 
The array indices are the environment variable names; the elements are the values of the particular environment variables. For example, - 'ENVIRON["HOME"]' might be '"/home/arnold"'. Changing this array - does not affect the environment passed on to any programs that - 'awk' may spawn via redirection or the 'system()' function. (In a - future version of 'gawk', it may do so.) + 'ENVIRON["HOME"]' might be '/home/arnold'. + + For POSIX 'awk', changing this array does not affect the + environment passed on to any programs that 'awk' may spawn via + redirection or the 'system()' function. + + However, beginning with version 4.2, if not in POSIX compatibility + mode, 'gawk' does update its own environment when 'ENVIRON' is + changed, thus changing the environment seen by programs that it + creates. You should therefore be especially careful if you modify + 'ENVIRON["PATH"]', which is the search path for finding executable + programs. + + This can also affect the running 'gawk' program, since some of the + built-in functions may pay attention to certain environment + variables. The most notable instance of this is 'mktime()' (*note + Time Functions::), which pays attention to the value of the 'TZ' + environment variable on many systems. Some operating systems may not have environment variables. On such systems, the 'ENVIRON' array is empty (except for @@ -10323,6 +10511,11 @@ they are not special: 'getline' returning -1. You are, of course, free to clear it yourself before doing an I/O operation. + If the value of 'ERRNO' corresponds to a system error in the C + 'errno' variable, then 'PROCINFO["errno"]' will be set to the value + of 'errno'. For non-system errors, 'PROCINFO["errno"]' will be + zero. + 'FILENAME' The name of the current input file. When no data files are listed on the command line, 'awk' reads from the standard input and @@ -10371,6 +10564,10 @@ they are not special: 'PROCINFO["egid"]' The value of the 'getegid()' system call. 
+ 'PROCINFO["errno"]' + The value of the C 'errno' variable when 'ERRNO' is set to the + associated error message. + 'PROCINFO["euid"]' The value of the 'geteuid()' system call. @@ -10469,6 +10666,14 @@ they are not special: The following elements allow you to change 'gawk''s behavior: + 'PROCINFO["NONFATAL"]' + If this element exists, then I/O errors for all output + redirections become nonfatal. *Note Nonfatal::. + + 'PROCINFO["OUTPUT_NAME", "NONFATAL"]' + Make output errors for OUTPUT_NAME be nonfatal. *Note + Nonfatal::. + 'PROCINFO["COMMAND", "pty"]' For two-way communication to COMMAND, use a pseudo-tty instead of setting up a two-way pipe. *Note Two-way I/O:: for more @@ -11870,6 +12075,21 @@ brackets ([ ]): truncated toward zero. For example, 'int(3)' is 3, 'int(3.9)' is 3, 'int(-3.9)' is -3, and 'int(-3)' is -3 as well. +'intdiv(NUMERATOR, DENOMINATOR, RESULT)' + Perform integer division, similar to the standard C function of the + same name. First, truncate 'numerator' and 'denominator' towards + zero, creating integer values. Clear the 'result' array, and then + set 'result["quotient"]' to the result of 'numerator / + denominator', truncated towards zero to an integer, and set + 'result["remainder"]' to the result of 'numerator % denominator', + truncated towards zero to an integer. This function is primarily + intended for use with arbitrary length integers; it avoids creating + MPFR arbitrary precision floating-point values (*note Arbitrary + Precision Integers::). + + This function is a 'gawk' extension. It is not available in + compatibility mode (*note Options::). + 'log(X)' Return the natural logarithm of X, if X is positive; otherwise, return 'NaN' ("not a number") on IEEE 754 systems. 
Additionally, @@ -13307,14 +13527,56 @@ File: gawk.info, Node: Type Functions, Next: I18N Functions, Prev: Bitwise Fu 9.1.7 Getting Type Information ------------------------------ -'gawk' provides a single function that lets you distinguish an array -from a scalar variable. This is necessary for writing code that -traverses every element of an array of arrays (*note Arrays of -Arrays::). +'gawk' provides two functions that let you distinguish the type of a +variable. This is necessary for writing code that traverses every +element of an array of arrays (*note Arrays of Arrays::), and in other +contexts. 'isarray(X)' Return a true value if X is an array. Otherwise, return false. +'typeof(X)' + Return one of the following strings, depending upon the type of X: + + '"array"' + X is an array. + + '"regexp"' + X is a strongly typed regexp (*note Strong Regexp + Constants::). + + '"number"' + X is a number. + + '"string"' + X is a string. + + '"strnum"' + X is a string that might be a number, such as a field or the + result of calling 'split()'. (I.e., X has the STRNUM + attribute; *note Variable Typing::.) + + '"unassigned"' + X is a scalar variable that has not been assigned a value yet. + For example: + + BEGIN { + a[1] # creates a[1] but it has no assigned value + print typeof(a[1]) # unassigned + } + + '"untyped"' + X has not been used yet at all; it can become a scalar or + an array. For example: + + BEGIN { + print typeof(x) # x never used --> untyped + mk_arr(x) + print typeof(x) # x now an array --> array + } + + function mk_arr(a) { a[1] = 1 } + 'isarray()' is meant for use in two circumstances. The first is when traversing a multidimensional array: you can test if an element is itself an array or not. The second is inside the body of a user-defined @@ -13328,6 +13590,14 @@ parameter is an array or not. that has not been previously used to 'isarray()', 'gawk' ends up turning it into a scalar. 
+ The 'typeof()' function is general; it allows you to determine if a +variable or function parameter is a scalar, an array, or a strongly +typed regexp. + + 'isarray()' is deprecated; you should use 'typeof()' instead. You +should replace any existing uses of 'isarray(var)' in your code with +'typeof(var) == "array"'. + File: gawk.info, Node: I18N Functions, Prev: Type Functions, Up: Built-in @@ -19924,8 +20194,7 @@ output. They are as follows: you typed when you wrote it. This is because 'gawk' creates the profiled version by "pretty-printing" its internal representation of the program. The advantage to this is that 'gawk' can produce a standard -representation. The disadvantage is that all source code comments are -lost. Also, things such as: +representation. Also, things such as: /foo/ @@ -19984,8 +20253,28 @@ the 'Ctrl-\' key. called this way, 'gawk' "pretty-prints" the program into 'awkprof.out', without any execution counts. - NOTE: The '--pretty-print' option still runs your program. This - will change in the next major release. + NOTE: Once upon a time, the '--pretty-print' option would also run + your program. This is no longer the case. + + There is a significant difference between the output created when +profiling, and that created when pretty-printing. Pretty-printed output +preserves the original comments that were in the program, although their +placement may not correspond exactly to their original locations in the +source code. + + However, as a deliberate design decision, profiling output _omits_ +the original program's comments. This allows you to focus on the +execution count data and helps you avoid the temptation to use the +profiler for pretty-printing. + + Additionally, pretty-printed output does not have the leading +indentation that the profiling output does. This makes it easy to +pretty-print your code once development is completed, and then use the +result as the final version of your program. 
+ + Because the internal representation of your program is formatted to +recreate an 'awk' program, profiling and pretty-printing automatically +disable 'gawk''s default optimizations. File: gawk.info, Node: Advanced Features Summary, Prev: Profiling, Up: Advanced Features @@ -20026,8 +20315,7 @@ File: gawk.info, Node: Advanced Features Summary, Prev: Profiling, Up: Advanc 'USR1' signal while profiling causes 'gawk' to dump the profile and keep going, including a function call stack. - * You can also just "pretty-print" the program. This currently also - runs the program, but that will change in the next major release. + * You can also just "pretty-print" the program. File: gawk.info, Node: Internationalization, Next: Debugger, Prev: Advanced Features, Up: Top @@ -21705,6 +21993,62 @@ some limitations. A few that it's worth being aware of are: * The 'gawk' debugger only accepts source code supplied with the '-f' option. + One other point is worth discussing. Conventional debuggers run in a +separate process (and thus address space) from the programs that they +debug (the "debuggee", if you will). + + The 'gawk' debugger is different; it is an integrated part of 'gawk' +itself. This makes it possible, in rare cases, for 'gawk' to become an +excellent demonstrator of Heisenberg Uncertainty physics, where the mere +act of observing something can change it. Consider the following:(1) + + $ cat test.awk + -| { print typeof($1), typeof($2) } + $ cat test.data + -| abc 123 + $ gawk -f test.awk test.data + -| strnum strnum + + This is all as expected: field data has the STRNUM attribute (*note +Variable Typing::). Now watch what happens when we run this program +under the debugger: + + $ gawk -D -f test.awk test.data + gawk> w $1 Set watchpoint on $1 + -| Watchpoint 1: $1 + gawk> w $2 Set watchpoint on $2 + -| Watchpoint 2: $2 + gawk> r Start the program + -| Starting program: + -| Stopping in Rule ... 
+ -| Watchpoint 1: $1 Watchpoint fires + -| Old value: "" + -| New value: "abc" + -| main() at `test.awk':1 + -| 1 { print typeof($1), typeof($2) } + gawk> n Keep going ... + -| Watchpoint 2: $2 Watchpoint fires + -| Old value: "" + -| New value: "123" + -| main() at `test.awk':1 + -| 1 { print typeof($1), typeof($2) } + gawk> n Get result from typeof() + -| strnum number Result for $2 isn't right + -| Program exited normally with exit value: 0 + gawk> quit + + In this case, the act of comparing the new value of '$2' with the old +one caused 'gawk' to evaluate it and determine that it is indeed a +number, and this is reflected in the result of 'typeof()'. + + Cases like this where the debugger is not transparent to the +program's execution should be rare. If you encounter one, please report +it (*note Bugs::). + + ---------- Footnotes ---------- + + (1) Thanks to Hermann Peifer for this example. + File: gawk.info, Node: Debugging Summary, Prev: Limitations, Up: Debugger @@ -21733,6 +22077,9 @@ File: gawk.info, Node: Debugging Summary, Prev: Limitations, Up: Debugger it is used by the debugger to provide command-line history and editing. + * Usually, the debugger does not affect the program being + debugged, but occasionally it can. + File: gawk.info, Node: Arbitrary Precision Arithmetic, Next: Dynamic Extensions, Prev: Debugger, Up: Top @@ -22466,6 +22813,62 @@ the following: gawk -M 'BEGIN { n = 13; print n % 2 }' + When dividing two arbitrary precision integers with either '/' or +'%', the result is typically an arbitrary precision floating point value +(unless the denominator evenly divides into the numerator). In order to +do integer division or remainder with arbitrary precision integers, use +the built-in 'intdiv()' function (*note Numeric Functions::). 
+ + You can simulate the 'intdiv()' function in standard 'awk' using this +user-defined function: + + # intdiv --- do integer division + + function intdiv(numerator, denominator, result) + { + split("", result) + + numerator = int(numerator) + denominator = int(denominator) + result["quotient"] = int(numerator / denominator) + result["remainder"] = int(numerator % denominator) + + return 0.0 + } + + The following example program, contributed by Katie Wasserman, uses +'intdiv()' to compute the digits of pi to as many places as you choose +to set: + + # pi.awk --- compute the digits of pi + + BEGIN { + digits = 100000 + two = 2 * 10 ^ digits + pi = two + for (m = digits * 4; m > 0; --m) { + d = m * 2 + 1 + x = pi * m + intdiv(x, d, result) + pi = result["quotient"] + pi = pi + two + } + print pi + } + + When asked about the algorithm used, Katie replied: + + It's not that well known but it's not that obscure either. It's + Euler's modification to Newton's method for calculating pi. Take a + look at lines (23) - (25) here: + <http://mathworld.wolfram.com/PiFormulas.html>. + + The algorithm I wrote simply expands the multiply by 2 and works + from the innermost expression outwards. I used this to program HP + calculators because it's quite easy to modify for tiny memory + devices with smallish word sizes. See + <http://www.hpmuseum.org/cgi-sys/cgiwrap/hpmuseum/articles.cgi?read=899>. + ---------- Footnotes ---------- (1) Weisstein, Eric W. 'Sylvester's Sequence'. From MathWorld--A @@ -22828,6 +23231,7 @@ API in detail. * Symbol Table Access:: Functions for accessing global variables. * Array Manipulation:: Functions for working with arrays. +* Redirection API:: How to access and manipulate redirections. * Extension API Variables:: Variables provided by the API. * Extension API Boilerplate:: Boilerplate code for using the API. 
@@ -22883,6 +23287,8 @@ operations: - Flattening an array for easy C-style looping over all its indices and elements + * Accessing and manipulating redirections. + Some points about using the API: * The following types, macros, and/or functions are referenced in @@ -24088,7 +24494,7 @@ using 'release_value()'. 'double' to store. -File: gawk.info, Node: Array Manipulation, Next: Extension API Variables, Prev: Symbol Table Access, Up: Extension API Description +File: gawk.info, Node: Array Manipulation, Next: Redirection API, Prev: Symbol Table Access, Up: Extension API Description 16.4.11 Array Manipulation -------------------------- @@ -24572,9 +24978,78 @@ array: environment variable.) -File: gawk.info, Node: Extension API Variables, Next: Extension API Boilerplate, Prev: Array Manipulation, Up: Extension API Description +File: gawk.info, Node: Redirection API, Next: Extension API Variables, Prev: Array Manipulation, Up: Extension API Description + +16.4.12 Accessing and Manipulating Redirections +----------------------------------------------- + +The following function allows extensions to access and manipulate +redirections. + +'awk_bool_t get_file(const char *name,' +' size_t name_len,' +' const char *filetype,' +' int fd,' +' const awk_input_buf_t **ibufp,' +' const awk_output_buf_t **obufp);' + Look up a file in 'gawk''s internal redirection table. If 'name' + is 'NULL' or 'name_len' is zero, return data for the currently open + input file corresponding to 'FILENAME'. (This does not access the + 'filetype' argument, so that may be undefined). If the file is not + already open, attempt to open it. The 'filetype' argument must be + zero-terminated and should be one of: + + '">"' + A file opened for output. + + '">>"' + A file opened for append. + + '"<"' + A file opened for input. + + '"|>"' + A pipe opened for output. + + '"|<"' + A pipe opened for input. + + '"|&"' + A two-way coprocess. + + On error, return a 'false' value. 
Otherwise, return 'true', and + return additional information about the redirection in the 'ibufp' + and 'obufp' pointers. For input redirections, the '*ibufp' value + should be non-'NULL', and '*obufp' should be 'NULL'. For output + redirections, the '*obufp' value should be non-'NULL', and '*ibufp' + should be 'NULL'. For two-way coprocesses, both values should be + non-'NULL'. + + In the usual case, the extension is interested in '(*ibufp)->fd' + and/or 'fileno((*obufp)->fp)'. If the file is not already open, + and the 'fd' argument is non-negative, 'gawk' will use that file + descriptor instead of opening the file in the usual way. If 'fd' + is non-negative, but the file exists already, 'gawk' ignores 'fd' + and returns the existing file. It is the caller's responsibility + to notice that neither the 'fd' in the returned 'awk_input_buf_t' + nor the 'fd' in the returned 'awk_output_buf_t' matches the + requested value. + + Note that supplying a file descriptor is currently _not_ supported + for pipes. However, supplying a file descriptor should work for + input, output, append, and two-way (coprocess) sockets. If + 'filetype' is two-way, 'gawk' assumes that it is a socket! Note + that in the two-way case, the input and output file descriptors may + differ. To check for success, you must check whether either + matches. + + It is anticipated that this API function will be used to implement +I/O multiplexing and a socket library. + + +File: gawk.info, Node: Extension API Variables, Next: Extension API Boilerplate, Prev: Redirection API, Up: Extension API Description -16.4.12 API Variables +16.4.13 API Variables --------------------- The API provides two sets of variables. The first provides information @@ -24591,7 +25066,7 @@ information about how 'gawk' was invoked. 
File: gawk.info, Node: Extension Versioning, Next: Extension API Informational Variables, Up: Extension API Variables -16.4.12.1 API Version Constants and Variables +16.4.13.1 API Version Constants and Variables ............................................. The API provides both a "major" and a "minor" version number. The API @@ -24640,7 +25115,7 @@ Boilerplate::). File: gawk.info, Node: Extension API Informational Variables, Prev: Extension Versioning, Up: Extension API Variables -16.4.12.2 Informational Variables +16.4.13.2 Informational Variables ................................. The API provides access to several variables that describe whether the @@ -24675,7 +25150,7 @@ change during execution. File: gawk.info, Node: Extension API Boilerplate, Prev: Extension API Variables, Up: Extension API Description -16.4.13 Boilerplate Code +16.4.14 Boilerplate Code ------------------------ As mentioned earlier (*note Extension Mechanism Outline::), the function @@ -26003,15 +26478,17 @@ project. * GD graphics library extension + * MPFR library extension (this provides access to a number of MPFR + functions that 'gawk''s native MPFR support does not) + * PDF extension * PostgreSQL extension - * MPFR library extension (this provides access to a number of MPFR - functions that 'gawk''s native MPFR support does not) - * Redis extension + * Select extension + * XML parser extension, using the Expat (http://expat.sourceforge.net) XML parsing library @@ -26424,6 +26901,9 @@ current version of 'gawk'. - Directories on the command line produce a warning and are skipped (*note Command-line directories::) + - Output with 'print' and 'printf' need not be fatal (*note + Nonfatal::) + * New keywords: - The 'BEGINFILE' and 'ENDFILE' special patterns (*note @@ -26473,6 +26953,9 @@ current version of 'gawk'. 
- The 'bindtextdomain()', 'dcgettext()', and 'dcngettext()' functions for internationalization (*note Programmer i18n::) + - The 'intdiv()' function for doing integer division and + remainder (*note Numeric Functions::) + * Changes and/or additions in the command-line options: - The 'AWKPATH' environment variable for specifying a path @@ -26483,16 +26966,16 @@ current version of 'gawk'. - The '-b', '-c', '-C', '-d', '-D', '-e', '-E', '-g', '-h', '-i', '-l', '-L', '-M', '-n', '-N', '-o', '-O', '-p', '-P', - '-r', '-S', '-t', and '-V' short options. Also, the ability - to use GNU-style long-named options that start with '--', and - the '--assign', '--bignum', '--characters-as-bytes', + '-r', '-s', '-S', '-t', and '-V' short options. Also, the + ability to use GNU-style long-named options that start with + '--', and the '--assign', '--bignum', '--characters-as-bytes', '--copyright', '--debug', '--dump-variables', '--exec', '--field-separator', '--file', '--gen-pot', '--help', '--include', '--lint', '--lint-old', '--load', - '--non-decimal-data', '--optimize', '--posix', - '--pretty-print', '--profile', '--re-interval', '--sandbox', - '--source', '--traditional', '--use-lc-numeric', and - '--version' long options (*note Options::). + '--non-decimal-data', '--optimize', '--no-optimize', + '--posix', '--pretty-print', '--profile', '--re-interval', + '--sandbox', '--source', '--traditional', '--use-lc-numeric', + and '--version' long options (*note Options::). * Support for the following obsolete systems was removed from the code and the documentation for 'gawk' version 4.0: @@ -26526,6 +27009,11 @@ current version of 'gawk'. - Ultrix + * Support for the following systems was removed from the code for + 'gawk' version 4.2: + + - MirBSD + File: gawk.info, Node: Feature History, Next: Common Extensions, Prev: POSIX/GNU, Up: Language History @@ -26917,6 +27405,34 @@ POSIX 'awk', in the order they were added to 'gawk'. * Support for Ultrix was removed. 
+ Version 4.2 introduced the following changes: + + * Changes to 'ENVIRON' are reflected into 'gawk''s environment and + that of programs that it runs. *Note Auto-set::. + + * The '--pretty-print' option no longer runs the 'awk' program too. + *Note Options::. + + * The 'igawk' program and its manual page are no longer installed + when 'gawk' is built. *Note Igawk Program::. + + * The 'intdiv()' function. *Note Numeric Functions::. + + * The maximum number of hexadecimal digits in '\x' escapes is now + two. *Note Escape Sequences::. + + * Nonfatal output with 'print' and 'printf'. *Note Nonfatal::. + + * For many years, POSIX specified that default field splitting only + allowed spaces and tabs to separate fields, and this was how 'gawk' + behaved with '--posix'. As of 2013, the standard restored + historical behavior, and now default field splitting with '--posix' + also allows newlines to separate fields. + + * Support for MirBSD was removed. + + * Support for GNU/Linux on Alpha was removed. + File: gawk.info, Node: Common Extensions, Next: Ranges and Locales, Prev: Feature History, Up: Language History @@ -27435,7 +27951,9 @@ Various '.c', '.y', and '.h' files 'doc/igawk.1' The 'troff' source for a manual page describing the 'igawk' program - presented in *note Igawk Program::. + presented in *note Igawk Program::. (Since 'gawk' can do its own + '@include' processing, neither 'igawk' nor 'igawk.1' is + installed.) 'doc/Makefile.in' The input file used during the configuration process to generate @@ -27477,17 +27995,22 @@ Various '.c', '.y', and '.h' files contains a 'Makefile.in' file, which 'configure' uses to generate a 'Makefile'. 'Makefile.am' is used by GNU Automake to create 'Makefile.in'. The library functions from *note Library - Functions::, and the 'igawk' program from *note Igawk Program:: are - included as ready-to-use files in the 'gawk' distribution. They - are installed as part of the installation process. 
The rest of the - programs in this Info file are available in appropriate - subdirectories of 'awklib/eg'. + Functions::, are included as ready-to-use files in the 'gawk' + distribution. They are installed as part of the installation + process. The rest of the programs in this Info file are available + in appropriate subdirectories of 'awklib/eg'. 'extension/*' The source code, manual pages, and infrastructure files for the sample extensions included with 'gawk'. *Note Dynamic Extensions::, for more information. +'extras/*' + Additional non-essential files. Currently, this directory contains + some shell startup files to be installed in '/etc/profile.d' to aid + in manipulating the 'AWKPATH' and 'AWKLIBPATH' environment + variables. *Note Shell Startup Files::, for more information. + 'posix/*' Files needed for building 'gawk' on POSIX-compliant systems. @@ -27518,11 +28041,12 @@ for your system yourself. * Menu: * Quick Installation:: Compiling 'gawk' under Unix. +* Shell Startup Files:: Shell convenience functions. * Additional Configuration Options:: Other compile-time options. * Configuration Philosophy:: How it's all supposed to work. -File: gawk.info, Node: Quick Installation, Next: Additional Configuration Options, Up: Unix Installation +File: gawk.info, Node: Quick Installation, Next: Shell Startup Files, Up: Unix Installation B.2.1 Compiling 'gawk' for Unix-Like Systems -------------------------------------------- @@ -27577,9 +28101,42 @@ will be asked for your password, and you will have to have been set up previously as a user who is allowed to run the 'sudo' command. 
-File: gawk.info, Node: Additional Configuration Options, Next: Configuration Philosophy, Prev: Quick Installation, Up: Unix Installation +File: gawk.info, Node: Shell Startup Files, Next: Additional Configuration Options, Prev: Quick Installation, Up: Unix Installation + +B.2.2 Shell Startup Files +------------------------- + +The distribution contains shell startup files 'gawk.sh' and 'gawk.csh' +containing functions to aid in manipulating the 'AWKPATH' and +'AWKLIBPATH' environment variables. On a Fedora system, these files +should be installed in '/etc/profile.d'; on other platforms, the +appropriate location may be different. + +'gawkpath_default' + Reset the 'AWKPATH' environment variable to its default value. -B.2.2 Additional Configuration Options +'gawkpath_prepend' + Add the argument to the front of the 'AWKPATH' environment + variable. + +'gawkpath_append' + Add the argument to the end of the 'AWKPATH' environment variable. + +'gawklibpath_default' + Reset the 'AWKLIBPATH' environment variable to its default value. + +'gawklibpath_prepend' + Add the argument to the front of the 'AWKLIBPATH' environment + variable. + +'gawklibpath_append' + Add the argument to the end of the 'AWKLIBPATH' environment + variable. + + +File: gawk.info, Node: Additional Configuration Options, Next: Configuration Philosophy, Prev: Shell Startup Files, Up: Unix Installation + +B.2.3 Additional Configuration Options -------------------------------------- There are several additional options you may use on the 'configure' @@ -27623,7 +28180,7 @@ supplied by 'configure'. File: gawk.info, Node: Configuration Philosophy, Prev: Additional Configuration Options, Up: Unix Installation -B.2.3 The Configuration Process +B.2.4 The Configuration Process ------------------------------- This minor node is of interest only if you know something about using @@ -31614,20 +32171,21 @@ Index * --include option: Options. (line 159) * --lint option: Command Line. 
(line 20) * --lint option <1>: Options. (line 184) -* --lint-old option: Options. (line 294) +* --lint-old option: Options. (line 299) * --load option: Options. (line 172) +* --no-optimize option: Options. (line 285) * --non-decimal-data option: Options. (line 209) * --non-decimal-data option <1>: Nondecimal Data. (line 6) * --non-decimal-data option, strtonum() function and: Nondecimal Data. (line 35) -* --optimize option: Options. (line 236) -* --posix option: Options. (line 254) +* --optimize option: Options. (line 234) +* --posix option: Options. (line 257) * --posix option, --traditional option and: Options. (line 272) * --pretty-print option: Options. (line 223) -* --profile option: Options. (line 242) +* --profile option: Options. (line 245) * --profile option <1>: Profiling. (line 12) * --re-interval option: Options. (line 278) -* --sandbox option: Options. (line 285) +* --sandbox option: Options. (line 290) * --sandbox option, disabling system() function: I/O Functions. (line 129) * --sandbox option, input redirection with getline: Getline. (line 19) @@ -31637,7 +32195,7 @@ Index * --traditional option: Options. (line 82) * --traditional option, --posix option and: Options. (line 272) * --use-lc-numeric option: Options. (line 218) -* --version option: Options. (line 299) +* --version option: Options. (line 304) * --with-whiny-user-strftime configuration option: Additional Configuration Options. (line 37) * -b option: Options. (line 69) @@ -31647,31 +32205,32 @@ Index * -D option: Options. (line 108) * -e option: Options. (line 117) * -E option: Options. (line 125) -* -e option <1>: Options. (line 335) +* -e option <1>: Options. (line 340) * -f option: Long. (line 12) * -F option: Options. (line 21) * -f option <1>: Options. (line 25) -* -F option, -Ft sets FS to TAB: Options. (line 307) +* -F option, -Ft sets FS to TAB: Options. (line 312) * -F option, command-line: Command Line Field Separator. (line 6) -* -f option, multiple uses: Options. 
(line 312) +* -f option, multiple uses: Options. (line 317) * -g option: Options. (line 147) * -h option: Options. (line 154) * -i option: Options. (line 159) * -l option: Options. (line 172) * -l option <1>: Options. (line 184) -* -L option: Options. (line 294) +* -L option: Options. (line 299) * -M option: Options. (line 203) * -n option: Options. (line 209) * -N option: Options. (line 218) * -o option: Options. (line 223) -* -O option: Options. (line 236) -* -p option: Options. (line 242) -* -P option: Options. (line 254) +* -O option: Options. (line 234) +* -p option: Options. (line 245) +* -P option: Options. (line 257) * -r option: Options. (line 278) -* -S option: Options. (line 285) +* -s option: Options. (line 285) +* -S option: Options. (line 290) * -v option: Options. (line 32) -* -V option: Options. (line 299) +* -V option: Options. (line 304) * -v option <1>: Assignment Options. (line 12) * -W option: Options. (line 47) * . (period), regexp operator: Regexp Operators. (line 44) @@ -31744,16 +32303,16 @@ Index (line 6) * \ (backslash), in bracket expressions: Bracket Expressions. (line 25) * \ (backslash), in escape sequences: Escape Sequences. (line 6) -* \ (backslash), in escape sequences <1>: Escape Sequences. (line 100) +* \ (backslash), in escape sequences <1>: Escape Sequences. (line 103) * \ (backslash), in escape sequences, POSIX and: Escape Sequences. - (line 105) + (line 108) * \ (backslash), in regexp constants: Computed Regexps. (line 30) * \ (backslash), in shell commands: Quoting. (line 48) * \ (backslash), regexp operator: Regexp Operators. (line 18) -* \ (backslash), \" escape sequence: Escape Sequences. (line 82) +* \ (backslash), \" escape sequence: Escape Sequences. (line 85) * \ (backslash), \' operator (gawk): GNU Regexp Operators. (line 59) -* \ (backslash), \/ escape sequence: Escape Sequences. (line 73) +* \ (backslash), \/ escape sequence: Escape Sequences. (line 76) * \ (backslash), \< operator (gawk): GNU Regexp Operators. 
(line 33) * \ (backslash), \> operator (gawk): GNU Regexp Operators. @@ -31970,7 +32529,7 @@ Index * awf (amazingly workable formatter) program: Glossary. (line 24) * awk debugging, enabling: Options. (line 108) * awk language, POSIX version: Assignment Ops. (line 138) -* awk profiling, enabling: Options. (line 242) +* awk profiling, enabling: Options. (line 245) * awk programs: Getting Started. (line 12) * awk programs <1>: Executable Scripts. (line 6) * awk programs <2>: Two Rules. (line 6) @@ -32041,16 +32600,16 @@ Index (line 6) * backslash (\), in bracket expressions: Bracket Expressions. (line 25) * backslash (\), in escape sequences: Escape Sequences. (line 6) -* backslash (\), in escape sequences <1>: Escape Sequences. (line 100) +* backslash (\), in escape sequences <1>: Escape Sequences. (line 103) * backslash (\), in escape sequences, POSIX and: Escape Sequences. - (line 105) + (line 108) * backslash (\), in regexp constants: Computed Regexps. (line 30) * backslash (\), in shell commands: Quoting. (line 48) * backslash (\), regexp operator: Regexp Operators. (line 18) -* backslash (\), \" escape sequence: Escape Sequences. (line 82) +* backslash (\), \" escape sequence: Escape Sequences. (line 85) * backslash (\), \' operator (gawk): GNU Regexp Operators. (line 59) -* backslash (\), \/ escape sequence: Escape Sequences. (line 73) +* backslash (\), \/ escape sequence: Escape Sequences. (line 76) * backslash (\), \< operator (gawk): GNU Regexp Operators. (line 33) * backslash (\), \> operator (gawk): GNU Regexp Operators. @@ -32169,7 +32728,7 @@ Index * Brennan, Michael <5>: Other Versions. (line 6) * Brennan, Michael <6>: Other Versions. (line 48) * Brian Kernighan's awk: When. (line 21) -* Brian Kernighan's awk <1>: Escape Sequences. (line 109) +* Brian Kernighan's awk <1>: Escape Sequences. (line 112) * Brian Kernighan's awk <2>: GNU Regexp Operators. (line 85) * Brian Kernighan's awk <3>: Regexp Field Splitting. 
@@ -32377,7 +32936,7 @@ Index * cosine: Numeric Functions. (line 16) * counting: Wc Program. (line 6) * csh utility: Statements/Lines. (line 43) -* csh utility, POSIXLY_CORRECT environment variable: Options. (line 353) +* csh utility, POSIXLY_CORRECT environment variable: Options. (line 358) * csh utility, |& operator, comparison with: Two-way I/O. (line 27) * ctime() user-defined function: Function Example. (line 74) * currency symbols, localization: Explaining gettext. (line 104) @@ -32407,12 +32966,12 @@ Index * dark corner, CONVFMT variable: Strings And Numbers. (line 39) * dark corner, escape sequences: Other Arguments. (line 38) * dark corner, escape sequences, for metacharacters: Escape Sequences. - (line 141) + (line 144) * dark corner, exit statement: Exit Statement. (line 30) * dark corner, field separators: Full Line Fields. (line 22) * dark corner, FILENAME variable: Getline Notes. (line 19) -* dark corner, FILENAME variable <1>: Auto-set. (line 89) -* dark corner, FNR/NR variables: Auto-set. (line 313) +* dark corner, FILENAME variable <1>: Auto-set. (line 108) +* dark corner, FNR/NR variables: Auto-set. (line 344) * dark corner, format-control characters: Control Letters. (line 18) * dark corner, format-control characters <1>: Control Letters. (line 93) @@ -32607,12 +33166,12 @@ Index (line 132) * differences in awk and gawk, command-line directories: Command-line directories. (line 6) -* differences in awk and gawk, ERRNO variable: Auto-set. (line 73) +* differences in awk and gawk, ERRNO variable: Auto-set. (line 87) * differences in awk and gawk, error messages: Special FD. (line 19) * differences in awk and gawk, FIELDWIDTHS variable: User-modified. (line 37) * differences in awk and gawk, FPAT variable: User-modified. (line 43) -* differences in awk and gawk, FUNCTAB variable: Auto-set. (line 115) +* differences in awk and gawk, FUNCTAB variable: Auto-set. (line 134) * differences in awk and gawk, function arguments (gawk): Calling Built-in. 
(line 16) * differences in awk and gawk, getline command: Getline. (line 19) @@ -32635,7 +33194,7 @@ Index (line 262) * differences in awk and gawk, print/printf statements: Format Modifiers. (line 13) -* differences in awk and gawk, PROCINFO array: Auto-set. (line 129) +* differences in awk and gawk, PROCINFO array: Auto-set. (line 148) * differences in awk and gawk, read timeouts: Read Timeout. (line 6) * differences in awk and gawk, record separators: awk split records. (line 124) @@ -32643,9 +33202,11 @@ Index (line 43) * differences in awk and gawk, regular expressions: Case-sensitivity. (line 26) +* differences in awk and gawk, retrying input: Retrying Input. + (line 6) * differences in awk and gawk, RS/RT variables: gawk split records. (line 58) -* differences in awk and gawk, RT variable: Auto-set. (line 264) +* differences in awk and gawk, RT variable: Auto-set. (line 295) * differences in awk and gawk, single-character fields: Single Character Fields. (line 6) * differences in awk and gawk, split() function: String Functions. @@ -32653,7 +33214,7 @@ Index * differences in awk and gawk, strings: Scalar Constants. (line 20) * differences in awk and gawk, strings, storing: gawk split records. (line 76) -* differences in awk and gawk, SYMTAB variable: Auto-set. (line 268) +* differences in awk and gawk, SYMTAB variable: Auto-set. (line 299) * differences in awk and gawk, TEXTDOMAIN variable: User-modified. (line 152) * differences in awk and gawk, trunc-mod operation: Arithmetic Ops. @@ -32689,12 +33250,12 @@ Index * dump debugger command: Miscellaneous Debugger Commands. (line 9) * dupword.awk program: Dupword Program. (line 31) -* dynamic profiling: Profiling. (line 178) +* dynamic profiling: Profiling. (line 177) * dynamically loaded extensions: Dynamic Extensions. (line 6) * e debugger command (alias for enable): Breakpoint Control. (line 73) * EBCDIC: Ordinal Functions. (line 45) -* effective group ID of gawk user: Auto-set. 
(line 134) -* effective user ID of gawk user: Auto-set. (line 138) +* effective group ID of gawk user: Auto-set. (line 153) +* effective user ID of gawk user: Auto-set. (line 161) * egrep utility: Bracket Expressions. (line 34) * egrep utility <1>: Egrep Program. (line 6) * egrep.awk program: Egrep Program. (line 53) @@ -32748,14 +33309,14 @@ Index (line 11) * equals sign (=), == operator <1>: Precedence. (line 64) * EREs (Extended Regular Expressions): Bracket Expressions. (line 34) -* ERRNO variable: Auto-set. (line 73) +* ERRNO variable: Auto-set. (line 87) * ERRNO variable <1>: TCP/IP Networking. (line 54) * ERRNO variable, with BEGINFILE pattern: BEGINFILE/ENDFILE. (line 26) * ERRNO variable, with close() function: Close Files And Pipes. (line 140) * ERRNO variable, with getline command: Getline. (line 19) * error handling: Special FD. (line 19) -* error handling, ERRNO variable and: Auto-set. (line 73) +* error handling, ERRNO variable and: Auto-set. (line 87) * error output: Special FD. (line 6) * escape processing, gsub()/gensub()/sub() functions: Gory Details. (line 6) @@ -32796,7 +33357,7 @@ Index (line 102) * exp: Numeric Functions. (line 19) * expand utility: Very Simple. (line 73) -* Expat XML parser library: gawkextlib. (line 35) +* Expat XML parser library: gawkextlib. (line 37) * exponent: Numeric Functions. (line 19) * expressions: Expressions. (line 6) * expressions, as patterns: Expression Patterns. (line 6) @@ -32815,7 +33376,7 @@ Index (line 6) * extension API version: Extension Versioning. (line 6) -* extension API, version number: Auto-set. (line 223) +* extension API, version number: Auto-set. (line 246) * extension example: Extension Example. (line 6) * extension registration: Registration Functions. (line 6) @@ -32871,7 +33432,6 @@ Index * field separators, choice of: Field Separators. (line 50) * field separators, FIELDWIDTHS variable and: User-modified. (line 37) * field separators, FPAT variable and: User-modified. 
(line 43) -* field separators, POSIX and: Fields. (line 6) * field separators, regular expressions as: Field Separators. (line 50) * field separators, regular expressions as <1>: Regexp Field Splitting. (line 6) @@ -32899,7 +33459,7 @@ Index * file names, in compatibility mode: Special Caveats. (line 9) * file names, standard streams in gawk: Special FD. (line 48) * FILENAME variable: Reading Files. (line 6) -* FILENAME variable <1>: Auto-set. (line 89) +* FILENAME variable <1>: Auto-set. (line 108) * FILENAME variable, getline, setting with: Getline Notes. (line 19) * filenames, assignments as: Ignoring Assigns. (line 6) * files, .gmo: Explaining gettext. (line 42) @@ -32967,8 +33527,8 @@ Index * fnmatch() extension function: Extension Sample Fnmatch. (line 12) * FNR variable: Records. (line 6) -* FNR variable <1>: Auto-set. (line 99) -* FNR variable, changing: Auto-set. (line 313) +* FNR variable <1>: Auto-set. (line 118) +* FNR variable, changing: Auto-set. (line 344) * for statement: For Statement. (line 6) * for statement, looping over arrays: Scanning an Array. (line 20) * fork() extension function: Extension Sample Fork. @@ -33020,7 +33580,7 @@ Index * FSF (Free Software Foundation) <3>: Glossary. (line 405) * fts() extension function: Extension Sample File Functions. (line 60) -* FUNCTAB array: Auto-set. (line 115) +* FUNCTAB array: Auto-set. (line 134) * function calls: Function Calls. (line 6) * function calls, indirect: Indirect Calls. (line 6) * function calls, indirect, @-notation for: Indirect Calls. (line 47) @@ -33071,8 +33631,8 @@ Index * G., Daniel Richard: Acknowledgments. (line 60) * G., Daniel Richard <1>: Bugs. (line 73) * Garfinkle, Scott: Contributors. (line 35) -* gawk program, dynamic profiling: Profiling. (line 178) -* gawk version: Auto-set. (line 198) +* gawk program, dynamic profiling: Profiling. (line 177) +* gawk version: Auto-set. (line 221) * gawk, ARGIND variable in: Other Arguments. (line 15) * gawk, awk and: Preface. 
(line 21) * gawk, awk and <1>: This Manual. (line 14) @@ -33093,10 +33653,10 @@ Index * gawk, ERRNO variable in <1>: Close Files And Pipes. (line 140) * gawk, ERRNO variable in <2>: BEGINFILE/ENDFILE. (line 26) -* gawk, ERRNO variable in <3>: Auto-set. (line 73) +* gawk, ERRNO variable in <3>: Auto-set. (line 87) * gawk, ERRNO variable in <4>: TCP/IP Networking. (line 54) -* gawk, escape sequences: Escape Sequences. (line 118) -* gawk, extensions, disabling: Options. (line 254) +* gawk, escape sequences: Escape Sequences. (line 121) +* gawk, extensions, disabling: Options. (line 257) * gawk, features, adding: Adding Code. (line 6) * gawk, features, advanced: Advanced Features. (line 6) * gawk, field separators and: User-modified. (line 71) @@ -33108,7 +33668,7 @@ Index * gawk, FPAT variable in: Splitting By Content. (line 25) * gawk, FPAT variable in <1>: User-modified. (line 43) -* gawk, FUNCTAB array in: Auto-set. (line 115) +* gawk, FUNCTAB array in: Auto-set. (line 134) * gawk, function arguments and: Calling Built-in. (line 16) * gawk, hexadecimal numbers and: Nondecimal-numbers. (line 41) * gawk, IGNORECASE variable in: Case-sensitivity. (line 26) @@ -33138,7 +33698,7 @@ Index * gawk, octal numbers and: Nondecimal-numbers. (line 41) * gawk, OS/2 version of: PC Using. (line 17) * gawk, predefined variables and: Built-in Variables. (line 14) -* gawk, PROCINFO array in: Auto-set. (line 129) +* gawk, PROCINFO array in: Auto-set. (line 148) * gawk, PROCINFO array in <1>: Time Functions. (line 47) * gawk, PROCINFO array in <2>: Two-way I/O. (line 99) * gawk, regexp constants and: Using Constant Regexps. @@ -33150,21 +33710,27 @@ Index * gawk, regular expressions, precedence: Regexp Operators. (line 161) * gawk, RT variable in: awk split records. (line 124) * gawk, RT variable in <1>: Multiple Line. (line 130) -* gawk, RT variable in <2>: Auto-set. (line 264) +* gawk, RT variable in <2>: Auto-set. (line 295) * gawk, See Also awk: Preface. 
(line 34) * gawk, source code, obtaining: Getting. (line 6) * gawk, splitting fields and: Constant Size. (line 86) * gawk, string-translation functions: I18N Functions. (line 6) -* gawk, SYMTAB array in: Auto-set. (line 268) +* gawk, SYMTAB array in: Auto-set. (line 299) * gawk, TEXTDOMAIN variable in: User-modified. (line 152) * gawk, timestamps: Time Functions. (line 6) * gawk, uses for: Preface. (line 34) -* gawk, versions of, information about, printing: Options. (line 299) +* gawk, versions of, information about, printing: Options. (line 304) * gawk, VMS version of: VMS Installation. (line 6) * gawk, word-boundary operator: GNU Regexp Operators. (line 66) * gawkextlib: gawkextlib. (line 6) * gawkextlib project: gawkextlib. (line 6) +* gawklibpath_append shell function: Shell Startup Files. (line 29) +* gawklibpath_default shell function: Shell Startup Files. (line 22) +* gawklibpath_prepend shell function: Shell Startup Files. (line 25) +* gawkpath_append shell function: Shell Startup Files. (line 19) +* gawkpath_default shell function: Shell Startup Files. (line 12) +* gawkpath_prepend shell function: Shell Startup Files. (line 15) * General Public License (GPL): Glossary. (line 396) * General Public License, See GPL: Manual History. (line 11) * generate time values: Time Functions. (line 25) @@ -33218,7 +33784,7 @@ Index * gettext() function (C library): Explaining gettext. (line 63) * gettimeofday() extension function: Extension Sample Time. (line 12) -* git utility: gawkextlib. (line 29) +* git utility: gawkextlib. (line 31) * git utility <1>: Other Versions. (line 29) * git utility <2>: Accessing The Source. (line 10) @@ -33246,7 +33812,7 @@ Index * Grigera, Juan: Contributors. (line 58) * group database, reading: Group Functions. (line 6) * group file: Group Functions. (line 6) -* group ID of gawk user: Auto-set. (line 147) +* group ID of gawk user: Auto-set. (line 170) * groups, information about: Group Functions. 
(line 6) * gsub: Using Constant Regexps. (line 43) @@ -33268,7 +33834,7 @@ Index * history expansion, in debugger: Readline Support. (line 6) * histsort.awk program: History Sorting. (line 25) * Hughes, Phil: Acknowledgments. (line 43) -* HUP signal, for dynamic profiling: Profiling. (line 210) +* HUP signal, for dynamic profiling: Profiling. (line 209) * hyphen (-), - operator: Precedence. (line 51) * hyphen (-), - operator <1>: Precedence. (line 57) * hyphen (-), -- operator: Increment Ops. (line 48) @@ -33350,7 +33916,9 @@ Index * installing gawk: Installation. (line 6) * instruction tracing, in debugger: Debugger Info. (line 90) * int: Numeric Functions. (line 24) -* INT signal (MS-Windows): Profiling. (line 213) +* INT signal (MS-Windows): Profiling. (line 212) +* intdiv: Numeric Functions. (line 29) +* intdiv <1>: Numeric Functions. (line 29) * integer array indices: Numeric Array Subscripts. (line 31) * integers, arbitrary precision: Arbitrary Precision Integers. @@ -33406,7 +33974,7 @@ Index * Kernighan, Brian <8>: Other Versions. (line 13) * Kernighan, Brian <9>: Basic Data Typing. (line 54) * Kernighan, Brian <10>: Glossary. (line 206) -* kill command, dynamic profiling: Profiling. (line 187) +* kill command, dynamic profiling: Profiling. (line 186) * Knights, jedi: Undocumented. (line 6) * Kwok, Conrad: Contributors. (line 35) * l debugger command (alias for list): Miscellaneous Debugger Commands. @@ -33478,7 +34046,7 @@ Index * lint checking, empty programs: Command Line. (line 16) * lint checking, issuing warnings: Options. (line 184) * lint checking, POSIXLY_CORRECT environment variable: Options. - (line 338) + (line 343) * lint checking, undefined functions: Pass By Value/Reference. (line 85) * LINT variable: User-modified. (line 87) @@ -33499,9 +34067,9 @@ Index * localization: I18N and L10N. (line 6) * localization, See internationalization, localization: I18N and L10N. (line 6) -* log: Numeric Functions. (line 29) +* log: Numeric Functions. 
(line 44) * log files, timestamps in: Time Functions. (line 6) -* logarithm: Numeric Functions. (line 29) +* logarithm: Numeric Functions. (line 44) * logical false/true: Truth Values. (line 6) * logical operators, See Boolean expressions: Boolean Ops. (line 6) * login information: Passwd Functions. (line 16) @@ -33538,12 +34106,12 @@ Index (line 9) * matching, leftmost longest: Multiple Line. (line 26) * matching, null strings: String Functions. (line 537) -* mawk utility: Escape Sequences. (line 118) +* mawk utility: Escape Sequences. (line 121) * mawk utility <1>: Getline/Pipe. (line 62) * mawk utility <2>: Concatenation. (line 36) * mawk utility <3>: Nextfile Statement. (line 47) * mawk utility <4>: Other Versions. (line 48) -* maximum precision supported by MPFR library: Auto-set. (line 212) +* maximum precision supported by MPFR library: Auto-set. (line 235) * McIlroy, Doug: Glossary. (line 257) * McPhee, Patrick: Contributors. (line 101) * message object files: Explaining gettext. (line 42) @@ -33555,8 +34123,8 @@ Index (line 48) * messages from extensions: Printing Messages. (line 6) * metacharacters in regular expressions: Regexp Operators. (line 6) -* metacharacters, escape sequences for: Escape Sequences. (line 137) -* minimum precision required by MPFR library: Auto-set. (line 215) +* metacharacters, escape sequences for: Escape Sequences. (line 140) +* minimum precision required by MPFR library: Auto-set. (line 238) * mktime: Time Functions. (line 25) * modifiers, in format specifiers: Format Modifiers. (line 6) * monetary information, localization: Explaining gettext. (line 104) @@ -33576,10 +34144,8 @@ Index * networks, programming: TCP/IP Networking. (line 6) * networks, support for: Special Network. (line 6) * newlines: Statements/Lines. (line 6) -* newlines <1>: Options. (line 260) +* newlines <1>: Options. (line 263) * newlines <2>: Boolean Ops. (line 69) -* newlines, as field separators: Default Field Splitting. 
- (line 6) * newlines, as record separators: awk split records. (line 12) * newlines, in dynamic regexps: Computed Regexps. (line 60) * newlines, in regexp constants: Computed Regexps. (line 70) @@ -33606,7 +34172,7 @@ Index * nexti debugger command: Debugger Execution Control. (line 49) * NF variable: Fields. (line 33) -* NF variable <1>: Auto-set. (line 104) +* NF variable <1>: Auto-set. (line 123) * NF variable, decrementing: Changing Fields. (line 107) * ni debugger command (alias for nexti): Debugger Execution Control. (line 49) @@ -33615,8 +34181,8 @@ Index (line 23) * not Boolean-logic operator: Boolean Ops. (line 6) * NR variable: Records. (line 6) -* NR variable <1>: Auto-set. (line 124) -* NR variable, changing: Auto-set. (line 313) +* NR variable <1>: Auto-set. (line 143) +* NR variable, changing: Auto-set. (line 344) * null strings: awk split records. (line 114) * null strings <1>: Regexp Field Splitting. (line 43) @@ -33736,7 +34302,7 @@ Index * p debugger command (alias for print): Viewing And Changing Data. (line 35) * Papadopoulos, Panos: Contributors. (line 129) -* parent process ID of gawk process: Auto-set. (line 187) +* parent process ID of gawk process: Auto-set. (line 210) * parentheses (), in a profile: Profiling. (line 146) * parentheses (), regexp operator: Regexp Operators. (line 81) * password file: Passwd Functions. (line 16) @@ -33780,14 +34346,14 @@ Index * plus sign (+), += operator <1>: Precedence. (line 94) * plus sign (+), regexp operator: Regexp Operators. (line 105) * pointers to functions: Indirect Calls. (line 6) -* portability: Escape Sequences. (line 100) +* portability: Escape Sequences. (line 103) * portability, #! (executable scripts): Executable Scripts. (line 33) * portability, ** operator and: Arithmetic Ops. (line 81) * portability, **= operator and: Assignment Ops. (line 144) * portability, ARGV variable: Executable Scripts. (line 59) * portability, backslash continuation and: Statements/Lines. 
(line 30) * portability, backslash in escape sequences: Escape Sequences. - (line 105) + (line 108) * portability, close() function and: Close Files And Pipes. (line 81) * portability, data files as single record: gawk split records. @@ -33805,7 +34371,7 @@ Index * portability, NF variable, decrementing: Changing Fields. (line 115) * portability, operators: Increment Ops. (line 60) * portability, operators, not in POSIX awk: Precedence. (line 97) -* portability, POSIXLY_CORRECT environment variable: Options. (line 358) +* portability, POSIXLY_CORRECT environment variable: Options. (line 363) * portability, substr() function: String Functions. (line 513) * portable object files: Explaining gettext. (line 37) * portable object files <1>: Translator i18n. (line 6) @@ -33826,7 +34392,7 @@ Index * POSIX awk, < operator and: Getline/File. (line 26) * POSIX awk, arithmetic operators and: Arithmetic Ops. (line 30) * POSIX awk, backslashes in string constants: Escape Sequences. - (line 105) + (line 108) * POSIX awk, BEGIN/END patterns: I/O And BEGIN/END. (line 15) * POSIX awk, bracket expressions and: Bracket Expressions. (line 34) * POSIX awk, bracket expressions and, character classes: Bracket Expressions. @@ -33838,9 +34404,7 @@ Index * POSIX awk, continue statement and: Continue Statement. (line 44) * POSIX awk, CONVFMT variable and: User-modified. (line 30) * POSIX awk, date utility and: Time Functions. (line 253) -* POSIX awk, field separators and: Fields. (line 6) -* POSIX awk, field separators and <1>: Full Line Fields. (line 16) -* POSIX awk, FS variable and: User-modified. (line 60) +* POSIX awk, field separators and: Full Line Fields. (line 16) * POSIX awk, function keyword in: Definition Syntax. (line 99) * POSIX awk, functions and, gsub()/sub(): Gory Details. (line 90) * POSIX awk, functions and, length(): String Functions. (line 179) @@ -33855,12 +34419,12 @@ Index * POSIX awk, regular expressions and: Regexp Operators. 
(line 161) * POSIX awk, timestamps and: Time Functions. (line 6) * POSIX awk, | I/O operator and: Getline/Pipe. (line 56) -* POSIX mode: Options. (line 254) -* POSIX mode <1>: Options. (line 338) +* POSIX mode: Options. (line 257) +* POSIX mode <1>: Options. (line 343) * POSIX, awk and: Preface. (line 21) * POSIX, gawk extensions not included in: POSIX/GNU. (line 6) * POSIX, programs, implementing in awk: Clones. (line 6) -* POSIXLY_CORRECT environment variable: Options. (line 338) +* POSIXLY_CORRECT environment variable: Options. (line 343) * PREC variable: User-modified. (line 124) * precedence: Increment Ops. (line 60) * precedence <1>: Precedence. (line 6) @@ -33907,11 +34471,11 @@ Index * printing, unduplicated lines of text: Uniq Program. (line 6) * printing, user information: Id Program. (line 6) * private variables: Library Names. (line 11) -* process group ID of gawk process: Auto-set. (line 181) -* process ID of gawk process: Auto-set. (line 184) +* process group ID of gawk process: Auto-set. (line 204) +* process ID of gawk process: Auto-set. (line 207) * processes, two-way communications with: Two-way I/O. (line 6) * processing data: Basic High Level. (line 6) -* PROCINFO array: Auto-set. (line 129) +* PROCINFO array: Auto-set. (line 148) * PROCINFO array <1>: Time Functions. (line 47) * PROCINFO array <2>: Passwd Functions. (line 6) * PROCINFO array, and communications via ptys: Two-way I/O. (line 99) @@ -33922,8 +34486,8 @@ Index * PROCINFO, values of sorted_in: Controlling Scanning. (line 26) * profiling awk programs: Profiling. (line 6) -* profiling awk programs, dynamically: Profiling. (line 178) -* program identifiers: Auto-set. (line 150) +* profiling awk programs, dynamically: Profiling. (line 177) +* program identifiers: Auto-set. (line 173) * program, definition of: Getting Started. (line 21) * programming conventions, --non-decimal-data option: Nondecimal Data. (line 35) @@ -33959,7 +34523,7 @@ Index * QuikTrim Awk: Other Versions. 
(line 139) * quit debugger command: Miscellaneous Debugger Commands. (line 102) -* QUIT signal (MS-Windows): Profiling. (line 213) +* QUIT signal (MS-Windows): Profiling. (line 212) * quoting in gawk command lines: Long. (line 26) * quoting in gawk command lines, tricks for: Quoting. (line 91) * quoting, for small awk programs: Comments. (line 27) @@ -33968,12 +34532,12 @@ Index * Rakitzis, Byron: History Sorting. (line 25) * Ramey, Chet: Acknowledgments. (line 60) * Ramey, Chet <1>: General Data Types. (line 6) -* rand: Numeric Functions. (line 34) +* rand: Numeric Functions. (line 49) * random numbers, Cliff: Cliff Random Function. (line 6) * random numbers, rand()/srand() functions: Numeric Functions. - (line 34) -* random numbers, seed of: Numeric Functions. (line 64) + (line 49) +* random numbers, seed of: Numeric Functions. (line 79) * range expressions (regexps): Bracket Expressions. (line 6) * range patterns: Ranges. (line 6) * range patterns, line continuation and: Ranges. (line 64) @@ -34060,6 +34624,7 @@ Index * relational operators, See comparison operators: Typing and Comparison. (line 9) * replace in string: String Functions. (line 409) +* retrying input: Retrying Input. (line 6) * return debugger command: Debugger Execution Control. (line 54) * return statement, user-defined functions: Return Statement. (line 6) @@ -34083,7 +34648,7 @@ Index * right shift: Bitwise Functions. (line 54) * right shift, bitwise: Bitwise Functions. (line 32) * Ritchie, Dennis: Basic Data Typing. (line 54) -* RLENGTH variable: Auto-set. (line 251) +* RLENGTH variable: Auto-set. (line 282) * RLENGTH variable, match() function and: String Functions. (line 227) * Robbins, Arnold: Command Line Field Separator. (line 71) @@ -34109,11 +34674,11 @@ Index * RS variable <1>: User-modified. (line 133) * RS variable, multiline records and: Multiple Line. (line 17) * rshift: Bitwise Functions. (line 54) -* RSTART variable: Auto-set. (line 257) +* RSTART variable: Auto-set. 
(line 288) * RSTART variable, match() function and: String Functions. (line 227) * RT variable: awk split records. (line 124) * RT variable <1>: Multiple Line. (line 130) -* RT variable <2>: Auto-set. (line 264) +* RT variable <2>: Auto-set. (line 295) * Rubin, Paul: History. (line 30) * Rubin, Paul <1>: Contributors. (line 16) * rule, definition of: Getting Started. (line 21) @@ -34124,14 +34689,14 @@ Index (line 68) * sample debugging session: Sample Debugging Session. (line 6) -* sandbox mode: Options. (line 285) +* sandbox mode: Options. (line 290) * save debugger options: Debugger Info. (line 85) * scalar or array: Type Functions. (line 11) * scalar values: Basic Data Typing. (line 13) * scanning arrays: Scanning an Array. (line 6) * scanning multidimensional arrays: Multiscanning. (line 11) * Schorr, Andrew: Acknowledgments. (line 60) -* Schorr, Andrew <1>: Auto-set. (line 296) +* Schorr, Andrew <1>: Auto-set. (line 327) * Schorr, Andrew <2>: Contributors. (line 134) * Schreiber, Bert: Acknowledgments. (line 38) * Schreiber, Rita: Acknowledgments. (line 38) @@ -34150,7 +34715,7 @@ Index * sed utility: Full Line Fields. (line 22) * sed utility <1>: Simple Sed. (line 6) * sed utility <2>: Glossary. (line 16) -* seeding random number generator: Numeric Functions. (line 64) +* seeding random number generator: Numeric Functions. (line 79) * semicolon (;), AWKPATH variable and: PC Using. (line 10) * semicolon (;), separating statements in actions: Statements/Lines. (line 90) @@ -34162,7 +34727,6 @@ Index * separators, field <1>: User-modified. (line 113) * separators, field, FIELDWIDTHS variable and: User-modified. (line 37) * separators, field, FPAT variable and: User-modified. (line 43) -* separators, field, POSIX and: Fields. (line 6) * separators, for records: awk split records. (line 6) * separators, for records <1>: awk split records. (line 85) * separators, for records <2>: User-modified. 
(line 133) @@ -34214,14 +34778,14 @@ Index * sidebar, A Constant's Base Does Not Affect Its Value: Nondecimal-numbers. (line 63) * sidebar, Backslash Before Regular Characters: Escape Sequences. - (line 103) + (line 106) * sidebar, Changing FS Does Not Affect the Fields: Full Line Fields. (line 14) -* sidebar, Changing NR and FNR: Auto-set. (line 311) +* sidebar, Changing NR and FNR: Auto-set. (line 342) * sidebar, Controlling Output Buffering with system(): I/O Functions. (line 137) * sidebar, Escape Sequences for Metacharacters: Escape Sequences. - (line 135) + (line 138) * sidebar, FS and IGNORECASE: Field Splitting Summary. (line 37) * sidebar, Interactive Versus Noninteractive Buffering: I/O Functions. @@ -34243,19 +34807,19 @@ Index (line 130) * sidebar, Using \n in Bracket Expressions of Dynamic Regexps: Computed Regexps. (line 58) -* SIGHUP signal, for dynamic profiling: Profiling. (line 210) -* SIGINT signal (MS-Windows): Profiling. (line 213) -* signals, HUP/SIGHUP, for profiling: Profiling. (line 210) -* signals, INT/SIGINT (MS-Windows): Profiling. (line 213) -* signals, QUIT/SIGQUIT (MS-Windows): Profiling. (line 213) -* signals, USR1/SIGUSR1, for profiling: Profiling. (line 187) +* SIGHUP signal, for dynamic profiling: Profiling. (line 209) +* SIGINT signal (MS-Windows): Profiling. (line 212) +* signals, HUP/SIGHUP, for profiling: Profiling. (line 209) +* signals, INT/SIGINT (MS-Windows): Profiling. (line 212) +* signals, QUIT/SIGQUIT (MS-Windows): Profiling. (line 212) +* signals, USR1/SIGUSR1, for profiling: Profiling. (line 186) * signature program: Signature Program. (line 6) -* SIGQUIT signal (MS-Windows): Profiling. (line 213) -* SIGUSR1 signal, for dynamic profiling: Profiling. (line 187) +* SIGQUIT signal (MS-Windows): Profiling. (line 212) +* SIGUSR1 signal, for dynamic profiling: Profiling. (line 186) * silent debugger command: Debugger Execution Control. (line 10) -* sin: Numeric Functions. (line 75) -* sine: Numeric Functions. 
(line 75) +* sin: Numeric Functions. (line 90) +* sine: Numeric Functions. (line 90) * single quote ('): One-shot. (line 15) * single quote (') in gawk command lines: Long. (line 35) * single quote ('), in shell commands: Quoting. (line 48) @@ -34305,10 +34869,10 @@ Index * sprintf() function, OFMT variable and: User-modified. (line 113) * sprintf() function, print/printf statements and: Round Function. (line 6) -* sqrt: Numeric Functions. (line 78) +* sqrt: Numeric Functions. (line 93) * square brackets ([]), regexp operator: Regexp Operators. (line 56) -* square root: Numeric Functions. (line 78) -* srand: Numeric Functions. (line 82) +* square root: Numeric Functions. (line 93) +* srand: Numeric Functions. (line 97) * stack frame: Debugging Terms. (line 10) * Stallman, Richard: Manual History. (line 6) * Stallman, Richard <1>: Acknowledgments. (line 18) @@ -34380,9 +34944,9 @@ Index * substr: String Functions. (line 482) * substring: String Functions. (line 482) * Sumner, Andrew: Other Versions. (line 68) -* supplementary groups of gawk process: Auto-set. (line 228) +* supplementary groups of gawk process: Auto-set. (line 251) * switch statement: Switch Statement. (line 6) -* SYMTAB array: Auto-set. (line 268) +* SYMTAB array: Auto-set. (line 299) * syntactic ambiguity: /= operator vs. /=.../ regexp constant: Assignment Ops. (line 149) * system: I/O Functions. (line 107) @@ -34452,7 +35016,7 @@ Index (line 37) * troubleshooting, awk uses FS not IFS: Field Separators. (line 29) * troubleshooting, backslash before nonspecial character: Escape Sequences. - (line 105) + (line 108) * troubleshooting, division: Arithmetic Ops. (line 44) * troubleshooting, fatal errors, field widths, specifying: Constant Size. (line 22) @@ -34486,6 +35050,8 @@ Index * trunc-mod operation: Arithmetic Ops. (line 66) * truth values: Truth Values. (line 6) * type conversion: Strings And Numbers. (line 21) +* type, of variable: Type Functions. (line 14) +* typeof: Type Functions. 
(line 14) * u debugger command (alias for until): Debugger Execution Control. (line 82) * unassigned array elements: Reference to Elements. @@ -34508,7 +35074,7 @@ Index * uniq.awk program: Uniq Program. (line 65) * Unix: Glossary. (line 748) * Unix awk, backslashes in escape sequences: Escape Sequences. - (line 118) + (line 121) * Unix awk, close() function and: Close Files And Pipes. (line 132) * Unix awk, password files, field separators and: Command Line Field Separator. @@ -34528,10 +35094,11 @@ Index * user-modifiable variables: User-modified. (line 6) * users, information about, printing: Id Program. (line 6) * users, information about, retrieving: Passwd Functions. (line 16) -* USR1 signal, for dynamic profiling: Profiling. (line 187) +* USR1 signal, for dynamic profiling: Profiling. (line 186) * values, numeric: Basic Data Typing. (line 13) * values, string: Basic Data Typing. (line 13) * variable assignments and input files: Other Arguments. (line 26) +* variable type: Type Functions. (line 14) * variable typing: Typing and Comparison. (line 9) * variables: Other Features. (line 6) @@ -34562,10 +35129,10 @@ Index * variables, uninitialized, as array subscripts: Uninitialized Subscripts. (line 6) * variables, user-defined: Variables. (line 6) -* version of gawk: Auto-set. (line 198) -* version of gawk extension API: Auto-set. (line 223) -* version of GNU MP library: Auto-set. (line 206) -* version of GNU MPFR library: Auto-set. (line 208) +* version of gawk: Auto-set. (line 221) +* version of gawk extension API: Auto-set. (line 246) +* version of GNU MP library: Auto-set. (line 229) +* version of GNU MPFR library: Auto-set. (line 231) * vertical bar (|): Regexp Operators. (line 70) * vertical bar (|), | operator (I/O): Getline/Pipe. (line 10) * vertical bar (|), | operator (I/O) <1>: Precedence. (line 64) @@ -34602,7 +35169,7 @@ Index * whitespace, as field separators: Default Field Splitting. (line 6) * whitespace, functions, calling: Calling Built-in. 
(line 10) -* whitespace, newlines as: Options. (line 260) +* whitespace, newlines as: Options. (line 263) * Williams, Kent: Contributors. (line 35) * Woehlke, Matthew: Contributors. (line 80) * Woods, John: Contributors. (line 28) @@ -34632,559 +35199,563 @@ Index Tag Table: Node: Top1200 -Node: Foreword342138 -Node: Foreword446580 -Node: Preface48112 -Ref: Preface-Footnote-150984 -Ref: Preface-Footnote-251091 -Ref: Preface-Footnote-351325 -Node: History51467 -Node: Names53819 -Ref: Names-Footnote-154913 -Node: This Manual55060 -Ref: This Manual-Footnote-161542 -Node: Conventions61642 -Node: Manual History63996 -Ref: Manual History-Footnote-166991 -Ref: Manual History-Footnote-267032 -Node: How To Contribute67106 -Node: Acknowledgments68235 -Node: Getting Started73121 -Node: Running gawk75560 -Node: One-shot76750 -Node: Read Terminal78013 -Node: Long80045 -Node: Executable Scripts81558 -Ref: Executable Scripts-Footnote-184353 -Node: Comments84456 -Node: Quoting86940 -Node: DOS Quoting92457 -Node: Sample Data Files93132 -Node: Very Simple95727 -Node: Two Rules100629 -Node: More Complex102514 -Node: Statements/Lines105377 -Ref: Statements/Lines-Footnote-1109836 -Node: Other Features110101 -Node: When111037 -Ref: When-Footnote-1112791 -Node: Intro Summary112856 -Node: Invoking Gawk113740 -Node: Command Line115254 -Node: Options116052 -Ref: Options-Footnote-1131959 -Ref: Options-Footnote-2132189 -Node: Other Arguments132214 -Node: Naming Standard Input135161 -Node: Environment Variables136254 -Node: AWKPATH Variable136812 -Ref: AWKPATH Variable-Footnote-1140113 -Ref: AWKPATH Variable-Footnote-2140158 -Node: AWKLIBPATH Variable140419 -Node: Other Environment Variables141563 -Node: Exit Status145328 -Node: Include Files146005 -Node: Loading Shared Libraries149600 -Node: Obsolete151028 -Node: Undocumented151720 -Node: Invoking Summary152017 -Node: Regexp153677 -Node: Regexp Usage155131 -Node: Escape Sequences157168 -Node: Regexp Operators163182 -Ref: Regexp 
Operators-Footnote-1170598 -Ref: Regexp Operators-Footnote-2170745 -Node: Bracket Expressions170843 -Ref: table-char-classes173319 -Node: Leftmost Longest176456 -Node: Computed Regexps177759 -Node: GNU Regexp Operators181186 -Node: Case-sensitivity184865 -Ref: Case-sensitivity-Footnote-1187752 -Ref: Case-sensitivity-Footnote-2187987 -Node: Regexp Summary188095 -Node: Reading Files189561 -Node: Records191655 -Node: awk split records192388 -Node: gawk split records197319 -Ref: gawk split records-Footnote-1201859 -Node: Fields201896 -Ref: Fields-Footnote-1204676 -Node: Nonconstant Fields204762 -Ref: Nonconstant Fields-Footnote-1206998 -Node: Changing Fields207202 -Node: Field Separators213130 -Node: Default Field Splitting215828 -Node: Regexp Field Splitting216946 -Node: Single Character Fields220299 -Node: Command Line Field Separator221359 -Node: Full Line Fields224577 -Ref: Full Line Fields-Footnote-1226099 -Ref: Full Line Fields-Footnote-2226145 -Node: Field Splitting Summary226246 -Node: Constant Size228320 -Node: Splitting By Content232898 -Ref: Splitting By Content-Footnote-1236869 -Node: Multiple Line237032 -Ref: Multiple Line-Footnote-1242914 -Node: Getline243093 -Node: Plain Getline245297 -Node: Getline/Variable247936 -Node: Getline/File249085 -Node: Getline/Variable/File250471 -Ref: Getline/Variable/File-Footnote-1252074 -Node: Getline/Pipe252162 -Node: Getline/Variable/Pipe254867 -Node: Getline/Coprocess256000 -Node: Getline/Variable/Coprocess257265 -Node: Getline Notes258005 -Node: Getline Summary260800 -Ref: table-getline-variants261222 -Node: Read Timeout261970 -Ref: Read Timeout-Footnote-1265811 -Node: Command-line directories265869 -Node: Input Summary266773 -Node: Input Exercises269945 -Node: Printing270673 -Node: Print272449 -Node: Print Examples273906 -Node: Output Separators276686 -Node: OFMT278703 -Node: Printf280059 -Node: Basic Printf280844 -Node: Control Letters282418 -Node: Format Modifiers286406 -Node: Printf Examples292421 -Node: 
Redirection294907 -Node: Special FD301748 -Ref: Special FD-Footnote-1304916 -Node: Special Files304990 -Node: Other Inherited Files305607 -Node: Special Network306608 -Node: Special Caveats307468 -Node: Close Files And Pipes308417 -Ref: Close Files And Pipes-Footnote-1315610 -Ref: Close Files And Pipes-Footnote-2315758 -Node: Output Summary315909 -Node: Output Exercises316907 -Node: Expressions317586 -Node: Values318774 -Node: Constants319452 -Node: Scalar Constants320143 -Ref: Scalar Constants-Footnote-1321007 -Node: Nondecimal-numbers321257 -Node: Regexp Constants324270 -Node: Using Constant Regexps324796 -Node: Variables327959 -Node: Using Variables328616 -Node: Assignment Options330526 -Node: Conversion332399 -Node: Strings And Numbers332923 -Ref: Strings And Numbers-Footnote-1335986 -Node: Locale influences conversions336095 -Ref: table-locale-affects338853 -Node: All Operators339471 -Node: Arithmetic Ops340100 -Node: Concatenation342606 -Ref: Concatenation-Footnote-1345453 -Node: Assignment Ops345560 -Ref: table-assign-ops350551 -Node: Increment Ops351864 -Node: Truth Values and Conditions355324 -Node: Truth Values356398 -Node: Typing and Comparison357446 -Node: Variable Typing358266 -Node: Comparison Operators361890 -Ref: table-relational-ops362309 -Node: POSIX String Comparison365804 -Ref: POSIX String Comparison-Footnote-1366878 -Node: Boolean Ops367017 -Ref: Boolean Ops-Footnote-1371499 -Node: Conditional Exp371591 -Node: Function Calls373327 -Node: Precedence377204 -Node: Locales380863 -Node: Expressions Summary382495 -Node: Patterns and Actions385068 -Node: Pattern Overview386188 -Node: Regexp Patterns387865 -Node: Expression Patterns388407 -Node: Ranges392188 -Node: BEGIN/END395296 -Node: Using BEGIN/END396057 -Ref: Using BEGIN/END-Footnote-1398793 -Node: I/O And BEGIN/END398899 -Node: BEGINFILE/ENDFILE401213 -Node: Empty404120 -Node: Using Shell Variables404437 -Node: Action Overview406711 -Node: Statements409036 -Node: If Statement410884 -Node: While 
Statement412379 -Node: Do Statement414407 -Node: For Statement415555 -Node: Switch Statement418713 -Node: Break Statement421099 -Node: Continue Statement423191 -Node: Next Statement425018 -Node: Nextfile Statement427401 -Node: Exit Statement430053 -Node: Built-in Variables432456 -Node: User-modified433589 -Ref: User-modified-Footnote-1441214 -Node: Auto-set441276 -Ref: Auto-set-Footnote-1454366 -Ref: Auto-set-Footnote-2454572 -Node: ARGC and ARGV454628 -Node: Pattern Action Summary458841 -Node: Arrays461271 -Node: Array Basics462600 -Node: Array Intro463444 -Ref: figure-array-elements465419 -Ref: Array Intro-Footnote-1468123 -Node: Reference to Elements468251 -Node: Assigning Elements470715 -Node: Array Example471206 -Node: Scanning an Array472965 -Node: Controlling Scanning475987 -Ref: Controlling Scanning-Footnote-1481386 -Node: Numeric Array Subscripts481702 -Node: Uninitialized Subscripts483886 -Node: Delete485505 -Ref: Delete-Footnote-1488257 -Node: Multidimensional488314 -Node: Multiscanning491409 -Node: Arrays of Arrays493000 -Node: Arrays Summary497767 -Node: Functions499860 -Node: Built-in500898 -Node: Calling Built-in501976 -Node: Numeric Functions503972 -Ref: Numeric Functions-Footnote-1508000 -Ref: Numeric Functions-Footnote-2508357 -Ref: Numeric Functions-Footnote-3508405 -Node: String Functions508677 -Ref: String Functions-Footnote-1532181 -Ref: String Functions-Footnote-2532309 -Ref: String Functions-Footnote-3532557 -Node: Gory Details532644 -Ref: table-sub-escapes534435 -Ref: table-sub-proposed535954 -Ref: table-posix-sub537317 -Ref: table-gensub-escapes538858 -Ref: Gory Details-Footnote-1539681 -Node: I/O Functions539832 -Ref: I/O Functions-Footnote-1547052 -Node: Time Functions547200 -Ref: Time Functions-Footnote-1557705 -Ref: Time Functions-Footnote-2557773 -Ref: Time Functions-Footnote-3557931 -Ref: Time Functions-Footnote-4558042 -Ref: Time Functions-Footnote-5558154 -Ref: Time Functions-Footnote-6558381 -Node: Bitwise Functions558647 -Ref: 
table-bitwise-ops559241 -Ref: Bitwise Functions-Footnote-1563579 -Node: Type Functions563752 -Node: I18N Functions564908 -Node: User-defined566559 -Node: Definition Syntax567364 -Ref: Definition Syntax-Footnote-1573051 -Node: Function Example573122 -Ref: Function Example-Footnote-1576044 -Node: Function Caveats576066 -Node: Calling A Function576584 -Node: Variable Scope577542 -Node: Pass By Value/Reference580536 -Node: Return Statement584035 -Node: Dynamic Typing587014 -Node: Indirect Calls587944 -Ref: Indirect Calls-Footnote-1598195 -Node: Functions Summary598323 -Node: Library Functions601028 -Ref: Library Functions-Footnote-1604635 -Ref: Library Functions-Footnote-2604778 -Node: Library Names604949 -Ref: Library Names-Footnote-1608409 -Ref: Library Names-Footnote-2608632 -Node: General Functions608718 -Node: Strtonum Function609821 -Node: Assert Function612843 -Node: Round Function616169 -Node: Cliff Random Function617710 -Node: Ordinal Functions618726 -Ref: Ordinal Functions-Footnote-1621789 -Ref: Ordinal Functions-Footnote-2622041 -Node: Join Function622251 -Ref: Join Function-Footnote-1624021 -Node: Getlocaltime Function624221 -Node: Readfile Function627963 -Node: Shell Quoting629935 -Node: Data File Management631336 -Node: Filetrans Function631968 -Node: Rewind Function636064 -Node: File Checking637969 -Ref: File Checking-Footnote-1639303 -Node: Empty Files639504 -Node: Ignoring Assigns641483 -Node: Getopt Function643033 -Ref: Getopt Function-Footnote-1654502 -Node: Passwd Functions654702 -Ref: Passwd Functions-Footnote-1663541 -Node: Group Functions663629 -Ref: Group Functions-Footnote-1671526 -Node: Walking Arrays671733 -Node: Library Functions Summary674741 -Node: Library Exercises676147 -Node: Sample Programs676612 -Node: Running Examples677382 -Node: Clones678110 -Node: Cut Program679334 -Node: Egrep Program689263 -Ref: Egrep Program-Footnote-1696775 -Node: Id Program696885 -Node: Split Program700565 -Ref: Split Program-Footnote-1704024 -Node: Tee 
Program704153 -Node: Uniq Program706943 -Node: Wc Program714369 -Ref: Wc Program-Footnote-1718624 -Node: Miscellaneous Programs718718 -Node: Dupword Program719931 -Node: Alarm Program721961 -Node: Translate Program726816 -Ref: Translate Program-Footnote-1731381 -Node: Labels Program731651 -Ref: Labels Program-Footnote-1735002 -Node: Word Sorting735086 -Node: History Sorting739158 -Node: Extract Program740993 -Node: Simple Sed748522 -Node: Igawk Program751596 -Ref: Igawk Program-Footnote-1765927 -Ref: Igawk Program-Footnote-2766129 -Ref: Igawk Program-Footnote-3766251 -Node: Anagram Program766366 -Node: Signature Program769428 -Node: Programs Summary770675 -Node: Programs Exercises771889 -Ref: Programs Exercises-Footnote-1776018 -Node: Advanced Features776109 -Node: Nondecimal Data778099 -Node: Array Sorting779690 -Node: Controlling Array Traversal780390 -Ref: Controlling Array Traversal-Footnote-1788757 -Node: Array Sorting Functions788875 -Ref: Array Sorting Functions-Footnote-1793966 -Node: Two-way I/O794162 -Ref: Two-way I/O-Footnote-1799982 -Ref: Two-way I/O-Footnote-2800169 -Node: TCP/IP Networking800251 -Node: Profiling803369 -Node: Advanced Features Summary810908 -Node: Internationalization812844 -Node: I18N and L10N814324 -Node: Explaining gettext815011 -Ref: Explaining gettext-Footnote-1820034 -Ref: Explaining gettext-Footnote-2820219 -Node: Programmer i18n820384 -Ref: Programmer i18n-Footnote-1825239 -Node: Translator i18n825288 -Node: String Extraction826082 -Ref: String Extraction-Footnote-1827214 -Node: Printf Ordering827300 -Ref: Printf Ordering-Footnote-1830086 -Node: I18N Portability830150 -Ref: I18N Portability-Footnote-1832606 -Node: I18N Example832669 -Ref: I18N Example-Footnote-1835475 -Node: Gawk I18N835548 -Node: I18N Summary836193 -Node: Debugger837534 -Node: Debugging838556 -Node: Debugging Concepts838997 -Node: Debugging Terms840806 -Node: Awk Debugging843381 -Node: Sample Debugging Session844287 -Node: Debugger Invocation844821 -Node: 
Finding The Bug846207 -Node: List of Debugger Commands852685 -Node: Breakpoint Control854018 -Node: Debugger Execution Control857712 -Node: Viewing And Changing Data861074 -Node: Execution Stack864448 -Node: Debugger Info866085 -Node: Miscellaneous Debugger Commands870156 -Node: Readline Support875244 -Node: Limitations876140 -Node: Debugging Summary878249 -Node: Arbitrary Precision Arithmetic879422 -Node: Computer Arithmetic880838 -Ref: table-numeric-ranges884429 -Ref: Computer Arithmetic-Footnote-1885151 -Node: Math Definitions885208 -Ref: table-ieee-formats888522 -Ref: Math Definitions-Footnote-1889125 -Node: MPFR features889230 -Node: FP Math Caution890947 -Ref: FP Math Caution-Footnote-1892019 -Node: Inexactness of computations892388 -Node: Inexact representation893348 -Node: Comparing FP Values894708 -Node: Errors accumulate895790 -Node: Getting Accuracy897223 -Node: Try To Round899933 -Node: Setting precision900832 -Ref: table-predefined-precision-strings901529 -Node: Setting the rounding mode903359 -Ref: table-gawk-rounding-modes903733 -Ref: Setting the rounding mode-Footnote-1907141 -Node: Arbitrary Precision Integers907320 -Ref: Arbitrary Precision Integers-Footnote-1910304 -Node: POSIX Floating Point Problems910453 -Ref: POSIX Floating Point Problems-Footnote-1914335 -Node: Floating point summary914373 -Node: Dynamic Extensions916563 -Node: Extension Intro918116 -Node: Plugin License919382 -Node: Extension Mechanism Outline920179 -Ref: figure-load-extension920618 -Ref: figure-register-new-function922183 -Ref: figure-call-new-function923275 -Node: Extension API Description925337 -Node: Extension API Functions Introduction926785 -Node: General Data Types931597 -Ref: General Data Types-Footnote-1937552 -Node: Memory Allocation Functions937851 -Ref: Memory Allocation Functions-Footnote-1940696 -Node: Constructor Functions940795 -Node: Registration Functions942540 -Node: Extension Functions943225 -Node: Exit Callback Functions945524 -Node: Extension Version 
String946774 -Node: Input Parsers947437 -Node: Output Wrappers957322 -Node: Two-way processors961834 -Node: Printing Messages964098 -Ref: Printing Messages-Footnote-1965172 -Node: Updating ERRNO965325 -Node: Requesting Values966064 -Ref: table-value-types-returned966801 -Node: Accessing Parameters967684 -Node: Symbol Table Access968919 -Node: Symbol table by name969431 -Node: Symbol table by cookie971452 -Ref: Symbol table by cookie-Footnote-1975601 -Node: Cached values975665 -Ref: Cached values-Footnote-1979166 -Node: Array Manipulation979257 -Ref: Array Manipulation-Footnote-1980356 -Node: Array Data Types980393 -Ref: Array Data Types-Footnote-1983051 -Node: Array Functions983143 -Node: Flattening Arrays987001 -Node: Creating Arrays993909 -Node: Extension API Variables998680 -Node: Extension Versioning999316 -Node: Extension API Informational Variables1001207 -Node: Extension API Boilerplate1002271 -Node: Finding Extensions1006085 -Node: Extension Example1006644 -Node: Internal File Description1007442 -Node: Internal File Ops1011522 -Ref: Internal File Ops-Footnote-11023284 -Node: Using Internal File Ops1023424 -Ref: Using Internal File Ops-Footnote-11025807 -Node: Extension Samples1026081 -Node: Extension Sample File Functions1027610 -Node: Extension Sample Fnmatch1035259 -Node: Extension Sample Fork1036746 -Node: Extension Sample Inplace1037964 -Node: Extension Sample Ord1041174 -Node: Extension Sample Readdir1042010 -Ref: table-readdir-file-types1042899 -Node: Extension Sample Revout1043704 -Node: Extension Sample Rev2way1044293 -Node: Extension Sample Read write array1045033 -Node: Extension Sample Readfile1046975 -Node: Extension Sample Time1048070 -Node: Extension Sample API Tests1049418 -Node: gawkextlib1049910 -Node: Extension summary1052334 -Node: Extension Exercises1056026 -Node: Language History1057523 -Node: V7/SVR3.11059179 -Node: SVR41061331 -Node: POSIX1062765 -Node: BTL1064144 -Node: POSIX/GNU1064873 -Node: Feature History1070394 -Node: Common 
Extensions1083723 -Node: Ranges and Locales1085006 -Ref: Ranges and Locales-Footnote-11089622 -Ref: Ranges and Locales-Footnote-21089649 -Ref: Ranges and Locales-Footnote-31089884 -Node: Contributors1090105 -Node: History summary1095674 -Node: Installation1097054 -Node: Gawk Distribution1097998 -Node: Getting1098482 -Node: Extracting1099443 -Node: Distribution contents1101081 -Node: Unix Installation1106832 -Node: Quick Installation1107448 -Node: Additional Configuration Options1109875 -Node: Configuration Philosophy1111679 -Node: Non-Unix Installation1114048 -Node: PC Installation1114506 -Node: PC Binary Installation1115826 -Node: PC Compiling1117678 -Ref: PC Compiling-Footnote-11120702 -Node: PC Testing1120811 -Node: PC Using1121991 -Node: Cygwin1126105 -Node: MSYS1126875 -Node: VMS Installation1127376 -Node: VMS Compilation1128167 -Ref: VMS Compilation-Footnote-11129396 -Node: VMS Dynamic Extensions1129454 -Node: VMS Installation Details1131139 -Node: VMS Running1133392 -Node: VMS GNV1137671 -Node: VMS Old Gawk1138406 -Node: Bugs1138877 -Node: Other Versions1143074 -Node: Installation summary1149658 -Node: Notes1150716 -Node: Compatibility Mode1151581 -Node: Additions1152363 -Node: Accessing The Source1153288 -Node: Adding Code1154723 -Node: New Ports1160942 -Node: Derived Files1165430 -Ref: Derived Files-Footnote-11170915 -Ref: Derived Files-Footnote-21170950 -Ref: Derived Files-Footnote-31171548 -Node: Future Extensions1171662 -Node: Implementation Limitations1172320 -Node: Extension Design1173503 -Node: Old Extension Problems1174657 -Ref: Old Extension Problems-Footnote-11176175 -Node: Extension New Mechanism Goals1176232 -Ref: Extension New Mechanism Goals-Footnote-11179596 -Node: Extension Other Design Decisions1179785 -Node: Extension Future Growth1181898 -Node: Old Extension Mechanism1182734 -Node: Notes summary1184497 -Node: Basic Concepts1185679 -Node: Basic High Level1186360 -Ref: figure-general-flow1186642 -Ref: figure-process-flow1187327 -Ref: Basic 
High Level-Footnote-11190628 -Node: Basic Data Typing1190813 -Node: Glossary1194141 -Node: Copying1226087 -Node: GNU Free Documentation License1263626 -Node: Index1288744 +Node: Foreword342506 +Node: Foreword446948 +Node: Preface48480 +Ref: Preface-Footnote-151352 +Ref: Preface-Footnote-251459 +Ref: Preface-Footnote-351693 +Node: History51835 +Node: Names54187 +Ref: Names-Footnote-155281 +Node: This Manual55428 +Ref: This Manual-Footnote-161910 +Node: Conventions62010 +Node: Manual History64364 +Ref: Manual History-Footnote-167359 +Ref: Manual History-Footnote-267400 +Node: How To Contribute67474 +Node: Acknowledgments68603 +Node: Getting Started73489 +Node: Running gawk75928 +Node: One-shot77118 +Node: Read Terminal78381 +Node: Long80413 +Node: Executable Scripts81926 +Ref: Executable Scripts-Footnote-184721 +Node: Comments84824 +Node: Quoting87308 +Node: DOS Quoting92825 +Node: Sample Data Files93500 +Node: Very Simple96095 +Node: Two Rules100997 +Node: More Complex102882 +Node: Statements/Lines105745 +Ref: Statements/Lines-Footnote-1110204 +Node: Other Features110469 +Node: When111405 +Ref: When-Footnote-1113159 +Node: Intro Summary113224 +Node: Invoking Gawk114108 +Node: Command Line115622 +Node: Options116420 +Ref: Options-Footnote-1132518 +Ref: Options-Footnote-2132748 +Node: Other Arguments132773 +Node: Naming Standard Input135720 +Node: Environment Variables136813 +Node: AWKPATH Variable137371 +Ref: AWKPATH Variable-Footnote-1140782 +Ref: AWKPATH Variable-Footnote-2140827 +Node: AWKLIBPATH Variable141088 +Node: Other Environment Variables142345 +Node: Exit Status145983 +Node: Include Files146660 +Node: Loading Shared Libraries150255 +Node: Obsolete151683 +Node: Undocumented152375 +Node: Invoking Summary152672 +Node: Regexp154332 +Node: Regexp Usage155851 +Node: Escape Sequences157888 +Node: Regexp Operators164120 +Ref: Regexp Operators-Footnote-1171536 +Ref: Regexp Operators-Footnote-2171683 +Node: Bracket Expressions171781 +Ref: table-char-classes174257 
+Node: Leftmost Longest177394 +Node: Computed Regexps178697 +Node: GNU Regexp Operators182124 +Node: Case-sensitivity185803 +Ref: Case-sensitivity-Footnote-1188699 +Ref: Case-sensitivity-Footnote-2188934 +Node: Strong Regexp Constants189042 +Node: Regexp Summary191984 +Node: Reading Files193590 +Node: Records195753 +Node: awk split records196486 +Node: gawk split records201417 +Ref: gawk split records-Footnote-1205957 +Node: Fields205994 +Node: Nonconstant Fields208735 +Ref: Nonconstant Fields-Footnote-1210971 +Node: Changing Fields211175 +Node: Field Separators217103 +Node: Default Field Splitting219801 +Node: Regexp Field Splitting220919 +Node: Single Character Fields224272 +Node: Command Line Field Separator225332 +Node: Full Line Fields228550 +Ref: Full Line Fields-Footnote-1230072 +Ref: Full Line Fields-Footnote-2230118 +Node: Field Splitting Summary230219 +Node: Constant Size232293 +Node: Splitting By Content236871 +Ref: Splitting By Content-Footnote-1240842 +Node: Multiple Line241005 +Ref: Multiple Line-Footnote-1246887 +Node: Getline247066 +Node: Plain Getline249532 +Node: Getline/Variable252171 +Node: Getline/File253320 +Node: Getline/Variable/File254706 +Ref: Getline/Variable/File-Footnote-1256309 +Node: Getline/Pipe256397 +Node: Getline/Variable/Pipe259102 +Node: Getline/Coprocess260235 +Node: Getline/Variable/Coprocess261500 +Node: Getline Notes262240 +Node: Getline Summary265035 +Ref: table-getline-variants265457 +Node: Read Timeout266205 +Ref: Read Timeout-Footnote-1270111 +Node: Retrying Input270169 +Node: Command-line directories271368 +Node: Input Summary272274 +Node: Input Exercises275446 +Node: Printing276174 +Node: Print278008 +Node: Print Examples279465 +Node: Output Separators282245 +Node: OFMT284262 +Node: Printf285618 +Node: Basic Printf286403 +Node: Control Letters287977 +Node: Format Modifiers291965 +Node: Printf Examples297980 +Node: Redirection300466 +Node: Special FD307307 +Ref: Special FD-Footnote-1310475 +Node: Special Files310549 
+Node: Other Inherited Files311166 +Node: Special Network312167 +Node: Special Caveats313027 +Node: Close Files And Pipes313976 +Ref: Close Files And Pipes-Footnote-1321163 +Ref: Close Files And Pipes-Footnote-2321311 +Node: Nonfatal321462 +Node: Output Summary323787 +Node: Output Exercises325009 +Node: Expressions325688 +Node: Values326876 +Node: Constants327554 +Node: Scalar Constants328245 +Ref: Scalar Constants-Footnote-1329109 +Node: Nondecimal-numbers329359 +Node: Regexp Constants332372 +Node: Using Constant Regexps332898 +Node: Variables336061 +Node: Using Variables336718 +Node: Assignment Options338628 +Node: Conversion340501 +Node: Strings And Numbers341025 +Ref: Strings And Numbers-Footnote-1344088 +Node: Locale influences conversions344197 +Ref: table-locale-affects346955 +Node: All Operators347573 +Node: Arithmetic Ops348202 +Node: Concatenation350708 +Ref: Concatenation-Footnote-1353555 +Node: Assignment Ops353662 +Ref: table-assign-ops358653 +Node: Increment Ops359966 +Node: Truth Values and Conditions363426 +Node: Truth Values364500 +Node: Typing and Comparison365548 +Node: Variable Typing366368 +Node: Comparison Operators369992 +Ref: table-relational-ops370411 +Node: POSIX String Comparison373906 +Ref: POSIX String Comparison-Footnote-1374980 +Node: Boolean Ops375119 +Ref: Boolean Ops-Footnote-1379601 +Node: Conditional Exp379693 +Node: Function Calls381429 +Node: Precedence385306 +Node: Locales388965 +Node: Expressions Summary390597 +Node: Patterns and Actions393170 +Node: Pattern Overview394290 +Node: Regexp Patterns395967 +Node: Expression Patterns396509 +Node: Ranges400290 +Node: BEGIN/END403398 +Node: Using BEGIN/END404159 +Ref: Using BEGIN/END-Footnote-1406895 +Node: I/O And BEGIN/END407001 +Node: BEGINFILE/ENDFILE409315 +Node: Empty412222 +Node: Using Shell Variables412539 +Node: Action Overview414813 +Node: Statements417138 +Node: If Statement418986 +Node: While Statement420481 +Node: Do Statement422509 +Node: For Statement423657 +Node: 
Switch Statement426815 +Node: Break Statement429201 +Node: Continue Statement431293 +Node: Next Statement433120 +Node: Nextfile Statement435503 +Node: Exit Statement438155 +Node: Built-in Variables440558 +Node: User-modified441691 +Node: Auto-set449277 +Ref: Auto-set-Footnote-1463664 +Ref: Auto-set-Footnote-2463870 +Node: ARGC and ARGV463926 +Node: Pattern Action Summary468139 +Node: Arrays470569 +Node: Array Basics471898 +Node: Array Intro472742 +Ref: figure-array-elements474717 +Ref: Array Intro-Footnote-1477421 +Node: Reference to Elements477549 +Node: Assigning Elements480013 +Node: Array Example480504 +Node: Scanning an Array482263 +Node: Controlling Scanning485285 +Ref: Controlling Scanning-Footnote-1490684 +Node: Numeric Array Subscripts491000 +Node: Uninitialized Subscripts493184 +Node: Delete494803 +Ref: Delete-Footnote-1497555 +Node: Multidimensional497612 +Node: Multiscanning500707 +Node: Arrays of Arrays502298 +Node: Arrays Summary507065 +Node: Functions509158 +Node: Built-in510196 +Node: Calling Built-in511274 +Node: Numeric Functions513270 +Ref: Numeric Functions-Footnote-1518103 +Ref: Numeric Functions-Footnote-2518460 +Ref: Numeric Functions-Footnote-3518508 +Node: String Functions518780 +Ref: String Functions-Footnote-1542284 +Ref: String Functions-Footnote-2542412 +Ref: String Functions-Footnote-3542660 +Node: Gory Details542747 +Ref: table-sub-escapes544538 +Ref: table-sub-proposed546057 +Ref: table-posix-sub547420 +Ref: table-gensub-escapes548961 +Ref: Gory Details-Footnote-1549784 +Node: I/O Functions549935 +Ref: I/O Functions-Footnote-1557155 +Node: Time Functions557303 +Ref: Time Functions-Footnote-1567808 +Ref: Time Functions-Footnote-2567876 +Ref: Time Functions-Footnote-3568034 +Ref: Time Functions-Footnote-4568145 +Ref: Time Functions-Footnote-5568257 +Ref: Time Functions-Footnote-6568484 +Node: Bitwise Functions568750 +Ref: table-bitwise-ops569344 +Ref: Bitwise Functions-Footnote-1573682 +Node: Type Functions573855 +Node: I18N 
Functions576516 +Node: User-defined578167 +Node: Definition Syntax578972 +Ref: Definition Syntax-Footnote-1584659 +Node: Function Example584730 +Ref: Function Example-Footnote-1587652 +Node: Function Caveats587674 +Node: Calling A Function588192 +Node: Variable Scope589150 +Node: Pass By Value/Reference592144 +Node: Return Statement595643 +Node: Dynamic Typing598622 +Node: Indirect Calls599552 +Ref: Indirect Calls-Footnote-1609803 +Node: Functions Summary609931 +Node: Library Functions612636 +Ref: Library Functions-Footnote-1616243 +Ref: Library Functions-Footnote-2616386 +Node: Library Names616557 +Ref: Library Names-Footnote-1620017 +Ref: Library Names-Footnote-2620240 +Node: General Functions620326 +Node: Strtonum Function621429 +Node: Assert Function624451 +Node: Round Function627777 +Node: Cliff Random Function629318 +Node: Ordinal Functions630334 +Ref: Ordinal Functions-Footnote-1633397 +Ref: Ordinal Functions-Footnote-2633649 +Node: Join Function633859 +Ref: Join Function-Footnote-1635629 +Node: Getlocaltime Function635829 +Node: Readfile Function639571 +Node: Shell Quoting641543 +Node: Data File Management642944 +Node: Filetrans Function643576 +Node: Rewind Function647672 +Node: File Checking649577 +Ref: File Checking-Footnote-1650911 +Node: Empty Files651112 +Node: Ignoring Assigns653091 +Node: Getopt Function654641 +Ref: Getopt Function-Footnote-1666110 +Node: Passwd Functions666310 +Ref: Passwd Functions-Footnote-1675149 +Node: Group Functions675237 +Ref: Group Functions-Footnote-1683134 +Node: Walking Arrays683341 +Node: Library Functions Summary686349 +Node: Library Exercises687755 +Node: Sample Programs688220 +Node: Running Examples688990 +Node: Clones689718 +Node: Cut Program690942 +Node: Egrep Program700871 +Ref: Egrep Program-Footnote-1708383 +Node: Id Program708493 +Node: Split Program712173 +Ref: Split Program-Footnote-1715632 +Node: Tee Program715761 +Node: Uniq Program718551 +Node: Wc Program725977 +Ref: Wc Program-Footnote-1730232 +Node: 
Miscellaneous Programs730326 +Node: Dupword Program731539 +Node: Alarm Program733569 +Node: Translate Program738424 +Ref: Translate Program-Footnote-1742989 +Node: Labels Program743259 +Ref: Labels Program-Footnote-1746610 +Node: Word Sorting746694 +Node: History Sorting750766 +Node: Extract Program752601 +Node: Simple Sed760130 +Node: Igawk Program763204 +Ref: Igawk Program-Footnote-1777535 +Ref: Igawk Program-Footnote-2777737 +Ref: Igawk Program-Footnote-3777859 +Node: Anagram Program777974 +Node: Signature Program781036 +Node: Programs Summary782283 +Node: Programs Exercises783497 +Ref: Programs Exercises-Footnote-1787626 +Node: Advanced Features787717 +Node: Nondecimal Data789707 +Node: Array Sorting791298 +Node: Controlling Array Traversal791998 +Ref: Controlling Array Traversal-Footnote-1800365 +Node: Array Sorting Functions800483 +Ref: Array Sorting Functions-Footnote-1805574 +Node: Two-way I/O805770 +Ref: Two-way I/O-Footnote-1811590 +Ref: Two-way I/O-Footnote-2811777 +Node: TCP/IP Networking811859 +Node: Profiling814977 +Node: Advanced Features Summary823431 +Node: Internationalization825275 +Node: I18N and L10N826755 +Node: Explaining gettext827442 +Ref: Explaining gettext-Footnote-1832465 +Ref: Explaining gettext-Footnote-2832650 +Node: Programmer i18n832815 +Ref: Programmer i18n-Footnote-1837670 +Node: Translator i18n837719 +Node: String Extraction838513 +Ref: String Extraction-Footnote-1839645 +Node: Printf Ordering839731 +Ref: Printf Ordering-Footnote-1842517 +Node: I18N Portability842581 +Ref: I18N Portability-Footnote-1845037 +Node: I18N Example845100 +Ref: I18N Example-Footnote-1847906 +Node: Gawk I18N847979 +Node: I18N Summary848624 +Node: Debugger849965 +Node: Debugging850987 +Node: Debugging Concepts851428 +Node: Debugging Terms853237 +Node: Awk Debugging855812 +Node: Sample Debugging Session856718 +Node: Debugger Invocation857252 +Node: Finding The Bug858638 +Node: List of Debugger Commands865116 +Node: Breakpoint Control866449 +Node: Debugger 
Execution Control870143 +Node: Viewing And Changing Data873505 +Node: Execution Stack876879 +Node: Debugger Info878516 +Node: Miscellaneous Debugger Commands882587 +Node: Readline Support887675 +Node: Limitations888571 +Ref: Limitations-Footnote-1892802 +Node: Debugging Summary892853 +Node: Arbitrary Precision Arithmetic894132 +Node: Computer Arithmetic895548 +Ref: table-numeric-ranges899139 +Ref: Computer Arithmetic-Footnote-1899861 +Node: Math Definitions899918 +Ref: table-ieee-formats903232 +Ref: Math Definitions-Footnote-1903835 +Node: MPFR features903940 +Node: FP Math Caution905657 +Ref: FP Math Caution-Footnote-1906729 +Node: Inexactness of computations907098 +Node: Inexact representation908058 +Node: Comparing FP Values909418 +Node: Errors accumulate910500 +Node: Getting Accuracy911933 +Node: Try To Round914643 +Node: Setting precision915542 +Ref: table-predefined-precision-strings916239 +Node: Setting the rounding mode918069 +Ref: table-gawk-rounding-modes918443 +Ref: Setting the rounding mode-Footnote-1921851 +Node: Arbitrary Precision Integers922030 +Ref: Arbitrary Precision Integers-Footnote-1926947 +Node: POSIX Floating Point Problems927096 +Ref: POSIX Floating Point Problems-Footnote-1930978 +Node: Floating point summary931016 +Node: Dynamic Extensions933206 +Node: Extension Intro934759 +Node: Plugin License936025 +Node: Extension Mechanism Outline936822 +Ref: figure-load-extension937261 +Ref: figure-register-new-function938826 +Ref: figure-call-new-function939918 +Node: Extension API Description941980 +Node: Extension API Functions Introduction943512 +Node: General Data Types948371 +Ref: General Data Types-Footnote-1954326 +Node: Memory Allocation Functions954625 +Ref: Memory Allocation Functions-Footnote-1957470 +Node: Constructor Functions957569 +Node: Registration Functions959314 +Node: Extension Functions959999 +Node: Exit Callback Functions962298 +Node: Extension Version String963548 +Node: Input Parsers964211 +Node: Output Wrappers974096 +Node: 
Two-way processors978608 +Node: Printing Messages980872 +Ref: Printing Messages-Footnote-1981946 +Node: Updating ERRNO982099 +Node: Requesting Values982838 +Ref: table-value-types-returned983575 +Node: Accessing Parameters984458 +Node: Symbol Table Access985693 +Node: Symbol table by name986205 +Node: Symbol table by cookie988226 +Ref: Symbol table by cookie-Footnote-1992375 +Node: Cached values992439 +Ref: Cached values-Footnote-1995940 +Node: Array Manipulation996031 +Ref: Array Manipulation-Footnote-1997122 +Node: Array Data Types997159 +Ref: Array Data Types-Footnote-1999817 +Node: Array Functions999909 +Node: Flattening Arrays1003767 +Node: Creating Arrays1010675 +Node: Redirection API1015446 +Node: Extension API Variables1018277 +Node: Extension Versioning1018910 +Node: Extension API Informational Variables1020801 +Node: Extension API Boilerplate1021865 +Node: Finding Extensions1025679 +Node: Extension Example1026238 +Node: Internal File Description1027036 +Node: Internal File Ops1031116 +Ref: Internal File Ops-Footnote-11042878 +Node: Using Internal File Ops1043018 +Ref: Using Internal File Ops-Footnote-11045401 +Node: Extension Samples1045675 +Node: Extension Sample File Functions1047204 +Node: Extension Sample Fnmatch1054853 +Node: Extension Sample Fork1056340 +Node: Extension Sample Inplace1057558 +Node: Extension Sample Ord1060768 +Node: Extension Sample Readdir1061604 +Ref: table-readdir-file-types1062493 +Node: Extension Sample Revout1063298 +Node: Extension Sample Rev2way1063887 +Node: Extension Sample Read write array1064627 +Node: Extension Sample Readfile1066569 +Node: Extension Sample Time1067664 +Node: Extension Sample API Tests1069012 +Node: gawkextlib1069504 +Node: Extension summary1071951 +Node: Extension Exercises1075643 +Node: Language History1077140 +Node: V7/SVR3.11078796 +Node: SVR41080948 +Node: POSIX1082382 +Node: BTL1083761 +Node: POSIX/GNU1084490 +Node: Feature History1090352 +Node: Common Extensions1104722 +Node: Ranges and 
Locales1106005 +Ref: Ranges and Locales-Footnote-11110621 +Ref: Ranges and Locales-Footnote-21110648 +Ref: Ranges and Locales-Footnote-31110883 +Node: Contributors1111104 +Node: History summary1116673 +Node: Installation1118053 +Node: Gawk Distribution1118997 +Node: Getting1119481 +Node: Extracting1120442 +Node: Distribution contents1122080 +Node: Unix Installation1128174 +Node: Quick Installation1128856 +Node: Shell Startup Files1131270 +Node: Additional Configuration Options1132348 +Node: Configuration Philosophy1134153 +Node: Non-Unix Installation1136522 +Node: PC Installation1136980 +Node: PC Binary Installation1138300 +Node: PC Compiling1140152 +Ref: PC Compiling-Footnote-11143176 +Node: PC Testing1143285 +Node: PC Using1144465 +Node: Cygwin1148579 +Node: MSYS1149349 +Node: VMS Installation1149850 +Node: VMS Compilation1150641 +Ref: VMS Compilation-Footnote-11151870 +Node: VMS Dynamic Extensions1151928 +Node: VMS Installation Details1153613 +Node: VMS Running1155866 +Node: VMS GNV1160145 +Node: VMS Old Gawk1160880 +Node: Bugs1161351 +Node: Other Versions1165548 +Node: Installation summary1172132 +Node: Notes1173190 +Node: Compatibility Mode1174055 +Node: Additions1174837 +Node: Accessing The Source1175762 +Node: Adding Code1177197 +Node: New Ports1183416 +Node: Derived Files1187904 +Ref: Derived Files-Footnote-11193389 +Ref: Derived Files-Footnote-21193424 +Ref: Derived Files-Footnote-31194022 +Node: Future Extensions1194136 +Node: Implementation Limitations1194794 +Node: Extension Design1195977 +Node: Old Extension Problems1197131 +Ref: Old Extension Problems-Footnote-11198649 +Node: Extension New Mechanism Goals1198706 +Ref: Extension New Mechanism Goals-Footnote-11202070 +Node: Extension Other Design Decisions1202259 +Node: Extension Future Growth1204372 +Node: Old Extension Mechanism1205208 +Node: Notes summary1206971 +Node: Basic Concepts1208153 +Node: Basic High Level1208834 +Ref: figure-general-flow1209116 +Ref: figure-process-flow1209801 +Ref: Basic 
High Level-Footnote-11213102 +Node: Basic Data Typing1213287 +Node: Glossary1216615 +Node: Copying1248561 +Node: GNU Free Documentation License1286100 +Node: Index1311218 End Tag Table diff --git a/doc/gawk.texi b/doc/gawk.texi index dcd49e6e..002953eb 100644 --- a/doc/gawk.texi +++ b/doc/gawk.texi @@ -534,6 +534,7 @@ particular records in a file and perform operations upon them. * Computed Regexps:: Using Dynamic Regexps. * GNU Regexp Operators:: Operators specific to GNU software. * Case-sensitivity:: How to do case-insensitive matching. +* Strong Regexp Constants:: Strongly typed regexp constants. * Regexp Summary:: Regular expressions summary. * Records:: Controlling how data is split into records. @@ -576,6 +577,7 @@ particular records in a file and perform operations upon them. @code{getline}. * Getline Summary:: Summary of @code{getline} Variants. * Read Timeout:: Reading input with a timeout. +* Retrying Input:: Retrying input after certain errors. * Command-line directories:: What happens if you put a directory on the command line. * Input Summary:: Input summary. @@ -605,6 +607,7 @@ particular records in a file and perform operations upon them. * Special Caveats:: Things to watch out for. * Close Files And Pipes:: Closing Input and Output Files and Pipes. +* Nonfatal:: Enabling Nonfatal Output. * Output Summary:: Output summary. * Output Exercises:: Exercises. * Values:: Constants, Variables, and Regular @@ -916,6 +919,7 @@ particular records in a file and perform operations upon them. * Array Functions:: Functions for working with arrays. * Flattening Arrays:: How to flatten arrays. * Creating Arrays:: How to create and populate arrays. +* Redirection API:: How to access and manipulate redirections. * Extension API Variables:: Variables provided by the API. * Extension Versioning:: API Version information. 
* Extension API Informational Variables:: Variables providing information about @@ -974,6 +978,7 @@ particular records in a file and perform operations upon them. * Unix Installation:: Installing @command{gawk} under various versions of Unix. * Quick Installation:: Compiling @command{gawk} under Unix. +* Shell Startup Files:: Shell convenience functions. * Additional Configuration Options:: Other compile-time options. * Configuration Philosophy:: How it's all supposed to work. * Non-Unix Installation:: Installation on Other Operating @@ -4048,6 +4053,7 @@ when parsing numeric input data (@pxref{Locales}). @cindex @option{-o} option @cindex @option{--pretty-print} option Enable pretty-printing of @command{awk} programs. +Implies @option{--no-optimize}. By default, the output program is created in a file named @file{awkprof.out} (@pxref{Profiling}). The optional @var{file} argument allows you to specify a different @@ -4056,18 +4062,22 @@ No space is allowed between the @option{-o} and @var{file}, if @var{file} is supplied. @quotation NOTE -Due to the way @command{gawk} has evolved, with this option -your program still executes. This will change in the -next major release, such that @command{gawk} will only -pretty-print the program and not run it. +In the past, this option would also execute your program. +This is no longer the case. @end quotation @item @option{-O} @itemx @option{--optimize} @cindex @option{--optimize} option @cindex @option{-O} option -Enable some optimizations on the internal representation of the program. -At the moment, this includes just simple constant folding. +Enable @command{gawk}'s default optimizations on the internal +representation of the program. At the moment, this includes simple +constant folding and tail recursion elimination in function calls. + +These optimizations are enabled by default. +This option remains primarily for backwards compatibility. 
However, it may +be used to cancel the effect of an earlier @option{-s} option +(see later in this list). @item @option{-p}[@var{file}] @itemx @option{--profile}[@code{=}@var{file}] @@ -4076,6 +4086,7 @@ At the moment, this includes just simple constant folding. @cindex @command{awk} profiling, enabling Enable profiling of @command{awk} programs (@pxref{Profiling}). +Implies @option{--no-optimize}. By default, profiles are created in a file named @file{awkprof.out}. The optional @var{file} argument allows you to specify a different @value{FN} for the profile file. @@ -4105,11 +4116,6 @@ restrictions apply: @cindex newlines @cindex whitespace, newlines as @item -Newlines do not act as whitespace to separate fields when @code{FS} is -equal to a single space -(@pxref{Fields}). - -@item Newlines are not allowed after @samp{?} or @samp{:} (@pxref{Conditional Exp}). @@ -4147,6 +4153,13 @@ This is now @command{gawk}'s default behavior. Nevertheless, this option remains (both for backward compatibility and for use in combination with @option{--traditional}). +@item @option{-s} +@itemx @option{--no-optimize} +@cindex @option{--no-optimize} option +@cindex @option{-s} option +Disable @command{gawk}'s default optimizations on the internal +representation of the program. + @item @option{-S} @itemx @option{--sandbox} @cindex @option{-S} option @@ -4460,6 +4473,9 @@ searches first in the current directory and then in @file{/usr/local/share/awk}. In practice, this means that you will rarely need to change the value of @env{AWKPATH}. +@xref{Shell Startup Files}, for information on functions that help to +manipulate the @env{AWKPATH} variable. + @command{gawk} places the value of the search path that it used into @code{ENVIRON["AWKPATH"]}. This provides access to the actual search path value from within an @command{awk} program. 
@@ -4491,6 +4507,9 @@ an empty value, @command{gawk} uses a default path; this is typically @samp{/usr/local/lib/gawk}, although it can vary depending upon how @command{gawk} was built. +@xref{Shell Startup Files}, for information on functions that help to +manipulate the @env{AWKLIBPATH} variable. + @command{gawk} places the value of the search path that it used into @code{ENVIRON["AWKLIBPATH"]}. This provides access to the actual search path value from within an @command{awk} program. @@ -4518,6 +4537,8 @@ wait for input before returning with an error. Controls the number of times @command{gawk} attempts to retry a two-way TCP/IP (socket) connection before giving up. @xref{TCP/IP Networking}. +Note that when nonfatal I/O is enabled (@pxref{Nonfatal}), +@command{gawk} only tries to open a TCP/IP socket once. @item POSIXLY_CORRECT Causes @command{gawk} to switch to POSIX-compatibility @@ -4567,14 +4588,6 @@ two regexp matchers that @command{gawk} uses internally. (There aren't supposed to be differences, but occasionally theory and practice don't coordinate with each other.) -@item GAWK_NO_PP_RUN -When @command{gawk} is invoked with the @option{--pretty-print} option, -it will not run the program if this environment variable exists. - -@quotation CAUTION -This variable will not survive into the next major release. -@end quotation - @item GAWK_STACKSIZE This specifies the amount by which @command{gawk} should grow its internal evaluation stack, when needed. @@ -4872,6 +4885,32 @@ Similarly, you may use @code{print} or @code{printf} statements in the @var{init} and @var{increment} parts of a @code{for} loop. This is another long-undocumented ``feature'' of Unix @command{awk}. +@command{gawk} lets you use the names of built-in functions that are +@command{gawk} extensions as the names of parameters in user-defined functions. +This is intended to ``future-proof'' old code that happens to use +function names added by @command{gawk} after the code was written. 
+Standard @command{awk} built-in functions, such as @code{sin()} or +@code{substr()} are @emph{not} shadowed in this way. + +The @code{PROCINFO["argv"]} array contains all of the command-line arguments +(after glob expansion and redirection processing on platforms where that must +be done manually by the program) with subscripts ranging from 0 through +@code{argc} @minus{} 1. For example, @code{PROCINFO["argv"][0]} will contain +the name by which @command{gawk} was invoked. Here is an example of how this +feature may be used: + +@example +awk ' +BEGIN @{ + for (i = 0; i < length(PROCINFO["argv"]); i++) + print i, PROCINFO["argv"][i] +@}' +@end example + +Please note that this differs from the standard @code{ARGV} array which does +not include command-line arguments that have already been processed by +@command{gawk} (@pxref{ARGC and ARGV}). + @end ignore @node Invoking Summary @@ -4964,6 +5003,7 @@ regular expressions work, we present more complicated instances. * Computed Regexps:: Using Dynamic Regexps. * GNU Regexp Operators:: Operators specific to GNU software. * Case-sensitivity:: How to do case-insensitive matching. +* Strong Regexp Constants:: Strongly typed regexp constants. * Regexp Summary:: Regular expressions summary. @end menu @@ -5154,17 +5194,21 @@ between @samp{0} and @samp{7}. For example, the code for the ASCII ESC @item \x@var{hh}@dots{} The hexadecimal value @var{hh}, where @var{hh} stands for a sequence of hexadecimal digits (@samp{0}--@samp{9}, and either @samp{A}--@samp{F} -or @samp{a}--@samp{f}). Like the same construct -in ISO C, the escape sequence continues until the first nonhexadecimal -digit is seen. @value{COMMONEXT} -However, using more than two hexadecimal digits produces -undefined results. (The @samp{\x} escape sequence is not allowed in -POSIX @command{awk}.) +or @samp{a}--@samp{f}). A maximum of two digits are allowed after +the @samp{\x}. Any further hexadecimal digits are treated as simple +letters or numbers. 
@value{COMMONEXT} +(The @samp{\x} escape sequence is not allowed in POSIX awk.) @quotation CAUTION -The next major release of @command{gawk} will change, such -that a maximum of two hexadecimal digits following the -@samp{\x} will be used. +In ISO C, the escape sequence continues until the first nonhexadecimal +digit is seen. +For many years, @command{gawk} would continue incorporating +hexadecimal digits into the value until a non-hexadecimal digit +or the end of the string was encountered. +However, using more than two hexadecimal digits produced +undefined results. +As of @value{PVERSION} 4.2, only two digits +are processed. @end quotation @cindex @code{\} (backslash), @code{\/} escape sequence @@ -6220,6 +6264,89 @@ The value of @code{IGNORECASE} has no effect if @command{gawk} is in compatibility mode (@pxref{Options}). Case is always significant in compatibility mode. +@node Strong Regexp Constants +@section Strongly Typed Regexp Constants + +This @value{SECTION} describes a @command{gawk}-specific feature. + +Regexp constants (@code{/@dots{}/}) hold a strange position in the +@command{awk} language. In most contexts, they act like an expression: +@samp{$0 ~ /@dots{}/}. In other contexts, they denote only a regexp to +be matched. In no case are they really a ``first class citizen'' of the +language. That is, you cannot define a scalar variable whose type is +``regexp'' in the same sense that you can define a variable to be a +number or a string: + +@example +num = 42 @ii{Numeric variable} +str = "hi" @ii{String variable} +re = /foo/ @ii{Wrong!} re @ii{is the result of} $0 ~ /foo/ +@end example + +For a number of more advanced use cases (described later on in this +@value{DOCUMENT}), it would be nice to have regexp constants that +are @dfn{strongly typed}; in other words, that denote a regexp useful +for matching, and not an expression. + +@command{gawk} provides this feature. 
A strongly typed regexp constant +looks almost like a regular regexp constant, except that it is preceded +by an @samp{@@} sign: + +@example +re = @@/foo/ @ii{Regexp variable} +@end example + +Strongly typed regexp constants @emph{cannot} be used everywhere that a +regular regexp constant can, because this would make the language even more +confusing. Instead, you may use them only in certain contexts: + +@itemize @bullet +@item +On the righthand side of the @samp{~} and @samp{!~} operators: @samp{some_var ~ @@/foo/} +(@pxref{Regexp Usage}). + +@item +In the @code{case} part of a @code{switch} statement +(@pxref{Switch Statement}). + +@item +As an argument to one of the built-in functions that accept regexp constants: +@code{gensub()}, +@code{gsub()}, +@code{match()}, +@code{patsplit()}, +@code{split()}, +and +@code{sub()} +(@pxref{String Functions}). + +@item +As a parameter in a call to a user-defined function +(@pxref{User-defined}). + +@item +On the righthand side of an assignment to a variable: @samp{some_var = @@/foo/}. +In this case, the type of @code{some_var} is regexp. Additionally, @code{some_var} +can be used with @samp{~} and @samp{!~}, passed to one of the built-in functions +listed above, or passed as a parameter to a user-defined function. +@end itemize + +You may use the @code{typeof()} built-in function +(@pxref{Type Functions}) +to determine if a variable or function parameter is +a regexp variable. + +The true power of this feature comes from the ability to create variables that +have regexp type. Such variables can be passed on to user-defined functions, +without the confusing aspects of computed regular expressions created from +strings or string constants. They may also be passed through indirect function +calls (@pxref{Indirect Calls}) +onto the built-in functions that accept regexp constants. + +When used in numeric conversions, strongly typed regexp variables convert +to zero. 
When used in string conversions, they convert to the string +value of the original regexp text. + @node Regexp Summary @section Summary @@ -6263,6 +6390,11 @@ treated as regular expressions). case sensitivity of regexp matching. In other @command{awk} versions, use @code{tolower()} or @code{toupper()}. +@item +Strongly typed regexp constants (@code{@@/.../}) enable +certain advanced use cases to be described later on in the +@value{DOCUMENT}. + @end itemize @@ -6310,6 +6442,7 @@ used with it do not have to be named on the @command{awk} command line * Getline:: Reading files under explicit program control using the @code{getline} function. * Read Timeout:: Reading input with a timeout. +* Retrying Input:: Retrying input after certain errors. * Command-line directories:: What happens if you put a directory on the command line. * Input Summary:: Input summary. @@ -6684,16 +6817,12 @@ Readfile} for another option. @cindex fields @cindex accessing fields @cindex fields, examining -@cindex POSIX @command{awk}, field separators and -@cindex field separators, POSIX and -@cindex separators, field, POSIX and When @command{awk} reads an input record, the record is automatically @dfn{parsed} or separated by the @command{awk} utility into chunks called @dfn{fields}. By default, fields are separated by @dfn{whitespace}, like words in a line. Whitespace in @command{awk} means any string of one or more spaces, -TABs, or newlines;@footnote{In POSIX @command{awk}, newlines are not -considered whitespace for separating fields.} other characters +TABs, or newlines; other characters that are considered whitespace by other languages (such as formfeed, vertical tab, etc.) are @emph{not} considered whitespace by @command{awk}. @@ -7138,7 +7267,6 @@ can massage it first with a separate @command{awk} program.) 
@node Default Field Splitting @subsection Whitespace Normally Separates Fields -@cindex newlines, as field separators @cindex whitespace, as field separators Fields are normally separated by whitespace sequences (spaces, TABs, and newlines), not by single spaces. Two spaces in a row do not @@ -8101,6 +8229,13 @@ a record, such as a file that cannot be opened, then @code{getline} returns @minus{}1. In this case, @command{gawk} sets the variable @code{ERRNO} to a string describing the error that occurred. +If @code{ERRNO} indicates that the I/O operation may be +retried, and @code{PROCINFO["@var{input}", "RETRY"]} is set, +then @code{getline} returns @minus{}2 +instead of @minus{}1, and further calls to @code{getline} +may be attempted. @xref{Retrying Input} for further information about +this feature. + In the following examples, @var{command} stands for a string value that represents a shell command. @@ -8755,7 +8890,8 @@ on a per-command or per-connection basis. the attempt to read from the underlying device may succeed in a later attempt. This is a limitation, and it also means that you cannot use this to multiplex input from -two or more sources. +two or more sources. @xref{Retrying Input} for a way to enable +later I/O attempts to succeed. Assigning a timeout value prevents read operations from blocking indefinitely. But bear in mind that there are other ways @@ -8765,6 +8901,36 @@ a connection before it can start reading any data, or the attempt to open a FIFO special file for reading can block indefinitely until some other process opens it for writing. +@node Retrying Input +@section Retrying Reads After Certain Input Errors +@cindex retrying input + +@cindex differences in @command{awk} and @command{gawk}, retrying input +This @value{SECTION} describes a feature that is specific to @command{gawk}. 
+ +When @command{gawk} encounters an error while reading input, by +default @code{getline} returns @minus{}1, and subsequent attempts to +read from that file result in an end-of-file indication. However, you +may optionally instruct @command{gawk} to allow I/O to be retried when +certain errors are encountered by setting a special element in +the @code{PROCINFO} array (@pxref{Auto-set}): + +@example +PROCINFO["@var{input_name}", "RETRY"] = 1 +@end example + +When this element exists, @command{gawk} checks the value of the system +(C language) +@code{errno} variable when an I/O error occurs. If @code{errno} indicates +a subsequent I/O attempt may succeed, @code{getline} instead returns +@minus{}2 and +further calls to @code{getline} may succeed. This applies to the @code{errno} +values @code{EAGAIN}, @code{EWOULDBLOCK}, @code{EINTR}, or @code{ETIMEDOUT}. + +This feature is useful in conjunction with +@code{PROCINFO["@var{input_name}", "READ_TIMEOUT"]} or situations where a file +descriptor has been configured to behave in a non-blocking fashion. + @node Command-line directories @section Directories on the Command Line @cindex differences in @command{awk} and @command{gawk}, command-line directories @@ -8926,6 +9092,7 @@ and discusses the @code{close()} built-in function. @command{gawk} allows access to inherited file descriptors. * Close Files And Pipes:: Closing Input and Output Files and Pipes. +* Nonfatal:: Enabling Nonfatal Output. * Output Summary:: Output summary. * Output Exercises:: Exercises. @end menu @@ -10431,6 +10598,70 @@ when closing a pipe. @end cartouche @end ifnotdocbook +@node Nonfatal +@section Enabling Nonfatal Output + +This @value{SECTION} describes a @command{gawk}-specific feature. + +In standard @command{awk}, output with @code{print} or @code{printf} +to a nonexistent file, or some other I/O error (such as filling up the +disk) is a fatal error. + +@example +$ @kbd{gawk 'BEGIN @{ print "hi" > "/no/such/file" @}'} +@error{} gawk: cmd. 
line:1: fatal: can't redirect to `/no/such/file' (No such file or directory) +@end example + +@command{gawk} makes it possible to detect that an error has +occurred, allowing you to possibly recover from the error, or +at least print an error message of your choosing before exiting. +You can do this in one of two ways: + +@itemize @bullet +@item +For all output files, by assigning any value to @code{PROCINFO["NONFATAL"]}. + +@item +On a per-file basis, by assigning any value to +@code{PROCINFO[@var{filename}, "NONFATAL"]}. +Here, @var{filename} is the name of the file to which +you wish output to be nonfatal. +@end itemize + +Once you have enabled nonfatal output, you must check @code{ERRNO} +after every relevant @code{print} or @code{printf} statement to +see if something went wrong. It is also a good idea to initialize +@code{ERRNO} to zero before attempting the output. For example: + +@example +$ @kbd{gawk '} +> @kbd{BEGIN @{} +> @kbd{ PROCINFO["NONFATAL"] = 1} +> @kbd{ ERRNO = 0} +> @kbd{ print "hi" > "/no/such/file"} +> @kbd{ if (ERRNO) @{} +> @kbd{ print("Output failed:", ERRNO) > "/dev/stderr"} +> @kbd{ exit 1} +> @kbd{ @}} +> @kbd{@}'} +@error{} Output failed: No such file or directory +@end example + +Here, @command{gawk} did not produce a fatal error; instead +it let the @command{awk} program code detect the problem and handle it. + +This mechanism works also for standard output and standard error. +For standard output, you may use @code{PROCINFO["-", "NONFATAL"]} +or @code{PROCINFO["/dev/stdout", "NONFATAL"]}. For standard error, use +@code{PROCINFO["/dev/stderr", "NONFATAL"]}. + +When attempting to open a TCP/IP socket (@pxref{TCP/IP Networking}), +@command{gawk} tries multiple times. The @env{GAWK_SOCK_RETRIES} +environment variable (@pxref{Other Environment Variables}) allows you to +override @command{gawk}'s builtin default number of attempts. 
However, +once nonfatal I/O is enabled for a given socket, @command{gawk} only +retries once, relying on @command{awk}-level code to notice that there +was a problem. @node Output Summary @section Summary @@ -10460,6 +10691,12 @@ Use @code{close()} to close open file, pipe, and coprocess redirections. For coprocesses, it is possible to close only one direction of the communications. +@item +Normally errors with @code{print} or @code{printf} are fatal. +@command{gawk} lets you make output errors be nonfatal either for +all files or on a per-file basis. You must then check for errors +after every relevant output statement. + @end itemize @c EXCLUDE START @@ -14563,12 +14800,11 @@ specify the behavior when @code{FS} is the null string. Nonetheless, some other versions of @command{awk} also treat @code{""} specially.) -@cindex POSIX @command{awk}, @code{FS} variable and The default value is @w{@code{" "}}, a string consisting of a single -space. As a special exception, this value means that any -sequence of spaces, TABs, and/or newlines is a single separator.@footnote{In -POSIX @command{awk}, newline does not count as whitespace.} It also causes -spaces, TABs, and newlines at the beginning and end of a record to be ignored. +space. As a special exception, this value means that any sequence of +spaces, TABs, and/or newlines is a single separator. It also causes +spaces, TABs, and newlines at the beginning and end of a record to +be ignored. You can set the value of @code{FS} on the command line using the @option{-F} option: @@ -14792,10 +15028,24 @@ opens the next file. An associative array containing the values of the environment. The array indices are the environment variable names; the elements are the values of the particular environment variables. For example, -@code{ENVIRON["HOME"]} might be @code{"/home/arnold"}. 
Changing this array +does not affect the environment passed on to any programs that +@command{awk} may spawn via redirection or the @code{system()} function. +(In a future version of @command{gawk}, it may do so.) +@code{ENVIRON["HOME"]} might be @code{/home/arnold}. + +For POSIX @command{awk}, changing this array does not affect the +environment passed on to any programs that @command{awk} may spawn via +redirection or the @code{system()} function. + +However, beginning with version 4.2, if not in POSIX +compatibility mode, @command{gawk} does update its own environment when +@code{ENVIRON} is changed, thus changing the environment seen by programs +that it creates. You should therefore be especially careful if you +modify @code{ENVIRON["PATH"]}, which is the search path for finding +executable programs. + +This can also affect the running @command{gawk} program, since some of the +built-in functions may pay attention to certain environment variables. +The most notable instance of this is @code{mktime()} (@pxref{Time +Functions}), which pays attention to the value of the @env{TZ} environment +variable on many systems. Some operating systems may not have environment variables. On such systems, the @code{ENVIRON} array is empty (except for @@ -14829,6 +15079,11 @@ value to be meaningful when an I/O operation returns a failure value, such as @code{getline} returning @minus{}1. You are, of course, free to clear it yourself before doing an I/O operation. +If the value of @code{ERRNO} corresponds to a system error in the C +@code{errno} variable, then @code{PROCINFO["errno"]} will be set to the value +of @code{errno}. For non-system errors, @code{PROCINFO["errno"]} will +be zero. + @cindex @code{FILENAME} variable @cindex dark corner, @code{FILENAME} variable @item @code{FILENAME} @@ -14897,6 +15152,10 @@ are guaranteed to be available: @item PROCINFO["egid"] The value of the @code{getegid()} system call. 
+@item PROCINFO["errno"] +The value of the C @code{errno} variable when @code{ERRNO} is set to +the associated error message. + @item PROCINFO["euid"] @cindex effective user ID of @command{gawk} user The value of the @code{geteuid()} system call. @@ -15020,6 +15279,14 @@ to test for these elements The following elements allow you to change @command{gawk}'s behavior: @table @code +@item PROCINFO["NONFATAL"] +If this element exists, then I/O errors for all output redirections become nonfatal. +@xref{Nonfatal}. + +@item PROCINFO["@var{output_name}", "NONFATAL"] +Make output errors for @var{output_name} be nonfatal. +@xref{Nonfatal}. + @item PROCINFO["@var{command}", "pty"] For two-way communication to @var{command}, use a pseudo-tty instead of setting up a two-way pipe. @@ -16942,6 +17209,23 @@ truncated toward zero. For example, @code{int(3)} is 3, @code{int(3.9)} is 3, @code{int(-3.9)} is @minus{}3, and @code{int(-3)} is @minus{}3 as well. +@item @code{intdiv(@var{numerator}, @var{denominator}, @var{result})} +@cindexawkfunc{intdiv} +@cindex intdiv +Perform integer division, similar to the standard C function of the +same name. First, truncate @code{numerator} and @code{denominator} +towards zero, creating integer values. Clear the @code{result} +array, and then set @code{result["quotient"]} to the result of +@samp{numerator / denominator}, truncated towards zero to an integer, +and set @code{result["remainder"]} to the result of @samp{numerator % +denominator}, truncated towards zero to an integer. This function is +primarily intended for use with arbitrary length integers; it avoids +creating MPFR arbitrary precision floating-point values (@pxref{Arbitrary +Precision Integers}). + +This function is a @code{gawk} extension. It is not available in +compatibility mode (@pxref{Options}). + @item @code{log(@var{x})} @cindexawkfunc{log} @cindex logarithm @@ -19192,16 +19476,70 @@ results of the @code{compl()}, @code{lshift()}, and @code{rshift()} functions. 
@node Type Functions @subsection Getting Type Information -@command{gawk} provides a single function that lets you distinguish -an array from a scalar variable. This is necessary for writing code +@command{gawk} provides two functions that let you distinguish +the type of a variable. +This is necessary for writing code that traverses every element of an array of arrays -(@pxref{Arrays of Arrays}). +(@pxref{Arrays of Arrays}), and in other contexts. @table @code @cindexgawkfunc{isarray} @cindex scalar or array @item isarray(@var{x}) Return a true value if @var{x} is an array. Otherwise, return false. + +@cindexgawkfunc{typeof} +@cindex variable type +@cindex type, of variable +@item typeof(@var{x}) +Return one of the following strings, depending upon the type of @var{x}: + +@c nested table +@table @code +@item "array" +@var{x} is an array. + +@item "regexp" +@var{x} is a strongly typed regexp (@pxref{Strong Regexp Constants}). + +@item "number" +@var{x} is a number. + +@item "string" +@var{x} is a string. + +@item "strnum" +@var{x} is a string that might be a number, such as a field or +the result of calling @code{split()}. (I.e., @var{x} has the STRNUM +attribute; @pxref{Variable Typing}.) + +@item "unassigned" +@var{x} is a scalar variable that has not been assigned a value yet. +For example: + +@example +BEGIN @{ + a[1] # creates a[1] but it has no assigned value + print typeof(a[1]) # unassigned +@} +@end example + +@item "untyped" +@var{x} has not yet been used at all; it can become a scalar or an +array. +For example: + +@example +BEGIN @{ + print typeof(x) # x never used --> untyped + mk_arr(x) + print typeof(x) # x now an array --> array +@} + +function mk_arr(a) @{ a[1] = 1 @} +@end example + +@end table @end table @code{isarray()} is meant for use in two circumstances. The first is when @@ -19219,6 +19557,14 @@ that has not been previously used to @code{isarray()}, @command{gawk} ends up turning it into a scalar. 
@end quotation +The @code{typeof()} function is general; it allows you to determine +if a variable or function parameter is a scalar, an array, or a strongly +typed regexp. + +@code{isarray()} is deprecated; you should use @code{typeof()} instead. +You should replace any existing uses of @samp{isarray(var)} in your +code with @samp{typeof(var) == "array"}. + @node I18N Functions @subsection String-Translation Functions @cindex @command{gawk}, string-translation functions @@ -27855,8 +28201,7 @@ The profiled version of your program may not look exactly like what you typed when you wrote it. This is because @command{gawk} creates the profiled version by ``pretty-printing'' its internal representation of the program. The advantage to this is that @command{gawk} can produce -a standard representation. The disadvantage is that all source code -comments are lost. +a standard representation. Also, things such as: @example @@ -27950,10 +28295,30 @@ When called this way, @command{gawk} ``pretty-prints'' the program into @file{awkprof.out}, without any execution counts. @quotation NOTE -The @option{--pretty-print} option still runs your program. -This will change in the next major release. +Once upon a time, the @option{--pretty-print} option would also run +your program. This is no longer the case. @end quotation +There is a significant difference between the output created when +profiling, and that created when pretty-printing. Pretty-printed output +preserves the original comments that were in the program, although their +placement may not correspond exactly to their original locations in the +source code. + +However, as a deliberate design decision, profiling output @emph{omits} +the original program's comments. This allows you to focus on the +execution count data and helps you avoid the temptation to use the +profiler for pretty-printing. + +Additionally, pretty-printed output does not have the leading indentation +that the profiling output does. 
This makes it easy to pretty-print your +code once development is completed, and then use the result as the final +version of your program. + +Because the internal representation of your program is formatted to +recreate an @command{awk} program, profiling and pretty-printing +automatically disable @command{gawk}'s default optimizations. + @node Advanced Features Summary @section Summary @@ -27994,8 +28359,7 @@ you tune them more easily. Sending the @code{USR1} signal while profiling cause @command{gawk} to dump the profile and keep going, including a function call stack. @item -You can also just ``pretty-print'' the program. This currently also runs -the program, but that will change in the next major release. +You can also just ``pretty-print'' the program. @end itemize @@ -30165,6 +30529,65 @@ executing, short programs. The @command{gawk} debugger only accepts source code supplied with the @option{-f} option. @end itemize +One other point is worth discussing. Conventional debuggers run in a +separate process (and thus address space) from the programs that they +debug (the @dfn{debuggee}, if you will). + +The @command{gawk} debugger is different; it is an integrated part +of @command{gawk} itself. This makes it possible, in rare cases, +for @command{gawk} to become an excellent demonstrator of Heisenberg +Uncertainty physics, where the mere act of observing something can change +it. Consider the following:@footnote{Thanks to Hermann Peifer for +this example.} + +@example +$ @kbd{cat test.awk} +@print{} @{ print typeof($1), typeof($2) @} +$ @kbd{cat test.data} +@print{} abc 123 +$ @kbd{gawk -f test.awk test.data} +@print{} strnum strnum +@end example + +This is all as expected: field data has the STRNUM attribute +(@pxref{Variable Typing}). 
Now watch what happens when we run +this program under the debugger: + +@example +$ @kbd{gawk -D -f test.awk test.data} +gawk> @kbd{w $1} @ii{Set watchpoint on} $1 +@print{} Watchpoint 1: $1 +gawk> @kbd{w $2} @ii{Set watchpoint on} $2 +@print{} Watchpoint 2: $2 +gawk> @kbd{r} @ii{Start the program} +@print{} Starting program: +@print{} Stopping in Rule ... +@print{} Watchpoint 1: $1 @ii{Watchpoint fires} +@print{} Old value: "" +@print{} New value: "abc" +@print{} main() at `test.awk':1 +@print{} 1 @{ print typeof($1), typeof($2) @} +gawk> @kbd{n} @ii{Keep going @dots{}} +@print{} Watchpoint 2: $2 @ii{Watchpoint fires} +@print{} Old value: "" +@print{} New value: "123" +@print{} main() at `test.awk':1 +@print{} 1 @{ print typeof($1), typeof($2) @} +gawk> @kbd{n} @ii{Get result from} typeof() +@print{} strnum number @ii{Result for} $2 @ii{isn't right} +@print{} Program exited normally with exit value: 0 +gawk> @kbd{quit} +@end example + +In this case, the act of comparing the new value of @code{$2} +with the old one caused @command{gawk} to evaluate it and determine that it +is indeed a number, and this is reflected in the result of +@code{typeof()}. + +Cases like this where the debugger is not transparent to the program's +execution should be rare. If you encounter one, please report it +(@pxref{Bugs}). + @ignore Look forward to a future release when these and other missing features may be added, and of course feel free to try to add them yourself! @@ -30201,6 +30624,10 @@ If the GNU Readline library is available when @command{gawk} is compiled, it is used by the debugger to provide command-line history and editing. +@item +Usually, the debugger does not affect the +program being debugged, but occasionally it can. 
+ @end itemize @node Arbitrary Precision Arithmetic @@ -31018,6 +31445,122 @@ to just use the following: gawk -M 'BEGIN @{ n = 13; print n % 2 @}' @end example +When dividing two arbitrary precision integers with either +@samp{/} or @samp{%}, the result is typically an arbitrary +precision floating point value (unless the denominator evenly +divides into the numerator). In order to do integer division +or remainder with arbitrary precision integers, use the built-in +@code{intdiv()} function (@pxref{Numeric Functions}). + +You can simulate the @code{intdiv()} function in standard @command{awk} +using this user-defined function: + +@example +@c file eg/lib/intdiv.awk +# intdiv --- do integer division + +@c endfile +@ignore +@c file eg/lib/intdiv.awk +# +# Arnold Robbins, arnold@@skeeve.com, Public Domain +# July, 2014 +# +# Name changed from div() to intdiv() +# April, 2015 + +@c endfile + +@end ignore +@c file eg/lib/intdiv.awk +function intdiv(numerator, denominator, result) +@{ + split("", result) + + numerator = int(numerator) + denominator = int(denominator) + result["quotient"] = int(numerator / denominator) + result["remainder"] = int(numerator % denominator) + + return 0.0 +@} +@c endfile +@end example + +The following example program, contributed by Katie Wasserman, +uses @code{intdiv()} to +compute the digits of @value{PI} to as many places as you +choose to set: + +@example +@c file eg/prog/pi.awk +# pi.awk --- compute the digits of pi +@c endfile +@c endfile +@ignore +@c file eg/prog/pi.awk +# +# Katie Wasserman, katie@@wass.net +# August 2014 +@c endfile +@end ignore +@c file eg/prog/pi.awk + +BEGIN @{ + digits = 100000 + two = 2 * 10 ^ digits + pi = two + for (m = digits * 4; m > 0; --m) @{ + d = m * 2 + 1 + x = pi * m + intdiv(x, d, result) + pi = result["quotient"] + pi = pi + two + @} + print pi +@} +@c endfile +@end example + +@ignore +Date: Wed, 20 Aug 2014 10:19:11 -0400 +To: arnold@skeeve.com +From: Katherine Wasserman <katie@wass.net> +Subject: 
Re: computation of digits of pi? + +Arnold, + +>The program that you sent to compute the digits of pi using div(). Is +>that some standard algorithm that every math student knows? If so, +>what's it called? + +It's not that well known but it's not that obscure either + +It's Euler's modification to Newton's method for calculating pi. + +Take a look at lines (23) - (25) here: http://mathworld.wolfram.com/PiFormulas.htm + +The algorithm I wrote simply expands the multiply by 2 and works from the innermost expression outwards. I used this to program HP calculators because it's quite easy to modify for tiny memory devices with smallish word sizes. + +http://www.hpmuseum.org/cgi-sys/cgiwrap/hpmuseum/articles.cgi?read=899 + +-Katie +@end ignore + +When asked about the algorithm used, Katie replied: + +@quotation +It's not that well known but it's not that obscure either. +It's Euler's modification to Newton's method for calculating pi. +Take a look at lines (23) - (25) here: @uref{http://mathworld.wolfram.com/PiFormulas.html}. + +The algorithm I wrote simply expands the multiply by 2 and works from +the innermost expression outwards. I used this to program HP calculators +because it's quite easy to modify for tiny memory devices with smallish +word sizes. See +@uref{http://www.hpmuseum.org/cgi-sys/cgiwrap/hpmuseum/articles.cgi?read=899}. +@end quotation + @node POSIX Floating Point Problems @section Standards Versus Existing Practice @@ -31417,6 +31960,7 @@ This (rather large) @value{SECTION} describes the API in detail. * Symbol Table Access:: Functions for accessing global variables. * Array Manipulation:: Functions for working with arrays. +* Redirection API:: How to access and manipulate redirections. * Extension API Variables:: Variables provided by the API. * Extension API Boilerplate:: Boilerplate code for using the API. 
@end menu @@ -31492,6 +32036,10 @@ Clearing an array @item Flattening an array for easy C-style looping over all its indices and elements @end itemize + +@item +Accessing and manipulating redirections. + @end itemize Some points about using the API: @@ -33462,6 +34010,75 @@ $ @kbd{AWKLIBPATH=$PWD ./gawk -f subarray.awk} (@xref{Finding Extensions} for more information on the @env{AWKLIBPATH} environment variable.) +@node Redirection API +@subsection Accessing and Manipulating Redirections + +The following function allows extensions to access and manipulate redirections. + +@table @code +@item awk_bool_t get_file(const char *name, +@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ size_t name_len, +@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ const char *filetype, +@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ int fd, +@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ const awk_input_buf_t **ibufp, +@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ const awk_output_buf_t **obufp); +Look up a file in @command{gawk}'s internal redirection table. +If @code{name} is @code{NULL} or @code{name_len} is zero, return +data for the currently open input file corresponding to @code{FILENAME}. +(This does not access the @code{filetype} argument, so that may be undefined). +If the file is not already open, attempt to open it. +The @code{filetype} argument must be zero-terminated and should be one of: + +@table @code +@item ">" +A file opened for output. + +@item ">>" +A file opened for append. + +@item "<" +A file opened for input. + +@item "|>" +A pipe opened for output. + +@item "|<" +A pipe opened for input. + +@item "|&" +A two-way coprocess. +@end table + +On error, return a @code{false} value. Otherwise, return +@code{true}, and return additional information about the redirection +in the @code{ibufp} and @code{obufp} pointers. For input +redirections, the @code{*ibufp} value should be non-@code{NULL}, +and @code{*obufp} should be @code{NULL}. 
For output redirections, +the @code{*obufp} value should be non-@code{NULL}, and @code{*ibufp} +should be @code{NULL}. For two-way coprocesses, both values should +be non-@code{NULL}. + +In the usual case, the extension is interested in @code{(*ibufp)->fd} +and/or @code{fileno((*obufp)->fp)}. If the file is not already +open, and the @code{fd} argument is non-negative, @command{gawk} +will use that file descriptor instead of opening the file in the +usual way. If @code{fd} is non-negative, but the file exists already, +@command{gawk} ignores @code{fd} and returns the existing file. It is +the caller's responsibility to notice that neither the @code{fd} in +the returned @code{awk_input_buf_t} nor the @code{fd} in the returned +@code{awk_output_buf_t} matches the requested value. + +Note that supplying a file descriptor is currently @emph{not} supported +for pipes. However, supplying a file descriptor should work for input, +output, append, and two-way (coprocess) sockets. If @code{filetype} +is two-way, @command{gawk} assumes that it is a socket! Note that in +the two-way case, the input and output file descriptors may differ. +To check for success, you must check whether either matches. +@end table + +It is anticipated that this API function will be used to implement I/O +multiplexing and a socket library. 
+ @node Extension API Variables @subsection API Variables @@ -35026,18 +35643,21 @@ As of this writing, there are seven extensions: GD graphics library extension @item +MPFR library extension +(this provides access to a number of MPFR functions that @command{gawk}'s +native MPFR support does not) + +@item PDF extension @item PostgreSQL extension @item -MPFR library extension -(this provides access to a number of MPFR functions that @command{gawk}'s -native MPFR support does not) +Redis extension @item -Redis extension +Select extension @item XML parser extension, using the @uref{http://expat.sourceforge.net, Expat} @@ -35666,6 +36286,10 @@ Indirect function calls @item Directories on the command line produce a warning and are skipped (@pxref{Command-line directories}) + +@item +Output with @code{print} and @code{printf} need not be fatal +(@pxref{Nonfatal}) @end itemize @item @@ -35753,6 +36377,11 @@ The @code{isarray()} function to check if a variable is an array or not The @code{bindtextdomain()}, @code{dcgettext()}, and @code{dcngettext()} functions for internationalization (@pxref{Programmer i18n}) + +@item +The @code{intdiv()} function for doing integer +division and remainder +(@pxref{Numeric Functions}) @end itemize @item @@ -35791,6 +36420,7 @@ The @option{-p}, @option{-P}, @option{-r}, +@option{-s}, @option{-S}, @option{-t}, and @@ -35815,6 +36445,7 @@ and the @option{--load}, @option{--non-decimal-data}, @option{--optimize}, +@option{--no-optimize}, @option{--posix}, @option{--pretty-print}, @option{--profile}, @@ -35885,6 +36516,16 @@ for @command{gawk} @value{PVERSION} 4.1: Ultrix @end itemize +@item +Support for the following systems was removed from the code +for @command{gawk} @value{PVERSION} 4.2: + +@c nested table +@itemize @value{MINUS} +@item +MirBSD +@end itemize + @end itemize @c XXX ADD MORE STUFF HERE @@ -36511,6 +37152,52 @@ Support for Ultrix was removed. 
@end itemize +Version 4.2 introduced the following changes: + +@itemize @bullet +@item +Changes to @code{ENVIRON} are reflected into @command{gawk}'s +environment and that of programs that it runs. +@xref{Auto-set}. + +@item +The @option{--pretty-print} option no longer runs the @command{awk} +program too. +@xref{Options}. + +@item +The @command{igawk} program and its manual page are no longer +installed when @command{gawk} is built. +@xref{Igawk Program}. + +@item +The @code{intdiv()} function. +@xref{Numeric Functions}. + +@item +The maximum number of hexadecimal digits in @samp{\x} escapes +is now two. +@xref{Escape Sequences}. + +@item +Nonfatal output with @code{print} and @code{printf}. +@xref{Nonfatal}. + +@item +For many years, POSIX specified that default field splitting +only allowed spaces and tabs to separate fields, and this was +how @command{gawk} behaved with @option{--posix}. As of 2013, +the standard restored historical behavior, and now default +field splitting with @option{--posix} also allows newlines to +separate fields. + +@item +Support for MirBSD was removed. + +@item +Support for GNU/Linux on Alpha was removed. +@end itemize + @c XXX ADD MORE STUFF HERE @end ifclear @@ -37178,6 +37865,8 @@ The generated Info file for The @command{troff} source for a manual page describing the @command{igawk} program presented in @ref{Igawk Program}. +(Since @command{gawk} can do its own @code{@@include} processing, +neither @command{igawk} nor @file{igawk.1} are installed.) @item doc/Makefile.in The input file used during the configuration process to generate the @@ -37222,8 +37911,6 @@ source file for this @value{DOCUMENT}. It also contains a @file{Makefile.in} fil @file{Makefile.am} is used by GNU Automake to create @file{Makefile.in}. The library functions from @ref{Library Functions}, -and the @command{igawk} program from -@ref{Igawk Program} are included as ready-to-use files in the @command{gawk} distribution. 
They are installed as part of the installation process. The rest of the programs in this @value{DOCUMENT} are available in appropriate @@ -37234,6 +37921,12 @@ The source code, manual pages, and infrastructure files for the sample extensions included with @command{gawk}. @xref{Dynamic Extensions}, for more information. +@item extras/* +Additional non-essential files. Currently, this directory contains some shell +startup files to be installed in @file{/etc/profile.d} to aid in manipulating +the @env{AWKPATH} and @env{AWKLIBPATH} environment variables. +@xref{Shell Startup Files}, for more information. + @item posix/* Files needed for building @command{gawk} on POSIX-compliant systems. @@ -37265,6 +37958,7 @@ to configure @command{gawk} for your system yourself. @menu * Quick Installation:: Compiling @command{gawk} under Unix. +* Shell Startup Files:: Shell convenience functions. * Additional Configuration Options:: Other compile-time options. * Configuration Philosophy:: How it's all supposed to work. @end menu @@ -37345,6 +38039,44 @@ is likely that you will be asked for your password, and you will have to have been set up previously as a user who is allowed to run the @command{sudo} command. +@node Shell Startup Files +@appendixsubsec Shell Startup Files + +The distribution contains shell startup files @file{gawk.sh} and +@file{gawk.csh} containing functions to aid in manipulating +the @env{AWKPATH} and @env{AWKLIBPATH} environment variables. +On a Fedora system, these files should be installed in @file{/etc/profile.d}; +on other platforms, the appropriate location may be different. + +@table @command + +@cindex @command{gawkpath_default} shell function +@item gawkpath_default +Reset the @env{AWKPATH} environment variable to its default value. + +@cindex @command{gawkpath_prepend} shell function +@item gawkpath_prepend +Add the argument to the front of the @env{AWKPATH} environment variable. 
+ +@cindex @command{gawkpath_append} shell function +@item gawkpath_append +Add the argument to the end of the @env{AWKPATH} environment variable. + +@cindex @command{gawklibpath_default} shell function +@item gawklibpath_default +Reset the @env{AWKLIBPATH} environment variable to its default value. + +@cindex @command{gawklibpath_prepend} shell function +@item gawklibpath_prepend +Add the argument to the front of the @env{AWKLIBPATH} environment variable. + +@cindex @command{gawklibpath_append} shell function +@item gawklibpath_append +Add the argument to the end of the @env{AWKLIBPATH} environment variable. + +@end table + + @node Additional Configuration Options @appendixsubsec Additional Configuration Options @cindex @command{gawk}, configuring, options diff --git a/doc/gawktexi.in b/doc/gawktexi.in index ff5672a5..37bffc32 100644 --- a/doc/gawktexi.in +++ b/doc/gawktexi.in @@ -529,6 +529,7 @@ particular records in a file and perform operations upon them. * Computed Regexps:: Using Dynamic Regexps. * GNU Regexp Operators:: Operators specific to GNU software. * Case-sensitivity:: How to do case-insensitive matching. +* Strong Regexp Constants:: Strongly typed regexp constants. * Regexp Summary:: Regular expressions summary. * Records:: Controlling how data is split into records. @@ -571,6 +572,7 @@ particular records in a file and perform operations upon them. @code{getline}. * Getline Summary:: Summary of @code{getline} Variants. * Read Timeout:: Reading input with a timeout. +* Retrying Input:: Retrying input after certain errors. * Command-line directories:: What happens if you put a directory on the command line. * Input Summary:: Input summary. @@ -600,6 +602,7 @@ particular records in a file and perform operations upon them. * Special Caveats:: Things to watch out for. * Close Files And Pipes:: Closing Input and Output Files and Pipes. +* Nonfatal:: Enabling Nonfatal Output. * Output Summary:: Output summary. * Output Exercises:: Exercises. 
* Values:: Constants, Variables, and Regular @@ -911,6 +914,7 @@ particular records in a file and perform operations upon them. * Array Functions:: Functions for working with arrays. * Flattening Arrays:: How to flatten arrays. * Creating Arrays:: How to create and populate arrays. +* Redirection API:: How to access and manipulate redirections. * Extension API Variables:: Variables provided by the API. * Extension Versioning:: API Version information. * Extension API Informational Variables:: Variables providing information about @@ -969,6 +973,7 @@ particular records in a file and perform operations upon them. * Unix Installation:: Installing @command{gawk} under various versions of Unix. * Quick Installation:: Compiling @command{gawk} under Unix. +* Shell Startup Files:: Shell convenience functions. * Additional Configuration Options:: Other compile-time options. * Configuration Philosophy:: How it's all supposed to work. * Non-Unix Installation:: Installation on Other Operating @@ -3959,6 +3964,7 @@ when parsing numeric input data (@pxref{Locales}). @cindex @option{-o} option @cindex @option{--pretty-print} option Enable pretty-printing of @command{awk} programs. +Implies @option{--no-optimize}. By default, the output program is created in a file named @file{awkprof.out} (@pxref{Profiling}). The optional @var{file} argument allows you to specify a different @@ -3967,18 +3973,22 @@ No space is allowed between the @option{-o} and @var{file}, if @var{file} is supplied. @quotation NOTE -Due to the way @command{gawk} has evolved, with this option -your program still executes. This will change in the -next major release, such that @command{gawk} will only -pretty-print the program and not run it. +In the past, this option would also execute your program. +This is no longer the case. 
@end quotation @item @option{-O} @itemx @option{--optimize} @cindex @option{--optimize} option @cindex @option{-O} option -Enable some optimizations on the internal representation of the program. -At the moment, this includes just simple constant folding. +Enable @command{gawk}'s default optimizations on the internal +representation of the program. At the moment, this includes simple +constant folding and tail recursion elimination in function calls. + +These optimizations are enabled by default. +This option remains primarily for backwards compatibility. However, it may +be used to cancel the effect of an earlier @option{-s} option +(see later in this list). @item @option{-p}[@var{file}] @itemx @option{--profile}[@code{=}@var{file}] @@ -3987,6 +3997,7 @@ At the moment, this includes just simple constant folding. @cindex @command{awk} profiling, enabling Enable profiling of @command{awk} programs (@pxref{Profiling}). +Implies @option{--no-optimize}. By default, profiles are created in a file named @file{awkprof.out}. The optional @var{file} argument allows you to specify a different @value{FN} for the profile file. @@ -4016,11 +4027,6 @@ restrictions apply: @cindex newlines @cindex whitespace, newlines as @item -Newlines do not act as whitespace to separate fields when @code{FS} is -equal to a single space -(@pxref{Fields}). - -@item Newlines are not allowed after @samp{?} or @samp{:} (@pxref{Conditional Exp}). @@ -4058,6 +4064,13 @@ This is now @command{gawk}'s default behavior. Nevertheless, this option remains (both for backward compatibility and for use in combination with @option{--traditional}). +@item @option{-s} +@itemx @option{--no-optimize} +@cindex @option{--no-optimize} option +@cindex @option{-s} option +Disable @command{gawk}'s default optimizations on the internal +representation of the program. 
+ @item @option{-S} @itemx @option{--sandbox} @cindex @option{-S} option @@ -4371,6 +4384,9 @@ searches first in the current directory and then in @file{/usr/local/share/awk}. In practice, this means that you will rarely need to change the value of @env{AWKPATH}. +@xref{Shell Startup Files}, for information on functions that help to +manipulate the @env{AWKPATH} variable. + @command{gawk} places the value of the search path that it used into @code{ENVIRON["AWKPATH"]}. This provides access to the actual search path value from within an @command{awk} program. @@ -4402,6 +4418,9 @@ an empty value, @command{gawk} uses a default path; this is typically @samp{/usr/local/lib/gawk}, although it can vary depending upon how @command{gawk} was built. +@xref{Shell Startup Files}, for information on functions that help to +manipulate the @env{AWKLIBPATH} variable. + @command{gawk} places the value of the search path that it used into @code{ENVIRON["AWKLIBPATH"]}. This provides access to the actual search path value from within an @command{awk} program. @@ -4429,6 +4448,8 @@ wait for input before returning with an error. Controls the number of times @command{gawk} attempts to retry a two-way TCP/IP (socket) connection before giving up. @xref{TCP/IP Networking}. +Note that when nonfatal I/O is enabled (@pxref{Nonfatal}), +@command{gawk} only tries to open a TCP/IP socket once. @item POSIXLY_CORRECT Causes @command{gawk} to switch to POSIX-compatibility @@ -4478,14 +4499,6 @@ two regexp matchers that @command{gawk} uses internally. (There aren't supposed to be differences, but occasionally theory and practice don't coordinate with each other.) -@item GAWK_NO_PP_RUN -When @command{gawk} is invoked with the @option{--pretty-print} option, -it will not run the program if this environment variable exists. - -@quotation CAUTION -This variable will not survive into the next major release. 
-@end quotation - @item GAWK_STACKSIZE This specifies the amount by which @command{gawk} should grow its internal evaluation stack, when needed. @@ -4783,6 +4796,32 @@ Similarly, you may use @code{print} or @code{printf} statements in the @var{init} and @var{increment} parts of a @code{for} loop. This is another long-undocumented ``feature'' of Unix @command{awk}. +@command{gawk} lets you use the names of built-in functions that are +@command{gawk} extensions as the names of parameters in user-defined functions. +This is intended to ``future-proof'' old code that happens to use +function names added by @command{gawk} after the code was written. +Standard @command{awk} built-in functions, such as @code{sin()} or +@code{substr()} are @emph{not} shadowed in this way. + +The @code{PROCINFO["argv"]} array contains all of the command-line arguments +(after glob expansion and redirection processing on platforms where that must +be done manually by the program) with subscripts ranging from 0 through +@code{argc} @minus{} 1. For example, @code{PROCINFO["argv"][0]} will contain +the name by which @command{gawk} was invoked. Here is an example of how this +feature may be used: + +@example +awk ' +BEGIN @{ + for (i = 0; i < length(PROCINFO["argv"]); i++) + print i, PROCINFO["argv"][i] +@}' +@end example + +Please note that this differs from the standard @code{ARGV} array which does +not include command-line arguments that have already been processed by +@command{gawk} (@pxref{ARGC and ARGV}). + @end ignore @node Invoking Summary @@ -4875,6 +4914,7 @@ regular expressions work, we present more complicated instances. * Computed Regexps:: Using Dynamic Regexps. * GNU Regexp Operators:: Operators specific to GNU software. * Case-sensitivity:: How to do case-insensitive matching. +* Strong Regexp Constants:: Strongly typed regexp constants. * Regexp Summary:: Regular expressions summary. @end menu @@ -5065,17 +5105,21 @@ between @samp{0} and @samp{7}. 
For example, the code for the ASCII ESC @item \x@var{hh}@dots{} The hexadecimal value @var{hh}, where @var{hh} stands for a sequence of hexadecimal digits (@samp{0}--@samp{9}, and either @samp{A}--@samp{F} -or @samp{a}--@samp{f}). Like the same construct -in ISO C, the escape sequence continues until the first nonhexadecimal -digit is seen. @value{COMMONEXT} -However, using more than two hexadecimal digits produces -undefined results. (The @samp{\x} escape sequence is not allowed in -POSIX @command{awk}.) +or @samp{a}--@samp{f}). A maximum of two digits are allowed after +the @samp{\x}. Any further hexadecimal digits are treated as simple +letters or numbers. @value{COMMONEXT} +(The @samp{\x} escape sequence is not allowed in POSIX awk.) @quotation CAUTION -The next major release of @command{gawk} will change, such -that a maximum of two hexadecimal digits following the -@samp{\x} will be used. +In ISO C, the escape sequence continues until the first nonhexadecimal +digit is seen. +For many years, @command{gawk} would continue incorporating +hexadecimal digits into the value until a non-hexadecimal digit +or the end of the string was encountered. +However, using more than two hexadecimal digits produced +undefined results. +As of @value{PVERSION} 4.2, only two digits +are processed. @end quotation @cindex @code{\} (backslash), @code{\/} escape sequence @@ -6004,6 +6048,89 @@ The value of @code{IGNORECASE} has no effect if @command{gawk} is in compatibility mode (@pxref{Options}). Case is always significant in compatibility mode. +@node Strong Regexp Constants +@section Strongly Typed Regexp Constants + +This @value{SECTION} describes a @command{gawk}-specific feature. + +Regexp constants (@code{/@dots{}/}) hold a strange position in the +@command{awk} language. In most contexts, they act like an expression: +@samp{$0 ~ /@dots{}/}. In other contexts, they denote only a regexp to +be matched. In no case are they really a ``first class citizen'' of the +language. 
That is, you cannot define a scalar variable whose type is +``regexp'' in the same sense that you can define a variable to be a +number or a string: + +@example +num = 42 @ii{Numeric variable} +str = "hi" @ii{String variable} +re = /foo/ @ii{Wrong!} re @ii{is the result of} $0 ~ /foo/ +@end example + +For a number of more advanced use cases (described later on in this +@value{DOCUMENT}), it would be nice to have regexp constants that +are @dfn{strongly typed}; in other words, that denote a regexp useful +for matching, and not an expression. + +@command{gawk} provides this feature. A strongly typed regexp constant +looks almost like a regular regexp constant, except that it is preceded +by an @samp{@@} sign: + +@example +re = @@/foo/ @ii{Regexp variable} +@end example + +Strongly typed regexp constants @emph{cannot} be used everywhere that a +regular regexp constant can, because this would make the language even more +confusing. Instead, you may use them only in certain contexts: + +@itemize @bullet +@item +On the righthand side of the @samp{~} and @samp{!~} operators: @samp{some_var ~ @@/foo/} +(@pxref{Regexp Usage}). + +@item +In the @code{case} part of a @code{switch} statement +(@pxref{Switch Statement}). + +@item +As an argument to one of the built-in functions that accept regexp constants: +@code{gensub()}, +@code{gsub()}, +@code{match()}, +@code{patsplit()}, +@code{split()}, +and +@code{sub()} +(@pxref{String Functions}). + +@item +As a parameter in a call to a user-defined function +(@pxref{User-defined}). + +@item +On the righthand side of an assignment to a variable: @samp{some_var = @@/foo/}. +In this case, the type of @code{some_var} is regexp. Additionally, @code{some_var} +can be used with @samp{~} and @samp{!~}, passed to one of the built-in functions +listed above, or passed as a parameter to a user-defined function. 
+@end itemize + +You may use the @code{typeof()} built-in function +(@pxref{Type Functions}) +to determine if a variable or function parameter is +a regexp variable. + +The true power of this feature comes from the ability to create variables that +have regexp type. Such variables can be passed on to user-defined functions, +without the confusing aspects of computed regular expressions created from +strings or string constants. They may also be passed through indirect function +calls (@pxref{Indirect Calls}) +onto the built-in functions that accept regexp constants. + +When used in numeric conversions, strongly typed regexp variables convert +to zero. When used in string conversions, they convert to the string +value of the original regexp text. + @node Regexp Summary @section Summary @@ -6047,6 +6174,11 @@ treated as regular expressions). case sensitivity of regexp matching. In other @command{awk} versions, use @code{tolower()} or @code{toupper()}. +@item +Strongly typed regexp constants (@code{@@/.../}) enable +certain advanced use cases to be described later on in the +@value{DOCUMENT}. + @end itemize @@ -6094,6 +6226,7 @@ used with it do not have to be named on the @command{awk} command line * Getline:: Reading files under explicit program control using the @code{getline} function. * Read Timeout:: Reading input with a timeout. +* Retrying Input:: Retrying input after certain errors. * Command-line directories:: What happens if you put a directory on the command line. * Input Summary:: Input summary. @@ -6411,16 +6544,12 @@ Readfile} for another option. @cindex fields @cindex accessing fields @cindex fields, examining -@cindex POSIX @command{awk}, field separators and -@cindex field separators, POSIX and -@cindex separators, field, POSIX and When @command{awk} reads an input record, the record is automatically @dfn{parsed} or separated by the @command{awk} utility into chunks called @dfn{fields}. 
By default, fields are separated by @dfn{whitespace}, like words in a line. Whitespace in @command{awk} means any string of one or more spaces, -TABs, or newlines;@footnote{In POSIX @command{awk}, newlines are not -considered whitespace for separating fields.} other characters +TABs, or newlines; other characters that are considered whitespace by other languages (such as formfeed, vertical tab, etc.) are @emph{not} considered whitespace by @command{awk}. @@ -6834,7 +6963,6 @@ can massage it first with a separate @command{awk} program.) @node Default Field Splitting @subsection Whitespace Normally Separates Fields -@cindex newlines, as field separators @cindex whitespace, as field separators Fields are normally separated by whitespace sequences (spaces, TABs, and newlines), not by single spaces. Two spaces in a row do not @@ -7701,6 +7829,13 @@ a record, such as a file that cannot be opened, then @code{getline} returns @minus{}1. In this case, @command{gawk} sets the variable @code{ERRNO} to a string describing the error that occurred. +If @code{ERRNO} indicates that the I/O operation may be +retried, and @code{PROCINFO["@var{input}", "RETRY"]} is set, +then @code{getline} returns @minus{}2 +instead of @minus{}1, and further calls to @code{getline} +may be attempted. @xref{Retrying Input} for further information about +this feature. + In the following examples, @var{command} stands for a string value that represents a shell command. @@ -8355,7 +8490,8 @@ on a per-command or per-connection basis. the attempt to read from the underlying device may succeed in a later attempt. This is a limitation, and it also means that you cannot use this to multiplex input from -two or more sources. +two or more sources. @xref{Retrying Input} for a way to enable +later I/O attempts to succeed. Assigning a timeout value prevents read operations from blocking indefinitely. 
But bear in mind that there are other ways @@ -8365,6 +8501,36 @@ a connection before it can start reading any data, or the attempt to open a FIFO special file for reading can block indefinitely until some other process opens it for writing. +@node Retrying Input +@section Retrying Reads After Certain Input Errors +@cindex retrying input + +@cindex differences in @command{awk} and @command{gawk}, retrying input +This @value{SECTION} describes a feature that is specific to @command{gawk}. + +When @command{gawk} encounters an error while reading input, by +default @code{getline} returns @minus{}1, and subsequent attempts to +read from that file result in an end-of-file indication. However, you +may optionally instruct @command{gawk} to allow I/O to be retried when +certain errors are encountered by setting a special element in +the @code{PROCINFO} array (@pxref{Auto-set}): + +@example +PROCINFO["@var{input_name}", "RETRY"] = 1 +@end example + +When this element exists, @command{gawk} checks the value of the system +(C language) +@code{errno} variable when an I/O error occurs. If @code{errno} indicates +a subsequent I/O attempt may succeed, @code{getline} instead returns +@minus{}2 and +further calls to @code{getline} may succeed. This applies to the @code{errno} +values @code{EAGAIN}, @code{EWOULDBLOCK}, @code{EINTR}, or @code{ETIMEDOUT}. + +This feature is useful in conjunction with +@code{PROCINFO["@var{input_name}", "READ_TIMEOUT"]} or situations where a file +descriptor has been configured to behave in a non-blocking fashion. + @node Command-line directories @section Directories on the Command Line @cindex differences in @command{awk} and @command{gawk}, command-line directories @@ -8526,6 +8692,7 @@ and discusses the @code{close()} built-in function. @command{gawk} allows access to inherited file descriptors. * Close Files And Pipes:: Closing Input and Output Files and Pipes. +* Nonfatal:: Enabling Nonfatal Output. * Output Summary:: Output summary. 
* Output Exercises:: Exercises. @end menu @@ -9927,6 +10094,70 @@ In POSIX mode (@pxref{Options}), @command{gawk} just returns zero when closing a pipe. @end sidebar +@node Nonfatal +@section Enabling Nonfatal Output + +This @value{SECTION} describes a @command{gawk}-specific feature. + +In standard @command{awk}, output with @code{print} or @code{printf} +to a nonexistent file, or some other I/O error (such as filling up the +disk) is a fatal error. + +@example +$ @kbd{gawk 'BEGIN @{ print "hi" > "/no/such/file" @}'} +@error{} gawk: cmd. line:1: fatal: can't redirect to `/no/such/file' (No such file or directory) +@end example + +@command{gawk} makes it possible to detect that an error has +occurred, allowing you to possibly recover from the error, or +at least print an error message of your choosing before exiting. +You can do this in one of two ways: + +@itemize @bullet +@item +For all output files, by assigning any value to @code{PROCINFO["NONFATAL"]}. + +@item +On a per-file basis, by assigning any value to +@code{PROCINFO[@var{filename}, "NONFATAL"]}. +Here, @var{filename} is the name of the file to which +you wish output to be nonfatal. +@end itemize + +Once you have enabled nonfatal output, you must check @code{ERRNO} +after every relevant @code{print} or @code{printf} statement to +see if something went wrong. It is also a good idea to initialize +@code{ERRNO} to zero before attempting the output. For example: + +@example +$ @kbd{gawk '} +> @kbd{BEGIN @{} +> @kbd{ PROCINFO["NONFATAL"] = 1} +> @kbd{ ERRNO = 0} +> @kbd{ print "hi" > "/no/such/file"} +> @kbd{ if (ERRNO) @{} +> @kbd{ print("Output failed:", ERRNO) > "/dev/stderr"} +> @kbd{ exit 1} +> @kbd{ @}} +> @kbd{@}'} +@error{} Output failed: No such file or directory +@end example + +Here, @command{gawk} did not produce a fatal error; instead +it let the @command{awk} program code detect the problem and handle it. + +This mechanism works also for standard output and standard error. 
+For standard output, you may use @code{PROCINFO["-", "NONFATAL"]} +or @code{PROCINFO["/dev/stdout", "NONFATAL"]}. For standard error, use +@code{PROCINFO["/dev/stderr", "NONFATAL"]}. + +When attempting to open a TCP/IP socket (@pxref{TCP/IP Networking}), +@command{gawk} tries multiple times. The @env{GAWK_SOCK_RETRIES} +environment variable (@pxref{Other Environment Variables}) allows you to +override @command{gawk}'s builtin default number of attempts. However, +once nonfatal I/O is enabled for a given socket, @command{gawk} only +retries once, relying on @command{awk}-level code to notice that there +was a problem. @node Output Summary @section Summary @@ -9956,6 +10187,12 @@ Use @code{close()} to close open file, pipe, and coprocess redirections. For coprocesses, it is possible to close only one direction of the communications. +@item +Normally errors with @code{print} or @code{printf} are fatal. +@command{gawk} lets you make output errors be nonfatal either for +all files or on a per-file basis. You must then check for errors +after every relevant output statement. + @end itemize @c EXCLUDE START @@ -13891,12 +14128,11 @@ specify the behavior when @code{FS} is the null string. Nonetheless, some other versions of @command{awk} also treat @code{""} specially.) -@cindex POSIX @command{awk}, @code{FS} variable and The default value is @w{@code{" "}}, a string consisting of a single -space. As a special exception, this value means that any -sequence of spaces, TABs, and/or newlines is a single separator.@footnote{In -POSIX @command{awk}, newline does not count as whitespace.} It also causes -spaces, TABs, and newlines at the beginning and end of a record to be ignored. +space. As a special exception, this value means that any sequence of +spaces, TABs, and/or newlines is a single separator. It also causes +spaces, TABs, and newlines at the beginning and end of a record to +be ignored. 
You can set the value of @code{FS} on the command line using the @option{-F} option: @@ -14120,10 +14356,24 @@ opens the next file. An associative array containing the values of the environment. The array indices are the environment variable names; the elements are the values of the particular environment variables. For example, -@code{ENVIRON["HOME"]} might be @code{"/home/arnold"}. Changing this array -does not affect the environment passed on to any programs that -@command{awk} may spawn via redirection or the @code{system()} function. -(In a future version of @command{gawk}, it may do so.) +@code{ENVIRON["HOME"]} might be @code{/home/arnold}. + +For POSIX @command{awk}, changing this array does not affect the +environment passed on to any programs that @command{awk} may spawn via +redirection or the @code{system()} function. + +However, beginning with version 4.2, if not in POSIX +compatibility mode, @command{gawk} does update its own environment when +@code{ENVIRON} is changed, thus changing the environment seen by programs +that it creates. You should therefore be especially careful if you +modify @code{ENVIRON["PATH"]}, which is the search path for finding +executable programs. + +This can also affect the running @command{gawk} program, since some of the +built-in functions may pay attention to certain environment variables. +The most notable instance of this is @code{mktime()} (@pxref{Time +Functions}), which pays attention to the value of the @env{TZ} environment +variable on many systems. Some operating systems may not have environment variables. On such systems, the @code{ENVIRON} array is empty (except for @@ -14157,6 +14407,11 @@ value to be meaningful when an I/O operation returns a failure value, such as @code{getline} returning @minus{}1. You are, of course, free to clear it yourself before doing an I/O operation. 
+If the value of @code{ERRNO} corresponds to a system error in the C +@code{errno} variable, then @code{PROCINFO["errno"]} will be set to the value +of @code{errno}. For non-system errors, @code{PROCINFO["errno"]} will +be zero. + @cindex @code{FILENAME} variable @cindex dark corner, @code{FILENAME} variable @item @code{FILENAME} @@ -14225,6 +14480,10 @@ are guaranteed to be available: @item PROCINFO["egid"] The value of the @code{getegid()} system call. +@item PROCINFO["errno"] +The value of the C @code{errno} variable when @code{ERRNO} is set to +the associated error message. + @item PROCINFO["euid"] @cindex effective user ID of @command{gawk} user The value of the @code{geteuid()} system call. @@ -14348,6 +14607,14 @@ to test for these elements The following elements allow you to change @command{gawk}'s behavior: @table @code +@item PROCINFO["NONFATAL"] +If this element exists, then I/O errors for all output redirections become nonfatal. +@xref{Nonfatal}. + +@item PROCINFO["@var{output_name}", "NONFATAL"] +Make output errors for @var{output_name} be nonfatal. +@xref{Nonfatal}. + @item PROCINFO["@var{command}", "pty"] For two-way communication to @var{command}, use a pseudo-tty instead of setting up a two-way pipe. @@ -16224,6 +16491,23 @@ truncated toward zero. For example, @code{int(3)} is 3, @code{int(3.9)} is 3, @code{int(-3.9)} is @minus{}3, and @code{int(-3)} is @minus{}3 as well. +@item @code{intdiv(@var{numerator}, @var{denominator}, @var{result})} +@cindexawkfunc{intdiv} +@cindex intdiv +Perform integer division, similar to the standard C +@code{div()} function. First, truncate @code{numerator} and @code{denominator} +towards zero, creating integer values. Clear the @code{result} +array, and then set @code{result["quotient"]} to the result of +@samp{numerator / denominator}, truncated towards zero to an integer, +and set @code{result["remainder"]} to the result of @samp{numerator % +denominator}, truncated towards zero to an integer. 
This function is +primarily intended for use with arbitrary length integers; it avoids +creating MPFR arbitrary precision floating-point values (@pxref{Arbitrary +Precision Integers}). + +This function is a @code{gawk} extension. It is not available in +compatibility mode (@pxref{Options}). + @item @code{log(@var{x})} @cindexawkfunc{log} @cindex logarithm @@ -18313,16 +18597,70 @@ results of the @code{compl()}, @code{lshift()}, and @code{rshift()} functions. @node Type Functions @subsection Getting Type Information -@command{gawk} provides a single function that lets you distinguish -an array from a scalar variable. This is necessary for writing code +@command{gawk} provides two functions that let you distinguish +the type of a variable. +This is necessary for writing code that traverses every element of an array of arrays -(@pxref{Arrays of Arrays}). +(@pxref{Arrays of Arrays}), and in other contexts. @table @code @cindexgawkfunc{isarray} @cindex scalar or array @item isarray(@var{x}) Return a true value if @var{x} is an array. Otherwise, return false. + +@cindexgawkfunc{typeof} +@cindex variable type +@cindex type, of variable +@item typeof(@var{x}) +Return one of the following strings, depending upon the type of @var{x}: + +@c nested table +@table @code +@item "array" +@var{x} is an array. + +@item "regexp" +@var{x} is a strongly typed regexp (@pxref{Strong Regexp Constants}). + +@item "number" +@var{x} is a number. + +@item "string" +@var{x} is a string. + +@item "strnum" +@var{x} is a string that might be a number, such as a field or +the result of calling @code{split()}. (I.e., @var{x} has the STRNUM +attribute; @pxref{Variable Typing}.) + +@item "unassigned" +@var{x} is a scalar variable that has not been assigned a value yet. 
+For example: + +@example +BEGIN @{ + a[1] # creates a[1] but it has no assigned value + print typeof(a[1]) # unassigned +@} +@end example + +@item "untyped" +@var{x} has not been used yet at all; it can become a scalar or an +array. +For example: + +@example +BEGIN @{ + print typeof(x) # x never used --> untyped + mk_arr(x) + print typeof(x) # x now an array --> array +@} + +function mk_arr(a) @{ a[1] = 1 @} +@end example + +@end table @end table @code{isarray()} is meant for use in two circumstances. The first is when @@ -18340,6 +18678,14 @@ that has not been previously used to @code{isarray()}, @command{gawk} ends up turning it into a scalar. @end quotation +The @code{typeof()} function is general; it allows you to determine +if a variable or function parameter is a scalar, an array, or a strongly +typed regexp. + +@code{isarray()} is deprecated; you should use @code{typeof()} instead. +You should replace any existing uses of @samp{isarray(var)} in your +code with @samp{typeof(var) == "array"}. + @node I18N Functions @subsection String-Translation Functions @cindex @command{gawk}, string-translation functions @@ -26946,8 +27292,7 @@ The profiled version of your program may not look exactly like what you typed when you wrote it. This is because @command{gawk} creates the profiled version by ``pretty-printing'' its internal representation of the program. The advantage to this is that @command{gawk} can produce -a standard representation. The disadvantage is that all source code -comments are lost. +a standard representation. Also, things such as: @example @@ -27041,10 +27386,30 @@ When called this way, @command{gawk} ``pretty-prints'' the program into @file{awkprof.out}, without any execution counts. @quotation NOTE -The @option{--pretty-print} option still runs your program. -This will change in the next major release. +Once upon a time, the @option{--pretty-print} option would also run +your program. This is no longer the case. 
@end quotation +There is a significant difference between the output created when +profiling, and that created when pretty-printing. Pretty-printed output +preserves the original comments that were in the program, although their +placement may not correspond exactly to their original locations in the +source code. + +However, as a deliberate design decision, profiling output @emph{omits} +the original program's comments. This allows you to focus on the +execution count data and helps you avoid the temptation to use the +profiler for pretty-printing. + +Additionally, pretty-printed output does not have the leading indentation +that the profiling output does. This makes it easy to pretty-print your +code once development is completed, and then use the result as the final +version of your program. + +Because the internal representation of your program is formatted to +recreate an @command{awk} program, profiling and pretty-printing +automatically disable @command{gawk}'s default optimizations. + @node Advanced Features Summary @section Summary @@ -27085,8 +27450,7 @@ you tune them more easily. Sending the @code{USR1} signal while profiling causes @command{gawk} to dump the profile and keep going, including a function call stack. @item -You can also just ``pretty-print'' the program. This currently also runs -the program, but that will change in the next major release. +You can also just ``pretty-print'' the program. @end itemize @@ -29256,6 +29620,65 @@ executing, short programs. The @command{gawk} debugger only accepts source code supplied with the @option{-f} option. @end itemize +One other point is worth discussing. Conventional debuggers run in a +separate process (and thus address space) from the programs that they +debug (the @dfn{debuggee}, if you will). + +The @command{gawk} debugger is different; it is an integrated part +of @command{gawk} itself. 
This makes it possible, in rare cases, +for @command{gawk} to become an excellent demonstrator of Heisenberg +Uncertainty physics, where the mere act of observing something can change +it. Consider the following:@footnote{Thanks to Hermann Peifer for +this example.} + +@example +$ @kbd{cat test.awk} +@print{} @{ print typeof($1), typeof($2) @} +$ @kbd{cat test.data} +@print{} abc 123 +$ @kbd{gawk -f test.awk test.data} +@print{} strnum strnum +@end example + +This is all as expected: field data has the STRNUM attribute +(@pxref{Variable Typing}). Now watch what happens when we run +this program under the debugger: + +@example +$ @kbd{gawk -D -f test.awk test.data} +gawk> @kbd{w $1} @ii{Set watchpoint on} $1 +@print{} Watchpoint 1: $1 +gawk> @kbd{w $2} @ii{Set watchpoint on} $2 +@print{} Watchpoint 2: $2 +gawk> @kbd{r} @ii{Start the program} +@print{} Starting program: +@print{} Stopping in Rule ... +@print{} Watchpoint 1: $1 @ii{Watchpoint fires} +@print{} Old value: "" +@print{} New value: "abc" +@print{} main() at `test.awk':1 +@print{} 1 @{ print typeof($1), typeof($2) @} +gawk> @kbd{n} @ii{Keep going @dots{}} +@print{} Watchpoint 2: $2 @ii{Watchpoint fires} +@print{} Old value: "" +@print{} New value: "123" +@print{} main() at `test.awk':1 +@print{} 1 @{ print typeof($1), typeof($2) @} +gawk> @kbd{n} @ii{Get result from} typeof() +@print{} strnum number @ii{Result for} $2 @ii{isn't right} +@print{} Program exited normally with exit value: 0 +gawk> @kbd{quit} +@end example + +In this case, the act of comparing the new value of @code{$2} +with the old one caused @command{gawk} to evaluate it and determine that it +is indeed a number, and this is reflected in the result of +@code{typeof()}. + +Cases like this where the debugger is not transparent to the program's +execution should be rare. If you encounter one, please report it +(@pxref{Bugs}). 
+ @ignore Look forward to a future release when these and other missing features may be added, and of course feel free to try to add them yourself! @@ -29292,6 +29715,10 @@ If the GNU Readline library is available when @command{gawk} is compiled, it is used by the debugger to provide command-line history and editing. +@item +Usually, the debugger does not affect the +program being debugged, but occasionally it can. + @end itemize @node Arbitrary Precision Arithmetic @@ -30109,6 +30536,122 @@ to just use the following: gawk -M 'BEGIN @{ n = 13; print n % 2 @}' @end example +When dividing two arbitrary precision integers with either +@samp{/} or @samp{%}, the result is typically an arbitrary +precision floating point value (unless the denominator evenly +divides into the numerator). In order to do integer division +or remainder with arbitrary precision integers, use the built-in +@code{intdiv()} function (@pxref{Numeric Functions}). + +You can simulate the @code{intdiv()} function in standard @command{awk} +using this user-defined function: + +@example +@c file eg/lib/intdiv.awk +# intdiv --- do integer division + +@c endfile +@ignore +@c file eg/lib/intdiv.awk +# +# Arnold Robbins, arnold@@skeeve.com, Public Domain +# July, 2014 +# +# Name changed from div() to intdiv() +# April, 2015 + +@c endfile + +@end ignore +@c file eg/lib/intdiv.awk +function intdiv(numerator, denominator, result) +@{ + split("", result) + + numerator = int(numerator) + denominator = int(denominator) + result["quotient"] = int(numerator / denominator) + result["remainder"] = int(numerator % denominator) + + return 0.0 +@} +@c endfile +@end example + +The following example program, contributed by Katie Wasserman, +uses @code{intdiv()} to +compute the digits of @value{PI} to as many places as you +choose to set: + +@example +@c file eg/prog/pi.awk +# pi.awk --- compute the digits of pi +@c endfile +@c endfile +@ignore +@c file eg/prog/pi.awk +# +# Katie Wasserman, katie@@wass.net +# August 
2014 +@c endfile +@end ignore +@c file eg/prog/pi.awk + +BEGIN @{ + digits = 100000 + two = 2 * 10 ^ digits + pi = two + for (m = digits * 4; m > 0; --m) @{ + d = m * 2 + 1 + x = pi * m + intdiv(x, d, result) + pi = result["quotient"] + pi = pi + two + @} + print pi +@} +@c endfile +@end example + +@ignore +Date: Wed, 20 Aug 2014 10:19:11 -0400 +To: arnold@skeeve.com +From: Katherine Wasserman <katie@wass.net> +Subject: Re: computation of digits of pi? + +Arnold, + +>The program that you sent to compute the digits of pi using div(). Is +>that some standard algorithm that every math student knows? If so, +>what's it called? + +It's not that well known but it's not that obscure either + +It's Euler's modification to Newton's method for calculating pi. + +Take a look at lines (23) - (25) here: http://mathworld.wolfram.com/PiFormulas.htm + +The algorithm I wrote simply expands the multiply by 2 and works from the innermost expression outwards. I used this to program HP calculators because it's quite easy to modify for tiny memory devices with smallish word sizes. + +http://www.hpmuseum.org/cgi-sys/cgiwrap/hpmuseum/articles.cgi?read=899 + +-Katie +@end ignore + +When asked about the algorithm used, Katie replied: + +@quotation +It's not that well known but it's not that obscure either. +It's Euler's modification to Newton's method for calculating pi. +Take a look at lines (23) - (25) here: @uref{http://mathworld.wolfram.com/PiFormulas.html}. + +The algorithm I wrote simply expands the multiply by 2 and works from +the innermost expression outwards. I used this to program HP calculators +because it's quite easy to modify for tiny memory devices with smallish +word sizes. See +@uref{http://www.hpmuseum.org/cgi-sys/cgiwrap/hpmuseum/articles.cgi?read=899}. +@end quotation + @node POSIX Floating Point Problems @section Standards Versus Existing Practice @@ -30508,6 +31051,7 @@ This (rather large) @value{SECTION} describes the API in detail. 
* Symbol Table Access:: Functions for accessing global variables. * Array Manipulation:: Functions for working with arrays. +* Redirection API:: How to access and manipulate redirections. * Extension API Variables:: Variables provided by the API. * Extension API Boilerplate:: Boilerplate code for using the API. @end menu @@ -30583,6 +31127,10 @@ Clearing an array @item Flattening an array for easy C-style looping over all its indices and elements @end itemize + +@item +Accessing and manipulating redirections. + @end itemize Some points about using the API: @@ -32553,6 +33101,75 @@ $ @kbd{AWKLIBPATH=$PWD ./gawk -f subarray.awk} (@xref{Finding Extensions} for more information on the @env{AWKLIBPATH} environment variable.) +@node Redirection API +@subsection Accessing and Manipulating Redirections + +The following function allows extensions to access and manipulate redirections. + +@table @code +@item awk_bool_t get_file(const char *name, +@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ size_t name_len, +@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ const char *filetype, +@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ int fd, +@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ const awk_input_buf_t **ibufp, +@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ const awk_output_buf_t **obufp); +Look up a file in @command{gawk}'s internal redirection table. +If @code{name} is @code{NULL} or @code{name_len} is zero, return +data for the currently open input file corresponding to @code{FILENAME}. +(This does not access the @code{filetype} argument, so that may be undefined). +If the file is not already open, attempt to open it. +The @code{filetype} argument must be zero-terminated and should be one of: + +@table @code +@item ">" +A file opened for output. + +@item ">>" +A file opened for append. + +@item "<" +A file opened for input. + +@item "|>" +A pipe opened for output. + +@item "|<" +A pipe opened for input. + +@item "|&" +A two-way coprocess. 
+@end table + +On error, return a @code{false} value. Otherwise, return +@code{true}, and return additional information about the redirection +in the @code{ibufp} and @code{obufp} pointers. For input +redirections, the @code{*ibufp} value should be non-@code{NULL}, +and @code{*obufp} should be @code{NULL}. For output redirections, +the @code{*obufp} value should be non-@code{NULL}, and @code{*ibufp} +should be @code{NULL}. For two-way coprocesses, both values should +be non-@code{NULL}. + +In the usual case, the extension is interested in @code{(*ibufp)->fd} +and/or @code{fileno((*obufp)->fp)}. If the file is not already +open, and the @code{fd} argument is non-negative, @command{gawk} +will use that file descriptor instead of opening the file in the +usual way. If @code{fd} is non-negative, but the file exists already, +@command{gawk} ignores @code{fd} and returns the existing file. It is +the caller's responsibility to notice that neither the @code{fd} in +the returned @code{awk_input_buf_t} nor the @code{fd} in the returned +@code{awk_output_buf_t} matches the requested value. + +Note that supplying a file descriptor is currently @emph{not} supported +for pipes. However, supplying a file descriptor should work for input, +output, append, and two-way (coprocess) sockets. If @code{filetype} +is two-way, @command{gawk} assumes that it is a socket! Note that in +the two-way case, the input and output file descriptors may differ. +To check for success, you must check whether either matches. +@end table + +It is anticipated that this API function will be used to implement I/O +multiplexing and a socket library. 
+ @node Extension API Variables @subsection API Variables @@ -34117,18 +34734,21 @@ As of this writing, there are seven extensions: GD graphics library extension @item +MPFR library extension +(this provides access to a number of MPFR functions that @command{gawk}'s +native MPFR support does not) + +@item PDF extension @item PostgreSQL extension @item -MPFR library extension -(this provides access to a number of MPFR functions that @command{gawk}'s -native MPFR support does not) +Redis extension @item -Redis extension +Select extension @item XML parser extension, using the @uref{http://expat.sourceforge.net, Expat} @@ -34757,6 +35377,10 @@ Indirect function calls @item Directories on the command line produce a warning and are skipped (@pxref{Command-line directories}) + +@item +Output with @code{print} and @code{printf} need not be fatal +(@pxref{Nonfatal}) @end itemize @item @@ -34844,6 +35468,11 @@ The @code{isarray()} function to check if a variable is an array or not The @code{bindtextdomain()}, @code{dcgettext()}, and @code{dcngettext()} functions for internationalization (@pxref{Programmer i18n}) + +@item +The @code{intdiv()} function for doing integer +division and remainder +(@pxref{Numeric Functions}) @end itemize @item @@ -34882,6 +35511,7 @@ The @option{-p}, @option{-P}, @option{-r}, +@option{-s}, @option{-S}, @option{-t}, and @@ -34906,6 +35536,7 @@ and the @option{--load}, @option{--non-decimal-data}, @option{--optimize}, +@option{--no-optimize}, @option{--posix}, @option{--pretty-print}, @option{--profile}, @@ -34976,6 +35607,16 @@ for @command{gawk} @value{PVERSION} 4.1: Ultrix @end itemize +@item +Support for the following systems was removed from the code +for @command{gawk} @value{PVERSION} 4.2: + +@c nested table +@itemize @value{MINUS} +@item +MirBSD +@end itemize + @end itemize @c XXX ADD MORE STUFF HERE @@ -35602,6 +36243,52 @@ Support for Ultrix was removed. 
@end itemize +Version 4.2 introduced the following changes: + +@itemize @bullet +@item +Changes to @code{ENVIRON} are reflected into @command{gawk}'s +environment and that of programs that it runs. +@xref{Auto-set}. + +@item +The @option{--pretty-print} option no longer runs the @command{awk} +program too. +@xref{Options}. + +@item +The @command{igawk} program and its manual page are no longer +installed when @command{gawk} is built. +@xref{Igawk Program}. + +@item +The @code{intdiv()} function. +@xref{Numeric Functions}. + +@item +The maximum number of hexadecimal digits in @samp{\x} escapes +is now two. +@xref{Escape Sequences}. + +@item +Nonfatal output with @code{print} and @code{printf}. +@xref{Nonfatal}. + +@item +For many years, POSIX specified that default field splitting +only allowed spaces and tabs to separate fields, and this was +how @command{gawk} behaved with @option{--posix}. As of 2013, +the standard restored historical behavior, and now default +field splitting with @option{--posix} also allows newlines to +separate fields. + +@item +Support for MirBSD was removed. + +@item +Support for GNU/Linux on Alpha was removed. +@end itemize + @c XXX ADD MORE STUFF HERE @end ifclear @@ -36269,6 +36956,8 @@ The generated Info file for The @command{troff} source for a manual page describing the @command{igawk} program presented in @ref{Igawk Program}. +(Since @command{gawk} can do its own @code{@@include} processing, +neither @command{igawk} nor @file{igawk.1} are installed.) @item doc/Makefile.in The input file used during the configuration process to generate the @@ -36313,8 +37002,6 @@ source file for this @value{DOCUMENT}. It also contains a @file{Makefile.in} fil @file{Makefile.am} is used by GNU Automake to create @file{Makefile.in}. The library functions from @ref{Library Functions}, -and the @command{igawk} program from -@ref{Igawk Program} are included as ready-to-use files in the @command{gawk} distribution. 
They are installed as part of the installation process. The rest of the programs in this @value{DOCUMENT} are available in appropriate @@ -36325,6 +37012,12 @@ The source code, manual pages, and infrastructure files for the sample extensions included with @command{gawk}. @xref{Dynamic Extensions}, for more information. +@item extras/* +Additional non-essential files. Currently, this directory contains some shell +startup files to be installed in @file{/etc/profile.d} to aid in manipulating +the @env{AWKPATH} and @env{AWKLIBPATH} environment variables. +@xref{Shell Startup Files}, for more information. + @item posix/* Files needed for building @command{gawk} on POSIX-compliant systems. @@ -36356,6 +37049,7 @@ to configure @command{gawk} for your system yourself. @menu * Quick Installation:: Compiling @command{gawk} under Unix. +* Shell Startup Files:: Shell convenience functions. * Additional Configuration Options:: Other compile-time options. * Configuration Philosophy:: How it's all supposed to work. @end menu @@ -36436,6 +37130,44 @@ is likely that you will be asked for your password, and you will have to have been set up previously as a user who is allowed to run the @command{sudo} command. +@node Shell Startup Files +@appendixsubsec Shell Startup Files + +The distribution contains shell startup files @file{gawk.sh} and +@file{gawk.csh} containing functions to aid in manipulating +the @env{AWKPATH} and @env{AWKLIBPATH} environment variables. +On a Fedora system, these files should be installed in @file{/etc/profile.d}; +on other platforms, the appropriate location may be different. + +@table @command + +@cindex @command{gawkpath_default} shell function +@item gawkpath_default +Reset the @env{AWKPATH} environment variable to its default value. + +@cindex @command{gawkpath_prepend} shell function +@item gawkpath_prepend +Add the argument to the front of the @env{AWKPATH} environment variable. 
+ +@cindex @command{gawkpath_append} shell function +@item gawkpath_append +Add the argument to the end of the @env{AWKPATH} environment variable. + +@cindex @command{gawklibpath_default} shell function +@item gawklibpath_default +Reset the @env{AWKLIBPATH} environment variable to its default value. + +@cindex @command{gawklibpath_prepend} shell function +@item gawklibpath_prepend +Add the argument to the front of the @env{AWKLIBPATH} environment variable. + +@cindex @command{gawklibpath_append} shell function +@item gawklibpath_append +Add the argument to the end of the @env{AWKLIBPATH} environment variable. + +@end table + + @node Additional Configuration Options @appendixsubsec Additional Configuration Options @cindex @command{gawk}, configuring, options |