aboutsummaryrefslogtreecommitdiffstats
path: root/doc
diff options
context:
space:
mode:
Diffstat (limited to 'doc')
-rw-r--r--doc/CMakeLists.txt95
-rw-r--r--doc/ChangeLog349
-rw-r--r--doc/Makefile.am19
-rw-r--r--doc/Makefile.in62
-rw-r--r--doc/awkcard.in33
-rw-r--r--doc/gawk.1147
-rw-r--r--doc/gawk.info2814
-rw-r--r--doc/gawk.texi1722
-rw-r--r--doc/gawktexi.in1623
-rw-r--r--doc/using-git.texi1179
-rw-r--r--doc/wordlist9
11 files changed, 6553 insertions, 1499 deletions
diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt
new file mode 100644
index 00000000..e12f5de0
--- /dev/null
+++ b/doc/CMakeLists.txt
@@ -0,0 +1,95 @@
+#
+# doc/CMakeLists.txt --- CMake input file for gawk
+#
+# Copyright (C) 2013
+# the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Programming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+## process this file with CMake to produce Makefile
+
+# DocDependency(<outfile> [<input>...])
+#
+# Declares a custom target named <outfile> and attaches it to the umbrella
+# "doc" target, so that building "doc" also builds <outfile>.
+#
+#   outfile  - name of the target; also passed as the first argument to
+#              the docmaker helper script.
+#   <input>  - zero or more names the target depends on; they are passed
+#              to docmaker after <outfile>.
+#
+# The command runs in the current source directory.
+# The "doc" target must already exist when this is called.
+# NOTE(review): docmaker presumably picks the conversion from the file
+# extensions -- confirm against cmake/docmaker.
+#
+# Example:
+#   DocDependency(gawk.dvi gawk.texi)
+function(DocDependency outfile)
+  add_custom_target(
+    ${outfile}
+    COMMAND ${CMAKE_SOURCE_DIR}/cmake/docmaker ${outfile} ${ARGN}
+    DEPENDS ${ARGN}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    # VERBATIM keeps argument escaping identical on every platform.
+    VERBATIM
+  )
+  add_dependencies(doc ${outfile})
+endfunction()
+
+# Documentation targets are defined only for the converters that are found:
+#   texi2dvi -> .texi/.info/.dvi targets and the man pages in .ps
+#   dvips    -> additionally the manuals in .ps
+#   ps2pdf   -> additionally all .pdf outputs and the install rules
+find_program(TEXI2DVI_CONVERTER texi2dvi)
+if(TEXI2DVI_CONVERTER)
+  # Umbrella target; each DocDependency() call attaches its target to it.
+  add_custom_target(doc)
+  DocDependency(gawk.texi gawktexi.in rflashlight.eps api-figure1.fig api-figure2.fig api-figure3.fig general-program.fig process-flow.fig)
+  DocDependency(rflashlight.eps)
+  DocDependency(api-figure1.fig)
+  DocDependency(api-figure2.fig)
+  DocDependency(api-figure3.fig)
+  DocDependency(general-program.fig)
+  DocDependency(process-flow.fig)
+  DocDependency(gawk.dvi gawk.texi)
+  DocDependency(gawk.info gawk.texi)
+  DocDependency(gawkinet.dvi gawkinet.texi)
+  DocDependency(gawkinet.info gawkinet.texi)
+  DocDependency(gawkinet.texi statist.eps)
+  DocDependency(gawk.1.ps gawk.1)
+  DocDependency(igawk.1.ps igawk.1)
+
+  find_program(DVIPS_CONVERTER dvips)
+  if(DVIPS_CONVERTER)
+    DocDependency(gawk.ps gawk.dvi)
+    DocDependency(gawkinet.ps gawkinet.dvi)
+
+    find_program(PS2PDF_CONVERTER ps2pdf)
+    if(PS2PDF_CONVERTER)
+      DocDependency(gawk.1.pdf gawk.1.ps)
+      DocDependency(igawk.1.pdf igawk.1.ps)
+      DocDependency(gawk.pdf gawk.ps)
+      DocDependency(gawkinet.pdf gawkinet.ps)
+      # The generated files are installed from the source directory, which
+      # is the working directory DocDependency() gives to docmaker.
+      install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gawk.1.pdf DESTINATION doc)
+      install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/igawk.1.pdf DESTINATION doc)
+      install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gawk.info DESTINATION doc)
+      install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gawk.pdf DESTINATION doc)
+      install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gawkinet.info DESTINATION doc)
+      install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gawkinet.pdf DESTINATION doc)
+
+      # Reference card.
+      set(CARDSRC macros cardfonts colors awkcard.tr)
+      # NOTE(review): CARDSRC_N is not used in this file; presumably it was
+      # meant for the no-colors card (awkcard.nc) -- confirm before removing.
+      set(CARDSRC_N macros cardfonts no.colors awkcard.tr)
+      set(CARDFILES ${CARDSRC} ad.block awkcard.in setter.outline)
+      DocDependency(awkcard.tr awkcard.in)
+      DocDependency(awkcard.nc ${CARDFILES})
+      DocDependency(awkcard.ps ${CARDFILES})
+      DocDependency(awkcard.pdf awkcard.ps)
+      install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/awkcard.pdf DESTINATION doc)
+
+    else()
+      # The .ps targets above are still defined, so say so instead of
+      # claiming that no documentation will be generated.
+      message(WARNING "Found no ps2pdf tool; doc will be generated only in .ps files")
+      install(CODE "MESSAGE(\"doc generated only in .ps files\")")
+    endif()
+  else()
+    # The .dvi/.info targets and the man page .ps targets are still defined.
+    message(WARNING "Found no dvips tool; doc will be generated only in .dvi files and man pages in .ps files")
+    install(CODE "MESSAGE(\"doc generated only in .dvi files and man pages in .ps files\")")
+  endif()
+else()
+  message(WARNING "Found no texi2dvi tool; no doc will be generated")
+  # "doc" is only created in the branch above, so a command cannot be
+  # attached to it here with add_custom_command(TARGET doc ...); that is a
+  # configure error. Define the target directly so that building "doc"
+  # still works and explains why nothing was produced.
+  add_custom_target(
+    doc
+    COMMAND ${CMAKE_COMMAND} -E echo no doc generated because of missing texi2dvi
+    VERBATIM
+  )
+endif()
+
diff --git a/doc/ChangeLog b/doc/ChangeLog
index fbcd9367..142f035e 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,8 +1,39 @@
+2017-03-17 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Improve the discussion of quoting on
+ MS-Windows. Original text contributed by
+ Vincent Belaiche <vincent.belaiche@gmail.com>.
+
+2017-03-03 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Additional small writing tip in the notes
+ after the @bye.
+
+2017-03-02 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Edits preparatory to release.
+
+2017-02-23 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawk.1: "timezone" --> "time zone".
+ * awkcard.in: Update copyright year.
+
+2017-02-21 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * gawk.1: Document new mktime optional 2nd utc-flag argument.
+ * gawktexi.in: Ditto.
+ * awkcard.in: Ditto.
+
2017-02-13 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Fix two typos.
* wordlist.txt: Update.
+ Related:
+
+ * gawktexi.in: Fix more typos.
+ * wordlist.txt: Update again.
+
2017-01-27 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Update UPDATE-MONTH and copyright years.
@@ -12,6 +43,68 @@
* gawktexi.in: Comment out stuff about awk.info, since that
domain is now gone.
+2016-12-05 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * gawktexi.in: Explain why an API extension function might want
+ to use the AWK_STRNUM type to return data.
+
+2016-12-23 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Update API table of type requested / type returned.
+
+2016-12-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Minor edits after merging branches and some
+ additional work in the code.
+
+2016-12-17 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Further API clarifications and edits, add a
+ section on backwards compatibility.
+
+2016-12-16 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Update description of awk_ext_func_t structure,
+ again.
+
+2016-12-14 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Update description of awk_ext_func_t structure.
+
+2016-12-05 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * gawktexi.in: Document strnum changes as relates to API.
+ Still stuff left to do -- tables for type conversions need
+ to be updated to show new strnum and regex rows and columns.
+
+2016-12-04 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * gawktexi.in: Remove make_regex and replace it with make_const_regex
+ and make_malloced_regex.
+
+2016-12-04 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * gawktexi.in: Document new flatten_array_typed API function, and
+ indicate that the old flatten_array function has been superseded.
+
+2016-11-30 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Document typed regex changes as relates to API.
+ Still stuff left to do.
+
+2016-11-21 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Finish off discussion of strongly typed regexp
+ constants and put it in the right place in the manual. A few other
+ minor fixes.
+ * wordlist: Updated.
+
+2016-11-18 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in (Variable Typing): Rework and improve discussion
+ of strings, numbers, and strnums. Update description of strnum
+ in other places.
+
2016-11-10 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Fix example use of dcngettext.
@@ -26,6 +119,18 @@
Works better for Info, text, and HTML. Thanks to
Marco Curreli <marcocurreli@tiscali.it> for the report.
+2016-11-04 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Fix a spelling error.
+ * wordlist: Update.
+
+2016-10-25 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Document that negative arguments are not allowed
+ for bitwise functions. Add a sidebar explaining it a bit and
+ also showing the difference with and without -M.
+ * gawk.1: Document that negative arguments are not allowed.
+
2016-10-23 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Remove references to MS-DOS and OS/2,
@@ -47,11 +152,18 @@
2016-08-25 Arnold D. Robbins <arnold@skeeve.com>
+ * gawktexi.in (POSIX String Comparison): Update for new
+ spec where == and != use strcmp, rest use strcoll. Thanks to
+ Chet Ramey for pointing me at the new rules.
+
+2016-08-25 Arnold D. Robbins <arnold@skeeve.com>
+
* 4.1.4: Release tar ball made.
2016-08-24 Arnold D. Robbins <arnold@skeeve.com>
* wordlist: Add more words.
+ * gawktexi.in: Fix more typos.
2016-08-23 Arnold D. Robbins <arnold@skeeve.com>
@@ -61,6 +173,18 @@
* gawktexi.in: Fix typos, adjust update date.
* awkcard.in: Update copyright years.
+2016-08-03 Arnold D. Robbins <arnold@skeeve.com>
+
+ Restored doc on typed regexes.
+
+ * gawk.1, gawktexi.in: Updated.
+
+2016-08-03 Arnold D. Robbins <arnold@skeeve.com>
+
+ Remove typed regexes until they can be done properly.
+
+ * gawk.1, gawktexi.in: Updated.
+
2016-08-01 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Mark DJGPP port as unsupported.
@@ -69,6 +193,12 @@
* gawktexi.in: Fix a typo. Thanks to Marco Curreli for reporting.
+2016-07-23 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Document return value of close on a pipe now like
+ that of system: exit status, status + 256 for signal, or
+ status + 512 for signal with core dump.
+
2016-07-18 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Fix a typo. Thanks to Antonio Colombo for reporting.
@@ -82,6 +212,16 @@
* gawktexi.in (Auto-set): Add example use of multiply function.
+2016-06-30 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawk.1: Typo fix. Thanks to Antonio Giovanni Colombo
+ for noticing.
+
+2016-06-15 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawk.1: Document typeof(), update modified date.
+ * awkcard.in: Document typeof().
+
2016-06-10 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Fix a typo, and replace hard-coded "section" with
@@ -90,6 +230,15 @@
(UPDATE-MONTH, PATCHLEVEL): Update to current before release.
* awkcard.in: Update version.
+2016-05-30 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * gawktexi.in: Replace num_expected_args with max_expected_args.
+ Explain what it's used for.
+
+2016-05-25 Manuel Collado <mcollado2011@gmail.com>.
+
+ * gawktexi.in: Document new 'nonfatal' API function.
+
2016-05-25 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Typo fix in extension section, thanks to
@@ -106,6 +255,12 @@
out since 2001, index RFCs, change function name convention to
match main gawktexi.in. Update the update month.
+2016-04-06 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in (Two-way I/O): Document that writing to the closed
+ write end of a two way pipe or reading from the closed read end
+ can be made nonfatal.
+
2016-04-04 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in, gawkinet.texi: Enable use of braces in
@@ -116,6 +271,11 @@
* gawktexi.in (Two-way I/O): Document that closing the "from"
end waits for the process to exit, so it's not such a great idea.
+2016-03-27 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawkinet.texi: Small update about end of line vs full
+ comments when pretty printing.
+
2016-03-21 Arnold D. Robbins <arnold@skeeve.com>
* gawkinet.texi: Update UDP client and discussion, update
@@ -148,6 +308,8 @@
dlward134@gmail.com. Added an example of use of rewind(), also
per suggestion from David Ward.
* gawktexi.in: Update info about Texinfo versions.
+ * gawktexi.in (Limitations): Fix Heisenberg Physics example and
+ spelling of Heisenberg's name. Thanks to Hermann Peifer.
2016-02-14 Arnold D. Robbins <arnold@skeeve.com>
@@ -157,6 +319,14 @@
Use @sup for superscripts where possible.
* texinfo.tex: Updated.
+2016-02-05 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawk.texi: Document that optimization is now the default,
+ there are new -s/--no-optimize options and that
+ pretty-printing and profiling disable optimization.
+ * gawk.1: Ditto.
+ * awkcard.in: Ditto.
+
2016-02-03 Andrew J. Schorr <aschorr@telemetry-investments.com>
* gawktexi.in (Command-Line Options): Change wording of -M description
@@ -189,17 +359,39 @@
* ChangeLog: Remove spurious whitespace.
+ Unrelated:
+
+ * gawk.1: Restore text on PROCINFO["RETRY"] and fix up the
+ formatting while we're at it. Thanks to Andrew Schorr for
+ pointing out the problem.
+
2016-01-13 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in (Array Sorting Functions): Add an example of
using a function name with asort(). Response to bug report
Stephane Goujet <stephane.goujet@wanadoo.fr>.
+2016-01-06 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Finish documenting that --pretty-print
+ doesn't run the program. Thanks to Antonio
+ Giovanni Colombo for the report and patch.
+
+2016-01-03 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Document that GNU/Linux on Alpha is no
+ longer supported.
+
2015-12-27 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Fix some @c endfile. Thanks to Antonio
Giovanni Colombo for the report and patch.
+2015-12-20 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Add PROCINFO["NONFATAL"] to the list for PROCINFO.
+ * gawk.1: Ditto.
+
2015-12-18 Arnold D. Robbins <arnold@skeeve.com>
* gawk.1: Update description of PROCINFO, and sort it properly.
@@ -213,6 +405,11 @@
2015-11-15 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Minor edits.
+ * gawk.1: Revise \x to maximum of two digits.
+
+2015-11-04 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.am (pdf-local): Remove igawk.1.pdf. Ooops.
2015-10-30 Arnold D. Robbins <arnold@skeeve.com>
@@ -225,6 +422,10 @@
* gawk.1: Put commas outside quoting in regexps to avoid
confusion. Thanks to Mike Frysinger <vapier@gentoo.org>.
+2015-10-16 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awkcard.in: Fix tbl complaint.
+
2015-10-07 Arnold D. Robbins <arnold@skeeve.com>
* texinfo.tex: Updated to a working version.
@@ -256,11 +457,26 @@
* gawktexi.in: Typo fixes in Appendix A.
Thanks to Antonio Colombo.
+2015-07-30 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Small typo fix; thanks to Antonio Colombo
+ for noticing.
+
2015-07-01 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Update info on Quiktrim awk; thanks to
Antonio Colombo for the pointer.
+2015-06-30 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in (Limitations): Document that sometimes the
+ debugger can affect the program being run.
+ Thanks to Hermann Peifer for the test case.
+
+2015-06-26 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Update description of values returned by typeof.
+
2015-06-19 Arnold D. Robbins <arnold@skeeve.com>
* gawkinet.info: Fix an old arnold@gnu.org.
@@ -286,6 +502,13 @@
* gawktexi.in: Add another pithy quote from Chet Ramey. Currently
commented out.
+2015-05-31 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Revised description of default field parsing
+ for POSIX. Newline is now a separator also. Thanks to
+ Michael Klement <michael.klement@usa.net> for pointing this out.
+ * gawk.1: Updated too.
+
2015-05-30 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in (Bitwise Functions): Update results of testbits.awk.
@@ -310,15 +533,33 @@
* gawktexi.in: Fix description of nextfile within a function. Sigh.
+2015-05-15 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * gawktexi.in (Undocumented): Describe the new PROCINFO["argv"] array.
+
2015-05-14 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in (Bugs): Add that email should be in plain
text and not in HTML. Sigh.
+2015-05-11 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Add doc on conversions for strongly typed
+ regexp variables.
+
+2015-05-03 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Add initial documentation for strongly typed
+ regexps and for `typeof'.
+
2015-04-29 Arnold D. Robbins <arnold@skeeve.com>
* 4.1.2: Release tar ball made.
+2015-04-16 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in (Undocumented): More info added.
+
2015-04-08 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Update feature history section.
@@ -340,6 +581,11 @@
* gawktexi.in: Fix a figure caption. Thanks to Antonio Colombo
for pointing this out.
+ * gawktexi.in: Additional typo fix, also thanks to Antonio.
+
+2015-04-02 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in, gawk.1, awkcard.in: Name change: div() --> intdiv().
2015-03-31 Arnold D. Robbins <arnold@skeeve.com>
@@ -347,10 +593,17 @@
indirectly. Small additional fix relating to rand(). Thanks
to Antonio Colombo.
+2015-03-27 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Minor edits.
+
2015-03-24 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Minor fixes from Antonio Colombo and new exercise
in chapter 16.
+ * gawk.1: Minor edits.
+ * gawktexi.in: Edits in material on errno and retryable and get_file
+ API.
2015-03-17 Andrew J. Schorr <aschorr@telemetry-investments.com>
@@ -364,6 +617,12 @@
Thanks to Nicholas Mills <nlmills@clemson.edu> for pointing out
the issue.
+2015-03-08 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Briefly describe that nonfatal I/O overrides
+ GAWK_SOCK_RETRIES, in the env var part and in the nonfatal I/O
+ part.
+
2015-03-01 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Change quotes to @dfn for pseudorandom.
@@ -420,6 +679,7 @@
2015-02-08 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: O'Reilly fixes.
+ Make non-fatal i/o use "NONFATAL".
2015-02-06 Arnold D. Robbins <arnold@skeeve.com>
@@ -428,6 +688,7 @@
2015-02-04 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: O'Reilly fixes.
+ * gawktexi.in: Update various version-related bits of info.
2015-02-02 Arnold D. Robbins <arnold@skeeve.com>
@@ -457,7 +718,7 @@
2015-01-25 Arnold D. Robbins <arnold@skeeve.com>
- * gawktexi.in: Fix a bad URL.
+ * gawktexi.in: Fix a bad URL. And another one.
More O'Reilly fixes.
2015-01-23 Arnold D. Robbins <arnold@skeeve.com>
@@ -479,12 +740,40 @@
* gawkinet.texi: Fix capitalization in document title.
* gawktexi.in: Here we go again: Starting on more O'Reilly fixes.
+2014-12-27 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Add info that nonfatal I/O works with stdout and
+ stderr. Revise version info and what was added when.
+
+2015-01-05 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * gawktexi.in: Improve get_file documentation.
+
+2015-01-05 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * gawktexi.in: Replace "Retrying I/O" with "Retrying Input", since this
+ feature pertains to input, not output.
+
+2015-01-04 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * gawktexi.in: Document the get_file API function.
+
+2015-01-04 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * gawk.1: Document new features PROCINFO["errno"] and
+ PROCINFO["input", "RETRY"], and new getline return value of -2.
+ * gawktexi.in: Ditto.
+
2014-12-26 Antonio Giovanni Colombo <azc100@gmail.com>
* gawktexi.in (Glossary): Really sort the items.
2014-12-24 Arnold D. Robbins <arnold@skeeve.com>
+ * gawktexi.in: Start documenting nonfatal output.
+
+2014-12-24 Arnold D. Robbins <arnold@skeeve.com>
+
* gawktexi.in: Add one more paragraph to new foreword.
* gawktexi.in: Fix exponentiation in TeX mode. Thanks to
Marco Curreli by way of Antonio Giovanni Colombo.
@@ -516,6 +805,11 @@
* gawktexi.in: Various minor fixes and updates.
+2014-11-23 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Update that TZ env. var can influence mktime
+ in running program. Thanks to Hermann Peifer.
+
2014-11-19 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Update that RFC 4180 documents CSV data.
@@ -529,6 +823,11 @@
* gawktexi.in: Comment out that I need an owner for awk.info.
I may have found one or two people.
+2014-10-29 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * gawktexi.in: Document new extras directory containing shell startup
+ files to manipulate AWKPATH and AWKLIBPATH environment variables.
+
2014-10-28 Arnold D. Robbins <arnold@skeeve.com>
* gawk.1: Clarification that debugger reads stdin.
@@ -542,6 +841,7 @@
2014-10-25 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Minor typo fixes.
+ Fix discussion of \x, per note from Antonio Colombo.
2014-10-17 Arnold D. Robbins <arnold@skeeve.com>
@@ -583,10 +883,25 @@
* gawktexi.in: Pretty much done!
+ Unrelated:
+
+ * gawktexi.in: Fix braino in awk version of div function.
+ Thanks to Katie Wasserman for the catch.
+
2014-10-01 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: More fixes after reading through the MS.
+ Unrelated:
+
+ * gawktexi.in: Add Katie Wasserman's program to compute
+ the digits of PI.
+
+ Unrelated:
+
+ * gawktexi.in: Document the differences between profiling
+ and pretty printing.
+
2014-09-30 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: More fixes after reading through the MS.
@@ -682,6 +997,10 @@
exercises. Remove use of LC_ALL in an example; doesn't seem
to be needed anymore.
+ Unrelated:
+
+ * gawktexi.in: Document that MirBSD is no longer supported.
+
2014-08-25 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Exercises are excluded from print edition.
@@ -715,6 +1034,10 @@
* gawktexi.in: Starting on reviewer comments.
Update acknowledgements.
+2014-08-12 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Cause div.awk to get into the example files.
+
2014-08-06 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Misc minor additions.
@@ -729,6 +1052,18 @@
* gawktexi.in: Fix doc for API get_record - errcode needs to
be greater than zero.
+2014-07-24 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in (Numeric Functions): For `div()', clarify
+ truncation is towards zero. Thanks to Michal Jaegermann
+ for pointing out the need to clarify this.
+
+2014-07-10 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in (Numeric Functions): Document new `div()' function.
+ (Arbitrary Precision Integers): Document raison d'etre for div().
+ * gawk.1, awkcard.in: Document `div()'.
+
2014-07-04 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in (Bracket Expressions): Add a note about how to
@@ -739,6 +1074,11 @@
* gawktexi.in: Update permissions on copyright page per
latest maintain.texi. Add GPL to print version of book.
+2014-06-24 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Document that --pretty-print no longer runs the
+ program. Remove mention of GAWK_NO_PP_RUN env var.
+
2014-06-22 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Typo fixes and minor corrections.
@@ -1035,7 +1375,7 @@
2013-12-26 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: More minor additions / fixes.
- (Bugs): Add John Malmberg for VMS.
+ (Bugs): Add John Malmberg for VMS. Other minor edits.
2013-12-25 Arnold D. Robbins <arnold@skeeve.com>
@@ -1109,6 +1449,11 @@
* gawktexi.in (FN, FFN, DF,DDF, PVERSION, CTL): Remove macros.
They have no alternate versions and are just in the way.
+2013-08-15 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawk.1: Document that ENVIRON updates affect the environment.
+ * gawktexi.in: Ditto.
+
2013-06-27 Arnold D. Robbins <arnold@skeeve.com>
* texinfo.tex: Update from Karl, fixes a formatting problem.
diff --git a/doc/Makefile.am b/doc/Makefile.am
index bda97de7..002fafa5 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -24,9 +24,9 @@
## process this file with automake to produce Makefile.in
-info_TEXINFOS = gawk.texi gawkinet.texi
+info_TEXINFOS = gawk.texi gawkinet.texi using-git.texi
-man_MANS = gawk.1 igawk.1
+man_MANS = gawk.1
EXTRA_DIST = ChangeLog ChangeLog.0 README.card ad.block setter.outline \
awkcard.in awkforai.txt texinfo.tex cardfonts \
@@ -51,7 +51,7 @@ EXTRA_DIST = ChangeLog ChangeLog.0 README.card ad.block setter.outline \
bc_notes
# Get rid of generated files when cleaning
-CLEANFILES = *.ps *.html *.dvi *~ awkcard.nc awkcard.tr gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf igawk.1.pdf
+CLEANFILES = *.ps *.html *.dvi *~ awkcard.nc awkcard.tr gawk.pdf gawkinet.pdf using-git.pdf awkcard.pdf gawk.1.pdf
MAKEINFO = @MAKEINFO@ --no-split --force
@@ -77,9 +77,9 @@ AWKCARD = awkcard.ps
gawk.texi: $(srcdir)/gawktexi.in $(srcdir)/sidebar.awk
awk -f $(srcdir)/sidebar.awk < $(srcdir)/gawktexi.in > gawk.texi
-postscript: gawk.ps gawkinet.ps gawk.1.ps igawk.1.ps $(AWKCARD)
+postscript: gawk.ps gawkinet.ps using-git.ps gawk.1.ps $(AWKCARD)
-pdf-local: postscript gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf igawk.1.pdf
+pdf-local: postscript gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf
gawk.ps: gawk.dvi
TEXINPUTS=$(srcdir): dvips -o gawk.ps gawk.dvi
@@ -87,18 +87,15 @@ gawk.ps: gawk.dvi
gawkinet.ps: gawkinet.dvi
TEXINPUTS=$(srcdir): dvips -o gawkinet.ps gawkinet.dvi
+using-git.ps: using-git.dvi
+ TEXINPUTS=$(srcdir): dvips -o using-git.ps using-git.dvi
+
gawk.1.ps: gawk.1
-groff -man $(srcdir)/gawk.1 > gawk.1.ps
gawk.1.pdf: gawk.1.ps
ps2pdf gawk.1.ps gawk.1.pdf
-igawk.1.ps: igawk.1
- -groff -man $(srcdir)/igawk.1 > igawk.1.ps
-
-igawk.1.pdf: igawk.1.ps
- ps2pdf igawk.1.ps igawk.1.pdf
-
awkcard.tr: awkcard.in
sed 's:SRCDIR:$(srcdir):' < $(srcdir)/awkcard.in > awkcard.tr
diff --git a/doc/Makefile.in b/doc/Makefile.in
index 96103d72..b3523a20 100644
--- a/doc/Makefile.in
+++ b/doc/Makefile.in
@@ -116,14 +116,14 @@ ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/arch.m4 \
$(top_srcdir)/m4/codeset.m4 $(top_srcdir)/m4/gettext.m4 \
$(top_srcdir)/m4/iconv.m4 $(top_srcdir)/m4/intlmacosx.m4 \
- $(top_srcdir)/m4/isc-posix.m4 $(top_srcdir)/m4/lcmessage.m4 \
- $(top_srcdir)/m4/lib-ld.m4 $(top_srcdir)/m4/lib-link.m4 \
- $(top_srcdir)/m4/lib-prefix.m4 $(top_srcdir)/m4/libsigsegv.m4 \
- $(top_srcdir)/m4/longlong.m4 $(top_srcdir)/m4/mpfr.m4 \
- $(top_srcdir)/m4/nls.m4 $(top_srcdir)/m4/noreturn.m4 \
- $(top_srcdir)/m4/po.m4 $(top_srcdir)/m4/progtest.m4 \
- $(top_srcdir)/m4/readline.m4 $(top_srcdir)/m4/socket.m4 \
- $(top_srcdir)/m4/ulonglong.m4 $(top_srcdir)/configure.ac
+ $(top_srcdir)/m4/lcmessage.m4 $(top_srcdir)/m4/lib-ld.m4 \
+ $(top_srcdir)/m4/lib-link.m4 $(top_srcdir)/m4/lib-prefix.m4 \
+ $(top_srcdir)/m4/libsigsegv.m4 $(top_srcdir)/m4/longlong.m4 \
+ $(top_srcdir)/m4/mpfr.m4 $(top_srcdir)/m4/nls.m4 \
+ $(top_srcdir)/m4/noreturn.m4 $(top_srcdir)/m4/po.m4 \
+ $(top_srcdir)/m4/progtest.m4 $(top_srcdir)/m4/readline.m4 \
+ $(top_srcdir)/m4/socket.m4 $(top_srcdir)/m4/ulonglong.m4 \
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
@@ -173,13 +173,14 @@ AM_V_texidevnull = $(am__v_texidevnull_@AM_V@)
am__v_texidevnull_ = $(am__v_texidevnull_@AM_DEFAULT_V@)
am__v_texidevnull_0 = > /dev/null
am__v_texidevnull_1 =
-INFO_DEPS = $(srcdir)/gawk.info $(srcdir)/gawkinet.info
+INFO_DEPS = $(srcdir)/gawk.info $(srcdir)/gawkinet.info \
+ $(srcdir)/using-git.info
am__TEXINFO_TEX_DIR = $(srcdir)
-DVIS = gawk.dvi gawkinet.dvi
-PDFS = gawk.pdf gawkinet.pdf
-PSS = gawk.ps gawkinet.ps
-HTMLS = gawk.html gawkinet.html
-TEXINFOS = gawk.texi gawkinet.texi
+DVIS = gawk.dvi gawkinet.dvi using-git.dvi
+PDFS = gawk.pdf gawkinet.pdf using-git.pdf
+PSS = gawk.ps gawkinet.ps using-git.ps
+HTMLS = gawk.html gawkinet.html using-git.html
+TEXINFOS = gawk.texi gawkinet.texi using-git.texi
TEXI2DVI = texi2dvi
TEXI2PDF = $(TEXI2DVI) --pdf --batch
MAKEINFOHTML = $(MAKEINFO) --html
@@ -287,6 +288,7 @@ PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
POSUB = @POSUB@
+RANLIB = @RANLIB@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
@@ -352,8 +354,8 @@ target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
-info_TEXINFOS = gawk.texi gawkinet.texi
-man_MANS = gawk.1 igawk.1
+info_TEXINFOS = gawk.texi gawkinet.texi using-git.texi
+man_MANS = gawk.1
EXTRA_DIST = ChangeLog ChangeLog.0 README.card ad.block setter.outline \
awkcard.in awkforai.txt texinfo.tex cardfonts \
api-figure1.eps api-figure1.fig api-figure1.pdf \
@@ -378,7 +380,7 @@ EXTRA_DIST = ChangeLog ChangeLog.0 README.card ad.block setter.outline \
# Get rid of generated files when cleaning
-CLEANFILES = *.ps *.html *.dvi *~ awkcard.nc awkcard.tr gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf igawk.1.pdf
+CLEANFILES = *.ps *.html *.dvi *~ awkcard.nc awkcard.tr gawk.pdf gawkinet.pdf using-git.pdf awkcard.pdf gawk.1.pdf
TROFF = groff -t -Tps -U
SEDME = sed -e "s/^level0 restore/level0 restore flashme 100 72 moveto (Copyright `date '+%m-%d-%y %T'`, FSF, Inc. (all)) show/" \
-e "s/^\/level0 save def/\/level0 save def 30 -48 translate/"
@@ -476,6 +478,10 @@ $(srcdir)/gawkinet.info: gawkinet.texi
gawkinet.dvi: gawkinet.texi
gawkinet.pdf: gawkinet.texi
gawkinet.html: gawkinet.texi
+$(srcdir)/using-git.info: using-git.texi
+using-git.dvi: using-git.texi
+using-git.pdf: using-git.texi
+using-git.html: using-git.texi
.dvi.ps:
$(AM_V_DVIPS)TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \
$(DVIPS) $(AM_V_texinfo) -o $@ $<
@@ -557,13 +563,16 @@ dist-info: $(INFO_DEPS)
done
mostlyclean-aminfo:
- -rm -rf gawk.t2d gawk.t2p gawkinet.t2d gawkinet.t2p
+ -rm -rf gawk.t2d gawk.t2p gawkinet.t2d gawkinet.t2p using-git.t2d \
+ using-git.t2p
clean-aminfo:
-test -z "gawk.dvi gawk.pdf gawk.ps gawk.html gawkinet.dvi gawkinet.pdf \
- gawkinet.ps gawkinet.html" \
+ gawkinet.ps gawkinet.html using-git.dvi using-git.pdf \
+ using-git.ps using-git.html" \
|| rm -rf gawk.dvi gawk.pdf gawk.ps gawk.html gawkinet.dvi gawkinet.pdf \
- gawkinet.ps gawkinet.html
+ gawkinet.ps gawkinet.html using-git.dvi using-git.pdf \
+ using-git.ps using-git.html
maintainer-clean-aminfo:
@list='$(INFO_DEPS)'; for i in $$list; do \
@@ -882,9 +891,9 @@ uninstall-man: uninstall-man1
gawk.texi: $(srcdir)/gawktexi.in $(srcdir)/sidebar.awk
awk -f $(srcdir)/sidebar.awk < $(srcdir)/gawktexi.in > gawk.texi
-postscript: gawk.ps gawkinet.ps gawk.1.ps igawk.1.ps $(AWKCARD)
+postscript: gawk.ps gawkinet.ps using-git.ps gawk.1.ps $(AWKCARD)
-pdf-local: postscript gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf igawk.1.pdf
+pdf-local: postscript gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf
gawk.ps: gawk.dvi
TEXINPUTS=$(srcdir): dvips -o gawk.ps gawk.dvi
@@ -892,18 +901,15 @@ gawk.ps: gawk.dvi
gawkinet.ps: gawkinet.dvi
TEXINPUTS=$(srcdir): dvips -o gawkinet.ps gawkinet.dvi
+using-git.ps: using-git.dvi
+ TEXINPUTS=$(srcdir): dvips -o using-git.ps using-git.dvi
+
gawk.1.ps: gawk.1
-groff -man $(srcdir)/gawk.1 > gawk.1.ps
gawk.1.pdf: gawk.1.ps
ps2pdf gawk.1.ps gawk.1.pdf
-igawk.1.ps: igawk.1
- -groff -man $(srcdir)/igawk.1 > igawk.1.ps
-
-igawk.1.pdf: igawk.1.ps
- ps2pdf igawk.1.ps igawk.1.pdf
-
awkcard.tr: awkcard.in
sed 's:SRCDIR:$(srcdir):' < $(srcdir)/awkcard.in > awkcard.tr
diff --git a/doc/awkcard.in b/doc/awkcard.in
index 34648bef..418cc8d9 100644
--- a/doc/awkcard.in
+++ b/doc/awkcard.in
@@ -1,7 +1,7 @@
.\" AWK Reference Card --- Arnold Robbins, arnold@skeeve.com
.\"
-.\" Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-.\" 2003, 2004, 2005, 2007, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016
+.\" Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+.\" 2005, 2007, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017
.\" Free Software Foundation, Inc.
.\"
.\" Permission is granted to make and distribute verbatim copies of
@@ -244,7 +244,7 @@ Overridden by \*(FC\-\^\-posix\*(FR.
.TI "\*(FC\-c\*(FR, \*(FC\-\^\-traditional\*(FR
Disable \*(GK-specific extensions.
.TI "\*(FC\-C\*(FR, \*(FC\-\^\-copyright\*(FR
-Print the short version of the GNU
+Print the short GNU
copyright information on \*(FCstdout\*(FR.
.TI "\*(FC\-d\*(FR[\*(FIfile\*(FR], \*(FC\-\^\-dump-variables\*(FR[\*(FC=\*(FIfile\*(FR]
Print a sorted list of global variables,
@@ -292,7 +292,7 @@ Force use of the locale's decimal point character when parsing input data.
Output a pretty printed version of the program to \*(FIfile\*(FR
(default: \*(FCawkprof.out\*(FR).
.TI "\*(FC\-O\*(FR, \*(FC\-\^\-optimize\*(FR
-Enable some internal optimizations.
+Enable internal optimizations (default is on).
.TI "\*(FC\-p\*(FR[\*(FC\*(FIfile\*(FR], \*(FC\-\^\-profile\*(FR[\*(FC=\*(FIfile\*(FR]
Send profiling data to \*(FIfile\*(FR
(default: \*(FCawkprof.out\*(FR).
@@ -300,6 +300,9 @@ The profile contains execution counts in the left margin
of each statement in the program.
.TI "\*(FC\-P\*(FR, \*(FC\-\^\-posix\*(FR
Disable common and GNU extensions.\*(CB
+.TI "\*(FC\-r\*(FR, \*(FC\-\^\-re\-interval\*(FR
+Enable \*(FIinterval expressions\*(FR.
+(Needed with \*(FC\-c\*(FR.)
.in -4n
.EB "\s+2\f(HBCOMMAND LINE ARGUMENTS (\*(GK\f(HB)\*(FR\s0"
@@ -311,9 +314,8 @@ Disable common and GNU extensions.\*(CB
.ES
.fi
.in +4n
-.TI "\*(FC\-r\*(FR, \*(FC\-\^\-re\-interval\*(FR
-Enable \*(FIinterval expressions\*(FR.
-(Needed with \*(FC\-c\*(FR.)
+.TI "\*(FC\-s\*(FR, \*(FC\-\^\-no\-optimize\*(FR
+Disable internal optimizations.
.TI "\*(FC\-S\*(FR, \*(FC\-\^\-sandbox\*(FR
Disable the \*(FCsystem()\*(FR function,
input redirection with \*(FCgetline\*(FR,
@@ -1606,11 +1608,14 @@ may be used in place of
.fi
.TS
expand;
-l lw(2i).
+l lw(1.9i).
\*(CD\*(FCatan2(\*(FIy\*(FC, \*(FIx\*(FC)\*(FR The arctangent of \*(FIy/x\fP in radians.
\*(FCcos(\*(FIexpr\*(FC)\*(FR The cosine of \*(FIexpr\fP, which is in radians.
\*(FCexp(\*(FIexpr\*(FC)\*(FR The exponential function (\*(FIe \*(FC^ \*(FIx\*(FR).
\*(FCint(\*(FIexpr\*(FC)\*(FR Truncate to integer.
+\*(CB\*(FCintdiv(\*(FIn\*(FR\*(FC,\*(FI d\*(FR\*(FC,\*(FI r\*(FR\*(FC)\*(FR T{
+Return result of integer division in \*(FIr\*(FR.\*(CD
+T}
\*(FClog(\*(FIexpr\*(FC)\*(FR The natural logarithm function (base \*(FIe\^\*(FR).
\*(FCrand()\fP A random number \*(FIN\fP such that 0 \(<= \*(FIN\fP < 1.
\*(FCsin(\*(FIexpr\*(FC)\*(FR The sine of \*(FIexpr\fP, which is in radians.
@@ -1789,13 +1794,16 @@ formatting them.
.fi
.in +.2i
.ti -.2i
-\*(FCmktime(\*(FIdatespec\*(FC)\*(FR
+\*(FCmktime(\*(FIdatespec \*(FR[\*(FC, \*(FIutc-flag\*(FR]\*(FC)\*(FR
.br
Convert \*(FIdatespec\fP into a time
stamp of the same form as returned by \*(FCsystime()\*(FR
and return it.
The \*(FIdatespec\fP is a string of the form
\*(FC"\*(FIYYYY MM DD HH MM SS[ DST]\*(FC"\*(FR.
+If \*(FIutc-flag\*(FR
+is present and is non-zero or non-null, the result
+is in UTC, otherwise it is in local time.
.ti -.2i
\*(FCstrftime(\*(FR[\*(FIformat \*(FR[\*(FC, \*(FItimestamp\*(FR[\*(FC, \*(FIutc-flag\*(FR]]]\*(FC)\*(FR
.br
@@ -1882,7 +1890,12 @@ See the manual for details.\*(CB
.ti -.2i
\*(CD\*(FCisarray(\*(FIx\*(FC)\*(FR
.br
-Return true if \*(FIx\fP is an array, false otherwise.\*(CB
+Return true if \*(FIx\fP is an array, false otherwise.
+.br
+.ti -.2i
+\*(FCtypeof(\*(FIx\*(FC)\*(FR
+.br
+Return a string indicating the type of \*(FIx\fP.\*(CB
.in -.2i
.EB "\s+2\f(HBTYPE FUNCTIONS (\*(GK\f(HB)\*(FR\s0"
.sp .5
diff --git a/doc/gawk.1 b/doc/gawk.1
index 2caa87ad..2460a686 100644
--- a/doc/gawk.1
+++ b/doc/gawk.1
@@ -13,7 +13,7 @@
. if \w'\(rq' .ds rq "\(rq
. \}
.\}
-.TH GAWK 1 "Mar 7 2016" "Free Software Foundation" "Utility Commands"
+.TH GAWK 1 "Jun 30 2016" "Free Software Foundation" "Utility Commands"
.SH NAME
gawk \- pattern scanning and processing language
.SH SYNOPSIS
@@ -405,17 +405,20 @@ is provided,
uses a file named
.B awkprof.out
in the current directory.
+Implies
+.BR \-\^\-no\-optimize .
.TP
.PD 0
.B \-O
.TP
.PD
.B \-\^\-optimize
-Enable optimizations upon the internal representation of the program.
+Enable
+.IR gawk 's
+default optimizations upon the internal representation of the program.
Currently, this includes simple constant-folding, and tail call
-elimination for recursive functions. The
-.I gawk
-maintainer hopes to add additional optimizations over time.
+elimination for recursive functions.
+This option is on by default.
.TP
.PD 0
\fB\-p\fR[\fIprof-file\fR]
@@ -428,6 +431,8 @@ The default is
.BR awkprof.out .
The profile contains execution counts of each statement in the program
in the left margin and function call counts for each user-defined function.
+Implies
+.BR \-\^\-no\-optimize .
.TP
.PD 0
.B \-P
@@ -444,11 +449,6 @@ mode, with the following additional restrictions:
escape sequences are not recognized.
.TP
\(bu
-Only space and tab act as field separators when
-.B FS
-is set to a single space, newline does not.
-.TP
-\(bu
You cannot continue lines after
.B ?
and
@@ -493,6 +493,15 @@ They are enabled by default, but this option remains for use with
.BR \-\^\-traditional .
.TP
.PD 0
+.B \-s
+.TP
+.PD
+.B \-\^\-no\-optimize
+Disable
+.IR gawk 's
+default optimizations upon the internal representation of the program.
+.TP
+.PD 0
.BI \-S
.TP
.PD
@@ -785,9 +794,6 @@ In the special case that
.B FS
is a single space, fields are separated
by runs of spaces and/or tabs and/or newlines.
-(But see the section
-.BR "POSIX COMPATIBILITY" ,
-below).
.BR NOTE :
The value of
.B IGNORECASE
@@ -918,11 +924,17 @@ An array containing the values of the current environment.
The array is indexed by the environment variables, each element being
the value of that variable (e.g., \fBENVIRON["HOME"]\fP might be
\fB"/home/arnold"\fR).
-Changing this array does not affect the environment seen by programs which
+.sp
+In POSIX mode,
+changing this array does not affect the environment seen by programs which
.I gawk
spawns via redirection or the
.B system()
function.
+Otherwise,
+.I gawk
+updates its real environment so that programs it spawns see
+the changes.
.TP
.B ERRNO
If a system error occurs either doing a redirection for
@@ -936,6 +948,15 @@ then
will contain
a string describing the error.
The value is subject to translation in non-English locales.
+If the string in
+.B ERRNO
+corresponds to a system error in the
+.IR errno (3)
+variable, then the numeric value can be found in
+.B PROCINFO["errno"].
+For non-system errors,
+.B PROCINFO["errno"]
+will be zero.
.TP
.B FIELDWIDTHS
A whitespace separated list of field widths. When set,
@@ -1093,6 +1114,13 @@ The value of the
.IR getegid (2)
system call.
.TP
+\fBPROCINFO["errno"]\fP
+The value of
+.IR errno (3)
+when
+.BR ERRNO
+is set to the associated error message.
+.TP
\fBPROCINFO["euid"]\fP
The value of the
.IR geteuid (2)
@@ -1124,7 +1152,7 @@ knows about the identifiers after it has finished parsing the program; they are
updated while the program runs.
For each identifier, the value of the element is one of the following:
.RS
-.TP
+.TP \w'\fB"extension"\fR'u+1n
\fB"array"\fR
The identifier is an array.
.TP
@@ -1207,6 +1235,14 @@ change
.IR gawk 's
behavior:
.TP
+\fBPROCINFO["NONFATAL"]\fR
+If this exists, then I/O errors for all output redirections become nonfatal.
+.TP
+\fBPROCINFO["\fIoutput_name\fB", "NONFATAL"]\fR
+Make output errors for
+.I output_name
+be nonfatal.
+.TP
\fBPROCINFO["\fIcommand\fB", "pty"]\fR
Use a pseudo-tty for two-way communication with
.I command
@@ -1220,6 +1256,23 @@ where
is a redirection string or a filename. A value of zero or
less than zero means no timeout.
.TP
+\fBPROCINFO["\fIinput\^\fB", "RETRY"]\fR
+If an I/O error that may be retried occurs when reading data from
+.IR input ,
+and this array entry exists, then
+.B getline
+will return \-2 instead of following the default behavior of returning \-1
+and configuring
+.IR input
+to return no further data.
+An I/O error that may be retried is one where
+.IR errno (3)
+has the value EAGAIN, EWOULDBLOCK, EINTR, or ETIMEDOUT.
+This may be useful in conjunction with
+\fBPROCINFO["\fIinput\^\fB", "READ_TIMEOUT"]\fR
+or situations where a file descriptor has been configured to behave in a
+non-blocking fashion.
+.TP
\fBPROCINFO["sorted_in"]\fP
If this element exists in
.BR PROCINFO ,
@@ -1240,7 +1293,9 @@ Supported values are
\fB"@val_num_desc"\fR,
and
\fB"@unsorted"\fR.
-The value can also be the name of any comparison function defined
+The value can also be the name (as a
+.IR string )
+of any comparison function defined
as follows:
.sp
.in +5m
@@ -1544,9 +1599,9 @@ Vertical tab.
The character represented by the string of hexadecimal digits following
the
.BR \ex .
-As in ISO C, all following hexadecimal digits are considered part of
+Up to two
+following hexadecimal digits are considered part of
the escape sequence.
-(This feature should tell us something about language design by committee.)
E.g., \fB"\ex1B"\fR is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
.TP
.BI \e ddd
@@ -2291,6 +2346,13 @@ below.)
The
.B getline
command returns 1 on success, 0 on end of file, and \-1 on an error.
+If the
+.IR errno (3)
+value indicates that the I/O operation may be retried,
+and \fBPROCINFO["\fIinput\^\fP", "RETRY"]\fR
+is set, then \-2 will be returned instead of \-1, and further calls to
+.B getline
+may be attempted.
Upon an error,
.B ERRNO
is set to a string describing the problem.
@@ -2643,6 +2705,23 @@ The exponential function.
.BI int( expr )
Truncate to integer.
.TP
+.BI intdiv( num ", " denom ", " result )
+Truncate
+.I num
+and
+.I denom
+to integers. Return the quotient of
+.I num
+divided by
+.I denom
+in \fIresult\fB["quotient"]\fR
+and the remainder
+in \fIresult\fB["remainder"]\fR.
+This is a
+.I gawk
+extension, primarily of value when working with
+arbitrarily large integers.
+.TP
.BI log( expr )
The natural logarithm function.
.TP
@@ -3026,7 +3105,7 @@ provides the following functions for obtaining time stamps and
formatting them.
.PP
.TP "\w'\fBsystime()\fR'u+1n"
-\fBmktime(\fIdatespec\fB)\fR
+\fBmktime(\fIdatespec\fR [\fB, \fIutc-flag\fR]\fB)\fR
Turn
.I datespec
into a time stamp of the same form as returned by
@@ -3048,7 +3127,11 @@ The values of these numbers need not be within the ranges specified;
for example, an hour of \-1 means 1 hour before midnight.
The origin-zero Gregorian calendar is assumed,
with year 0 preceding year 1 and year \-1 preceding year 0.
-The time is assumed to be in the local timezone.
+If
+.I utc-flag
+is present and is non-zero or non-null, the time is assumed to be in
+the UTC time zone; otherwise, the
+time is assumed to be in the local time zone.
If the daylight saving flag is positive,
the time is assumed to be daylight saving time;
if zero, the time is assumed to be standard time;
@@ -3102,6 +3185,11 @@ values to
.B uintmax_t
integers, doing the operation, and then converting the
result back to floating point.
+.PP
+.BR NOTE :
+Passing negative operands to any of these functions causes
+a fatal error.
+.PP
The functions are:
.TP "\w'\fBrshift(\fIval\fB, \fIcount\fB)\fR'u+2n"
\fBand(\fIv1\fB, \fIv2 \fR[, ...]\fB)\fR
@@ -3134,13 +3222,28 @@ bits.
Return the bitwise XOR of the values provided in the argument list.
There must be at least two.
.PP
-.SS Type Function
+.SS Type Functions
The following function is for use with multidimensional arrays.
.TP
\fBisarray(\fIx\fB)\fR
Return true if
.I x
is an array, false otherwise.
+.PP
+You can tell the type of any variable or array element with the
+following function:
+.TP
+\fBtypeof(\fIx\fB)\fR
+Return a string indicating the type of
+.IR x .
+The string will be one of
+\fB"array"\fP,
+\fB"number"\fP,
+\fB"regexp"\fP,
+\fB"string"\fP,
+\fB"strnum"\fP,
+or
+\fB"undefined"\fP.
.SS Internationalization Functions
The following functions may be used from within your AWK program for
translating strings at run-time.
@@ -3987,7 +4090,7 @@ We thank him.
.SH COPYING PERMISSIONS
Copyright \(co 1989, 1991, 1992, 1993, 1994, 1995, 1996,
1997, 1998, 1999, 2001, 2002, 2003, 2004, 2005, 2007, 2009,
-2010, 2011, 2012, 2013, 2014, 2016
+2010, 2011, 2012, 2013, 2014, 2015, 2016
Free Software Foundation, Inc.
.PP
Permission is granted to make and distribute verbatim copies of
diff --git a/doc/gawk.info b/doc/gawk.info
index 1388b32b..197dc2f7 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -217,6 +217,7 @@ in (a) below. A copy of the license is included in the section entitled
'getline'.
* Getline Summary:: Summary of 'getline' Variants.
* Read Timeout:: Reading input with a timeout.
+* Retrying Input:: Retrying input after certain errors.
* Command-line directories:: What happens if you put a directory on
the command line.
* Input Summary:: Input summary.
@@ -246,6 +247,7 @@ in (a) below. A copy of the license is included in the section entitled
* Special Caveats:: Things to watch out for.
* Close Files And Pipes:: Closing Input and Output Files and
Pipes.
+* Nonfatal:: Enabling Nonfatal Output.
* Output Summary:: Output summary.
* Output Exercises:: Exercises.
* Values:: Constants, Variables, and Regular
@@ -255,6 +257,9 @@ in (a) below. A copy of the license is included in the section entitled
* Nondecimal-numbers:: What are octal and hex numbers.
* Regexp Constants:: Regular Expression constants.
* Using Constant Regexps:: When and how to use a regexp constant.
+* Standard Regexp Constants:: Regexp constants in standard
+ 'awk'.
+* Strong Regexp Constants:: Strongly typed regexp constants.
* Variables:: Variables give names to values for
later use.
* Using Variables:: Using variables in your programs.
@@ -557,11 +562,14 @@ in (a) below. A copy of the license is included in the section entitled
* Array Functions:: Functions for working with arrays.
* Flattening Arrays:: How to flatten arrays.
* Creating Arrays:: How to create and populate arrays.
+* Redirection API:: How to access and manipulate
+ redirections.
* Extension API Variables:: Variables provided by the API.
* Extension Versioning:: API Version information.
* Extension API Informational Variables:: Variables providing information about
'gawk''s invocation.
* Extension API Boilerplate:: Boilerplate code for using the API.
+* Changes from API V1:: Changes from V1 of the API.
* Finding Extensions:: How 'gawk' finds compiled
extensions.
* Extension Example:: Example C code for an extension.
@@ -615,14 +623,16 @@ in (a) below. A copy of the license is included in the section entitled
* Unix Installation:: Installing 'gawk' under
various versions of Unix.
* Quick Installation:: Compiling 'gawk' under Unix.
+* Shell Startup Files:: Shell convenience functions.
* Additional Configuration Options:: Other compile-time options.
* Configuration Philosophy:: How it's all supposed to work.
* Non-Unix Installation:: Installation on Other Operating
Systems.
-* PC Installation:: Installing and Compiling 'gawk' on
- Microsoft Windows.
+* PC Installation:: Installing and Compiling
+ 'gawk' on Microsoft Windows.
* PC Binary Installation:: Installing a prepared distribution.
-* PC Compiling:: Compiling 'gawk' for Windows32.
+* PC Compiling:: Compiling 'gawk' for
+ Windows32.
* PC Using:: Running 'gawk' on Windows32.
* Cygwin:: Building and running 'gawk'
for Cygwin.
@@ -1849,11 +1859,45 @@ that it is worth addressing.
The "shells" on Microsoft Windows systems use the double-quote
character for quoting, and make it difficult or impossible to include an
escaped double-quote character in a command-line script. The following
-example, courtesy of Jeroen Brink, shows how to print all lines in a
-file surrounded by double quotes:
+example, courtesy of Jeroen Brink, shows how to escape the double quotes
+from this one-liner script that prints all lines in a file surrounded by
+double quotes:
+
+ { print "\"" $0 "\"" }
+
+In an MS-Windows command-line the one-liner script above may be passed
+as follows:
gawk "{ print \"\042\" $0 \"\042\" }" FILE
+ In this example the '\042' is the octal code for a double-quote;
+'gawk' converts it into a real double-quote for output by the 'print'
+statement.
+
+ In MS-Windows escaping double-quotes is a little tricky because you
+use backslashes to escape double-quotes, but backslashes themselves are
+not escaped in the usual way; indeed they are either duplicated or not,
+depending upon whether there is a subsequent double-quote. The
+MS-Windows rule for double-quoting a string is the following:
+
+ 1. For each double quote in the original string, let N be the number of
+ backslash(es) before it, N might be zero. Replace these N
+ backslash(es) by 2*N+1 backslash(es)
+
+ 2. Let N be the number of backslash(es) trailing the original string, N
+ might be zero. Replace these N backslash(es) by 2*N backslash(es)
+
+ 3. Surround the resulting string by double-quotes.
+
+ So to double-quote the one-liner script '{ print "\"" $0 "\"" }' from
+the previous example you would do it this way:
+
+ gawk "{ print \"\\\"\" $0 \"\\\"\" }" FILE
+
+However, the use of '\042' instead of '\\\"' is also possible and easier
+to read, because backslashes that are not followed by a double-quote
+don't need duplication.
+

File: gawk.info, Node: Sample Data Files, Next: Very Simple, Prev: Running gawk, Up: Getting Started
@@ -2613,30 +2657,33 @@ The following list describes options mandated by the POSIX standard:
'-o'[FILE]
'--pretty-print'['='FILE]
- Enable pretty-printing of 'awk' programs. By default, the output
- program is created in a file named 'awkprof.out' (*note
- Profiling::). The optional FILE argument allows you to specify a
- different file name for the output. No space is allowed between
- the '-o' and FILE, if FILE is supplied.
+ Enable pretty-printing of 'awk' programs. Implies '--no-optimize'.
+ By default, the output program is created in a file named
+ 'awkprof.out' (*note Profiling::). The optional FILE argument
+ allows you to specify a different file name for the output. No
+ space is allowed between the '-o' and FILE, if FILE is supplied.
- NOTE: Due to the way 'gawk' has evolved, with this option your
- program still executes. This will change in the next major
- release, such that 'gawk' will only pretty-print the program
- and not run it.
+ NOTE: In the past, this option would also execute your
+ program. This is no longer the case.
'-O'
'--optimize'
- Enable some optimizations on the internal representation of the
- program. At the moment, this includes just simple constant
- folding.
+ Enable 'gawk''s default optimizations on the internal
+ representation of the program. At the moment, this includes simple
+ constant folding and tail recursion elimination in function calls.
+
+ These optimizations are enabled by default. This option remains
+ primarily for backwards compatibility. However, it may be used to
+ cancel the effect of an earlier '-s' option (see later in this
+ list).
'-p'[FILE]
'--profile'['='FILE]
- Enable profiling of 'awk' programs (*note Profiling::). By
- default, profiles are created in a file named 'awkprof.out'. The
- optional FILE argument allows you to specify a different file name
- for the profile file. No space is allowed between the '-p' and
- FILE, if FILE is supplied.
+ Enable profiling of 'awk' programs (*note Profiling::). Implies
+ '--no-optimize'. By default, profiles are created in a file named
+ 'awkprof.out'. The optional FILE argument allows you to specify a
+ different file name for the profile file. No space is allowed
+ between the '-p' and FILE, if FILE is supplied.
The profile contains execution counts for each statement in the
program in the left margin, and function call counts for each
@@ -2650,9 +2697,6 @@ The following list describes options mandated by the POSIX standard:
in 'gawk' that are disabled by this option. Also, the following
additional restrictions apply:
- * Newlines do not act as whitespace to separate fields when 'FS'
- is equal to a single space (*note Fields::).
-
* Newlines are not allowed after '?' or ':' (*note Conditional
Exp::).
@@ -2673,6 +2717,11 @@ The following list describes options mandated by the POSIX standard:
remains (both for backward compatibility and for use in combination
with '--traditional').
+'-s'
+'--no-optimize'
+ Disable 'gawk''s default optimizations on the internal
+ representation of the program.
+
'-S'
'--sandbox'
Disable the 'system()' function, input redirections with 'getline',
@@ -2921,6 +2970,9 @@ Since '.' is included at the beginning, 'gawk' searches first in the
current directory and then in '/usr/local/share/awk'. In practice, this
means that you will rarely need to change the value of 'AWKPATH'.
+ *Note Shell Startup Files::, for information on functions that help
+to manipulate the 'AWKPATH' variable.
+
'gawk' places the value of the search path that it used into
'ENVIRON["AWKPATH"]'. This provides access to the actual search path
value from within an 'awk' program.
@@ -2960,6 +3012,9 @@ empty value, 'gawk' uses a default path; this is typically
'/usr/local/lib/gawk', although it can vary depending upon how 'gawk'
was built.
+ *Note Shell Startup Files::, for information on functions that help
+to manipulate the 'AWKLIBPATH' variable.
+
'gawk' places the value of the search path that it used into
'ENVIRON["AWKLIBPATH"]'. This provides access to the actual search path
value from within an 'awk' program.
@@ -2986,7 +3041,8 @@ used by regular users:
'GAWK_SOCK_RETRIES'
Controls the number of times 'gawk' attempts to retry a two-way
TCP/IP (socket) connection before giving up. *Note TCP/IP
- Networking::.
+ Networking::. Note that when nonfatal I/O is enabled (*note
+ Nonfatal::), 'gawk' only tries to open a TCP/IP socket once.
'POSIXLY_CORRECT'
Causes 'gawk' to switch to POSIX-compatibility mode, disabling all
@@ -3036,13 +3092,6 @@ change. The variables are:
supposed to be differences, but occasionally theory and practice
don't coordinate with each other.)
-'GAWK_NO_PP_RUN'
- When 'gawk' is invoked with the '--pretty-print' option, it will
- not run the program if this environment variable exists.
-
- CAUTION: This variable will not survive into the next major
- release.
-
'GAWK_STACKSIZE'
This specifies the amount by which 'gawk' should grow its internal
evaluation stack, when needed.
@@ -3433,15 +3482,18 @@ sequences apply to both string constants and regexp constants:
'\xHH...'
The hexadecimal value HH, where HH stands for a sequence of
- hexadecimal digits ('0'-'9', and either 'A'-'F' or 'a'-'f'). Like
- the same construct in ISO C, the escape sequence continues until
- the first nonhexadecimal digit is seen. (c.e.) However, using
- more than two hexadecimal digits produces undefined results. (The
- '\x' escape sequence is not allowed in POSIX 'awk'.)
-
- CAUTION: The next major release of 'gawk' will change, such
- that a maximum of two hexadecimal digits following the '\x'
- will be used.
+ hexadecimal digits ('0'-'9', and either 'A'-'F' or 'a'-'f'). A
+ maximum of two digits are allowed after the '\x'. Any further
+ hexadecimal digits are treated as simple letters or numbers.
+ (c.e.) (The '\x' escape sequence is not allowed in POSIX awk.)
+
+ CAUTION: In ISO C, the escape sequence continues until the
+ first nonhexadecimal digit is seen. For many years, 'gawk'
+ would continue incorporating hexadecimal digits into the value
+ until a non-hexadecimal digit or the end of the string was
+ encountered. However, using more than two hexadecimal digits
+ produced undefined results. As of version 4.2, only two
+ digits are processed.
'\/'
A literal slash (necessary for regexp constants only). This
@@ -4166,6 +4218,7 @@ be named on the 'awk' command line (*note Getline::).
* Getline:: Reading files under explicit program control
using the 'getline' function.
* Read Timeout:: Reading input with a timeout.
+* Retrying Input:: Retrying input after certain errors.
* Command-line directories:: What happens if you put a directory on the
command line.
* Input Summary:: Input summary.
@@ -4427,7 +4480,7 @@ When 'awk' reads an input record, the record is automatically "parsed"
or separated by the 'awk' utility into chunks called "fields". By
default, fields are separated by "whitespace", like words in a line.
Whitespace in 'awk' means any string of one or more spaces, TABs, or
-newlines;(1) other characters that are considered whitespace by other
+newlines; other characters that are considered whitespace by other
languages (such as formfeed, vertical tab, etc.) are _not_ considered
whitespace by 'awk'.
@@ -4479,11 +4532,6 @@ record:
-| Julie F
-| Samuel A
- ---------- Footnotes ----------
-
- (1) In POSIX 'awk', newlines are not considered whitespace for
-separating fields.
-

File: gawk.info, Node: Nonconstant Fields, Next: Changing Fields, Prev: Fields, Up: Reading Files
@@ -5429,6 +5477,11 @@ record, such as a file that cannot be opened, then 'getline' returns -1.
In this case, 'gawk' sets the variable 'ERRNO' to a string describing
the error that occurred.
+ If 'ERRNO' indicates that the I/O operation may be retried, and
+'PROCINFO["INPUT", "RETRY"]' is set, then 'getline' returns -2 instead
+of -1, and further calls to 'getline' may be attempted. *Note Retrying
+Input:: for further information about this feature.
+
In the following examples, COMMAND stands for a string value that
represents a shell command.
@@ -5864,7 +5917,7 @@ VAR
Table 4.1: 'getline' variants and what they set

-File: gawk.info, Node: Read Timeout, Next: Command-line directories, Prev: Getline, Up: Reading Files
+File: gawk.info, Node: Read Timeout, Next: Retrying Input, Prev: Getline, Up: Reading Files
4.10 Reading Input with a Timeout
=================================
@@ -5943,7 +5996,8 @@ per-command or per-connection basis.
'gawk' considers a timeout event to be an error even though the
attempt to read from the underlying device may succeed in a later
attempt. This is a limitation, and it also means that you cannot use
-this to multiplex input from two or more sources.
+this to multiplex input from two or more sources. *Note Retrying
+Input:: for a way to enable later I/O attempts to succeed.
Assigning a timeout value prevents read operations from blocking
indefinitely. But bear in mind that there are other ways 'gawk' can
@@ -5957,9 +6011,36 @@ can block indefinitely until some other process opens it for writing.
(1) This assumes that standard input is the keyboard.

-File: gawk.info, Node: Command-line directories, Next: Input Summary, Prev: Read Timeout, Up: Reading Files
+File: gawk.info, Node: Retrying Input, Next: Command-line directories, Prev: Read Timeout, Up: Reading Files
+
+4.11 Retrying Reads After Certain Input Errors
+==============================================
+
+This minor node describes a feature that is specific to 'gawk'.
+
+ When 'gawk' encounters an error while reading input, by default
+'getline' returns -1, and subsequent attempts to read from that file
+result in an end-of-file indication. However, you may optionally
+instruct 'gawk' to allow I/O to be retried when certain errors are
+encountered by setting a special element in the 'PROCINFO' array (*note
+Auto-set::):
+
+ PROCINFO["INPUT_NAME", "RETRY"] = 1
+
+ When this element exists, 'gawk' checks the value of the system (C
+language) 'errno' variable when an I/O error occurs. If 'errno'
+indicates a subsequent I/O attempt may succeed, 'getline' instead
+returns -2 and further calls to 'getline' may succeed. This applies to
+the 'errno' values 'EAGAIN', 'EWOULDBLOCK', 'EINTR', or 'ETIMEDOUT'.
+
+ This feature is useful in conjunction with 'PROCINFO["INPUT_NAME",
+"READ_TIMEOUT"]' or situations where a file descriptor has been
+configured to behave in a non-blocking fashion.
+
+
+File: gawk.info, Node: Command-line directories, Next: Input Summary, Prev: Retrying Input, Up: Reading Files
-4.11 Directories on the Command Line
+4.12 Directories on the Command Line
====================================
According to the POSIX standard, files named on the 'awk' command line
@@ -5982,7 +6063,7 @@ usable data from an 'awk' program.

File: gawk.info, Node: Input Summary, Next: Input Exercises, Prev: Command-line directories, Up: Reading Files
-4.12 Summary
+4.13 Summary
============
* Input is split into records based on the value of 'RS'. The
@@ -6054,7 +6135,7 @@ File: gawk.info, Node: Input Summary, Next: Input Exercises, Prev: Command-li

File: gawk.info, Node: Input Exercises, Prev: Input Summary, Up: Reading Files
-4.13 Exercises
+4.14 Exercises
==============
1. Using the 'FIELDWIDTHS' variable (*note Constant Size::), write a
@@ -6104,6 +6185,7 @@ function.
'gawk' allows access to inherited file
descriptors.
* Close Files And Pipes:: Closing Input and Output Files and Pipes.
+* Nonfatal:: Enabling Nonfatal Output.
* Output Summary:: Output summary.
* Output Exercises:: Exercises.
@@ -7011,7 +7093,7 @@ that 'gawk' provides:
behavior.

-File: gawk.info, Node: Close Files And Pipes, Next: Output Summary, Prev: Special Files, Up: Printing
+File: gawk.info, Node: Close Files And Pipes, Next: Nonfatal, Prev: Special Files, Up: Printing
5.9 Closing Input and Output Redirections
=========================================
@@ -7156,11 +7238,23 @@ there is a system problem closing the file or process. In these cases,
'gawk' sets the predefined variable 'ERRNO' to a string describing the
problem.
- In 'gawk', when closing a pipe or coprocess (input or output), the
-return value is the exit status of the command.(2) Otherwise, it is the
-return value from the system's 'close()' or 'fclose()' C functions when
-closing input or output files, respectively. This value is zero if the
-close succeeds, or -1 if it fails.
+ In 'gawk', starting with version 4.2, when closing a pipe or
+coprocess (input or output), the return value is the exit status of the
+command, as described in *note Table 5.1:
+table-close-pipe-return-values.(2) Otherwise, it is the return value
+from the system's 'close()' or 'fclose()' C functions when closing input
+or output files, respectively. This value is zero if the close
+succeeds, or -1 if it fails.
+
+Situation Return value from 'close()'
+--------------------------------------------------------------------------
+Normal exit of command Command's exit status
+Death by signal of command 256 + number of murderous signal
+Death by signal of command 512 + number of murderous signal
+with core dump
+Some kind of error -1
+
+Table 5.1: Return values from 'close()' of a pipe
The POSIX standard is very vague; it says that 'close()' returns zero
on success and a nonzero value otherwise. In general, different
@@ -7174,14 +7268,73 @@ Options::), 'gawk' just returns zero when closing a pipe.
is called a "zombie," and cleaning up after it is referred to as
"reaping."
- (2) This is a full 16-bit value as returned by the 'wait()' system
-call. See the system manual pages for information on how to decode this
-value.
+ (2) Prior to version 4.2, the return value from closing a pipe or
+co-process was the full 16-bit exit value as defined by the 'wait()'
+system call.

-File: gawk.info, Node: Output Summary, Next: Output Exercises, Prev: Close Files And Pipes, Up: Printing
+File: gawk.info, Node: Nonfatal, Next: Output Summary, Prev: Close Files And Pipes, Up: Printing
+
+5.10 Enabling Nonfatal Output
+=============================
+
+This minor node describes a 'gawk'-specific feature.
+
+ In standard 'awk', output with 'print' or 'printf' to a nonexistent
+file, or some other I/O error (such as filling up the disk) is a fatal
+error.
+
+ $ gawk 'BEGIN { print "hi" > "/no/such/file" }'
+ error-> gawk: cmd. line:1: fatal: can't redirect to `/no/such/file' (No such file or directory)
+
+ 'gawk' makes it possible to detect that an error has occurred,
+allowing you to possibly recover from the error, or at least print an
+error message of your choosing before exiting. You can do this in one
+of two ways:
+
+ * For all output files, by assigning any value to
+ 'PROCINFO["NONFATAL"]'.
+
+ * On a per-file basis, by assigning any value to 'PROCINFO[FILENAME,
+ "NONFATAL"]'. Here, FILENAME is the name of the file to which you
+ wish output to be nonfatal.
-5.10 Summary
+ Once you have enabled nonfatal output, you must check 'ERRNO' after
+every relevant 'print' or 'printf' statement to see if something went
+wrong. It is also a good idea to initialize 'ERRNO' to zero before
+attempting the output. For example:
+
+ $ gawk '
+ > BEGIN {
+ > PROCINFO["NONFATAL"] = 1
+ > ERRNO = 0
+ > print "hi" > "/no/such/file"
+ > if (ERRNO) {
+ > print("Output failed:", ERRNO) > "/dev/stderr"
+ > exit 1
+ > }
+ > }'
+ error-> Output failed: No such file or directory
+
+ Here, 'gawk' did not produce a fatal error; instead it let the 'awk'
+program code detect the problem and handle it.
+
+ This mechanism works also for standard output and standard error.
+For standard output, you may use 'PROCINFO["-", "NONFATAL"]' or
+'PROCINFO["/dev/stdout", "NONFATAL"]'. For standard error, use
+'PROCINFO["/dev/stderr", "NONFATAL"]'.
+
+ When attempting to open a TCP/IP socket (*note TCP/IP Networking::),
+'gawk' tries multiple times. The 'GAWK_SOCK_RETRIES' environment
+variable (*note Other Environment Variables::) allows you to override
+'gawk''s builtin default number of attempts. However, once nonfatal I/O
+is enabled for a given socket, 'gawk' only retries once, relying on
+'awk'-level code to notice that there was a problem.
+
+
+File: gawk.info, Node: Output Summary, Next: Output Exercises, Prev: Nonfatal, Up: Printing
+
+5.11 Summary
============
* The 'print' statement prints comma-separated expressions. Each
@@ -7203,10 +7356,15 @@ File: gawk.info, Node: Output Summary, Next: Output Exercises, Prev: Close Fi
For coprocesses, it is possible to close only one direction of the
communications.
+ * Normally errors with 'print' or 'printf' are fatal. 'gawk' lets
+ you make output errors be nonfatal either for all files or on a
+ per-file basis. You must then check for errors after every
+ relevant output statement.
+

File: gawk.info, Node: Output Exercises, Prev: Output Summary, Up: Printing
-5.11 Exercises
+5.12 Exercises
==============
1. Rewrite the program:
@@ -7337,9 +7495,9 @@ octal (base 8) and hexadecimal (base 16). In octal, the numbers go 0,
1 times 10 plus 1, so '11' in octal is 1 times 8 plus 1. This equals 9
in decimal. In hexadecimal, there are 16 digits. Because the everyday
decimal number system only has ten digits ('0'-'9'), the letters 'a'
-through 'f' are used to represent the rest. (Case in the letters is
-usually irrelevant; hexadecimal 'a' and 'A' have the same value.) Thus,
-'11' in hexadecimal is 1 times 16 plus 1, which equals 17 in decimal.
+through 'f' represent the rest. (Case in the letters is usually
+irrelevant; hexadecimal 'a' and 'A' have the same value.) Thus, '11' in
+hexadecimal is 1 times 16 plus 1, which equals 17 in decimal.
Just by looking at plain '11', you can't tell what base it's in. So,
in C, C++, and other languages derived from C, there is a special
@@ -7416,6 +7574,23 @@ File: gawk.info, Node: Using Constant Regexps, Next: Variables, Prev: Constan
6.1.2 Using Regular Expression Constants
----------------------------------------
+Regular expression constants consist of text describing a regular
+expression enclosed in slashes (such as '/the +answer/'). This minor
+node describes how such constants work in POSIX 'awk' and 'gawk', and
+then goes on to describe "strongly typed regexp constants", which are a
+'gawk' extension.
+
+* Menu:
+
+* Standard Regexp Constants:: Regexp constants in standard 'awk'.
+* Strong Regexp Constants:: Strongly typed regexp constants.
+
+
+File: gawk.info, Node: Standard Regexp Constants, Next: Strong Regexp Constants, Up: Using Constant Regexps
+
+6.1.2.1 Standard Regular Expression Constants
+.............................................
+
When used on the righthand side of the '~' or '!~' operators, a regexp
constant merely stands for the regexp that is to be matched. However,
regexp constants (such as '/foo/') may be used like simple expressions.
@@ -7490,6 +7665,73 @@ function, because passing a truth value in this way is probably not what
was intended.

+File: gawk.info, Node: Strong Regexp Constants, Prev: Standard Regexp Constants, Up: Using Constant Regexps
+
+6.1.2.2 Strongly Typed Regexp Constants
+.......................................
+
+This minor node describes a 'gawk'-specific feature.
+
+ As we saw in the previous minor node, regexp constants ('/.../') hold
+a strange position in the 'awk' language. In most contexts, they act
+like an expression: '$0 ~ /.../'. In other contexts, they denote only a
+regexp to be matched. In no case are they really a "first class
+citizen" of the language. That is, you cannot define a scalar variable
+whose type is "regexp" in the same sense that you can define a variable
+to be a number or a string:
+
+ num = 42 Numeric variable
+ str = "hi" String variable
+ re = /foo/ Wrong! re is the result of $0 ~ /foo/
+
+ For a number of more advanced use cases, it would be nice to have
+regexp constants that are "strongly typed"; in other words, that denote
+a regexp useful for matching, and not an expression.
+
+ 'gawk' provides this feature. A strongly typed regexp constant looks
+almost like a regular regexp constant, except that it is preceded by an
+'@' sign:
+
+ re = @/foo/ Regexp variable
+
+ Strongly typed regexp constants _cannot_ be used everywhere that a
+regular regexp constant can, because this would make the language even
+more confusing. Instead, you may use them only in certain contexts:
+
+ * On the righthand side of the '~' and '!~' operators: 'some_var ~
+ @/foo/' (*note Regexp Usage::).
+
+ * In the 'case' part of a 'switch' statement (*note Switch
+ Statement::).
+
+ * As an argument to one of the built-in functions that accept regexp
+ constants: 'gensub()', 'gsub()', 'match()', 'patsplit()',
+ 'split()', and 'sub()' (*note String Functions::).
+
+ * As a parameter in a call to a user-defined function (*note
+ User-defined::).
+
+ * On the righthand side of an assignment to a variable: 'some_var =
+ @/foo/'. In this case, the type of 'some_var' is regexp.
+ Additionally, 'some_var' can be used with '~' and '!~', passed to
+ one of the built-in functions listed above, or passed as a
+ parameter to a user-defined function.
+
+ You may use the 'typeof()' built-in function (*note Type Functions::)
+to determine if a variable or function parameter is a regexp variable.
+
+ The true power of this feature comes from the ability to create
+variables that have regexp type. Such variables can be passed on to
+user-defined functions, without the confusing aspects of computed
+regular expressions created from strings or string constants. They may
+also be passed through indirect function calls (*note Indirect Calls::)
+and on to the built-in functions that accept regexp constants.
+
+ When used in numeric conversions, strongly typed regexp variables
+convert to zero. When used in string conversions, they convert to the
+string value of the original regexp text.
+
+
File: gawk.info, Node: Variables, Next: Conversion, Prev: Using Constant Regexps, Up: Values
6.1.3 Variables
@@ -8265,11 +8507,74 @@ File: gawk.info, Node: Variable Typing, Next: Comparison Operators, Up: Typin
6.3.2.1 String Type versus Numeric Type
.......................................
-The POSIX standard introduced the concept of a "numeric string", which
-is simply a string that looks like a number--for example, '" +2"'. This
-concept is used for determining the type of a variable. The type of the
-variable is important because the types of two variables determine how
-they are compared. Variable typing follows these rules:
+Scalar objects in 'awk' (variables, array elements, and fields) are
+_dynamically_ typed. This means their type can change as the program
+runs, from "untyped" before any use,(1) to string or number, and then
+from string to number or number to string, as the program progresses.
+('gawk' also provides regexp-typed scalars, but let's ignore that for
+now; *note Strong Regexp Constants::.)
+
+ You can't do much with untyped variables, other than tell that they
+are untyped. The following program tests 'a' against '""' and '0'; the
+test succeeds when 'a' has never been assigned a value. It also uses
+the built-in 'typeof()' function (not presented yet; *note Type
+Functions::) to show 'a''s type:
+
+ $ gawk 'BEGIN { print (a == "" && a == 0 ?
+ > "a is untyped" : "a has a type!") ; print typeof(a) }'
+ -| a is untyped
+ -| unassigned
+
+ A scalar has numeric type when assigned a numeric value, such as from
+a numeric constant, or from another scalar with numeric type:
+
+ $ gawk 'BEGIN { a = 42 ; print typeof(a)
+ > b = a ; print typeof(b) }'
+ number
+ number
+
+ Similarly, a scalar has string type when assigned a string value,
+such as from a string constant, or from another scalar with string type:
+
+ $ gawk 'BEGIN { a = "forty two" ; print typeof(a)
+ > b = a ; print typeof(b) }'
+ string
+ string
+
+ So far, this is all simple and straightforward. What happens,
+though, when 'awk' has to process data from a user? Let's start with
+field data. What should the following command produce as output?
+
+ echo hello | awk '{ printf("%s %s < 42\n", $1,
+ ($1 < 42 ? "is" : "is not")) }'
+
+Since 'hello' is alphabetic data, 'awk' can only do a string comparison.
+Internally, it converts '42' into '"42"' and compares the two string
+values '"hello"' and '"42"'. Here's the result:
+
+ $ echo hello | awk '{ printf("%s %s < 42\n", $1,
+ > ($1 < 42 ? "is" : "is not")) }'
+ -| hello is not < 42
+
+ However, what happens when data from a user _looks like_ a number?
+On the one hand, in reality, the input data consists of characters, not
+binary numeric values. But, on the other hand, the data looks numeric,
+and 'awk' really ought to treat it as such. And indeed, it does:
+
+ $ echo 37 | awk '{ printf("%s %s < 42\n", $1,
+ > ($1 < 42 ? "is" : "is not")) }'
+ -| 37 is < 42
+
+ Here are the rules for when 'awk' treats data as a number, and for
+when it treats data as a string.
+
+ The POSIX standard uses the term "numeric string" for input data that
+looks numeric. The '37' in the previous example is a numeric string.
+So what is the type of a numeric string? Answer: numeric.
+
+ The type of a variable is important because the types of two
+variables determine how they are compared. Variable typing follows
+these definitions and rules:
* A numeric constant or the result of a numeric operation has the
"numeric" attribute.
@@ -8280,8 +8585,9 @@ they are compared. Variable typing follows these rules:
* Fields, 'getline' input, 'FILENAME', 'ARGV' elements, 'ENVIRON'
elements, and the elements of an array created by 'match()',
'split()', and 'patsplit()' that are numeric strings have the
- "strnum" attribute. Otherwise, they have the "string" attribute.
- Uninitialized variables also have the "strnum" attribute.
+ "strnum" attribute.(2) Otherwise, they have the "string"
+ attribute. Uninitialized variables also have the "strnum"
+ attribute.
* Attributes propagate across assignments but are not changed by any
use.
@@ -8300,16 +8606,16 @@ operation:
comparison may be used. This depends upon the attributes of the
operands, according to the following symmetric matrix:
- +-------------------------------
- | STRING NUMERIC STRNUM
- -----+-------------------------------
- |
- STRING | string string string
- |
- NUMERIC | string numeric numeric
- |
- STRNUM | string numeric numeric
- -----+-------------------------------
+ +----------------------------------------------
+ | STRING NUMERIC STRNUM
+--------+----------------------------------------------
+ |
+STRING | string string string
+ |
+NUMERIC | string numeric numeric
+ |
+STRNUM | string numeric numeric
+--------+----------------------------------------------
The basic idea is that user input that looks numeric--and _only_ user
input--should be treated as numeric, even though it is actually made of
@@ -8320,16 +8626,18 @@ for comparison purposes.
In short, when one operand is a "pure" string, such as a string
constant, then a string comparison is performed. Otherwise, a numeric
-comparison is performed.
+comparison is performed. (The primary difference between a number and a
+strnum is that for strnums 'gawk' preserves the original string value
+that the scalar had when it came in.)
+
+ This point bears additional emphasis: Input that looks numeric _is_
+numeric. All other input is treated as strings.
- This point bears additional emphasis: All user input is made of
-characters, and so is first and foremost of string type; input strings
-that look numeric are additionally given the strnum attribute. Thus,
-the six-character input string ' +3.14' receives the strnum attribute.
-In contrast, the eight characters '" +3.14"' appearing in program text
-comprise a string constant. The following examples print '1' when the
-comparison between the two different constants is true, and '0'
-otherwise:
+ Thus, the six-character input string ' +3.14' receives the strnum
+attribute. In contrast, the eight characters '" +3.14"' appearing in
+program text comprise a string constant. The following examples print
+'1' when the comparison between the two different constants is true, and
+'0' otherwise:
$ echo ' +3.14' | awk '{ print($0 == " +3.14") }' True
-| 1
@@ -8348,6 +8656,19 @@ otherwise:
$ echo ' +3.14' | awk '{ print($1 == 3.14) }' True
-| 1
+ You can see the type of an input field (or other user input) using
+'typeof()':
+
+ $ echo hello 37 | gawk '{ print typeof($1), typeof($2) }'
+ -| string strnum
+
+ ---------- Footnotes ----------
+
+ (1) 'gawk' calls this "unassigned", as the following example shows.
+
+ (2) Thus, a POSIX numeric string and 'gawk''s strnum are the same
+thing.
+

File: gawk.info, Node: Comparison Operators, Next: POSIX String Comparison, Prev: Variable Typing, Up: Typing and Comparison
@@ -8464,18 +8785,18 @@ Constant Regexps::, where this is discussed in more detail.

File: gawk.info, Node: POSIX String Comparison, Prev: Comparison Operators, Up: Typing and Comparison
-6.3.2.3 String Comparison with POSIX Rules
-..........................................
+6.3.2.3 String Comparison Based on Locale Collating Order
+.........................................................
-The POSIX standard says that string comparison is performed based on the
-locale's "collating order". This is the order in which characters sort,
-as defined by the locale (for more discussion, *note Locales::). This
-order is usually very different from the results obtained when doing
-straight character-by-character comparison.(1)
+The POSIX standard used to say that all string comparisons are performed
+based on the locale's "collating order". This is the order in which
+characters sort, as defined by the locale (for more discussion, *note
+Locales::). This order is usually very different from the results
+obtained when doing straight byte-by-byte comparison.(1)
Because this behavior differs considerably from existing practice,
-'gawk' only implements it when in POSIX mode (*note Options::). Here is
-an example to illustrate the difference, in an 'en_US.UTF-8' locale:
+'gawk' only implemented it when in POSIX mode (*note Options::). Here
+is an example to illustrate the difference, in an 'en_US.UTF-8' locale:
$ gawk 'BEGIN { printf("ABC < abc = %s\n",
> ("ABC" < "abc" ? "TRUE" : "FALSE")) }'
@@ -8484,11 +8805,28 @@ an example to illustrate the difference, in an 'en_US.UTF-8' locale:
> ("ABC" < "abc" ? "TRUE" : "FALSE")) }'
-| ABC < abc = FALSE
+ Fortunately, as of August 2016, comparison based on locale collating
+order is no longer required for the '==' and '!=' operators.(2)
+However, comparison based on locales is still required for '<', '<=',
+'>', and '>='. POSIX thus recommends as follows:
+
+ Since the '==' operator checks whether strings are identical, not
+ whether they collate equally, applications needing to check whether
+ strings collate equally can use:
+
+ a <= b && a >= b
+
+ As of version 4.2, 'gawk' continues to use locale collating order for
+'<', '<=', '>', and '>=' only in POSIX mode.
+
---------- Footnotes ----------
(1) Technically, string comparison is supposed to behave the same way
as if the strings were compared with the C 'strcoll()' function.
+ (2) See the Austin Group website
+(http://austingroupbugs.net/view.php?id=1070).
+

File: gawk.info, Node: Boolean Ops, Next: Conditional Exp, Prev: Typing and Comparison, Up: Truth Values and Conditions
@@ -10126,7 +10464,7 @@ each variable.)
The default value is '" "', a string consisting of a single space.
As a special exception, this value means that any sequence of
- spaces, TABs, and/or newlines is a single separator.(1) It also
+ spaces, TABs, and/or newlines is a single separator. It also
causes spaces, TABs, and newlines at the beginning and end of a
record to be ignored.
@@ -10223,10 +10561,6 @@ each variable.)
Internationalization::). The default value of 'TEXTDOMAIN' is
'"messages"'.
- ---------- Footnotes ----------
-
- (1) In POSIX 'awk', newline does not count as whitespace.
-

File: gawk.info, Node: Auto-set, Next: ARGC and ARGV, Prev: User-modified, Up: Built-in Variables
@@ -10290,10 +10624,24 @@ they are not special:
An associative array containing the values of the environment. The
array indices are the environment variable names; the elements are
the values of the particular environment variables. For example,
- 'ENVIRON["HOME"]' might be '"/home/arnold"'. Changing this array
- does not affect the environment passed on to any programs that
- 'awk' may spawn via redirection or the 'system()' function. (In a
- future version of 'gawk', it may do so.)
+ 'ENVIRON["HOME"]' might be '/home/arnold'.
+
+ For POSIX 'awk', changing this array does not affect the
+ environment passed on to any programs that 'awk' may spawn via
+ redirection or the 'system()' function.
+
+ However, beginning with version 4.2, if not in POSIX compatibility
+ mode, 'gawk' does update its own environment when 'ENVIRON' is
+ changed, thus changing the environment seen by programs that it
+ creates. You should therefore be especially careful if you modify
+ 'ENVIRON["PATH"]', which is the search path for finding executable
+ programs.
+
+ This can also affect the running 'gawk' program, since some of the
+ built-in functions may pay attention to certain environment
+ variables. The most notable instance of this is 'mktime()' (*note
+ Time Functions::), which pays attention to the value of the 'TZ'
+ environment variable on many systems.
Some operating systems may not have environment variables. On such
systems, the 'ENVIRON' array is empty (except for
@@ -10316,6 +10664,11 @@ they are not special:
'getline' returning -1. You are, of course, free to clear it
yourself before doing an I/O operation.
+ If the value of 'ERRNO' corresponds to a system error in the C
+ 'errno' variable, then 'PROCINFO["errno"]' will be set to the value
+ of 'errno'. For non-system errors, 'PROCINFO["errno"]' will be
+ zero.
+
'FILENAME'
The name of the current input file. When no data files are listed
on the command line, 'awk' reads from the standard input and
@@ -10364,6 +10717,10 @@ they are not special:
'PROCINFO["egid"]'
The value of the 'getegid()' system call.
+ 'PROCINFO["errno"]'
+ The value of the C 'errno' variable when 'ERRNO' is set to the
+ associated error message.
+
'PROCINFO["euid"]'
The value of the 'geteuid()' system call.
@@ -10462,6 +10819,14 @@ they are not special:
The following elements allow you to change 'gawk''s behavior:
+ 'PROCINFO["NONFATAL"]'
+ If this element exists, then I/O errors for all output
+ redirections become nonfatal. *Note Nonfatal::.
+
+ 'PROCINFO["OUTPUT_NAME", "NONFATAL"]'
+ Make output errors for OUTPUT_NAME be nonfatal. *Note
+ Nonfatal::.
+
'PROCINFO["COMMAND", "pty"]'
For two-way communication to COMMAND, use a pseudo-tty instead
of setting up a two-way pipe. *Note Two-way I/O:: for more
@@ -11876,6 +12241,21 @@ brackets ([ ]):
truncated toward zero. For example, 'int(3)' is 3, 'int(3.9)' is
3, 'int(-3.9)' is -3, and 'int(-3)' is -3 as well.
+'intdiv(NUMERATOR, DENOMINATOR, RESULT)'
+ Perform integer division, similar to the standard C function of the
+ same name. First, truncate 'numerator' and 'denominator' towards
+ zero, creating integer values. Clear the 'result' array, and then
+ set 'result["quotient"]' to the result of 'numerator /
+ denominator', truncated towards zero to an integer, and set
+ 'result["remainder"]' to the result of 'numerator % denominator',
+ truncated towards zero to an integer. This function is primarily
+ intended for use with arbitrary length integers; it avoids creating
+ MPFR arbitrary precision floating-point values (*note Arbitrary
+ Precision Integers::).
+
+ This function is a 'gawk' extension. It is not available in
+ compatibility mode (*note Options::).
+
'log(X)'
Return the natural logarithm of X, if X is positive; otherwise,
return 'NaN' ("not a number") on IEEE 754 systems. Additionally,
@@ -12911,7 +13291,7 @@ POSIX standard.(2) However, recent versions of 'mawk' (*note Other
Versions::) also support these functions. Optional parameters are
enclosed in square brackets ([ ]):
-'mktime(DATESPEC)'
+'mktime(DATESPEC' [', UTC-FLAG' ]')'
Turn DATESPEC into a timestamp in the same form as is returned by
'systime()'. It is similar to the function of the same name in ISO
C. The argument, DATESPEC, is a string of the form
@@ -12924,12 +13304,14 @@ enclosed in square brackets ([ ]):
The values of these numbers need not be within the ranges
specified; for example, an hour of -1 means 1 hour before midnight.
The origin-zero Gregorian calendar is assumed, with year 0
- preceding year 1 and year -1 preceding year 0. The time is assumed
- to be in the local time zone. If the daylight-savings flag is
- positive, the time is assumed to be daylight savings time; if zero,
- the time is assumed to be standard time; and if negative (the
- default), 'mktime()' attempts to determine whether daylight savings
- time is in effect for the specified time.
+ preceding year 1 and year -1 preceding year 0. If UTC-FLAG is
+ present and is either nonzero or non-null, the time is assumed to
+ be in the UTC time zone; otherwise, the time is assumed to be in
+ the local time zone. If the daylight-savings flag is positive, the
+ time is assumed to be daylight savings time; if zero, the time is
+ assumed to be standard time; and if negative (the default),
+ 'mktime()' attempts to determine whether daylight savings time is
+ in effect for the specified time.
If DATESPEC does not contain enough elements or if the resulting
time is out of range, 'mktime()' returns -1.
@@ -13263,13 +13645,10 @@ are enclosed in square brackets ([ ]):
Return the bitwise XOR of the arguments. There must be at least
two.
- For all of these functions, first the double-precision floating-point
-value is converted to the widest C unsigned integer type, then the
-bitwise operation is performed. If the result cannot be represented
-exactly as a C 'double', leading nonzero bits are removed one by one
-until it can be represented exactly. The result is then converted back
-into a C 'double'. (If you don't understand this paragraph, don't worry
-about it.)
+ CAUTION: Beginning with 'gawk' version 4.2, negative operands are
+ not allowed for any of these functions. A negative operand
+ produces a fatal error. See the sidebar "Beware The Smoke and
+ Mirrors!" for more information as to why.
Here is a user-defined function (*note User-defined::) that
illustrates the use of these functions:
@@ -13331,26 +13710,118 @@ decimal and octal values for the same numbers (*note
Nondecimal-numbers::), and then demonstrates the results of the
'compl()', 'lshift()', and 'rshift()' functions.
+ Beware The Smoke and Mirrors!
+
+ In other languages, bitwise operations are performed on integer
+values, not floating-point values. As a general statement, such
+operations work best when performed on unsigned integers.
+
+ 'gawk' attempts to treat the arguments to the bitwise functions as
+unsigned integers. For this reason, negative arguments produce a fatal
+error.
+
+ In normal operation, for all of these functions, first the
+double-precision floating-point value is converted to the widest C
+unsigned integer type, then the bitwise operation is performed. If the
+result cannot be represented exactly as a C 'double', leading nonzero
+bits are removed one by one until it can be represented exactly. The
+result is then converted back into a C 'double'.(2)
+
+ However, when using arbitrary precision arithmetic with the '-M'
+option (*note Arbitrary Precision Arithmetic::), the results may differ.
+This is particularly noticeable with the 'compl()' function:
+
+ $ gawk 'BEGIN { print compl(42) }'
+ -| 9007199254740949
+ $ gawk -M 'BEGIN { print compl(42) }'
+ -| -43
+
+ What's going on becomes clear when printing the results in
+hexadecimal:
+
+ $ gawk 'BEGIN { printf "%#x\n", compl(42) }'
+ -| 0x1fffffffffffd5
+ $ gawk -M 'BEGIN { printf "%#x\n", compl(42) }'
+ -| 0xffffffffffffffd5
+
+ When using the '-M' option, under the hood, 'gawk' uses GNU MP
+arbitrary precision integers which have at least 64 bits of precision.
+When not using '-M', 'gawk' stores integral values in regular
+double-precision floating point, which only maintains 53 bits of
+precision. Furthermore, the GNU MP library treats (or at least seems to
+treat) the leading bit as a sign bit; thus the result with '-M' in this
+case is a negative number.
+
+ In short, using 'gawk' for any but the simplest kind of bitwise
+operations is probably a bad idea; caveat emptor!
+
---------- Footnotes ----------
(1) This example shows that zeros come in on the left side. For
'gawk', this is always true, but in some languages, it's possible to
have the left side fill with ones.
+ (2) If you don't understand this paragraph, the upshot is that 'gawk'
+can only store a particular range of integer values; numbers outside
+that range are reduced to fit within the range.
+

File: gawk.info, Node: Type Functions, Next: I18N Functions, Prev: Bitwise Functions, Up: Built-in
9.1.7 Getting Type Information
------------------------------
-'gawk' provides a single function that lets you distinguish an array
-from a scalar variable. This is necessary for writing code that
-traverses every element of an array of arrays (*note Arrays of
-Arrays::).
+'gawk' provides two functions that let you distinguish the type of a
+variable. This is necessary for writing code that traverses every
+element of an array of arrays (*note Arrays of Arrays::), and in other
+contexts.
'isarray(X)'
Return a true value if X is an array. Otherwise, return false.
+'typeof(X)'
+ Return one of the following strings, depending upon the type of X:
+
+ '"array"'
+ X is an array.
+
+ '"regexp"'
+ X is a strongly typed regexp (*note Strong Regexp
+ Constants::).
+
+ '"number"'
+ X is a number.
+
+ '"string"'
+ X is a string.
+
+ '"strnum"'
+ X is a number that started life as user input, such as a field
+ or the result of calling 'split()'. (I.e., X has the strnum
+ attribute; *note Variable Typing::.)
+
+ '"unassigned"'
+ X is a scalar variable that has not been assigned a value yet.
+ For example:
+
+ BEGIN {
+ # creates a[1] but it has no assigned value
+ a[1]
+ print typeof(a[1]) # unassigned
+ }
+
+ '"untyped"'
+ X has not yet been used at all; it can become a scalar or
+ an array. For example:
+
+ BEGIN {
+ print typeof(x) # x never used --> untyped
+ mk_arr(x)
+ print typeof(x) # x now an array --> array
+ }
+
+ function mk_arr(a) { a[1] = 1 }
+
'isarray()' is meant for use in two circumstances. The first is when
traversing a multidimensional array: you can test if an element is
itself an array or not. The second is inside the body of a user-defined
@@ -13364,6 +13835,14 @@ parameter is an array or not.
that has not been previously used to 'isarray()', 'gawk' ends up
turning it into a scalar.
+ The 'typeof()' function is general; it allows you to determine if a
+variable or function parameter is a scalar, an array, or a strongly
+typed regexp.
+
+ 'isarray()' is deprecated; you should use 'typeof()' instead. You
+should replace any existing uses of 'isarray(var)' in your code with
+'typeof(var) == "array"'.
+

File: gawk.info, Node: I18N Functions, Prev: Type Functions, Up: Built-in
@@ -19679,9 +20158,15 @@ case 'gawk' waits for the child process to exit, which may cause your
program to hang. (Thus, this particular feature is of much less use in
practice than being able to close the '"to"' end.)
- CAUTION: It is a fatal error to write to the '"to"' end of a
- two-way pipe which has been closed. It is also a fatal error to
- read from the '"from"' end of a two-way pipe that has been closed.
+ CAUTION: Normally, it is a fatal error to write to the '"to"' end
+ of a two-way pipe which has been closed, and it is also a fatal
+ error to read from the '"from"' end of a two-way pipe that has been
+ closed.
+
+ You may set 'PROCINFO["COMMAND", "NONFATAL"]' to make such
+ operations become nonfatal. If you do so, you then need to check
+ 'ERRNO' after each 'print', 'printf', or 'getline'. *Note
+ Nonfatal::, for more information.
You may also use pseudo-ttys (ptys) for two-way communication instead
of pipes, if your system supports them. This is done on a per-command
@@ -19969,8 +20454,7 @@ output. They are as follows:
you typed when you wrote it. This is because 'gawk' creates the
profiled version by "pretty-printing" its internal representation of the
program. The advantage to this is that 'gawk' can produce a standard
-representation. The disadvantage is that all source code comments are
-lost. Also, things such as:
+representation. Also, things such as:
/foo/
@@ -20029,8 +20513,40 @@ the 'Ctrl-\' key.
called this way, 'gawk' "pretty-prints" the program into 'awkprof.out',
without any execution counts.
- NOTE: The '--pretty-print' option still runs your program. This
- will change in the next major release.
+ NOTE: Once upon a time, the '--pretty-print' option would also run
+ your program. This is no longer the case.
+
+ There is a significant difference between the output created when
+profiling, and that created when pretty-printing. Pretty-printed output
+preserves the original comments that were in the program, although their
+placement may not correspond exactly to their original locations in the
+source code.(1)
+
+ However, as a deliberate design decision, profiling output _omits_
+the original program's comments. This allows you to focus on the
+execution count data and helps you avoid the temptation to use the
+profiler for pretty-printing.
+
+ Additionally, pretty-printed output does not have the leading
+indentation that the profiling output does. This makes it easy to
+pretty-print your code once development is completed, and then use the
+result as the final version of your program.
+
+ Because the internal representation of your program is formatted to
+recreate an 'awk' program, profiling and pretty-printing automatically
+disable 'gawk''s default optimizations.
+
+ Pretty printing also preserves the original format of numeric
+constants; if you used an octal or hexadecimal value in your source
+code, it will appear that way in the output.
+
+ ---------- Footnotes ----------
+
+ (1) 'gawk' does the best it can to preserve the distinction between
+comments at the end of a statement and comments on lines by themselves.
+Due to implementation constraints, it does not always do so correctly,
+particularly for 'switch' statements. The 'gawk' maintainers hope to
+improve this in a subsequent release.

File: gawk.info, Node: Advanced Features Summary, Prev: Profiling, Up: Advanced Features
@@ -20071,8 +20587,7 @@ File: gawk.info, Node: Advanced Features Summary, Prev: Profiling, Up: Advanc
'USR1' signal while profiling causes 'gawk' to dump the profile and
keep going, including a function call stack.
- * You can also just "pretty-print" the program. This currently also
- runs the program, but that will change in the next major release.
+ * You can also just "pretty-print" the program.

File: gawk.info, Node: Internationalization, Next: Debugger, Prev: Advanced Features, Up: Top
@@ -21797,6 +22312,9 @@ File: gawk.info, Node: Debugging Summary, Prev: Limitations, Up: Debugger
it is used by the debugger to provide command-line history and
editing.
+ * Usually, the debugger does not affect the program being
+ debugged, but occasionally it can.
+

File: gawk.info, Node: Arbitrary Precision Arithmetic, Next: Dynamic Extensions, Prev: Debugger, Up: Top
@@ -22530,6 +23048,62 @@ the following:
gawk -M 'BEGIN { n = 13; print n % 2 }'
+ When dividing two arbitrary precision integers with either '/' or
+'%', the result is typically an arbitrary precision floating point value
+(unless the denominator evenly divides into the numerator). In order to
+do integer division or remainder with arbitrary precision integers, use
+the built-in 'intdiv()' function (*note Numeric Functions::).
+
+ You can simulate the 'intdiv()' function in standard 'awk' using this
+user-defined function:
+
+ # intdiv --- do integer division
+
+ function intdiv(numerator, denominator, result)
+ {
+ split("", result)
+
+ numerator = int(numerator)
+ denominator = int(denominator)
+ result["quotient"] = int(numerator / denominator)
+ result["remainder"] = int(numerator % denominator)
+
+ return 0.0
+ }
+
+ The following example program, contributed by Katie Wasserman, uses
+'intdiv()' to compute the digits of pi to as many places as you choose
+to set:
+
+ # pi.awk --- compute the digits of pi
+
+ BEGIN {
+ digits = 100000
+ two = 2 * 10 ^ digits
+ pi = two
+ for (m = digits * 4; m > 0; --m) {
+ d = m * 2 + 1
+ x = pi * m
+ intdiv(x, d, result)
+ pi = result["quotient"]
+ pi = pi + two
+ }
+ print pi
+ }
+
+ When asked about the algorithm used, Katie replied:
+
+ It's not that well known but it's not that obscure either. It's
+ Euler's modification to Newton's method for calculating pi. Take a
+ look at lines (23) - (25) here:
+ <http://mathworld.wolfram.com/PiFormulas.html>.
+
+ The algorithm I wrote simply expands the multiply by 2 and works
+ from the innermost expression outwards. I used this to program HP
+ calculators because it's quite easy to modify for tiny memory
+ devices with smallish word sizes. See
+ <http://www.hpmuseum.org/cgi-sys/cgiwrap/hpmuseum/articles.cgi?read=899>.
+
---------- Footnotes ----------
(1) Weisstein, Eric W. 'Sylvester's Sequence'. From MathWorld--A
@@ -22892,8 +23466,11 @@ API in detail.
* Symbol Table Access:: Functions for accessing global
variables.
* Array Manipulation:: Functions for working with arrays.
+* Redirection API:: How to access and manipulate
+ redirections.
* Extension API Variables:: Variables provided by the API.
* Extension API Boilerplate:: Boilerplate code for using the API.
+* Changes from API V1:: Changes from V1 of the API.

File: gawk.info, Node: Extension API Functions Introduction, Next: General Data Types, Up: Extension API Description
@@ -22947,6 +23524,8 @@ operations:
- Flattening an array for easy C-style looping over all its
indices and elements
+ * Accessing and manipulating redirections.
+
Some points about using the API:
* The following types, macros, and/or functions are referenced in
@@ -22987,16 +23566,25 @@ operations:
* The API defines several simple 'struct's that map values as seen
from 'awk'. A value can be a 'double', a string, or an array (as
- in multidimensional arrays, or when creating a new array). String
- values maintain both pointer and length, because embedded NUL
- characters are allowed.
+ in multidimensional arrays, or when creating a new array).
+
+ String values maintain both pointer and length, because embedded
+ NUL characters are allowed.
- NOTE: By intent, strings are maintained using the current
+ NOTE: By intent, 'gawk' maintains strings using the current
multibyte encoding (as defined by 'LC_XXX' environment
variables) and not using wide characters. This matches how
'gawk' stores strings internally and also how characters are
likely to be input into and output from files.
+ NOTE: String values passed to an extension by 'gawk' are
+ always NUL-terminated. Thus it is safe to pass such string
+ values to standard library and system routines. However,
+ because 'gawk' allows embedded NUL characters in string data,
+ before using the data as a regular C string, you should check
+ that the length for that string passed to the extension
+ matches the return value of 'strlen()' for it.
+
* When retrieving a value (such as a parameter or that of a global
variable or array element), the extension requests a specific type
(number, string, scalar, value cookie, array, or "undefined").
@@ -23067,6 +23655,8 @@ use them.
' AWK_UNDEFINED,'
' AWK_NUMBER,'
' AWK_STRING,'
+' AWK_REGEX,'
+' AWK_STRNUM,'
' AWK_ARRAY,'
' AWK_SCALAR, /* opaque access to a variable */'
' AWK_VALUE_COOKIE /* for updating a previously created value */'
@@ -23089,6 +23679,8 @@ use them.
type.
'#define str_value u.s'
+'#define strnum_value str_value'
+'#define regex_value str_value'
'#define num_value u.d'
'#define array_cookie u.a'
'#define scalar_cookie u.scl'
@@ -23107,15 +23699,35 @@ use them.
This is also discussed in a general fashion in the text following
this list, and in more detail in *note Cached values::.
- Scalar values in 'awk' are either numbers or strings. The
-'awk_value_t' struct represents values. The 'val_type' member indicates
-what is in the 'union'.
+ Scalar values in 'awk' are numbers, strings, strnums, or typed
+regexps. The 'awk_value_t' struct represents values. The 'val_type'
+member indicates what is in the 'union'.
Representing numbers is easy--the API uses a C 'double'. Strings
require more work. Because 'gawk' allows embedded NUL bytes in string
values, a string must be represented as a pair containing a data pointer
and length. This is the 'awk_string_t' type.
+ A strnum (numeric string) value is represented as a string and
+consists of user input data that appears to be numeric. When an
+extension creates a strnum value, the result is a string flagged as user
+input. Subsequent parsing by 'gawk' then determines whether it looks
+like a number and should be treated as a strnum, or as a regular string.
+
+ This is useful in cases where an extension function would like to do
+something comparable to the 'split()' function, which sets the strnum
+attribute on the array elements it creates. For example, an extension
+that implements CSV splitting would want to use this feature. This is
+also useful for a function that retrieves a data item from a database.
+The PostgreSQL 'PQgetvalue()' function, for example, returns a string
+that may be numeric or textual depending on the contents.
+
+ Typed regexp values (*note Strong Regexp Constants::) are not of much
+use to extension functions. Extension functions can tell that they've
+received them, and create them for scalar values. Otherwise, they can
+examine the text of the regexp through 'regex_value.str' and
+'regex_value.len'.
+
Identifiers (i.e., the names of global variables) can be associated
with either scalar values or with arrays. In addition, 'gawk' provides
true arrays of arrays, where any given array element can itself be an
@@ -23275,6 +23887,31 @@ code would use them:
This function simply creates a numeric value in the 'awk_value_t'
variable pointed to by 'result'.
+'static inline awk_value_t *'
+'make_const_user_input(const char *string, size_t length, awk_value_t *result);'
+ This function is identical to 'make_const_string()', but the string
+ is flagged as user input that should be treated as a strnum value
+ if the contents of the string are numeric.
+
+'static inline awk_value_t *'
+'make_malloced_user_input(const char *string, size_t length, awk_value_t *result);'
+ This function is identical to 'make_malloced_string()', but the
+ string is flagged as user input that should be treated as a strnum
+ value if the contents of the string are numeric.
+
+'static inline awk_value_t *'
+'make_const_regex(const char *string, size_t length, awk_value_t *result);'
+ This function creates a strongly typed regexp value by allocating a
+ copy of the string. 'string' is the regular expression of length
+ 'length'.
+
+'static inline awk_value_t *'
+'make_malloced_regex(const char *string, size_t length, awk_value_t *result);'
+ This function creates a strongly typed regexp value. 'string' is
+ the regular expression of length 'length'. It expects 'string' to be
+ a 'char *' value pointing to data previously obtained from
+ 'gawk_malloc()', 'gawk_calloc()', or 'gawk_realloc()'.
+

File: gawk.info, Node: Registration Functions, Next: Printing Messages, Prev: Constructor Functions, Up: Extension API Description
@@ -23303,8 +23940,13 @@ Extension functions are described by the following record:
typedef struct awk_ext_func {
const char *name;
- awk_value_t *(*function)(int num_actual_args, awk_value_t *result);
- size_t num_expected_args;
+ awk_value_t *(*const function)(int num_actual_args,
+ awk_value_t *result,
+ struct awk_ext_func *finfo);
+ const size_t max_expected_args;
+ const size_t min_required_args;
+ awk_bool_t suppress_lint;
+ void *data; /* opaque pointer to any extra state */
} awk_ext_func_t;
The fields are:
@@ -23318,34 +23960,89 @@ Extension functions are described by the following record:
which may be followed by any number of letters, digits, and
underscores. Letter case in function names is significant.
-'awk_value_t *(*function)(int num_actual_args, awk_value_t *result);'
+'awk_value_t *(*const function)(int num_actual_args,'
+' awk_value_t *result,'
+' struct awk_ext_func *finfo);'
This is a pointer to the C function that provides the extension's
functionality. The function must fill in '*result' with either a
- number or a string. 'gawk' takes ownership of any string memory.
- As mentioned earlier, string memory _must_ come from one of
- 'gawk_malloc()', 'gawk_calloc()', or 'gawk_realloc()'.
+ number, a string, or a regexp. 'gawk' takes ownership of any
+ string memory. As mentioned earlier, string memory _must_ come
+ from one of 'gawk_malloc()', 'gawk_calloc()', or 'gawk_realloc()'.
The 'num_actual_args' argument tells the C function how many actual
parameters were passed from the calling 'awk' code.
+ The 'finfo' parameter is a pointer to the 'awk_ext_func_t' for this
+ function. The called function may access data within it as
+ desired, or not.
+
The function must return the value of 'result'. This is for the
convenience of the calling code inside 'gawk'.
-'size_t num_expected_args;'
- This is the number of arguments the function expects to receive.
- Each extension function may decide what to do if the number of
- arguments isn't what it expected. As with real 'awk' functions, it
- is likely OK to ignore extra arguments.
+'const size_t max_expected_args;'
+ This is the maximum number of arguments the function expects to
+ receive. If called with more arguments than this, and if lint
+ checking has been enabled, then 'gawk' prints a warning message.
+ For more information, see the entry for 'suppress_lint', later in
+ this list.
+
+'const size_t min_required_args;'
+ This is the minimum number of arguments the function expects to
+ receive. If called with fewer arguments, 'gawk' prints a fatal
+ error message and exits.
+
+'awk_bool_t suppress_lint;'
+ This flag tells 'gawk' not to print a lint message if lint checking
+ has been enabled and if more arguments were supplied in the call
+ than expected. An extension function can tell if 'gawk' already
+ printed at least one such message by checking if 'num_actual_args >
+ finfo->max_expected_args'. If so, and the function does not want
+ more lint messages to be printed, it should set
+ 'finfo->suppress_lint' to 'awk_true'.
+
+'void *data;'
+ This is an opaque pointer to any data that an extension function
+ may wish to have available when called. Passing the
+ 'awk_ext_func_t' structure to the extension function, and having
+ this pointer available in it enable writing a single C or C++
+ function that implements multiple 'awk'-level extension functions.
Once you have a record representing your extension function, you
register it with 'gawk' using this API function:
-'awk_bool_t add_ext_func(const char *namespace, const awk_ext_func_t *func);'
+'awk_bool_t add_ext_func(const char *namespace, awk_ext_func_t *func);'
This function returns true upon success, false otherwise. The
'namespace' parameter is currently not used; you should pass in an
empty string ('""'). The 'func' pointer is the address of a
'struct' representing your function, as just described.
+ 'gawk' does not modify what 'func' points to, but the extension
+ function itself receives this pointer and can modify what it points
+ to, thus it is purposely not declared to be 'const'.
+
+ The combination of 'min_required_args', 'max_expected_args', and
+'suppress_lint' may be confusing. Here is how you should set things up.
+
+Any number of arguments is valid
+ Set 'min_required_args' and 'max_expected_args' to zero and set
+ 'suppress_lint' to 'awk_true'.
+
+A minimum number of arguments is required, no limit on maximum number of arguments
+ Set 'min_required_args' to the minimum required. Set
+ 'max_expected_args' to zero and set 'suppress_lint' to 'awk_true'.
+
+A minimum number of arguments is required, a maximum number is expected
+ Set 'min_required_args' to the minimum required. Set
+ 'max_expected_args' to the maximum expected. Set 'suppress_lint'
+ to 'awk_false'.
+
+A minimum number of arguments is required, and no more than a maximum is allowed
+ Set 'min_required_args' to the minimum required. Set
+ 'max_expected_args' to the maximum expected. Set 'suppress_lint'
+ to 'awk_false'. In your extension function, check that
+ 'num_actual_args' does not exceed 'finfo->max_expected_args'. If it
+ does, issue a fatal error message.
+

File: gawk.info, Node: Exit Callback Functions, Next: Extension Version String, Prev: Extension Functions, Up: Registration Functions
@@ -23788,6 +24485,9 @@ extension ID received from 'gawk' when the extension was loaded:(1)
'void fatal(awk_ext_id_t id, const char *format, ...);'
Print a message and then cause 'gawk' to exit immediately.
+'void nonfatal(awk_ext_id_t id, const char *format, ...);'
+ Print a nonfatal error message.
+
'void warning(awk_ext_id_t id, const char *format, ...);'
Print a warning message.
@@ -23844,18 +24544,17 @@ summarized in *note Table 16.1: table-value-types-returned.
Type of Actual Value
--------------------------------------------------------------------------
- String Number Array Undefined
-------------------------------------------------------------------------------
- String String String False False
- Number Number if Number False False
- can be
- converted,
- else false
-Type Array False False Array False
-Requested Scalar Scalar Scalar False False
- Undefined String Number Array Undefined
- Value False False False False
- cookie
+ String Strnum Number Regex Array Undefined
+-------------------------------------------------------------------------------
+ String String String String String false false
+ Strnum false Strnum Strnum false false false
+ Number Number Number Number false false false
+Type Regex false false false Regex false false
+Requested Array false false false false Array false
+ Scalar Scalar Scalar Scalar Scalar false false
+ Undefined String Strnum Number Regex Array Undefined
+ Value false false false false false false
+ cookie
Table 16.1: API value types returned
@@ -23936,11 +24635,6 @@ termed a "symbol table". The functions are as follows:
However, with the exception of the 'PROCINFO' array, an extension cannot
change any of those variables.
- CAUTION: It is possible for the lookup of 'PROCINFO' to fail. This
- happens if the 'awk' program being run does not reference
- 'PROCINFO'; in this case, 'gawk' doesn't bother to create the array
- and populate it.
-

File: gawk.info, Node: Symbol table by cookie, Next: Cached values, Prev: Symbol table by name, Up: Symbol Table Access
@@ -23964,8 +24658,9 @@ was discussed earlier, in *note General Data Types::.
'awk_bool_t sym_update_scalar(awk_scalar_t cookie, awk_value_t *value);'
Update the value associated with a scalar cookie. Return false if
- the new value is not of type 'AWK_STRING' or 'AWK_NUMBER'. Here
- too, the predefined variables may not be updated.
+ the new value is not of type 'AWK_STRING', 'AWK_STRNUM',
+ 'AWK_REGEX', or 'AWK_NUMBER'. Here too, the predefined variables
+ may not be updated.
It is not obvious at first glance how to work with scalar cookies or
what their raison d'e^tre really is. In theory, the 'sym_lookup()' and
@@ -24079,10 +24774,10 @@ follows:
'awk_bool_t create_value(awk_value_t *value, awk_value_cookie_t *result);'
Create a cached string or numeric value from 'value' for efficient
- later assignment. Only values of type 'AWK_NUMBER' and
- 'AWK_STRING' are allowed. Any other type is rejected.
- 'AWK_UNDEFINED' could be allowed, but doing so would result in
- inferior performance.
+ later assignment. Only values of type 'AWK_NUMBER', 'AWK_REGEX',
+ 'AWK_STRNUM', and 'AWK_STRING' are allowed. Any other type is
+ rejected. 'AWK_UNDEFINED' could be allowed, but doing so would
+ result in inferior performance.
'awk_bool_t release_value(awk_value_cookie_t vc);'
Release the memory associated with a value cookie obtained from
@@ -24152,7 +24847,7 @@ using 'release_value()'.
'double' to store.

-File: gawk.info, Node: Array Manipulation, Next: Extension API Variables, Prev: Symbol Table Access, Up: Extension API Description
+File: gawk.info, Node: Array Manipulation, Next: Redirection API, Prev: Symbol Table Access, Up: Extension API Description
16.4.11 Array Manipulation
--------------------------
@@ -24309,12 +25004,21 @@ The following functions relate to individual array elements:
array, but after calling this function, it has no elements. This
is equivalent to using the 'delete' statement (*note Delete::).
+'awk_bool_t flatten_array_typed(awk_array_t a_cookie, awk_flat_array_t **data, awk_valtype_t index_type, awk_valtype_t value_type);'
+ For the array represented by 'a_cookie', create an
+ 'awk_flat_array_t' structure and fill it in with indices and values
+ of the requested types. Set the pointer whose address is passed as
+ 'data' to point to this structure. Return true upon success, or
+ false otherwise. *Note Flattening Arrays::, for a discussion of
+ how to flatten an array and work with it.
+
'awk_bool_t flatten_array(awk_array_t a_cookie, awk_flat_array_t **data);'
For the array represented by 'a_cookie', create an
- 'awk_flat_array_t' structure and fill it in. Set the pointer whose
- address is passed as 'data' to point to this structure. Return
- true upon success, or false otherwise. *Note Flattening Arrays::,
- for a discussion of how to flatten an array and work with it.
+ 'awk_flat_array_t' structure and fill it in with 'AWK_STRING'
+ indices and 'AWK_UNDEFINED' values. This is superseded by
+ 'flatten_array_typed()'. It is provided as a macro, and remains
+ for convenience and for source code compatibility with the previous
+ version of the API.
'awk_bool_t release_flattened_array(awk_array_t a_cookie,'
' awk_flat_array_t *data);'
@@ -24416,7 +25120,7 @@ count of elements in the array and print it:
double-check that the count in the 'awk_flat_array_t' is the same as the
count just retrieved:
- if (! flatten_array(value2.array_cookie, & flat_array)) {
+ if (! flatten_array_typed(value2.array_cookie, & flat_array, AWK_STRING, AWK_UNDEFINED)) {
printf("dump_array_and_delete: could not flatten array\n");
goto out;
}
@@ -24636,9 +25340,78 @@ array:
environment variable.)

-File: gawk.info, Node: Extension API Variables, Next: Extension API Boilerplate, Prev: Array Manipulation, Up: Extension API Description
+File: gawk.info, Node: Redirection API, Next: Extension API Variables, Prev: Array Manipulation, Up: Extension API Description
+
+16.4.12 Accessing and Manipulating Redirections
+-----------------------------------------------
+
+The following function allows extensions to access and manipulate
+redirections.
+
+'awk_bool_t get_file(const char *name,'
+' size_t name_len,'
+' const char *filetype,'
+' int fd,'
+' const awk_input_buf_t **ibufp,'
+' const awk_output_buf_t **obufp);'
+ Look up file 'name' in 'gawk''s internal redirection table. If
+ 'name' is 'NULL' or 'name_len' is zero, return data for the
+ currently open input file corresponding to 'FILENAME'. (This does
+ not access the 'filetype' argument, so that may be undefined.) If
+ the file is not already open, attempt to open it. The 'filetype'
+ argument must be zero-terminated and should be one of:
+
+ '">"'
+ A file opened for output.
+
+ '">>"'
+ A file opened for append.
+
+ '"<"'
+ A file opened for input.
+
+ '"|>"'
+ A pipe opened for output.
+
+ '"|<"'
+ A pipe opened for input.
+
+ '"|&"'
+ A two-way coprocess.
+
+ On error, return an 'awk_false' value. Otherwise, return
+ 'awk_true', and return additional information about the redirection
+ in the 'ibufp' and 'obufp' pointers. For input redirections, the
+ '*ibufp' value should be non-'NULL', and '*obufp' should be 'NULL'.
+ For output redirections, the '*obufp' value should be non-'NULL',
+ and '*ibufp' should be 'NULL'. For two-way coprocesses, both
+ values should be non-'NULL'.
+
+ In the usual case, the extension is interested in '(*ibufp)->fd'
+ and/or 'fileno((*obufp)->fp)'. If the file is not already open,
+ and the 'fd' argument is nonnegative, 'gawk' will use that file
+ descriptor instead of opening the file in the usual way. If 'fd'
+ is nonnegative, but the file exists already, 'gawk' ignores 'fd'
+ and returns the existing file. It is the caller's responsibility
+ to notice that neither the 'fd' in the returned 'awk_input_buf_t'
+ nor the 'fd' in the returned 'awk_output_buf_t' matches the
+ requested value.
+
+ Note that supplying a file descriptor is currently _not_ supported
+ for pipes. However, supplying a file descriptor should work for
+ input, output, append, and two-way (coprocess) sockets. If
+ 'filetype' is two-way, 'gawk' assumes that it is a socket! Note
+ that in the two-way case, the input and output file descriptors may
+ differ. To check for success, you must check whether either
+ matches.
+
+ It is anticipated that this API function will be used to implement
+I/O multiplexing and a socket library.
+
+
+File: gawk.info, Node: Extension API Variables, Next: Extension API Boilerplate, Prev: Redirection API, Up: Extension API Description
-16.4.12 API Variables
+16.4.13 API Variables
---------------------
The API provides two sets of variables. The first provides information
@@ -24655,7 +25428,7 @@ information about how 'gawk' was invoked.

File: gawk.info, Node: Extension Versioning, Next: Extension API Informational Variables, Up: Extension API Variables
-16.4.12.1 API Version Constants and Variables
+16.4.13.1 API Version Constants and Variables
.............................................
The API provides both a "major" and a "minor" version number. The API
@@ -24663,10 +25436,10 @@ versions are available at compile time as C preprocessor defines to
support conditional compilation, and as enum constants to facilitate
debugging:
-API Version C preprocessor define enum constant
----------------------------------------------------------------------------
-Major gawk_api_major_version GAWK_API_MAJOR_VERSION
-Minor gawk_api_minor_version GAWK_API_MINOR_VERSION
+API Version C Preprocessor Define enum constant
+--------------------------------------------------------------------
+Major 'gawk_api_major_version' 'GAWK_API_MAJOR_VERSION'
+Minor 'gawk_api_minor_version' 'GAWK_API_MINOR_VERSION'
Table 16.2: gawk API version constants
@@ -24683,10 +25456,10 @@ For this reason, the major and minor API versions of the running 'gawk'
are included in the API 'struct' as read-only constant integers:
'api->major_version'
- The major version of the running 'gawk'
+ The major version of the running 'gawk'.
'api->minor_version'
- The minor version of the running 'gawk'
+ The minor version of the running 'gawk'.
It is up to the extension to decide if there are API
incompatibilities. Typically, a check like this is enough:
@@ -24707,7 +25480,7 @@ Boilerplate::).

File: gawk.info, Node: Extension API Informational Variables, Prev: Extension Versioning, Up: Extension API Variables
-16.4.12.2 Informational Variables
+16.4.13.2 Informational Variables
.................................
The API provides access to several variables that describe whether the
@@ -24740,9 +25513,9 @@ predefined variable (*note Built-in Variables::). The others should not
change during execution.

-File: gawk.info, Node: Extension API Boilerplate, Prev: Extension API Variables, Up: Extension API Description
+File: gawk.info, Node: Extension API Boilerplate, Next: Changes from API V1, Prev: Extension API Variables, Up: Extension API Description
-16.4.13 Boilerplate Code
+16.4.14 Boilerplate Code
------------------------
As mentioned earlier (*note Extension Mechanism Outline::), the function
@@ -24760,7 +25533,7 @@ the 'gawkapi.h' header file:
static const char *ext_version = NULL; /* or ... = "some string" */
static awk_ext_func_t func_table[] = {
- { "name", do_name, 1 },
+ { "name", do_name, 1, 0, awk_false, NULL },
/* ... */
};
@@ -24841,6 +25614,22 @@ does the following:
'gawk'.

+File: gawk.info, Node: Changes from API V1, Prev: Extension API Boilerplate, Up: Extension API Description
+
+16.4.15 Changes From Version 1 of the API
+-----------------------------------------
+
+The current API is _not_ binary compatible with version 1 of the API.
+You will have to recompile your extensions in order to use them with the
+current version of 'gawk'.
+
+ Fortunately, at the possible expense of some compile-time warnings,
+the API remains source-code-compatible with the previous API. The major
+differences are the additional members in the 'awk_ext_func_t'
+structure, and the addition of the third argument to the C
+implementation function.
+
+
File: gawk.info, Node: Finding Extensions, Next: Extension Example, Prev: Extension API Description, Up: Dynamic Extensions
16.5 How 'gawk' Finds Extensions
@@ -25057,24 +25846,20 @@ is a pointer to an 'awk_value_t' structure, usually named 'result':
/* do_chdir --- provide dynamically loaded chdir() function for gawk */
static awk_value_t *
- do_chdir(int nargs, awk_value_t *result)
+ do_chdir(int nargs, awk_value_t *result, struct awk_ext_func *unused)
{
awk_value_t newdir;
int ret = -1;
assert(result != NULL);
- if (do_lint && nargs != 1)
- lintwarn(ext_id,
- _("chdir: called with incorrect number of arguments, "
- "expecting 1"));
-
The 'newdir' variable represents the new directory to change to,
which is retrieved with 'get_argument()'. Note that the first argument
is numbered zero.
If the argument is retrieved successfully, the function calls the
-'chdir()' system call. If the 'chdir()' fails, 'ERRNO' is updated:
+'chdir()' system call. If the 'chdir()' call then fails, it updates
+'ERRNO':
if (get_argument(0, AWK_STRING, & newdir)) {
ret = chdir(newdir.str_value.str);
@@ -25261,7 +26046,7 @@ declarations and argument checking:
/* do_stat --- provide a stat() function for gawk */
static awk_value_t *
- do_stat(int nargs, awk_value_t *result)
+ do_stat(int nargs, awk_value_t *result, struct awk_ext_func *unused)
{
awk_value_t file_param, array_param;
char *name;
@@ -25273,13 +26058,6 @@ declarations and argument checking:
assert(result != NULL);
- if (nargs != 2 && nargs != 3) {
- if (do_lint)
- lintwarn(ext_id,
- _("stat: called with wrong number of arguments"));
- return make_number(-1, result);
- }
-
Then comes the actual work. First, the function gets the arguments.
Next, it gets the information for the file. If the called function
('lstat()' or 'stat()') returns an error, the code sets 'ERRNO' and
@@ -25336,11 +26114,9 @@ there is an initialization function:
for loading each function into 'gawk':
static awk_ext_func_t func_table[] = {
- { "chdir", do_chdir, 1 },
- { "stat", do_stat, 2 },
- #ifndef __MINGW32__
- { "fts", do_fts, 3 },
- #endif
+ { "chdir", do_chdir, 1, 1, awk_false, NULL },
+ { "stat", do_stat, 3, 2, awk_false, NULL },
+ ...
};
Each extension must have a routine named 'dl_load()' to load
@@ -26070,15 +26846,17 @@ project.
* GD graphics library extension
+ * MPFR library extension (this provides access to a number of MPFR
+ functions that 'gawk''s native MPFR support does not)
+
* PDF extension
* PostgreSQL extension
- * MPFR library extension (this provides access to a number of MPFR
- functions that 'gawk''s native MPFR support does not)
-
* Redis extension
+ * Select extension
+
* XML parser extension, using the Expat
(http://expat.sourceforge.net) XML parsing library
@@ -26156,7 +26934,7 @@ File: gawk.info, Node: Extension summary, Next: Extension Exercises, Prev: ga
exit callbacks, a version string, input parsers, output
wrappers, and two-way processors)
- * Printing fatal, warning, and "lint" warning messages
+ * Printing fatal, nonfatal, warning, and "lint" warning messages
* Updating 'ERRNO', or unsetting it
@@ -26491,6 +27269,9 @@ current version of 'gawk'.
- Directories on the command line produce a warning and are
skipped (*note Command-line directories::)
+ - Output with 'print' and 'printf' need not be fatal (*note
+ Nonfatal::)
+
* New keywords:
- The 'BEGINFILE' and 'ENDFILE' special patterns (*note
@@ -26540,6 +27321,9 @@ current version of 'gawk'.
- The 'bindtextdomain()', 'dcgettext()', and 'dcngettext()'
functions for internationalization (*note Programmer i18n::)
+ - The 'intdiv()' function for doing integer division and
+ remainder (*note Numeric Functions::)
+
* Changes and/or additions in the command-line options:
- The 'AWKPATH' environment variable for specifying a path
@@ -26550,16 +27334,16 @@ current version of 'gawk'.
- The '-b', '-c', '-C', '-d', '-D', '-e', '-E', '-g', '-h',
'-i', '-l', '-L', '-M', '-n', '-N', '-o', '-O', '-p', '-P',
- '-r', '-S', '-t', and '-V' short options. Also, the ability
- to use GNU-style long-named options that start with '--', and
- the '--assign', '--bignum', '--characters-as-bytes',
+ '-r', '-s', '-S', '-t', and '-V' short options. Also, the
+ ability to use GNU-style long-named options that start with
+ '--', and the '--assign', '--bignum', '--characters-as-bytes',
'--copyright', '--debug', '--dump-variables', '--exec',
'--field-separator', '--file', '--gen-pot', '--help',
'--include', '--lint', '--lint-old', '--load',
- '--non-decimal-data', '--optimize', '--posix',
- '--pretty-print', '--profile', '--re-interval', '--sandbox',
- '--source', '--traditional', '--use-lc-numeric', and
- '--version' long options (*note Options::).
+ '--non-decimal-data', '--optimize', '--no-optimize',
+ '--posix', '--pretty-print', '--profile', '--re-interval',
+ '--sandbox', '--source', '--traditional', '--use-lc-numeric',
+ and '--version' long options (*note Options::).
* Support for the following obsolete systems was removed from the
code and the documentation for 'gawk' version 4.0:
@@ -26593,6 +27377,13 @@ current version of 'gawk'.
- Ultrix
+ * Support for the following systems was removed from the code for
+ 'gawk' version 4.2:
+
+ - MirBSD
+
+ - GNU/Linux on Alpha
+

File: gawk.info, Node: Feature History, Next: Common Extensions, Prev: POSIX/GNU, Up: Language History
@@ -26984,6 +27775,34 @@ POSIX 'awk', in the order they were added to 'gawk'.
* Support for Ultrix was removed.
+ Version 4.2 introduced the following changes:
+
+ * Changes to 'ENVIRON' are reflected into 'gawk''s environment and
+ that of programs that it runs. *Note Auto-set::.
+
+ * The '--pretty-print' option no longer runs the 'awk' program too.
+ *Note Options::.
+
+ * The 'igawk' program and its manual page are no longer installed
+ when 'gawk' is built. *Note Igawk Program::.
+
+ * The 'intdiv()' function. *Note Numeric Functions::.
+
+ * The maximum number of hexadecimal digits in '\x' escapes is now
+ two. *Note Escape Sequences::.
+
+ * Nonfatal output with 'print' and 'printf'. *Note Nonfatal::.
+
+ * For many years, POSIX specified that default field splitting only
+ allowed spaces and tabs to separate fields, and this was how 'gawk'
+ behaved with '--posix'. As of 2013, the standard restored
+ historical behavior, and now default field splitting with '--posix'
+ also allows newlines to separate fields.
+
+ * Support for MirBSD was removed.
+
+ * Support for GNU/Linux on Alpha was removed.
+

File: gawk.info, Node: Common Extensions, Next: Ranges and Locales, Prev: Feature History, Up: Language History
@@ -27098,7 +27917,7 @@ ranges, such that outside the '"C"' and '"POSIX"' locales, the meaning
of range expressions was _undefined_.(3)
By using this lovely technical term, the standard gives license to
-implementors to implement ranges in whatever way they choose. The
+implementers to implement ranges in whatever way they choose. The
'gawk' maintainer chose to apply the pre-POSIX meaning both with the
default regexp matching and when '--traditional' or '--posix' are used.
In all cases 'gawk' remains POSIX-compliant.
@@ -27412,6 +28231,12 @@ to different non-Unix operating systems:
Various '.c', '.y', and '.h' files
These files contain the actual 'gawk' source code.
+'support/*'
+ C header and source files for routines that 'gawk' uses, but that
+ are not part of its core functionality. For example, argument
+ parsing, regular expression matching, and random number generating
+ routines are all kept here.
+
'ABOUT-NLS'
A file containing information about GNU 'gettext' and translations.
@@ -27502,7 +28327,9 @@ Various '.c', '.y', and '.h' files
'doc/igawk.1'
The 'troff' source for a manual page describing the 'igawk' program
- presented in *note Igawk Program::.
+ presented in *note Igawk Program::. (Since 'gawk' can do its own
+ '@include' processing, neither 'igawk' nor 'igawk.1' is
+ installed.)
'doc/Makefile.in'
The input file used during the configuration process to generate
@@ -27544,17 +28371,22 @@ Various '.c', '.y', and '.h' files
contains a 'Makefile.in' file, which 'configure' uses to generate a
'Makefile'. 'Makefile.am' is used by GNU Automake to create
'Makefile.in'. The library functions from *note Library
- Functions::, and the 'igawk' program from *note Igawk Program:: are
- included as ready-to-use files in the 'gawk' distribution. They
- are installed as part of the installation process. The rest of the
- programs in this Info file are available in appropriate
- subdirectories of 'awklib/eg'.
+ Functions::, are included as ready-to-use files in the 'gawk'
+ distribution. They are installed as part of the installation
+ process. The rest of the programs in this Info file are available
+ in appropriate subdirectories of 'awklib/eg'.
'extension/*'
The source code, manual pages, and infrastructure files for the
sample extensions included with 'gawk'. *Note Dynamic
Extensions::, for more information.
+'extras/*'
+ Additional non-essential files. Currently, this directory contains
+ some shell startup files to be installed in '/etc/profile.d' to aid
+ in manipulating the 'AWKPATH' and 'AWKLIBPATH' environment
+ variables. *Note Shell Startup Files::, for more information.
+
'posix/*'
Files needed for building 'gawk' on POSIX-compliant systems.
@@ -27585,11 +28417,12 @@ for your system yourself.
* Menu:
* Quick Installation:: Compiling 'gawk' under Unix.
+* Shell Startup Files:: Shell convenience functions.
* Additional Configuration Options:: Other compile-time options.
* Configuration Philosophy:: How it's all supposed to work.

-File: gawk.info, Node: Quick Installation, Next: Additional Configuration Options, Up: Unix Installation
+File: gawk.info, Node: Quick Installation, Next: Shell Startup Files, Up: Unix Installation
B.2.1 Compiling 'gawk' for Unix-Like Systems
--------------------------------------------
@@ -27644,9 +28477,42 @@ will be asked for your password, and you will have to have been set up
previously as a user who is allowed to run the 'sudo' command.

-File: gawk.info, Node: Additional Configuration Options, Next: Configuration Philosophy, Prev: Quick Installation, Up: Unix Installation
+File: gawk.info, Node: Shell Startup Files, Next: Additional Configuration Options, Prev: Quick Installation, Up: Unix Installation
+
+B.2.2 Shell Startup Files
+-------------------------
+
+The distribution contains shell startup files 'gawk.sh' and 'gawk.csh',
+containing functions to aid in manipulating the 'AWKPATH' and
+'AWKLIBPATH' environment variables. On a Fedora GNU/Linux system, these
+files should be installed in '/etc/profile.d'; on other platforms, the
+appropriate location may be different.
+
+'gawkpath_default'
+ Reset the 'AWKPATH' environment variable to its default value.
+
+'gawkpath_prepend'
+ Add the argument to the front of the 'AWKPATH' environment
+ variable.
+
+'gawkpath_append'
+ Add the argument to the end of the 'AWKPATH' environment variable.
+
+'gawklibpath_default'
+ Reset the 'AWKLIBPATH' environment variable to its default value.
+
+'gawklibpath_prepend'
+ Add the argument to the front of the 'AWKLIBPATH' environment
+ variable.
-B.2.2 Additional Configuration Options
+'gawklibpath_append'
+ Add the argument to the end of the 'AWKLIBPATH' environment
+ variable.
+
+
+File: gawk.info, Node: Additional Configuration Options, Next: Configuration Philosophy, Prev: Shell Startup Files, Up: Unix Installation
+
+B.2.3 Additional Configuration Options
--------------------------------------
There are several additional options you may use on the 'configure'
@@ -27690,7 +28556,7 @@ supplied by 'configure'.

File: gawk.info, Node: Configuration Philosophy, Prev: Additional Configuration Options, Up: Unix Installation
-B.2.3 The Configuration Process
+B.2.4 The Configuration Process
-------------------------------
This minor node is of interest only if you know something about using
@@ -31577,20 +32443,21 @@ Index
* --include option: Options. (line 159)
* --lint option: Command Line. (line 20)
* --lint option <1>: Options. (line 184)
-* --lint-old option: Options. (line 294)
+* --lint-old option: Options. (line 299)
* --load option: Options. (line 172)
+* --no-optimize option: Options. (line 285)
* --non-decimal-data option: Options. (line 209)
* --non-decimal-data option <1>: Nondecimal Data. (line 6)
* --non-decimal-data option, strtonum() function and: Nondecimal Data.
(line 35)
-* --optimize option: Options. (line 236)
-* --posix option: Options. (line 254)
+* --optimize option: Options. (line 234)
+* --posix option: Options. (line 257)
* --posix option, --traditional option and: Options. (line 272)
* --pretty-print option: Options. (line 223)
-* --profile option: Options. (line 242)
+* --profile option: Options. (line 245)
* --profile option <1>: Profiling. (line 12)
* --re-interval option: Options. (line 278)
-* --sandbox option: Options. (line 285)
+* --sandbox option: Options. (line 290)
* --sandbox option, disabling system() function: I/O Functions.
(line 129)
* --sandbox option, input redirection with getline: Getline. (line 19)
@@ -31600,7 +32467,7 @@ Index
* --traditional option: Options. (line 82)
* --traditional option, --posix option and: Options. (line 272)
* --use-lc-numeric option: Options. (line 218)
-* --version option: Options. (line 299)
+* --version option: Options. (line 304)
* --with-whiny-user-strftime configuration option: Additional Configuration Options.
(line 37)
* -b option: Options. (line 69)
@@ -31610,31 +32477,32 @@ Index
* -D option: Options. (line 108)
* -e option: Options. (line 117)
* -E option: Options. (line 125)
-* -e option <1>: Options. (line 335)
+* -e option <1>: Options. (line 340)
* -f option: Long. (line 12)
* -F option: Options. (line 21)
* -f option <1>: Options. (line 25)
-* -F option, -Ft sets FS to TAB: Options. (line 307)
+* -F option, -Ft sets FS to TAB: Options. (line 312)
* -F option, command-line: Command Line Field Separator.
(line 6)
-* -f option, multiple uses: Options. (line 312)
+* -f option, multiple uses: Options. (line 317)
* -g option: Options. (line 147)
* -h option: Options. (line 154)
* -i option: Options. (line 159)
* -l option: Options. (line 172)
* -l option <1>: Options. (line 184)
-* -L option: Options. (line 294)
+* -L option: Options. (line 299)
* -M option: Options. (line 203)
* -n option: Options. (line 209)
* -N option: Options. (line 218)
* -o option: Options. (line 223)
-* -O option: Options. (line 236)
-* -p option: Options. (line 242)
-* -P option: Options. (line 254)
+* -O option: Options. (line 234)
+* -p option: Options. (line 245)
+* -P option: Options. (line 257)
* -r option: Options. (line 278)
-* -S option: Options. (line 285)
+* -s option: Options. (line 285)
+* -S option: Options. (line 290)
* -v option: Options. (line 32)
-* -V option: Options. (line 299)
+* -V option: Options. (line 304)
* -v option <1>: Assignment Options. (line 12)
* -W option: Options. (line 47)
* . (period), regexp operator: Regexp Operators. (line 44)
@@ -31707,16 +32575,16 @@ Index
(line 6)
* \ (backslash), in bracket expressions: Bracket Expressions. (line 25)
* \ (backslash), in escape sequences: Escape Sequences. (line 6)
-* \ (backslash), in escape sequences <1>: Escape Sequences. (line 100)
+* \ (backslash), in escape sequences <1>: Escape Sequences. (line 103)
* \ (backslash), in escape sequences, POSIX and: Escape Sequences.
- (line 105)
+ (line 108)
* \ (backslash), in regexp constants: Computed Regexps. (line 30)
* \ (backslash), in shell commands: Quoting. (line 48)
* \ (backslash), regexp operator: Regexp Operators. (line 18)
-* \ (backslash), \" escape sequence: Escape Sequences. (line 82)
+* \ (backslash), \" escape sequence: Escape Sequences. (line 85)
* \ (backslash), \' operator (gawk): GNU Regexp Operators.
(line 59)
-* \ (backslash), \/ escape sequence: Escape Sequences. (line 73)
+* \ (backslash), \/ escape sequence: Escape Sequences. (line 76)
* \ (backslash), \< operator (gawk): GNU Regexp Operators.
(line 33)
* \ (backslash), \> operator (gawk): GNU Regexp Operators.
@@ -31893,7 +32761,7 @@ Index
* arrays, unassigned elements: Reference to Elements.
(line 18)
* artificial intelligence, gawk and: Distribution contents.
- (line 52)
+ (line 58)
* ASCII: Ordinal Functions. (line 45)
* ASCII <1>: Glossary. (line 196)
* asort: String Functions. (line 42)
@@ -31933,7 +32801,7 @@ Index
* awf (amazingly workable formatter) program: Glossary. (line 24)
* awk debugging, enabling: Options. (line 108)
* awk language, POSIX version: Assignment Ops. (line 138)
-* awk profiling, enabling: Options. (line 242)
+* awk profiling, enabling: Options. (line 245)
* awk programs: Getting Started. (line 12)
* awk programs <1>: Executable Scripts. (line 6)
* awk programs <2>: Two Rules. (line 6)
@@ -32004,16 +32872,16 @@ Index
(line 6)
* backslash (\), in bracket expressions: Bracket Expressions. (line 25)
* backslash (\), in escape sequences: Escape Sequences. (line 6)
-* backslash (\), in escape sequences <1>: Escape Sequences. (line 100)
+* backslash (\), in escape sequences <1>: Escape Sequences. (line 103)
* backslash (\), in escape sequences, POSIX and: Escape Sequences.
- (line 105)
+ (line 108)
* backslash (\), in regexp constants: Computed Regexps. (line 30)
* backslash (\), in shell commands: Quoting. (line 48)
* backslash (\), regexp operator: Regexp Operators. (line 18)
-* backslash (\), \" escape sequence: Escape Sequences. (line 82)
+* backslash (\), \" escape sequence: Escape Sequences. (line 85)
* backslash (\), \' operator (gawk): GNU Regexp Operators.
(line 59)
-* backslash (\), \/ escape sequence: Escape Sequences. (line 73)
+* backslash (\), \/ escape sequence: Escape Sequences. (line 76)
* backslash (\), \< operator (gawk): GNU Regexp Operators.
(line 33)
* backslash (\), \> operator (gawk): GNU Regexp Operators.
@@ -32083,7 +32951,7 @@ Index
* BINMODE variable: User-modified. (line 15)
* BINMODE variable <1>: PC Using. (line 16)
* bit-manipulation functions: Bitwise Functions. (line 6)
-* bits2str() user-defined function: Bitwise Functions. (line 72)
+* bits2str() user-defined function: Bitwise Functions. (line 69)
* bitwise AND: Bitwise Functions. (line 40)
* bitwise complement: Bitwise Functions. (line 44)
* bitwise OR: Bitwise Functions. (line 50)
@@ -32132,7 +33000,7 @@ Index
* Brennan, Michael <5>: Other Versions. (line 6)
* Brennan, Michael <6>: Other Versions. (line 48)
* Brian Kernighan's awk: When. (line 21)
-* Brian Kernighan's awk <1>: Escape Sequences. (line 109)
+* Brian Kernighan's awk <1>: Escape Sequences. (line 112)
* Brian Kernighan's awk <2>: GNU Regexp Operators.
(line 85)
* Brian Kernighan's awk <3>: Regexp Field Splitting.
@@ -32320,11 +33188,11 @@ Index
* convert string to upper case: String Functions. (line 530)
* converting integer array subscripts: Numeric Array Subscripts.
(line 31)
-* converting, dates to timestamps: Time Functions. (line 76)
+* converting, dates to timestamps: Time Functions. (line 78)
* converting, numbers to strings: Strings And Numbers. (line 6)
-* converting, numbers to strings <1>: Bitwise Functions. (line 111)
+* converting, numbers to strings <1>: Bitwise Functions. (line 108)
* converting, strings to numbers: Strings And Numbers. (line 6)
-* converting, strings to numbers <1>: Bitwise Functions. (line 111)
+* converting, strings to numbers <1>: Bitwise Functions. (line 108)
* CONVFMT variable: Strings And Numbers. (line 29)
* CONVFMT variable <1>: User-modified. (line 30)
* CONVFMT variable, and array subscripts: Numeric Array Subscripts.
@@ -32339,11 +33207,11 @@ Index
* cosine: Numeric Functions. (line 16)
* counting: Wc Program. (line 6)
* csh utility: Statements/Lines. (line 43)
-* csh utility, POSIXLY_CORRECT environment variable: Options. (line 353)
+* csh utility, POSIXLY_CORRECT environment variable: Options. (line 358)
* csh utility, |& operator, comparison with: Two-way I/O. (line 27)
* ctime() user-defined function: Function Example. (line 74)
* currency symbols, localization: Explaining gettext. (line 104)
-* current system time: Time Functions. (line 66)
+* current system time: Time Functions. (line 68)
* custom.h file: Configuration Philosophy.
(line 30)
* customized input parser: Input Parsers. (line 6)
@@ -32369,12 +33237,12 @@ Index
* dark corner, CONVFMT variable: Strings And Numbers. (line 39)
* dark corner, escape sequences: Other Arguments. (line 38)
* dark corner, escape sequences, for metacharacters: Escape Sequences.
- (line 141)
+ (line 144)
* dark corner, exit statement: Exit Statement. (line 30)
* dark corner, field separators: Full Line Fields. (line 22)
* dark corner, FILENAME variable: Getline Notes. (line 19)
-* dark corner, FILENAME variable <1>: Auto-set. (line 89)
-* dark corner, FNR/NR variables: Auto-set. (line 326)
+* dark corner, FILENAME variable <1>: Auto-set. (line 108)
+* dark corner, FNR/NR variables: Auto-set. (line 357)
* dark corner, format-control characters: Control Letters. (line 18)
* dark corner, format-control characters <1>: Control Letters.
(line 93)
@@ -32390,11 +33258,11 @@ Index
* dark corner, OFMT variable: OFMT. (line 27)
* dark corner, regexp as second argument to index(): String Functions.
(line 164)
-* dark corner, regexp constants: Using Constant Regexps.
+* dark corner, regexp constants: Standard Regexp Constants.
(line 6)
* dark corner, regexp constants, /= operator and: Assignment Ops.
(line 149)
-* dark corner, regexp constants, as arguments to user-defined functions: Using Constant Regexps.
+* dark corner, regexp constants, as arguments to user-defined functions: Standard Regexp Constants.
(line 43)
* dark corner, split() function: String Functions. (line 361)
* dark corner, strings, storing: gawk split records. (line 82)
@@ -32406,8 +33274,8 @@ Index
* database, group, reading: Group Functions. (line 6)
* database, users, reading: Passwd Functions. (line 6)
* date utility, GNU: Time Functions. (line 17)
-* date utility, POSIX: Time Functions. (line 253)
-* dates, converting to timestamps: Time Functions. (line 76)
+* date utility, POSIX: Time Functions. (line 255)
+* dates, converting to timestamps: Time Functions. (line 78)
* dates, information related to, localization: Explaining gettext.
(line 112)
* Davies, Stephen: Acknowledgments. (line 60)
@@ -32569,12 +33437,12 @@ Index
(line 132)
* differences in awk and gawk, command-line directories: Command-line directories.
(line 6)
-* differences in awk and gawk, ERRNO variable: Auto-set. (line 73)
+* differences in awk and gawk, ERRNO variable: Auto-set. (line 87)
* differences in awk and gawk, error messages: Special FD. (line 19)
* differences in awk and gawk, FIELDWIDTHS variable: User-modified.
(line 37)
* differences in awk and gawk, FPAT variable: User-modified. (line 43)
-* differences in awk and gawk, FUNCTAB variable: Auto-set. (line 115)
+* differences in awk and gawk, FUNCTAB variable: Auto-set. (line 134)
* differences in awk and gawk, function arguments (gawk): Calling Built-in.
(line 16)
* differences in awk and gawk, getline command: Getline. (line 19)
@@ -32597,17 +33465,19 @@ Index
(line 262)
* differences in awk and gawk, print/printf statements: Format Modifiers.
(line 13)
-* differences in awk and gawk, PROCINFO array: Auto-set. (line 129)
+* differences in awk and gawk, PROCINFO array: Auto-set. (line 148)
* differences in awk and gawk, read timeouts: Read Timeout. (line 6)
* differences in awk and gawk, record separators: awk split records.
(line 124)
-* differences in awk and gawk, regexp constants: Using Constant Regexps.
+* differences in awk and gawk, regexp constants: Standard Regexp Constants.
(line 43)
* differences in awk and gawk, regular expressions: Case-sensitivity.
(line 26)
+* differences in awk and gawk, retrying input: Retrying Input.
+ (line 6)
* differences in awk and gawk, RS/RT variables: gawk split records.
(line 58)
-* differences in awk and gawk, RT variable: Auto-set. (line 264)
+* differences in awk and gawk, RT variable: Auto-set. (line 295)
* differences in awk and gawk, single-character fields: Single Character Fields.
(line 6)
* differences in awk and gawk, split() function: String Functions.
@@ -32615,7 +33485,7 @@ Index
* differences in awk and gawk, strings: Scalar Constants. (line 20)
* differences in awk and gawk, strings, storing: gawk split records.
(line 76)
-* differences in awk and gawk, SYMTAB variable: Auto-set. (line 268)
+* differences in awk and gawk, SYMTAB variable: Auto-set. (line 299)
* differences in awk and gawk, TEXTDOMAIN variable: User-modified.
(line 152)
* differences in awk and gawk, trunc-mod operation: Arithmetic Ops.
@@ -32651,12 +33521,12 @@ Index
* dump debugger command: Miscellaneous Debugger Commands.
(line 9)
* dupword.awk program: Dupword Program. (line 31)
-* dynamic profiling: Profiling. (line 178)
+* dynamic profiling: Profiling. (line 177)
* dynamically loaded extensions: Dynamic Extensions. (line 6)
* e debugger command (alias for enable): Breakpoint Control. (line 73)
* EBCDIC: Ordinal Functions. (line 45)
-* effective group ID of gawk user: Auto-set. (line 134)
-* effective user ID of gawk user: Auto-set. (line 138)
+* effective group ID of gawk user: Auto-set. (line 153)
+* effective user ID of gawk user: Auto-set. (line 161)
* egrep utility: Bracket Expressions. (line 34)
* egrep utility <1>: Egrep Program. (line 6)
* egrep.awk program: Egrep Program. (line 53)
@@ -32711,14 +33581,14 @@ Index
(line 11)
* equals sign (=), == operator <1>: Precedence. (line 64)
* EREs (Extended Regular Expressions): Bracket Expressions. (line 34)
-* ERRNO variable: Auto-set. (line 73)
+* ERRNO variable: Auto-set. (line 87)
* ERRNO variable <1>: TCP/IP Networking. (line 54)
* ERRNO variable, with BEGINFILE pattern: BEGINFILE/ENDFILE. (line 26)
* ERRNO variable, with close() function: Close Files And Pipes.
(line 140)
* ERRNO variable, with getline command: Getline. (line 19)
* error handling: Special FD. (line 19)
-* error handling, ERRNO variable and: Auto-set. (line 73)
+* error handling, ERRNO variable and: Auto-set. (line 87)
* error output: Special FD. (line 6)
* escape processing, gsub()/gensub()/sub() functions: Gory Details.
(line 6)
@@ -32759,7 +33629,7 @@ Index
(line 102)
* exp: Numeric Functions. (line 19)
* expand utility: Very Simple. (line 73)
-* Expat XML parser library: gawkextlib. (line 35)
+* Expat XML parser library: gawkextlib. (line 37)
* exponent: Numeric Functions. (line 19)
* expressions: Expressions. (line 6)
* expressions, as patterns: Expression Patterns. (line 6)
@@ -32778,7 +33648,7 @@ Index
(line 6)
* extension API version: Extension Versioning.
(line 6)
-* extension API, version number: Auto-set. (line 223)
+* extension API, version number: Auto-set. (line 246)
* extension example: Extension Example. (line 6)
* extension registration: Registration Functions.
(line 6)
@@ -32834,7 +33704,6 @@ Index
* field separators, choice of: Field Separators. (line 50)
* field separators, FIELDWIDTHS variable and: User-modified. (line 37)
* field separators, FPAT variable and: User-modified. (line 43)
-* field separators, POSIX and: Fields. (line 6)
* field separators, regular expressions as: Field Separators. (line 50)
* field separators, regular expressions as <1>: Regexp Field Splitting.
(line 6)
@@ -32862,7 +33731,7 @@ Index
* file names, in compatibility mode: Special Caveats. (line 9)
* file names, standard streams in gawk: Special FD. (line 48)
* FILENAME variable: Reading Files. (line 6)
-* FILENAME variable <1>: Auto-set. (line 89)
+* FILENAME variable <1>: Auto-set. (line 108)
* FILENAME variable, getline, setting with: Getline Notes. (line 19)
* filenames, assignments as: Ignoring Assigns. (line 6)
* files, .gmo: Explaining gettext. (line 42)
@@ -32930,8 +33799,8 @@ Index
* fnmatch() extension function: Extension Sample Fnmatch.
(line 12)
* FNR variable: Records. (line 6)
-* FNR variable <1>: Auto-set. (line 99)
-* FNR variable, changing: Auto-set. (line 326)
+* FNR variable <1>: Auto-set. (line 118)
+* FNR variable, changing: Auto-set. (line 357)
* for statement: For Statement. (line 6)
* for statement, looping over arrays: Scanning an Array. (line 20)
* fork() extension function: Extension Sample Fork.
@@ -32941,8 +33810,8 @@ Index
(line 57)
* format specifiers, printf statement: Control Letters. (line 6)
* format specifiers, strftime() function (gawk): Time Functions.
- (line 89)
-* format time string: Time Functions. (line 48)
+ (line 91)
+* format time string: Time Functions. (line 50)
* formats, numeric output: OFMT. (line 6)
* formatting output: Printf. (line 6)
* formatting strings: String Functions. (line 384)
@@ -32983,7 +33852,7 @@ Index
* FSF (Free Software Foundation) <3>: Glossary. (line 405)
* fts() extension function: Extension Sample File Functions.
(line 60)
-* FUNCTAB array: Auto-set. (line 115)
+* FUNCTAB array: Auto-set. (line 134)
* function calls: Function Calls. (line 6)
* function calls, indirect: Indirect Calls. (line 6)
* function calls, indirect, @-notation for: Indirect Calls. (line 47)
@@ -33034,8 +33903,8 @@ Index
* G., Daniel Richard: Acknowledgments. (line 60)
* G., Daniel Richard <1>: Maintainers. (line 14)
* Garfinkle, Scott: Contributors. (line 35)
-* gawk program, dynamic profiling: Profiling. (line 178)
-* gawk version: Auto-set. (line 198)
+* gawk program, dynamic profiling: Profiling. (line 177)
+* gawk version: Auto-set. (line 221)
* gawk, ARGIND variable in: Other Arguments. (line 15)
* gawk, awk and: Preface. (line 21)
* gawk, awk and <1>: This Manual. (line 14)
@@ -33056,10 +33925,10 @@ Index
* gawk, ERRNO variable in <1>: Close Files And Pipes.
(line 140)
* gawk, ERRNO variable in <2>: BEGINFILE/ENDFILE. (line 26)
-* gawk, ERRNO variable in <3>: Auto-set. (line 73)
+* gawk, ERRNO variable in <3>: Auto-set. (line 87)
* gawk, ERRNO variable in <4>: TCP/IP Networking. (line 54)
-* gawk, escape sequences: Escape Sequences. (line 118)
-* gawk, extensions, disabling: Options. (line 254)
+* gawk, escape sequences: Escape Sequences. (line 121)
+* gawk, extensions, disabling: Options. (line 257)
* gawk, features, adding: Adding Code. (line 6)
* gawk, features, advanced: Advanced Features. (line 6)
* gawk, field separators and: User-modified. (line 71)
@@ -33071,7 +33940,7 @@ Index
* gawk, FPAT variable in: Splitting By Content.
(line 25)
* gawk, FPAT variable in <1>: User-modified. (line 43)
-* gawk, FUNCTAB array in: Auto-set. (line 115)
+* gawk, FUNCTAB array in: Auto-set. (line 134)
* gawk, function arguments and: Calling Built-in. (line 16)
* gawk, hexadecimal numbers and: Nondecimal-numbers. (line 41)
* gawk, IGNORECASE variable in: Case-sensitivity. (line 26)
@@ -33099,10 +33968,10 @@ Index
* gawk, newlines in: Statements/Lines. (line 12)
* gawk, octal numbers and: Nondecimal-numbers. (line 41)
* gawk, predefined variables and: Built-in Variables. (line 14)
-* gawk, PROCINFO array in: Auto-set. (line 129)
-* gawk, PROCINFO array in <1>: Time Functions. (line 47)
-* gawk, PROCINFO array in <2>: Two-way I/O. (line 108)
-* gawk, regexp constants and: Using Constant Regexps.
+* gawk, PROCINFO array in: Auto-set. (line 148)
+* gawk, PROCINFO array in <1>: Time Functions. (line 49)
+* gawk, PROCINFO array in <2>: Two-way I/O. (line 114)
+* gawk, regexp constants and: Standard Regexp Constants.
(line 28)
* gawk, regular expressions, case sensitivity: Case-sensitivity.
(line 26)
@@ -33111,25 +33980,31 @@ Index
* gawk, regular expressions, precedence: Regexp Operators. (line 161)
* gawk, RT variable in: awk split records. (line 124)
* gawk, RT variable in <1>: Multiple Line. (line 130)
-* gawk, RT variable in <2>: Auto-set. (line 264)
+* gawk, RT variable in <2>: Auto-set. (line 295)
* gawk, See Also awk: Preface. (line 34)
* gawk, source code, obtaining: Getting. (line 6)
* gawk, splitting fields and: Constant Size. (line 86)
* gawk, string-translation functions: I18N Functions. (line 6)
-* gawk, SYMTAB array in: Auto-set. (line 268)
+* gawk, SYMTAB array in: Auto-set. (line 299)
* gawk, TEXTDOMAIN variable in: User-modified. (line 152)
* gawk, timestamps: Time Functions. (line 6)
* gawk, uses for: Preface. (line 34)
-* gawk, versions of, information about, printing: Options. (line 299)
+* gawk, versions of, information about, printing: Options. (line 304)
* gawk, VMS version of: VMS Installation. (line 6)
* gawk, word-boundary operator: GNU Regexp Operators.
(line 66)
* gawkextlib: gawkextlib. (line 6)
* gawkextlib project: gawkextlib. (line 6)
+* gawklibpath_append shell function: Shell Startup Files. (line 29)
+* gawklibpath_default shell function: Shell Startup Files. (line 22)
+* gawklibpath_prepend shell function: Shell Startup Files. (line 25)
+* gawkpath_append shell function: Shell Startup Files. (line 19)
+* gawkpath_default shell function: Shell Startup Files. (line 12)
+* gawkpath_prepend shell function: Shell Startup Files. (line 15)
* General Public License (GPL): Glossary. (line 396)
* General Public License, See GPL: Manual History. (line 11)
* generate time values: Time Functions. (line 25)
-* gensub: Using Constant Regexps.
+* gensub: Standard Regexp Constants.
(line 43)
* gensub <1>: String Functions. (line 89)
* gensub() function (gawk), escape processing: Gory Details. (line 6)
@@ -33179,7 +34054,7 @@ Index
* gettext() function (C library): Explaining gettext. (line 63)
* gettimeofday() extension function: Extension Sample Time.
(line 12)
-* git utility: gawkextlib. (line 29)
+* git utility: gawkextlib. (line 31)
* git utility <1>: Other Versions. (line 29)
* git utility <2>: Accessing The Source.
(line 10)
@@ -33207,9 +34082,9 @@ Index
* Grigera, Juan: Contributors. (line 58)
* group database, reading: Group Functions. (line 6)
* group file: Group Functions. (line 6)
-* group ID of gawk user: Auto-set. (line 147)
+* group ID of gawk user: Auto-set. (line 170)
* groups, information about: Group Functions. (line 6)
-* gsub: Using Constant Regexps.
+* gsub: Standard Regexp Constants.
(line 43)
* gsub <1>: String Functions. (line 139)
* gsub() function, arguments of: String Functions. (line 463)
@@ -33229,7 +34104,7 @@ Index
* history expansion, in debugger: Readline Support. (line 6)
* histsort.awk program: History Sorting. (line 25)
* Hughes, Phil: Acknowledgments. (line 43)
-* HUP signal, for dynamic profiling: Profiling. (line 210)
+* HUP signal, for dynamic profiling: Profiling. (line 209)
* hyphen (-), - operator: Precedence. (line 51)
* hyphen (-), - operator <1>: Precedence. (line 57)
* hyphen (-), -- operator: Increment Ops. (line 48)
@@ -33311,7 +34186,9 @@ Index
* installing gawk: Installation. (line 6)
* instruction tracing, in debugger: Debugger Info. (line 90)
* int: Numeric Functions. (line 24)
-* INT signal (MS-Windows): Profiling. (line 213)
+* INT signal (MS-Windows): Profiling. (line 212)
+* intdiv: Numeric Functions. (line 29)
+* intdiv <1>: Numeric Functions. (line 29)
* integer array indices: Numeric Array Subscripts.
(line 31)
* integers, arbitrary precision: Arbitrary Precision Integers.
@@ -33367,7 +34244,7 @@ Index
* Kernighan, Brian <8>: Other Versions. (line 13)
* Kernighan, Brian <9>: Basic Data Typing. (line 54)
* Kernighan, Brian <10>: Glossary. (line 206)
-* kill command, dynamic profiling: Profiling. (line 187)
+* kill command, dynamic profiling: Profiling. (line 186)
* Knights, jedi: Undocumented. (line 6)
* Kwok, Conrad: Contributors. (line 35)
* l debugger command (alias for list): Miscellaneous Debugger Commands.
@@ -33440,7 +34317,7 @@ Index
* lint checking, empty programs: Command Line. (line 16)
* lint checking, issuing warnings: Options. (line 184)
* lint checking, POSIXLY_CORRECT environment variable: Options.
- (line 338)
+ (line 343)
* lint checking, undefined functions: Pass By Value/Reference.
(line 85)
* LINT variable: User-modified. (line 87)
@@ -33461,9 +34338,9 @@ Index
* localization: I18N and L10N. (line 6)
* localization, See internationalization, localization: I18N and L10N.
(line 6)
-* log: Numeric Functions. (line 29)
+* log: Numeric Functions. (line 44)
* log files, timestamps in: Time Functions. (line 6)
-* logarithm: Numeric Functions. (line 29)
+* logarithm: Numeric Functions. (line 44)
* logical false/true: Truth Values. (line 6)
* logical operators, See Boolean expressions: Boolean Ops. (line 6)
* login information: Passwd Functions. (line 16)
@@ -33500,12 +34377,12 @@ Index
(line 9)
* matching, leftmost longest: Multiple Line. (line 26)
* matching, null strings: String Functions. (line 537)
-* mawk utility: Escape Sequences. (line 118)
+* mawk utility: Escape Sequences. (line 121)
* mawk utility <1>: Getline/Pipe. (line 62)
* mawk utility <2>: Concatenation. (line 36)
* mawk utility <3>: Nextfile Statement. (line 47)
* mawk utility <4>: Other Versions. (line 48)
-* maximum precision supported by MPFR library: Auto-set. (line 212)
+* maximum precision supported by MPFR library: Auto-set. (line 235)
* McIlroy, Doug: Glossary. (line 257)
* McPhee, Patrick: Contributors. (line 101)
* message object files: Explaining gettext. (line 42)
@@ -33517,8 +34394,8 @@ Index
(line 48)
* messages from extensions: Printing Messages. (line 6)
* metacharacters in regular expressions: Regexp Operators. (line 6)
-* metacharacters, escape sequences for: Escape Sequences. (line 137)
-* minimum precision required by MPFR library: Auto-set. (line 215)
+* metacharacters, escape sequences for: Escape Sequences. (line 140)
+* minimum precision required by MPFR library: Auto-set. (line 238)
* mktime: Time Functions. (line 25)
* modifiers, in format specifiers: Format Modifiers. (line 6)
* monetary information, localization: Explaining gettext. (line 104)
@@ -33538,10 +34415,8 @@ Index
* networks, programming: TCP/IP Networking. (line 6)
* networks, support for: Special Network. (line 6)
* newlines: Statements/Lines. (line 6)
-* newlines <1>: Options. (line 260)
+* newlines <1>: Options. (line 263)
* newlines <2>: Boolean Ops. (line 69)
-* newlines, as field separators: Default Field Splitting.
- (line 6)
* newlines, as record separators: awk split records. (line 12)
* newlines, in dynamic regexps: Computed Regexps. (line 60)
* newlines, in regexp constants: Computed Regexps. (line 70)
@@ -33568,7 +34443,7 @@ Index
* nexti debugger command: Debugger Execution Control.
(line 49)
* NF variable: Fields. (line 33)
-* NF variable <1>: Auto-set. (line 104)
+* NF variable <1>: Auto-set. (line 123)
* NF variable, decrementing: Changing Fields. (line 107)
* ni debugger command (alias for nexti): Debugger Execution Control.
(line 49)
@@ -33577,8 +34452,8 @@ Index
(line 23)
* not Boolean-logic operator: Boolean Ops. (line 6)
* NR variable: Records. (line 6)
-* NR variable <1>: Auto-set. (line 124)
-* NR variable, changing: Auto-set. (line 326)
+* NR variable <1>: Auto-set. (line 143)
+* NR variable, changing: Auto-set. (line 357)
* null strings: awk split records. (line 114)
* null strings <1>: Regexp Field Splitting.
(line 43)
@@ -33591,7 +34466,7 @@ Index
* null strings, converting numbers to strings: Strings And Numbers.
(line 21)
* null strings, matching: String Functions. (line 537)
-* number as string of bits: Bitwise Functions. (line 111)
+* number as string of bits: Bitwise Functions. (line 108)
* number of array elements: String Functions. (line 200)
* number sign (#), #! (executable scripts): Executable Scripts.
(line 6)
@@ -33602,7 +34477,7 @@ Index
* numbers, Cliff random: Cliff Random Function.
(line 6)
* numbers, converting: Strings And Numbers. (line 6)
-* numbers, converting <1>: Bitwise Functions. (line 111)
+* numbers, converting <1>: Bitwise Functions. (line 108)
* numbers, converting, to strings: User-modified. (line 30)
* numbers, converting, to strings <1>: User-modified. (line 104)
* numbers, hexadecimal: Nondecimal-numbers. (line 6)
@@ -33611,7 +34486,7 @@ Index
* numeric constants: Scalar Constants. (line 6)
* numeric functions: Numeric Functions. (line 6)
* numeric, output format: OFMT. (line 6)
-* numeric, strings: Variable Typing. (line 6)
+* numeric, strings: Variable Typing. (line 67)
* o debugger command (alias for option): Debugger Info. (line 57)
* obsolete features: Obsolete. (line 6)
* octal numbers: Nondecimal-numbers. (line 6)
@@ -33698,7 +34573,7 @@ Index
* p debugger command (alias for print): Viewing And Changing Data.
(line 35)
* Papadopoulos, Panos: Contributors. (line 129)
-* parent process ID of gawk process: Auto-set. (line 187)
+* parent process ID of gawk process: Auto-set. (line 210)
* parentheses (), in a profile: Profiling. (line 146)
* parentheses (), regexp operator: Regexp Operators. (line 81)
* password file: Passwd Functions. (line 16)
@@ -33742,14 +34617,14 @@ Index
* plus sign (+), += operator <1>: Precedence. (line 94)
* plus sign (+), regexp operator: Regexp Operators. (line 105)
* pointers to functions: Indirect Calls. (line 6)
-* portability: Escape Sequences. (line 100)
+* portability: Escape Sequences. (line 103)
* portability, #! (executable scripts): Executable Scripts. (line 33)
* portability, ** operator and: Arithmetic Ops. (line 81)
* portability, **= operator and: Assignment Ops. (line 144)
* portability, ARGV variable: Executable Scripts. (line 59)
* portability, backslash continuation and: Statements/Lines. (line 30)
* portability, backslash in escape sequences: Escape Sequences.
- (line 105)
+ (line 108)
* portability, close() function and: Close Files And Pipes.
(line 81)
* portability, data files as single record: gawk split records.
@@ -33767,7 +34642,7 @@ Index
* portability, NF variable, decrementing: Changing Fields. (line 115)
* portability, operators: Increment Ops. (line 60)
* portability, operators, not in POSIX awk: Precedence. (line 97)
-* portability, POSIXLY_CORRECT environment variable: Options. (line 358)
+* portability, POSIXLY_CORRECT environment variable: Options. (line 363)
* portability, substr() function: String Functions. (line 513)
* portable object files: Explaining gettext. (line 37)
* portable object files <1>: Translator i18n. (line 6)
@@ -33788,7 +34663,7 @@ Index
* POSIX awk, < operator and: Getline/File. (line 26)
* POSIX awk, arithmetic operators and: Arithmetic Ops. (line 30)
* POSIX awk, backslashes in string constants: Escape Sequences.
- (line 105)
+ (line 108)
* POSIX awk, BEGIN/END patterns: I/O And BEGIN/END. (line 15)
* POSIX awk, bracket expressions and: Bracket Expressions. (line 34)
* POSIX awk, bracket expressions and, character classes: Bracket Expressions.
@@ -33799,17 +34674,15 @@ Index
* POSIX awk, changes in awk versions: POSIX. (line 6)
* POSIX awk, continue statement and: Continue Statement. (line 44)
* POSIX awk, CONVFMT variable and: User-modified. (line 30)
-* POSIX awk, date utility and: Time Functions. (line 253)
-* POSIX awk, field separators and: Fields. (line 6)
-* POSIX awk, field separators and <1>: Full Line Fields. (line 16)
-* POSIX awk, FS variable and: User-modified. (line 60)
+* POSIX awk, date utility and: Time Functions. (line 255)
+* POSIX awk, field separators and: Full Line Fields. (line 16)
* POSIX awk, function keyword in: Definition Syntax. (line 99)
* POSIX awk, functions and, gsub()/sub(): Gory Details. (line 90)
* POSIX awk, functions and, length(): String Functions. (line 179)
* POSIX awk, GNU long options and: Options. (line 15)
* POSIX awk, interval expressions in: Regexp Operators. (line 135)
* POSIX awk, next/nextfile statements and: Next Statement. (line 44)
-* POSIX awk, numeric strings and: Variable Typing. (line 6)
+* POSIX awk, numeric strings and: Variable Typing. (line 67)
* POSIX awk, OFMT variable and: OFMT. (line 27)
* POSIX awk, OFMT variable and <1>: Strings And Numbers. (line 56)
* POSIX awk, period (.), using: Regexp Operators. (line 51)
@@ -33817,12 +34690,12 @@ Index
* POSIX awk, regular expressions and: Regexp Operators. (line 161)
* POSIX awk, timestamps and: Time Functions. (line 6)
* POSIX awk, | I/O operator and: Getline/Pipe. (line 56)
-* POSIX mode: Options. (line 254)
-* POSIX mode <1>: Options. (line 338)
+* POSIX mode: Options. (line 257)
+* POSIX mode <1>: Options. (line 343)
* POSIX, awk and: Preface. (line 21)
* POSIX, gawk extensions not included in: POSIX/GNU. (line 6)
* POSIX, programs, implementing in awk: Clones. (line 6)
-* POSIXLY_CORRECT environment variable: Options. (line 338)
+* POSIXLY_CORRECT environment variable: Options. (line 343)
* PREC variable: User-modified. (line 124)
* precedence: Increment Ops. (line 60)
* precedence <1>: Precedence. (line 6)
@@ -33869,14 +34742,14 @@ Index
* printing, unduplicated lines of text: Uniq Program. (line 6)
* printing, user information: Id Program. (line 6)
* private variables: Library Names. (line 11)
-* process group ID of gawk process: Auto-set. (line 181)
-* process ID of gawk process: Auto-set. (line 184)
+* process group ID of gawk process: Auto-set. (line 204)
+* process ID of gawk process: Auto-set. (line 207)
* processes, two-way communications with: Two-way I/O. (line 6)
* processing data: Basic High Level. (line 6)
-* PROCINFO array: Auto-set. (line 129)
-* PROCINFO array <1>: Time Functions. (line 47)
+* PROCINFO array: Auto-set. (line 148)
+* PROCINFO array <1>: Time Functions. (line 49)
* PROCINFO array <2>: Passwd Functions. (line 6)
-* PROCINFO array, and communications via ptys: Two-way I/O. (line 108)
+* PROCINFO array, and communications via ptys: Two-way I/O. (line 114)
* PROCINFO array, and group membership: Group Functions. (line 6)
* PROCINFO array, and user and group ID numbers: Id Program. (line 15)
* PROCINFO array, testing the field splitting: Passwd Functions.
@@ -33884,8 +34757,8 @@ Index
* PROCINFO, values of sorted_in: Controlling Scanning.
(line 26)
* profiling awk programs: Profiling. (line 6)
-* profiling awk programs, dynamically: Profiling. (line 178)
-* program identifiers: Auto-set. (line 150)
+* profiling awk programs, dynamically: Profiling. (line 177)
+* program identifiers: Auto-set. (line 173)
* program, definition of: Getting Started. (line 21)
* programming conventions, --non-decimal-data option: Nondecimal Data.
(line 35)
@@ -33921,7 +34794,7 @@ Index
* QuikTrim Awk: Other Versions. (line 139)
* quit debugger command: Miscellaneous Debugger Commands.
(line 102)
-* QUIT signal (MS-Windows): Profiling. (line 213)
+* QUIT signal (MS-Windows): Profiling. (line 212)
* quoting in gawk command lines: Long. (line 26)
* quoting in gawk command lines, tricks for: Quoting. (line 91)
* quoting, for small awk programs: Comments. (line 27)
@@ -33930,12 +34803,12 @@ Index
* Rakitzis, Byron: History Sorting. (line 25)
* Ramey, Chet: Acknowledgments. (line 60)
* Ramey, Chet <1>: General Data Types. (line 6)
-* rand: Numeric Functions. (line 34)
+* rand: Numeric Functions. (line 49)
* random numbers, Cliff: Cliff Random Function.
(line 6)
* random numbers, rand()/srand() functions: Numeric Functions.
- (line 34)
-* random numbers, seed of: Numeric Functions. (line 64)
+ (line 49)
+* random numbers, seed of: Numeric Functions. (line 79)
* range expressions (regexps): Bracket Expressions. (line 6)
* range patterns: Ranges. (line 6)
* range patterns, line continuation and: Ranges. (line 64)
@@ -33980,7 +34853,7 @@ Index
(line 103)
* regexp constants, /=.../, /= operator and: Assignment Ops. (line 149)
* regexp constants, as patterns: Expression Patterns. (line 34)
-* regexp constants, in gawk: Using Constant Regexps.
+* regexp constants, in gawk: Standard Regexp Constants.
(line 28)
* regexp constants, slashes vs. quotes: Computed Regexps. (line 30)
* regexp constants, vs. string constants: Computed Regexps. (line 40)
@@ -34022,6 +34895,7 @@ Index
* relational operators, See comparison operators: Typing and Comparison.
(line 9)
* replace in string: String Functions. (line 409)
+* retrying input: Retrying Input. (line 6)
* return debugger command: Debugger Execution Control.
(line 54)
* return statement, user-defined functions: Return Statement. (line 6)
@@ -34045,7 +34919,7 @@ Index
* right shift: Bitwise Functions. (line 54)
* right shift, bitwise: Bitwise Functions. (line 32)
* Ritchie, Dennis: Basic Data Typing. (line 54)
-* RLENGTH variable: Auto-set. (line 251)
+* RLENGTH variable: Auto-set. (line 282)
* RLENGTH variable, match() function and: String Functions. (line 227)
* Robbins, Arnold: Command Line Field Separator.
(line 71)
@@ -34071,11 +34945,11 @@ Index
* RS variable <1>: User-modified. (line 133)
* RS variable, multiline records and: Multiple Line. (line 17)
* rshift: Bitwise Functions. (line 54)
-* RSTART variable: Auto-set. (line 257)
+* RSTART variable: Auto-set. (line 288)
* RSTART variable, match() function and: String Functions. (line 227)
* RT variable: awk split records. (line 124)
* RT variable <1>: Multiple Line. (line 130)
-* RT variable <2>: Auto-set. (line 264)
+* RT variable <2>: Auto-set. (line 295)
* Rubin, Paul: History. (line 30)
* Rubin, Paul <1>: Contributors. (line 16)
* rule, definition of: Getting Started. (line 21)
@@ -34086,14 +34960,14 @@ Index
(line 68)
* sample debugging session: Sample Debugging Session.
(line 6)
-* sandbox mode: Options. (line 285)
+* sandbox mode: Options. (line 290)
* save debugger options: Debugger Info. (line 85)
* scalar or array: Type Functions. (line 11)
* scalar values: Basic Data Typing. (line 13)
* scanning arrays: Scanning an Array. (line 6)
* scanning multidimensional arrays: Multiscanning. (line 11)
* Schorr, Andrew: Acknowledgments. (line 60)
-* Schorr, Andrew <1>: Auto-set. (line 296)
+* Schorr, Andrew <1>: Auto-set. (line 327)
* Schorr, Andrew <2>: Contributors. (line 134)
* Schreiber, Bert: Acknowledgments. (line 38)
* Schreiber, Rita: Acknowledgments. (line 38)
@@ -34112,7 +34986,7 @@ Index
* sed utility: Full Line Fields. (line 22)
* sed utility <1>: Simple Sed. (line 6)
* sed utility <2>: Glossary. (line 16)
-* seeding random number generator: Numeric Functions. (line 64)
+* seeding random number generator: Numeric Functions. (line 79)
* semicolon (;), AWKPATH variable and: PC Using. (line 9)
* semicolon (;), separating statements in actions: Statements/Lines.
(line 90)
@@ -34124,7 +34998,6 @@ Index
* separators, field <1>: User-modified. (line 113)
* separators, field, FIELDWIDTHS variable and: User-modified. (line 37)
* separators, field, FPAT variable and: User-modified. (line 43)
-* separators, field, POSIX and: Fields. (line 6)
* separators, for records: awk split records. (line 6)
* separators, for records <1>: awk split records. (line 85)
* separators, for records <2>: User-modified. (line 133)
@@ -34176,14 +35049,15 @@ Index
* sidebar, A Constant's Base Does Not Affect Its Value: Nondecimal-numbers.
(line 63)
* sidebar, Backslash Before Regular Characters: Escape Sequences.
- (line 103)
+ (line 106)
+* sidebar, Beware The Smoke and Mirrors!: Bitwise Functions. (line 126)
* sidebar, Changing FS Does Not Affect the Fields: Full Line Fields.
(line 14)
-* sidebar, Changing NR and FNR: Auto-set. (line 324)
+* sidebar, Changing NR and FNR: Auto-set. (line 355)
* sidebar, Controlling Output Buffering with system(): I/O Functions.
(line 164)
* sidebar, Escape Sequences for Metacharacters: Escape Sequences.
- (line 135)
+ (line 138)
* sidebar, FS and IGNORECASE: Field Splitting Summary.
(line 37)
* sidebar, Interactive Versus Noninteractive Buffering: I/O Functions.
@@ -34205,19 +35079,19 @@ Index
(line 130)
* sidebar, Using \n in Bracket Expressions of Dynamic Regexps: Computed Regexps.
(line 58)
-* SIGHUP signal, for dynamic profiling: Profiling. (line 210)
-* SIGINT signal (MS-Windows): Profiling. (line 213)
-* signals, HUP/SIGHUP, for profiling: Profiling. (line 210)
-* signals, INT/SIGINT (MS-Windows): Profiling. (line 213)
-* signals, QUIT/SIGQUIT (MS-Windows): Profiling. (line 213)
-* signals, USR1/SIGUSR1, for profiling: Profiling. (line 187)
+* SIGHUP signal, for dynamic profiling: Profiling. (line 209)
+* SIGINT signal (MS-Windows): Profiling. (line 212)
+* signals, HUP/SIGHUP, for profiling: Profiling. (line 209)
+* signals, INT/SIGINT (MS-Windows): Profiling. (line 212)
+* signals, QUIT/SIGQUIT (MS-Windows): Profiling. (line 212)
+* signals, USR1/SIGUSR1, for profiling: Profiling. (line 186)
* signature program: Signature Program. (line 6)
-* SIGQUIT signal (MS-Windows): Profiling. (line 213)
-* SIGUSR1 signal, for dynamic profiling: Profiling. (line 187)
+* SIGQUIT signal (MS-Windows): Profiling. (line 212)
+* SIGUSR1 signal, for dynamic profiling: Profiling. (line 186)
* silent debugger command: Debugger Execution Control.
(line 10)
-* sin: Numeric Functions. (line 75)
-* sine: Numeric Functions. (line 75)
+* sin: Numeric Functions. (line 90)
+* sine: Numeric Functions. (line 90)
* single quote ('): One-shot. (line 15)
* single quote (') in gawk command lines: Long. (line 35)
* single quote ('), in shell commands: Quoting. (line 48)
@@ -34267,10 +35141,10 @@ Index
* sprintf() function, OFMT variable and: User-modified. (line 113)
* sprintf() function, print/printf statements and: Round Function.
(line 6)
-* sqrt: Numeric Functions. (line 78)
+* sqrt: Numeric Functions. (line 93)
* square brackets ([]), regexp operator: Regexp Operators. (line 56)
-* square root: Numeric Functions. (line 78)
-* srand: Numeric Functions. (line 82)
+* square root: Numeric Functions. (line 93)
+* srand: Numeric Functions. (line 97)
* stack frame: Debugging Terms. (line 10)
* Stallman, Richard: Manual History. (line 6)
* Stallman, Richard <1>: Acknowledgments. (line 18)
@@ -34294,7 +35168,7 @@ Index
(line 79)
* stream editors: Full Line Fields. (line 22)
* stream editors <1>: Simple Sed. (line 6)
-* strftime: Time Functions. (line 48)
+* strftime: Time Functions. (line 50)
* string constants: Scalar Constants. (line 15)
* string constants, vs. regexp constants: Computed Regexps. (line 40)
* string extraction (internationalization): String Extraction.
@@ -34307,7 +35181,7 @@ Index
* string-translation functions: I18N Functions. (line 6)
* strings splitting, example: String Functions. (line 334)
* strings, converting: Strings And Numbers. (line 6)
-* strings, converting <1>: Bitwise Functions. (line 111)
+* strings, converting <1>: Bitwise Functions. (line 108)
* strings, converting letter case: String Functions. (line 523)
* strings, converting, numbers to: User-modified. (line 30)
* strings, converting, numbers to <1>: User-modified. (line 104)
@@ -34318,11 +35192,11 @@ Index
* strings, merging arrays into: Join Function. (line 6)
* strings, null: Regexp Field Splitting.
(line 43)
-* strings, numeric: Variable Typing. (line 6)
+* strings, numeric: Variable Typing. (line 67)
* strtonum: String Functions. (line 391)
* strtonum() function (gawk), --non-decimal-data option and: Nondecimal Data.
(line 35)
-* sub: Using Constant Regexps.
+* sub: Standard Regexp Constants.
(line 43)
* sub <1>: String Functions. (line 409)
* sub() function, arguments of: String Functions. (line 463)
@@ -34342,13 +35216,13 @@ Index
* substr: String Functions. (line 482)
* substring: String Functions. (line 482)
* Sumner, Andrew: Other Versions. (line 68)
-* supplementary groups of gawk process: Auto-set. (line 228)
+* supplementary groups of gawk process: Auto-set. (line 251)
* switch statement: Switch Statement. (line 6)
-* SYMTAB array: Auto-set. (line 268)
+* SYMTAB array: Auto-set. (line 299)
* syntactic ambiguity: /= operator vs. /=.../ regexp constant: Assignment Ops.
(line 149)
* system: I/O Functions. (line 107)
-* systime: Time Functions. (line 66)
+* systime: Time Functions. (line 68)
* t debugger command (alias for tbreak): Breakpoint Control. (line 90)
* tbreak debugger command: Breakpoint Control. (line 90)
* Tcl: Library Names. (line 58)
@@ -34358,7 +35232,7 @@ Index
* tee.awk program: Tee Program. (line 26)
* temporary breakpoint: Breakpoint Control. (line 90)
* terminating records: awk split records. (line 124)
-* testbits.awk program: Bitwise Functions. (line 72)
+* testbits.awk program: Bitwise Functions. (line 69)
* testext extension: Extension Sample API Tests.
(line 6)
* Texinfo: Conventions. (line 6)
@@ -34366,7 +35240,7 @@ Index
* Texinfo <2>: Dupword Program. (line 17)
* Texinfo <3>: Extract Program. (line 12)
* Texinfo <4>: Distribution contents.
- (line 77)
+ (line 83)
* Texinfo <5>: Adding Code. (line 100)
* Texinfo, chapter beginnings in files: Regexp Operators. (line 22)
* Texinfo, extracting programs from source files: Extract Program.
@@ -34396,8 +35270,8 @@ Index
* time, retrieving: Time Functions. (line 17)
* timeout, reading input: Read Timeout. (line 6)
* timestamps: Time Functions. (line 6)
-* timestamps <1>: Time Functions. (line 66)
-* timestamps, converting dates to: Time Functions. (line 76)
+* timestamps <1>: Time Functions. (line 68)
+* timestamps, converting dates to: Time Functions. (line 78)
* timestamps, formatted: Getlocaltime Function.
(line 6)
* tolower: String Functions. (line 524)
@@ -34414,7 +35288,7 @@ Index
(line 37)
* troubleshooting, awk uses FS not IFS: Field Separators. (line 29)
* troubleshooting, backslash before nonspecial character: Escape Sequences.
- (line 105)
+ (line 108)
* troubleshooting, division: Arithmetic Ops. (line 44)
* troubleshooting, fatal errors, field widths, specifying: Constant Size.
(line 22)
@@ -34448,6 +35322,8 @@ Index
* trunc-mod operation: Arithmetic Ops. (line 66)
* truth values: Truth Values. (line 6)
* type conversion: Strings And Numbers. (line 21)
+* type, of variable: Type Functions. (line 14)
+* typeof: Type Functions. (line 14)
* u debugger command (alias for until): Debugger Execution Control.
(line 82)
* unassigned array elements: Reference to Elements.
@@ -34470,7 +35346,7 @@ Index
* uniq.awk program: Uniq Program. (line 65)
* Unix: Glossary. (line 748)
* Unix awk, backslashes in escape sequences: Escape Sequences.
- (line 118)
+ (line 121)
* Unix awk, close() function and: Close Files And Pipes.
(line 132)
* Unix awk, password files, field separators and: Command Line Field Separator.
@@ -34489,10 +35365,11 @@ Index
* user-modifiable variables: User-modified. (line 6)
* users, information about, printing: Id Program. (line 6)
* users, information about, retrieving: Passwd Functions. (line 16)
-* USR1 signal, for dynamic profiling: Profiling. (line 187)
+* USR1 signal, for dynamic profiling: Profiling. (line 186)
* values, numeric: Basic Data Typing. (line 13)
* values, string: Basic Data Typing. (line 13)
* variable assignments and input files: Other Arguments. (line 26)
+* variable type: Type Functions. (line 14)
* variable typing: Typing and Comparison.
(line 9)
* variables: Other Features. (line 6)
@@ -34523,10 +35400,10 @@ Index
* variables, uninitialized, as array subscripts: Uninitialized Subscripts.
(line 6)
* variables, user-defined: Variables. (line 6)
-* version of gawk: Auto-set. (line 198)
-* version of gawk extension API: Auto-set. (line 223)
-* version of GNU MP library: Auto-set. (line 206)
-* version of GNU MPFR library: Auto-set. (line 208)
+* version of gawk: Auto-set. (line 221)
+* version of gawk extension API: Auto-set. (line 246)
+* version of GNU MP library: Auto-set. (line 229)
+* version of GNU MPFR library: Auto-set. (line 231)
* vertical bar (|): Regexp Operators. (line 70)
* vertical bar (|), | operator (I/O): Getline/Pipe. (line 10)
* vertical bar (|), | operator (I/O) <1>: Precedence. (line 64)
@@ -34563,7 +35440,7 @@ Index
* whitespace, as field separators: Default Field Splitting.
(line 6)
* whitespace, functions, calling: Calling Built-in. (line 10)
-* whitespace, newlines as: Options. (line 260)
+* whitespace, newlines as: Options. (line 263)
* Williams, Kent: Contributors. (line 35)
* Woehlke, Matthew: Contributors. (line 80)
* Woods, John: Contributors. (line 28)
@@ -34593,563 +35470,574 @@ Index

Tag Table:
Node: Top1200
-Node: Foreword342162
-Node: Foreword446604
-Node: Preface48136
-Ref: Preface-Footnote-150995
-Ref: Preface-Footnote-251102
-Ref: Preface-Footnote-351336
-Node: History51478
-Node: Names53830
-Ref: Names-Footnote-154924
-Node: This Manual55071
-Ref: This Manual-Footnote-161556
-Node: Conventions61656
-Node: Manual History64010
-Ref: Manual History-Footnote-167005
-Ref: Manual History-Footnote-267046
-Node: How To Contribute67120
-Node: Acknowledgments67771
-Node: Getting Started72657
-Node: Running gawk75096
-Node: One-shot76286
-Node: Read Terminal77549
-Node: Long79542
-Node: Executable Scripts81055
-Ref: Executable Scripts-Footnote-183850
-Node: Comments83953
-Node: Quoting86437
-Node: DOS Quoting91954
-Node: Sample Data Files92629
-Node: Very Simple95224
-Node: Two Rules100126
-Node: More Complex102011
-Node: Statements/Lines104877
-Ref: Statements/Lines-Footnote-1109336
-Node: Other Features109601
-Node: When110537
-Ref: When-Footnote-1112291
-Node: Intro Summary112356
-Node: Invoking Gawk113240
-Node: Command Line114754
-Node: Options115552
-Ref: Options-Footnote-1131459
-Ref: Options-Footnote-2131689
-Node: Other Arguments131714
-Node: Naming Standard Input134661
-Node: Environment Variables135754
-Node: AWKPATH Variable136312
-Ref: AWKPATH Variable-Footnote-1139613
-Ref: AWKPATH Variable-Footnote-2139647
-Node: AWKLIBPATH Variable139908
-Node: Other Environment Variables141052
-Node: Exit Status145000
-Node: Include Files145677
-Node: Loading Shared Libraries149272
-Node: Obsolete150700
-Node: Undocumented151392
-Node: Invoking Summary151689
-Node: Regexp153349
-Node: Regexp Usage154803
-Node: Escape Sequences156840
-Node: Regexp Operators162854
-Ref: Regexp Operators-Footnote-1170270
-Ref: Regexp Operators-Footnote-2170417
-Node: Bracket Expressions170515
-Ref: table-char-classes172991
-Node: Leftmost Longest176128
-Node: Computed Regexps177431
-Node: GNU Regexp Operators180858
-Node: Case-sensitivity184537
-Ref: Case-sensitivity-Footnote-1187424
-Ref: Case-sensitivity-Footnote-2187659
-Node: Regexp Summary187767
-Node: Reading Files189233
-Node: Records191327
-Node: awk split records192060
-Node: gawk split records196991
-Ref: gawk split records-Footnote-1201531
-Node: Fields201568
-Ref: Fields-Footnote-1204348
-Node: Nonconstant Fields204434
-Ref: Nonconstant Fields-Footnote-1206670
-Node: Changing Fields206874
-Node: Field Separators212802
-Node: Default Field Splitting215500
-Node: Regexp Field Splitting216618
-Node: Single Character Fields219971
-Node: Command Line Field Separator221031
-Node: Full Line Fields224249
-Ref: Full Line Fields-Footnote-1225771
-Ref: Full Line Fields-Footnote-2225817
-Node: Field Splitting Summary225918
-Node: Constant Size227992
-Node: Splitting By Content232570
-Ref: Splitting By Content-Footnote-1236541
-Node: Multiple Line236704
-Ref: Multiple Line-Footnote-1242586
-Node: Getline242765
-Node: Plain Getline244969
-Node: Getline/Variable247608
-Node: Getline/File248757
-Node: Getline/Variable/File250143
-Ref: Getline/Variable/File-Footnote-1251746
-Node: Getline/Pipe251834
-Node: Getline/Variable/Pipe254539
-Node: Getline/Coprocess255672
-Node: Getline/Variable/Coprocess256937
-Node: Getline Notes257677
-Node: Getline Summary260472
-Ref: table-getline-variants260894
-Node: Read Timeout261642
-Ref: Read Timeout-Footnote-1265483
-Node: Command-line directories265541
-Node: Input Summary266445
-Node: Input Exercises269617
-Node: Printing270345
-Node: Print272121
-Node: Print Examples273578
-Node: Output Separators276358
-Node: OFMT278375
-Node: Printf279731
-Node: Basic Printf280516
-Node: Control Letters282090
-Node: Format Modifiers286078
-Node: Printf Examples292093
-Node: Redirection294579
-Node: Special FD301420
-Ref: Special FD-Footnote-1304588
-Node: Special Files304662
-Node: Other Inherited Files305279
-Node: Special Network306280
-Node: Special Caveats307140
-Node: Close Files And Pipes308089
-Ref: Close Files And Pipes-Footnote-1315282
-Ref: Close Files And Pipes-Footnote-2315430
-Node: Output Summary315581
-Node: Output Exercises316579
-Node: Expressions317258
-Node: Values318446
-Node: Constants319124
-Node: Scalar Constants319815
-Ref: Scalar Constants-Footnote-1320679
-Node: Nondecimal-numbers320929
-Node: Regexp Constants323942
-Node: Using Constant Regexps324468
-Node: Variables327631
-Node: Using Variables328288
-Node: Assignment Options330198
-Node: Conversion332071
-Node: Strings And Numbers332595
-Ref: Strings And Numbers-Footnote-1335658
-Node: Locale influences conversions335767
-Ref: table-locale-affects338525
-Node: All Operators339143
-Node: Arithmetic Ops339772
-Node: Concatenation342278
-Ref: Concatenation-Footnote-1345125
-Node: Assignment Ops345232
-Ref: table-assign-ops350223
-Node: Increment Ops351536
-Node: Truth Values and Conditions354996
-Node: Truth Values356070
-Node: Typing and Comparison357118
-Node: Variable Typing357938
-Node: Comparison Operators361562
-Ref: table-relational-ops361981
-Node: POSIX String Comparison365476
-Ref: POSIX String Comparison-Footnote-1366550
-Node: Boolean Ops366689
-Ref: Boolean Ops-Footnote-1371171
-Node: Conditional Exp371263
-Node: Function Calls372999
-Node: Precedence376876
-Node: Locales380535
-Node: Expressions Summary382167
-Node: Patterns and Actions384740
-Node: Pattern Overview385860
-Node: Regexp Patterns387537
-Node: Expression Patterns388079
-Node: Ranges391860
-Node: BEGIN/END394968
-Node: Using BEGIN/END395729
-Ref: Using BEGIN/END-Footnote-1398465
-Node: I/O And BEGIN/END398571
-Node: BEGINFILE/ENDFILE400885
-Node: Empty403792
-Node: Using Shell Variables404109
-Node: Action Overview406383
-Node: Statements408708
-Node: If Statement410556
-Node: While Statement412051
-Node: Do Statement414079
-Node: For Statement415227
-Node: Switch Statement418385
-Node: Break Statement420771
-Node: Continue Statement422863
-Node: Next Statement424690
-Node: Nextfile Statement427073
-Node: Exit Statement429725
-Node: Built-in Variables432128
-Node: User-modified433261
-Ref: User-modified-Footnote-1440886
-Node: Auto-set440948
-Ref: Auto-set-Footnote-1454304
-Ref: Auto-set-Footnote-2454510
-Node: ARGC and ARGV454566
-Node: Pattern Action Summary458779
-Node: Arrays461209
-Node: Array Basics462538
-Node: Array Intro463382
-Ref: figure-array-elements465357
-Ref: Array Intro-Footnote-1468061
-Node: Reference to Elements468189
-Node: Assigning Elements470653
-Node: Array Example471144
-Node: Scanning an Array472903
-Node: Controlling Scanning475925
-Ref: Controlling Scanning-Footnote-1481324
-Node: Numeric Array Subscripts481640
-Node: Uninitialized Subscripts483824
-Node: Delete485443
-Ref: Delete-Footnote-1488195
-Node: Multidimensional488252
-Node: Multiscanning491347
-Node: Arrays of Arrays492938
-Node: Arrays Summary497705
-Node: Functions499798
-Node: Built-in500836
-Node: Calling Built-in501917
-Node: Numeric Functions503913
-Ref: Numeric Functions-Footnote-1507941
-Ref: Numeric Functions-Footnote-2508298
-Ref: Numeric Functions-Footnote-3508346
-Node: String Functions508618
-Ref: String Functions-Footnote-1532122
-Ref: String Functions-Footnote-2532250
-Ref: String Functions-Footnote-3532498
-Node: Gory Details532585
-Ref: table-sub-escapes534376
-Ref: table-sub-proposed535895
-Ref: table-posix-sub537258
-Ref: table-gensub-escapes538799
-Ref: Gory Details-Footnote-1539622
-Node: I/O Functions539776
-Ref: table-system-return-values546358
-Ref: I/O Functions-Footnote-1548338
-Ref: I/O Functions-Footnote-2548486
-Node: Time Functions548606
-Ref: Time Functions-Footnote-1559128
-Ref: Time Functions-Footnote-2559196
-Ref: Time Functions-Footnote-3559354
-Ref: Time Functions-Footnote-4559465
-Ref: Time Functions-Footnote-5559577
-Ref: Time Functions-Footnote-6559804
-Node: Bitwise Functions560070
-Ref: table-bitwise-ops560664
-Ref: Bitwise Functions-Footnote-1564982
-Node: Type Functions565155
-Node: I18N Functions566311
-Node: User-defined567962
-Node: Definition Syntax568767
-Ref: Definition Syntax-Footnote-1574454
-Node: Function Example574525
-Ref: Function Example-Footnote-1577447
-Node: Function Caveats577469
-Node: Calling A Function577987
-Node: Variable Scope578945
-Node: Pass By Value/Reference581939
-Node: Return Statement585438
-Node: Dynamic Typing588417
-Node: Indirect Calls589347
-Ref: Indirect Calls-Footnote-1599598
-Node: Functions Summary599726
-Node: Library Functions602431
-Ref: Library Functions-Footnote-1606038
-Ref: Library Functions-Footnote-2606181
-Node: Library Names606352
-Ref: Library Names-Footnote-1609812
-Ref: Library Names-Footnote-2610035
-Node: General Functions610121
-Node: Strtonum Function611224
-Node: Assert Function614246
-Node: Round Function617572
-Node: Cliff Random Function619113
-Node: Ordinal Functions620129
-Ref: Ordinal Functions-Footnote-1623192
-Ref: Ordinal Functions-Footnote-2623444
-Node: Join Function623654
-Ref: Join Function-Footnote-1625424
-Node: Getlocaltime Function625624
-Node: Readfile Function629366
-Node: Shell Quoting631338
-Node: Data File Management632739
-Node: Filetrans Function633371
-Node: Rewind Function637467
-Node: File Checking639373
-Ref: File Checking-Footnote-1640707
-Node: Empty Files640908
-Node: Ignoring Assigns642887
-Node: Getopt Function644437
-Ref: Getopt Function-Footnote-1655906
-Node: Passwd Functions656106
-Ref: Passwd Functions-Footnote-1664945
-Node: Group Functions665033
-Ref: Group Functions-Footnote-1672931
-Node: Walking Arrays673138
-Node: Library Functions Summary676146
-Node: Library Exercises677552
-Node: Sample Programs678017
-Node: Running Examples678787
-Node: Clones679515
-Node: Cut Program680739
-Node: Egrep Program690668
-Ref: Egrep Program-Footnote-1698180
-Node: Id Program698290
-Node: Split Program701970
-Ref: Split Program-Footnote-1705429
-Node: Tee Program705558
-Node: Uniq Program708348
-Node: Wc Program715774
-Ref: Wc Program-Footnote-1720029
-Node: Miscellaneous Programs720123
-Node: Dupword Program721336
-Node: Alarm Program723366
-Node: Translate Program728221
-Ref: Translate Program-Footnote-1732786
-Node: Labels Program733056
-Ref: Labels Program-Footnote-1736407
-Node: Word Sorting736491
-Node: History Sorting740563
-Node: Extract Program742398
-Node: Simple Sed749927
-Node: Igawk Program753001
-Ref: Igawk Program-Footnote-1767332
-Ref: Igawk Program-Footnote-2767534
-Ref: Igawk Program-Footnote-3767656
-Node: Anagram Program767771
-Node: Signature Program770833
-Node: Programs Summary772080
-Node: Programs Exercises773294
-Ref: Programs Exercises-Footnote-1777423
-Node: Advanced Features777514
-Node: Nondecimal Data779504
-Node: Array Sorting781095
-Node: Controlling Array Traversal781795
-Ref: Controlling Array Traversal-Footnote-1790162
-Node: Array Sorting Functions790280
-Ref: Array Sorting Functions-Footnote-1795371
-Node: Two-way I/O795567
-Ref: Two-way I/O-Footnote-1801861
-Ref: Two-way I/O-Footnote-2802048
-Node: TCP/IP Networking802130
-Node: Profiling805248
-Node: Advanced Features Summary812787
-Node: Internationalization814723
-Node: I18N and L10N816203
-Node: Explaining gettext816890
-Ref: Explaining gettext-Footnote-1822782
-Ref: Explaining gettext-Footnote-2822967
-Node: Programmer i18n823132
-Ref: Programmer i18n-Footnote-1828081
-Node: Translator i18n828130
-Node: String Extraction828924
-Ref: String Extraction-Footnote-1830056
-Node: Printf Ordering830142
-Ref: Printf Ordering-Footnote-1832928
-Node: I18N Portability832992
-Ref: I18N Portability-Footnote-1835448
-Node: I18N Example835511
-Ref: I18N Example-Footnote-1838317
-Node: Gawk I18N838390
-Node: I18N Summary839035
-Node: Debugger840376
-Node: Debugging841398
-Node: Debugging Concepts841839
-Node: Debugging Terms843648
-Node: Awk Debugging846223
-Node: Sample Debugging Session847129
-Node: Debugger Invocation847663
-Node: Finding The Bug849049
-Node: List of Debugger Commands855527
-Node: Breakpoint Control856860
-Node: Debugger Execution Control860554
-Node: Viewing And Changing Data863916
-Node: Execution Stack867290
-Node: Debugger Info868927
-Node: Miscellaneous Debugger Commands872998
-Node: Readline Support878086
-Node: Limitations878982
-Node: Debugging Summary881091
-Node: Arbitrary Precision Arithmetic882264
-Node: Computer Arithmetic883680
-Ref: table-numeric-ranges887271
-Ref: Computer Arithmetic-Footnote-1887993
-Node: Math Definitions888050
-Ref: table-ieee-formats891364
-Ref: Math Definitions-Footnote-1891967
-Node: MPFR features892072
-Node: FP Math Caution893789
-Ref: FP Math Caution-Footnote-1894861
-Node: Inexactness of computations895230
-Node: Inexact representation896190
-Node: Comparing FP Values897550
-Node: Errors accumulate898632
-Node: Getting Accuracy900065
-Node: Try To Round902775
-Node: Setting precision903674
-Ref: table-predefined-precision-strings904371
-Node: Setting the rounding mode906201
-Ref: table-gawk-rounding-modes906575
-Ref: Setting the rounding mode-Footnote-1909983
-Node: Arbitrary Precision Integers910162
-Ref: Arbitrary Precision Integers-Footnote-1913146
-Node: POSIX Floating Point Problems913295
-Ref: POSIX Floating Point Problems-Footnote-1917177
-Node: Floating point summary917215
-Node: Dynamic Extensions919405
-Node: Extension Intro920958
-Node: Plugin License922224
-Node: Extension Mechanism Outline923021
-Ref: figure-load-extension923460
-Ref: figure-register-new-function925025
-Ref: figure-call-new-function926117
-Node: Extension API Description928179
-Node: Extension API Functions Introduction929627
-Node: General Data Types934439
-Ref: General Data Types-Footnote-1940394
-Node: Memory Allocation Functions940693
-Ref: Memory Allocation Functions-Footnote-1943538
-Node: Constructor Functions943637
-Node: Registration Functions945382
-Node: Extension Functions946067
-Node: Exit Callback Functions948366
-Node: Extension Version String949616
-Node: Input Parsers950279
-Node: Output Wrappers960161
-Node: Two-way processors964673
-Node: Printing Messages966938
-Ref: Printing Messages-Footnote-1968012
-Node: Updating ERRNO968165
-Node: Requesting Values968904
-Ref: table-value-types-returned969641
-Node: Accessing Parameters970524
-Node: Symbol Table Access971759
-Node: Symbol table by name972271
-Node: Symbol table by cookie974292
-Ref: Symbol table by cookie-Footnote-1978444
-Node: Cached values978508
-Ref: Cached values-Footnote-1982015
-Node: Array Manipulation982106
-Ref: Array Manipulation-Footnote-1983205
-Node: Array Data Types983242
-Ref: Array Data Types-Footnote-1985900
-Node: Array Functions985992
-Node: Flattening Arrays989850
-Node: Creating Arrays996758
-Node: Extension API Variables1001527
-Node: Extension Versioning1002163
-Ref: gawk-api-version1002600
-Node: Extension API Informational Variables1004356
-Node: Extension API Boilerplate1005420
-Node: Finding Extensions1009234
-Node: Extension Example1009793
-Node: Internal File Description1010591
-Node: Internal File Ops1014671
-Ref: Internal File Ops-Footnote-11026433
-Node: Using Internal File Ops1026573
-Ref: Using Internal File Ops-Footnote-11028956
-Node: Extension Samples1029230
-Node: Extension Sample File Functions1030759
-Node: Extension Sample Fnmatch1038408
-Node: Extension Sample Fork1039895
-Node: Extension Sample Inplace1041113
-Node: Extension Sample Ord1044323
-Node: Extension Sample Readdir1045159
-Ref: table-readdir-file-types1046048
-Node: Extension Sample Revout1046853
-Node: Extension Sample Rev2way1047442
-Node: Extension Sample Read write array1048182
-Node: Extension Sample Readfile1050124
-Node: Extension Sample Time1051219
-Node: Extension Sample API Tests1052567
-Node: gawkextlib1053059
-Node: Extension summary1055483
-Node: Extension Exercises1059175
-Node: Language History1060673
-Node: V7/SVR3.11062329
-Node: SVR41064481
-Node: POSIX1065915
-Node: BTL1067294
-Node: POSIX/GNU1068023
-Node: Feature History1073544
-Node: Common Extensions1086873
-Node: Ranges and Locales1088156
-Ref: Ranges and Locales-Footnote-11092772
-Ref: Ranges and Locales-Footnote-21092799
-Ref: Ranges and Locales-Footnote-31093034
-Node: Contributors1093255
-Node: History summary1098815
-Node: Installation1100195
-Node: Gawk Distribution1101139
-Node: Getting1101623
-Node: Extracting1102584
-Node: Distribution contents1104222
-Node: Unix Installation1109964
-Node: Quick Installation1110580
-Node: Additional Configuration Options1113007
-Node: Configuration Philosophy1114811
-Node: Non-Unix Installation1117180
-Node: PC Installation1117640
-Node: PC Binary Installation1118478
-Node: PC Compiling1118913
-Node: PC Using1120030
-Node: Cygwin1123075
-Node: MSYS1123845
-Node: VMS Installation1124346
-Node: VMS Compilation1125137
-Ref: VMS Compilation-Footnote-11126366
-Node: VMS Dynamic Extensions1126424
-Node: VMS Installation Details1128109
-Node: VMS Running1130362
-Node: VMS GNV1134641
-Node: VMS Old Gawk1135376
-Node: Bugs1135847
-Node: Bug address1136510
-Node: Usenet1138907
-Node: Maintainers1139684
-Node: Other Versions1141060
-Node: Installation summary1147644
-Node: Notes1148679
-Node: Compatibility Mode1149544
-Node: Additions1150326
-Node: Accessing The Source1151251
-Node: Adding Code1152686
-Node: New Ports1158904
-Node: Derived Files1163392
-Ref: Derived Files-Footnote-11168877
-Ref: Derived Files-Footnote-21168912
-Ref: Derived Files-Footnote-31169510
-Node: Future Extensions1169624
-Node: Implementation Limitations1170282
-Node: Extension Design1171465
-Node: Old Extension Problems1172619
-Ref: Old Extension Problems-Footnote-11174137
-Node: Extension New Mechanism Goals1174194
-Ref: Extension New Mechanism Goals-Footnote-11177558
-Node: Extension Other Design Decisions1177747
-Node: Extension Future Growth1179860
-Node: Old Extension Mechanism1180696
-Node: Notes summary1182459
-Node: Basic Concepts1183641
-Node: Basic High Level1184322
-Ref: figure-general-flow1184604
-Ref: figure-process-flow1185289
-Ref: Basic High Level-Footnote-11188590
-Node: Basic Data Typing1188775
-Node: Glossary1192103
-Node: Copying1224050
-Node: GNU Free Documentation License1261589
-Node: Index1286707
+Node: Foreword342794
+Node: Foreword447236
+Node: Preface48768
+Ref: Preface-Footnote-151627
+Ref: Preface-Footnote-251734
+Ref: Preface-Footnote-351968
+Node: History52110
+Node: Names54462
+Ref: Names-Footnote-155556
+Node: This Manual55703
+Ref: This Manual-Footnote-162188
+Node: Conventions62288
+Node: Manual History64642
+Ref: Manual History-Footnote-167637
+Ref: Manual History-Footnote-267678
+Node: How To Contribute67752
+Node: Acknowledgments68403
+Node: Getting Started73289
+Node: Running gawk75728
+Node: One-shot76918
+Node: Read Terminal78181
+Node: Long80174
+Node: Executable Scripts81687
+Ref: Executable Scripts-Footnote-184482
+Node: Comments84585
+Node: Quoting87069
+Node: DOS Quoting92586
+Node: Sample Data Files94641
+Node: Very Simple97236
+Node: Two Rules102138
+Node: More Complex104023
+Node: Statements/Lines106889
+Ref: Statements/Lines-Footnote-1111348
+Node: Other Features111613
+Node: When112549
+Ref: When-Footnote-1114303
+Node: Intro Summary114368
+Node: Invoking Gawk115252
+Node: Command Line116766
+Node: Options117564
+Ref: Options-Footnote-1133663
+Ref: Options-Footnote-2133893
+Node: Other Arguments133918
+Node: Naming Standard Input136865
+Node: Environment Variables137958
+Node: AWKPATH Variable138516
+Ref: AWKPATH Variable-Footnote-1141927
+Ref: AWKPATH Variable-Footnote-2141961
+Node: AWKLIBPATH Variable142222
+Node: Other Environment Variables143479
+Node: Exit Status147300
+Node: Include Files147977
+Node: Loading Shared Libraries151572
+Node: Obsolete153000
+Node: Undocumented153692
+Node: Invoking Summary153989
+Node: Regexp155649
+Node: Regexp Usage157103
+Node: Escape Sequences159140
+Node: Regexp Operators165372
+Ref: Regexp Operators-Footnote-1172788
+Ref: Regexp Operators-Footnote-2172935
+Node: Bracket Expressions173033
+Ref: table-char-classes175509
+Node: Leftmost Longest178646
+Node: Computed Regexps179949
+Node: GNU Regexp Operators183376
+Node: Case-sensitivity187055
+Ref: Case-sensitivity-Footnote-1189942
+Ref: Case-sensitivity-Footnote-2190177
+Node: Regexp Summary190285
+Node: Reading Files191751
+Node: Records193914
+Node: awk split records194647
+Node: gawk split records199578
+Ref: gawk split records-Footnote-1204118
+Node: Fields204155
+Node: Nonconstant Fields206896
+Ref: Nonconstant Fields-Footnote-1209132
+Node: Changing Fields209336
+Node: Field Separators215264
+Node: Default Field Splitting217962
+Node: Regexp Field Splitting219080
+Node: Single Character Fields222433
+Node: Command Line Field Separator223493
+Node: Full Line Fields226711
+Ref: Full Line Fields-Footnote-1228233
+Ref: Full Line Fields-Footnote-2228279
+Node: Field Splitting Summary228380
+Node: Constant Size230454
+Node: Splitting By Content235032
+Ref: Splitting By Content-Footnote-1239003
+Node: Multiple Line239166
+Ref: Multiple Line-Footnote-1245048
+Node: Getline245227
+Node: Plain Getline247694
+Node: Getline/Variable250333
+Node: Getline/File251482
+Node: Getline/Variable/File252868
+Ref: Getline/Variable/File-Footnote-1254471
+Node: Getline/Pipe254559
+Node: Getline/Variable/Pipe257264
+Node: Getline/Coprocess258397
+Node: Getline/Variable/Coprocess259662
+Node: Getline Notes260402
+Node: Getline Summary263197
+Ref: table-getline-variants263619
+Node: Read Timeout264367
+Ref: Read Timeout-Footnote-1268273
+Node: Retrying Input268331
+Node: Command-line directories269530
+Node: Input Summary270436
+Node: Input Exercises273608
+Node: Printing274336
+Node: Print276170
+Node: Print Examples277627
+Node: Output Separators280407
+Node: OFMT282424
+Node: Printf283780
+Node: Basic Printf284565
+Node: Control Letters286139
+Node: Format Modifiers290127
+Node: Printf Examples296142
+Node: Redirection298628
+Node: Special FD305469
+Ref: Special FD-Footnote-1308637
+Node: Special Files308711
+Node: Other Inherited Files309328
+Node: Special Network310329
+Node: Special Caveats311189
+Node: Close Files And Pipes312138
+Ref: table-close-pipe-return-values319045
+Ref: Close Files And Pipes-Footnote-1319828
+Ref: Close Files And Pipes-Footnote-2319976
+Node: Nonfatal320128
+Node: Output Summary322453
+Node: Output Exercises323675
+Node: Expressions324354
+Node: Values325542
+Node: Constants326220
+Node: Scalar Constants326911
+Ref: Scalar Constants-Footnote-1327775
+Node: Nondecimal-numbers328025
+Node: Regexp Constants331026
+Node: Using Constant Regexps331552
+Node: Standard Regexp Constants332174
+Node: Strong Regexp Constants335362
+Node: Variables338320
+Node: Using Variables338977
+Node: Assignment Options340887
+Node: Conversion342760
+Node: Strings And Numbers343284
+Ref: Strings And Numbers-Footnote-1346347
+Node: Locale influences conversions346456
+Ref: table-locale-affects349214
+Node: All Operators349832
+Node: Arithmetic Ops350461
+Node: Concatenation352967
+Ref: Concatenation-Footnote-1355814
+Node: Assignment Ops355921
+Ref: table-assign-ops360912
+Node: Increment Ops362225
+Node: Truth Values and Conditions365685
+Node: Truth Values366759
+Node: Typing and Comparison367807
+Node: Variable Typing368627
+Ref: Variable Typing-Footnote-1375090
+Ref: Variable Typing-Footnote-2375162
+Node: Comparison Operators375239
+Ref: table-relational-ops375658
+Node: POSIX String Comparison379153
+Ref: POSIX String Comparison-Footnote-1380848
+Ref: POSIX String Comparison-Footnote-2380987
+Node: Boolean Ops381071
+Ref: Boolean Ops-Footnote-1385553
+Node: Conditional Exp385645
+Node: Function Calls387381
+Node: Precedence391258
+Node: Locales394917
+Node: Expressions Summary396549
+Node: Patterns and Actions399122
+Node: Pattern Overview400242
+Node: Regexp Patterns401919
+Node: Expression Patterns402461
+Node: Ranges406242
+Node: BEGIN/END409350
+Node: Using BEGIN/END410111
+Ref: Using BEGIN/END-Footnote-1412847
+Node: I/O And BEGIN/END412953
+Node: BEGINFILE/ENDFILE415267
+Node: Empty418174
+Node: Using Shell Variables418491
+Node: Action Overview420765
+Node: Statements423090
+Node: If Statement424938
+Node: While Statement426433
+Node: Do Statement428461
+Node: For Statement429609
+Node: Switch Statement432767
+Node: Break Statement435153
+Node: Continue Statement437245
+Node: Next Statement439072
+Node: Nextfile Statement441455
+Node: Exit Statement444107
+Node: Built-in Variables446510
+Node: User-modified447643
+Node: Auto-set455229
+Ref: Auto-set-Footnote-1469882
+Ref: Auto-set-Footnote-2470088
+Node: ARGC and ARGV470144
+Node: Pattern Action Summary474357
+Node: Arrays476787
+Node: Array Basics478116
+Node: Array Intro478960
+Ref: figure-array-elements480935
+Ref: Array Intro-Footnote-1483639
+Node: Reference to Elements483767
+Node: Assigning Elements486231
+Node: Array Example486722
+Node: Scanning an Array488481
+Node: Controlling Scanning491503
+Ref: Controlling Scanning-Footnote-1496902
+Node: Numeric Array Subscripts497218
+Node: Uninitialized Subscripts499402
+Node: Delete501021
+Ref: Delete-Footnote-1503773
+Node: Multidimensional503830
+Node: Multiscanning506925
+Node: Arrays of Arrays508516
+Node: Arrays Summary513283
+Node: Functions515376
+Node: Built-in516414
+Node: Calling Built-in517495
+Node: Numeric Functions519491
+Ref: Numeric Functions-Footnote-1524324
+Ref: Numeric Functions-Footnote-2524681
+Ref: Numeric Functions-Footnote-3524729
+Node: String Functions525001
+Ref: String Functions-Footnote-1548505
+Ref: String Functions-Footnote-2548633
+Ref: String Functions-Footnote-3548881
+Node: Gory Details548968
+Ref: table-sub-escapes550759
+Ref: table-sub-proposed552278
+Ref: table-posix-sub553641
+Ref: table-gensub-escapes555182
+Ref: Gory Details-Footnote-1556005
+Node: I/O Functions556159
+Ref: table-system-return-values562741
+Ref: I/O Functions-Footnote-1564721
+Ref: I/O Functions-Footnote-2564869
+Node: Time Functions564989
+Ref: Time Functions-Footnote-1575656
+Ref: Time Functions-Footnote-2575724
+Ref: Time Functions-Footnote-3575882
+Ref: Time Functions-Footnote-4575993
+Ref: Time Functions-Footnote-5576105
+Ref: Time Functions-Footnote-6576332
+Node: Bitwise Functions576598
+Ref: table-bitwise-ops577192
+Ref: Bitwise Functions-Footnote-1583225
+Ref: Bitwise Functions-Footnote-2583398
+Node: Type Functions583589
+Node: I18N Functions586264
+Node: User-defined587915
+Node: Definition Syntax588720
+Ref: Definition Syntax-Footnote-1594407
+Node: Function Example594478
+Ref: Function Example-Footnote-1597400
+Node: Function Caveats597422
+Node: Calling A Function597940
+Node: Variable Scope598898
+Node: Pass By Value/Reference601892
+Node: Return Statement605391
+Node: Dynamic Typing608370
+Node: Indirect Calls609300
+Ref: Indirect Calls-Footnote-1619551
+Node: Functions Summary619679
+Node: Library Functions622384
+Ref: Library Functions-Footnote-1625991
+Ref: Library Functions-Footnote-2626134
+Node: Library Names626305
+Ref: Library Names-Footnote-1629765
+Ref: Library Names-Footnote-2629988
+Node: General Functions630074
+Node: Strtonum Function631177
+Node: Assert Function634199
+Node: Round Function637525
+Node: Cliff Random Function639066
+Node: Ordinal Functions640082
+Ref: Ordinal Functions-Footnote-1643145
+Ref: Ordinal Functions-Footnote-2643397
+Node: Join Function643607
+Ref: Join Function-Footnote-1645377
+Node: Getlocaltime Function645577
+Node: Readfile Function649319
+Node: Shell Quoting651291
+Node: Data File Management652692
+Node: Filetrans Function653324
+Node: Rewind Function657420
+Node: File Checking659326
+Ref: File Checking-Footnote-1660660
+Node: Empty Files660861
+Node: Ignoring Assigns662840
+Node: Getopt Function664390
+Ref: Getopt Function-Footnote-1675859
+Node: Passwd Functions676059
+Ref: Passwd Functions-Footnote-1684898
+Node: Group Functions684986
+Ref: Group Functions-Footnote-1692884
+Node: Walking Arrays693091
+Node: Library Functions Summary696099
+Node: Library Exercises697505
+Node: Sample Programs697970
+Node: Running Examples698740
+Node: Clones699468
+Node: Cut Program700692
+Node: Egrep Program710621
+Ref: Egrep Program-Footnote-1718133
+Node: Id Program718243
+Node: Split Program721923
+Ref: Split Program-Footnote-1725382
+Node: Tee Program725511
+Node: Uniq Program728301
+Node: Wc Program735727
+Ref: Wc Program-Footnote-1739982
+Node: Miscellaneous Programs740076
+Node: Dupword Program741289
+Node: Alarm Program743319
+Node: Translate Program748174
+Ref: Translate Program-Footnote-1752739
+Node: Labels Program753009
+Ref: Labels Program-Footnote-1756360
+Node: Word Sorting756444
+Node: History Sorting760516
+Node: Extract Program762351
+Node: Simple Sed769880
+Node: Igawk Program772954
+Ref: Igawk Program-Footnote-1787285
+Ref: Igawk Program-Footnote-2787487
+Ref: Igawk Program-Footnote-3787609
+Node: Anagram Program787724
+Node: Signature Program790786
+Node: Programs Summary792033
+Node: Programs Exercises793247
+Ref: Programs Exercises-Footnote-1797376
+Node: Advanced Features797467
+Node: Nondecimal Data799457
+Node: Array Sorting801048
+Node: Controlling Array Traversal801748
+Ref: Controlling Array Traversal-Footnote-1810115
+Node: Array Sorting Functions810233
+Ref: Array Sorting Functions-Footnote-1815324
+Node: Two-way I/O815520
+Ref: Two-way I/O-Footnote-1822071
+Ref: Two-way I/O-Footnote-2822258
+Node: TCP/IP Networking822340
+Node: Profiling825458
+Ref: Profiling-Footnote-1834130
+Node: Advanced Features Summary834453
+Node: Internationalization836297
+Node: I18N and L10N837777
+Node: Explaining gettext838464
+Ref: Explaining gettext-Footnote-1844356
+Ref: Explaining gettext-Footnote-2844541
+Node: Programmer i18n844706
+Ref: Programmer i18n-Footnote-1849655
+Node: Translator i18n849704
+Node: String Extraction850498
+Ref: String Extraction-Footnote-1851630
+Node: Printf Ordering851716
+Ref: Printf Ordering-Footnote-1854502
+Node: I18N Portability854566
+Ref: I18N Portability-Footnote-1857022
+Node: I18N Example857085
+Ref: I18N Example-Footnote-1859891
+Node: Gawk I18N859964
+Node: I18N Summary860609
+Node: Debugger861950
+Node: Debugging862972
+Node: Debugging Concepts863413
+Node: Debugging Terms865222
+Node: Awk Debugging867797
+Node: Sample Debugging Session868703
+Node: Debugger Invocation869237
+Node: Finding The Bug870623
+Node: List of Debugger Commands877101
+Node: Breakpoint Control878434
+Node: Debugger Execution Control882128
+Node: Viewing And Changing Data885490
+Node: Execution Stack888864
+Node: Debugger Info890501
+Node: Miscellaneous Debugger Commands894572
+Node: Readline Support899660
+Node: Limitations900556
+Node: Debugging Summary902665
+Node: Arbitrary Precision Arithmetic903944
+Node: Computer Arithmetic905360
+Ref: table-numeric-ranges908951
+Ref: Computer Arithmetic-Footnote-1909673
+Node: Math Definitions909730
+Ref: table-ieee-formats913044
+Ref: Math Definitions-Footnote-1913647
+Node: MPFR features913752
+Node: FP Math Caution915469
+Ref: FP Math Caution-Footnote-1916541
+Node: Inexactness of computations916910
+Node: Inexact representation917870
+Node: Comparing FP Values919230
+Node: Errors accumulate920312
+Node: Getting Accuracy921745
+Node: Try To Round924455
+Node: Setting precision925354
+Ref: table-predefined-precision-strings926051
+Node: Setting the rounding mode927881
+Ref: table-gawk-rounding-modes928255
+Ref: Setting the rounding mode-Footnote-1931663
+Node: Arbitrary Precision Integers931842
+Ref: Arbitrary Precision Integers-Footnote-1936759
+Node: POSIX Floating Point Problems936908
+Ref: POSIX Floating Point Problems-Footnote-1940790
+Node: Floating point summary940828
+Node: Dynamic Extensions943018
+Node: Extension Intro944571
+Node: Plugin License945837
+Node: Extension Mechanism Outline946634
+Ref: figure-load-extension947073
+Ref: figure-register-new-function948638
+Ref: figure-call-new-function949730
+Node: Extension API Description951792
+Node: Extension API Functions Introduction953434
+Node: General Data Types958768
+Ref: General Data Types-Footnote-1965973
+Node: Memory Allocation Functions966272
+Ref: Memory Allocation Functions-Footnote-1969117
+Node: Constructor Functions969216
+Node: Registration Functions972215
+Node: Extension Functions972900
+Node: Exit Callback Functions978113
+Node: Extension Version String979363
+Node: Input Parsers980026
+Node: Output Wrappers989908
+Node: Two-way processors994420
+Node: Printing Messages996685
+Ref: Printing Messages-Footnote-1997856
+Node: Updating ERRNO998009
+Node: Requesting Values998748
+Ref: table-value-types-returned999485
+Node: Accessing Parameters1000421
+Node: Symbol Table Access1001656
+Node: Symbol table by name1002168
+Node: Symbol table by cookie1003957
+Ref: Symbol table by cookie-Footnote-11008142
+Node: Cached values1008206
+Ref: Cached values-Footnote-11011742
+Node: Array Manipulation1011833
+Ref: Array Manipulation-Footnote-11012924
+Node: Array Data Types1012961
+Ref: Array Data Types-Footnote-11015619
+Node: Array Functions1015711
+Node: Flattening Arrays1020110
+Node: Creating Arrays1027051
+Node: Redirection API1031820
+Node: Extension API Variables1034662
+Node: Extension Versioning1035295
+Ref: gawk-api-version1035732
+Node: Extension API Informational Variables1037460
+Node: Extension API Boilerplate1038524
+Node: Changes from API V11042386
+Node: Finding Extensions1043046
+Node: Extension Example1043605
+Node: Internal File Description1044403
+Node: Internal File Ops1048483
+Ref: Internal File Ops-Footnote-11059883
+Node: Using Internal File Ops1060023
+Ref: Using Internal File Ops-Footnote-11062406
+Node: Extension Samples1062680
+Node: Extension Sample File Functions1064209
+Node: Extension Sample Fnmatch1071858
+Node: Extension Sample Fork1073345
+Node: Extension Sample Inplace1074563
+Node: Extension Sample Ord1077773
+Node: Extension Sample Readdir1078609
+Ref: table-readdir-file-types1079498
+Node: Extension Sample Revout1080303
+Node: Extension Sample Rev2way1080892
+Node: Extension Sample Read write array1081632
+Node: Extension Sample Readfile1083574
+Node: Extension Sample Time1084669
+Node: Extension Sample API Tests1086017
+Node: gawkextlib1086509
+Node: Extension summary1088956
+Node: Extension Exercises1092658
+Node: Language History1094156
+Node: V7/SVR3.11095812
+Node: SVR41097964
+Node: POSIX1099398
+Node: BTL1100777
+Node: POSIX/GNU1101506
+Node: Feature History1107398
+Node: Common Extensions1121768
+Node: Ranges and Locales1123051
+Ref: Ranges and Locales-Footnote-11127667
+Ref: Ranges and Locales-Footnote-21127694
+Ref: Ranges and Locales-Footnote-31127929
+Node: Contributors1128150
+Node: History summary1133710
+Node: Installation1135090
+Node: Gawk Distribution1136034
+Node: Getting1136518
+Node: Extracting1137479
+Node: Distribution contents1139117
+Node: Unix Installation1145459
+Node: Quick Installation1146141
+Node: Shell Startup Files1148555
+Node: Additional Configuration Options1149644
+Node: Configuration Philosophy1151449
+Node: Non-Unix Installation1153818
+Node: PC Installation1154278
+Node: PC Binary Installation1155116
+Node: PC Compiling1155551
+Node: PC Using1156668
+Node: Cygwin1159713
+Node: MSYS1160483
+Node: VMS Installation1160984
+Node: VMS Compilation1161775
+Ref: VMS Compilation-Footnote-11163004
+Node: VMS Dynamic Extensions1163062
+Node: VMS Installation Details1164747
+Node: VMS Running1167000
+Node: VMS GNV1171279
+Node: VMS Old Gawk1172014
+Node: Bugs1172485
+Node: Bug address1173148
+Node: Usenet1175545
+Node: Maintainers1176322
+Node: Other Versions1177698
+Node: Installation summary1184282
+Node: Notes1185317
+Node: Compatibility Mode1186182
+Node: Additions1186964
+Node: Accessing The Source1187889
+Node: Adding Code1189324
+Node: New Ports1195542
+Node: Derived Files1200030
+Ref: Derived Files-Footnote-11205515
+Ref: Derived Files-Footnote-21205550
+Ref: Derived Files-Footnote-31206148
+Node: Future Extensions1206262
+Node: Implementation Limitations1206920
+Node: Extension Design1208103
+Node: Old Extension Problems1209257
+Ref: Old Extension Problems-Footnote-11210775
+Node: Extension New Mechanism Goals1210832
+Ref: Extension New Mechanism Goals-Footnote-11214196
+Node: Extension Other Design Decisions1214385
+Node: Extension Future Growth1216498
+Node: Old Extension Mechanism1217334
+Node: Notes summary1219097
+Node: Basic Concepts1220279
+Node: Basic High Level1220960
+Ref: figure-general-flow1221242
+Ref: figure-process-flow1221927
+Ref: Basic High Level-Footnote-11225228
+Node: Basic Data Typing1225413
+Node: Glossary1228741
+Node: Copying1260688
+Node: GNU Free Documentation License1298227
+Node: Index1323345

End Tag Table
diff --git a/doc/gawk.texi b/doc/gawk.texi
index 5a5a395d..658ac17e 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -49,6 +49,14 @@
@set MINUS
@end ifdocbook
+@iftex
+@set TIMES @times
+@end iftex
+@ifnottex
+@set TIMES *
+@end ifnottex
+
+
@set xref-automatic-section-title
@c The following information should be updated here only!
@@ -581,6 +589,7 @@ particular records in a file and perform operations upon them.
@code{getline}.
* Getline Summary:: Summary of @code{getline} Variants.
* Read Timeout:: Reading input with a timeout.
+* Retrying Input:: Retrying input after certain errors.
* Command-line directories:: What happens if you put a directory on
the command line.
* Input Summary:: Input summary.
@@ -610,6 +619,7 @@ particular records in a file and perform operations upon them.
* Special Caveats:: Things to watch out for.
* Close Files And Pipes:: Closing Input and Output Files and
Pipes.
+* Nonfatal:: Enabling Nonfatal Output.
* Output Summary:: Output summary.
* Output Exercises:: Exercises.
* Values:: Constants, Variables, and Regular
@@ -619,6 +629,9 @@ particular records in a file and perform operations upon them.
* Nondecimal-numbers:: What are octal and hex numbers.
* Regexp Constants:: Regular Expression constants.
* Using Constant Regexps:: When and how to use a regexp constant.
+* Standard Regexp Constants:: Regexp constants in standard
+ @command{awk}.
+* Strong Regexp Constants:: Strongly typed regexp constants.
* Variables:: Variables give names to values for
later use.
* Using Variables:: Using variables in your programs.
@@ -921,11 +934,14 @@ particular records in a file and perform operations upon them.
* Array Functions:: Functions for working with arrays.
* Flattening Arrays:: How to flatten arrays.
* Creating Arrays:: How to create and populate arrays.
+* Redirection API:: How to access and manipulate
+ redirections.
* Extension API Variables:: Variables provided by the API.
* Extension Versioning:: API Version information.
* Extension API Informational Variables:: Variables providing information about
@command{gawk}'s invocation.
* Extension API Boilerplate:: Boilerplate code for using the API.
+* Changes from API V1:: Changes from V1 of the API.
* Finding Extensions:: How @command{gawk} finds compiled
extensions.
* Extension Example:: Example C code for an extension.
@@ -979,14 +995,16 @@ particular records in a file and perform operations upon them.
* Unix Installation:: Installing @command{gawk} under
various versions of Unix.
* Quick Installation:: Compiling @command{gawk} under Unix.
+* Shell Startup Files:: Shell convenience functions.
* Additional Configuration Options:: Other compile-time options.
* Configuration Philosophy:: How it's all supposed to work.
* Non-Unix Installation:: Installation on Other Operating
Systems.
-* PC Installation:: Installing and Compiling @command{gawk} on
- Microsoft Windows.
+* PC Installation:: Installing and Compiling
+ @command{gawk} on Microsoft Windows.
* PC Binary Installation:: Installing a prepared distribution.
-* PC Compiling:: Compiling @command{gawk} for Windows32.
+* PC Compiling:: Compiling @command{gawk} for
+ Windows32.
* PC Using:: Running @command{gawk} on Windows32.
* Cygwin:: Building and running @command{gawk}
for Cygwin.
@@ -3022,14 +3040,59 @@ it is worth addressing.
@cindex Brink, Jeroen
The ``shells'' on Microsoft Windows systems use the double-quote
character for quoting, and make it difficult or impossible to include an
-escaped double-quote character in a command-line script.
-The following example, courtesy of Jeroen Brink, shows
-how to print all lines in a file surrounded by double quotes:
+escaped double-quote character in a command-line script. The following
+example, courtesy of Jeroen Brink, shows how to escape the double quotes
+from this one-liner script that prints all lines in a file surrounded by
+double quotes:
+
+@example
+@{ print "\"" $0 "\"" @}
+@end example
+
+@noindent
+In an MS-Windows command-line the one-liner script above may be passed as
+follows:
@example
gawk "@{ print \"\042\" $0 \"\042\" @}" @var{file}
@end example
+In this example the @samp{\042} is the octal code for a double-quote;
+@command{gawk} converts it into a real double-quote for output by
+the @code{print} statement.
+
+In MS-Windows, escaping double-quotes is a little tricky because you use
+backslashes to escape double-quotes, but backslashes themselves are not
+escaped in the usual way; indeed they are either duplicated or not,
+depending upon whether there is a subsequent double-quote. The MS-Windows
+rule for double-quoting a string is the following:
+
+@enumerate
+@item
+For each double quote in the original string, let @var{N} be the number
+of backslash(es) before it (@var{N} might be zero). Replace these @var{N}
+backslash(es) by @math{2@value{TIMES}@var{N}+1} backslash(es).
+
+@item
+Let @var{N} be the number of backslash(es) trailing the original string
+(@var{N} might be zero). Replace these @var{N} backslash(es) by
+@math{2@value{TIMES}@var{N}} backslash(es).
+
+@item
+Surround the resulting string by double-quotes.
+@end enumerate
+
+So to double-quote the one-liner script @samp{@{ print "\"" $0 "\"" @}}
+from the previous example you would do it this way:
+
+@example
+gawk "@{ print \"\\\"\" $0 \"\\\"\" @}" @var{file}
+@end example
+
+@noindent
+However, the use of @samp{\042} instead of @samp{\\\"} is also possible
+and easier to read, because backslashes that are not followed by a
+double-quote don't need duplication.
@node Sample Data Files
@section @value{DDF}s for the Examples
@@ -4049,6 +4112,7 @@ when parsing numeric input data (@pxref{Locales}).
@cindex @option{-o} option
@cindex @option{--pretty-print} option
Enable pretty-printing of @command{awk} programs.
+Implies @option{--no-optimize}.
By default, the output program is created in a file named @file{awkprof.out}
(@pxref{Profiling}).
The optional @var{file} argument allows you to specify a different
@@ -4057,18 +4121,22 @@ No space is allowed between the @option{-o} and @var{file}, if
@var{file} is supplied.
@quotation NOTE
-Due to the way @command{gawk} has evolved, with this option
-your program still executes. This will change in the
-next major release, such that @command{gawk} will only
-pretty-print the program and not run it.
+In the past, this option would also execute your program.
+This is no longer the case.
@end quotation
@item @option{-O}
@itemx @option{--optimize}
@cindex @option{--optimize} option
@cindex @option{-O} option
-Enable some optimizations on the internal representation of the program.
-At the moment, this includes just simple constant folding.
+Enable @command{gawk}'s default optimizations on the internal
+representation of the program. At the moment, this includes simple
+constant folding and tail recursion elimination in function calls.
+
+These optimizations are enabled by default.
+This option remains primarily for backwards compatibility. However, it may
+be used to cancel the effect of an earlier @option{-s} option
+(see later in this list).
@item @option{-p}[@var{file}]
@itemx @option{--profile}[@code{=}@var{file}]
@@ -4077,6 +4145,7 @@ At the moment, this includes just simple constant folding.
@cindex @command{awk} profiling, enabling
Enable profiling of @command{awk} programs
(@pxref{Profiling}).
+Implies @option{--no-optimize}.
By default, profiles are created in a file named @file{awkprof.out}.
The optional @var{file} argument allows you to specify a different
@value{FN} for the profile file.
@@ -4106,11 +4175,6 @@ restrictions apply:
@cindex newlines
@cindex whitespace, newlines as
@item
-Newlines do not act as whitespace to separate fields when @code{FS} is
-equal to a single space
-(@pxref{Fields}).
-
-@item
Newlines are not allowed after @samp{?} or @samp{:}
(@pxref{Conditional Exp}).
@@ -4148,6 +4212,13 @@ This is now @command{gawk}'s default behavior.
Nevertheless, this option remains (both for backward compatibility
and for use in combination with @option{--traditional}).
+@item @option{-s}
+@itemx @option{--no-optimize}
+@cindex @option{--no-optimize} option
+@cindex @option{-s} option
+Disable @command{gawk}'s default optimizations on the internal
+representation of the program.
+
@item @option{-S}
@itemx @option{--sandbox}
@cindex @option{-S} option
@@ -4461,6 +4532,9 @@ searches first in the current directory and then in @file{/usr/local/share/awk}.
In practice, this means that you will rarely need to change the
value of @env{AWKPATH}.
+@xref{Shell Startup Files}, for information on functions that help to
+manipulate the @env{AWKPATH} variable.
+
@command{gawk} places the value of the search path that it used into
@code{ENVIRON["AWKPATH"]}. This provides access to the actual search
path value from within an @command{awk} program.
@@ -4492,6 +4566,9 @@ an empty value, @command{gawk} uses a default path; this
is typically @samp{/usr/local/lib/gawk}, although it can vary depending
upon how @command{gawk} was built.
+@xref{Shell Startup Files}, for information on functions that help to
+manipulate the @env{AWKLIBPATH} variable.
+
@command{gawk} places the value of the search path that it used into
@code{ENVIRON["AWKLIBPATH"]}. This provides access to the actual search
path value from within an @command{awk} program.
@@ -4519,6 +4596,8 @@ wait for input before returning with an error.
Controls the number of times @command{gawk} attempts to
retry a two-way TCP/IP (socket) connection before giving up.
@xref{TCP/IP Networking}.
+Note that when nonfatal I/O is enabled (@pxref{Nonfatal}),
+@command{gawk} only tries to open a TCP/IP socket once.
@item POSIXLY_CORRECT
Causes @command{gawk} to switch to POSIX-compatibility
@@ -4573,14 +4652,6 @@ two regexp matchers that @command{gawk} uses internally. (There aren't
supposed to be differences, but occasionally theory and practice don't
coordinate with each other.)
-@item GAWK_NO_PP_RUN
-When @command{gawk} is invoked with the @option{--pretty-print} option,
-it will not run the program if this environment variable exists.
-
-@quotation CAUTION
-This variable will not survive into the next major release.
-@end quotation
-
@item GAWK_STACKSIZE
This specifies the amount by which @command{gawk} should grow its
internal evaluation stack, when needed.
@@ -4878,6 +4949,32 @@ Similarly, you may use @code{print} or @code{printf} statements in the
@var{init} and @var{increment} parts of a @code{for} loop. This is another
long-undocumented ``feature'' of Unix @command{awk}.
+@command{gawk} lets you use the names of built-in functions that are
+@command{gawk} extensions as the names of parameters in user-defined functions.
+This is intended to ``future-proof'' old code that happens to use
+function names added by @command{gawk} after the code was written.
+Standard @command{awk} built-in functions, such as @code{sin()} or
+@code{substr()}, are @emph{not} shadowed in this way.
+
+The @code{PROCINFO["argv"]} array contains all of the command-line arguments
+(after glob expansion and redirection processing on platforms where that must
+be done manually by the program) with subscripts ranging from 0 through
+@code{argc} @minus{} 1. For example, @code{PROCINFO["argv"][0]} will contain
+the name by which @command{gawk} was invoked. Here is an example of how this
+feature may be used:
+
+@example
+gawk '
+BEGIN @{
+ for (i = 0; i < length(PROCINFO["argv"]); i++)
+ print i, PROCINFO["argv"][i]
+@}'
+@end example
+
+Please note that this differs from the standard @code{ARGV} array which does
+not include command-line arguments that have already been processed by
+@command{gawk} (@pxref{ARGC and ARGV}).
+
@end ignore
@node Invoking Summary
@@ -5160,17 +5257,21 @@ between @samp{0} and @samp{7}. For example, the code for the ASCII ESC
@item \x@var{hh}@dots{}
The hexadecimal value @var{hh}, where @var{hh} stands for a sequence
of hexadecimal digits (@samp{0}--@samp{9}, and either @samp{A}--@samp{F}
-or @samp{a}--@samp{f}). Like the same construct
-in ISO C, the escape sequence continues until the first nonhexadecimal
-digit is seen. @value{COMMONEXT}
-However, using more than two hexadecimal digits produces
-undefined results. (The @samp{\x} escape sequence is not allowed in
-POSIX @command{awk}.)
+or @samp{a}--@samp{f}). A maximum of two digits are allowed after
+the @samp{\x}. Any further hexadecimal digits are treated as simple
+letters or numbers. @value{COMMONEXT}
+(The @samp{\x} escape sequence is not allowed in POSIX @command{awk}.)
@quotation CAUTION
-The next major release of @command{gawk} will change, such
-that a maximum of two hexadecimal digits following the
-@samp{\x} will be used.
+In ISO C, the escape sequence continues until the first nonhexadecimal
+digit is seen.
+For many years, @command{gawk} would continue incorporating
+hexadecimal digits into the value until a nonhexadecimal digit
+or the end of the string was encountered.
+However, using more than two hexadecimal digits produced
+undefined results.
+As of @value{PVERSION} 4.2, only two digits
+are processed.
@end quotation
@cindex @code{\} (backslash), @code{\/} escape sequence
@@ -6316,6 +6417,7 @@ used with it do not have to be named on the @command{awk} command line
* Getline:: Reading files under explicit program control
using the @code{getline} function.
* Read Timeout:: Reading input with a timeout.
+* Retrying Input:: Retrying input after certain errors.
* Command-line directories:: What happens if you put a directory on the
command line.
* Input Summary:: Input summary.
@@ -6690,16 +6792,12 @@ Readfile} for another option.
@cindex fields
@cindex accessing fields
@cindex fields, examining
-@cindex POSIX @command{awk}, field separators and
-@cindex field separators, POSIX and
-@cindex separators, field, POSIX and
When @command{awk} reads an input record, the record is
automatically @dfn{parsed} or separated by the @command{awk} utility into chunks
called @dfn{fields}. By default, fields are separated by @dfn{whitespace},
like words in a line.
Whitespace in @command{awk} means any string of one or more spaces,
-TABs, or newlines;@footnote{In POSIX @command{awk}, newlines are not
-considered whitespace for separating fields.} other characters
+TABs, or newlines; other characters
that are considered whitespace by other languages
(such as formfeed, vertical tab, etc.) are @emph{not} considered
whitespace by @command{awk}.
@@ -7144,7 +7242,6 @@ can massage it first with a separate @command{awk} program.)
@node Default Field Splitting
@subsection Whitespace Normally Separates Fields
-@cindex newlines, as field separators
@cindex whitespace, as field separators
Fields are normally separated by whitespace sequences
(spaces, TABs, and newlines), not by single spaces. Two spaces in a row do not
@@ -8107,6 +8204,13 @@ a record, such as a file that cannot be opened, then @code{getline}
returns @minus{}1. In this case, @command{gawk} sets the variable
@code{ERRNO} to a string describing the error that occurred.
+If @code{ERRNO} indicates that the I/O operation may be
+retried, and @code{PROCINFO["@var{input}", "RETRY"]} is set,
+then @code{getline} returns @minus{}2
+instead of @minus{}1, and further calls to @code{getline}
+may be attempted. @xref{Retrying Input}, for further information about
+this feature.
+
In the following examples, @var{command} stands for a string value that
represents a shell command.
@@ -8761,7 +8865,8 @@ on a per-command or per-connection basis.
the attempt to read from the underlying device may
succeed in a later attempt. This is a limitation, and it also
means that you cannot use this to multiplex input from
-two or more sources.
+two or more sources. @xref{Retrying Input}, for a way to enable
+later I/O attempts to succeed.
Assigning a timeout value prevents read operations from
blocking indefinitely. But bear in mind that there are other ways
@@ -8771,6 +8876,36 @@ a connection before it can start reading any data,
or the attempt to open a FIFO special file for reading can block
indefinitely until some other process opens it for writing.
+@node Retrying Input
+@section Retrying Reads After Certain Input Errors
+@cindex retrying input
+
+@cindex differences in @command{awk} and @command{gawk}, retrying input
+This @value{SECTION} describes a feature that is specific to @command{gawk}.
+
+When @command{gawk} encounters an error while reading input, by
+default @code{getline} returns @minus{}1, and subsequent attempts to
+read from that file result in an end-of-file indication. However, you
+may optionally instruct @command{gawk} to allow I/O to be retried when
+certain errors are encountered by setting a special element in
+the @code{PROCINFO} array (@pxref{Auto-set}):
+
+@example
+PROCINFO["@var{input_name}", "RETRY"] = 1
+@end example
+
+When this element exists, @command{gawk} checks the value of the system
+(C language)
+@code{errno} variable when an I/O error occurs. If @code{errno} indicates
+a subsequent I/O attempt may succeed, @code{getline} instead returns
+@minus{}2 and
+further calls to @code{getline} may succeed. This applies to the @code{errno}
+values @code{EAGAIN}, @code{EWOULDBLOCK}, @code{EINTR}, or @code{ETIMEDOUT}.
+
+This feature is useful in conjunction with
+@code{PROCINFO["@var{input_name}", "READ_TIMEOUT"]} or situations where a file
+descriptor has been configured to behave in a non-blocking fashion.
+
@node Command-line directories
@section Directories on the Command Line
@cindex differences in @command{awk} and @command{gawk}, command-line directories
@@ -8932,6 +9067,7 @@ and discusses the @code{close()} built-in function.
@command{gawk} allows access to inherited file
descriptors.
* Close Files And Pipes:: Closing Input and Output Files and Pipes.
+* Nonfatal:: Enabling Nonfatal Output.
* Output Summary:: Output summary.
* Output Exercises:: Exercises.
@end menu
@@ -10358,17 +10494,26 @@ a system problem closing the file or process.
In these cases, @command{gawk} sets the predefined variable
@code{ERRNO} to a string describing the problem.
-In @command{gawk},
-when closing a pipe or coprocess (input or output),
-the return value is the exit status of the command.@footnote{
-This is a full 16-bit value as returned by the @code{wait()}
-system call. See the system manual pages for information on
-how to decode this value.}
-Otherwise, it is the return value from the system's @code{close()} or
-@code{fclose()} C functions when closing input or output
-files, respectively.
-This value is zero if the close succeeds, or @minus{}1 if
-it fails.
+In @command{gawk}, starting with @value{PVERSION} 4.2, when closing a pipe or
+coprocess (input or output), the return value is the exit status of the
+command, as described in @ref{table-close-pipe-return-values}.@footnote{Prior
+to @value{PVERSION} 4.2, the return value from closing a pipe or coprocess
+was the full 16-bit exit value as defined by the @code{wait()} system
+call.} Otherwise, it is the return value from the system's @code{close()}
+or @code{fclose()} C functions when closing input or output files,
+respectively. This value is zero if the close succeeds, or @minus{}1
+if it fails.
+
+@float Table,table-close-pipe-return-values
+@caption{Return values from @code{close()} of a pipe}
+@multitable @columnfractions .40 .60
+@headitem Situation @tab Return value from @code{close()}
+@item Normal exit of command @tab Command's exit status
+@item Death by signal of command @tab 256 + number of murderous signal
+@item Death by signal of command with core dump @tab 512 + number of murderous signal
+@item Some kind of error @tab @minus{}1
+@end multitable
+@end float
The POSIX standard is very vague; it says that @code{close()}
returns zero on success and a nonzero value otherwise. In general,
@@ -10415,17 +10560,26 @@ a system problem closing the file or process.
In these cases, @command{gawk} sets the predefined variable
@code{ERRNO} to a string describing the problem.
-In @command{gawk},
-when closing a pipe or coprocess (input or output),
-the return value is the exit status of the command.@footnote{
-This is a full 16-bit value as returned by the @code{wait()}
-system call. See the system manual pages for information on
-how to decode this value.}
-Otherwise, it is the return value from the system's @code{close()} or
-@code{fclose()} C functions when closing input or output
-files, respectively.
-This value is zero if the close succeeds, or @minus{}1 if
-it fails.
+In @command{gawk}, starting with @value{PVERSION} 4.2, when closing a pipe or
+coprocess (input or output), the return value is the exit status of the
+command, as described in @ref{table-close-pipe-return-values}.@footnote{Prior
+to @value{PVERSION} 4.2, the return value from closing a pipe or coprocess
+was the full 16-bit exit value as defined by the @code{wait()} system
+call.} Otherwise, it is the return value from the system's @code{close()}
+or @code{fclose()} C functions when closing input or output files,
+respectively. This value is zero if the close succeeds, or @minus{}1
+if it fails.
+
+@float Table,table-close-pipe-return-values
+@caption{Return values from @code{close()} of a pipe}
+@multitable @columnfractions .40 .60
+@headitem Situation @tab Return value from @code{close()}
+@item Normal exit of command @tab Command's exit status
+@item Death by signal of command @tab 256 + number of murderous signal
+@item Death by signal of command with core dump @tab 512 + number of murderous signal
+@item Some kind of error @tab @minus{}1
+@end multitable
+@end float
The POSIX standard is very vague; it says that @code{close()}
returns zero on success and a nonzero value otherwise. In general,
@@ -10437,6 +10591,70 @@ when closing a pipe.
@end cartouche
@end ifnotdocbook
+@node Nonfatal
+@section Enabling Nonfatal Output
+
+This @value{SECTION} describes a @command{gawk}-specific feature.
+
+In standard @command{awk}, output with @code{print} or @code{printf}
+to a nonexistent file, or some other I/O error (such as filling up the
+disk) is a fatal error.
+
+@example
+$ @kbd{gawk 'BEGIN @{ print "hi" > "/no/such/file" @}'}
+@error{} gawk: cmd. line:1: fatal: can't redirect to `/no/such/file' (No such file or directory)
+@end example
+
+@command{gawk} makes it possible to detect that an error has
+occurred, allowing you to possibly recover from the error, or
+at least print an error message of your choosing before exiting.
+You can do this in one of two ways:
+
+@itemize @bullet
+@item
+For all output files, by assigning any value to @code{PROCINFO["NONFATAL"]}.
+
+@item
+On a per-file basis, by assigning any value to
+@code{PROCINFO[@var{filename}, "NONFATAL"]}.
+Here, @var{filename} is the name of the file to which
+you wish output to be nonfatal.
+@end itemize
+
+Once you have enabled nonfatal output, you must check @code{ERRNO}
+after every relevant @code{print} or @code{printf} statement to
+see if something went wrong. It is also a good idea to initialize
+@code{ERRNO} to zero before attempting the output. For example:
+
+@example
+$ @kbd{gawk '}
+> @kbd{BEGIN @{}
+> @kbd{ PROCINFO["NONFATAL"] = 1}
+> @kbd{ ERRNO = 0}
+> @kbd{ print "hi" > "/no/such/file"}
+> @kbd{ if (ERRNO) @{}
+> @kbd{ print("Output failed:", ERRNO) > "/dev/stderr"}
+> @kbd{ exit 1}
+> @kbd{ @}}
+> @kbd{@}'}
+@error{} Output failed: No such file or directory
+@end example
+
+Here, @command{gawk} did not produce a fatal error; instead
+it let the @command{awk} program code detect the problem and handle it.
+
+This mechanism works also for standard output and standard error.
+For standard output, you may use @code{PROCINFO["-", "NONFATAL"]}
+or @code{PROCINFO["/dev/stdout", "NONFATAL"]}. For standard error, use
+@code{PROCINFO["/dev/stderr", "NONFATAL"]}.
+
+When attempting to open a TCP/IP socket (@pxref{TCP/IP Networking}),
+@command{gawk} tries multiple times. The @env{GAWK_SOCK_RETRIES}
+environment variable (@pxref{Other Environment Variables}) allows you to
+override @command{gawk}'s builtin default number of attempts. However,
+once nonfatal I/O is enabled for a given socket, @command{gawk} only
+retries once, relying on @command{awk}-level code to notice that there
+was a problem.
@node Output Summary
@section Summary
@@ -10466,6 +10684,12 @@ Use @code{close()} to close open file, pipe, and coprocess redirections.
For coprocesses, it is possible to close only one direction of the
communications.
+@item
+Normally errors with @code{print} or @code{printf} are fatal.
+@command{gawk} lets you make output errors be nonfatal either for
+all files or on a per-file basis. You must then check for errors
+after every relevant output statement.
+
@end itemize
@c EXCLUDE START
@@ -10613,7 +10837,7 @@ Just as @samp{11} in decimal is 1 times 10 plus 1, so
@samp{11} in octal is 1 times 8 plus 1. This equals 9 in decimal.
In hexadecimal, there are 16 digits. Because the everyday decimal
number system only has ten digits (@samp{0}--@samp{9}), the letters
-@samp{a} through @samp{f} are used to represent the rest.
+@samp{a} through @samp{f} represent the rest.
(Case in the letters is usually irrelevant; hexadecimal @samp{a} and @samp{A}
have the same value.)
Thus, @samp{11} in
@@ -10745,6 +10969,20 @@ but could be more complex expressions).
@node Using Constant Regexps
@subsection Using Regular Expression Constants
+Regular expression constants consist of text describing
+a regular expression enclosed in slashes (such as @code{/the +answer/}).
+This @value{SECTION} describes how such constants work in
+POSIX @command{awk} and @command{gawk}, and then goes on to describe
+@dfn{strongly typed regexp constants}, which are a @command{gawk} extension.
+
+@menu
+* Standard Regexp Constants:: Regexp constants in standard @command{awk}.
+* Strong Regexp Constants:: Strongly typed regexp constants.
+@end menu
+
+@node Standard Regexp Constants
+@subsubsection Standard Regular Expression Constants
+
@cindex dark corner, regexp constants
When used on the righthand side of the @samp{~} or @samp{!~}
operators, a regexp constant merely stands for the regexp that is to be
@@ -10852,6 +11090,90 @@ or not @code{$0} matches @code{/hi/}.
a parameter to a user-defined function, because passing a truth value in
this way is probably not what was intended.
+@node Strong Regexp Constants
+@subsubsection Strongly Typed Regexp Constants
+
+This @value{SECTION} describes a @command{gawk}-specific feature.
+
+As we saw in the previous @value{SECTION},
+regexp constants (@code{/@dots{}/}) hold a strange position in the
+@command{awk} language. In most contexts, they act like an expression:
+@samp{$0 ~ /@dots{}/}. In other contexts, they denote only a regexp to
+be matched. In no case are they really a ``first class citizen'' of the
+language. That is, you cannot define a scalar variable whose type is
+``regexp'' in the same sense that you can define a variable to be a
+number or a string:
+
+@example
+num = 42 @ii{Numeric variable}
+str = "hi" @ii{String variable}
+re = /foo/ @ii{Wrong!} re @ii{is the result of} $0 ~ /foo/
+@end example
+
+For a number of more advanced use cases,
+it would be nice to have regexp constants that
+are @dfn{strongly typed}; in other words, that denote a regexp useful
+for matching, and not an expression.
+
+@command{gawk} provides this feature. A strongly typed regexp constant
+looks almost like a regular regexp constant, except that it is preceded
+by an @samp{@@} sign:
+
+@example
+re = @@/foo/ @ii{Regexp variable}
+@end example
+
+Strongly typed regexp constants @emph{cannot} be used everywhere that a
+regular regexp constant can, because this would make the language even more
+confusing. Instead, you may use them only in certain contexts:
+
+@itemize @bullet
+@item
+On the righthand side of the @samp{~} and @samp{!~} operators: @samp{some_var ~ @@/foo/}
+(@pxref{Regexp Usage}).
+
+@item
+In the @code{case} part of a @code{switch} statement
+(@pxref{Switch Statement}).
+
+@item
+As an argument to one of the built-in functions that accept regexp constants:
+@code{gensub()},
+@code{gsub()},
+@code{match()},
+@code{patsplit()},
+@code{split()},
+and
+@code{sub()}
+(@pxref{String Functions}).
+
+@item
+As a parameter in a call to a user-defined function
+(@pxref{User-defined}).
+
+@item
+On the righthand side of an assignment to a variable: @samp{some_var = @@/foo/}.
+In this case, the type of @code{some_var} is regexp. Additionally, @code{some_var}
+can be used with @samp{~} and @samp{!~}, passed to one of the built-in functions
+listed above, or passed as a parameter to a user-defined function.
+@end itemize
+
+You may use the @code{typeof()} built-in function
+(@pxref{Type Functions})
+to determine if a variable or function parameter is
+a regexp variable.
+
+The true power of this feature comes from the ability to create variables that
+have regexp type. Such variables can be passed on to user-defined functions,
+without the confusing aspects of computed regular expressions created from
+strings or string constants. They may also be passed through indirect function
+calls (@pxref{Indirect Calls})
+and on to the built-in functions that accept regexp constants.
+
+When used in numeric conversions, strongly typed regexp variables convert
+to zero. When used in string conversions, they convert to the string
+value of the original regexp text.
+
@node Variables
@subsection Variables
@@ -12027,17 +12349,94 @@ compares variables.
@node Variable Typing
@subsubsection String Type versus Numeric Type
+Scalar objects in @command{awk} (variables, array elements, and fields)
+are @emph{dynamically} typed. This means their type can change as the
+program runs, from @dfn{untyped} before any use,@footnote{@command{gawk}
+calls this @dfn{unassigned}, as the following example shows.} to string
+or number, and then from string to number or number to string, as the
+program progresses. (@command{gawk} also provides regexp-typed scalars,
+but let's ignore that for now; @pxref{Strong Regexp Constants}.)
+
+You can't do much with untyped variables, other than tell that they
+are untyped. The following program tests @code{a} against @code{""}
+and @code{0}; the test succeeds when @code{a} has never been assigned
+a value. It also uses the built-in @code{typeof()} function
+(not presented yet; @pxref{Type Functions}) to show @code{a}'s type:
+
+@example
+$ @kbd{gawk 'BEGIN @{ print (a == "" && a == 0 ?}
+> @kbd{"a is untyped" : "a has a type!") ; print typeof(a) @}'}
+@print{} a is untyped
+@print{} unassigned
+@end example
+
+A scalar has numeric type when assigned a numeric value,
+such as from a numeric constant, or from another scalar
+with numeric type:
+
+@example
+$ @kbd{gawk 'BEGIN @{ a = 42 ; print typeof(a)}
+> @kbd{b = a ; print typeof(b) @}'}
+@print{} number
+@print{} number
+@end example
+
+Similarly, a scalar has string type when assigned a string
+value, such as from a string constant, or from another scalar
+with string type:
+
+@example
+$ @kbd{gawk 'BEGIN @{ a = "forty two" ; print typeof(a)}
+> @kbd{b = a ; print typeof(b) @}'}
+@print{} string
+@print{} string
+@end example
+
+So far, this is all simple and straightforward. What happens, though,
+when @command{awk} has to process data from a user? Let's start with
+field data. What should the following command produce as output?
+
+@example
+echo hello | awk '@{ printf("%s %s < 42\n", $1,
+ ($1 < 42 ? "is" : "is not")) @}'
+@end example
+
+@noindent
+Since @samp{hello} is alphabetic data, @command{awk} can only do a string
+comparison. Internally, it converts @code{42} into @code{"42"} and compares
+the two string values @code{"hello"} and @code{"42"}. Here's the result:
+
+@example
+$ @kbd{echo hello | awk '@{ printf("%s %s < 42\n", $1,}
+> @kbd{ ($1 < 42 ? "is" : "is not")) @}'}
+@print{} hello is not < 42
+@end example
+
+However, what happens when data from a user @emph{looks like} a number?
+On the one hand, in reality, the input data consists of characters, not
+binary numeric
+values. But, on the other hand, the data looks numeric, and @command{awk}
+really ought to treat it as such. And indeed, it does:
+
+@example
+$ @kbd{echo 37 | awk '@{ printf("%s %s < 42\n", $1,}
+> @kbd{ ($1 < 42 ? "is" : "is not")) @}'}
+@print{} 37 is < 42
+@end example
+
+Here are the rules for when @command{awk}
+treats data as a number, and for when it treats data as a string.
+
@cindex numeric, strings
@cindex strings, numeric
@cindex POSIX @command{awk}, numeric strings and
-The POSIX standard introduced
-the concept of a @dfn{numeric string}, which is simply a string that looks
-like a number---for example, @code{@w{" +2"}}. This concept is used
-for determining the type of a variable.
-The type of the variable is important because the types of two variables
-determine how they are compared.
-Variable typing follows these rules:
+The POSIX standard uses the term @dfn{numeric string} for input data that
+looks numeric. The @samp{37} in the previous example is a numeric string.
+So what is the type of a numeric string? Answer: numeric.
+The type of a variable is important because the types of two variables
+determine how they are compared.
+Variable typing follows these definitions and rules:
@itemize @value{BULLET}
@item
@@ -12052,7 +12451,9 @@ attribute.
Fields, @code{getline} input, @code{FILENAME}, @code{ARGV} elements,
@code{ENVIRON} elements, and the elements of an array created by
@code{match()}, @code{split()}, and @code{patsplit()} that are numeric
-strings have the @dfn{strnum} attribute. Otherwise, they have
+strings have the @dfn{strnum} attribute.@footnote{Thus, a POSIX
+numeric string and @command{gawk}'s strnum are the same thing.}
+Otherwise, they have
the @dfn{string} attribute. Uninitialized variables also have the
@dfn{strnum} attribute.
@@ -12126,7 +12527,7 @@ STRNUM &&string &numeric &numeric\cr
@end tex
@ifnottex
@ifnotdocbook
-@display
+@verbatim
+----------------------------------------------
| STRING NUMERIC STRNUM
--------+----------------------------------------------
@@ -12137,7 +12538,7 @@ NUMERIC | string numeric numeric
|
STRNUM | string numeric numeric
--------+----------------------------------------------
-@end display
+@end verbatim
@end ifnotdocbook
@end ifnottex
@docbook
@@ -12196,10 +12597,14 @@ purposes.
In short, when one operand is a ``pure'' string, such as a string
constant, then a string comparison is performed. Otherwise, a
numeric comparison is performed.
+(The primary difference between a number and a strnum is that
+for strnums @command{gawk} preserves the original string value that
+the scalar had when it came in.)
+
+This point bears additional emphasis:
+Input that looks numeric @emph{is} numeric.
+All other input is treated as strings.
-This point bears additional emphasis: All user input is made of characters,
-and so is first and foremost of string type; input strings
-that look numeric are additionally given the strnum attribute.
Thus, the six-character input string @w{@samp{ +3.14}} receives the
strnum attribute. In contrast, the eight characters
@w{@code{" +3.14"}} appearing in program text comprise a string constant.
@@ -12226,6 +12631,14 @@ $ @kbd{echo ' +3.14' | awk '@{ print($1 == 3.14) @}'} @ii{True}
@print{} 1
@end example
+You can see the type of an input field (or other user input)
+using @code{typeof()}:
+
+@example
+$ @kbd{echo hello 37 | gawk '@{ print typeof($1), typeof($2) @}'}
+@print{} string strnum
+@end example
+
@node Comparison Operators
@subsubsection Comparison Operators
@@ -12385,19 +12798,19 @@ One special place where @code{/foo/} is @emph{not} an abbreviation for
where this is discussed in more detail.
@node POSIX String Comparison
-@subsubsection String Comparison with POSIX Rules
+@subsubsection String Comparison Based on Locale Collating Order
-The POSIX standard says that string comparison is performed based
-on the locale's @dfn{collating order}. This is the order in which
-characters sort, as defined by the locale (for more discussion,
-@pxref{Locales}). This order is usually very different
-from the results obtained when doing straight character-by-character
-comparison.@footnote{Technically, string comparison is supposed
-to behave the same way as if the strings were compared with the C
-@code{strcoll()} function.}
+The POSIX standard used to say that all string comparisons are
+performed based on the locale's @dfn{collating order}. This
+is the order in which characters sort, as defined by the locale
+(for more discussion, @pxref{Locales}). This order is usually very
+different from the results obtained when doing straight byte-by-byte
+comparison.@footnote{Technically, string comparison is supposed to behave
+the same way as if the strings were compared with the C @code{strcoll()}
+function.}
Because this behavior differs considerably from existing practice,
-@command{gawk} only implements it when in POSIX mode (@pxref{Options}).
+@command{gawk} only implemented it when in POSIX mode (@pxref{Options}).
Here is an example to illustrate the difference, in an @code{en_US.UTF-8}
locale:
@@ -12410,6 +12823,26 @@ $ @kbd{gawk --posix 'BEGIN @{ printf("ABC < abc = %s\n",}
@print{} ABC < abc = FALSE
@end example
+Fortunately, as of August 2016, comparison based on locale
+collating order is no longer required for the @code{==} and @code{!=}
+operators.@footnote{See @uref{http://austingroupbugs.net/view.php?id=1070,
+the Austin Group website}.} However, comparison based on locales is still
+required for @code{<}, @code{<=}, @code{>}, and @code{>=}. POSIX thus
+recommends as follows:
+
+@quotation
+Since the @code{==} operator checks whether strings are identical,
+not whether they collate equally, applications needing to check whether
+strings collate equally can use:
+
+@example
+a <= b && a >= b
+@end example
+@end quotation
+
+As of @value{PVERSION} 4.2, @command{gawk} continues to use locale
+collating order for @code{<}, @code{<=}, @code{>}, and @code{>=} only
+in POSIX mode.
@node Boolean Ops
@subsection Boolean Expressions
@@ -14569,12 +15002,11 @@ specify the behavior when @code{FS} is the null string.
Nonetheless, some other versions of @command{awk} also treat
@code{""} specially.)
-@cindex POSIX @command{awk}, @code{FS} variable and
The default value is @w{@code{" "}}, a string consisting of a single
-space. As a special exception, this value means that any
-sequence of spaces, TABs, and/or newlines is a single separator.@footnote{In
-POSIX @command{awk}, newline does not count as whitespace.} It also causes
-spaces, TABs, and newlines at the beginning and end of a record to be ignored.
+space. As a special exception, this value means that any sequence of
+spaces, TABs, and/or newlines is a single separator. It also causes
+spaces, TABs, and newlines at the beginning and end of a record to
+be ignored.
You can set the value of @code{FS} on the command line using the
@option{-F} option:
@@ -14798,10 +15230,24 @@ opens the next file.
An associative array containing the values of the environment. The array
indices are the environment variable names; the elements are the values of
the particular environment variables. For example,
-@code{ENVIRON["HOME"]} might be @code{"/home/arnold"}. Changing this array
-does not affect the environment passed on to any programs that
-@command{awk} may spawn via redirection or the @code{system()} function.
-(In a future version of @command{gawk}, it may do so.)
+@code{ENVIRON["HOME"]} might be @code{/home/arnold}.
+
+For POSIX @command{awk}, changing this array does not affect the
+environment passed on to any programs that @command{awk} may spawn via
+redirection or the @code{system()} function.
+
+However, beginning with @value{PVERSION} 4.2, if not in POSIX
+compatibility mode, @command{gawk} does update its own environment when
+@code{ENVIRON} is changed, thus changing the environment seen by programs
+that it creates. You should therefore be especially careful if you
+modify @code{ENVIRON["PATH"]}, which is the search path for finding
+executable programs.
+
+This can also affect the running @command{gawk} program, since some of the
+built-in functions may pay attention to certain environment variables.
+The most notable instance of this is @code{mktime()} (@pxref{Time
+Functions}), which pays attention to the value of the @env{TZ} environment
+variable on many systems.
Some operating systems may not have environment variables.
On such systems, the @code{ENVIRON} array is empty (except for
@@ -14835,6 +15281,11 @@ value to be meaningful when an I/O operation returns a failure value,
such as @code{getline} returning @minus{}1. You are, of course, free
to clear it yourself before doing an I/O operation.
+If the value of @code{ERRNO} corresponds to a system error in the C
+@code{errno} variable, then @code{PROCINFO["errno"]} will be set to the value
+of @code{errno}. For non-system errors, @code{PROCINFO["errno"]} will
+be zero.
+
@cindex @code{FILENAME} variable
@cindex dark corner, @code{FILENAME} variable
@item @code{FILENAME}
@@ -14903,6 +15354,10 @@ are guaranteed to be available:
@item PROCINFO["egid"]
The value of the @code{getegid()} system call.
+@item PROCINFO["errno"]
+The value of the C @code{errno} variable when @code{ERRNO} is set to
+the associated error message.
+
@item PROCINFO["euid"]
@cindex effective user ID of @command{gawk} user
The value of the @code{geteuid()} system call.
@@ -15026,6 +15481,14 @@ to test for these elements
The following elements allow you to change @command{gawk}'s behavior:
@table @code
+@item PROCINFO["NONFATAL"]
+If this element exists, then I/O errors for all output redirections become nonfatal.
+@xref{Nonfatal}.
+
+@item PROCINFO["@var{output_name}", "NONFATAL"]
+Make output errors for @var{output_name} be nonfatal.
+@xref{Nonfatal}.
+
@item PROCINFO["@var{command}", "pty"]
For two-way communication to @var{command}, use a pseudo-tty instead
of setting up a two-way pipe.
@@ -16966,6 +17429,23 @@ truncated toward zero.
For example, @code{int(3)} is 3, @code{int(3.9)} is 3, @code{int(-3.9)}
is @minus{}3, and @code{int(-3)} is @minus{}3 as well.
+@item @code{intdiv(@var{numerator}, @var{denominator}, @var{result})}
+@cindexawkfunc{intdiv}
+@cindex intdiv
+Perform integer division, similar to the standard C @code{div()}
+function. First, truncate @code{numerator} and @code{denominator}
+towards zero, creating integer values. Clear the @code{result}
+array, and then set @code{result["quotient"]} to the result of
+@samp{numerator / denominator}, truncated towards zero to an integer,
+and set @code{result["remainder"]} to the result of @samp{numerator %
+denominator}, truncated towards zero to an integer. This function is
+primarily intended for use with arbitrary length integers; it avoids
+creating MPFR arbitrary precision floating-point values (@pxref{Arbitrary
+Precision Integers}).
+
+This function is a @command{gawk} extension. It is not available in
+compatibility mode (@pxref{Options}).
+
@item @code{log(@var{x})}
@cindexawkfunc{log}
@cindex logarithm
@@ -18612,7 +19092,7 @@ Optional parameters are enclosed in square brackets ([ ]):
@c @asis for docbook
@table @asis
-@item @code{mktime(@var{datespec})}
+@item @code{mktime(@var{datespec}} [@code{, @var{utc-flag}} ]@code{)}
@cindexgawkfunc{mktime}
@cindex generate time values
Turn @var{datespec} into a timestamp in the same form
@@ -18631,7 +19111,9 @@ The values of these numbers need not be within the ranges specified;
for example, an hour of @minus{}1 means 1 hour before midnight.
The origin-zero Gregorian calendar is assumed, with year 0 preceding
year 1 and year @minus{}1 preceding year 0.
-The time is assumed to be in the local time zone.
+If @var{utc-flag} is present and is either nonzero or non-null, the time
+is assumed to be in the UTC time zone; otherwise, the
+time is assumed to be in the local time zone.
If the daylight-savings flag is positive, the time is assumed to be
daylight savings time; if zero, the time is assumed to be standard
time; and if negative (the default), @code{mktime()} attempts to determine
@@ -19131,12 +19613,12 @@ Return the value of @var{val}, shifted right by @var{count} bits.
Return the bitwise XOR of the arguments. There must be at least two.
@end table
-For all of these functions, first the double-precision floating-point value is
-converted to the widest C unsigned integer type, then the bitwise operation is
-performed. If the result cannot be represented exactly as a C @code{double},
-leading nonzero bits are removed one by one until it can be represented
-exactly. The result is then converted back into a C @code{double}. (If
-you don't understand this paragraph, don't worry about it.)
+@quotation CAUTION
+Beginning with @command{gawk} @value{PVERSION} 4.2, negative
+operands are not allowed for any of these functions. A negative
+operand produces a fatal error. See the sidebar
+``Beware The Smoke and Mirrors!'' for more information as to why.
+@end quotation
Here is a user-defined function (@pxref{User-defined})
that illustrates the use of these functions:
@@ -19241,19 +19723,196 @@ decimal and octal values for the same numbers
and then demonstrates the
results of the @code{compl()}, @code{lshift()}, and @code{rshift()} functions.
+@cindex sidebar, Beware The Smoke and Mirrors!
+@ifdocbook
+@docbook
+<sidebar><title>Beware The Smoke and Mirrors!</title>
+@end docbook
+
+
+In other languages, bitwise operations are performed on integer values,
+not floating-point values. As a general statement, such operations work
+best when performed on unsigned integers.
+
+@command{gawk} attempts to treat the arguments to the bitwise functions
+as unsigned integers. For this reason, negative arguments produce a
+fatal error.
+
+In normal operation, for all of these functions, first the
+double-precision floating-point value is converted to the widest C
+unsigned integer type, then the bitwise operation is performed. If the
+result cannot be represented exactly as a C @code{double}, leading
+nonzero bits are removed one by one until it can be represented exactly.
+The result is then converted back into a C @code{double}.@footnote{If you don't
+understand this paragraph, the upshot is that @command{gawk} can only
+store a particular range of integer values; numbers outside that range
+are reduced to fit within the range.}
+
+However, when using arbitrary precision arithmetic with the @option{-M}
+option (@pxref{Arbitrary Precision Arithmetic}), the results may differ.
+This is particularly noticeable with the @code{compl()} function:
+
+@example
+$ @kbd{gawk 'BEGIN @{ print compl(42) @}'}
+@print{} 9007199254740949
+$ @kbd{gawk -M 'BEGIN @{ print compl(42) @}'}
+@print{} -43
+@end example
+
+What's going on becomes clear when printing the results
+in hexadecimal:
+
+@example
+$ @kbd{gawk 'BEGIN @{ printf "%#x\n", compl(42) @}'}
+@print{} 0x1fffffffffffd5
+$ @kbd{gawk -M 'BEGIN @{ printf "%#x\n", compl(42) @}'}
+@print{} 0xffffffffffffffd5
+@end example
+
+When using the @option{-M} option, under the hood, @command{gawk} uses
+GNU MP arbitrary precision integers which have at least 64 bits of precision.
+When not using @option{-M}, @command{gawk} stores integral values in
+regular double-precision floating point, which only maintains 53 bits of
+precision. Furthermore, the GNU MP library treats (or at least seems to treat)
+the leading bit as a sign bit; thus the result with @option{-M} in this case is
+a negative number.
+
+In short, using @command{gawk} for any but the simplest kind of bitwise
+operations is probably a bad idea; caveat emptor!
+
+
+@docbook
+</sidebar>
+@end docbook
+@end ifdocbook
+
+@ifnotdocbook
+@cartouche
+@center @b{Beware The Smoke and Mirrors!}
+
+
+
+In other languages, bitwise operations are performed on integer values,
+not floating-point values. As a general statement, such operations work
+best when performed on unsigned integers.
+
+@command{gawk} attempts to treat the arguments to the bitwise functions
+as unsigned integers. For this reason, negative arguments produce a
+fatal error.
+
+In normal operation, for all of these functions, first the
+double-precision floating-point value is converted to the widest C
+unsigned integer type, then the bitwise operation is performed. If the
+result cannot be represented exactly as a C @code{double}, leading
+nonzero bits are removed one by one until it can be represented exactly.
+The result is then converted back into a C @code{double}.@footnote{If you don't
+understand this paragraph, the upshot is that @command{gawk} can only
+store a particular range of integer values; numbers outside that range
+are reduced to fit within the range.}
+
+However, when using arbitrary precision arithmetic with the @option{-M}
+option (@pxref{Arbitrary Precision Arithmetic}), the results may differ.
+This is particularly noticeable with the @code{compl()} function:
+
+@example
+$ @kbd{gawk 'BEGIN @{ print compl(42) @}'}
+@print{} 9007199254740949
+$ @kbd{gawk -M 'BEGIN @{ print compl(42) @}'}
+@print{} -43
+@end example
+
+What's going on becomes clear when printing the results
+in hexadecimal:
+
+@example
+$ @kbd{gawk 'BEGIN @{ printf "%#x\n", compl(42) @}'}
+@print{} 0x1fffffffffffd5
+$ @kbd{gawk -M 'BEGIN @{ printf "%#x\n", compl(42) @}'}
+@print{} 0xffffffffffffffd5
+@end example
+
+When using the @option{-M} option, under the hood, @command{gawk} uses
+GNU MP arbitrary precision integers which have at least 64 bits of precision.
+When not using @option{-M}, @command{gawk} stores integral values in
+regular double-precision floating point, which only maintains 53 bits of
+precision. Furthermore, the GNU MP library treats (or at least seems to treat)
+the leading bit as a sign bit; thus the result with @option{-M} in this case is
+a negative number.
+
+In short, using @command{gawk} for any but the simplest kind of bitwise
+operations is probably a bad idea; caveat emptor!
+
+@end cartouche
+@end ifnotdocbook
+
@node Type Functions
@subsection Getting Type Information
-@command{gawk} provides a single function that lets you distinguish
-an array from a scalar variable. This is necessary for writing code
+@command{gawk} provides two functions that let you distinguish
+the type of a variable.
+This is necessary for writing code
that traverses every element of an array of arrays
-(@pxref{Arrays of Arrays}).
+(@pxref{Arrays of Arrays}), and in other contexts.
@table @code
@cindexgawkfunc{isarray}
@cindex scalar or array
@item isarray(@var{x})
Return a true value if @var{x} is an array. Otherwise, return false.
+
+@cindexgawkfunc{typeof}
+@cindex variable type
+@cindex type, of variable
+@item typeof(@var{x})
+Return one of the following strings, depending upon the type of @var{x}:
+
+@c nested table
+@table @code
+@item "array"
+@var{x} is an array.
+
+@item "regexp"
+@var{x} is a strongly typed regexp (@pxref{Strong Regexp Constants}).
+
+@item "number"
+@var{x} is a number.
+
+@item "string"
+@var{x} is a string.
+
+@item "strnum"
+@var{x} is a number that started life as user input, such as a field or
+the result of calling @code{split()}. (I.e., @var{x} has the strnum
+attribute; @pxref{Variable Typing}.)
+
+@item "unassigned"
+@var{x} is a scalar variable that has not been assigned a value yet.
+For example:
+
+@example
+BEGIN @{
+ # creates a[1] but it has no assigned value
+ a[1]
+ print typeof(a[1]) # unassigned
+@}
+@end example
+
+@item "untyped"
+@var{x} has not yet been used at all; it can become a scalar or an
+array.
+For example:
+
+@example
+BEGIN @{
+ print typeof(x) # x never used --> untyped
+ mk_arr(x)
+ print typeof(x) # x now an array --> array
+@}
+
+function mk_arr(a) @{ a[1] = 1 @}
+@end example
+
+@end table
@end table
@code{isarray()} is meant for use in two circumstances. The first is when
@@ -19271,6 +19930,14 @@ that has not been previously used to @code{isarray()}, @command{gawk}
ends up turning it into a scalar.
@end quotation
+The @code{typeof()} function is general; it allows you to determine
+if a variable or function parameter is a scalar, an array, or a strongly
+typed regexp.
+
+@code{isarray()} is deprecated; you should use @code{typeof()} instead.
+You should replace any existing uses of @samp{isarray(var)} in your
+code with @samp{typeof(var) == "array"}.
+
@node I18N Functions
@subsection String-Translation Functions
@cindex @command{gawk}, string-translation functions
@@ -27531,9 +28198,16 @@ your program to hang. (Thus, this particular feature is of much less
use in practice than being able to close the @code{"to"} end.)
@quotation CAUTION
-It is a fatal error to write to the @code{"to"} end of a two-way
-pipe which has been closed. It is also a fatal error to read
+Normally,
+it is a fatal error to write to the @code{"to"} end of a two-way
+pipe which has been closed, and it is also a fatal error to read
from the @code{"from"} end of a two-way pipe that has been closed.
+
+You may set @code{PROCINFO["@var{command}", "NONFATAL"]} to
+make such operations become nonfatal. If you do so, you then need
+to check @code{ERRNO} after each @code{print}, @code{printf},
+or @code{getline}.
+@xref{Nonfatal}, for more information.
@end quotation
@cindex @command{gawk}, @code{PROCINFO} array in
@@ -27917,8 +28591,7 @@ The profiled version of your program may not look exactly like what you
typed when you wrote it. This is because @command{gawk} creates the
profiled version by ``pretty-printing'' its internal representation of
the program. The advantage to this is that @command{gawk} can produce
-a standard representation. The disadvantage is that all source code
-comments are lost.
+a standard representation.
Also, things such as:
@example
@@ -28012,10 +28685,39 @@ When called this way, @command{gawk} ``pretty-prints'' the program into
@file{awkprof.out}, without any execution counts.
@quotation NOTE
-The @option{--pretty-print} option still runs your program.
-This will change in the next major release.
+Once upon a time, the @option{--pretty-print} option would also run
+your program. This is no longer the case.
@end quotation
+There is a significant difference between the output created when
+profiling, and that created when pretty-printing. Pretty-printed output
+preserves the original comments that were in the program, although their
+placement may not correspond exactly to their original locations in the
+source code.@footnote{@command{gawk} does the best it can to preserve
+the distinction between comments at the end of a statement and comments
+on lines by themselves. Due to implementation constraints, it does not
+always do so correctly, particularly for @code{switch} statements. The
+@command{gawk} maintainers hope to improve this in a subsequent
+release.}
+
+However, as a deliberate design decision, profiling output @emph{omits}
+the original program's comments. This allows you to focus on the
+execution count data and helps you avoid the temptation to use the
+profiler for pretty-printing.
+
+Additionally, pretty-printed output does not have the leading indentation
+that the profiling output does. This makes it easy to pretty-print your
+code once development is completed, and then use the result as the final
+version of your program.
+
+Because the internal representation of your program is formatted to
+recreate an @command{awk} program, profiling and pretty-printing
+automatically disable @command{gawk}'s default optimizations.
+
+Pretty-printing also preserves the original format of numeric
+constants; if you used an octal or hexadecimal value in your source
+code, it will appear that way in the output.
+
@node Advanced Features Summary
@section Summary
@@ -28056,8 +28758,7 @@ you tune them more easily. Sending the @code{USR1} signal while profiling cause
@command{gawk} to dump the profile and keep going, including a function call stack.
@item
-You can also just ``pretty-print'' the program. This currently also runs
-the program, but that will change in the next major release.
+You can also just ``pretty-print'' the program.
@end itemize
@@ -30250,6 +30951,68 @@ The @command{gawk} debugger only accepts source code supplied with the @option{-
@end itemize
@ignore
+@c 11/2016: This no longer applies after all the type cleanup work that's been done.
+One other point is worth discussing. Conventional debuggers run in a
+separate process (and thus address space) from the programs that they
+debug (the @dfn{debuggee}, if you will).
+
+The @command{gawk} debugger is different; it is an integrated part
+of @command{gawk} itself. This makes it possible, in rare cases,
+for @command{gawk} to become an excellent demonstrator of Heisenberg
+Uncertainty physics, where the mere act of observing something can change
+it. Consider the following:@footnote{Thanks to Hermann Peifer for
+this example.}
+
+@example
+$ @kbd{cat test.awk}
+@print{} @{ print typeof($1), typeof($2) @}
+$ @kbd{cat test.data}
+@print{} abc 123
+$ @kbd{gawk -f test.awk test.data}
+@print{} strnum strnum
+@end example
+
+This is all as expected: field data has the STRNUM attribute
+(@pxref{Variable Typing}). Now watch what happens when we run
+this program under the debugger:
+
+@example
+$ @kbd{gawk -D -f test.awk test.data}
+gawk> @kbd{w $1} @ii{Set watchpoint on} $1
+@print{} Watchpoint 1: $1
+gawk> @kbd{w $2} @ii{Set watchpoint on} $2
+@print{} Watchpoint 2: $2
+gawk> @kbd{r} @ii{Start the program}
+@print{} Starting program:
+@print{} Stopping in Rule ...
+@print{} Watchpoint 1: $1 @ii{Watchpoint fires}
+@print{} Old value: ""
+@print{} New value: "abc"
+@print{} main() at `test.awk':1
+@print{} 1 @{ print typeof($1), typeof($2) @}
+gawk> @kbd{n} @ii{Keep going @dots{}}
+@print{} Watchpoint 2: $2 @ii{Watchpoint fires}
+@print{} Old value: ""
+@print{} New value: "123"
+@print{} main() at `test.awk':1
+@print{} 1 @{ print typeof($1), typeof($2) @}
+gawk> @kbd{n} @ii{Get result from} typeof()
+@print{} strnum number @ii{Result for} $2 @ii{isn't right}
+@print{} Program exited normally with exit value: 0
+gawk> @kbd{quit}
+@end example
+
+In this case, the act of comparing the new value of @code{$2}
+with the old one caused @command{gawk} to evaluate it and determine that it
+is indeed a number, and this is reflected in the result of
+@code{typeof()}.
+
+Cases like this where the debugger is not transparent to the program's
+execution should be rare. If you encounter one, please report it
+(@pxref{Bugs}).
+@end ignore
+
+@ignore
Look forward to a future release when these and other missing features may
be added, and of course feel free to try to add them yourself!
@end ignore
@@ -30285,6 +31048,10 @@ If the GNU Readline library is available when @command{gawk} is
compiled, it is used by the debugger to provide command-line history
and editing.
+@item
+Usually, the debugger does not affect the
+program being debugged, but occasionally it can.
+
@end itemize
@node Arbitrary Precision Arithmetic
@@ -31102,6 +31869,122 @@ to just use the following:
gawk -M 'BEGIN @{ n = 13; print n % 2 @}'
@end example
+When dividing two arbitrary precision integers with either
+@samp{/} or @samp{%}, the result is typically an arbitrary
+precision floating point value (unless the denominator evenly
+divides into the numerator). In order to do integer division
+or remainder with arbitrary precision integers, use the built-in
+@code{intdiv()} function (@pxref{Numeric Functions}).
+
+You can simulate the @code{intdiv()} function in standard @command{awk}
+using this user-defined function:
+
+@example
+@c file eg/lib/intdiv.awk
+# intdiv --- do integer division
+
+@c endfile
+@ignore
+@c file eg/lib/intdiv.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# July, 2014
+#
+# Name changed from div() to intdiv()
+# April, 2015
+
+@c endfile
+
+@end ignore
+@c file eg/lib/intdiv.awk
+function intdiv(numerator, denominator, result)
+@{
+ split("", result)
+
+ numerator = int(numerator)
+ denominator = int(denominator)
+ result["quotient"] = int(numerator / denominator)
+ result["remainder"] = int(numerator % denominator)
+
+ return 0.0
+@}
+@c endfile
+@end example
+
+The following example program, contributed by Katie Wasserman,
+uses @code{intdiv()} to
+compute the digits of @value{PI} to as many places as you
+choose to set:
+
+@example
+@c file eg/prog/pi.awk
+# pi.awk --- compute the digits of pi
+@c endfile
+@c endfile
+@ignore
+@c file eg/prog/pi.awk
+#
+# Katie Wasserman, katie@@wass.net
+# August 2014
+@c endfile
+@end ignore
+@c file eg/prog/pi.awk
+
+BEGIN @{
+ digits = 100000
+ two = 2 * 10 ^ digits
+ pi = two
+ for (m = digits * 4; m > 0; --m) @{
+ d = m * 2 + 1
+ x = pi * m
+ intdiv(x, d, result)
+ pi = result["quotient"]
+ pi = pi + two
+ @}
+ print pi
+@}
+@c endfile
+@end example
+
+@ignore
+Date: Wed, 20 Aug 2014 10:19:11 -0400
+To: arnold@skeeve.com
+From: Katherine Wasserman <katie@wass.net>
+Subject: Re: computation of digits of pi?
+
+Arnold,
+
+>The program that you sent to compute the digits of pi using div(). Is
+>that some standard algorithm that every math student knows? If so,
+>what's it called?
+
+It's not that well known but it's not that obscure either
+
+It's Euler's modification to Newton's method for calculating pi.
+
+Take a look at lines (23) - (25) here: http://mathworld.wolfram.com/PiFormulas.htm
+
+The algorithm I wrote simply expands the multiply by 2 and works from the innermost expression outwards. I used this to program HP calculators because it's quite easy to modify for tiny memory devices with smallish word sizes.
+
+http://www.hpmuseum.org/cgi-sys/cgiwrap/hpmuseum/articles.cgi?read=899
+
+-Katie
+@end ignore
+
+When asked about the algorithm used, Katie replied:
+
+@quotation
+It's not that well known but it's not that obscure either.
+It's Euler's modification to Newton's method for calculating pi.
+Take a look at lines (23) - (25) here: @uref{http://mathworld.wolfram.com/PiFormulas.html}.
+
+The algorithm I wrote simply expands the multiply by 2 and works from
+the innermost expression outwards. I used this to program HP calculators
+because it's quite easy to modify for tiny memory devices with smallish
+word sizes. See
+@uref{http://www.hpmuseum.org/cgi-sys/cgiwrap/hpmuseum/articles.cgi?read=899}.
+@end quotation
+
@node POSIX Floating Point Problems
@section Standards Versus Existing Practice
@@ -31501,8 +32384,11 @@ This (rather large) @value{SECTION} describes the API in detail.
* Symbol Table Access:: Functions for accessing global
variables.
* Array Manipulation:: Functions for working with arrays.
+* Redirection API:: How to access and manipulate
+ redirections.
* Extension API Variables:: Variables provided by the API.
* Extension API Boilerplate:: Boilerplate code for using the API.
+* Changes from API V1:: Changes from V1 of the API.
@end menu
@node Extension API Functions Introduction
@@ -31576,6 +32462,10 @@ Clearing an array
@item
Flattening an array for easy C-style looping over all its indices and elements
@end itemize
+
+@item
+Accessing and manipulating redirections.
+
@end itemize
Some points about using the API:
@@ -31629,14 +32519,26 @@ and is managed by @command{gawk} from then on.
The API defines several simple @code{struct}s that map values as seen
from @command{awk}. A value can be a @code{double}, a string, or an
array (as in multidimensional arrays, or when creating a new array).
+
String values maintain both pointer and length, because embedded @sc{nul}
characters are allowed.
@quotation NOTE
-By intent, strings are maintained using the current multibyte encoding (as
-defined by @env{LC_@var{xxx}} environment variables) and not using wide
-characters. This matches how @command{gawk} stores strings internally
-and also how characters are likely to be input into and output from files.
+By intent, @command{gawk} maintains strings using the current multibyte
+encoding (as defined by @env{LC_@var{xxx}} environment variables)
+and not using wide characters. This matches how @command{gawk} stores
+strings internally and also how characters are likely to be input into
+and output from files.
+@end quotation
+
+@quotation NOTE
+String values passed to an extension by @command{gawk} are always
+@sc{nul}-terminated. Thus it is safe to pass such string values to
+standard library and system routines. However, because @command{gawk}
+allows embedded @sc{nul} characters in string data, before using the data
+as a regular C string, you should check that the length for that string
+passed to the extension matches the return value of @code{strlen()}
+for it.
@end quotation
@item
@@ -31719,6 +32621,8 @@ multibyte encoding.
@itemx @ @ @ @ AWK_UNDEFINED,
@itemx @ @ @ @ AWK_NUMBER,
@itemx @ @ @ @ AWK_STRING,
+@itemx @ @ @ @ AWK_REGEX,
+@itemx @ @ @ @ AWK_STRNUM,
@itemx @ @ @ @ AWK_ARRAY,
@itemx @ @ @ @ AWK_SCALAR,@ @ @ @ @ @ @ @ @ /* opaque access to a variable */
@itemx @ @ @ @ AWK_VALUE_COOKIE@ @ @ @ /* for updating a previously created value */
@@ -31741,6 +32645,8 @@ The @code{val_type} member indicates what kind of value the
@code{union} holds, and each member is of the appropriate type.
@item #define str_value@ @ @ @ @ @ u.s
+@itemx #define strnum_value@ @ @ str_value
+@itemx #define regex_value@ @ @ @ str_value
@itemx #define num_value@ @ @ @ @ @ u.d
@itemx #define array_cookie@ @ @ u.a
@itemx #define scalar_cookie@ @ u.scl
@@ -31761,7 +32667,7 @@ and in more detail in @ref{Cached values}.
@end table
-Scalar values in @command{awk} are either numbers or strings. The
+Scalar values in @command{awk} are numbers, strings, strnums, or typed regexps. The
@code{awk_value_t} struct represents values. The @code{val_type} member
indicates what is in the @code{union}.
@@ -31770,6 +32676,26 @@ require more work. Because @command{gawk} allows embedded @sc{nul} bytes
in string values, a string must be represented as a pair containing a
data pointer and length. This is the @code{awk_string_t} type.
+A strnum (numeric string) value is represented as a string and consists
+of user input data that appears to be numeric.
+When an extension creates a strnum value, the result is a string flagged
+as user input. Subsequent parsing by @command{gawk} then determines whether it
+looks like a number and should be treated as a strnum, or as a regular string.
+
+This is useful in cases where an extension function would like to do something
+comparable to the @code{split()} function which sets the strnum attribute
+on the array elements it creates. For example, an extension that implements
+CSV splitting would want to use this feature. This is also useful for a
+function that retrieves a data item from a database. The PostgreSQL
+@code{PQgetvalue()} function, for example, returns a string that may be numeric
+or textual depending on the contents.
+
+Typed regexp values (@pxref{Strong Regexp Constants}) are not of
+much use to extension functions. Extension functions can tell that
+they've received them, and create them for scalar values. Otherwise,
+they can examine the text of the regexp through @code{regex_value.str}
+and @code{regex_value.len}.
+
Identifiers (i.e., the names of global variables) can be associated
with either scalar values or with arrays. In addition, @command{gawk}
provides true arrays of arrays, where any given array element can
@@ -31936,6 +32862,31 @@ It returns @code{result}.
@itemx make_number(double num, awk_value_t *result);
This function simply creates a numeric value in the @code{awk_value_t} variable
pointed to by @code{result}.
+
+@item static inline awk_value_t *
+@itemx make_const_user_input(const char *string, size_t length, awk_value_t *result);
+This function is identical to @code{make_const_string()}, but the string is
+flagged as user input that should be treated as a strnum value if the contents
+of the string are numeric.
+
+@item static inline awk_value_t *
+@itemx make_malloced_user_input(const char *string, size_t length, awk_value_t *result);
+This function is identical to @code{make_malloced_string()}, but the string is
+flagged as user input that should be treated as a strnum value if the contents
+of the string are numeric.
+
+@item static inline awk_value_t *
+@itemx make_const_regex(const char *string, size_t length, awk_value_t *result);
+This function creates a strongly typed regexp value by allocating a copy of the string.
+@code{string} is the regular expression of length @code{length}.
+
+@item static inline awk_value_t *
+@itemx make_malloced_regex(const char *string, size_t length, awk_value_t *result);
+This function creates a strongly typed regexp value. @code{string} is
+the regular expression of length @code{length}. It expects @code{string}
+to be a @samp{char *} value pointing to data previously obtained from
+@code{gawk_malloc()}, @code{gawk_calloc()}, or @code{gawk_realloc()}.
+
@end table
@node Registration Functions
@@ -31963,8 +32914,13 @@ Extension functions are described by the following record:
@example
typedef struct awk_ext_func @{
@ @ @ @ const char *name;
-@ @ @ @ awk_value_t *(*function)(int num_actual_args, awk_value_t *result);
-@ @ @ @ size_t num_expected_args;
+@ @ @ @ awk_value_t *(*const function)(int num_actual_args,
+@ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_value_t *result,
+@ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ struct awk_ext_func *finfo);
+@ @ @ @ const size_t max_expected_args;
+@ @ @ @ const size_t min_required_args;
+@ @ @ @ awk_bool_t suppress_lint;
+@ @ @ @ void *data; /* opaque pointer to any extra state */
@} awk_ext_func_t;
@end example
@@ -31982,36 +32938,94 @@ or an underscore, which may be followed by any number of
letters, digits, and underscores.
Letter case in function names is significant.
-@item awk_value_t *(*function)(int num_actual_args, awk_value_t *result);
+@item awk_value_t *(*const function)(int num_actual_args,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_value_t *result,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ struct awk_ext_func *finfo);
This is a pointer to the C function that provides the extension's
functionality.
-The function must fill in @code{*result} with either a number
-or a string. @command{gawk} takes ownership of any string memory.
+The function must fill in @code{*result} with either a number,
+a string, or a regexp.
+@command{gawk} takes ownership of any string memory.
As mentioned earlier, string memory @emph{must} come from one of
@code{gawk_malloc()}, @code{gawk_calloc()}, or @code{gawk_realloc()}.
The @code{num_actual_args} argument tells the C function how many
actual parameters were passed from the calling @command{awk} code.
+The @code{finfo} parameter is a pointer to the @code{awk_ext_func_t} for
+this function. The called function may access data within it as desired, or not.
+
The function must return the value of @code{result}.
This is for the convenience of the calling code inside @command{gawk}.
-@item size_t num_expected_args;
-This is the number of arguments the function expects to receive.
-Each extension function may decide what to do if the number of
-arguments isn't what it expected. As with real @command{awk} functions, it
-is likely OK to ignore extra arguments.
+@item const size_t max_expected_args;
+This is the maximum number of arguments the function expects to receive.
+If called with more arguments than this, and if lint checking has
+been enabled, then @command{gawk} prints a warning message. For more
+information, see the entry for @code{suppress_lint}, later in this list.
+
+@item const size_t min_required_args;
+This is the minimum number of arguments the function expects to receive.
+If called with fewer arguments, @command{gawk} prints a fatal error
+message and exits.
+
+@item awk_bool_t suppress_lint;
+This flag tells @command{gawk} not to print a lint message if lint
+checking has been enabled and if more arguments were supplied in the call
+than expected. An extension function can tell if @command{gawk} already
+printed at least one such message by checking if @samp{num_actual_args >
+finfo->max_expected_args}. If so, and the function does not want more
+lint messages to be printed, it should set @code{finfo->suppress_lint}
+to @code{awk_true}.
+
+@item void *data;
+This is an opaque pointer to any data that an extension function may
+wish to have available when called. Passing the @code{awk_ext_func_t}
+structure to the extension function and having this pointer available
+in it enable writing a single C or C++ function that implements multiple
+@command{awk}-level extension functions.
@end table
Once you have a record representing your extension function, you register
it with @command{gawk} using this API function:
@table @code
-@item awk_bool_t add_ext_func(const char *namespace, const awk_ext_func_t *func);
+@item awk_bool_t add_ext_func(const char *namespace, awk_ext_func_t *func);
This function returns true upon success, false otherwise.
The @code{namespace} parameter is currently not used; you should pass in an
empty string (@code{""}). The @code{func} pointer is the address of a
@code{struct} representing your function, as just described.
+
+@command{gawk} does not modify what @code{func} points to, but the
+extension function itself receives this pointer and can modify what it
+points to; thus, it is purposely not declared to be @code{const}.
+@end table
+
+The combination of @code{min_required_args}, @code{max_expected_args},
+and @code{suppress_lint} may be confusing. Here is how you should
+set things up.
+
+@table @asis
+@item Any number of arguments is valid
+Set @code{min_required_args} and @code{max_expected_args} to zero and
+set @code{suppress_lint} to @code{awk_true}.
+
+@item A minimum number of arguments is required, no limit on maximum number of arguments
+Set @code{min_required_args} to the minimum required. Set
+@code{max_expected_args} to zero and
+set @code{suppress_lint} to @code{awk_true}.
+
+@item A minimum number of arguments is required, a maximum number is expected
+Set @code{min_required_args} to the minimum required. Set
+@code{max_expected_args} to the maximum expected.
+Set @code{suppress_lint} to @code{awk_false}.
+
+@item A minimum number of arguments is required, and no more than a maximum is allowed
+Set @code{min_required_args} to the minimum required. Set
+@code{max_expected_args} to the maximum expected.
+Set @code{suppress_lint} to @code{awk_false}.
+In your extension function, check that @code{num_actual_args} does not
+exceed @code{finfo->max_expected_args}. If it does, issue a fatal error message.
@end table
@node Exit Callback Functions
@@ -32503,6 +33517,9 @@ that parameter. More's the pity.}
@item void fatal(awk_ext_id_t id, const char *format, ...);
Print a message and then cause @command{gawk} to exit immediately.
+@item void nonfatal(awk_ext_id_t id, const char *format, ...);
+Print a nonfatal error message.
+
@item void warning(awk_ext_id_t id, const char *format, ...);
Print a warning message.
@@ -32555,21 +33572,25 @@ value type, as appropriate. This behavior is summarized in
@caption{API value types returned}
@docbook
<informaltable>
-<tgroup cols="6">
- <colspec colwidth="16.6*"/>
- <colspec colwidth="16.6*"/>
- <colspec colwidth="19.8*" colname="c3"/>
- <colspec colwidth="15*" colname="c4"/>
- <colspec colwidth="15*" colname="c5"/>
- <colspec colwidth="16.6*" colname="c6"/>
- <spanspec spanname="hspan" namest="c3" nameend="c6" align="center"/>
+<tgroup cols="8">
+ <colspec colname="c1"/>
+ <colspec colname="c2"/>
+ <colspec colname="c3"/>
+ <colspec colname="c4"/>
+ <colspec colname="c5"/>
+ <colspec colname="c6"/>
+ <colspec colname="c7"/>
+ <colspec colname="c8"/>
+ <spanspec spanname="hspan" namest="c3" nameend="c8" align="center"/>
<thead>
<row><entry></entry><entry spanname="hspan"><para>Type of Actual Value</para></entry></row>
<row>
<entry></entry>
<entry></entry>
<entry><para>String</para></entry>
+ <entry><para>Strnum</para></entry>
<entry><para>Number</para></entry>
+ <entry><para>Regex</para></entry>
<entry><para>Array</para></entry>
<entry><para>Undefined</para></entry>
</row>
@@ -32580,48 +33601,80 @@ value type, as appropriate. This behavior is summarized in
<entry><para><emphasis role="bold">String</emphasis></para></entry>
<entry><para>String</para></entry>
<entry><para>String</para></entry>
- <entry><para>False</para></entry>
- <entry><para>False</para></entry>
+ <entry><para>String</para></entry>
+ <entry><para>String</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry><para><emphasis role="bold">Strnum</emphasis></para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>Strnum</para></entry>
+ <entry><para>Strnum</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
</row>
<row>
<entry></entry>
<entry><para><emphasis role="bold">Number</emphasis></para></entry>
- <entry><para>Number if can be converted, else false</para></entry>
<entry><para>Number</para></entry>
- <entry><para>False</para></entry>
- <entry><para>False</para></entry>
+ <entry><para>Number</para></entry>
+ <entry><para>Number</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
</row>
<row>
<entry><para><emphasis role="bold">Type</emphasis></para></entry>
+ <entry><para><emphasis role="bold">Regex</emphasis></para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>Regex</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ </row>
+ <row>
+ <entry><para><emphasis role="bold">Requested</emphasis></para></entry>
<entry><para><emphasis role="bold">Array</emphasis></para></entry>
- <entry><para>False</para></entry>
- <entry><para>False</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
<entry><para>Array</para></entry>
- <entry><para>False</para></entry>
+ <entry><para>false</para></entry>
</row>
<row>
- <entry><para><emphasis role="bold">Requested</emphasis></para></entry>
+ <entry></entry>
<entry><para><emphasis role="bold">Scalar</emphasis></para></entry>
<entry><para>Scalar</para></entry>
<entry><para>Scalar</para></entry>
- <entry><para>False</para></entry>
- <entry><para>False</para></entry>
+ <entry><para>Scalar</para></entry>
+ <entry><para>Scalar</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
</row>
<row>
<entry></entry>
<entry><para><emphasis role="bold">Undefined</emphasis></para></entry>
<entry><para>String</para></entry>
+ <entry><para>Strnum</para></entry>
<entry><para>Number</para></entry>
+ <entry><para>Regex</para></entry>
<entry><para>Array</para></entry>
<entry><para>Undefined</para></entry>
</row>
<row>
<entry></entry>
<entry><para><emphasis role="bold">Value cookie</emphasis></para></entry>
- <entry><para>False</para></entry>
- <entry><para>False</para></entry>
- <entry><para>False</para>
- </entry><entry><para>False</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
</row>
</tbody>
</tgroup>
@@ -32637,41 +33690,45 @@ value type, as appropriate. This behavior is summarized in
@tex
\vglue-1.1\baselineskip
@end tex
-@multitable @columnfractions .166 .166 .198 .15 .15 .166
-@headitem @tab @tab String @tab Number @tab Array @tab Undefined
-@item @tab @b{String} @tab String @tab String @tab False @tab False
-@item @tab @b{Number} @tab Number if can be converted, else false @tab Number @tab False @tab False
-@item @b{Type} @tab @b{Array} @tab False @tab False @tab Array @tab False
-@item @b{Requested} @tab @b{Scalar} @tab Scalar @tab Scalar @tab False @tab False
-@item @tab @b{Undefined} @tab String @tab Number @tab Array @tab Undefined
-@item @tab @b{Value cookie} @tab False @tab False @tab False @tab False
+@c @multitable @columnfractions .166 .166 .198 .15 .15 .166
+@multitable {Requested} {Undefined} {Number} {Number} {Scalar} {Regex} {Array} {Undefined}
+@headitem @tab @tab String @tab Strnum @tab Number @tab Regex @tab Array @tab Undefined
+@item @tab @b{String} @tab String @tab String @tab String @tab String @tab false @tab false
+@item @tab @b{Strnum} @tab false @tab Strnum @tab Strnum @tab false @tab false @tab false
+@item @tab @b{Number} @tab Number @tab Number @tab Number @tab false @tab false @tab false
+@item @b{Type} @tab @b{Regex} @tab false @tab false @tab false @tab Regex @tab false @tab false
+@item @b{Requested} @tab @b{Array} @tab false @tab false @tab false @tab false @tab Array @tab false
+@item @tab @b{Scalar} @tab Scalar @tab Scalar @tab Scalar @tab Scalar @tab false @tab false
+@item @tab @b{Undefined} @tab String @tab Strnum @tab Number @tab Regex @tab Array @tab Undefined
+@item @tab @b{Value cookie} @tab false @tab false @tab false @tab false @tab false @tab false
@end multitable
@end ifnotdocbook
@end ifnotplaintext
@ifplaintext
-@example
- +-------------------------------------------------+
- | Type of Actual Value: |
- +------------+------------+-----------+-----------+
- | String | Number | Array | Undefined |
-+-----------+-----------+------------+------------+-----------+-----------+
-| | String | String | String | False | False |
-| |-----------+------------+------------+-----------+-----------+
-| | Number | Number if | Number | False | False |
-| | | can be | | | |
-| | | converted, | | | |
-| | | else false | | | |
-| |-----------+------------+------------+-----------+-----------+
-| Type | Array | False | False | Array | False |
-| Requested |-----------+------------+------------+-----------+-----------+
-| | Scalar | Scalar | Scalar | False | False |
-| |-----------+------------+------------+-----------+-----------+
-| | Undefined | String | Number | Array | Undefined |
-| |-----------+------------+------------+-----------+-----------+
-| | Value | False | False | False | False |
-| | cookie | | | | |
-+-----------+-----------+------------+------------+-----------+-----------+
-@end example
+@verbatim
+ +-------------------------------------------------------+
+ | Type of Actual Value: |
+ +--------+--------+--------+--------+-------+-----------+
+ | String | Strnum | Number | Regex | Array | Undefined |
++-----------+-----------+--------+--------+--------+--------+-------+-----------+
+| | String | String | String | String | String | false | false |
+| +-----------+--------+--------+--------+--------+-------+-----------+
+| | Strnum | false | Strnum | Strnum | false | false | false |
+| +-----------+--------+--------+--------+--------+-------+-----------+
+| | Number | Number | Number | Number | false | false | false |
+| +-----------+--------+--------+--------+--------+-------+-----------+
+| | Regex | false | false | false | Regex | false | false |
+| Type +-----------+--------+--------+--------+--------+-------+-----------+
+| Requested | Array | false | false | false | false | Array | false |
+| +-----------+--------+--------+--------+--------+-------+-----------+
+| | Scalar | Scalar | Scalar | Scalar | Scalar | false | false |
+| +-----------+--------+--------+--------+--------+-------+-----------+
+| | Undefined | String | Strnum | Number | Regex | Array | Undefined |
+| +-----------+--------+--------+--------+--------+-------+-----------+
+| | Value | false | false | false | false | false | false |
+| | Cookie | | | | | | |
++-----------+-----------+--------+--------+--------+--------+-------+-----------+
+@end verbatim
@end ifplaintext
@end float
@@ -32749,13 +33806,6 @@ An extension can look up the value of @command{gawk}'s special variables.
However, with the exception of the @code{PROCINFO} array, an extension
cannot change any of those variables.
-@quotation CAUTION
-It is possible for the lookup of @code{PROCINFO} to fail. This happens if
-the @command{awk} program being run does not reference @code{PROCINFO};
-in this case, @command{gawk} doesn't bother to create the array and
-populate it.
-@end quotation
-
@node Symbol table by cookie
@subsubsection Variable Access and Update by Cookie
@@ -32777,7 +33827,7 @@ Return false if the value cannot be retrieved.
@item awk_bool_t sym_update_scalar(awk_scalar_t cookie, awk_value_t *value);
Update the value associated with a scalar cookie. Return false if
-the new value is not of type @code{AWK_STRING} or @code{AWK_NUMBER}.
+the new value is not of type @code{AWK_STRING}, @code{AWK_STRNUM}, @code{AWK_REGEX}, or @code{AWK_NUMBER}.
Here too, the predefined variables may not be updated.
@end table
@@ -32898,7 +33948,7 @@ is what the routines in this @value{SECTION} let you do. The functions are as f
@table @code
@item awk_bool_t create_value(awk_value_t *value, awk_value_cookie_t *result);
Create a cached string or numeric value from @code{value} for
-efficient later assignment. Only values of type @code{AWK_NUMBER}
+efficient later assignment. Only values of type @code{AWK_NUMBER}, @code{AWK_REGEX}, @code{AWK_STRNUM},
and @code{AWK_STRING} are allowed. Any other type is rejected.
@code{AWK_UNDEFINED} could be allowed, but doing so would result in
inferior performance.
@@ -33124,9 +34174,10 @@ The array remains an array, but after calling this function, it
has no elements. This is equivalent to using the @code{delete}
statement (@pxref{Delete}).
-@item awk_bool_t flatten_array(awk_array_t a_cookie, awk_flat_array_t **data);
+@item awk_bool_t flatten_array_typed(awk_array_t a_cookie, awk_flat_array_t **data, awk_valtype_t index_type, awk_valtype_t value_type);
For the array represented by @code{a_cookie}, create an @code{awk_flat_array_t}
-structure and fill it in. Set the pointer whose address is passed as @code{data}
+structure and fill it in with indices and values of the requested types.
+Set the pointer whose address is passed as @code{data}
to point to this structure.
Return true upon success, or false otherwise.
@ifset FOR_PRINT
@@ -33138,6 +34189,14 @@ See the next @value{SECTION}
for a discussion of how to
flatten an array and work with it.
+@item awk_bool_t flatten_array(awk_array_t a_cookie, awk_flat_array_t **data);
+For the array represented by @code{a_cookie}, create an @code{awk_flat_array_t}
+structure and fill it in with @code{AWK_STRING} indices and
+@code{AWK_UNDEFINED} values.
+This is superseded by @code{flatten_array_typed()}.
+It is provided as a macro, and remains for convenience and for source code
+compatibility with the previous version of the API.
+
@item awk_bool_t release_flattened_array(awk_array_t a_cookie,
@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_flat_array_t *data);
When done with a flattened array, release the storage using this function.
@@ -33250,7 +34309,7 @@ to double-check that the count in the @code{awk_flat_array_t}
is the same as the count just retrieved:
@example
- if (! flatten_array(value2.array_cookie, & flat_array)) @{
+ if (! flatten_array_typed(value2.array_cookie, & flat_array, AWK_STRING, AWK_UNDEFINED)) @{
printf("dump_array_and_delete: could not flatten array\n");
goto out;
@}
@@ -33546,6 +34605,75 @@ $ @kbd{AWKLIBPATH=$PWD ./gawk -f subarray.awk}
(@xref{Finding Extensions} for more information on the
@env{AWKLIBPATH} environment variable.)
+@node Redirection API
+@subsection Accessing and Manipulating Redirections
+
+The following function allows extensions to access and manipulate redirections.
+
+@table @code
+@item awk_bool_t get_file(const char *name,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ size_t name_len,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ const char *filetype,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ int fd,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ const awk_input_buf_t **ibufp,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ const awk_output_buf_t **obufp);
+Look up file @code{name} in @command{gawk}'s internal redirection table.
+If @code{name} is @code{NULL} or @code{name_len} is zero, return
+data for the currently open input file corresponding to @code{FILENAME}.
+(This does not access the @code{filetype} argument, so that may be undefined.)
+If the file is not already open, attempt to open it.
+The @code{filetype} argument must be zero-terminated and should be one of:
+
+@table @code
+@item ">"
+A file opened for output.
+
+@item ">>"
+A file opened for append.
+
+@item "<"
+A file opened for input.
+
+@item "|>"
+A pipe opened for output.
+
+@item "|<"
+A pipe opened for input.
+
+@item "|&"
+A two-way coprocess.
+@end table
+
+On error, return an @code{awk_false} value. Otherwise, return
+@code{awk_true}, and return additional information about the redirection
+in the @code{ibufp} and @code{obufp} pointers. For input
+redirections, the @code{*ibufp} value should be non-@code{NULL},
+and @code{*obufp} should be @code{NULL}. For output redirections,
+the @code{*obufp} value should be non-@code{NULL}, and @code{*ibufp}
+should be @code{NULL}. For two-way coprocesses, both values should
+be non-@code{NULL}.
+
+In the usual case, the extension is interested in @code{(*ibufp)->fd}
+and/or @code{fileno((*obufp)->fp)}. If the file is not already
+open, and the @code{fd} argument is nonnegative, @command{gawk}
+will use that file descriptor instead of opening the file in the
+usual way. If @code{fd} is nonnegative, but the file is already open,
+@command{gawk} ignores @code{fd} and returns the existing file. It is
+the caller's responsibility to notice that neither the @code{fd} in
+the returned @code{awk_input_buf_t} nor the @code{fd} in the returned
+@code{awk_output_buf_t} matches the requested value.
+
+Note that supplying a file descriptor is currently @emph{not} supported
+for pipes. However, supplying a file descriptor should work for input,
+output, append, and two-way (coprocess) sockets. If @code{filetype}
+is two-way, @command{gawk} assumes that it is a socket! Note that in
+the two-way case, the input and output file descriptors may differ.
+To check for success, you must check whether either matches.
+@end table
+
+It is anticipated that this API function will be used to implement I/O
+multiplexing and a socket library.
+
@node Extension API Variables
@subsection API Variables
@@ -33572,10 +34700,10 @@ debugging:
@float Table,gawk-api-version
@caption{gawk API version constants}
-@multitable @columnfractions .33 .33 .33
-@headitem API Version @tab C preprocessor define @tab enum constant
-@item Major @tab gawk_api_major_version @tab GAWK_API_MAJOR_VERSION
-@item Minor @tab gawk_api_minor_version @tab GAWK_API_MINOR_VERSION
+@multitable {@b{API Version}} {@code{gawk_api_major_version}} {@code{GAWK_API_MAJOR_VERSION}}
+@headitem API Version @tab C Preprocessor Define @tab enum constant
+@item Major @tab @code{gawk_api_major_version} @tab @code{GAWK_API_MAJOR_VERSION}
+@item Minor @tab @code{gawk_api_minor_version} @tab @code{GAWK_API_MINOR_VERSION}
@end multitable
@end float
@@ -33594,10 +34722,10 @@ constant integers:
@table @code
@item api->major_version
-The major version of the running @command{gawk}
+The major version of the running @command{gawk}.
@item api->minor_version
-The minor version of the running @command{gawk}
+The minor version of the running @command{gawk}.
@end table
It is up to the extension to decide if there are API incompatibilities.
@@ -33670,7 +34798,7 @@ static awk_ext_id_t ext_id;
static const char *ext_version = NULL; /* or @dots{} = "some string" */
static awk_ext_func_t func_table[] = @{
- @{ "name", do_name, 1 @},
+ @{ "name", do_name, 1, 0, awk_false, NULL @},
/* @dots{} */
@};
@@ -33771,6 +34899,19 @@ If @code{ext_version} is not @code{NULL}, register
the version string with @command{gawk}.
@end enumerate
+
+@node Changes from API V1
+@subsection Changes From Version 1 of the API
+
+The current API is @emph{not} binary compatible with version 1 of the API.
+You will have to recompile your extensions in order to use them with
+the current version of @command{gawk}.
+
+Fortunately, at the possible expense of some compile-time warnings, the API remains
+source-code--compatible with the previous API. The major differences are
+the additional members in the @code{awk_ext_func_t} structure, and the
+addition of the third argument to the C implementation function.
+
@node Finding Extensions
@section How @command{gawk} Finds Extensions
@cindex extension search path
@@ -34011,17 +35152,12 @@ The second is a pointer to an @code{awk_value_t} structure, usually named
/* do_chdir --- provide dynamically loaded chdir() function for gawk */
static awk_value_t *
-do_chdir(int nargs, awk_value_t *result)
+do_chdir(int nargs, awk_value_t *result, struct awk_ext_func *unused)
@{
awk_value_t newdir;
int ret = -1;
assert(result != NULL);
-
- if (do_lint && nargs != 1)
- lintwarn(ext_id,
- _("chdir: called with incorrect number of arguments, "
- "expecting 1"));
@end example
The @code{newdir}
@@ -34030,8 +35166,8 @@ with @code{get_argument()}. Note that the first argument is
numbered zero.
If the argument is retrieved successfully, the function calls the
-@code{chdir()} system call. If the @code{chdir()} fails, @code{ERRNO}
-is updated:
+@code{chdir()} system call. If the @code{chdir()} call fails,
+it updates @code{ERRNO}:
@example
if (get_argument(0, AWK_STRING, & newdir)) @{
@@ -34235,15 +35371,11 @@ is set to point to @code{stat()}, instead.
Here is the @code{do_stat()} function, which starts with
variable declarations and argument checking:
-@ignore
-Changed message for page breaking. Used to be:
- "stat: called with incorrect number of arguments (%d), should be 2",
-@end ignore
@example
/* do_stat --- provide a stat() function for gawk */
static awk_value_t *
-do_stat(int nargs, awk_value_t *result)
+do_stat(int nargs, awk_value_t *result, struct awk_ext_func *unused)
@{
awk_value_t file_param, array_param;
char *name;
@@ -34254,13 +35386,6 @@ do_stat(int nargs, awk_value_t *result)
int (*statfunc)(const char *path, struct stat *sbuf) = lstat;
assert(result != NULL);
-
- if (nargs != 2 && nargs != 3) @{
- if (do_lint)
- lintwarn(ext_id,
- _("stat: called with wrong number of arguments"));
- return make_number(-1, result);
- @}
@end example
Then comes the actual work. First, the function gets the arguments.
@@ -34328,11 +35453,9 @@ structures for loading each function into @command{gawk}:
@example
static awk_ext_func_t func_table[] = @{
- @{ "chdir", do_chdir, 1 @},
- @{ "stat", do_stat, 2 @},
-#ifndef __MINGW32__
- @{ "fts", do_fts, 3 @},
-#endif
+ @{ "chdir", do_chdir, 1, 1, awk_false, NULL @},
+ @{ "stat", do_stat, 3, 2, awk_false, NULL @},
+ @dots{}
@};
@end example
@@ -35113,18 +36236,21 @@ As of this writing, there are seven extensions:
GD graphics library extension
@item
+MPFR library extension
+(this provides access to a number of MPFR functions that @command{gawk}'s
+native MPFR support does not)
+
+@item
PDF extension
@item
PostgreSQL extension
@item
-MPFR library extension
-(this provides access to a number of MPFR functions that @command{gawk}'s
-native MPFR support does not)
+Redis extension
@item
-Redis extension
+Select extension
@item
XML parser extension, using the @uref{http://expat.sourceforge.net, Expat}
@@ -35224,7 +36350,7 @@ output wrappers,
and two-way processors)
@item
-Printing fatal, warning, and ``lint'' warning messages
+Printing fatal, nonfatal, warning, and ``lint'' warning messages
@item
Updating @code{ERRNO}, or unsetting it
@@ -35753,6 +36879,10 @@ Indirect function calls
@item
Directories on the command line produce a warning and are skipped
(@pxref{Command-line directories})
+
+@item
+Output errors with @code{print} and @code{printf} need not be fatal
+(@pxref{Nonfatal})
@end itemize
@item
@@ -35840,6 +36970,11 @@ The @code{isarray()} function to check if a variable is an array or not
The @code{bindtextdomain()}, @code{dcgettext()}, and @code{dcngettext()}
functions for internationalization
(@pxref{Programmer i18n})
+
+@item
+The @code{intdiv()} function for doing integer
+division and remainder
+(@pxref{Numeric Functions})
@end itemize
@item
@@ -35878,6 +37013,7 @@ The
@option{-p},
@option{-P},
@option{-r},
+@option{-s},
@option{-S},
@option{-t},
and
@@ -35902,6 +37038,7 @@ and the
@option{--load},
@option{--non-decimal-data},
@option{--optimize},
+@option{--no-optimize},
@option{--posix},
@option{--pretty-print},
@option{--profile},
@@ -35972,6 +37109,19 @@ for @command{gawk} @value{PVERSION} 4.1:
Ultrix
@end itemize
+@item
+Support for the following systems was removed from the code
+for @command{gawk} @value{PVERSION} 4.2:
+
+@c nested table
+@itemize @value{MINUS}
+@item
+MirBSD
+
+@item
+GNU/Linux on Alpha
+@end itemize
+
@end itemize
@c XXX ADD MORE STUFF HERE
@@ -36598,6 +37748,52 @@ Support for Ultrix was removed.
@end itemize
+Version 4.2 introduced the following changes:
+
+@itemize @bullet
+@item
+Changes to @code{ENVIRON} are reflected into @command{gawk}'s
+environment and that of programs that it runs.
+@xref{Auto-set}.
+
+@item
+The @option{--pretty-print} option no longer also runs the @command{awk}
+program.
+@xref{Options}.
+
+@item
+The @command{igawk} program and its manual page are no longer
+installed when @command{gawk} is built.
+@xref{Igawk Program}.
+
+@item
+The @code{intdiv()} function.
+@xref{Numeric Functions}.
+
+@item
+The maximum number of hexadecimal digits in @samp{\x} escapes
+is now two.
+@xref{Escape Sequences}.
+
+@item
+Nonfatal output with @code{print} and @code{printf}.
+@xref{Nonfatal}.
+
+@item
+For many years, POSIX specified that default field splitting
+only allowed spaces and tabs to separate fields, and this was
+how @command{gawk} behaved with @option{--posix}. As of 2013,
+the standard restored historical behavior, and now default
+field splitting with @option{--posix} also allows newlines to
+separate fields.
+
+@item
+Support for MirBSD was removed.
+
+@item
+Support for GNU/Linux on Alpha was removed.
+@end itemize
+
@c XXX ADD MORE STUFF HERE
@end ifclear
@@ -36727,7 +37923,7 @@ and
@uref{http://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xbd_chap09.html#tag_21_09_03_05, its rationale}.}
By using this lovely technical term, the standard gives license
-to implementors to implement ranges in whatever way they choose.
+to implementers to implement ranges in whatever way they choose.
The @command{gawk} maintainer chose to apply the pre-POSIX meaning
both with the default regexp matching and when @option{--traditional} or
@option{--posix} are used.
@@ -37164,6 +38360,12 @@ These files contain the actual @command{gawk} source code.
@end table
@table @file
+@item support/*
+C header and source files for routines that @command{gawk}
+uses, but that are not part of its core functionality.
+For example, argument parsing, regular expression matching,
+and random number generating routines are all kept here.
+
@item ABOUT-NLS
A file containing information about GNU @command{gettext} and translations.
@@ -37264,6 +38466,8 @@ The generated Info file for
The @command{troff} source for a manual page describing the @command{igawk}
program presented in
@ref{Igawk Program}.
+(Since @command{gawk} can do its own @code{@@include} processing,
+neither @command{igawk} nor @file{igawk.1} is installed.)
@item doc/Makefile.in
The input file used during the configuration process to generate the
@@ -37308,8 +38512,6 @@ source file for this @value{DOCUMENT}. It also contains a @file{Makefile.in} fil
@file{Makefile.am} is used by GNU Automake to create @file{Makefile.in}.
The library functions from
@ref{Library Functions},
-and the @command{igawk} program from
-@ref{Igawk Program}
are included as ready-to-use files in the @command{gawk} distribution.
They are installed as part of the installation process.
The rest of the programs in this @value{DOCUMENT} are available in appropriate
@@ -37320,6 +38522,12 @@ The source code, manual pages, and infrastructure files for
the sample extensions included with @command{gawk}.
@xref{Dynamic Extensions}, for more information.
+@item extras/*
+Additional non-essential files. Currently, this directory contains some shell
+startup files to be installed in @file{/etc/profile.d} to aid in manipulating
+the @env{AWKPATH} and @env{AWKLIBPATH} environment variables.
+@xref{Shell Startup Files}, for more information.
+
@item posix/*
Files needed for building @command{gawk} on POSIX-compliant systems.
@@ -37348,6 +38556,7 @@ to configure @command{gawk} for your system yourself.
@menu
* Quick Installation:: Compiling @command{gawk} under Unix.
+* Shell Startup Files:: Shell convenience functions.
* Additional Configuration Options:: Other compile-time options.
* Configuration Philosophy:: How it's all supposed to work.
@end menu
@@ -37428,6 +38637,44 @@ is likely that you will be asked for your password, and you will have
to have been set up previously as a user who is allowed to run the
@command{sudo} command.
+@node Shell Startup Files
+@appendixsubsec Shell Startup Files
+
+The distribution contains shell startup files @file{gawk.sh} and
+@file{gawk.csh}, containing functions to aid in manipulating
+the @env{AWKPATH} and @env{AWKLIBPATH} environment variables.
+On a Fedora GNU/Linux system, these files should be installed in @file{/etc/profile.d};
+on other platforms, the appropriate location may be different.
+
+@table @command
+
+@cindex @command{gawkpath_default} shell function
+@item gawkpath_default
+Reset the @env{AWKPATH} environment variable to its default value.
+
+@cindex @command{gawkpath_prepend} shell function
+@item gawkpath_prepend
+Add the argument to the front of the @env{AWKPATH} environment variable.
+
+@cindex @command{gawkpath_append} shell function
+@item gawkpath_append
+Add the argument to the end of the @env{AWKPATH} environment variable.
+
+@cindex @command{gawklibpath_default} shell function
+@item gawklibpath_default
+Reset the @env{AWKLIBPATH} environment variable to its default value.
+
+@cindex @command{gawklibpath_prepend} shell function
+@item gawklibpath_prepend
+Add the argument to the front of the @env{AWKLIBPATH} environment variable.
+
+@cindex @command{gawklibpath_append} shell function
+@item gawklibpath_append
+Add the argument to the end of the @env{AWKLIBPATH} environment variable.
+
+@end table
+
+
@node Additional Configuration Options
@appendixsubsec Additional Configuration Options
@cindex @command{gawk}, configuring, options
@@ -42011,6 +43258,7 @@ Consistency issues:
Use MS-DOS not MS DOS
Use an empty set of parentheses after built-in and awk function names.
Use "multiFOO" without a hyphen.
+ Use "time zone" as two words, not "timezone".
Date: Wed, 13 Apr 94 15:20:52 -0400
From: rms@gnu.org (Richard Stallman)
diff --git a/doc/gawktexi.in b/doc/gawktexi.in
index ee4c39b9..d8e9654f 100644
--- a/doc/gawktexi.in
+++ b/doc/gawktexi.in
@@ -44,6 +44,14 @@
@set MINUS
@end ifdocbook
+@iftex
+@set TIMES @times
+@end iftex
+@ifnottex
+@set TIMES *
+@end ifnottex
+
+
@set xref-automatic-section-title
@c The following information should be updated here only!
@@ -576,6 +584,7 @@ particular records in a file and perform operations upon them.
@code{getline}.
* Getline Summary:: Summary of @code{getline} Variants.
* Read Timeout:: Reading input with a timeout.
+* Retrying Input:: Retrying input after certain errors.
* Command-line directories:: What happens if you put a directory on
the command line.
* Input Summary:: Input summary.
@@ -605,6 +614,7 @@ particular records in a file and perform operations upon them.
* Special Caveats:: Things to watch out for.
* Close Files And Pipes:: Closing Input and Output Files and
Pipes.
+* Nonfatal:: Enabling Nonfatal Output.
* Output Summary:: Output summary.
* Output Exercises:: Exercises.
* Values:: Constants, Variables, and Regular
@@ -614,6 +624,9 @@ particular records in a file and perform operations upon them.
* Nondecimal-numbers:: What are octal and hex numbers.
* Regexp Constants:: Regular Expression constants.
* Using Constant Regexps:: When and how to use a regexp constant.
+* Standard Regexp Constants:: Regexp constants in standard
+ @command{awk}.
+* Strong Regexp Constants:: Strongly typed regexp constants.
* Variables:: Variables give names to values for
later use.
* Using Variables:: Using variables in your programs.
@@ -916,11 +929,14 @@ particular records in a file and perform operations upon them.
* Array Functions:: Functions for working with arrays.
* Flattening Arrays:: How to flatten arrays.
* Creating Arrays:: How to create and populate arrays.
+* Redirection API:: How to access and manipulate
+ redirections.
* Extension API Variables:: Variables provided by the API.
* Extension Versioning:: API Version information.
* Extension API Informational Variables:: Variables providing information about
@command{gawk}'s invocation.
* Extension API Boilerplate:: Boilerplate code for using the API.
+* Changes from API V1:: Changes from V1 of the API.
* Finding Extensions:: How @command{gawk} finds compiled
extensions.
* Extension Example:: Example C code for an extension.
@@ -974,14 +990,16 @@ particular records in a file and perform operations upon them.
* Unix Installation:: Installing @command{gawk} under
various versions of Unix.
* Quick Installation:: Compiling @command{gawk} under Unix.
+* Shell Startup Files:: Shell convenience functions.
* Additional Configuration Options:: Other compile-time options.
* Configuration Philosophy:: How it's all supposed to work.
* Non-Unix Installation:: Installation on Other Operating
Systems.
-* PC Installation:: Installing and Compiling @command{gawk} on
- Microsoft Windows.
+* PC Installation:: Installing and Compiling
+ @command{gawk} on Microsoft Windows.
* PC Binary Installation:: Installing a prepared distribution.
-* PC Compiling:: Compiling @command{gawk} for Windows32.
+* PC Compiling:: Compiling @command{gawk} for
+ Windows32.
* PC Using:: Running @command{gawk} on Windows32.
* Cygwin:: Building and running @command{gawk}
for Cygwin.
@@ -2933,14 +2951,59 @@ it is worth addressing.
@cindex Brink, Jeroen
The ``shells'' on Microsoft Windows systems use the double-quote
character for quoting, and make it difficult or impossible to include an
-escaped double-quote character in a command-line script.
-The following example, courtesy of Jeroen Brink, shows
-how to print all lines in a file surrounded by double quotes:
+escaped double-quote character in a command-line script. The following
+example, courtesy of Jeroen Brink, shows how to escape the double quotes
+from this one-liner script that prints all lines in a file surrounded by
+double quotes:
+
+@example
+@{ print "\"" $0 "\"" @}
+@end example
+
+@noindent
+In an MS-Windows command-line the one-liner script above may be passed as
+follows:
@example
gawk "@{ print \"\042\" $0 \"\042\" @}" @var{file}
@end example
+In this example the @samp{\042} is the octal code for a double-quote;
+@command{gawk} converts it into a real double-quote for output by
+the @code{print} statement.
+
+In MS-Windows escaping double-quotes is a little tricky because you use
+backslashes to escape double-quotes, but backslashes themselves are not
+escaped in the usual way; indeed they are either duplicated or not,
+depending upon whether there is a subsequent double-quote. The MS-Windows
+rule for double-quoting a string is the following:
+
+@enumerate
+@item
+For each double quote in the original string, let @var{N} be the number
+of backslash(es) before it; @var{N} might be zero. Replace these @var{N}
+backslash(es) by @math{2@value{TIMES}@var{N}+1} backslash(es).
+
+@item
+Let @var{N} be the number of backslash(es) trailing the original string;
+@var{N} might be zero. Replace these @var{N} backslash(es) by
+@math{2@value{TIMES}@var{N}} backslash(es).
+
+@item
+Surround the resulting string by double-quotes.
+@end enumerate
+
+So to double-quote the one-liner script @samp{@{ print "\"" $0 "\"" @}}
+from the previous example you would do it this way:
+
+@example
+gawk "@{ print \"\\\"\" $0 \"\\\"\" @}" @var{file}
+@end example
+
+@noindent
+However, the use of @samp{\042} instead of @samp{\\\"} is also possible
+and easier to read, because backslashes that are not followed by a
+double-quote don't need duplication.
@node Sample Data Files
@section @value{DDF}s for the Examples
@@ -3960,6 +4023,7 @@ when parsing numeric input data (@pxref{Locales}).
@cindex @option{-o} option
@cindex @option{--pretty-print} option
Enable pretty-printing of @command{awk} programs.
+Implies @option{--no-optimize}.
By default, the output program is created in a file named @file{awkprof.out}
(@pxref{Profiling}).
The optional @var{file} argument allows you to specify a different
@@ -3968,18 +4032,22 @@ No space is allowed between the @option{-o} and @var{file}, if
@var{file} is supplied.
@quotation NOTE
-Due to the way @command{gawk} has evolved, with this option
-your program still executes. This will change in the
-next major release, such that @command{gawk} will only
-pretty-print the program and not run it.
+In the past, this option would also execute your program.
+This is no longer the case.
@end quotation
@item @option{-O}
@itemx @option{--optimize}
@cindex @option{--optimize} option
@cindex @option{-O} option
-Enable some optimizations on the internal representation of the program.
-At the moment, this includes just simple constant folding.
+Enable @command{gawk}'s default optimizations on the internal
+representation of the program. At the moment, this includes simple
+constant folding and tail recursion elimination in function calls.
+
+These optimizations are enabled by default.
+This option remains primarily for backwards compatibility. However, it may
+be used to cancel the effect of an earlier @option{-s} option
+(see later in this list).
@item @option{-p}[@var{file}]
@itemx @option{--profile}[@code{=}@var{file}]
@@ -3988,6 +4056,7 @@ At the moment, this includes just simple constant folding.
@cindex @command{awk} profiling, enabling
Enable profiling of @command{awk} programs
(@pxref{Profiling}).
+Implies @option{--no-optimize}.
By default, profiles are created in a file named @file{awkprof.out}.
The optional @var{file} argument allows you to specify a different
@value{FN} for the profile file.
@@ -4017,11 +4086,6 @@ restrictions apply:
@cindex newlines
@cindex whitespace, newlines as
@item
-Newlines do not act as whitespace to separate fields when @code{FS} is
-equal to a single space
-(@pxref{Fields}).
-
-@item
Newlines are not allowed after @samp{?} or @samp{:}
(@pxref{Conditional Exp}).
@@ -4059,6 +4123,13 @@ This is now @command{gawk}'s default behavior.
Nevertheless, this option remains (both for backward compatibility
and for use in combination with @option{--traditional}).
+@item @option{-s}
+@itemx @option{--no-optimize}
+@cindex @option{--no-optimize} option
+@cindex @option{-s} option
+Disable @command{gawk}'s default optimizations on the internal
+representation of the program.
+
@item @option{-S}
@itemx @option{--sandbox}
@cindex @option{-S} option
@@ -4372,6 +4443,9 @@ searches first in the current directory and then in @file{/usr/local/share/awk}.
In practice, this means that you will rarely need to change the
value of @env{AWKPATH}.
+@xref{Shell Startup Files}, for information on functions that help to
+manipulate the @env{AWKPATH} variable.
+
@command{gawk} places the value of the search path that it used into
@code{ENVIRON["AWKPATH"]}. This provides access to the actual search
path value from within an @command{awk} program.
@@ -4403,6 +4477,9 @@ an empty value, @command{gawk} uses a default path; this
is typically @samp{/usr/local/lib/gawk}, although it can vary depending
upon how @command{gawk} was built.
+@xref{Shell Startup Files}, for information on functions that help to
+manipulate the @env{AWKLIBPATH} variable.
+
@command{gawk} places the value of the search path that it used into
@code{ENVIRON["AWKLIBPATH"]}. This provides access to the actual search
path value from within an @command{awk} program.
@@ -4430,6 +4507,8 @@ wait for input before returning with an error.
Controls the number of times @command{gawk} attempts to
retry a two-way TCP/IP (socket) connection before giving up.
@xref{TCP/IP Networking}.
+Note that when nonfatal I/O is enabled (@pxref{Nonfatal}),
+@command{gawk} only tries to open a TCP/IP socket once.
@item POSIXLY_CORRECT
Causes @command{gawk} to switch to POSIX-compatibility
@@ -4484,14 +4563,6 @@ two regexp matchers that @command{gawk} uses internally. (There aren't
supposed to be differences, but occasionally theory and practice don't
coordinate with each other.)
-@item GAWK_NO_PP_RUN
-When @command{gawk} is invoked with the @option{--pretty-print} option,
-it will not run the program if this environment variable exists.
-
-@quotation CAUTION
-This variable will not survive into the next major release.
-@end quotation
-
@item GAWK_STACKSIZE
This specifies the amount by which @command{gawk} should grow its
internal evaluation stack, when needed.
@@ -4789,6 +4860,32 @@ Similarly, you may use @code{print} or @code{printf} statements in the
@var{init} and @var{increment} parts of a @code{for} loop. This is another
long-undocumented ``feature'' of Unix @command{awk}.
+@command{gawk} lets you use the names of built-in functions that are
+@command{gawk} extensions as the names of parameters in user-defined functions.
+This is intended to ``future-proof'' old code that happens to use
+function names added by @command{gawk} after the code was written.
+Standard @command{awk} built-in functions, such as @code{sin()} or
+@code{substr()}, are @emph{not} shadowed in this way.
+
+The @code{PROCINFO["argv"]} array contains all of the command-line arguments
+(after glob expansion and redirection processing on platforms where that must
+be done manually by the program) with subscripts ranging from 0 through
+@code{argc} @minus{} 1. For example, @code{PROCINFO["argv"][0]} will contain
+the name by which @command{gawk} was invoked. Here is an example of how this
+feature may be used:
+
+@example
+awk '
+BEGIN @{
+ for (i = 0; i < length(PROCINFO["argv"]); i++)
+ print i, PROCINFO["argv"][i]
+@}'
+@end example
+
+Please note that this differs from the standard @code{ARGV} array which does
+not include command-line arguments that have already been processed by
+@command{gawk} (@pxref{ARGC and ARGV}).
+
@end ignore
@node Invoking Summary
@@ -5071,17 +5168,21 @@ between @samp{0} and @samp{7}. For example, the code for the ASCII ESC
@item \x@var{hh}@dots{}
The hexadecimal value @var{hh}, where @var{hh} stands for a sequence
of hexadecimal digits (@samp{0}--@samp{9}, and either @samp{A}--@samp{F}
-or @samp{a}--@samp{f}). Like the same construct
-in ISO C, the escape sequence continues until the first nonhexadecimal
-digit is seen. @value{COMMONEXT}
-However, using more than two hexadecimal digits produces
-undefined results. (The @samp{\x} escape sequence is not allowed in
-POSIX @command{awk}.)
+or @samp{a}--@samp{f}). A maximum of two digits are allowed after
+the @samp{\x}. Any further hexadecimal digits are treated as simple
+letters or numbers. @value{COMMONEXT}
+(The @samp{\x} escape sequence is not allowed in POSIX @command{awk}.)
@quotation CAUTION
-The next major release of @command{gawk} will change, such
-that a maximum of two hexadecimal digits following the
-@samp{\x} will be used.
+In ISO C, the escape sequence continues until the first nonhexadecimal
+digit is seen.
+For many years, @command{gawk} would continue incorporating
+hexadecimal digits into the value until a non-hexadecimal digit
+or the end of the string was encountered.
+However, using more than two hexadecimal digits produced
+undefined results.
+As of @value{PVERSION} 4.2, only two digits
+are processed.
@end quotation
@cindex @code{\} (backslash), @code{\/} escape sequence
@@ -6100,6 +6201,7 @@ used with it do not have to be named on the @command{awk} command line
* Getline:: Reading files under explicit program control
using the @code{getline} function.
* Read Timeout:: Reading input with a timeout.
+* Retrying Input:: Retrying input after certain errors.
* Command-line directories:: What happens if you put a directory on the
command line.
* Input Summary:: Input summary.
@@ -6417,16 +6519,12 @@ Readfile} for another option.
@cindex fields
@cindex accessing fields
@cindex fields, examining
-@cindex POSIX @command{awk}, field separators and
-@cindex field separators, POSIX and
-@cindex separators, field, POSIX and
When @command{awk} reads an input record, the record is
automatically @dfn{parsed} or separated by the @command{awk} utility into chunks
called @dfn{fields}. By default, fields are separated by @dfn{whitespace},
like words in a line.
Whitespace in @command{awk} means any string of one or more spaces,
-TABs, or newlines;@footnote{In POSIX @command{awk}, newlines are not
-considered whitespace for separating fields.} other characters
+TABs, or newlines; other characters
that are considered whitespace by other languages
(such as formfeed, vertical tab, etc.) are @emph{not} considered
whitespace by @command{awk}.
@@ -6840,7 +6938,6 @@ can massage it first with a separate @command{awk} program.)
@node Default Field Splitting
@subsection Whitespace Normally Separates Fields
-@cindex newlines, as field separators
@cindex whitespace, as field separators
Fields are normally separated by whitespace sequences
(spaces, TABs, and newlines), not by single spaces. Two spaces in a row do not
@@ -7707,6 +7804,13 @@ a record, such as a file that cannot be opened, then @code{getline}
returns @minus{}1. In this case, @command{gawk} sets the variable
@code{ERRNO} to a string describing the error that occurred.
+If @code{ERRNO} indicates that the I/O operation may be
+retried, and @code{PROCINFO["@var{input}", "RETRY"]} is set,
+then @code{getline} returns @minus{}2
+instead of @minus{}1, and further calls to @code{getline}
+may be attempted. @xref{Retrying Input}, for further information about
+this feature.
+
In the following examples, @var{command} stands for a string value that
represents a shell command.
@@ -8361,7 +8465,8 @@ on a per-command or per-connection basis.
the attempt to read from the underlying device may
succeed in a later attempt. This is a limitation, and it also
means that you cannot use this to multiplex input from
-two or more sources.
+two or more sources. @xref{Retrying Input}, for a way to enable
+later I/O attempts to succeed.
Assigning a timeout value prevents read operations from
blocking indefinitely. But bear in mind that there are other ways
@@ -8371,6 +8476,36 @@ a connection before it can start reading any data,
or the attempt to open a FIFO special file for reading can block
indefinitely until some other process opens it for writing.
+@node Retrying Input
+@section Retrying Reads After Certain Input Errors
+@cindex retrying input
+
+@cindex differences in @command{awk} and @command{gawk}, retrying input
+This @value{SECTION} describes a feature that is specific to @command{gawk}.
+
+When @command{gawk} encounters an error while reading input, by
+default @code{getline} returns @minus{}1, and subsequent attempts to
+read from that file result in an end-of-file indication. However, you
+may optionally instruct @command{gawk} to allow I/O to be retried when
+certain errors are encountered by setting a special element in
+the @code{PROCINFO} array (@pxref{Auto-set}):
+
+@example
+PROCINFO["@var{input_name}", "RETRY"] = 1
+@end example
+
+When this element exists, @command{gawk} checks the value of the system
+(C language)
+@code{errno} variable when an I/O error occurs. If @code{errno} indicates
+a subsequent I/O attempt may succeed, @code{getline} instead returns
+@minus{}2 and
+further calls to @code{getline} may succeed. This applies to the @code{errno}
+values @code{EAGAIN}, @code{EWOULDBLOCK}, @code{EINTR}, or @code{ETIMEDOUT}.
+
+This feature is useful in conjunction with
+@code{PROCINFO["@var{input_name}", "READ_TIMEOUT"]} or situations where a file
+descriptor has been configured to behave in a non-blocking fashion.
+
@node Command-line directories
@section Directories on the Command Line
@cindex differences in @command{awk} and @command{gawk}, command-line directories
@@ -8532,6 +8667,7 @@ and discusses the @code{close()} built-in function.
@command{gawk} allows access to inherited file
descriptors.
* Close Files And Pipes:: Closing Input and Output Files and Pipes.
+* Nonfatal:: Enabling Nonfatal Output.
* Output Summary:: Output summary.
* Output Exercises:: Exercises.
@end menu
@@ -9912,17 +10048,26 @@ a system problem closing the file or process.
In these cases, @command{gawk} sets the predefined variable
@code{ERRNO} to a string describing the problem.
-In @command{gawk},
-when closing a pipe or coprocess (input or output),
-the return value is the exit status of the command.@footnote{
-This is a full 16-bit value as returned by the @code{wait()}
-system call. See the system manual pages for information on
-how to decode this value.}
-Otherwise, it is the return value from the system's @code{close()} or
-@code{fclose()} C functions when closing input or output
-files, respectively.
-This value is zero if the close succeeds, or @minus{}1 if
-it fails.
+In @command{gawk}, starting with @value{PVERSION} 4.2, when closing a pipe or
+coprocess (input or output), the return value is the exit status of the
+command, as described in @ref{table-close-pipe-return-values}.@footnote{Prior
+to @value{PVERSION} 4.2, the return value from closing a pipe or coprocess
+was the full 16-bit exit value as defined by the @code{wait()} system
+call.} Otherwise, it is the return value from the system's @code{close()}
+or @code{fclose()} C functions when closing input or output files,
+respectively. This value is zero if the close succeeds, or @minus{}1
+if it fails.
+
+@float Table,table-close-pipe-return-values
+@caption{Return values from @code{close()} of a pipe}
+@multitable @columnfractions .40 .60
+@headitem Situation @tab Return value from @code{close()}
+@item Normal exit of command @tab Command's exit status
+@item Death by signal of command @tab 256 + number of murderous signal
+@item Death by signal of command with core dump @tab 512 + number of murderous signal
+@item Some kind of error @tab @minus{}1
+@end multitable
+@end float
The POSIX standard is very vague; it says that @code{close()}
returns zero on success and a nonzero value otherwise. In general,
@@ -9933,6 +10078,70 @@ In POSIX mode (@pxref{Options}), @command{gawk} just returns zero
when closing a pipe.
@end sidebar
+@node Nonfatal
+@section Enabling Nonfatal Output
+
+This @value{SECTION} describes a @command{gawk}-specific feature.
+
+In standard @command{awk}, output with @code{print} or @code{printf}
+to a nonexistent file, or some other I/O error (such as filling up the
+disk) is a fatal error.
+
+@example
+$ @kbd{gawk 'BEGIN @{ print "hi" > "/no/such/file" @}'}
+@error{} gawk: cmd. line:1: fatal: can't redirect to `/no/such/file' (No such file or directory)
+@end example
+
+@command{gawk} makes it possible to detect that an error has
+occurred, allowing you to possibly recover from the error, or
+at least print an error message of your choosing before exiting.
+You can do this in one of two ways:
+
+@itemize @bullet
+@item
+For all output files, by assigning any value to @code{PROCINFO["NONFATAL"]}.
+
+@item
+On a per-file basis, by assigning any value to
+@code{PROCINFO[@var{filename}, "NONFATAL"]}.
+Here, @var{filename} is the name of the file to which
+you wish output to be nonfatal.
+@end itemize
+
+Once you have enabled nonfatal output, you must check @code{ERRNO}
+after every relevant @code{print} or @code{printf} statement to
+see if something went wrong. It is also a good idea to initialize
+@code{ERRNO} to zero before attempting the output. For example:
+
+@example
+$ @kbd{gawk '}
+> @kbd{BEGIN @{}
+> @kbd{ PROCINFO["NONFATAL"] = 1}
+> @kbd{ ERRNO = 0}
+> @kbd{ print "hi" > "/no/such/file"}
+> @kbd{ if (ERRNO) @{}
+> @kbd{ print("Output failed:", ERRNO) > "/dev/stderr"}
+> @kbd{ exit 1}
+> @kbd{ @}}
+> @kbd{@}'}
+@error{} Output failed: No such file or directory
+@end example
+
+Here, @command{gawk} did not produce a fatal error; instead
+it let the @command{awk} program code detect the problem and handle it.
+
+This mechanism works also for standard output and standard error.
+For standard output, you may use @code{PROCINFO["-", "NONFATAL"]}
+or @code{PROCINFO["/dev/stdout", "NONFATAL"]}. For standard error, use
+@code{PROCINFO["/dev/stderr", "NONFATAL"]}.
+
+When attempting to open a TCP/IP socket (@pxref{TCP/IP Networking}),
+@command{gawk} tries multiple times. The @env{GAWK_SOCK_RETRIES}
+environment variable (@pxref{Other Environment Variables}) allows you to
+override @command{gawk}'s built-in default number of attempts. However,
+once nonfatal I/O is enabled for a given socket, @command{gawk} only
+tries once, relying on @command{awk}-level code to notice that there
+was a problem.
@node Output Summary
@section Summary
@@ -9962,6 +10171,12 @@ Use @code{close()} to close open file, pipe, and coprocess redirections.
For coprocesses, it is possible to close only one direction of the
communications.
+@item
+Normally errors with @code{print} or @code{printf} are fatal.
+@command{gawk} lets you make output errors be nonfatal either for
+all files or on a per-file basis. You must then check for errors
+after every relevant output statement.
+
@end itemize
@c EXCLUDE START
@@ -10109,7 +10324,7 @@ Just as @samp{11} in decimal is 1 times 10 plus 1, so
@samp{11} in octal is 1 times 8 plus 1. This equals 9 in decimal.
In hexadecimal, there are 16 digits. Because the everyday decimal
number system only has ten digits (@samp{0}--@samp{9}), the letters
-@samp{a} through @samp{f} are used to represent the rest.
+@samp{a} through @samp{f} represent the rest.
(Case in the letters is usually irrelevant; hexadecimal @samp{a} and @samp{A}
have the same value.)
Thus, @samp{11} in
@@ -10212,6 +10427,20 @@ but could be more complex expressions).
@node Using Constant Regexps
@subsection Using Regular Expression Constants
+Regular expression constants consist of text describing
+a regular expression enclosed in slashes (such as @code{/the +answer/}).
+This @value{SECTION} describes how such constants work in
+POSIX @command{awk} and @command{gawk}, and then goes on to describe
+@dfn{strongly typed regexp constants}, which are a @command{gawk} extension.
+
+@menu
+* Standard Regexp Constants:: Regexp constants in standard @command{awk}.
+* Strong Regexp Constants:: Strongly typed regexp constants.
+@end menu
+
+@node Standard Regexp Constants
+@subsubsection Standard Regular Expression Constants
+
@cindex dark corner, regexp constants
When used on the righthand side of the @samp{~} or @samp{!~}
operators, a regexp constant merely stands for the regexp that is to be
@@ -10319,6 +10548,90 @@ or not @code{$0} matches @code{/hi/}.
a parameter to a user-defined function, because passing a truth value in
this way is probably not what was intended.
+@node Strong Regexp Constants
+@subsubsection Strongly Typed Regexp Constants
+
+This @value{SECTION} describes a @command{gawk}-specific feature.
+
+As we saw in the previous @value{SECTION},
+regexp constants (@code{/@dots{}/}) hold a strange position in the
+@command{awk} language. In most contexts, they act like an expression:
+@samp{$0 ~ /@dots{}/}. In other contexts, they denote only a regexp to
+be matched. In no case are they really a ``first class citizen'' of the
+language. That is, you cannot define a scalar variable whose type is
+``regexp'' in the same sense that you can define a variable to be a
+number or a string:
+
+@example
+num = 42 @ii{Numeric variable}
+str = "hi" @ii{String variable}
+re = /foo/ @ii{Wrong!} re @ii{is the result of} $0 ~ /foo/
+@end example
+
+For a number of more advanced use cases,
+it would be nice to have regexp constants that
+are @dfn{strongly typed}; in other words, that denote a regexp useful
+for matching, and not an expression.
+
+@command{gawk} provides this feature. A strongly typed regexp constant
+looks almost like a regular regexp constant, except that it is preceded
+by an @samp{@@} sign:
+
+@example
+re = @@/foo/ @ii{Regexp variable}
+@end example
+
+Strongly typed regexp constants @emph{cannot} be used everywhere that a
+regular regexp constant can, because this would make the language even more
+confusing. Instead, you may use them only in certain contexts:
+
+@itemize @bullet
+@item
+On the righthand side of the @samp{~} and @samp{!~} operators: @samp{some_var ~ @@/foo/}
+(@pxref{Regexp Usage}).
+
+@item
+In the @code{case} part of a @code{switch} statement
+(@pxref{Switch Statement}).
+
+@item
+As an argument to one of the built-in functions that accept regexp constants:
+@code{gensub()},
+@code{gsub()},
+@code{match()},
+@code{patsplit()},
+@code{split()},
+and
+@code{sub()}
+(@pxref{String Functions}).
+
+@item
+As a parameter in a call to a user-defined function
+(@pxref{User-defined}).
+
+@item
+On the righthand side of an assignment to a variable: @samp{some_var = @@/foo/}.
+In this case, the type of @code{some_var} is regexp. Additionally, @code{some_var}
+can be used with @samp{~} and @samp{!~}, passed to one of the built-in functions
+listed above, or passed as a parameter to a user-defined function.
+@end itemize
+
+You may use the @code{typeof()} built-in function
+(@pxref{Type Functions})
+to determine if a variable or function parameter is
+a regexp variable.
+
+The true power of this feature comes from the ability to create variables that
+have regexp type. Such variables can be passed on to user-defined functions,
+without the confusing aspects of computed regular expressions created from
+strings or string constants. They may also be passed through indirect function
+calls (@pxref{Indirect Calls})
+and on to the built-in functions that accept regexp constants.
+
+When used in numeric conversions, strongly typed regexp variables convert
+to zero. When used in string conversions, they convert to the string
+value of the original regexp text.
+
@node Variables
@subsection Variables
@@ -11355,17 +11668,94 @@ compares variables.
@node Variable Typing
@subsubsection String Type versus Numeric Type
+Scalar objects in @command{awk} (variables, array elements, and fields)
+are @emph{dynamically} typed. This means their type can change as the
+program runs, from @dfn{untyped} before any use,@footnote{@command{gawk}
+calls this @dfn{unassigned}, as the following example shows.} to string
+or number, and then from string to number or number to string, as the
+program progresses. (@command{gawk} also provides regexp-typed scalars,
+but let's ignore that for now; @pxref{Strong Regexp Constants}.)
+
+You can't do much with untyped variables, other than tell that they
+are untyped. The following program tests @code{a} against @code{""}
+and @code{0}; the test succeeds when @code{a} has never been assigned
+a value. It also uses the built-in @code{typeof()} function
+(not presented yet; @pxref{Type Functions}) to show @code{a}'s type:
+
+@example
+$ @kbd{gawk 'BEGIN @{ print (a == "" && a == 0 ?}
+> @kbd{"a is untyped" : "a has a type!") ; print typeof(a) @}'}
+@print{} a is untyped
+@print{} unassigned
+@end example
+
+A scalar has numeric type when assigned a numeric value,
+such as from a numeric constant, or from another scalar
+with numeric type:
+
+@example
+$ @kbd{gawk 'BEGIN @{ a = 42 ; print typeof(a)}
+> @kbd{b = a ; print typeof(b) @}'}
+@print{} number
+@print{} number
+@end example
+
+Similarly, a scalar has string type when assigned a string
+value, such as from a string constant, or from another scalar
+with string type:
+
+@example
+$ @kbd{gawk 'BEGIN @{ a = "forty two" ; print typeof(a)}
+> @kbd{b = a ; print typeof(b) @}'}
+@print{} string
+@print{} string
+@end example
+
+So far, this is all simple and straightforward. What happens, though,
+when @command{awk} has to process data from a user? Let's start with
+field data. What should the following command produce as output?
+
+@example
+echo hello | awk '@{ printf("%s %s < 42\n", $1,
+ ($1 < 42 ? "is" : "is not")) @}'
+@end example
+
+@noindent
+Since @samp{hello} is alphabetic data, @command{awk} can only do a string
+comparison. Internally, it converts @code{42} into @code{"42"} and compares
+the two string values @code{"hello"} and @code{"42"}. Here's the result:
+
+@example
+$ @kbd{echo hello | awk '@{ printf("%s %s < 42\n", $1,}
+> @kbd{ ($1 < 42 ? "is" : "is not")) @}'}
+@print{} hello is not < 42
+@end example
+
+However, what happens when data from a user @emph{looks like} a number?
+On the one hand, in reality, the input data consists of characters, not
+binary numeric
+values. But, on the other hand, the data looks numeric, and @command{awk}
+really ought to treat it as such. And indeed, it does:
+
+@example
+$ @kbd{echo 37 | awk '@{ printf("%s %s < 42\n", $1,}
+> @kbd{ ($1 < 42 ? "is" : "is not")) @}'}
+@print{} 37 is < 42
+@end example
+
+Here are the rules for when @command{awk}
+treats data as a number, and for when it treats data as a string.
+
@cindex numeric, strings
@cindex strings, numeric
@cindex POSIX @command{awk}, numeric strings and
-The POSIX standard introduced
-the concept of a @dfn{numeric string}, which is simply a string that looks
-like a number---for example, @code{@w{" +2"}}. This concept is used
-for determining the type of a variable.
-The type of the variable is important because the types of two variables
-determine how they are compared.
-Variable typing follows these rules:
+The POSIX standard uses the term @dfn{numeric string} for input data that
+looks numeric. The @samp{37} in the previous example is a numeric string.
+So what is the type of a numeric string? Answer: numeric.
+The type of a variable is important because the types of two variables
+determine how they are compared.
+Variable typing follows these definitions and rules:
@itemize @value{BULLET}
@item
@@ -11380,7 +11770,9 @@ attribute.
Fields, @code{getline} input, @code{FILENAME}, @code{ARGV} elements,
@code{ENVIRON} elements, and the elements of an array created by
@code{match()}, @code{split()}, and @code{patsplit()} that are numeric
-strings have the @dfn{strnum} attribute. Otherwise, they have
+strings have the @dfn{strnum} attribute.@footnote{Thus, a POSIX
+numeric string and @command{gawk}'s strnum are the same thing.}
+Otherwise, they have
the @dfn{string} attribute. Uninitialized variables also have the
@dfn{strnum} attribute.
@@ -11454,7 +11846,7 @@ STRNUM &&string &numeric &numeric\cr
@end tex
@ifnottex
@ifnotdocbook
-@display
+@verbatim
+----------------------------------------------
| STRING NUMERIC STRNUM
--------+----------------------------------------------
@@ -11465,7 +11857,7 @@ NUMERIC | string numeric numeric
|
STRNUM | string numeric numeric
--------+----------------------------------------------
-@end display
+@end verbatim
@end ifnotdocbook
@end ifnottex
@docbook
@@ -11524,10 +11916,14 @@ purposes.
In short, when one operand is a ``pure'' string, such as a string
constant, then a string comparison is performed. Otherwise, a
numeric comparison is performed.
+(The primary difference between a number and a strnum is that
+for strnums @command{gawk} preserves the original string value that
+the scalar had when it came in.)
+
+This point bears additional emphasis:
+Input that looks numeric @emph{is} numeric.
+All other input is treated as strings.
-This point bears additional emphasis: All user input is made of characters,
-and so is first and foremost of string type; input strings
-that look numeric are additionally given the strnum attribute.
Thus, the six-character input string @w{@samp{ +3.14}} receives the
strnum attribute. In contrast, the eight characters
@w{@code{" +3.14"}} appearing in program text comprise a string constant.
@@ -11554,6 +11950,14 @@ $ @kbd{echo ' +3.14' | awk '@{ print($1 == 3.14) @}'} @ii{True}
@print{} 1
@end example
+You can see the type of an input field (or other user input)
+using @code{typeof()}:
+
+@example
+$ @kbd{echo hello 37 | gawk '@{ print typeof($1), typeof($2) @}'}
+@print{} string strnum
+@end example
+
@node Comparison Operators
@subsubsection Comparison Operators
@@ -11713,19 +12117,19 @@ One special place where @code{/foo/} is @emph{not} an abbreviation for
where this is discussed in more detail.
@node POSIX String Comparison
-@subsubsection String Comparison with POSIX Rules
+@subsubsection String Comparison Based on Locale Collating Order
-The POSIX standard says that string comparison is performed based
-on the locale's @dfn{collating order}. This is the order in which
-characters sort, as defined by the locale (for more discussion,
-@pxref{Locales}). This order is usually very different
-from the results obtained when doing straight character-by-character
-comparison.@footnote{Technically, string comparison is supposed
-to behave the same way as if the strings were compared with the C
-@code{strcoll()} function.}
+The POSIX standard used to say that all string comparisons are
+performed based on the locale's @dfn{collating order}. This
+is the order in which characters sort, as defined by the locale
+(for more discussion, @pxref{Locales}). This order is usually very
+different from the results obtained when doing straight byte-by-byte
+comparison.@footnote{Technically, string comparison is supposed to behave
+the same way as if the strings were compared with the C @code{strcoll()}
+function.}
Because this behavior differs considerably from existing practice,
-@command{gawk} only implements it when in POSIX mode (@pxref{Options}).
+@command{gawk} only implemented it when in POSIX mode (@pxref{Options}).
Here is an example to illustrate the difference, in an @code{en_US.UTF-8}
locale:
@@ -11738,6 +12142,26 @@ $ @kbd{gawk --posix 'BEGIN @{ printf("ABC < abc = %s\n",}
@print{} ABC < abc = FALSE
@end example
+Fortunately, as of August 2016, comparison based on locale
+collating order is no longer required for the @code{==} and @code{!=}
+operators.@footnote{See @uref{http://austingroupbugs.net/view.php?id=1070,
+the Austin Group website}.} However, comparison based on locales is still
+required for @code{<}, @code{<=}, @code{>}, and @code{>=}. POSIX thus
+recommends as follows:
+
+@quotation
+Since the @code{==} operator checks whether strings are identical,
+not whether they collate equally, applications needing to check whether
+strings collate equally can use:
+
+@example
+a <= b && a >= b
+@end example
+@end quotation
+
+As of @value{PVERSION} 4.2, @command{gawk} continues to use locale
+collating order for @code{<}, @code{<=}, @code{>}, and @code{>=} only
+in POSIX mode.
@node Boolean Ops
@subsection Boolean Expressions
@@ -13897,12 +14321,11 @@ specify the behavior when @code{FS} is the null string.
Nonetheless, some other versions of @command{awk} also treat
@code{""} specially.)
-@cindex POSIX @command{awk}, @code{FS} variable and
The default value is @w{@code{" "}}, a string consisting of a single
-space. As a special exception, this value means that any
-sequence of spaces, TABs, and/or newlines is a single separator.@footnote{In
-POSIX @command{awk}, newline does not count as whitespace.} It also causes
-spaces, TABs, and newlines at the beginning and end of a record to be ignored.
+space. As a special exception, this value means that any sequence of
+spaces, TABs, and/or newlines is a single separator. It also causes
+spaces, TABs, and newlines at the beginning and end of a record to
+be ignored.
You can set the value of @code{FS} on the command line using the
@option{-F} option:
@@ -14126,10 +14549,24 @@ opens the next file.
An associative array containing the values of the environment. The array
indices are the environment variable names; the elements are the values of
the particular environment variables. For example,
-@code{ENVIRON["HOME"]} might be @code{"/home/arnold"}. Changing this array
-does not affect the environment passed on to any programs that
-@command{awk} may spawn via redirection or the @code{system()} function.
-(In a future version of @command{gawk}, it may do so.)
+@code{ENVIRON["HOME"]} might be @code{/home/arnold}.
+
+For POSIX @command{awk}, changing this array does not affect the
+environment passed on to any programs that @command{awk} may spawn via
+redirection or the @code{system()} function.
+
+However, beginning with @value{PVERSION} 4.2, if not in POSIX
+compatibility mode, @command{gawk} does update its own environment when
+@code{ENVIRON} is changed, thus changing the environment seen by programs
+that it creates. You should therefore be especially careful if you
+modify @code{ENVIRON["PATH"]}, which is the search path for finding
+executable programs.
+
+This can also affect the running @command{gawk} program, since some of the
+built-in functions may pay attention to certain environment variables.
+The most notable instance of this is @code{mktime()} (@pxref{Time
+Functions}), which pays attention to the value of the @env{TZ} environment
+variable on many systems.
Some operating systems may not have environment variables.
On such systems, the @code{ENVIRON} array is empty (except for
@@ -14163,6 +14600,11 @@ value to be meaningful when an I/O operation returns a failure value,
such as @code{getline} returning @minus{}1. You are, of course, free
to clear it yourself before doing an I/O operation.
+If the value of @code{ERRNO} corresponds to a system error in the C
+@code{errno} variable, then @code{PROCINFO["errno"]} will be set to the value
+of @code{errno}. For non-system errors, @code{PROCINFO["errno"]} will
+be zero.
+
@cindex @code{FILENAME} variable
@cindex dark corner, @code{FILENAME} variable
@item @code{FILENAME}
@@ -14231,6 +14673,10 @@ are guaranteed to be available:
@item PROCINFO["egid"]
The value of the @code{getegid()} system call.
+@item PROCINFO["errno"]
+The value of the C @code{errno} variable when @code{ERRNO} is set to
+the associated error message.
+
@item PROCINFO["euid"]
@cindex effective user ID of @command{gawk} user
The value of the @code{geteuid()} system call.
@@ -14354,6 +14800,14 @@ to test for these elements
The following elements allow you to change @command{gawk}'s behavior:
@table @code
+@item PROCINFO["NONFATAL"]
+If this element exists, then I/O errors for all output redirections become nonfatal.
+@xref{Nonfatal}.
+
+@item PROCINFO["@var{output_name}", "NONFATAL"]
+Make output errors for @var{output_name} be nonfatal.
+@xref{Nonfatal}.
+
@item PROCINFO["@var{command}", "pty"]
For two-way communication to @var{command}, use a pseudo-tty instead
of setting up a two-way pipe.
@@ -16248,6 +16702,23 @@ truncated toward zero.
For example, @code{int(3)} is 3, @code{int(3.9)} is 3, @code{int(-3.9)}
is @minus{}3, and @code{int(-3)} is @minus{}3 as well.
+@item @code{intdiv(@var{numerator}, @var{denominator}, @var{result})}
+@cindexawkfunc{intdiv}
+@cindex intdiv
+Perform integer division, similar to the standard C @code{div()}
+function. First, truncate @code{numerator} and @code{denominator}
+towards zero, creating integer values. Clear the @code{result}
+array, and then set @code{result["quotient"]} to the result of
+@samp{numerator / denominator}, truncated towards zero to an integer,
+and set @code{result["remainder"]} to the result of @samp{numerator %
+denominator}, truncated towards zero to an integer. This function is
+primarily intended for use with arbitrary length integers; it avoids
+creating MPFR arbitrary precision floating-point values (@pxref{Arbitrary
+Precision Integers}).
+
+This function is a @command{gawk} extension. It is not available in
+compatibility mode (@pxref{Options}).
+
@item @code{log(@var{x})}
@cindexawkfunc{log}
@cindex logarithm
@@ -17733,7 +18204,7 @@ Optional parameters are enclosed in square brackets ([ ]):
@c @asis for docbook
@table @asis
-@item @code{mktime(@var{datespec})}
+@item @code{mktime(@var{datespec}} [@code{, @var{utc-flag}} ]@code{)}
@cindexgawkfunc{mktime}
@cindex generate time values
Turn @var{datespec} into a timestamp in the same form
@@ -17752,7 +18223,9 @@ The values of these numbers need not be within the ranges specified;
for example, an hour of @minus{}1 means 1 hour before midnight.
The origin-zero Gregorian calendar is assumed, with year 0 preceding
year 1 and year @minus{}1 preceding year 0.
-The time is assumed to be in the local time zone.
+If @var{utc-flag} is present and is either nonzero or non-null, the time
+is assumed to be in the UTC time zone; otherwise, the
+time is assumed to be in the local time zone.
If the daylight-savings flag is positive, the time is assumed to be
daylight savings time; if zero, the time is assumed to be standard
time; and if negative (the default), @code{mktime()} attempts to determine
@@ -18252,12 +18725,12 @@ Return the value of @var{val}, shifted right by @var{count} bits.
Return the bitwise XOR of the arguments. There must be at least two.
@end table
-For all of these functions, first the double-precision floating-point value is
-converted to the widest C unsigned integer type, then the bitwise operation is
-performed. If the result cannot be represented exactly as a C @code{double},
-leading nonzero bits are removed one by one until it can be represented
-exactly. The result is then converted back into a C @code{double}. (If
-you don't understand this paragraph, don't worry about it.)
+@quotation CAUTION
+Beginning with @command{gawk} @value{PVERSION} 4.2, negative
+operands are not allowed for any of these functions. A negative
+operand produces a fatal error. See the sidebar
+``Beware The Smoke and Mirrors!'' for more information as to why.
+@end quotation
Here is a user-defined function (@pxref{User-defined})
that illustrates the use of these functions:
@@ -18362,19 +18835,128 @@ decimal and octal values for the same numbers
and then demonstrates the
results of the @code{compl()}, @code{lshift()}, and @code{rshift()} functions.
+@sidebar Beware The Smoke and Mirrors!
+
+In other languages, bitwise operations are performed on integer values,
+not floating-point values. As a general statement, such operations work
+best when performed on unsigned integers.
+
+@command{gawk} attempts to treat the arguments to the bitwise functions
+as unsigned integers. For this reason, negative arguments produce a
+fatal error.
+
+In normal operation, for all of these functions, first the
+double-precision floating-point value is converted to the widest C
+unsigned integer type, then the bitwise operation is performed. If the
+result cannot be represented exactly as a C @code{double}, leading
+nonzero bits are removed one by one until it can be represented exactly.
+The result is then converted back into a C @code{double}.@footnote{If you don't
+understand this paragraph, the upshot is that @command{gawk} can only
+store a particular range of integer values; numbers outside that range
+are reduced to fit within the range.}
+
+However, when using arbitrary precision arithmetic with the @option{-M}
+option (@pxref{Arbitrary Precision Arithmetic}), the results may differ.
+This is particularly noticeable with the @code{compl()} function:
+
+@example
+$ @kbd{gawk 'BEGIN @{ print compl(42) @}'}
+@print{} 9007199254740949
+$ @kbd{gawk -M 'BEGIN @{ print compl(42) @}'}
+@print{} -43
+@end example
+
+What's going on becomes clear when printing the results
+in hexadecimal:
+
+@example
+$ @kbd{gawk 'BEGIN @{ printf "%#x\n", compl(42) @}'}
+@print{} 0x1fffffffffffd5
+$ @kbd{gawk -M 'BEGIN @{ printf "%#x\n", compl(42) @}'}
+@print{} 0xffffffffffffffd5
+@end example
+
+When using the @option{-M} option, under the hood, @command{gawk} uses
+GNU MP arbitrary precision integers which have at least 64 bits of precision.
+When not using @option{-M}, @command{gawk} stores integral values in
+regular double-precision floating point, which only maintains 53 bits of
+precision. Furthermore, the GNU MP library treats (or at least seems to treat)
+the leading bit as a sign bit; thus the result with @option{-M} in this case is
+a negative number.
+
+In short, using @command{gawk} for any but the simplest kind of bitwise
+operations is probably a bad idea; caveat emptor!
+
+@end sidebar
+
@node Type Functions
@subsection Getting Type Information
-@command{gawk} provides a single function that lets you distinguish
-an array from a scalar variable. This is necessary for writing code
+@command{gawk} provides two functions that let you distinguish
+the type of a variable.
+This is necessary for writing code
that traverses every element of an array of arrays
-(@pxref{Arrays of Arrays}).
+(@pxref{Arrays of Arrays}), and in other contexts.
@table @code
@cindexgawkfunc{isarray}
@cindex scalar or array
@item isarray(@var{x})
Return a true value if @var{x} is an array. Otherwise, return false.
+
+@cindexgawkfunc{typeof}
+@cindex variable type
+@cindex type, of variable
+@item typeof(@var{x})
+Return one of the following strings, depending upon the type of @var{x}:
+
+@c nested table
+@table @code
+@item "array"
+@var{x} is an array.
+
+@item "regexp"
+@var{x} is a strongly typed regexp (@pxref{Strong Regexp Constants}).
+
+@item "number"
+@var{x} is a number.
+
+@item "string"
+@var{x} is a string.
+
+@item "strnum"
+@var{x} is a number that started life as user input, such as a field or
+the result of calling @code{split()}. (I.e., @var{x} has the strnum
+attribute; @pxref{Variable Typing}.)
+
+@item "unassigned"
+@var{x} is a scalar variable that has not been assigned a value yet.
+For example:
+
+@example
+BEGIN @{
+ # creates a[1] but it has no assigned value
+ a[1]
+ print typeof(a[1]) # unassigned
+@}
+@end example
+
+@item "untyped"
+@var{x} has not yet been used at all; it can become a scalar or an
+array.
+For example:
+
+@example
+BEGIN @{
+ print typeof(x) # x never used --> untyped
+ mk_arr(x)
+ print typeof(x) # x now an array --> array
+@}
+
+function mk_arr(a) @{ a[1] = 1 @}
+@end example
+
+@end table
@end table
@code{isarray()} is meant for use in two circumstances. The first is when
@@ -18392,6 +18974,14 @@ that has not been previously used to @code{isarray()}, @command{gawk}
ends up turning it into a scalar.
@end quotation
+The @code{typeof()} function is general; it allows you to determine
+if a variable or function parameter is a scalar, an array, or a strongly
+typed regexp.
+
+@code{isarray()} is deprecated; you should use @code{typeof()} instead.
+You should replace any existing uses of @samp{isarray(var)} in your
+code with @samp{typeof(var) == "array"}.
+
@node I18N Functions
@subsection String-Translation Functions
@cindex @command{gawk}, string-translation functions
@@ -26622,9 +27212,16 @@ your program to hang. (Thus, this particular feature is of much less
use in practice than being able to close the @code{"to"} end.)
@quotation CAUTION
-It is a fatal error to write to the @code{"to"} end of a two-way
-pipe which has been closed. It is also a fatal error to read
+Normally,
+it is a fatal error to write to the @code{"to"} end of a two-way
+pipe which has been closed, and it is also a fatal error to read
from the @code{"from"} end of a two-way pipe that has been closed.
+
+You may set @code{PROCINFO["@var{command}", "NONFATAL"]} to
+make such operations become nonfatal. If you do so, you then need
+to check @code{ERRNO} after each @code{print}, @code{printf},
+or @code{getline}.
+@xref{Nonfatal}, for more information.
@end quotation
@cindex @command{gawk}, @code{PROCINFO} array in
@@ -27008,8 +27605,7 @@ The profiled version of your program may not look exactly like what you
typed when you wrote it. This is because @command{gawk} creates the
profiled version by ``pretty-printing'' its internal representation of
the program. The advantage to this is that @command{gawk} can produce
-a standard representation. The disadvantage is that all source code
-comments are lost.
+a standard representation.
Also, things such as:
@example
@@ -27103,10 +27699,39 @@ When called this way, @command{gawk} ``pretty-prints'' the program into
@file{awkprof.out}, without any execution counts.
@quotation NOTE
-The @option{--pretty-print} option still runs your program.
-This will change in the next major release.
+Once upon a time, the @option{--pretty-print} option would also run
+your program. This is no longer the case.
@end quotation
+There is a significant difference between the output created when
+profiling, and that created when pretty-printing. Pretty-printed output
+preserves the original comments that were in the program, although their
+placement may not correspond exactly to their original locations in the
+source code.@footnote{@command{gawk} does the best it can to preserve
+the distinction between comments at the end of a statement and comments
+on lines by themselves. Due to implementation constraints, it does not
+always do so correctly, particularly for @code{switch} statements. The
+@command{gawk} maintainers hope to improve this in a subsequent
+release.}
+
+However, as a deliberate design decision, profiling output @emph{omits}
+the original program's comments. This allows you to focus on the
+execution count data and helps you avoid the temptation to use the
+profiler for pretty-printing.
+
+Additionally, pretty-printed output does not have the leading indentation
+that the profiling output does. This makes it easy to pretty-print your
+code once development is completed, and then use the result as the final
+version of your program.
+
+Because the internal representation of your program is formatted to
+recreate an @command{awk} program, profiling and pretty-printing
+automatically disable @command{gawk}'s default optimizations.
+
+Pretty printing also preserves the original format of numeric
+constants; if you used an octal or hexadecimal value in your source
+code, it will appear that way in the output.
+
@node Advanced Features Summary
@section Summary
@@ -27147,8 +27772,7 @@ you tune them more easily. Sending the @code{USR1} signal while profiling cause
@command{gawk} to dump the profile and keep going, including a function call stack.
@item
-You can also just ``pretty-print'' the program. This currently also runs
-the program, but that will change in the next major release.
+You can also just ``pretty-print'' the program.
@end itemize
@@ -29341,6 +29965,68 @@ The @command{gawk} debugger only accepts source code supplied with the @option{-
@end itemize
@ignore
+@c 11/2016: This no longer applies after all the type cleanup work that's been done.
+One other point is worth discussing. Conventional debuggers run in a
+separate process (and thus address space) from the programs that they
+debug (the @dfn{debuggee}, if you will).
+
+The @command{gawk} debugger is different; it is an integrated part
+of @command{gawk} itself. This makes it possible, in rare cases,
+for @command{gawk} to become an excellent demonstrator of Heisenberg
+Uncertainty physics, where the mere act of observing something can change
+it. Consider the following:@footnote{Thanks to Hermann Peifer for
+this example.}
+
+@example
+$ @kbd{cat test.awk}
+@print{} @{ print typeof($1), typeof($2) @}
+$ @kbd{cat test.data}
+@print{} abc 123
+$ @kbd{gawk -f test.awk test.data}
+@print{} strnum strnum
+@end example
+
+This is all as expected: field data has the STRNUM attribute
+(@pxref{Variable Typing}). Now watch what happens when we run
+this program under the debugger:
+
+@example
+$ @kbd{gawk -D -f test.awk test.data}
+gawk> @kbd{w $1} @ii{Set watchpoint on} $1
+@print{} Watchpoint 1: $1
+gawk> @kbd{w $2} @ii{Set watchpoint on} $2
+@print{} Watchpoint 2: $2
+gawk> @kbd{r} @ii{Start the program}
+@print{} Starting program:
+@print{} Stopping in Rule ...
+@print{} Watchpoint 1: $1 @ii{Watchpoint fires}
+@print{} Old value: ""
+@print{} New value: "abc"
+@print{} main() at `test.awk':1
+@print{} 1 @{ print typeof($1), typeof($2) @}
+gawk> @kbd{n} @ii{Keep going @dots{}}
+@print{} Watchpoint 2: $2 @ii{Watchpoint fires}
+@print{} Old value: ""
+@print{} New value: "123"
+@print{} main() at `test.awk':1
+@print{} 1 @{ print typeof($1), typeof($2) @}
+gawk> @kbd{n} @ii{Get result from} typeof()
+@print{} strnum number @ii{Result for} $2 @ii{isn't right}
+@print{} Program exited normally with exit value: 0
+gawk> @kbd{quit}
+@end example
+
+In this case, the act of comparing the new value of @code{$2}
+with the old one caused @command{gawk} to evaluate it and determine that it
+is indeed a number, and this is reflected in the result of
+@code{typeof()}.
+
+Cases like this where the debugger is not transparent to the program's
+execution should be rare. If you encounter one, please report it
+(@pxref{Bugs}).
+@end ignore
+
+@ignore
Look forward to a future release when these and other missing features may
be added, and of course feel free to try to add them yourself!
@end ignore
@@ -29376,6 +30062,10 @@ If the GNU Readline library is available when @command{gawk} is
compiled, it is used by the debugger to provide command-line history
and editing.
+@item
+Usually, the debugger does not affect the
+program being debugged, but occasionally it can.
+
@end itemize
@node Arbitrary Precision Arithmetic
@@ -30193,6 +30883,122 @@ to just use the following:
gawk -M 'BEGIN @{ n = 13; print n % 2 @}'
@end example
+When dividing two arbitrary precision integers with either
+@samp{/} or @samp{%}, the result is typically an arbitrary
+precision floating point value (unless the denominator evenly
+divides into the numerator). In order to do integer division
+or remainder with arbitrary precision integers, use the built-in
+@code{intdiv()} function (@pxref{Numeric Functions}).
+
+You can simulate the @code{intdiv()} function in standard @command{awk}
+using this user-defined function:
+
+@example
+@c file eg/lib/intdiv.awk
+# intdiv --- do integer division
+
+@c endfile
+@ignore
+@c file eg/lib/intdiv.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# July, 2014
+#
+# Name changed from div() to intdiv()
+# April, 2015
+
+@c endfile
+
+@end ignore
+@c file eg/lib/intdiv.awk
+function intdiv(numerator, denominator, result)
+@{
+ split("", result)
+
+ numerator = int(numerator)
+ denominator = int(denominator)
+ result["quotient"] = int(numerator / denominator)
+ result["remainder"] = int(numerator % denominator)
+
+ return 0.0
+@}
+@c endfile
+@end example
+
+The following example program, contributed by Katie Wasserman,
+uses @code{intdiv()} to
+compute the digits of @value{PI} to as many places as you
+choose to set:
+
+@example
+@c file eg/prog/pi.awk
+# pi.awk --- compute the digits of pi
+@c endfile
+@c endfile
+@ignore
+@c file eg/prog/pi.awk
+#
+# Katie Wasserman, katie@@wass.net
+# August 2014
+@c endfile
+@end ignore
+@c file eg/prog/pi.awk
+
+BEGIN @{
+ digits = 100000
+ two = 2 * 10 ^ digits
+ pi = two
+ for (m = digits * 4; m > 0; --m) @{
+ d = m * 2 + 1
+ x = pi * m
+ intdiv(x, d, result)
+ pi = result["quotient"]
+ pi = pi + two
+ @}
+ print pi
+@}
+@c endfile
+@end example
+
+@ignore
+Date: Wed, 20 Aug 2014 10:19:11 -0400
+To: arnold@skeeve.com
+From: Katherine Wasserman <katie@wass.net>
+Subject: Re: computation of digits of pi?
+
+Arnold,
+
+>The program that you sent to compute the digits of pi using div(). Is
+>that some standard algorithm that every math student knows? If so,
+>what's it called?
+
+It's not that well known but it's not that obscure either
+
+It's Euler's modification to Newton's method for calculating pi.
+
+Take a look at lines (23) - (25) here: http://mathworld.wolfram.com/PiFormulas.htm
+
+The algorithm I wrote simply expands the multiply by 2 and works from the innermost expression outwards. I used this to program HP calculators because it's quite easy to modify for tiny memory devices with smallish word sizes.
+
+http://www.hpmuseum.org/cgi-sys/cgiwrap/hpmuseum/articles.cgi?read=899
+
+-Katie
+@end ignore
+
+When asked about the algorithm used, Katie replied:
+
+@quotation
+It's not that well known but it's not that obscure either.
+It's Euler's modification to Newton's method for calculating pi.
+Take a look at lines (23) - (25) here: @uref{http://mathworld.wolfram.com/PiFormulas.html}.
+
+The algorithm I wrote simply expands the multiply by 2 and works from
+the innermost expression outwards. I used this to program HP calculators
+because it's quite easy to modify for tiny memory devices with smallish
+word sizes. See
+@uref{http://www.hpmuseum.org/cgi-sys/cgiwrap/hpmuseum/articles.cgi?read=899}.
+@end quotation
+
@node POSIX Floating Point Problems
@section Standards Versus Existing Practice
@@ -30592,8 +31398,11 @@ This (rather large) @value{SECTION} describes the API in detail.
* Symbol Table Access:: Functions for accessing global
variables.
* Array Manipulation:: Functions for working with arrays.
+* Redirection API:: How to access and manipulate
+ redirections.
* Extension API Variables:: Variables provided by the API.
* Extension API Boilerplate:: Boilerplate code for using the API.
+* Changes from API V1:: Changes from V1 of the API.
@end menu
@node Extension API Functions Introduction
@@ -30667,6 +31476,10 @@ Clearing an array
@item
Flattening an array for easy C-style looping over all its indices and elements
@end itemize
+
+@item
+Accessing and manipulating redirections.
+
@end itemize
Some points about using the API:
@@ -30720,14 +31533,26 @@ and is managed by @command{gawk} from then on.
The API defines several simple @code{struct}s that map values as seen
from @command{awk}. A value can be a @code{double}, a string, or an
array (as in multidimensional arrays, or when creating a new array).
+
String values maintain both pointer and length, because embedded @sc{nul}
characters are allowed.
@quotation NOTE
-By intent, strings are maintained using the current multibyte encoding (as
-defined by @env{LC_@var{xxx}} environment variables) and not using wide
-characters. This matches how @command{gawk} stores strings internally
-and also how characters are likely to be input into and output from files.
+By intent, @command{gawk} maintains strings using the current multibyte
+encoding (as defined by @env{LC_@var{xxx}} environment variables)
+and not using wide characters. This matches how @command{gawk} stores
+strings internally and also how characters are likely to be input into
+and output from files.
+@end quotation
+
+@quotation NOTE
+String values passed to an extension by @command{gawk} are always
+@sc{nul}-terminated. Thus it is safe to pass such string values to
+standard library and system routines. However, because @command{gawk}
+allows embedded @sc{nul} characters in string data, before using the data
+as a regular C string, you should check that the length for that string
+passed to the extension matches the return value of @code{strlen()}
+for it.
@end quotation
@item
@@ -30810,6 +31635,8 @@ multibyte encoding.
@itemx @ @ @ @ AWK_UNDEFINED,
@itemx @ @ @ @ AWK_NUMBER,
@itemx @ @ @ @ AWK_STRING,
+@itemx @ @ @ @ AWK_REGEX,
+@itemx @ @ @ @ AWK_STRNUM,
@itemx @ @ @ @ AWK_ARRAY,
@itemx @ @ @ @ AWK_SCALAR,@ @ @ @ @ @ @ @ @ /* opaque access to a variable */
@itemx @ @ @ @ AWK_VALUE_COOKIE@ @ @ @ /* for updating a previously created value */
@@ -30832,6 +31659,8 @@ The @code{val_type} member indicates what kind of value the
@code{union} holds, and each member is of the appropriate type.
@item #define str_value@ @ @ @ @ @ u.s
+@itemx #define strnum_value@ @ @ str_value
+@itemx #define regex_value@ @ @ @ str_value
@itemx #define num_value@ @ @ @ @ @ u.d
@itemx #define array_cookie@ @ @ u.a
@itemx #define scalar_cookie@ @ u.scl
@@ -30852,7 +31681,7 @@ and in more detail in @ref{Cached values}.
@end table
-Scalar values in @command{awk} are either numbers or strings. The
+Scalar values in @command{awk} are numbers, strings, strnums, or typed regexps. The
@code{awk_value_t} struct represents values. The @code{val_type} member
indicates what is in the @code{union}.
@@ -30861,6 +31690,26 @@ require more work. Because @command{gawk} allows embedded @sc{nul} bytes
in string values, a string must be represented as a pair containing a
data pointer and length. This is the @code{awk_string_t} type.
+A strnum (numeric string) value is represented as a string and consists
+of user input data that appears to be numeric.
+When an extension creates a strnum value, the result is a string flagged
+as user input. Subsequent parsing by @command{gawk} then determines whether it
+looks like a number and should be treated as a strnum, or as a regular string.
+
+This is useful in cases where an extension function would like to do something
+comparable to the @code{split()} function which sets the strnum attribute
+on the array elements it creates. For example, an extension that implements
+CSV splitting would want to use this feature. This is also useful for a
+function that retrieves a data item from a database. The PostgreSQL
+@code{PQgetvalue()} function, for example, returns a string that may be numeric
+or textual depending on the contents.
+
+Typed regexp values (@pxref{Strong Regexp Constants}) are not of
+much use to extension functions. Extension functions can tell that
+they've received them, and create them for scalar values. Otherwise,
+they can examine the text of the regexp through @code{regex_value.str}
+and @code{regex_value.len}.
+
Identifiers (i.e., the names of global variables) can be associated
with either scalar values or with arrays. In addition, @command{gawk}
provides true arrays of arrays, where any given array element can
@@ -31027,6 +31876,31 @@ It returns @code{result}.
@itemx make_number(double num, awk_value_t *result);
This function simply creates a numeric value in the @code{awk_value_t} variable
pointed to by @code{result}.
+
+@item static inline awk_value_t *
+@itemx make_const_user_input(const char *string, size_t length, awk_value_t *result);
+This function is identical to @code{make_const_string()}, but the string is
+flagged as user input that should be treated as a strnum value if the contents
+of the string are numeric.
+
+@item static inline awk_value_t *
+@itemx make_malloced_user_input(const char *string, size_t length, awk_value_t *result);
+This function is identical to @code{make_malloced_string()}, but the string is
+flagged as user input that should be treated as a strnum value if the contents
+of the string are numeric.
+
+@item static inline awk_value_t *
+@itemx make_const_regex(const char *string, size_t length, awk_value_t *result);
+This function creates a strongly typed regexp value by allocating a copy of the string.
+@code{string} is the regular expression of length @code{length}.
+
+@item static inline awk_value_t *
+@itemx make_malloced_regex(const char *string, size_t length, awk_value_t *result);
+This function creates a strongly typed regexp value. @code{string} is
+the regular expression of length @code{length}. It expects @code{string}
+to be a @samp{char *} value pointing to data previously obtained from
+@code{gawk_malloc()}, @code{gawk_calloc()}, or @code{gawk_realloc()}.
+
@end table
@node Registration Functions
@@ -31054,8 +31928,13 @@ Extension functions are described by the following record:
@example
typedef struct awk_ext_func @{
@ @ @ @ const char *name;
-@ @ @ @ awk_value_t *(*function)(int num_actual_args, awk_value_t *result);
-@ @ @ @ size_t num_expected_args;
+@ @ @ @ awk_value_t *(*const function)(int num_actual_args,
+@ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_value_t *result,
+@ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ struct awk_ext_func *finfo);
+@ @ @ @ const size_t max_expected_args;
+@ @ @ @ const size_t min_required_args;
+@ @ @ @ awk_bool_t suppress_lint;
+@ @ @ @ void *data; /* opaque pointer to any extra state */
@} awk_ext_func_t;
@end example
@@ -31073,36 +31952,94 @@ or an underscore, which may be followed by any number of
letters, digits, and underscores.
Letter case in function names is significant.
-@item awk_value_t *(*function)(int num_actual_args, awk_value_t *result);
+@item awk_value_t *(*const function)(int num_actual_args,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_value_t *result,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ struct awk_ext_func *finfo);
This is a pointer to the C function that provides the extension's
functionality.
-The function must fill in @code{*result} with either a number
-or a string. @command{gawk} takes ownership of any string memory.
+The function must fill in @code{*result} with a number,
+a string, or a regexp.
+@command{gawk} takes ownership of any string memory.
As mentioned earlier, string memory @emph{must} come from one of
@code{gawk_malloc()}, @code{gawk_calloc()}, or @code{gawk_realloc()}.
The @code{num_actual_args} argument tells the C function how many
actual parameters were passed from the calling @command{awk} code.
+The @code{finfo} parameter is a pointer to the @code{awk_ext_func_t} for
+this function. The called function may access data within it as desired, or not.
+
The function must return the value of @code{result}.
This is for the convenience of the calling code inside @command{gawk}.
-@item size_t num_expected_args;
-This is the number of arguments the function expects to receive.
-Each extension function may decide what to do if the number of
-arguments isn't what it expected. As with real @command{awk} functions, it
-is likely OK to ignore extra arguments.
+@item const size_t max_expected_args;
+This is the maximum number of arguments the function expects to receive.
+If called with more arguments than this, and if lint checking has
+been enabled, then @command{gawk} prints a warning message. For more
+information, see the entry for @code{suppress_lint}, later in this list.
+
+@item const size_t min_required_args;
+This is the minimum number of arguments the function expects to receive.
+If called with fewer arguments, @command{gawk} prints a fatal error
+message and exits.
+
+@item awk_bool_t suppress_lint;
+This flag tells @command{gawk} not to print a lint message if lint
+checking has been enabled and if more arguments were supplied in the call
+than expected. An extension function can tell if @command{gawk} already
+printed at least one such message by checking if @samp{num_actual_args >
+finfo->max_expected_args}. If so, and the function does not want more
+lint messages to be printed, it should set @code{finfo->suppress_lint}
+to @code{awk_true}.
+
+@item void *data;
+This is an opaque pointer to any data that an extension function may
+wish to have available when called. Passing the @code{awk_ext_func_t}
+structure to the extension function, and having this pointer available
+in it enable writing a single C or C++ function that implements multiple
+@command{awk}-level extension functions.
@end table
Once you have a record representing your extension function, you register
it with @command{gawk} using this API function:
@table @code
-@item awk_bool_t add_ext_func(const char *namespace, const awk_ext_func_t *func);
+@item awk_bool_t add_ext_func(const char *namespace, awk_ext_func_t *func);
This function returns true upon success, false otherwise.
The @code{namespace} parameter is currently not used; you should pass in an
empty string (@code{""}). The @code{func} pointer is the address of a
@code{struct} representing your function, as just described.
+
+@command{gawk} does not modify what @code{func} points to, but the
+extension function itself receives this pointer and can modify what it
+points to, thus it is purposely not declared to be @code{const}.
+@end table
+
+The combination of @code{min_required_args}, @code{max_expected_args},
+and @code{suppress_lint} may be confusing. Here is how you should
+set things up.
+
+@table @asis
+@item Any number of arguments is valid
+Set @code{min_required_args} and @code{max_expected_args} to zero and
+set @code{suppress_lint} to @code{awk_true}.
+
+@item A minimum number of arguments is required, no limit on maximum number of arguments
+Set @code{min_required_args} to the minimum required. Set
+@code{max_expected_args} to zero and
+set @code{suppress_lint} to @code{awk_true}.
+
+@item A minimum number of arguments is required, a maximum number is expected
+Set @code{min_required_args} to the minimum required. Set
+@code{max_expected_args} to the maximum expected.
+Set @code{suppress_lint} to @code{awk_false}.
+
+@item A minimum number of arguments is required, and no more than a maximum is allowed
+Set @code{min_required_args} to the minimum required. Set
+@code{max_expected_args} to the maximum expected.
+Set @code{suppress_lint} to @code{awk_false}.
+In your extension function, check that @code{num_actual_args} does not
+exceed @code{finfo->max_expected_args}. If it does, issue a fatal error message.
@end table
@node Exit Callback Functions
@@ -31594,6 +32531,9 @@ that parameter. More's the pity.}
@item void fatal(awk_ext_id_t id, const char *format, ...);
Print a message and then cause @command{gawk} to exit immediately.
+@item void nonfatal(awk_ext_id_t id, const char *format, ...);
+Print a nonfatal error message.
+
@item void warning(awk_ext_id_t id, const char *format, ...);
Print a warning message.
@@ -31646,21 +32586,25 @@ value type, as appropriate. This behavior is summarized in
@caption{API value types returned}
@docbook
<informaltable>
-<tgroup cols="6">
- <colspec colwidth="16.6*"/>
- <colspec colwidth="16.6*"/>
- <colspec colwidth="19.8*" colname="c3"/>
- <colspec colwidth="15*" colname="c4"/>
- <colspec colwidth="15*" colname="c5"/>
- <colspec colwidth="16.6*" colname="c6"/>
- <spanspec spanname="hspan" namest="c3" nameend="c6" align="center"/>
+<tgroup cols="8">
+ <colspec colname="c1"/>
+ <colspec colname="c2"/>
+ <colspec colname="c3"/>
+ <colspec colname="c4"/>
+ <colspec colname="c5"/>
+ <colspec colname="c6"/>
+ <colspec colname="c7"/>
+ <colspec colname="c8"/>
+ <spanspec spanname="hspan" namest="c3" nameend="c8" align="center"/>
<thead>
<row><entry></entry><entry spanname="hspan"><para>Type of Actual Value</para></entry></row>
<row>
<entry></entry>
<entry></entry>
<entry><para>String</para></entry>
+ <entry><para>Strnum</para></entry>
<entry><para>Number</para></entry>
+ <entry><para>Regex</para></entry>
<entry><para>Array</para></entry>
<entry><para>Undefined</para></entry>
</row>
@@ -31671,48 +32615,80 @@ value type, as appropriate. This behavior is summarized in
<entry><para><emphasis role="bold">String</emphasis></para></entry>
<entry><para>String</para></entry>
<entry><para>String</para></entry>
- <entry><para>False</para></entry>
- <entry><para>False</para></entry>
+ <entry><para>String</para></entry>
+ <entry><para>String</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry><para><emphasis role="bold">Strnum</emphasis></para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>Strnum</para></entry>
+ <entry><para>Strnum</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
</row>
<row>
<entry></entry>
<entry><para><emphasis role="bold">Number</emphasis></para></entry>
- <entry><para>Number if can be converted, else false</para></entry>
<entry><para>Number</para></entry>
- <entry><para>False</para></entry>
- <entry><para>False</para></entry>
+ <entry><para>Number</para></entry>
+ <entry><para>Number</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
</row>
<row>
<entry><para><emphasis role="bold">Type</emphasis></para></entry>
+ <entry><para><emphasis role="bold">Regex</emphasis></para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>Regex</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ </row>
+ <row>
+ <entry><para><emphasis role="bold">Requested</emphasis></para></entry>
<entry><para><emphasis role="bold">Array</emphasis></para></entry>
- <entry><para>False</para></entry>
- <entry><para>False</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
<entry><para>Array</para></entry>
- <entry><para>False</para></entry>
+ <entry><para>false</para></entry>
</row>
<row>
- <entry><para><emphasis role="bold">Requested</emphasis></para></entry>
+ <entry></entry>
<entry><para><emphasis role="bold">Scalar</emphasis></para></entry>
<entry><para>Scalar</para></entry>
<entry><para>Scalar</para></entry>
- <entry><para>False</para></entry>
- <entry><para>False</para></entry>
+ <entry><para>Scalar</para></entry>
+ <entry><para>Scalar</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
</row>
<row>
<entry></entry>
<entry><para><emphasis role="bold">Undefined</emphasis></para></entry>
<entry><para>String</para></entry>
+ <entry><para>Strnum</para></entry>
<entry><para>Number</para></entry>
+ <entry><para>Regex</para></entry>
<entry><para>Array</para></entry>
<entry><para>Undefined</para></entry>
</row>
<row>
<entry></entry>
<entry><para><emphasis role="bold">Value cookie</emphasis></para></entry>
- <entry><para>False</para></entry>
- <entry><para>False</para></entry>
- <entry><para>False</para>
- </entry><entry><para>False</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
+ <entry><para>false</para></entry>
</row>
</tbody>
</tgroup>
@@ -31728,41 +32704,45 @@ value type, as appropriate. This behavior is summarized in
@tex
\vglue-1.1\baselineskip
@end tex
-@multitable @columnfractions .166 .166 .198 .15 .15 .166
-@headitem @tab @tab String @tab Number @tab Array @tab Undefined
-@item @tab @b{String} @tab String @tab String @tab False @tab False
-@item @tab @b{Number} @tab Number if can be converted, else false @tab Number @tab False @tab False
-@item @b{Type} @tab @b{Array} @tab False @tab False @tab Array @tab False
-@item @b{Requested} @tab @b{Scalar} @tab Scalar @tab Scalar @tab False @tab False
-@item @tab @b{Undefined} @tab String @tab Number @tab Array @tab Undefined
-@item @tab @b{Value cookie} @tab False @tab False @tab False @tab False
+@c @multitable @columnfractions .166 .166 .198 .15 .15 .166
+@multitable {Requested} {Undefined} {Number} {Number} {Scalar} {Regex} {Array} {Undefined}
+@headitem @tab @tab String @tab Strnum @tab Number @tab Regex @tab Array @tab Undefined
+@item @tab @b{String} @tab String @tab String @tab String @tab String @tab false @tab false
+@item @tab @b{Strnum} @tab false @tab Strnum @tab Strnum @tab false @tab false @tab false
+@item @tab @b{Number} @tab Number @tab Number @tab Number @tab false @tab false @tab false
+@item @b{Type} @tab @b{Regex} @tab false @tab false @tab false @tab Regex @tab false @tab false
+@item @b{Requested} @tab @b{Array} @tab false @tab false @tab false @tab false @tab Array @tab false
+@item @tab @b{Scalar} @tab Scalar @tab Scalar @tab Scalar @tab Scalar @tab false @tab false
+@item @tab @b{Undefined} @tab String @tab Strnum @tab Number @tab Regex @tab Array @tab Undefined
+@item @tab @b{Value cookie} @tab false @tab false @tab false @tab false @tab false @tab false
@end multitable
@end ifnotdocbook
@end ifnotplaintext
@ifplaintext
-@example
- +-------------------------------------------------+
- | Type of Actual Value: |
- +------------+------------+-----------+-----------+
- | String | Number | Array | Undefined |
-+-----------+-----------+------------+------------+-----------+-----------+
-| | String | String | String | False | False |
-| |-----------+------------+------------+-----------+-----------+
-| | Number | Number if | Number | False | False |
-| | | can be | | | |
-| | | converted, | | | |
-| | | else false | | | |
-| |-----------+------------+------------+-----------+-----------+
-| Type | Array | False | False | Array | False |
-| Requested |-----------+------------+------------+-----------+-----------+
-| | Scalar | Scalar | Scalar | False | False |
-| |-----------+------------+------------+-----------+-----------+
-| | Undefined | String | Number | Array | Undefined |
-| |-----------+------------+------------+-----------+-----------+
-| | Value | False | False | False | False |
-| | cookie | | | | |
-+-----------+-----------+------------+------------+-----------+-----------+
-@end example
+@verbatim
+ +-------------------------------------------------------+
+ | Type of Actual Value: |
+ +--------+--------+--------+--------+-------+-----------+
+ | String | Strnum | Number | Regex | Array | Undefined |
++-----------+-----------+--------+--------+--------+--------+-------+-----------+
+| | String | String | String | String | String | false | false |
+| +-----------+--------+--------+--------+--------+-------+-----------+
+| | Strnum | false | Strnum | Strnum | false | false | false |
+| +-----------+--------+--------+--------+--------+-------+-----------+
+| | Number | Number | Number | Number | false | false | false |
+| +-----------+--------+--------+--------+--------+-------+-----------+
+| | Regex | false | false | false | Regex | false | false |
+| Type +-----------+--------+--------+--------+--------+-------+-----------+
+| Requested | Array | false | false | false | false | Array | false |
+| +-----------+--------+--------+--------+--------+-------+-----------+
+| | Scalar | Scalar | Scalar | Scalar | Scalar | false | false |
+| +-----------+--------+--------+--------+--------+-------+-----------+
+| | Undefined | String | Strnum | Number | Regex | Array | Undefined |
+| +-----------+--------+--------+--------+--------+-------+-----------+
+| | Value | false | false | false | false | false | false |
+| | Cookie | | | | | | |
++-----------+-----------+--------+--------+--------+--------+-------+-----------+
+@end verbatim
@end ifplaintext
@end float
@@ -31840,13 +32820,6 @@ An extension can look up the value of @command{gawk}'s special variables.
However, with the exception of the @code{PROCINFO} array, an extension
cannot change any of those variables.
-@quotation CAUTION
-It is possible for the lookup of @code{PROCINFO} to fail. This happens if
-the @command{awk} program being run does not reference @code{PROCINFO};
-in this case, @command{gawk} doesn't bother to create the array and
-populate it.
-@end quotation
-
@node Symbol table by cookie
@subsubsection Variable Access and Update by Cookie
@@ -31868,7 +32841,7 @@ Return false if the value cannot be retrieved.
@item awk_bool_t sym_update_scalar(awk_scalar_t cookie, awk_value_t *value);
Update the value associated with a scalar cookie. Return false if
-the new value is not of type @code{AWK_STRING} or @code{AWK_NUMBER}.
+the new value is not of type @code{AWK_STRING}, @code{AWK_STRNUM}, @code{AWK_REGEX}, or @code{AWK_NUMBER}.
Here too, the predefined variables may not be updated.
@end table
@@ -31989,7 +32962,7 @@ is what the routines in this @value{SECTION} let you do. The functions are as f
@table @code
@item awk_bool_t create_value(awk_value_t *value, awk_value_cookie_t *result);
Create a cached string or numeric value from @code{value} for
-efficient later assignment. Only values of type @code{AWK_NUMBER}
+efficient later assignment. Only values of type @code{AWK_NUMBER}, @code{AWK_REGEX}, @code{AWK_STRNUM},
and @code{AWK_STRING} are allowed. Any other type is rejected.
@code{AWK_UNDEFINED} could be allowed, but doing so would result in
inferior performance.
@@ -32215,9 +33188,10 @@ The array remains an array, but after calling this function, it
has no elements. This is equivalent to using the @code{delete}
statement (@pxref{Delete}).
-@item awk_bool_t flatten_array(awk_array_t a_cookie, awk_flat_array_t **data);
+@item awk_bool_t flatten_array_typed(awk_array_t a_cookie, awk_flat_array_t **data, awk_valtype_t index_type, awk_valtype_t value_type);
For the array represented by @code{a_cookie}, create an @code{awk_flat_array_t}
-structure and fill it in. Set the pointer whose address is passed as @code{data}
+structure and fill it in with indices and values of the requested types.
+Set the pointer whose address is passed as @code{data}
to point to this structure.
Return true upon success, or false otherwise.
@ifset FOR_PRINT
@@ -32229,6 +33203,14 @@ See the next @value{SECTION}
for a discussion of how to
flatten an array and work with it.
+@item awk_bool_t flatten_array(awk_array_t a_cookie, awk_flat_array_t **data);
+For the array represented by @code{a_cookie}, create an @code{awk_flat_array_t}
+structure and fill it in with @code{AWK_STRING} indices and
+@code{AWK_UNDEFINED} values.
+This is superseded by @code{flatten_array_typed()}.
+It is provided as a macro, and remains for convenience and for source code
+compatibility with the previous version of the API.
+
@item awk_bool_t release_flattened_array(awk_array_t a_cookie,
@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_flat_array_t *data);
When done with a flattened array, release the storage using this function.
@@ -32341,7 +33323,7 @@ to double-check that the count in the @code{awk_flat_array_t}
is the same as the count just retrieved:
@example
- if (! flatten_array(value2.array_cookie, & flat_array)) @{
+ if (! flatten_array_typed(value2.array_cookie, & flat_array, AWK_STRING, AWK_UNDEFINED)) @{
printf("dump_array_and_delete: could not flatten array\n");
goto out;
@}
@@ -32637,6 +33619,75 @@ $ @kbd{AWKLIBPATH=$PWD ./gawk -f subarray.awk}
(@xref{Finding Extensions} for more information on the
@env{AWKLIBPATH} environment variable.)
+@node Redirection API
+@subsection Accessing and Manipulating Redirections
+
+The following function allows extensions to access and manipulate redirections.
+
+@table @code
+@item awk_bool_t get_file(const char *name,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ size_t name_len,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ const char *filetype,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ int fd,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ const awk_input_buf_t **ibufp,
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ const awk_output_buf_t **obufp);
+Look up file @code{name} in @command{gawk}'s internal redirection table.
+If @code{name} is @code{NULL} or @code{name_len} is zero, return
+data for the currently open input file corresponding to @code{FILENAME}.
+(This does not access the @code{filetype} argument, so that may be undefined.)
+If the file is not already open, attempt to open it.
+The @code{filetype} argument must be zero-terminated and should be one of:
+
+@table @code
+@item ">"
+A file opened for output.
+
+@item ">>"
+A file opened for append.
+
+@item "<"
+A file opened for input.
+
+@item "|>"
+A pipe opened for output.
+
+@item "|<"
+A pipe opened for input.
+
+@item "|&"
+A two-way coprocess.
+@end table
+
+On error, return an @code{awk_false} value. Otherwise, return
+@code{awk_true}, and return additional information about the redirection
+in the @code{ibufp} and @code{obufp} pointers. For input
+redirections, the @code{*ibufp} value should be non-@code{NULL},
+and @code{*obufp} should be @code{NULL}. For output redirections,
+the @code{*obufp} value should be non-@code{NULL}, and @code{*ibufp}
+should be @code{NULL}. For two-way coprocesses, both values should
+be non-@code{NULL}.
+
+In the usual case, the extension is interested in @code{(*ibufp)->fd}
+and/or @code{fileno((*obufp)->fp)}. If the file is not already
+open, and the @code{fd} argument is nonnegative, @command{gawk}
+will use that file descriptor instead of opening the file in the
+usual way. If @code{fd} is nonnegative, but the file exists already,
+@command{gawk} ignores @code{fd} and returns the existing file. It is
+the caller's responsibility to notice that neither the @code{fd} in
+the returned @code{awk_input_buf_t} nor the @code{fd} in the returned
+@code{awk_output_buf_t} matches the requested value.
+
+Note that supplying a file descriptor is currently @emph{not} supported
+for pipes. However, supplying a file descriptor should work for input,
+output, append, and two-way (coprocess) sockets. If @code{filetype}
+is two-way, @command{gawk} assumes that it is a socket! Note that in
+the two-way case, the input and output file descriptors may differ.
+To check for success, you must check whether either matches.
+@end table
+
+It is anticipated that this API function will be used to implement I/O
+multiplexing and a socket library.
+
@node Extension API Variables
@subsection API Variables
@@ -32663,10 +33714,10 @@ debugging:
@float Table,gawk-api-version
@caption{gawk API version constants}
-@multitable @columnfractions .33 .33 .33
-@headitem API Version @tab C preprocessor define @tab enum constant
-@item Major @tab gawk_api_major_version @tab GAWK_API_MAJOR_VERSION
-@item Minor @tab gawk_api_minor_version @tab GAWK_API_MINOR_VERSION
+@multitable {@b{API Version}} {@code{gawk_api_major_version}} {@code{GAWK_API_MAJOR_VERSION}}
+@headitem API Version @tab C Preprocessor Define @tab enum constant
+@item Major @tab @code{gawk_api_major_version} @tab @code{GAWK_API_MAJOR_VERSION}
+@item Minor @tab @code{gawk_api_minor_version} @tab @code{GAWK_API_MINOR_VERSION}
@end multitable
@end float
@@ -32685,10 +33736,10 @@ constant integers:
@table @code
@item api->major_version
-The major version of the running @command{gawk}
+The major version of the running @command{gawk}.
@item api->minor_version
-The minor version of the running @command{gawk}
+The minor version of the running @command{gawk}.
@end table
It is up to the extension to decide if there are API incompatibilities.
@@ -32761,7 +33812,7 @@ static awk_ext_id_t ext_id;
static const char *ext_version = NULL; /* or @dots{} = "some string" */
static awk_ext_func_t func_table[] = @{
- @{ "name", do_name, 1 @},
+ @{ "name", do_name, 1, 0, awk_false, NULL @},
/* @dots{} */
@};
@@ -32862,6 +33913,19 @@ If @code{ext_version} is not @code{NULL}, register
the version string with @command{gawk}.
@end enumerate
+
+@node Changes from API V1
+@subsection Changes From Version 1 of the API
+
+The current API is @emph{not} binary compatible with version 1 of the API.
+You will have to recompile your extensions in order to use them with
+the current version of @command{gawk}.
+
+Fortunately, at the possible expense of some compile-time warnings, the API remains
+source-code--compatible with the previous API. The major differences are
+the additional members in the @code{awk_ext_func_t} structure, and the
+addition of the third argument to the C implementation function.
+
@node Finding Extensions
@section How @command{gawk} Finds Extensions
@cindex extension search path
@@ -33102,17 +34166,12 @@ The second is a pointer to an @code{awk_value_t} structure, usually named
/* do_chdir --- provide dynamically loaded chdir() function for gawk */
static awk_value_t *
-do_chdir(int nargs, awk_value_t *result)
+do_chdir(int nargs, awk_value_t *result, struct awk_ext_func *unused)
@{
awk_value_t newdir;
int ret = -1;
assert(result != NULL);
-
- if (do_lint && nargs != 1)
- lintwarn(ext_id,
- _("chdir: called with incorrect number of arguments, "
- "expecting 1"));
@end example
The @code{newdir}
@@ -33121,8 +34180,8 @@ with @code{get_argument()}. Note that the first argument is
numbered zero.
If the argument is retrieved successfully, the function calls the
-@code{chdir()} system call. If the @code{chdir()} fails, @code{ERRNO}
-is updated:
+@code{chdir()} system call. If the @code{chdir()} call fails,
+it updates @code{ERRNO}:
@example
if (get_argument(0, AWK_STRING, & newdir)) @{
@@ -33326,15 +34385,11 @@ is set to point to @code{stat()}, instead.
Here is the @code{do_stat()} function, which starts with
variable declarations and argument checking:
-@ignore
-Changed message for page breaking. Used to be:
- "stat: called with incorrect number of arguments (%d), should be 2",
-@end ignore
@example
/* do_stat --- provide a stat() function for gawk */
static awk_value_t *
-do_stat(int nargs, awk_value_t *result)
+do_stat(int nargs, awk_value_t *result, struct awk_ext_func *unused)
@{
awk_value_t file_param, array_param;
char *name;
@@ -33345,13 +34400,6 @@ do_stat(int nargs, awk_value_t *result)
int (*statfunc)(const char *path, struct stat *sbuf) = lstat;
assert(result != NULL);
-
- if (nargs != 2 && nargs != 3) @{
- if (do_lint)
- lintwarn(ext_id,
- _("stat: called with wrong number of arguments"));
- return make_number(-1, result);
- @}
@end example
Then comes the actual work. First, the function gets the arguments.
@@ -33419,11 +34467,9 @@ structures for loading each function into @command{gawk}:
@example
static awk_ext_func_t func_table[] = @{
- @{ "chdir", do_chdir, 1 @},
- @{ "stat", do_stat, 2 @},
-#ifndef __MINGW32__
- @{ "fts", do_fts, 3 @},
-#endif
+ @{ "chdir", do_chdir, 1, 1, awk_false, NULL @},
+ @{ "stat", do_stat, 3, 2, awk_false, NULL @},
+ @dots{}
@};
@end example
@@ -34204,18 +35250,21 @@ As of this writing, there are seven extensions:
GD graphics library extension
@item
+MPFR library extension
+(this provides access to a number of MPFR functions that @command{gawk}'s
+native MPFR support does not)
+
+@item
PDF extension
@item
PostgreSQL extension
@item
-MPFR library extension
-(this provides access to a number of MPFR functions that @command{gawk}'s
-native MPFR support does not)
+Redis extension
@item
-Redis extension
+Select extension
@item
XML parser extension, using the @uref{http://expat.sourceforge.net, Expat}
@@ -34315,7 +35364,7 @@ output wrappers,
and two-way processors)
@item
-Printing fatal, warning, and ``lint'' warning messages
+Printing fatal, nonfatal, warning, and ``lint'' warning messages
@item
Updating @code{ERRNO}, or unsetting it
@@ -34844,6 +35893,10 @@ Indirect function calls
@item
Directories on the command line produce a warning and are skipped
(@pxref{Command-line directories})
+
+@item
+Output errors with @code{print} and @code{printf} need not be fatal
+(@pxref{Nonfatal})
@end itemize
@item
@@ -34931,6 +35984,11 @@ The @code{isarray()} function to check if a variable is an array or not
The @code{bindtextdomain()}, @code{dcgettext()}, and @code{dcngettext()}
functions for internationalization
(@pxref{Programmer i18n})
+
+@item
+The @code{intdiv()} function for doing integer
+division and remainder
+(@pxref{Numeric Functions})
@end itemize
@item
@@ -34969,6 +36027,7 @@ The
@option{-p},
@option{-P},
@option{-r},
+@option{-s},
@option{-S},
@option{-t},
and
@@ -34993,6 +36052,7 @@ and the
@option{--load},
@option{--non-decimal-data},
@option{--optimize},
+@option{--no-optimize},
@option{--posix},
@option{--pretty-print},
@option{--profile},
@@ -35063,6 +36123,19 @@ for @command{gawk} @value{PVERSION} 4.1:
Ultrix
@end itemize
+@item
+Support for the following systems was removed from the code
+for @command{gawk} @value{PVERSION} 4.2:
+
+@c nested table
+@itemize @value{MINUS}
+@item
+MirBSD
+
+@item
+GNU/Linux on Alpha
+@end itemize
+
@end itemize
@c XXX ADD MORE STUFF HERE
@@ -35689,6 +36762,52 @@ Support for Ultrix was removed.
@end itemize
+Version 4.2 introduced the following changes:
+
+@itemize @bullet
+@item
+Changes to @code{ENVIRON} are reflected into @command{gawk}'s
+environment and that of programs that it runs.
+@xref{Auto-set}.
+
+@item
+The @option{--pretty-print} option no longer also runs the @command{awk}
+program.
+@xref{Options}.
+
+@item
+The @command{igawk} program and its manual page are no longer
+installed when @command{gawk} is built.
+@xref{Igawk Program}.
+
+@item
+The @code{intdiv()} function.
+@xref{Numeric Functions}.
+
+@item
+The maximum number of hexadecimal digits in @samp{\x} escapes
+is now two.
+@xref{Escape Sequences}.
+
+@item
+Nonfatal output with @code{print} and @code{printf}.
+@xref{Nonfatal}.
+
+@item
+For many years, POSIX specified that default field splitting
+only allowed spaces and tabs to separate fields, and this was
+how @command{gawk} behaved with @option{--posix}. As of 2013,
+the standard restored historical behavior, and now default
+field splitting with @option{--posix} also allows newlines to
+separate fields.
+
+@item
+Support for MirBSD was removed.
+
+@item
+Support for GNU/Linux on Alpha was removed.
+@end itemize
+
@c XXX ADD MORE STUFF HERE
@end ifclear
@@ -35818,7 +36937,7 @@ and
@uref{http://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xbd_chap09.html#tag_21_09_03_05, its rationale}.}
By using this lovely technical term, the standard gives license
-to implementors to implement ranges in whatever way they choose.
+to implementers to implement ranges in whatever way they choose.
The @command{gawk} maintainer chose to apply the pre-POSIX meaning
both with the default regexp matching and when @option{--traditional} or
@option{--posix} are used.
@@ -36255,6 +37374,12 @@ These files contain the actual @command{gawk} source code.
@end table
@table @file
+@item support/*
+C header and source files for routines that @command{gawk}
+uses, but that are not part of its core functionality.
+For example, argument parsing, regular expression matching,
+and random number generating routines are all kept here.
+
@item ABOUT-NLS
A file containing information about GNU @command{gettext} and translations.
@@ -36355,6 +37480,8 @@ The generated Info file for
The @command{troff} source for a manual page describing the @command{igawk}
program presented in
@ref{Igawk Program}.
+(Since @command{gawk} can do its own @code{@@include} processing,
+neither @command{igawk} nor @file{igawk.1} is installed.)
@item doc/Makefile.in
The input file used during the configuration process to generate the
@@ -36399,8 +37526,6 @@ source file for this @value{DOCUMENT}. It also contains a @file{Makefile.in} fil
@file{Makefile.am} is used by GNU Automake to create @file{Makefile.in}.
The library functions from
@ref{Library Functions},
-and the @command{igawk} program from
-@ref{Igawk Program}
are included as ready-to-use files in the @command{gawk} distribution.
They are installed as part of the installation process.
The rest of the programs in this @value{DOCUMENT} are available in appropriate
@@ -36411,6 +37536,12 @@ The source code, manual pages, and infrastructure files for
the sample extensions included with @command{gawk}.
@xref{Dynamic Extensions}, for more information.
+@item extras/*
+Additional non-essential files. Currently, this directory contains some shell
+startup files to be installed in @file{/etc/profile.d} to aid in manipulating
+the @env{AWKPATH} and @env{AWKLIBPATH} environment variables.
+@xref{Shell Startup Files}, for more information.
+
@item posix/*
Files needed for building @command{gawk} on POSIX-compliant systems.
@@ -36439,6 +37570,7 @@ to configure @command{gawk} for your system yourself.
@menu
* Quick Installation:: Compiling @command{gawk} under Unix.
+* Shell Startup Files:: Shell convenience functions.
* Additional Configuration Options:: Other compile-time options.
* Configuration Philosophy:: How it's all supposed to work.
@end menu
@@ -36519,6 +37651,44 @@ is likely that you will be asked for your password, and you will have
to have been set up previously as a user who is allowed to run the
@command{sudo} command.
+@node Shell Startup Files
+@appendixsubsec Shell Startup Files
+
+The distribution contains shell startup files @file{gawk.sh} and
+@file{gawk.csh}, containing functions to aid in manipulating
+the @env{AWKPATH} and @env{AWKLIBPATH} environment variables.
+On a Fedora GNU/Linux system, these files should be installed in @file{/etc/profile.d};
+on other platforms, the appropriate location may be different.
+
+@table @command
+
+@cindex @command{gawkpath_default} shell function
+@item gawkpath_default
+Reset the @env{AWKPATH} environment variable to its default value.
+
+@cindex @command{gawkpath_prepend} shell function
+@item gawkpath_prepend
+Add the argument to the front of the @env{AWKPATH} environment variable.
+
+@cindex @command{gawkpath_append} shell function
+@item gawkpath_append
+Add the argument to the end of the @env{AWKPATH} environment variable.
+
+@cindex @command{gawklibpath_default} shell function
+@item gawklibpath_default
+Reset the @env{AWKLIBPATH} environment variable to its default value.
+
+@cindex @command{gawklibpath_prepend} shell function
+@item gawklibpath_prepend
+Add the argument to the front of the @env{AWKLIBPATH} environment variable.
+
+@cindex @command{gawklibpath_append} shell function
+@item gawklibpath_append
+Add the argument to the end of the @env{AWKLIBPATH} environment variable.
+
+@end table
+
+
@node Additional Configuration Options
@appendixsubsec Additional Configuration Options
@cindex @command{gawk}, configuring, options
@@ -41102,6 +42272,7 @@ Consistency issues:
Use MS-DOS not MS DOS
Use an empty set of parentheses after built-in and awk function names.
Use "multiFOO" without a hyphen.
+ Use "time zone" as two words, not "timezone".
Date: Wed, 13 Apr 94 15:20:52 -0400
From: rms@gnu.org (Richard Stallman)
diff --git a/doc/using-git.texi b/doc/using-git.texi
new file mode 100644
index 00000000..1812c153
--- /dev/null
+++ b/doc/using-git.texi
@@ -0,0 +1,1179 @@
+\input texinfo @c -*-texinfo-*-
+@c %**start of header (This is for running Texinfo on a region.)
+@setfilename using-git.info
+@settitle Workflow in the @command{gawk} project
+@c %**end of header (This is for running Texinfo on a region.)
+
+@dircategory Network applications
+@direntry
+* Gawkworkflow: (using-git). Workflow in the `gawk' project.
+@end direntry
+
+@iftex
+@set DOCUMENT book
+@set CHAPTER chapter
+@set SECTION section
+@set DARKCORNER @inmargin{@image{lflashlight,1cm}, @image{rflashlight,1cm}}
+@end iftex
+@ifinfo
+@set DOCUMENT Info file
+@set CHAPTER major node
+@set SECTION node
+@set DARKCORNER (d.c.)
+@end ifinfo
+@ifhtml
+@set DOCUMENT web page
+@set CHAPTER chapter
+@set SECTION section
+@set DARKCORNER (d.c.)
+@end ifhtml
+
+@set FN file name
+@set FFN File Name
+
+@c merge the function and variable indexes into the concept index
+@ifinfo
+@synindex fn cp
+@synindex vr cp
+@end ifinfo
+@iftex
+@syncodeindex fn cp
+@syncodeindex vr cp
+@end iftex
+
+@c If "finalout" is commented out, the printed output will show
+@c black boxes that mark lines that are too long. Thus, it is
+@c unwise to comment it out when running a master in case there are
+@c overfulls which are deemed okay.
+
+@iftex
+@finalout
+@end iftex
+
+@smallbook
+
+@set TITLE Workflow in the @command{gawk} project
+@set EDITION 0.0
+@set UPDATE-MONTH August, 2014
+@c gawk versions:
+@set VERSION 4.1
+@set PATCHLEVEL 0
+
+@copying
+This is Edition @value{EDITION} of @cite{@value{TITLE}},
+for the @value{VERSION}.@value{PATCHLEVEL} (or later) version of the GNU
+implementation of AWK.
+@sp 2
+Copyright (C) 2014, 2015 Free Software Foundation, Inc.
+@sp 2
+Permission is granted to copy, distribute and/or modify this document
+under the terms of the GNU Free Documentation License, Version 1.3 or
+any later version published by the Free Software Foundation; with the
+Invariant Sections being ``GNU General Public License'', the Front-Cover
+texts being (a) (see below), and with the Back-Cover Texts being (b)
+(see below). A copy of the license is included in the section entitled
+``GNU Free Documentation License''.
+
+@enumerate a
+@item
+The FSF's Back-Cover Text is: ``You have the freedom to
+copy and modify this GNU manual.''
+@end enumerate
+@end copying
+
+@ifinfo
+This file documents the workflow of the developers in the GNU
+Awk (@command{gawk}) project.
+
+@insertcopying
+@end ifinfo
+
+@setchapternewpage odd
+
+@titlepage
+@title @value{TITLE}
+@subtitle Edition @value{EDITION}
+@subtitle @value{UPDATE-MONTH}
+@author J@"urgen Kahrs
+@author with Arnold D. Robbins
+
+@c Include the Distribution inside the titlepage environment so
+@c that headings are turned off. Headings on and off do not work.
+
+@page
+@vskip 0pt plus 1filll
+@sp 2
+Published by:
+@sp 1
+
+Free Software Foundation @*
+51 Franklin Street, Fifth Floor @*
+Boston, MA 02110-1301 USA @*
+Phone: +1-617-542-5942 @*
+Fax: +1-617-542-2652 @*
+Email: @email{gnu@@gnu.org} @*
+URL: @uref{http://www.gnu.org/} @*
+
+ISBN 1-882114-93-0 @*
+
+@insertcopying
+
+@c @sp 2
+@c Cover art by ?????.
+@end titlepage
+
+@iftex
+@headings off
+@evenheading @thispage@ @ @ @strong{@value{TITLE}} @| @|
+@oddheading @| @| @strong{@thischapter}@ @ @ @thispage
+@end iftex
+
+@ifnottex
+@node Top
+@top Introduction
+@comment node-name, next, previous, up
+
+This file documents the workflow of the developers of GNU Awk (@command{gawk}),
+version 4.1 and later.
+
+@insertcopying
+@end ifnottex
+
+@menu
+* Introduction:: A quick, practical start.
+* Basics of GIT repositories:: The fundamental environment of
+ the developer.
+* Conventions used in the repository:: How to behave.
+* Tutorial for a first-time-gawk-contributor:: How to get started with least
+ pain.
+* FAQs and HOWTOs:: General recipes for daily work.
+* Links:: Where to find the stuff
+ mentioned in this document.
+* GNU Free Documentation License:: The license for this document.
+* Index:: The index.
+
+@detailmenu
+* Quick Start::
+* Setting up a proper @command{git} repository::
+* Pulling the latest changes from the remote repository::
+* Checking out a feature branch from the remote repository::
+* Semantics of Cloning:: What to
+ consider
+ before you
+ clone.
+* Local versus Remote:: Where my
+ source code
+ really is.
+* Tracking and Merging:: What the
+ others are
+ doing.
+* master::
+* stable::
+* feature::
+* who does what::
+* step-by-step instructions for a first-time-gawk-contributor::
+* step-by-step instructions for a first-time-gawk-administrator::
+* general recipes for daily work::
+* references and URLs to books and other texts::
+@end detailmenu
+@end menu
+
+@contents
+
+@node Introduction
+@chapter Introduction
+
+This @value{DOCUMENT} is meant to be a description of the working habits
+that were established for collaboration in the GNU Awk project.
+Such stuff tends to become rather dry, and to prevent you from getting
+bored at this early stage, we will begin this @value{CHAPTER} with a
+brief introduction that shows you how to get the
+source code of the GNU Awk project compiled on your machine.
+
+We do this in order to get you motivated to follow us through the later
+steps that consist mainly of conceptual considerations.
+We hope that (in later, more abstract steps) you will always remember
+this down-to-earth introduction, should you ever wonder what all the
+later bizarre trickery is good for.
+
+@menu
+* Quick Start::
+* Setting up a proper @command{git} repository::
+* Pulling the latest changes from the remote repository::
+* Checking out a feature branch from the remote repository::
+@end menu
+
+@node Quick Start
+@section Quick Start: Compiling @command{gawk} in 5 Minutes
+
+The following steps will look familiar to you; they are not that much
+different from the steps you used in the old days when you downloaded
+a tar ball, extracted it and compiled the source code. It is mainly
+the very first step that looks different; instead of downloading the
+tar ball you need the tool @command{git}.@footnote{If the command
+@command{git} does not exist on your machine,
+you need administrator privileges to install it. By convention, the
+command is usually part of an installation package by the same name.}
+
+@example
+git clone git://git.savannah.gnu.org/gawk.git
+cd gawk
+git checkout gawk-4.1-stable
+./bootstrap.sh
+./configure
+make
+./gawk --version
+@end example
+
+There are two differences to your working habits. In the third step,
+you have to extract (or @dfn{check out}) the @code{gawk-4.1-stable} branch of the current source
+code (there are other branches available, that's the point where
+things get interesting).
+
+In the fourth step, you must run the @command{bootstrap.sh} script in
+order to correctly set the timestamps on various files. Doing this is essential;
+it allows you to avoid having to install the correct versions of the
+various autotools as used by the @command{gawk} maintainer.
+
+Isn't this simple? No, it's not that simple.
+If you plan to go any further (for example compile the source
+code again next week, including next week's latest update), you will
+need to know what's going on when you use this seemingly simple
+@command{git} command (and that's the point where things get bizarre).
+
+In the next @value{CHAPTER} you will find a more thorough conceptual
+explanation; here we are satisfied with getting to know the practical
+steps necessary to get a working environment going that you can use
+in your daily work in a reliable way.
+
+@node Setting up a proper @command{git} repository
+@section Setting up a proper @command{git} repository
+
+After the initial @emph{checkout} you have access to all the source code
+files that the maintainers have pushed through the official release procedure.
+
+You may not have noticed, but each change is well documented and traceable.
+This process of tracing the change history is so precise, reproducible and
+fine-grained that any dubious change may be kicked out later and the author
+of dubious stuff identified by name and change date.
+
+Some bookkeeping is
+necessary for this and that's why you need @command{git}. @command{git}
+does all this for you. Developers who have used @command{svn} or
+@command{cvs} in the past will not be surprised to hear that each change
+is traceable precisely (they know that @command{svn} and @command{cvs}
+can do this, too).
+
+But the first-time user of @command{git} (as well as the @command{svn} user)
+may still have failed to notice what he actually did earlier in this @value{CHAPTER}.
+It is not just a mere copy of the source code that you created,
+it is a full copy of the entire @dfn{upstream} repository server that you created
+(or @dfn{cloned}). This means that others could make their own copy of
+@emph{your} repository and treat it as @emph{their upstream} repository.
+
+This is the essential difference between working with @command{svn} and
+working with @command{git}: by @emph{cloning} you become a repository
+administrator, whether you like it or not. As such you have some duties that
+go beyond the duties of an @command{svn} user. For example, you have to
+identify yourself properly as the owner of the repository by setting
+some global variables identifying you. The global settings will be used
+every time you connect again to the upstream repository.
+
+@smallexample
+git config --global user.name "@var{First-Name Last-Name}"
+git config --global user.email @var{email@@address.site}
+git config --global color.ui auto
+@end smallexample
+
+You may leave these variables unset, but then you are reduced to an
+anonymous consumer-only behaviour whenever you connect to the upstream
+repository. Later you will learn that there are many other variables
+to be set, most of them serving as defaults that can be overridden if
+you like. Choosing to work with defaults makes work quick and easy for the most frequent
+use cases, but that comes at a cost: With so many helpful defaults
+you may be overwhelmed by the detail and complexity of the real inner working.
+Here is an example of one of the author's configuration variables:
+
+@smallexample
+$ @kbd{git config --list}
+@print{} user.name=First-Name Last-Name
+@print{} user.email=email@@address.site
+@print{} color.diff=auto
+@print{} color.status=auto
+@print{} color.branch=auto
+@print{} gui.spellingdictionary=en_US
+@print{} core.repositoryformatversion=0
+@print{} core.filemode=true
+@print{} core.logallrefupdates=true
+@print{} remote.origin.fetch=+refs/heads/*:refs/remotes/origin/*
+@print{} remote.origin.url=ssh://jkahrs@@git.sv.gnu.org/srv/git/gawk.git
+@print{} branch.master.remote=origin
+@print{} branch.master.merge=refs/heads/master
+@print{} branch.xgawk_load.remote=origin
+@print{} branch.xgawk_load.merge=refs/heads/xgawk_load
+@end smallexample
+
+Changing these variables with specialized variants of the @command{git} command
+may seem awkward to you and perhaps you prefer to use your favourite text editor
+to overview and change the variables. That's easy: edit the file @file{.git/config}.
+
+@smallexample
+$ @kbd{cat .git/config}
+@print{} [core]
+@print{} repositoryformatversion = 0
+@print{} filemode = true
+@print{} bare = false
+@print{} logallrefupdates = true
+@print{} [remote "origin"]
+@print{} fetch = +refs/heads/*:refs/remotes/origin/*
+@print{} url = ssh://jkahrs@@git.sv.gnu.org/srv/git/gawk.git
+@print{} [branch "master"]
+@print{} remote = origin
+@print{} merge = refs/heads/master
+@print{} [branch "cmake"]
+@print{} remote = origin
+@print{} merge = refs/heads/cmake
+@end smallexample
+
+Now you can see how variables are structured group-wise.
+But wait, where is the e-mail address in this list of variables?
+It is missing in the file @file{.git/config} because that file
+contains only the local settings of this one repository
+(while there may be others on your machine).
+The e-mail address is a variable of a more general kind that
+should be stored above all the repositories.
+These are referred to as the @dfn{global} variables:
+
+@smallexample
+$ @kbd{git config --list --global}
+@print{} user.name=First-Name Last-Name
+@print{} user.email=email@@address.site
+@print{} color.diff=auto
+@print{} color.status=auto
+@print{} color.branch=auto
+@print{} gui.spellingdictionary=en_US
+@end smallexample
+
+If you wonder whether there is an option @option{--local} to list
+the local variables, then you should look into the well-structured
+man pages of @command{git}. The level of detail may overwhelm you,
+but one day you might appreciate it.
+
+@smallexample
+git help config
+@end smallexample
+
+@node Pulling the latest changes from the remote repository
+@section Pulling the latest changes from the remote repository
+
+Whether you set any of these variables or not, sooner or later you will want
+to catch up with the changes that happened in the upstream repository.
+So, how can you update your copy of the repository and re-build the source code?
+The easiest way is to rely on defaults and use the @emph{pull} command to request
+updates from the upstream repository:
+
+@smallexample
+git pull
+./bootstrap.sh
+./configure
+make
+@end smallexample
+
+When using the @emph{pull} command, all the changes available in all branches of
+the upstream repository will be copied (and merged) into your local repository.
+We assume here that we still have the @emph{gawk-4.1-stable} branch checked out (as described earlier)
+and we are not interested in changes to other existing branches.
+The merging of changes will be done inside the branches only, so that changes in one
+branch are kept inside this branch and don't mix up other branches.
+
+@c ========================================
+
+But @emph{what is a branch?} you may wonder. It is the name given to a sequence of changes
+that were made to the master branch outside the master branch.
+It is easy to look up all the available branches
+(the names of the change sequences) in the remote upstream repository.
+
+@smallexample
+$ @kbd{git branch -a}
+@print{} * master
+@print{} remotes/origin/cmake
+@end smallexample
+
+The asterisk in front of the branch name assures you of the fact that you see
+the source files as they are in the @emph{master} branch.
+
+@node Checking out a feature branch from the remote repository
+@section Checking out a feature branch from the remote repository
+
+It is also easy to
+have a look at other branches, for example when you are interested in what is
+going on in a certain @emph{feature branch} that the maintainer set up recently
+for a new feature to be developed separately (so that others can go on undisturbed).
+
+@smallexample
+$ @kbd{git checkout origin/cmake}
+$ @kbd{git branch -a}
+@print{} master
+@print{} * remotes/origin/cmake
+$ @kbd{./bootstrap.sh}
+$ @kbd{./configure}
+$ @kbd{make}
+@end smallexample
+
+When you try this, take care that you have not changed anything in any source file.
+@command{git} would notice changes and refuse to check out the other branch.
+This is meant to protect you from losing any local changes that you forgot to save.
+Any source file that is part of the repository and gets generated during the build
+in a slightly different way than the original would cause such a problem.
+
+@smallexample
+$ @kbd{git status}
+@print{} # On branch master
+@print{} # Changes not staged for commit:
+@print{} # awkgram.c
+@end smallexample
+
+Here we have @file{awkgram.c} that was generated from @file{awkgram.y}.
+But what was generated differently in the file?
+
+@smallexample
+git diff awkgram.c
+@end smallexample
+
+Ok, you are not interested in textual changes to the copyright notice
+that are only due to a new calendar year. You are also not interested
+in the internals of the generated parser and only wonder
+@emph{How do we get back the original file from the repository?}
+
+@smallexample
+$ @kbd{git checkout awkgram.c}
+$ @kbd{git diff awkgram.c | wc -l}
+@print{} 0
+@end smallexample
+
+After checking the file out once more, there is obviously no difference
+to the copy saved in the repository. But let's not get distracted; we
+wanted to find out what was going on in this feature branch. We can
+find out by asking @command{git} what has changed in the file @file{ChangeLog}
+of this feature branch relative to the master branch.
+
+@smallexample
+git diff origin/master ChangeLog
+@end smallexample
+
+@noindent
+This produces:
+
+@smallexample
+diff --git a/ChangeLog b/ChangeLog
+index eab657c..a499ec5 100644
+--- a/ChangeLog
++++ b/ChangeLog
+@@ -1,81 +1,3 @@
+-2014-09-07 Arnold D. Robbins <arnold@@skeeve.com>
+-
+- * awk.h: Move libsigsegv stuff to ...
+- * main.c: here. Thanks to Yehezkel Bernat for motivating
+- the cleanup.
+- * symbol.c (make_symbol, install, install_symbol): Add const to
+- first parameter. Adjust decls and fix up uses.
+@end smallexample
+
+Looks like a minor cleanup operation in the master branch that has not
+yet been merged into the feature branch. We still don't know what is new
+in this feature branch. How can we find out? By looking at all changes that exist.
+
+@smallexample
+$ @kbd{git diff origin/master --numstat}
+@print{} 0 78 ChangeLog
+@print{} 8 3 README_d/README.cmake
+@end smallexample
+
+On your screen you see a list of all differences between the currently
+checked-out branch and the master branch. It tells you the names of the
+files that have changed, along with the number of added and deleted lines.
+Now we can have a closer look at who changed what.
+Let's single out one particular file that looks interesting.
+As usual there is a @command{diff} sub-command to list all the changed
+lines, but there is also a @command{blame} sub-command that tells you
+who made the last change to any of the lines.
+
+@smallexample
+git blame README_d/README.cmake
+@end smallexample
+
+@noindent
+This produces (in part):
+
+@smallexample
+2092a35f (Juergen Kahrs 2014-08-12 17:11:20 +0200 1) CMake is a build automation system
+2092a35f (Juergen Kahrs 2014-08-12 17:11:20 +0200 2) http://en.wikipedia.org/wiki/Cmake
+2092a35f (Juergen Kahrs 2014-08-12 17:11:20 +0200 3)
+2092a35f (Juergen Kahrs 2014-08-12 17:11:20 +0200 4) We try to use it as a replacement for the established GNU build system.
+2092a35f (Juergen Kahrs 2014-08-12 17:11:20 +0200 5) This attempt is currently only experimental. If you wonder why anyone
+2092a35f (Juergen Kahrs 2014-08-12 17:11:20 +0200 6) should do this, read
+@end smallexample
+
+The strange number on the left margin is the short form of a numerical
+identifier of the change set. At the moment you can safely ignore it,
+but this number is the key you need in case you should ever want to
+cherry-pick some change sets. But cherry-picking is still far away;
+before you can do this, you have to learn how to make changes to your
+local repository and @command{push} them to the upstream repository.
+Some conceptual basics are needed for understanding this essential part
+of the workflow.
+
+@node Basics of GIT repositories
+@chapter Basics of GIT repositories
+
+@menu
+* Semantics of Cloning:: What to consider before you clone.
+* Local versus Remote:: Where my source code really is.
+* Tracking and Merging:: What the others are doing.
+@end menu
+
+@c http://iverilog.wikia.com/wiki/Installation_Guide
+@c http://www.linuxjournal.com/article/2840
+@c http://git-scm.com/book/en/Git-Branching-Branching-Workflows
+@c https://www.atlassian.com/en/git/workflows
+@c https://help.github.com/articles/what-is-a-good-git-workflow
+@c https://guides.github.com/introduction/flow/index.html
+@c http://supercollider.sourceforge.net/wiki/index.php/Developer_cheatsheet_for_git
+@c http://savannah.gnu.org/maintenance/UsingGit/
+@c http://www.emacswiki.org/emacs/GitForEmacsDevs
+
+What is tracking?
+
+@display
+- How can I use git to contribute source code?
+You need an account at Savannah. Read this to understand the first steps:
+ http://savannah.gnu.org/maintenance/UsingGit
+ README.git
+Use your account there to register your public ssh key at Savannah.
+Then you are ready to checkout. Remember that (when cloning) you are
+setting up your own local repository and make sure you configure it
+properly.
+ git clone ssh://my_account_name@@git.sv.gnu.org/srv/git/gawk.git
+@end display
+
+@node Semantics of Cloning
+@section Semantics of Cloning
+
+@node Local versus Remote
+@section Local versus Remote
+
+@node Tracking and Merging
+@section Tracking and Merging
+
+@node Conventions used in the repository
+@chapter Conventions used in the repository
+
+@menu
+* master::
+* stable::
+* feature::
+* who does what::
+@end menu
+
+@node master
+@section master
+
+@node stable
+@section stable
+
+@node feature
+@section feature
+
+@node who does what
+@section who does what
+
+@node Tutorial for a first-time-gawk-contributor
+@chapter Tutorial for a first-time-gawk-contributor
+
+@menu
+* step-by-step instructions for a first-time-gawk-contributor::
+* step-by-step instructions for a first-time-gawk-administrator::
+@end menu
+
+@node step-by-step instructions for a first-time-gawk-contributor
+@section step-by-step instructions for a first-time-gawk-contributor
+
+@node step-by-step instructions for a first-time-gawk-administrator
+@section step-by-step instructions for a first-time-gawk-administrator
+
+@c e-mail from Arnold 2014-08.24
+@c Thanks to Michal for pointing us in the right direction!
+@c I see this:
+@c
+@c bash-4.2$ git config --get push.default
+@c simple
+@c
+@c What does yours say?
+@c
+@c It appears that "simple" will be the default in version 2.0:
+@c
+@c From:
+@c http://blog.nicoschuele.com/posts/git-2-0-changes-push-default-to-simple
+@c
+@c Matching
+@c
+@c The 'matching' option is the default behavior in Git 1.x. It means that if you do a git push without specifying a branch, it will push all your local branches to their matching ones on your remote repository.
+@c
+@c Simple
+@c
+@c The new default in Git 2.x is 'simple'. It means that when doing a git push without specifying a branch, only your current branch will be pushed to the one git pull would normally get your code from."
+@c
+@c So this must explain it. I'll bet yours is set to "matching". I have no
+@c idea how mine got set to "simple", since I don't recall doing that.
+@c
+@c In the future, I will simply make sure to push before switching branches.
+@c I think I actually prefer that behavior, since it's more intuitive to me.
+
+
+@node FAQs and HOWTOs
+@chapter FAQs and HOWTOs
+
+@menu
+* general recipes for daily work::
+@end menu
+
+@node general recipes for daily work
+@section general recipes for daily work
+
+@node Links
+@chapter Links
+
+@menu
+* references and URLs to books and other texts::
+@end menu
+
+@node references and URLs to books and other texts
+@section references and URLs to books and other texts
+
+@c The GNU Free Documentation License.
+@node GNU Free Documentation License
+@unnumbered GNU Free Documentation License
+@cindex FDL (Free Documentation License)
+@cindex Free Documentation License (FDL)
+@cindex GNU Free Documentation License
+@center Version 1.3, 3 November 2008
+
+@c This file is intended to be included within another document,
+@c hence no sectioning command or @node.
+
+@display
+Copyright @copyright{} 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc.
+@uref{http://fsf.org/}
+
+Everyone is permitted to copy and distribute verbatim copies
+of this license document, but changing it is not allowed.
+@end display
+
+@enumerate 0
+@item
+PREAMBLE
+
+The purpose of this License is to make a manual, textbook, or other
+functional and useful document @dfn{free} in the sense of freedom: to
+assure everyone the effective freedom to copy and redistribute it,
+with or without modifying it, either commercially or noncommercially.
+Secondarily, this License preserves for the author and publisher a way
+to get credit for their work, while not being considered responsible
+for modifications made by others.
+
+This License is a kind of ``copyleft'', which means that derivative
+works of the document must themselves be free in the same sense. It
+complements the GNU General Public License, which is a copyleft
+license designed for free software.
+
+We have designed this License in order to use it for manuals for free
+software, because free software needs free documentation: a free
+program should come with manuals providing the same freedoms that the
+software does. But this License is not limited to software manuals;
+it can be used for any textual work, regardless of subject matter or
+whether it is published as a printed book. We recommend this License
+principally for works whose purpose is instruction or reference.
+
+@item
+APPLICABILITY AND DEFINITIONS
+
+This License applies to any manual or other work, in any medium, that
+contains a notice placed by the copyright holder saying it can be
+distributed under the terms of this License. Such a notice grants a
+world-wide, royalty-free license, unlimited in duration, to use that
+work under the conditions stated herein. The ``Document'', below,
+refers to any such manual or work. Any member of the public is a
+licensee, and is addressed as ``you''. You accept the license if you
+copy, modify or distribute the work in a way requiring permission
+under copyright law.
+
+A ``Modified Version'' of the Document means any work containing the
+Document or a portion of it, either copied verbatim, or with
+modifications and/or translated into another language.
+
+A ``Secondary Section'' is a named appendix or a front-matter section
+of the Document that deals exclusively with the relationship of the
+publishers or authors of the Document to the Document's overall
+subject (or to related matters) and contains nothing that could fall
+directly within that overall subject. (Thus, if the Document is in
+part a textbook of mathematics, a Secondary Section may not explain
+any mathematics.) The relationship could be a matter of historical
+connection with the subject or with related matters, or of legal,
+commercial, philosophical, ethical or political position regarding
+them.
+
+The ``Invariant Sections'' are certain Secondary Sections whose titles
+are designated, as being those of Invariant Sections, in the notice
+that says that the Document is released under this License. If a
+section does not fit the above definition of Secondary then it is not
+allowed to be designated as Invariant. The Document may contain zero
+Invariant Sections. If the Document does not identify any Invariant
+Sections then there are none.
+
+The ``Cover Texts'' are certain short passages of text that are listed,
+as Front-Cover Texts or Back-Cover Texts, in the notice that says that
+the Document is released under this License. A Front-Cover Text may
+be at most 5 words, and a Back-Cover Text may be at most 25 words.
+
+A ``Transparent'' copy of the Document means a machine-readable copy,
+represented in a format whose specification is available to the
+general public, that is suitable for revising the document
+straightforwardly with generic text editors or (for images composed of
+pixels) generic paint programs or (for drawings) some widely available
+drawing editor, and that is suitable for input to text formatters or
+for automatic translation to a variety of formats suitable for input
+to text formatters. A copy made in an otherwise Transparent file
+format whose markup, or absence of markup, has been arranged to thwart
+or discourage subsequent modification by readers is not Transparent.
+An image format is not Transparent if used for any substantial amount
+of text. A copy that is not ``Transparent'' is called ``Opaque''.
+
+Examples of suitable formats for Transparent copies include plain
+@sc{ascii} without markup, Texinfo input format, La@TeX{} input
+format, @acronym{SGML} or @acronym{XML} using a publicly available
+@acronym{DTD}, and standard-conforming simple @acronym{HTML},
+PostScript or @acronym{PDF} designed for human modification. Examples
+of transparent image formats include @acronym{PNG}, @acronym{XCF} and
+@acronym{JPG}. Opaque formats include proprietary formats that can be
+read and edited only by proprietary word processors, @acronym{SGML} or
+@acronym{XML} for which the @acronym{DTD} and/or processing tools are
+not generally available, and the machine-generated @acronym{HTML},
+PostScript or @acronym{PDF} produced by some word processors for
+output purposes only.
+
+The ``Title Page'' means, for a printed book, the title page itself,
+plus such following pages as are needed to hold, legibly, the material
+this License requires to appear in the title page. For works in
+formats which do not have any title page as such, ``Title Page'' means
+the text near the most prominent appearance of the work's title,
+preceding the beginning of the body of the text.
+
+The ``publisher'' means any person or entity that distributes copies
+of the Document to the public.
+
+A section ``Entitled XYZ'' means a named subunit of the Document whose
+title either is precisely XYZ or contains XYZ in parentheses following
+text that translates XYZ in another language. (Here XYZ stands for a
+specific section name mentioned below, such as ``Acknowledgements'',
+``Dedications'', ``Endorsements'', or ``History''.) To ``Preserve the Title''
+of such a section when you modify the Document means that it remains a
+section ``Entitled XYZ'' according to this definition.
+
+The Document may include Warranty Disclaimers next to the notice which
+states that this License applies to the Document. These Warranty
+Disclaimers are considered to be included by reference in this
+License, but only as regards disclaiming warranties: any other
+implication that these Warranty Disclaimers may have is void and has
+no effect on the meaning of this License.
+
+@item
+VERBATIM COPYING
+
+You may copy and distribute the Document in any medium, either
+commercially or noncommercially, provided that this License, the
+copyright notices, and the license notice saying this License applies
+to the Document are reproduced in all copies, and that you add no other
+conditions whatsoever to those of this License. You may not use
+technical measures to obstruct or control the reading or further
+copying of the copies you make or distribute. However, you may accept
+compensation in exchange for copies. If you distribute a large enough
+number of copies you must also follow the conditions in section 3.
+
+You may also lend copies, under the same conditions stated above, and
+you may publicly display copies.
+
+@item
+COPYING IN QUANTITY
+
+If you publish printed copies (or copies in media that commonly have
+printed covers) of the Document, numbering more than 100, and the
+Document's license notice requires Cover Texts, you must enclose the
+copies in covers that carry, clearly and legibly, all these Cover
+Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on
+the back cover. Both covers must also clearly and legibly identify
+you as the publisher of these copies. The front cover must present
+the full title with all words of the title equally prominent and
+visible. You may add other material on the covers in addition.
+Copying with changes limited to the covers, as long as they preserve
+the title of the Document and satisfy these conditions, can be treated
+as verbatim copying in other respects.
+
+If the required texts for either cover are too voluminous to fit
+legibly, you should put the first ones listed (as many as fit
+reasonably) on the actual cover, and continue the rest onto adjacent
+pages.
+
+If you publish or distribute Opaque copies of the Document numbering
+more than 100, you must either include a machine-readable Transparent
+copy along with each Opaque copy, or state in or with each Opaque copy
+a computer-network location from which the general network-using
+public has access to download using public-standard network protocols
+a complete Transparent copy of the Document, free of added material.
+If you use the latter option, you must take reasonably prudent steps,
+when you begin distribution of Opaque copies in quantity, to ensure
+that this Transparent copy will remain thus accessible at the stated
+location until at least one year after the last time you distribute an
+Opaque copy (directly or through your agents or retailers) of that
+edition to the public.
+
+It is requested, but not required, that you contact the authors of the
+Document well before redistributing any large number of copies, to give
+them a chance to provide you with an updated version of the Document.
+
+@item
+MODIFICATIONS
+
+You may copy and distribute a Modified Version of the Document under
+the conditions of sections 2 and 3 above, provided that you release
+the Modified Version under precisely this License, with the Modified
+Version filling the role of the Document, thus licensing distribution
+and modification of the Modified Version to whoever possesses a copy
+of it. In addition, you must do these things in the Modified Version:
+
+@enumerate A
+@item
+Use in the Title Page (and on the covers, if any) a title distinct
+from that of the Document, and from those of previous versions
+(which should, if there were any, be listed in the History section
+of the Document). You may use the same title as a previous version
+if the original publisher of that version gives permission.
+
+@item
+List on the Title Page, as authors, one or more persons or entities
+responsible for authorship of the modifications in the Modified
+Version, together with at least five of the principal authors of the
+Document (all of its principal authors, if it has fewer than five),
+unless they release you from this requirement.
+
+@item
+State on the Title page the name of the publisher of the
+Modified Version, as the publisher.
+
+@item
+Preserve all the copyright notices of the Document.
+
+@item
+Add an appropriate copyright notice for your modifications
+adjacent to the other copyright notices.
+
+@item
+Include, immediately after the copyright notices, a license notice
+giving the public permission to use the Modified Version under the
+terms of this License, in the form shown in the Addendum below.
+
+@item
+Preserve in that license notice the full lists of Invariant Sections
+and required Cover Texts given in the Document's license notice.
+
+@item
+Include an unaltered copy of this License.
+
+@item
+Preserve the section Entitled ``History'', Preserve its Title, and add
+to it an item stating at least the title, year, new authors, and
+publisher of the Modified Version as given on the Title Page. If
+there is no section Entitled ``History'' in the Document, create one
+stating the title, year, authors, and publisher of the Document as
+given on its Title Page, then add an item describing the Modified
+Version as stated in the previous sentence.
+
+@item
+Preserve the network location, if any, given in the Document for
+public access to a Transparent copy of the Document, and likewise
+the network locations given in the Document for previous versions
+it was based on. These may be placed in the ``History'' section.
+You may omit a network location for a work that was published at
+least four years before the Document itself, or if the original
+publisher of the version it refers to gives permission.
+
+@item
+For any section Entitled ``Acknowledgements'' or ``Dedications'', Preserve
+the Title of the section, and preserve in the section all the
+substance and tone of each of the contributor acknowledgements and/or
+dedications given therein.
+
+@item
+Preserve all the Invariant Sections of the Document,
+unaltered in their text and in their titles. Section numbers
+or the equivalent are not considered part of the section titles.
+
+@item
+Delete any section Entitled ``Endorsements''. Such a section
+may not be included in the Modified Version.
+
+@item
+Do not retitle any existing section to be Entitled ``Endorsements'' or
+to conflict in title with any Invariant Section.
+
+@item
+Preserve any Warranty Disclaimers.
+@end enumerate
+
+If the Modified Version includes new front-matter sections or
+appendices that qualify as Secondary Sections and contain no material
+copied from the Document, you may at your option designate some or all
+of these sections as invariant. To do this, add their titles to the
+list of Invariant Sections in the Modified Version's license notice.
+These titles must be distinct from any other section titles.
+
+You may add a section Entitled ``Endorsements'', provided it contains
+nothing but endorsements of your Modified Version by various
+parties---for example, statements of peer review or that the text has
+been approved by an organization as the authoritative definition of a
+standard.
+
+You may add a passage of up to five words as a Front-Cover Text, and a
+passage of up to 25 words as a Back-Cover Text, to the end of the list
+of Cover Texts in the Modified Version. Only one passage of
+Front-Cover Text and one of Back-Cover Text may be added by (or
+through arrangements made by) any one entity. If the Document already
+includes a cover text for the same cover, previously added by you or
+by arrangement made by the same entity you are acting on behalf of,
+you may not add another; but you may replace the old one, on explicit
+permission from the previous publisher that added the old one.
+
+The author(s) and publisher(s) of the Document do not by this License
+give permission to use their names for publicity for or to assert or
+imply endorsement of any Modified Version.
+
+@item
+COMBINING DOCUMENTS
+
+You may combine the Document with other documents released under this
+License, under the terms defined in section 4 above for modified
+versions, provided that you include in the combination all of the
+Invariant Sections of all of the original documents, unmodified, and
+list them all as Invariant Sections of your combined work in its
+license notice, and that you preserve all their Warranty Disclaimers.
+
+The combined work need only contain one copy of this License, and
+multiple identical Invariant Sections may be replaced with a single
+copy. If there are multiple Invariant Sections with the same name but
+different contents, make the title of each such section unique by
+adding at the end of it, in parentheses, the name of the original
+author or publisher of that section if known, or else a unique number.
+Make the same adjustment to the section titles in the list of
+Invariant Sections in the license notice of the combined work.
+
+In the combination, you must combine any sections Entitled ``History''
+in the various original documents, forming one section Entitled
+``History''; likewise combine any sections Entitled ``Acknowledgements'',
+and any sections Entitled ``Dedications''. You must delete all
+sections Entitled ``Endorsements.''
+
+@item
+COLLECTIONS OF DOCUMENTS
+
+You may make a collection consisting of the Document and other documents
+released under this License, and replace the individual copies of this
+License in the various documents with a single copy that is included in
+the collection, provided that you follow the rules of this License for
+verbatim copying of each of the documents in all other respects.
+
+You may extract a single document from such a collection, and distribute
+it individually under this License, provided you insert a copy of this
+License into the extracted document, and follow this License in all
+other respects regarding verbatim copying of that document.
+
+@item
+AGGREGATION WITH INDEPENDENT WORKS
+
+A compilation of the Document or its derivatives with other separate
+and independent documents or works, in or on a volume of a storage or
+distribution medium, is called an ``aggregate'' if the copyright
+resulting from the compilation is not used to limit the legal rights
+of the compilation's users beyond what the individual works permit.
+When the Document is included in an aggregate, this License does not
+apply to the other works in the aggregate which are not themselves
+derivative works of the Document.
+
+If the Cover Text requirement of section 3 is applicable to these
+copies of the Document, then if the Document is less than one half of
+the entire aggregate, the Document's Cover Texts may be placed on
+covers that bracket the Document within the aggregate, or the
+electronic equivalent of covers if the Document is in electronic form.
+Otherwise they must appear on printed covers that bracket the whole
+aggregate.
+
+@item
+TRANSLATION
+
+Translation is considered a kind of modification, so you may
+distribute translations of the Document under the terms of section 4.
+Replacing Invariant Sections with translations requires special
+permission from their copyright holders, but you may include
+translations of some or all Invariant Sections in addition to the
+original versions of these Invariant Sections. You may include a
+translation of this License, and all the license notices in the
+Document, and any Warranty Disclaimers, provided that you also include
+the original English version of this License and the original versions
+of those notices and disclaimers. In case of a disagreement between
+the translation and the original version of this License or a notice
+or disclaimer, the original version will prevail.
+
+If a section in the Document is Entitled ``Acknowledgements'',
+``Dedications'', or ``History'', the requirement (section 4) to Preserve
+its Title (section 1) will typically require changing the actual
+title.
+
+@item
+TERMINATION
+
+You may not copy, modify, sublicense, or distribute the Document
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense, or distribute it is void, and
+will automatically terminate your rights under this License.
+
+However, if you cease all violation of this License, then your license
+from a particular copyright holder is reinstated (a) provisionally,
+unless and until the copyright holder explicitly and finally
+terminates your license, and (b) permanently, if the copyright holder
+fails to notify you of the violation by some reasonable means prior to
+60 days after the cessation.
+
+Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, receipt of a copy of some or all of the same material does
+not give you any rights to use it.
+
+@item
+FUTURE REVISIONS OF THIS LICENSE
+
+The Free Software Foundation may publish new, revised versions
+of the GNU Free Documentation License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns. See
+@uref{http://www.gnu.org/copyleft/}.
+
+Each version of the License is given a distinguishing version number.
+If the Document specifies that a particular numbered version of this
+License ``or any later version'' applies to it, you have the option of
+following the terms and conditions either of that specified version or
+of any later version that has been published (not as a draft) by the
+Free Software Foundation. If the Document does not specify a version
+number of this License, you may choose any version ever published (not
+as a draft) by the Free Software Foundation. If the Document
+specifies that a proxy can decide which future versions of this
+License can be used, that proxy's public statement of acceptance of a
+version permanently authorizes you to choose that version for the
+Document.
+
+@item
+RELICENSING
+
+``Massive Multiauthor Collaboration Site'' (or ``MMC Site'') means any
+World Wide Web server that publishes copyrightable works and also
+provides prominent facilities for anybody to edit those works. A
+public wiki that anybody can edit is an example of such a server. A
+``Massive Multiauthor Collaboration'' (or ``MMC'') contained in the
+site means any set of copyrightable works thus published on the MMC
+site.
+
+``CC-BY-SA'' means the Creative Commons Attribution-Share Alike 3.0
+license published by Creative Commons Corporation, a not-for-profit
+corporation with a principal place of business in San Francisco,
+California, as well as future copyleft versions of that license
+published by that same organization.
+
+``Incorporate'' means to publish or republish a Document, in whole or
+in part, as part of another Document.
+
+An MMC is ``eligible for relicensing'' if it is licensed under this
+License, and if all works that were first published under this License
+somewhere other than this MMC, and subsequently incorporated in whole
+or in part into the MMC, (1) had no cover texts or invariant sections,
+and (2) were thus incorporated prior to November 1, 2008.
+
+The operator of an MMC Site may republish an MMC contained in the site
+under CC-BY-SA on the same site at any time before August 1, 2009,
+provided the MMC is eligible for relicensing.
+
+@end enumerate
+
+@c fakenode --- for prepinfo
+@unnumberedsec ADDENDUM: How to use this License for your documents
+
+To use this License in a document you have written, include a copy of
+the License in the document and put the following copyright and
+license notices just after the title page:
+
+@smallexample
+@group
+ Copyright (C) @var{year} @var{your name}.
+ Permission is granted to copy, distribute and/or modify this document
+ under the terms of the GNU Free Documentation License, Version 1.3
+ or any later version published by the Free Software Foundation;
+ with no Invariant Sections, no Front-Cover Texts, and no Back-Cover
+ Texts. A copy of the license is included in the section entitled ``GNU
+ Free Documentation License''.
+@end group
+@end smallexample
+
+If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts,
+replace the ``with@dots{}Texts.'' line with this:
+
+@smallexample
+@group
+ with the Invariant Sections being @var{list their titles}, with
+ the Front-Cover Texts being @var{list}, and with the Back-Cover Texts
+ being @var{list}.
+@end group
+@end smallexample
+
+If you have Invariant Sections without Cover Texts, or some other
+combination of the three, merge those two alternatives to suit the
+situation.
+
+If your document contains nontrivial examples of program code, we
+recommend releasing these examples in parallel under your choice of
+free software license, such as the GNU General Public License,
+to permit their use in free software.
+
+@c Local Variables:
+@c ispell-local-pdict: "ispell-dict"
+@c End:
+
+
+@node Index
+@comment node-name, next, previous, up
+
+@unnumbered Index
+@printindex cp
+@bye
+
+Conventions:
+1. Functions, built-in or otherwise, do NOT have () after them.
+2. Gawk built-in vars and functions are in @code. Also program vars and
+ functions.
+3. HTTP method names are in @code.
+4. Protocols such as echo, ftp, etc., are in @samp.
+5. URLs are in @url.
+6. All RFCs go in the index. Put a space between `RFC' and the number.
diff --git a/doc/wordlist b/doc/wordlist
index 37e1552c..d0464dc9 100644
--- a/doc/wordlist
+++ b/doc/wordlist
@@ -390,6 +390,7 @@ POS
POSIX
POSIXLY
PP
+PQgetvalue
PREC
PROCINFO
PVERSION
@@ -421,6 +422,7 @@ Quicksort
QuikTrim
RANLIB
README
+REGEX
RELICENSING
REVOUT
RLENGTH
@@ -436,6 +438,7 @@ Readfile
Readline
Redirections
Redis
+Regex
Regexp
Regexps
Reimplementing
@@ -483,6 +486,7 @@ Solaris
Stallman
Stepan
Stewartson's
+Strnum
Strtonum
Subarrays
Sublicensing
@@ -879,6 +883,7 @@ dir
dircategory
direntry
distributable
+div
djgpp
dl
dlload
@@ -969,6 +974,7 @@ fdata
fe
ferror
fffffffffff
+fffffffffffd
fflush
fi
fieldlist
@@ -988,6 +994,7 @@ filetype
filll
finalout
findpat
+finfo
finx
firstname
flac
@@ -1700,6 +1707,7 @@ strerror
strftime
strlen
strnum
+strnums
strtod
strtonum
struct
@@ -1902,6 +1910,7 @@ xdeadBEEF
xdigit
xfcc
xfcd
+xffffffffffffffd
xgawk
xgettext
xnrkb