aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Arnold D. Robbins <arnold@skeeve.com> 2022-03-03 14:21:20 +0200
committer Arnold D. Robbins <arnold@skeeve.com> 2022-03-03 14:21:20 +0200
commit a0eb03daf877ecdc38cd3f2d5cd6620fc3a406d3 (patch)
tree c81c503cb8490266520b7f9dd3442af6a09a278d
parent fd30fd05540a3a26a525ae9ad519a096c355b6b2 (diff)
parent e38662457d3f502d246921e6084db2c765b74058 (diff)
download egawk-a0eb03daf877ecdc38cd3f2d5cd6620fc3a406d3.tar.gz
egawk-a0eb03daf877ecdc38cd3f2d5cd6620fc3a406d3.tar.bz2
egawk-a0eb03daf877ecdc38cd3f2d5cd6620fc3a406d3.zip
Merge branch 'master' into feature/docit
-rw-r--r-- ChangeLog 4
-rw-r--r-- NEWS 6
-rw-r--r-- doc/ChangeLog 6
-rw-r--r-- doc/gawk.1 2226
-rw-r--r-- doc/wordlist 22
-rw-r--r-- doc/wordlist3 106
6 files changed, 246 insertions, 2124 deletions
diff --git a/ChangeLog b/ChangeLog
index 09700d81..7329943a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2022-03-03 Arnold D. Robbins <arnold@skeeve.com>
+
+ * NEWS: Updated.
+
2022-02-27 Arnold D. Robbins <arnold@skeeve.com>
* main.c (usage): Update the message for the gawkbug program.
diff --git a/NEWS b/NEWS
index e7af9926..5946cea6 100644
--- a/NEWS
+++ b/NEWS
@@ -1,4 +1,4 @@
- Copyright (C) 2019, 2020, 2021 Free Software Foundation, Inc.
+ Copyright (C) 2019, 2020, 2021, 2022 Free Software Foundation, Inc.
Copying and distribution of this file, with or without modification,
are permitted in any medium without royalty provided the copyright
@@ -33,6 +33,10 @@ for saving / restoring all of gawk's variables and arrays.
7. The new `gawkbug' script should be used for reporting bugs.
+8. The manual page (doc/gawk.1) has been considerably reduced in size.
+Wherever possible, details were replaced with references to the online
+copy of the manual.
+
Changes from 5.1.1 to 5.1.2
---------------------------
diff --git a/doc/ChangeLog b/doc/ChangeLog
index 58645fe4..bddb738a 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,9 @@
+2022-03-03 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawk.1: Chopped down in size by about 40%.
+ * wordlist3: Updated for new man page.
+ * wordlist: Updated since it needed it.
+
2022-03-01 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Typo fix. Thanks to Antonio Colombo for pointing
diff --git a/doc/gawk.1 b/doc/gawk.1
index b22a9485..8543c8b5 100644
--- a/doc/gawk.1
+++ b/doc/gawk.1
@@ -13,7 +13,7 @@
. if \w'\(rq' .ds rq "\(rq
. \}
.\}
-.TH GAWK 1 "Jul 05 2021" "Free Software Foundation" "Utility Commands"
+.TH GAWK 1 "March 1 2022" "Free Software Foundation" "Utility Commands"
.SH NAME
gawk \- pattern scanning and processing language
.SH SYNOPSIS
@@ -58,37 +58,12 @@ available in the
and
.B ARGV
pre-defined \*(AK variables.
-.PP
-When
-.I gawk
-is invoked with the
-.B \-\^\-profile
-option, it starts gathering profiling statistics
-from the execution of the program.
-.I Gawk
-runs more slowly in this mode, and automatically produces an execution
-profile in the file
-.B awkprof.out
-when done.
-See the
-.B \-\^\-profile
-option, below.
-.PP
-.I Gawk
-also has an integrated debugger. An interactive debugging session can
-be started by supplying the
-.B \-\^\-debug
-option to the command line. In this mode of execution,
-.I gawk
-loads the
-AWK source code and then prompts for debugging commands.
-.I Gawk
-can only debug AWK program source provided with the
-.B \-f
-and
-.B \-\^\-include
-options.
-The debugger is documented in \*(EP.
+.SH PREFACE
+This manual page is intentionally as terse as possible.
+Full details are provided in \*(EP, and you should look
+there for the full story on any specific feature.
+Where possible, links to the online version of the manual
+are provided.
.SH OPTION FORMAT
.I Gawk
options may be either traditional \*(PX-style one letter options,
@@ -119,27 +94,19 @@ Standard options are listed first, followed by options for
.I gawk
extensions, listed alphabetically by short option.
.TP
-.PD 0
-.BI \-f " program-file"
-.TP
-.PD
+.BI \-f " program-file\fR,\fP "\c
.BI \-\^\-file " program-file"
Read the \*(AK program source from the file
.IR program-file ,
instead of from the first command line argument.
Multiple
.B \-f
-(or
-.BR \-\^\-file )
options may be used.
Files read with
.B \-f
are treated as if they begin with an implicit \fB@namespace "awk"\fR statement.
.TP
-.PD 0
-.BI \-F " fs"
-.TP
-.PD
+.BI \-F " fs\fR, \fP"\c
.BI \-\^\-field-separator " fs"
Use
.I fs
@@ -148,11 +115,7 @@ for the input field separator (the value of the
predefined
variable).
.TP
-.PD 0
-\fB\-v\fI var\fB\^=\^\fIval\fR
-.TP
-.PD
-\fB\-\^\-assign \fIvar\fB\^=\^\fIval\fR
+\fB\-v\fI var\fB\^=\^\fIval\fR, \fB\-\^\-assign \fIvar\fB\^=\^\fIval\fR
Assign the value
.I val
to the variable
@@ -162,23 +125,14 @@ Such variable values are available to the
.B BEGIN
rule of an \*(AK program.
.TP
-.PD 0
-.B \-b
-.TP
-.PD
+.BR \-b ", "\c
.B \-\^\-characters\-as\-bytes
-Treat all input data as single-byte characters. In other words,
-don't pay any attention to the locale information when attempting to
-process strings as multibyte characters.
+Treat all input data as single-byte characters.
The
.B \-\^\-posix
option overrides this one.
-.bp
.TP
-.PD 0
-.B \-c
-.TP
-.PD
+.BR \-c ", "\c
.B \-\^\-traditional
Run in
.I compatibility
@@ -187,52 +141,20 @@ mode. In compatibility mode,
behaves identically to Brian Kernighan's
.IR awk ;
none of the \*(GN-specific extensions are recognized.
-.\" The use of
-.\" .B \-\^\-traditional
-.\" is preferred over the other forms of this option.
-See
-.BR "GNU EXTENSIONS" ,
-below, for more information.
-.TP
-.PD 0
-.B \-C
.TP
-.PD
+.BR \-C ", "\c
.B \-\^\-copyright
Print the short version of the \*(GN copyright information message on
the standard output and exit successfully.
.TP
-.PD 0
-\fB\-d\fR[\fIfile\fR]
-.TP
-.PD
-\fB\-\^\-dump-variables\fR[\fB=\fIfile\fR]
+\fB\-d\fR[\fIfile\fR], \fB\-\^\-dump-variables\fR[\fB=\fIfile\fR]
Print a sorted list of global variables, their types and final values to
.IR file .
-If no
-.I file
-is provided,
-.I gawk
-uses a file named
+The default file is
.B awkvars.out
in the current directory.
-.sp .5
-Having a list of all the global variables is a good way to look for
-typographical errors in your programs.
-You would also use this option if you have a large program with a lot of
-functions, and you want to be sure that your functions don't
-inadvertently use global variables that you meant to be local.
-(This is a particularly easy mistake to make with simple variable
-names like
-.BR i ,
-.BR j ,
-and so on.)
.TP
-.PD 0
-\fB\-D\fR[\fIfile\fR]
-.TP
-.PD
-\fB\-\^\-debug\fR[\fB=\fIfile\fR]
+\fB\-D\fR[\fIfile\fR], \fB\-\^\-debug\fR[\fB=\fIfile\fR]
Enable debugging of \*(AK programs.
By default, the debugger reads commands interactively from the keyboard
(standard input).
@@ -240,30 +162,30 @@ The optional
.I file
argument specifies a file with a list
of commands for the debugger to execute non-interactively.
+.sp .5
+In this mode of execution,
+.I gawk
+loads the
+AWK source code and then prompts for debugging commands.
+.I Gawk
+can only debug AWK program source provided with the
+.B \-f
+and
+.B \-\^\-include
+options.
+The debugger is documented in \*(EP; see
+.IR https://www.gnu.org/software/gawk/manual/html_node/Debugger.html#Debugger .
.TP
-.PD 0
-.BI "\-e " program-text
-.TP
-.PD
+.BI \-e " program-text\fR, \fP"\c
.BI \-\^\-source " program-text"
Use
.I program-text
as \*(AK program source code.
-This option allows the easy intermixing of library functions (used via the
-.B \-f
-and
-.B \-\^\-include
-options) with source code entered on the command line.
-It is intended primarily for medium to large \*(AK programs used
-in shell scripts.
Each argument supplied via
.B \-e
is treated as if it begins with an implicit \fB@namespace "awk"\fR statement.
.TP
-.PD 0
-.BI "\-E " file
-.TP
-.PD
+.BI \-E " file\fR, \fP"\c
.BI \-\^\-exec " file"
Similar to
.BR \-f ,
@@ -275,37 +197,23 @@ passing in options or source code (!) on the command line
from a URL.
This option disables command-line variable assignments.
.TP
-.PD 0
-.B \-g
-.TP
-.PD
+.BR \-g ", "\c
.B \-\^\-gen\-pot
Scan and parse the \*(AK program, and generate a \*(GN
.B \&.pot
(Portable Object Template)
format file on standard output with entries for all localizable
strings in the program. The program itself is not executed.
-See the \*(GN
-.I gettext
-distribution for more information on
-.B \&.pot
-files.
-.TP
-.PD 0
-.B \-h
.TP
-.PD
+.BR \-h ", "\c
.B \-\^\-help
Print a relatively short summary of the available options on
the standard output.
-(Per the
+Per the
.IR "GNU Coding Standards" ,
-these options cause an immediate, successful exit.)
-.TP
-.PD 0
-.BI "\-i " include-file
+these options cause an immediate, successful exit.
.TP
-.PD
+.BI \-i " include-file\fR, \fP"\c
.BI \-\^\-include " include-file"
Load an awk source library.
This searches for the library using the
@@ -320,10 +228,7 @@ Files read with
.B \-\^\-include
are treated as if they begin with an implicit \fB@namespace "awk"\fR statement.
.TP
-.PD 0
-.BI \-I
-.TP
-.PD
+.BR \-I ", "\c
.B \-\^\-trace
Print the internal byte code names as they are executed when running
the program. The trace is printed to standard error. Each ``op code''
@@ -331,10 +236,7 @@ is preceded by a
.B +
sign in the output.
.TP
-.PD 0
-.BI "\-l " lib
-.TP
-.PD
+.BI \-l " lib\fR, "\c
.BI \-\^\-load " lib"
Load a
.I gawk
@@ -347,33 +249,15 @@ be made after appending the default shared library suffix for the platform.
The library initialization routine is expected to be named
.BR dl_load() .
.TP
-.PD 0
-.BR "\-L " [ \fIvalue\fR ]
-.TP
-.PD
-.BR \-\^\-lint [ =\fIvalue\fR ]
+\fB\-L \fR[\fIvalue\fR], \fB\-\^\-lint\fR[\fB=\fIvalue\fR]
Provide warnings about constructs that are
dubious or non-portable to other \*(AK implementations.
-With an optional argument of
-.BR fatal ,
-lint warnings become fatal errors.
-This may be drastic, but its use will certainly encourage the
-development of cleaner \*(AK programs.
-With an optional argument of
-.BR invalid ,
-only warnings about things that are
-actually invalid are issued.
-(This is not fully implemented yet.)
-With an optional argument of
-.BR no-ext ,
-warnings about
-.I gawk
-extensions are disabled.
-.TP
-.PD 0
-.B \-M
+See
+.I https://www.gnu.org/software/gawk/manual/html_node/Options.html#Options
+for the list of possible values for
+.IR value .
.TP
-.PD
+.BR \-M ", "\c
.B \-\^\-bignum
Force arbitrary precision arithmetic on numbers. This option has
no effect if
@@ -383,158 +267,76 @@ is not compiled to use the GNU MPFR and GMP libraries.
.I gawk
issues a warning.)
.TP
-.PD 0
-.B \-n
-.TP
-.PD
+.BR \-n ", "\c
.B \-\^\-non\-decimal\-data
Recognize octal and hexadecimal values in input data.
.I "Use this option with great caution!"
.TP
-.PD 0
-.B \-N
-.TP
-.PD
+.BR \-N ", "\c
.B \-\^\-use\-lc\-numeric
Force
.I gawk
to use the locale's decimal point character when parsing input data.
-Although the POSIX standard requires this behavior, and
-.I gawk
-does so when
-.B \-\^\-posix
-is in effect, the default is to follow traditional behavior and use a
-period as the decimal point, even in locales where the period is not the
-decimal point character. This option overrides the default behavior,
-without the full draconian strictness of the
-.B \-\^\-posix
-option.
.ig
.\" This option is left undocumented, on purpose.
.TP
-.PD 0
-.B "\-W nostalgia"
-.TP
-.PD
+.BR "\-W nostalgia" ", "\c
.B \-\^\-nostalgia
Provide a moment of nostalgia for long time
.I awk
users.
..
.TP
-.PD 0
-\fB\-o\fR[\fIfile\fR]
-.TP
-.PD
-\fB\-\^\-pretty-print\fR[\fB=\fIfile\fR]
+\fB\-o\fR[\fIfile\fR], \fB\-\^\-pretty-print\fR[\fB=\fIfile\fR]
Output a pretty printed version of the program to
.IR file .
-If no
-.I file
-is provided,
-.I gawk
-uses a file named
+The default file is
.B awkprof.out
in the current directory.
This option implies
.BR \-\^\-no\-optimize .
.TP
-.PD 0
-.B \-O
-.TP
-.PD
+.BR \-O ", "\c
.B \-\^\-optimize
Enable
.IR gawk 's
default optimizations upon the internal representation of the program.
-Currently, this just includes simple constant folding.
This option is on by default.
.TP
-.PD 0
-\fB\-p\fR[\fIprof-file\fR]
-.TP
-.PD
-\fB\-\^\-profile\fR[\fB=\fIprof-file\fR]
+\fB\-p\fR[\fIprof-file\fR], \fB\-\^\-profile\fR[\fB=\fIprof-file\fR]
Start a profiling session, and send the profiling data to
.IR prof-file .
The default is
-.BR awkprof.out .
+.B awkprof.out
+in the current directory.
The profile contains execution counts of each statement in the program
in the left margin and function call counts for each user-defined function.
+.I Gawk
+runs more slowly in this mode.
This option implies
.BR \-\^\-no\-optimize .
.TP
-.PD 0
-.B \-P
-.TP
-.PD
+.BR \-P ", "\c
.B \-\^\-posix
This turns on
.I compatibility
-mode, with the following additional restrictions:
-.RS
-.TP "\w'\(bu'u+1n"
-\(bu
-.B \ex
-escape sequences are not recognized.
-.TP
-\(bu
-You cannot continue lines after
-.B ?
-and
-.BR : .
-.TP
-\(bu
-The synonym
-.B func
-for the keyword
-.B function
-is not recognized.
-.TP
-\(bu
-The operators
-.B **
-and
-.B **=
-cannot be used in place of
-.B ^
-and
-.BR ^= .
-.RE
-.TP
-.PD 0
-.B \-r
+mode, and disables a number of common extensions.
.TP
-.PD
+.BR \-r ", "\c
.B \-\^\-re\-interval
Enable the use of
.I "interval expressions"
-in regular expression matching
-(see
-.BR "Regular Expressions" ,
-below).
-Interval expressions were not traditionally available in the
-\*(AK language. The \*(PX standard added them, to make
-.I awk
-and
-.I egrep
-consistent with each other.
-They are enabled by default, but this option remains for use together with
-.BR \-\^\-traditional .
-.TP
-.PD 0
-.B \-s
+in regular expression matching.
+Interval expressions
+are enabled by default, but this option remains for backwards compatibility.
.TP
-.PD
+.BR \-s ", "\c
.B \-\^\-no\-optimize
Disable
.IR gawk 's
default optimizations upon the internal representation of the program.
.TP
-.PD 0
-.B \-S
-.TP
-.PD
+.BR \-S ", "\c
.B \-\^\-sandbox
Run
.I gawk
@@ -546,51 +348,33 @@ output redirection with
.BR print " and " printf ,
and loading dynamic extensions.
Command execution (through pipelines) is also disabled.
-This effectively blocks a script from accessing local resources,
-except for the files specified on the command line.
-.TP
-.PD 0
-.B \-t
.TP
-.PD
+.BR \-t ", "\c
.B \-\^\-lint\-old
Provide warnings about constructs that are
not portable to the original version of \*(UX
.IR awk .
.TP
-.PD 0
-.B \-V
-.TP
-.PD
+.BR \-V ", "\c
.B \-\^\-version
Print version information for this particular copy of
.I gawk
on the standard output.
-This is useful mainly for knowing if the current copy of
-.I gawk
-on your system
-is up to date with respect to whatever the Free Software Foundation
-is distributing.
-This is also useful when reporting bugs.
-(Per the
+This is useful when reporting bugs.
+Per the
.IR "GNU Coding Standards" ,
-these options cause an immediate, successful exit.)
+these options cause an immediate, successful exit.
.TP
.B \-\^\-
Signal the end of options. This is useful to allow further arguments to the
\*(AK program itself to start with a \*(lq\-\*(rq.
-This provides consistency with the argument parsing convention used
-by most other \*(PX programs.
.PP
In compatibility mode,
any other options are flagged as invalid, but are otherwise ignored.
In normal operation, as long as program text has been supplied, unknown
options are passed on to the \*(AK program in the
.B ARGV
-array for processing. This is particularly useful for running \*(AK
-programs via the
-.B #!
-executable interpreter mechanism.
+array for processing.
.PP
For \*(PX compatibility, the
.B \-W
@@ -629,15 +413,12 @@ options may be used multiple times on the command line.
reads the program text as if all the
.IR program-file s
and command line source texts
-had been concatenated together. This is useful for building libraries
-of \*(AK functions, without having to include them in each new \*(AK
-program that uses them. It also provides the ability to mix library
-functions with command line programs.
+had been concatenated together.
.PP
In addition, lines beginning with
.B @include
-may be used to include other source files into your program,
-making library use even easier. This is equivalent
+may be used to include other source files into your program.
+This is equivalent
to using the
.B \-\^\-include
option.
@@ -708,11 +489,6 @@ will be assigned the value
(This happens after any
.B BEGIN
rule(s) have been run.)
-Command line variable assignment
-is most useful for dynamically assigning values to the variables
-\*(AK uses to control how input is broken into fields and records.
-It is also useful for controlling state if multiple passes are needed over
-a single data file.
.PP
If the value of a particular element of
.B ARGV
@@ -731,6 +507,7 @@ before processing the contents of the file. Similarly,
executes
the code associated with
.B ENDFILE
+rules
after processing the file.
.PP
For each record in the input,
@@ -758,7 +535,7 @@ of
.I awk
treat a directory on the command line as a fatal error.
.PP
-Starting with version 4.0 of
+For
.IR gawk ,
a directory on the command line
produces a warning, but is otherwise skipped. If either of the
@@ -779,36 +556,18 @@ Additionally,
allows variables to have regular-expression type.
\*(AK also has one dimensional
arrays; arrays with multiple dimensions may be simulated.
-.I Gawk
-provides true arrays of arrays; see
-.BR Arrays ,
-below.
+However,
+.I gawk
+provides true arrays of arrays.
Several pre-defined variables are set as a program
runs; these are described as needed and summarized below.
.SS Records
Normally, records are separated by newline characters. You can control how
records are separated by assigning values to the built-in variable
.BR RS .
-If
-.B RS
-is any single character, that character separates records.
-Otherwise,
-.B RS
-is a regular expression. Text in the input that matches this
-regular expression separates the record.
-However, in compatibility mode,
-only the first character of its string
-value is used for separating records.
-If
-.B RS
-is set to the null string, then records are separated by
-empty lines.
-When
-.B RS
-is set to the null string, the newline character always acts as
-a field separator, in addition to whatever value
-.B FS
-may have.
+See
+.I https://www.gnu.org/software/gawk/manual/html_node/Records.html
+for the details.
.SS Fields
As each input record is read,
.I gawk
@@ -817,59 +576,13 @@ splits the record into
using the value of the
.B FS
variable as the field separator.
-If
-.B FS
-is a single character, fields are separated by that character.
-If
-.B FS
-is the null string, then each individual character becomes a
-separate field.
-Otherwise,
-.B FS
-is expected to be a full regular expression.
-In the special case that
-.B FS
-is a single space, fields are separated
-by runs of spaces and/or tabs and/or newlines.
-.BR NOTE :
-The value of
-.B IGNORECASE
-(see below) also affects how fields are split when
-.B FS
-is a regular expression, and how records are separated when
-.B RS
-is a regular expression.
-.PP
-If the
+Additionally,
.B FIELDWIDTHS
-variable is set to a space-separated list of numbers, each field is
-expected to have fixed width, and
-.I gawk
-splits up the record using the specified widths.
-Each field width may optionally be preceded by a colon-separated
-value specifying the number of characters to skip before the field starts.
-The value of
-.B FS
-is ignored.
-Assigning a new value to
-.B FS
-or
-.B FPAT
-overrides the use of
-.BR FIELDWIDTHS .
-.PP
-Similarly, if the
+and
.B FPAT
-variable is set to a string representing a regular expression,
-each field is made up of text that matches that regular expression. In
-this case, the regular expression describes the fields themselves,
-instead of the text that separates the fields.
-Assigning a new value to
-.B FS
-or
-.B FIELDWIDTHS
-overrides the use of
-.BR FPAT .
+may be used to control input field splitting.
+See the details, starting at
+.IR https://www.gnu.org/software/gawk/manual/html_node/Fields.html .
.PP
Each field in the input record may be referenced by its position:
.BR $1 ,
@@ -878,17 +591,6 @@ and so on.
.B $0
is the whole record,
including leading and trailing whitespace.
-Fields need not be referenced by constants:
-.RS
-.PP
-.ft B
-n = 5
-.br
-print $n
-.ft R
-.RE
-.PP
-prints the fifth field in the input record.
.PP
The variable
.B NF
@@ -924,12 +626,12 @@ causes the record to be resplit, creating new
values for the fields.
.SS Built-in Variables
.IR Gawk\^ "'s"
-built-in variables are:
+built-in variables are listed below.
+This list is purposely terse. For details, see
+.IR https://www.gnu.org/software/gawk/manual/html_node/Built_002din-Variables.html .
.TP "\w'\fBFIELDWIDTHS\fR'u+1n"
.B ARGC
-The number of command line arguments (does not include options to
-.IR gawk ,
-or the program source).
+The number of command line arguments.
.TP
.B ARGIND
The index in
@@ -941,19 +643,12 @@ Array of command line arguments. The array is indexed from
0 to
.B ARGC
\- 1.
-Dynamically changing the contents of
-.B ARGV
-can control the files used for data.
.TP
.B BINMODE
On non-POSIX systems, specifies use of \*(lqbinary\*(rq mode for all file I/O.
-Numeric values of 1, 2, or 3, specify that input files, output files, or
-all files, respectively, should use binary I/O.
-String values of \fB"r"\fR, or \fB"w"\fR specify that input files, or output files,
-respectively, should use binary I/O.
-String values of \fB"rw"\fR or \fB"wr"\fR specify that all files
-should use binary I/O.
-Any other string value is treated as \fB"rw"\fR, but generates a warning message.
+See
+.I https://www.gnu.org/software/gawk/manual/html_node/PC-Using.html
+for the details.
.TP
.B CONVFMT
The conversion format for numbers, \fB"%.6g"\fR, by default.
@@ -961,19 +656,7 @@ The conversion format for numbers, \fB"%.6g"\fR, by default.
.B ENVIRON
An array containing the values of the current environment.
The array is indexed by the environment variables, each element being
-the value of that variable (e.g., \fBENVIRON["HOME"]\fP might be
-\fB"/home/arnold"\fR).
-.sp
-In POSIX mode,
-changing this array does not affect the environment seen by programs which
-.I gawk
-spawns via redirection or the
-.B system()
-function.
-Otherwise,
-.I gawk
-updates its real environment so that programs it spawns see
-the changes.
+the value of that variable.
.TP
.B ERRNO
If a system error occurs either doing a redirection for
@@ -987,15 +670,6 @@ then
is set to
a string describing the error.
The value is subject to translation in non-English locales.
-If the string in
-.B ERRNO
-corresponds to a system error in the
-.IR errno (3)
-variable, then the numeric value can be found in
-.B PROCINFO["errno"].
-For non-system errors,
-.B PROCINFO["errno"]
-will be zero.
.TP
.B FIELDWIDTHS
A whitespace-separated list of field widths. When set,
@@ -1006,9 +680,6 @@ value of the
variable as the field separator.
Each field width may optionally be preceded by a colon-separated
value specifying the number of characters to skip before the field starts.
-See
-.BR Fields ,
-above.
.TP
.B FILENAME
The name of the current input file.
@@ -1036,14 +707,12 @@ regular expression, instead of using the
value of
.B FS
as the field separator.
-See
-.BR Fields ,
-above.
.TP
.B FS
-The input field separator, a space by default. See
-.BR Fields ,
-above.
+The input field separator, a space by default.
+See
+.I https://www.gnu.org/software/gawk/manual/html_node/Field-Separators.html
+for the details.
.TP
.B FUNCTAB
An array whose indices and corresponding values
@@ -1058,66 +727,15 @@ array.
.TP
.B IGNORECASE
Controls the case-sensitivity of all regular expression
-and string operations. If
-.B IGNORECASE
-has a non-zero value, then string comparisons and
-pattern matching in rules,
-field splitting with
-.B FS
-and
-.BR FPAT ,
-record separating with
-.BR RS ,
-regular expression
-matching with
-.B ~
-and
-.BR !~ ,
-and the
-.BR gensub() ,
-.BR gsub() ,
-.BR index() ,
-.BR match() ,
-.BR patsplit() ,
-.BR split() ,
-and
-.B sub()
-built-in functions all ignore case when doing regular expression
-operations.
-.BR NOTE :
-Array subscripting is
-.I not
-affected.
-However, the
-.B asort()
-and
-.B asorti()
-functions are affected.
-.sp .5
-Thus, if
-.B IGNORECASE
-is not equal to zero,
-.B /aB/
-matches all of the strings \fB"ab"\fP, \fB"aB"\fP, \fB"Ab"\fP,
-and \fB"AB"\fP.
-As with all \*(AK variables, the initial value of
-.B IGNORECASE
-is zero, so all regular expression and string
-operations are normally case-sensitive.
+and string operations.
+See
+.I https://www.gnu.org/software/gawk/manual/html_node/Case_002dsensitivity.html
+for details.
.TP
.B LINT
Provides dynamic control of the
.B \-\^\-lint
option from within an \*(AK program.
-When true,
-.I gawk
-prints lint warnings. When false, it does not.
-The values allowed for the
-.B \-\^\-lint
-option may also be assigned to
-.BR LINT ,
-with the same effects.
-Any other true value just prints warnings.
.TP
.B NF
The number of fields in the current input record.
@@ -1141,283 +759,16 @@ numbers, 53 by default.
.B PROCINFO
The elements of this array provide access to information about the
running \*(AK program.
-On some systems,
-there may be elements in the array, \fB"group1"\fP through
-\fB"group\fIn\fB"\fR for some
-.IR n ,
-which is the number of supplementary groups that the process has.
-Use the
-.B in
-operator to test for these elements.
-The following elements are guaranteed to be available:
-.RS
-.TP \w'\fBPROCINFO["strftime"]\fR'u+1n
-\fBPROCINFO["argv"]\fP
-The command line arguments as received by
-.I gawk
-at the C-language level.
-The subscripts start from zero.
-.TP
-\fBPROCINFO["egid"]\fP
-The value of the
-.IR getegid (2)
-system call.
-.TP
-\fBPROCINFO["errno"]\fP
-The value of
-.IR errno (3)
-when
-.B ERRNO
-is set to the associated error message.
-.TP
-\fBPROCINFO["euid"]\fP
-The value of the
-.IR geteuid (2)
-system call.
-.TP
-\fBPROCINFO["FS"]\fP
-\fB"FS"\fP if field splitting with
-.B FS
-is in effect,
-\fB"FPAT"\fP if field splitting with
-.B FPAT
-is in effect,
-\fB"FIELDWIDTHS"\fP if field splitting with
-.B FIELDWIDTHS
-is in effect,
-or \fB"API"\fP if API input parser field splitting
-is in effect.
-.TP
-\fBPROCINFO["gid"]\fP
-The value of the
-.IR getgid (2)
-system call.
-.TP
-\fBPROCINFO["identifiers"]\fP
-A subarray, indexed by the names of all identifiers used in the
-text of the AWK program.
-The values indicate what
-.I gawk
-knows about the identifiers after it has finished parsing the program; they are
-.I not
-updated while the program runs.
-For each identifier, the value of the element is one of the following:
-.RS
-.TP \w'\fB"extension"\fR'u+1n
-\fB"array"\fR
-The identifier is an array.
-.TP
-\fB"builtin"\fR
-The identifier is a built-in function.
-.TP
-\fB"extension"\fR
-The identifier is an extension function loaded via
-.B @load
-or
-.BR \-\^\-load .
-.TP
-\fB"scalar"\fR
-The identifier is a scalar.
-.TP
-\fB"untyped"\fR
-The identifier is untyped (could be used as a scalar or array,
-.I gawk
-doesn't know yet).
-.TP
-\fB"user"\fR
-The identifier is a user-defined function.
-.RE
-.TP
-\fBPROCINFO["pgrpid"]\fP
-The value of the
-.IR getpgrp (2)
-system call.
-.TP
-\fBPROCINFO["pid"]\fP
-The value of the
-.IR getpid (2)
-system call.
-.TP
-\fBPROCINFO["platform"]\fP
-A string indicating the platform for which
-.I gawk
-was compiled. It is one of:
-.RS
-.TP
-\fB"djgpp"\fR, \fB"mingw"\fR
-Microsoft Windows, using either DJGPP, or MinGW, respectively.
-.TP
-\fB"os2"\fR
-OS/2.
-.TP
-\fB"posix"\fR
-GNU/Linux, Cygwin, Mac OS X, and legacy Unix systems.
-.TP
-\fB"vms"\fR
-OpenVMS or Vax/VMS.
-.RE
-.TP
-\fBPROCINFO["ppid"]\fP
-The value of the
-.IR getppid (2)
-system call.
-.TP
-\fBPROCINFO["strftime"]\fP
-The default time format string for
-.BR strftime() .
-Changing its value affects how
-.B strftime()
-formats time values when called with no arguments.
-.TP
-\fBPROCINFO["uid"]\fP
-The value of the
-.IR getuid (2)
-system call.
-.TP
-\fBPROCINFO["version"]\fP
-The version of
-.IR gawk .
-.PP
-The following elements are present if loading dynamic
-extensions is available:
-.TP
-\fBPROCINFO["api_major"]\fP
-The major version of the extension API.
-.TP
-\fBPROCINFO["api_minor"]\fP
-The minor version of the extension API.
-.PP
-The following elements are available if MPFR support is
-compiled into
-.IR gawk\^ :
-.TP
-\fBPROCINFO["gmp_version"]\fP
-The version of the GNU GMP library used for arbitrary precision
-number support in
-.IR gawk .
-.TP
-\fBPROCINFO["mpfr_version"]\fP
-The version of the GNU MPFR library used for arbitrary precision
-number support in
-.IR gawk .
-.TP
-\fBPROCINFO["prec_max"]\fP
-The maximum precision supported by the GNU MPFR library for
-arbitrary precision floating-point numbers.
-.TP
-\fBPROCINFO["prec_min"]\fP
-The minimum precision allowed by the GNU MPFR library for
-arbitrary precision floating-point numbers.
-.PP
-The following elements may set by a program to
-change
-.IR gawk 's
-behavior:
-.TP
-\fBPROCINFO["NONFATAL"]\fR
-If this exists, then I/O errors for all redirections become nonfatal.
-.TP
-\fBPROCINFO["\fIname\fB", "NONFATAL"]\fR
-Make I/O errors for
-.I name
-be nonfatal.
-.TP
-\fBPROCINFO["\fIcommand\fB", "pty"]\fR
-Use a pseudo-tty for two-way communication with
-.I command
-instead of setting up two one-way pipes.
-.TP
-\fBPROCINFO["\fIinput\fB", "READ_TIMEOUT"]\fR
-The timeout in milliseconds for reading data from
-.IR input ,
-where
-.I input
-is a redirection string or a filename. A value of zero or
-less than zero means no timeout.
-.TP
-\fBPROCINFO["\fIinput\^\fB", "RETRY"]\fR
-If an I/O error that may be retried occurs when reading data from
-.IR input ,
-and this array entry exists, then
-.B getline
-returns \-2 instead of following the default behavior of returning \-1
-and configuring
-.I input
-to return no further data.
-An I/O error that may be retried is one where
-.IR errno (3)
-has the value EAGAIN, EWOULDBLOCK, EINTR, or ETIMEDOUT.
-This may be useful in conjunction with
-\fBPROCINFO["\fIinput\^\fB", "READ_TIMEOUT"]\fR
-or in situations where a file descriptor has been configured to behave in a
-non-blocking fashion.
-.TP
-\fBPROCINFO["sorted_in"]\fP
-If this element exists in
-.BR PROCINFO ,
-then its value controls the order in which array elements
-are traversed in
-.B for
-loops.
-Supported values are
-\fB"@ind_str_asc"\fR,
-\fB"@ind_num_asc"\fR,
-\fB"@val_type_asc"\fR,
-\fB"@val_str_asc"\fR,
-\fB"@val_num_asc"\fR,
-\fB"@ind_str_desc"\fR,
-\fB"@ind_num_desc"\fR,
-\fB"@val_type_desc"\fR,
-\fB"@val_str_desc"\fR,
-\fB"@val_num_desc"\fR,
-and
-\fB"@unsorted"\fR.
-The value can also be the name (as a
-.IR string )
-of any comparison function defined
-as follows:
-.sp
-.in +5m
-\fBfunction cmp_func(i1, v1, i2, v2)\fR
-.in -5m
-.sp
-where
-.I i1
-and
-.I i2
-are the indices, and
-.I v1
-and
-.I v2
-are the
-corresponding values of the two elements being compared.
-It should return a number less than, equal to, or greater than 0,
-depending on how the elements of the array are to be ordered.
-.RE
+See
+.I https://www.gnu.org/software/gawk/manual/html_node/Auto_002dset.html
+for the details.
.TP
.B ROUNDMODE
The rounding mode to use for arbitrary precision arithmetic on
numbers, by default \fB"N"\fR (IEEE-754 roundTiesToEven mode).
-The accepted values are:
-.RS
-.TP
-\fB"A"\fR or \fB"a"\fR
-for rounding away from zero.
-These are only available if your version of
-the GNU MPFR library supports rounding away from zero.
-.TP "\w'\fB\(dqD\(dq\fR or \fB\(dqd\(dq\fR'u+1n"
-\fB"D"\fR or \fB"d"\fR
-for roundTowardNegative.
-.TP
-\fB"N"\fR or \fB"n"\fR
-for roundTiesToEven.
-.TP
-\fB"U"\fR or \fB"u"\fR
-for roundTowardPositive.
-.TP
-\fB"Z"\fR or \fB"z"\fR
-for roundTowardZero.
-.RE
+See
+.I https://www.gnu.org/software/gawk/manual/html_node/Setting-the-rounding-mode.html
+for the details.
.TP
.B RS
The input record separator, by default a newline.
@@ -1435,7 +786,6 @@ specified by
The index of the first character matched by
.BR match() ;
0 if no match.
-(This implies that character indices start at one.)
.TP
.B RLENGTH
The length of the string matched by
@@ -1448,24 +798,7 @@ elements, by default \fB"\e034"\fR.
.TP
.B SYMTAB
An array whose indices are the names of all currently defined
-global variables and arrays in the program. The array may be used
-for indirect access to read or write the value of a variable:
-.sp
-.ft B
-.nf
-.in +5m
-foo = 5
-SYMTAB["foo"] = 4
-print foo # prints 4
-.fi
-.ft R
-.in -5m
-.sp
-The
-.B typeof()
-function may be used to test if an element in
-.B SYMTAB
-is an array.
+global variables and arrays in the program.
You may not use the
.B delete
statement with the
@@ -1541,29 +874,9 @@ just by specifying the array name without a subscript.
.I gawk
supports true multidimensional arrays. It does not require that
such arrays be ``rectangular'' as in C or C++.
-For example:
-.sp
-.RS
-.ft B
-.nf
-a[1] = 5
-a[2][1] = 6
-a[2][2] = 7
-.fi
-.ft
-.RE
-.PP
-.BR NOTE :
-You may need to tell
-.I gawk
-that an array element is really a subarray in order to use it where
-.I gawk
-expects an array (such as in the second argument to
-.BR split() ).
-You can do this by creating an element in the subarray and then
-deleting it with the
-.B delete
-statement.
+See
+.I https://www.gnu.org/software/gawk/manual/html_node/Arrays
+for details.
.SS Namespaces
.I Gawk
provides a simple
@@ -1594,7 +907,8 @@ namespace names. The names of additional functions provided by
.I gawk
may be used as namespace names or as simple identifiers in other
namespaces.
-For more details, see \*(EP.
+For more details, see
+.IR https://www.gnu.org/software/gawk/manual/html_node/Namespaces.html#Namespaces .
.SS Variable Typing And Conversion
Variables and fields
may be (floating point) numbers, or strings, or both.
@@ -1620,32 +934,7 @@ with the numeric value of the variable as the argument.
However, even though all numbers in \*(AK are floating-point,
integral values are
.I always
-converted as integers. Thus, given
-.PP
-.RS
-.ft B
-.nf
-CONVFMT = "%2.2f"
-a = 12
-b = a ""
-.fi
-.ft R
-.RE
-.PP
-the variable
-.B b
-has a string value of \fB"12"\fR and not \fB"12.00"\fR.
-.PP
-.BR NOTE :
-When operating in POSIX mode (such as with the
-.B \-\^\-posix
-option),
-beware that locale settings may interfere with the way
-decimal numbers are treated: the decimal separator of the numbers you
-are feeding to
-.I gawk
-must conform to what your locale would expect, be it
-a comma (,) or a period (.).
+converted as integers.
.PP
.I Gawk
performs comparisons as follows:
@@ -1690,64 +979,16 @@ is equal to decimal 17.
String constants in \*(AK are sequences of characters enclosed
between double quotes (like \fB"value"\fR). Within strings, certain
.I "escape sequences"
-are recognized, as in C. These are:
-.TP "\w'\fB\e\^\fIddd\fR'u+1n"
-.B \e\e
-A literal backslash.
-.TP
-.B \ea
-The \*(lqalert\*(rq character; usually the \s-1ASCII\s+1 \s-1BEL\s+1 character.
-.TP
-.B \eb
-Backspace.
-.TP
-.B \ef
-Form-feed.
-.TP
-.B \en
-Newline.
-.TP
-.B \er
-Carriage return.
-.TP
-.B \et
-Horizontal tab.
-.TP
-.B \ev
-Vertical tab.
-.TP
-.BI \ex "\^hex digits"
-The character represented by the string of hexadecimal digits following
-the
-.BR \ex .
-Up to two
-following hexadecimal digits are considered part of
-the escape sequence.
-E.g., \fB"\ex1B"\fR is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
-.TP
-.BI \e ddd
-The character represented by the 1-, 2-, or 3-digit sequence of octal
-digits.
-E.g., \fB"\e033"\fR is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
-.TP
-.BI \e c
-The literal character
-.IR c\^ .
-.PP
-In compatibility mode, the characters represented by octal and
-hexadecimal escape sequences are treated literally when used in
-regular expression constants. Thus,
-.B /a\e52b/
-is equivalent to
-.BR /a\e*b/ .
+are recognized, as in C.
+See
+.I https://www.gnu.org/software/gawk/manual/html_node/Escape-Sequences
+for the details.
.SS Regexp Constants
A regular expression constant is a sequence of characters enclosed
between forward slashes (like
.BR /value/ ).
-Regular expression matching is described more fully below; see
-.BR "Regular Expressions" .
.PP
-The escape sequences described earlier may also be used inside
+The escape sequences described in the manual may also be used inside
constant regular expressions
(e.g.,
.B "/[\ \et\ef\en\er\ev]/"
@@ -1877,9 +1118,11 @@ For
.BI / "regular expression" /
patterns, the associated statement is executed for each input record that matches
the regular expression.
-Regular expressions are the same as those in
-.IR egrep (1),
-and are summarized below.
+Regular expressions are essentially the same as those in
+.IR egrep (1).
+See
+.I https://www.gnu.org/software/gawk/manual/html_node/Regexp.html
+for the details on regular expressions.
.PP
A
.I "relational expression"
@@ -1911,298 +1154,6 @@ It matches all input records starting with a record that matches
and continuing until a record that matches
.IR pattern2 ,
inclusive. It does not combine with any other sort of pattern expression.
-.SS Regular Expressions
-Regular expressions are the extended kind found in
-.IR egrep .
-They are composed of characters as follows:
-.TP "\w'\fB[^\fIabc.\|.\|.\fB]\fR'u+2n"
-.I c
-Matches the non-metacharacter
-.IR c .
-.TP
-.I \ec
-Matches the literal character
-.IR c .
-.TP
-.B .
-Matches any character
-.I including
-newline.
-.TP
-.B ^
-Matches the beginning of a string.
-.TP
-.B $
-Matches the end of a string.
-.TP
-.BI [ abc.\|.\|. ]
-A character list: matches any of the characters
-.IR abc.\|.\|.\& .
-You may include a range of characters by separating them with a dash.
-To include a literal dash in the list, put it first or last.
-.TP
-\fB[^\fIabc.\|.\|.\fB]\fR
-A negated character list: matches any character except
-.IR abc.\|.\|.\& .
-.TP
-.IB r1 | r2
-Alternation: matches either
-.I r1
-or
-.IR r2 .
-.TP
-.I r1r2
-Concatenation: matches
-.IR r1 ,
-and then
-.IR r2 .
-.TP
-.IB r\^ +
-Matches one or more
-.IR r\^ "'s."
-.TP
-.IB r *
-Matches zero or more
-.IR r\^ "'s."
-.TP
-.IB r\^ ?
-Matches zero or one
-.IR r\^ "'s."
-.TP
-.BI ( r )
-Grouping: matches
-.IR r .
-.TP
-.PD 0
-.IB r { n }
-.TP
-.PD 0
-.IB r { n ,}
-.TP
-.PD
-.IB r { n , m }
-One or two numbers inside braces denote an
-.IR "interval expression" .
-If there is one number in the braces, the preceding regular expression
-.I r
-is repeated
-.I n
-times. If there are two numbers separated by a comma,
-.I r
-is repeated
-.I n
-to
-.I m
-times.
-If there is one number followed by a comma, then
-.I r
-is repeated at least
-.I n
-times.
-.TP
-.B \ey
-Matches the empty string at either the beginning or the
-end of a word.
-.TP
-.B \eB
-Matches the empty string within a word.
-.TP
-.B \e<
-Matches the empty string at the beginning of a word.
-.TP
-.B \e>
-Matches the empty string at the end of a word.
-.TP
-.B \es
-Matches any whitespace character.
-.TP
-.B \eS
-Matches any nonwhitespace character.
-.TP
-.B \ew
-Matches any word-constituent character (letter, digit, or underscore).
-.TP
-.B \eW
-Matches any character that is not word-constituent.
-.TP
-.B \e`
-Matches the empty string at the beginning of a buffer (string).
-.TP
-.B \e'
-Matches the empty string at the end of a buffer.
-.PP
-The escape sequences that are valid in string constants (see
-.BR "String Constants" )
-are also valid in regular expressions.
-.PP
-.I "Character classes"
-are a feature introduced in the \*(PX standard.
-A character class is a special notation for describing
-lists of characters that have a specific attribute, but where the
-actual characters themselves can vary from country to country and/or
-from character set to character set. For example, the notion of what
-is an alphabetic character differs in the USA and in France.
-.PP
-A character class is only valid in a regular expression
-.I inside
-the brackets of a character list. Character classes consist of
-.BR [: ,
-a keyword denoting the class, and
-.BR :] .
-The character
-classes defined by the \*(PX standard are:
-.TP "\w'\fB[:alnum:]\fR'u+2n"
-.B [:alnum:]
-Alphanumeric characters.
-.TP
-.B [:alpha:]
-Alphabetic characters.
-.TP
-.B [:blank:]
-Space or tab characters.
-.TP
-.B [:cntrl:]
-Control characters.
-.TP
-.B [:digit:]
-Numeric characters.
-.TP
-.B [:graph:]
-Characters that are both printable and visible.
-(A space is printable, but not visible, while an
-.B a
-is both.)
-.TP
-.B [:lower:]
-Lowercase alphabetic characters.
-.TP
-.B [:print:]
-Printable characters (characters that are not control characters).
-.TP
-.B [:punct:]
-Punctuation characters (characters that are not letters, digits,
-control characters, or space characters).
-.TP
-.B [:space:]
-Space characters (such as space, tab, and formfeed, to name a few).
-.TP
-.B [:upper:]
-Uppercase alphabetic characters.
-.TP
-.B [:xdigit:]
-Characters that are hexadecimal digits.
-.PP
-For example, before the \*(PX standard, to match alphanumeric
-characters, you would have had to write
-.BR /[A\-Za\-z0\-9]/ .
-If your character set had other alphabetic characters in it, this would not
-match them, and if your character set collated differently from
-\s-1ASCII\s+1, this might not even match the
-\s-1ASCII\s+1 alphanumeric characters.
-With the \*(PX character classes, you can write
-.BR /[[:alnum:]]/ ,
-and this matches
-the alphabetic and numeric characters in your character set,
-no matter what it is.
-.PP
-Two additional special sequences can appear in character lists.
-These apply to non-\s-1ASCII\s+1 character sets, which can have single symbols
-(called
-.IR "collating elements" )
-that are represented with more than one
-character, as well as several characters that are equivalent for
-.IR collating ,
-or sorting, purposes. (E.g., in French, a plain \*(lqe\*(rq
-and a grave-accented \*(lqe\h'-\w:e:u'\`\*(rq are equivalent.)
-.TP
-Collating Symbols
-A collating symbol is a multi-character collating element enclosed in
-.B [.
-and
-.BR .] .
-For example, if
-.B ch
-is a collating element, then
-.B [[.ch.]]
-is a regular expression that matches this collating element, while
-.B [ch]
-is a regular expression that matches either
-.B c
-or
-.BR h .
-.TP
-Equivalence Classes
-An equivalence class is a locale-specific name for a list of
-characters that are equivalent. The name is enclosed in
-.B [=
-and
-.BR =] .
-For example, the name
-.B e
-might be used to represent all of
-\*(lqe\*(rq, \*(lqe\h'-\w:e:u'\'\*(rq, and \*(lqe\h'-\w:e:u'\`\*(rq.
-In this case,
-.B [[=e=]]
-is a regular expression
-that matches any of
-.BR e ,
-.BR "e\h'-\w:e:u'\'" ,
-or
-.BR "e\h'-\w:e:u'\`" .
-.PP
-These features are very valuable in non-English speaking locales.
-The library functions that
-.I gawk
-uses for regular expression matching
-currently only recognize \*(PX character classes; they do not recognize
-collating symbols or equivalence classes.
-.PP
-The
-.BR \ey ,
-.BR \eB ,
-.BR \e< ,
-.BR \e> ,
-.BR \es ,
-.BR \eS ,
-.BR \ew ,
-.BR \eW ,
-.BR \e` ,
-and
-.B \e'
-operators are specific to
-.IR gawk ;
-they are extensions based on facilities in the \*(GN regular expression libraries.
-.PP
-The various command line options
-control how
-.I gawk
-interprets characters in regular expressions.
-.TP
-No options
-In the default case,
-.I gawk
-provides all the facilities of
-\*(PX regular expressions and the \*(GN regular expression operators described above.
-.TP
-.B \-\^\-posix
-Only \*(PX regular expressions are supported, the \*(GN operators are not special.
-(E.g.,
-.B \ew
-matches a literal
-.BR w ).
-.TP
-.B \-\^\-traditional
-Traditional \*(UX
-.I awk
-regular expressions are matched. The \*(GN operators
-are not special, and interval expressions are not available.
-Characters described by octal and hexadecimal escape sequences are
-treated literally, even if they represent regular expression metacharacters.
-.TP
-.B \-\^\-re\-interval
-Allow interval expressions in regular expressions, even if
-.B \-\^\-traditional
-has been provided.
.SS Actions
Action statements are enclosed in braces,
.B {
@@ -2225,8 +1176,7 @@ Field reference.
Increment and decrement, both prefix and postfix.
.TP
.B ^
-Exponentiation (\fB**\fR may also be used, and \fB**=\fR for
-the assignment operator).
+Exponentiation.
.TP
.B "+ \- !"
Unary plus, unary minus, and logical negation.
@@ -2252,19 +1202,6 @@ The regular relational operators.
.TP
.B "~ !~"
Regular expression match, negated match.
-.BR NOTE :
-Do not use a constant regular expression
-.RB ( /foo/ )
-on the left-hand side of a
-.B ~
-or
-.BR !~ .
-Only use one on the right-hand side. The expression
-.BI "/foo/ ~ " exp
-has the same meaning as \fB(($0 ~ /foo/) ~ \fIexp\fB)\fR.
-This is usually
-.I not
-what you want.
.TP
.B in
Array membership.
@@ -2323,7 +1260,7 @@ as follows:
The input/output statements are as follows:
.TP "\w'\fBprintf \fIfmt, expr-list\fR'u+1n"
\fBclose(\fIfile \fR[\fB, \fIhow\fR]\fB)\fR
-Close file, pipe or coprocess.
+Close an open file, pipe or coprocess.
The optional
.I how
should only be used when closing one end of a
@@ -2385,14 +1322,20 @@ or
.IR var ,
as above, and
.BR RT .
-Coprocesses are a
-.I gawk
-extension.
.RI "(The " command
can also be a socket. See the subsection
.BR "Special File Names" ,
below.)
.TP
+\&\fBfflush(\fR[\fIfile\^\fR]\fB)\fR
+Flush any buffers associated with the open output file or pipe
+.IR file .
+If
+.I file
+is missing or if it
+is the null string,
+then flush all open output files and pipes.
+.TP
.B next
Stop processing the current input record.
Read the next input record
@@ -2443,7 +1386,6 @@ The output record is terminated with the value of
.TP
.BI printf " fmt, expr-list"
Format and print.
-See \fBThe \fIprintf \fBStatement\fR, below.
.TP
.BI printf " fmt, expr-list" " >" file
Format and print on
@@ -2454,16 +1396,9 @@ Execute the command
.IR cmd-line ,
and return the exit status.
(This may not be available on non-\*(PX systems.)
-See \*(EP for the full details on the exit status.
-.TP
-\&\fBfflush(\fR[\fIfile\^\fR]\fB)\fR
-Flush any buffers associated with the open output file or pipe
-.IR file .
-If
-.I file
-is missing or if it
-is the null string,
-then flush all open output files and pipes.
+See
+.I https://www.gnu.org/software/gawk/manual/html_node/I_002fO-Functions.html#I_002fO-Functions
+for the full details on the exit status.
.PP
Additional output redirections are allowed for
.B print
@@ -2512,235 +1447,14 @@ use
to create new instances of the command or socket.
\*(AK does not automatically close pipes, sockets, or coprocesses when
they return EOF.
-.SS The \fIprintf\fP\^ Statement
+.PP
The \*(AK versions of the
.B printf
statement and
.B sprintf()
function
-(see below)
-accept the following conversion specification formats:
-.TP "\w'\fB%g\fR, \fB%G\fR'u+2n"
-.BR "%a" "," " %A"
-A floating point number of the form
-[\fB\-\fP]\fB0x\fIh\fB.\fIhhhh\fBp+\-\fIdd\fR
-(C99 hexadecimal floating point format).
-For
-.BR %A ,
-uppercase letters are used instead of lowercase ones.
-.TP
-.B %c
-A single character.
-If the argument used for
-.B %c
-is numeric, it is treated as a character and printed.
-Otherwise, the argument is assumed to be a string, and only the first
-character of that string is printed.
-.TP
-.BR "%d" "," " %i"
-A decimal number (the integer part).
-.TP
-.BR %e , " %E"
-A floating point number of the form
-[\fB\-\fP]\fId\fB.\fIdddddd\^\fBe\fR[\fB+\-\fR]\fIdd\fR.
-The
-.B %E
-format uses
-.B E
-instead of
-.BR e .
-.TP
-.BR %f , " %F"
-A floating point number of the form
-[\fB\-\fP]\fIddd\fB.\fIdddddd\fR.
-If the system library supports it,
-.B %F
-is available as well. This is like
-.BR %f ,
-but uses capital letters for special \*(lqnot a number\*(rq
-and \*(lqinfinity\*(rq values. If
-.B %F
-is not available,
-.I gawk
-uses
-.BR %f .
-.TP
-.BR %g , " %G"
-Use
-.B %e
-or
-.B %f
-conversion, whichever is shorter, with nonsignificant zeros suppressed.
-The
-.B %G
-format uses
-.B %E
-instead of
-.BR %e .
-.TP
-.B %o
-An unsigned octal number (also an integer).
-.TP
-.PD
-.B %u
-An unsigned decimal number (again, an integer).
-.TP
-.B %s
-A character string.
-.TP
-.BR %x , " %X"
-An unsigned hexadecimal number (an integer).
-The
-.B %X
-format uses
-.B ABCDEF
-instead of
-.BR abcdef .
-.TP
-.B %%
-A single
-.B %
-character; no argument is converted.
-.PP
-Optional, additional parameters may lie between the
-.B %
-and the control letter:
-.TP
-.IB count $
-Use the
-.IR count "'th"
-argument at this point in the formatting.
-This is called a
-.I "positional specifier"
-and
-is intended primarily for use in translated versions of
-format strings, not in the original text of an AWK program.
-It is a
-.I gawk
-extension.
-.TP
-.B \-
-The expression should be left-justified within its field.
-.TP
-.I space
-For numeric conversions, prefix positive values with a space, and
-negative values with a minus sign.
-.TP
-.B +
-The plus sign, used before the width modifier (see below),
-says to always supply a sign for numeric conversions, even if the data
-to be formatted is positive. The
-.B +
-overrides the space modifier.
-.TP
-.B #
-Use an \*(lqalternate form\*(rq for certain control letters.
-For
-.BR %o ,
-supply a leading zero.
-For
-.BR %x ,
-and
-.BR %X ,
-supply a leading
-.B 0x
-or
-.B 0X
-for
-a nonzero result.
-For
-.BR %e ,
-.BR %E ,
-.B %f
-and
-.BR %F ,
-the result always contains a
-decimal point.
-For
-.BR %g ,
-and
-.BR %G ,
-trailing zeros are not removed from the result.
-.TP
-.B 0
-A leading
-.B 0
-(zero) acts as a flag, indicating that output should be
-padded with zeroes instead of spaces.
-This applies only to the numeric output formats.
-This flag only has an effect when the field width is wider than the
-value to be printed.
-.TP
-.B '
-A single quote character instructs
-.I gawk
-to insert the locale's thousands-separator character
-into decimal numbers, and to also use the locale's
-decimal point character with floating point formats.
-This requires correct locale support in the C library
-and in the definition of the current locale.
-.TP
-.I width
-The field should be padded to this width. The field is normally padded
-with spaces. With the
-.B 0
-flag, it is padded with zeroes.
-.TP
-.BI \&.\& prec
-A number that specifies the precision to use when printing.
-For the
-.BR %e ,
-.BR %E ,
-.B %f
-and
-.BR %F ,
-formats, this specifies the
-number of digits you want printed to the right of the decimal point.
-For the
-.BR %g ,
-and
-.B %G
-formats, it specifies the maximum number
-of significant digits. For the
-.BR %d ,
-.BR %i ,
-.BR %o ,
-.BR %u ,
-.BR %x ,
-and
-.B %X
-formats, it specifies the minimum number of
-digits to print. For the
-.B %s
-format,
-it specifies the maximum number of
-characters from the string that should be printed.
-.PP
-The dynamic
-.I width
-and
-.I prec
-capabilities of the ISO C
-.B printf()
-routines are supported.
-A
-.B *
-in place of either the
-.I width
-or
-.I prec
-specifications causes their values to be taken from
-the argument list to
-.B printf
-or
-.BR sprintf() .
-To use a positional specifier with a dynamic width or precision,
-supply the
-.IB count $
-after the
-.B *
-in the format string.
-For example, \fB"%3$*2$.*1$s"\fP.
+are similar to those of C. For details, see
+.IR https://www.gnu.org/software/gawk/manual/html_node/Printf.html .
.SS Special File Names
When doing I/O redirection from either
.B print
@@ -2774,22 +1488,6 @@ The standard error output.
The file associated with the open file descriptor
.IR n .
.PP
-These are particularly useful for error messages. For example:
-.PP
-.RS
-.ft B
-print "You blew it!" > "/dev/stderr"
-.ft R
-.RE
-.PP
-whereas you would otherwise have to use
-.PP
-.RS
-.ft B
-print "You blew it!" | "cat 1>&2"
-.ft R
-.RE
-.PP
The following special filenames may be used with the
.B |&
coprocess operator for creating TCP/IP network connections:
@@ -2900,8 +1598,9 @@ Return the previous seed for the random
number generator.
.SS String Functions
.I Gawk
-has the following built-in string functions:
-.TP "\w'\fBsprintf(\^\fIfmt\fB\^, \fIexpr-list\^\fB)\fR'u+1n"
+has the following built-in string functions; details are provided in
+.IR https://www.gnu.org/software/gawk/manual/html_node/String-Functions .
+.TP "\w'\fBsprintf(\fIfmt\^\fB, \fIexpr-list\^\fB)\fR'u+1n"
\fBasort(\fIs \fR[\fB, \fId\fR [\fB, \fIhow\fR] ]\fB)\fR
Return the number of elements in the source
array
@@ -2935,15 +1634,12 @@ controls the direction and the comparison mode.
Valid values for
.I how
are
-any of the strings valid for
-\fBPROCINFO["sorted_in"]\fR.
-It can also be the name of a user-defined
-comparison function as described in
-\fBPROCINFO["sorted_in"]\fR.
+described in
+.IR https://www.gnu.org/software/gawk/manual/html_node/String-Functions.html#String-Functions .
.IR s " and " d
are allowed to be the same array; this only makes sense when
supplying the third argument as well.
-.TP "\w'\fBsprintf(\^\fIfmt\fB\^, \fIexpr-list\^\fB)\fR'u+1n"
+.TP
\fBasorti(\fIs \fR[\fB, \fId\fR [\fB, \fIhow\fR] ]\fB)\fR
Return the number of elements in the source
array
@@ -2959,8 +1655,7 @@ The original values are lost; thus provide
a second array if you wish to preserve the original.
The purpose of the optional string
.I how
-is the same as described
-previously for
+is the same as for
.BR asort() .
Here too,
.IR s " and " d
@@ -3013,7 +1708,7 @@ the modified string is returned as the result of the function,
and the original target string is
.I not
changed.
-.TP "\w'\fBsprintf(\^\fIfmt\fB\^, \fIexpr-list\^\fB)\fR'u+1n"
+.TP
\fBgsub(\fIr\fB, \fIs \fR[\fB, \fIt\fR]\fB)\fR
For each substring matching the regular expression
.I r
@@ -3033,8 +1728,8 @@ Use
.B \e&
to get a literal
.BR & .
-(This must be typed as \fB"\e\e&"\fP;
-see \*(EP
+(This must be typed as \fB"\e\e&"\fP; see
+.I https://www.gnu.org/software/gawk/manual/html_node/Gory-Details.html#Gory-Details
for a fuller discussion of the rules for ampersands
and backslashes in the replacement text of
.BR sub() ,
@@ -3051,8 +1746,6 @@ or zero if
.I t
is not present.
(This implies that character indices start at one.)
-It is a fatal error to use a regexp constant for
-.IR t .
.TP
\fBlength(\fR[\fIs\fR]\fB)
Return the length of the string
@@ -3062,7 +1755,7 @@ or the length of
if
.I s
is not supplied.
-As a non-standard extension, with an array argument,
+With an array argument,
.B length()
returns the number of elements in the array.
.TP
@@ -3083,30 +1776,11 @@ operator:
.IB str " ~"
.IR re .
.ft R
-If array
-.I a
-is provided,
-.I a
-is cleared and then elements 1 through
-.I n
-are filled with the portions of
-.I s
-that match the corresponding parenthesized
-subexpression in
-.IR r .
-The zero'th element of
+See
+.I https://www.gnu.org/software/gawk/manual/html_node/String-Functions.html#String-Functions
+for a description of how the array
.I a
-contains the portion
-of
-.I s
-matched by the entire regular expression
-.IR r .
-Subscripts
-\fBa[\fIn\^\fB, "start"]\fR,
-and
-\fBa[\fIn\^\fB, "length"]\fR
-provide the starting index in the string and length
-respectively, of each matching substring.
+is filled if it is provided.
.TP
\fBpatsplit(\fIs\fB, \fIa \fR[\fB, \fIr\fR [\fB, \fIseps\fR] ]\fB)\fR
Split the string
@@ -3140,8 +1814,7 @@ and
.I seps
are cleared first.
Splitting behaves identically to field splitting with
-.BR FPAT ,
-described above.
+.BR FPAT .
.TP
\fBsplit(\fIs\fB, \fIa \fR[\fB, \fIr\fR [\fB, \fIseps\fR] ]\fB)\fR
Split the string
@@ -3169,25 +1842,9 @@ between
.BI a[ i ]
and
.BI a[ i +1]\fR.
-If
-.I r
-is a single space, then leading whitespace in
-.I s
-goes into the extra array element
-.B seps[0]
-and trailing whitespace goes into the extra array element
-.BI seps[ n ]\fR,
-where
-.I n
-is the return value of
-.BI split( s ", " a ", " r ", " seps )\fR.
-Splitting behaves identically to field splitting, described above.
-In particular, if
-.I r
-is a single-character string, that string acts as the separator,
-even if it happens to be a regular expression metacharacter.
+Splitting behaves identically to field splitting.
.TP
-.BI sprintf( fmt , " expr-list" )
+.BI sprintf( fmt\^ , " expr-list\^" )
Print
.I expr-list
according to
@@ -3259,11 +1916,10 @@ and
.B match()
all work in terms of characters, not bytes.
.SS Time Functions
-Since one of the primary uses of \*(AK programs is processing log files
-that contain time stamp information,
-.I gawk
+.I Gawk
provides the following functions for obtaining time stamps and
-formatting them.
+formatting them. Details are provided in
+.IR https://www.gnu.org/software/gawk/manual/html_node/Time-Functions .
.TP "\w'\fBsystime()\fR'u+1n"
\fBmktime(\fIdatespec\fR [\fB, \fIutc-flag\fR]\fB)\fR
Turn
@@ -3271,42 +1927,21 @@ Turn
into a time stamp of the same form as returned by
.BR systime() ,
and return the result.
-The
-.I datespec
-is a string of the form
-.IR "YYYY MM DD HH MM SS[ DST]" .
-The contents of the string are six or seven numbers representing respectively
-the full year including century,
-the month from 1 to 12,
-the day of the month from 1 to 31,
-the hour of the day from 0 to 23,
-the minute from 0 to 59,
-the second from 0 to 60,
-and an optional daylight saving flag.
-The values of these numbers need not be within the ranges specified;
-for example, an hour of \-1 means 1 hour before midnight.
-The origin-zero Gregorian calendar is assumed,
-with year 0 preceding year 1 and year \-1 preceding year 0.
If
.I utc-flag
is present and is non-zero or non-null, the time is assumed to be in
the UTC time zone; otherwise, the
time is assumed to be in the local time zone.
-If the
-.I DST
-daylight saving flag is positive,
-the time is assumed to be daylight saving time;
-if zero, the time is assumed to be standard time;
-and if negative (the default),
-.B mktime()
-attempts to determine whether daylight saving time is in effect
-for the specified time.
If
.I datespec
does not contain enough elements or if the resulting time
is out of range,
.B mktime()
returns \-1.
+See
+.I https://www.gnu.org/software/gawk/manual/html_node/Time-Functions.html#Time-Functions
+for the details of
+.IR datespec .
.TP
\fBstrftime(\fR[\fIformat \fR[\fB, \fItimestamp\fR[\fB, \fIutc-flag\fR]]]\fB)\fR
Format
@@ -3347,8 +1982,6 @@ values to
.B uintmax_t
integers, doing the operation, and then converting the
result back to floating point.
-.PP
-.BR NOTE :
Passing negative operands to any of these functions causes
a fatal error.
.PP
@@ -3391,8 +2024,6 @@ their arguments.
Return true if
.I x
is an array, false otherwise.
-This function is mainly for use with the elements of multidimensional arrays
-and with function parameters.
.TP
\fBtypeof(\fIx\fB)\fR
Return a string indicating the type of
@@ -3409,7 +2040,8 @@ or
.SS Internationalization Functions
The following functions may be used from within your AWK program for
translating strings at run-time.
-For full details, see \*(EP.
+For full details, see
+.IR https://www.gnu.org/software/gawk/manual/html_node/I18N-Functions.html#I18N-Functions .
.TP
\fBbindtextdomain(\fIdirectory \fR[\fB, \fIdomain\fR]\fB)\fR
Specify the directory where
@@ -3417,8 +2049,7 @@ Specify the directory where
looks for the
.B \&.gmo
files, in case they
-will not or cannot be placed in the ``standard'' locations
-(e.g., during testing).
+will not or cannot be placed in the ``standard'' locations.
It returns the directory where
.I domain
is ``bound.''
@@ -3449,15 +2080,6 @@ is the current value of
The default value for
.I category
is \fB"LC_MESSAGES"\fR.
-.sp .5
-If you supply a value for
-.IR category ,
-it must be a string equal to
-one of the known locale categories described
-in \*(EP.
-You must also supply a text domain. Use
-.B TEXTDOMAIN
-if you want to use the current domain.
.TP
\fBdcngettext(\fIstring1\fB, \fIstring2\fB, \fInumber \fR[\fB, \fIdomain \fR[\fB, \fIcategory\fR]]\fB)\fR
Return the plural form used for
@@ -3478,15 +2100,6 @@ is the current value of
The default value for
.I category
is \fB"LC_MESSAGES"\fR.
-.sp .5
-If you supply a value for
-.IR category ,
-it must be a string equal to
-one of the known locale categories described
-in \*(EP.
-You must also supply a text domain. Use
-.B TEXTDOMAIN
-if you want to use the current domain.
.SS Boolean Valued Functions
You can create special Boolean-typed values; see the manual for how
they work and why they exist.
@@ -3509,8 +2122,7 @@ in either patterns or actions. Actual parameters supplied in the function
call are used to instantiate the formal parameters declared in the function.
Arrays are passed by reference, other variables are passed by value.
.PP
-Since functions were not originally part of the \*(AK language, the provision
-for local variables is rather clumsy: They are declared as extra parameters
+Local variables are declared as extra parameters
in the parameter list. The convention is to separate local variables from
real parameters by extra spaces in the parameter list. For example:
.PP
@@ -3530,7 +2142,6 @@ function f(p, q, a, b) # a and b are local
The left parenthesis in a function call is required
to immediately follow the function name,
without any intervening whitespace.
-This avoids a syntactic ambiguity with the concatenation operator.
This restriction does not apply to the built-in functions listed above.
.PP
Functions may call each other and may be recursive.
@@ -3543,9 +2154,7 @@ to return a value from a function. The return value is undefined if no
value is provided, or if the function returns by \*(lqfalling off\*(rq the
end.
.PP
-As a
-.I gawk
-extension, functions may be called indirectly. To do this, assign
+Functions may be called indirectly. To do this, assign
the name of the function to be called, as a string, to a variable.
Then use the variable as if it were the name of a function, prefixed with an
.B @
@@ -3567,8 +2176,6 @@ function myfunc()
.fi
.ft R
.RE
-As of version 4.1.2, this works with user-defined functions,
-built-in functions, and extension functions.
.PP
If
.B \-\^\-lint
@@ -3577,12 +2184,6 @@ has been provided,
warns about calls to undefined functions at parse time,
instead of at run time.
Calling an undefined function at run time is a fatal error.
-.PP
-The word
-.B func
-may be used in place of
-.BR function ,
-although this is deprecated.
.SH DYNAMICALLY LOADING NEW FUNCTIONS
You can dynamically add new functions written in C or C++ to the running
.I gawk
@@ -3590,7 +2191,8 @@ interpreter with the
.B @load
statement.
The full details are beyond the scope of this manual page;
-see \*(EP.
+see
+.IR https://www.gnu.org/software/gawk/manual/html_node/Dynamic-Extensions.html#Dynamic-Extensions .
.SH SIGNALS
The
.I gawk
@@ -3632,166 +2234,17 @@ gawk 'BEGIN { print _"hello, world" }'
might print
.B "bonjour, monde"
in France.
-.PP
-There are several steps involved in producing and running a localizable
+See
+.I https://www.gnu.org/software/gawk/manual/html_node/Internationalization.html#Internationalization
+for the steps involved in producing and running a localizable
\*(AK program.
-.TP "\w'4.'u+2n"
-1.
-Add a
-.B BEGIN
-action to assign a value to the
-.B TEXTDOMAIN
-variable to set the text domain to a name associated with your program:
-.sp
-.in +5m
-.ft B
-BEGIN { TEXTDOMAIN = "myprog" }
-.ft R
-.in -5m
-.sp
-This allows
-.I gawk
-to find the
-.B \&.gmo
-file associated with your program.
-Without this step,
-.I gawk
-uses the
-.B messages
-text domain,
-which likely does not contain translations for your program.
-.TP
-2.
-Mark all strings that should be translated with leading underscores.
-.TP
-3.
-If necessary, use the
-.B dcgettext()
-and/or
-.B bindtextdomain()
-functions in your program, as appropriate.
-.TP
-4.
-Run
-.B "gawk \-\^\-gen\-pot \-f myprog.awk > myprog.pot"
-to generate a
-.B \&.pot
-file for your program.
-.TP
-5.
-Provide appropriate translations, and build and install the corresponding
-.B \&.gmo
-files.
-.PP
-The internationalization features are described in full detail in \*(EP.
-.SH POSIX COMPATIBILITY
-A primary goal for
-.I gawk
-is compatibility with the \*(PX standard, as well as with the
-latest version of Brian Kernighan's
-.IR awk .
-To this end,
-.I gawk
-incorporates the following user visible
-features which are not described in the \*(AK book,
-but are part of the Brian Kernighan's version of
-.IR awk ,
-and are in the \*(PX standard.
-.PP
-The book indicates that command line variable assignment happens when
-.I awk
-would otherwise open the argument as a file, which is after the
-.B BEGIN
-rule is executed. However, in earlier implementations, when such an
-assignment appeared before any file names, the assignment would happen
-.I before
-the
-.B BEGIN
-rule was run. Applications came to depend on this \*(lqfeature.\*(rq
-When
-.I awk
-was changed to match its documentation, the
-.B \-v
-option for assigning variables before program execution was added to
-accommodate applications that depended upon the old behavior.
-(This feature was agreed upon by both the Bell Laboratories developers
-and the \*(GN developers.)
-.PP
-When processing arguments,
-.I gawk
-uses the special option \*(lq\-\^\-\*(rq to signal the end of
-arguments.
-In compatibility mode, it warns about but otherwise ignores
-undefined options.
-In normal operation, such arguments are passed on to the \*(AK program for
-it to process.
-.PP
-The \*(AK book does not define the return value of
-.BR srand() .
-The \*(PX standard
-has it return the seed it was using, to allow keeping track
-of random number sequences. Therefore
-.B srand()
-in
-.I gawk
-also returns its current seed.
-.PP
-Other features are:
-The use of multiple
-.B \-f
-options (from MKS
-.IR awk );
-the
-.B ENVIRON
-array; the
-.BR \ea ,
-and
-.B \ev
-escape sequences (done originally in
-.I gawk
-and fed back into the Bell Laboratories version); the
-.B tolower()
-and
-.B toupper()
-built-in functions (from the Bell Laboratories version);
-and the ISO C conversion specifications in
-.B printf
-(done first in the Bell Laboratories version).
-.SH HISTORICAL FEATURES
-There is one feature of historical \*(AK implementations that
-.I gawk
-supports:
-It is possible to call the
-.B length()
-built-in function not only with no argument, but even without parentheses!
-Thus,
-.RS
-.PP
-.ft B
-a = length # Holy Algol 60, Batman!
-.ft R
-.RE
-.PP
-is the same as either of
-.RS
-.PP
-.ft B
-a = length()
-.br
-a = length($0)
-.ft R
-.RE
-.PP
-Using this feature is poor practice, and
-.I gawk
-issues a warning about its use if
-.B \-\^\-lint
-is specified on the command line.
.SH GNU EXTENSIONS
.I Gawk
has a too-large number of extensions to \*(PX
.IR awk .
-They are described in this section. All the extensions described here
+They are described in
+.IR https://www.gnu.org/software/gawk/manual/html_node/POSIX_002fGNU.html .
+All the extensions
can be disabled by
invoking
.I gawk
@@ -3800,246 +2253,6 @@ with the
or
.B \-\^\-posix
options.
-.PP
-The following features of
-.I gawk
-are not available in
-\*(PX
-.IR awk .
-.\" Environment vars and startup stuff
-.TP "\w'\(bu'u+1n"
-\(bu
-No path search is performed for files named via the
-.B \-f
-option. Therefore the
-.B AWKPATH
-environment variable is not special.
-.\" POSIX and language recognition issues
-.TP
-\(bu
-There is no facility for doing file inclusion
-.RI ( gawk 's
-.B @include
-mechanism).
-.TP
-\(bu
-There is no facility for dynamically adding new functions
-written in C
-.RI ( gawk 's
-.B @load
-mechanism).
-.TP
-\(bu
-The
-.B \ex
-escape sequence.
-.TP
-\(bu
-The ability to continue lines after
-.B ?
-and
-.BR : .
-.TP
-\(bu
-Octal and hexadecimal constants in AWK programs.
-.\" Special variables
-.TP
-\(bu
-The
-.BR ARGIND ,
-.BR BINMODE ,
-.BR ERRNO ,
-.BR LINT ,
-.BR PREC ,
-.BR ROUNDMODE ,
-.B RT
-and
-.B TEXTDOMAIN
-variables are not special.
-.TP
-\(bu
-The
-.B IGNORECASE
-variable and its side-effects are not available.
-.TP
-\(bu
-The
-.B FIELDWIDTHS
-variable and fixed-width field splitting.
-.TP
-\(bu
-The
-.B FPAT
-variable and field splitting based on field values.
-.TP
-\(bu
-The
-.BR FUNCTAB ,
-.BR SYMTAB ,
-and
-.B PROCINFO
-arrays are not available.
-.\" I/O stuff
-.TP
-\(bu
-The use of
-.B RS
-as a regular expression.
-.TP
-\(bu
-The special file names available for I/O redirection are not recognized.
-.TP
-\(bu
-The
-.B |&
-operator for creating coprocesses.
-.TP
-\(bu
-The
-.B BEGINFILE
-and
-.B ENDFILE
-special patterns are not available.
-.\" Changes to standard awk functions
-.TP
-\(bu
-The ability to split out individual characters using the null string
-as the value of
-.BR FS ,
-and as the third argument to
-.BR split() .
-.TP
-\(bu
-An optional fourth argument to
-.B split()
-to receive the separator texts.
-.TP
-\(bu
-The optional second argument to the
-.B close()
-function.
-.TP
-\(bu
-The optional third argument to the
-.B match()
-function.
-.TP
-\(bu
-The ability to use positional specifiers with
-.B printf
-and
-.BR sprintf() .
-.TP
-\(bu
-The ability to pass an array to
-.BR length() .
-.\" New keywords or changes to keywords
-.\" (As of 2012, these are in POSIX)
-.\" .TP
-.\" \(bu
-.\" The use of
-.\" .BI delete " array"
-.\" to delete the entire contents of an array.
-.\" .TP
-.\" \(bu
-.\" The use of
-.\" .B "nextfile"
-.\" to abandon processing of the current input file.
-.\" New functions
-.TP
-\(bu
-The
-.BR and() ,
-.BR asort() ,
-.BR asorti() ,
-.BR bindtextdomain() ,
-.BR compl() ,
-.BR dcgettext() ,
-.BR dcngettext() ,
-.BR gensub() ,
-.BR lshift() ,
-.BR mktime() ,
-.BR or() ,
-.BR patsplit() ,
-.BR rshift() ,
-.BR strftime() ,
-.BR strtonum() ,
-.B systime()
-and
-.B xor()
-functions.
-.\" I18N stuff
-.TP
-\(bu
-Localizable strings.
-.TP
-\(bu
-Non-fatal I/O.
-.TP
-\(bu
-Retryable I/O.
-.PP
-The \*(AK book does not define the return value of the
-.B close()
-function.
-.IR Gawk\^ "'s"
-.B close()
-returns the value from
-.IR fclose (3),
-or
-.IR pclose (3),
-when closing an output file or pipe, respectively.
-It returns the process's exit status when closing an input pipe.
-The return value is \-1 if the named file, pipe
-or coprocess was not opened with a redirection.
-.PP
-When
-.I gawk
-is invoked with the
-.B \-\^\-traditional
-option,
-if the
-.I fs
-argument to the
-.B \-F
-option is \*(lqt\*(rq, then
-.B FS
-is set to the tab character.
-Note that typing
-.B "gawk \-F\et \&.\|.\|."
-simply causes the shell to quote the \*(lqt,\*(rq and does not pass
-\*(lq\et\*(rq to the
-.B \-F
-option.
-Since this is a rather ugly special case, it is not the default behavior.
-This behavior also does not occur if
-.B \-\^\-posix
-has been specified.
-To really get a tab character as the field separator, it is best to use
-single quotes:
-.BR "gawk \-F'\et' \&.\|.\|." .
-.ig
-.PP
-If
-.I gawk
-was compiled for debugging, it
-accepts the following additional options:
-.TP
-.PD 0
-.B \-Y
-.TP
-.PD
-.B \-\^\-parsedebug
-Turn on
-.IR yacc (1)
-or
-.IR bison (1)
-debugging output during program parsing.
-This option should only be of interest to the
-.I gawk
-maintainers, and may not even be compiled into
-.IR gawk .
-..
.SH ENVIRONMENT VARIABLES
The
.B AWKPATH
@@ -4160,70 +2373,32 @@ and which ports are currently supported.
.SH BUG REPORTS
If you find a bug in
.IR gawk ,
-please send electronic mail to
-.BR \%bug-gawk@gnu.org .
-Please include your operating system and its revision, the version of
-.I gawk
-(from
-.BR "gawk \-\^\-version" ),
-which C compiler you used to compile it, and a test program
-and data that are as small as possible for reproducing the problem.
-.PP
-Before sending a bug report, please do the following things. First, verify that
-you have the latest version of
-.IR gawk .
-Many bugs (usually subtle ones) are fixed at each release, and if
-yours is out of date, the problem may already have been solved.
-Second, please see if setting the environment variable
-.B LC_ALL
-to
-.B LC_ALL=C
-causes things to behave as you expect. If so, it's a locale issue,
-and may or may not really be a bug.
-Finally, please read this man page and the reference manual carefully to
-be sure that what you think is a bug really is, instead of just a quirk
-in the language.
-.PP
-Whatever you do, do
-.B NOT
-post a bug report in
-.BR comp.lang.awk .
-While the
-.I gawk
-developers occasionally read this newsgroup, posting bug reports there
-is an unreliable way to report bugs.
-Similarly, do
-.B NOT
-use a web forum (such as Stack Overflow) for reporting bugs.
-Instead, please use the electronic mail
-addresses given above.
+please use the
+.IR gawkbug (1)
+program to report it.
+.PP
+Full instructions for reporting a bug are provided in
+.IR https://www.gnu.org/software/gawk/manual/html_node/Bugs.html .
+.I Please
+carefully read and follow the instructions given there.
+This will make bug reporting and resolution much easier for everyone involved.
Really.
-.PP
-If you're using a GNU/Linux or BSD-based system,
-you may wish to submit a bug report to the vendor of your distribution.
-That's fine, but please send a copy to the official email address as well,
-since there's no guarantee that the bug report will be forwarded to the
-.I gawk
-maintainer.
.SH BUGS
The
.B \-F
option is not necessary given the command line variable assignment feature;
it remains only for backwards compatibility.
+.PP
+This manual page is too long;
+.I gawk
+has too many features.
.SH SEE ALSO
.IR egrep (1),
.IR sed (1),
-.IR getpid (2),
-.IR getppid (2),
-.IR getpgrp (2),
-.IR getuid (2),
-.IR geteuid (2),
-.IR getgid (2),
-.IR getegid (2),
-.IR getgroups (2),
+.IR gawkbug (1),
.IR printf (3),
-.IR strftime (3),
-.IR usleep (3)
+and
+.IR strftime (3).
.PP
.IR "The AWK Programming Language" ,
Alfred V.\& Aho, Brian W.\& Kernighan, Peter J.\& Weinberger,
@@ -4234,12 +2409,12 @@ Edition 5.1, shipped with the
.I gawk
source.
The current version of this document is available online at
-.BR https://www.gnu.org/software/gawk/manual .
+.IR https://www.gnu.org/software/gawk/manual .
.PP
The GNU
.B gettext
documentation, available online at
-.BR https://www.gnu.org/software/gettext .
+.IR https://www.gnu.org/software/gettext .
.SH EXAMPLES
.nf
Print and sort the login names of all users:
@@ -4275,14 +2450,17 @@ Run an external command for particular lines of data:
awk '/myhome.html/ { system("nmap " $1 ">> logdir/myhome.html") }'
.ft R
.fi
+.ig
.SH ACKNOWLEDGEMENTS
Brian Kernighan
provided valuable assistance during testing and debugging.
We thank him.
+..
.SH COPYING PERMISSIONS
Copyright \(co 1989, 1991, 1992, 1993, 1994, 1995, 1996,
1997, 1998, 1999, 2001, 2002, 2003, 2004, 2005, 2007, 2009,
-2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021,
+2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019,
+2020, 2021, 2022
Free Software Foundation, Inc.
.PP
Permission is granted to make and distribute verbatim copies of
diff --git a/doc/wordlist b/doc/wordlist
index fd7c181e..722ac71a 100644
--- a/doc/wordlist
+++ b/doc/wordlist
@@ -20,6 +20,7 @@ AWKBUFSIZE
AWKLIBPATH
AWKPATH
AWKREADFUNC
+AWKgo
AbbC
AdditionalResources
Affero
@@ -44,6 +45,7 @@ BEGINFILE
BEL
BINMODE
BLKSIZE
+BOOL
BREs
BSIZE
BTL
@@ -52,6 +54,7 @@ Banzai
BeOS
Benzinger
Bleah
+Bool
Bornstein
Brin's
Brini
@@ -115,6 +118,7 @@ DHAVE
DIR
DJGPP
DK
+DLL
DST
DTD
DVI
@@ -157,6 +161,7 @@ FIXME
FN
FNM
FNR
+FNV
FOO
FORmula
FP
@@ -273,6 +278,7 @@ LINENO
LXIX
Langston
LeBlanc
+LibGCC
Libmawk
Libtool
Lijewski
@@ -630,6 +636,7 @@ awk
awka
awkcard
awkforai
+awkgo
awkgram
awklang
awklib
@@ -681,6 +688,7 @@ blockdev
blockquote
blocksize
bool
+boolval
boundar
br
brabble
@@ -734,6 +742,7 @@ chem
chet
chien
chmod
+chongo
chown
chr
cilnqsvx
@@ -841,6 +850,7 @@ distributable
div
djgpp
dl
+dll
dlopen
dlsym
dn
@@ -858,6 +868,7 @@ duncan
dup
dupword
dvi
+dw
dx
ebcdic
eca
@@ -887,6 +898,7 @@ enrichening
enum
env
eof
+eq
eql
equiv
erealloc
@@ -948,6 +960,7 @@ fmode
fn
fname
fnmatch
+fnv
fo
foObAr
foo
@@ -985,6 +998,7 @@ ga
gai
gatech
gawkapi
+gawkbug
gawkextlib
gawkfts
gawkinet
@@ -1024,6 +1038,7 @@ getuid
gid
gierd
github
+globalstate
gmail
gmo
gmon
@@ -1048,6 +1063,7 @@ groupset
grp
gst
gsub
+gt
guerrero
gunzip
gvim
@@ -1140,6 +1156,7 @@ isatty
isnumeric
iso
ist
+isthe
itd
itemx
ith
@@ -1187,6 +1204,7 @@ lgpl
lhs
li
libexec
+libgcc
libintl
libmawk
libs
@@ -1250,6 +1268,7 @@ mingw
miriam
misc
mit
+mkbool
mkdir
mkinstalldirs
mktime
@@ -1406,6 +1425,7 @@ org
orig
orspan
os
+osdn
otherc
otherd
otiens
@@ -1489,6 +1509,7 @@ pwcat
pwd
pwent
pxref
+py
qb
qc
qquad
@@ -1621,6 +1642,7 @@ stat
statdata
statfunc
std
+stdbool
stdbuf
stddef
stderr
diff --git a/doc/wordlist3 b/doc/wordlist3
index f920c4a2..cbe0cdcb 100644
--- a/doc/wordlist3
+++ b/doc/wordlist3
@@ -1,32 +1,16 @@
-AB
-ABCDEF
-API
ARGC
ARGIND
ARGV
AWK
AWKLIBPATH
AWKPATH
-Ab
Aho
BEGINFILE
-BEL
-BI
BINMODE
CGI
CONVFMT
-Coprocesses
-Cygwin
-DD
-DJGPP
-DST
-EAGAIN
-EINTR
ENDFILE
ERRNO
-ESC
-ETIMEDOUT
-EWOULDBLOCK
FIELDWIDTHS
FNR
FPAT
@@ -34,51 +18,33 @@ FS
FUNCTAB
Fenlason
GMP
-HH
IGNORECASE
IPv
ISBN
-Jul
LC
-Localizable
-MKS
MPFR
MSEC
-MinGW
NF
NR
OFMT
OFS
ORS
-OpenVMS
PREC
PROCINFO
+Printf
RLENGTH
ROUNDMODE
RSTART
RT
Regexp
-Retryable
SIGHUP
SIGUSR
-SS
SUBSEP
SYMTAB
TEXTDOMAIN
-TP
Trueman
UTC
-YYYY
-Za
-aB
-ab
abc
-abcdef
-alnum
-api
-argv
-arnold
-asc
asort
asorti
atan
@@ -87,11 +53,7 @@ awkprof
awkvars
bignum
bindtextdomain
-ch
cmd
-cmp
-cntrl
-comp
compl
coprocess
coprocesses
@@ -99,118 +61,71 @@ cos
datespec
dcgettext
dcngettext
-dd
-ddd
-dddddd
denom
-desc
dev
-djgpp
dl
ds
-eB
-eS
-eW
-ea
-eb
-ec
+dsensitivity
+dset
ef
-egid
egrep
en
er
errno
-euid
ev
-ew
exp
expr
-ext
-ey
-fclose
+fGNU
+fO
fd
fflush
fmt
-foo
-formfeed
fs
func
+gawkbug
gen
gensub
-getegid
-geteuid
-getgid
-getgroups
getline
-getpgrp
-getpid
-getppid
gettext
-getuid
-gid
gmo
-gmp
gsub
-hhhh
html
https
-ind
inet
int
intdiv
isarray
-lang
lc
localizable
logdir
lport
lq
lshift
-metacharacter
-metacharacters
-min
-mingw
+mkbool
mktime
monde
-mpfr
multibyte
myfunc
myhome
-myprog
nextfile
nlines
nmap
-nonsignificant
-nonwhitespace
num
op
org
-os
-parsedebug
patsplit
-pclose
-pgrpid
-pid
posix
-ppid
pre
-prec
printf
-pty
-punct
rand
regex
regexp
resplit
rhost
roundTiesToEven
-roundTowardNegative
-roundTowardPositive
-roundTowardZero
rport
rq
rshift
-rw
sed
seps
sprintf
@@ -223,7 +138,6 @@ strftime
strnum
strtod
strtonum
-subarray
substr
systime
tcp
@@ -231,17 +145,11 @@ tolower
toupper
typeof
udp
-uid
uintmax
usleep
usr
utc
val
var
-vars
-vms
-wr
www
-xdigit
xor
-zero'th