about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Arnold D. Robbins <arnold@skeeve.com> 2014-09-22 19:50:44 +0300
committer Arnold D. Robbins <arnold@skeeve.com> 2014-09-22 19:50:44 +0300
commit 6641754c13e38dd6198832f23aa2be4b4546b324 (patch)
tree f25aa9e9dcb57f97167eba8452a69d8de2f3c97c
parent ce2747c81b98b70e75ec399c8bdc6c09308380d3 (diff)
parent 06e16db227de0422f33b5f83817df55340f11846 (diff)
download egawk-6641754c13e38dd6198832f23aa2be4b4546b324.tar.gz
egawk-6641754c13e38dd6198832f23aa2be4b4546b324.tar.bz2
egawk-6641754c13e38dd6198832f23aa2be4b4546b324.zip
Merge branch 'gawk-4.1-stable'
-rw-r--r-- awklib/eg/lib/ctime.awk 3
-rw-r--r-- awklib/eg/lib/ftrans.awk 2
-rw-r--r-- awklib/eg/lib/gettime.awk 2
-rw-r--r-- awklib/eg/lib/quicksort.awk 2
-rw-r--r-- awklib/eg/lib/strtonum.awk 2
-rw-r--r-- awklib/eg/misc/arraymax.awk 10
-rw-r--r-- awklib/eg/misc/findpat.awk 13
-rw-r--r-- doc/ChangeLog 4
-rw-r--r-- doc/gawk.info 1734
-rw-r--r-- doc/gawk.texi 704
-rw-r--r-- doc/gawktexi.in 636
11 files changed, 1597 insertions, 1515 deletions
diff --git a/awklib/eg/lib/ctime.awk b/awklib/eg/lib/ctime.awk
index ca750370..cea25b7a 100644
--- a/awklib/eg/lib/ctime.awk
+++ b/awklib/eg/lib/ctime.awk
@@ -4,7 +4,8 @@
function ctime(ts, format)
{
- format = PROCINFO["strftime"]
+ format = "%a %b %e %H:%M:%S %Z %Y"
+
if (ts == 0)
ts = systime() # use current time as default
return strftime(format, ts)
diff --git a/awklib/eg/lib/ftrans.awk b/awklib/eg/lib/ftrans.awk
index 1709ac82..2fec27ef 100644
--- a/awklib/eg/lib/ftrans.awk
+++ b/awklib/eg/lib/ftrans.awk
@@ -12,4 +12,4 @@ FNR == 1 {
beginfile(FILENAME)
}
-END { endfile(_filename_) }
+END { endfile(_filename_) }
diff --git a/awklib/eg/lib/gettime.awk b/awklib/eg/lib/gettime.awk
index 3da9c8ab..4cb56330 100644
--- a/awklib/eg/lib/gettime.awk
+++ b/awklib/eg/lib/gettime.awk
@@ -31,7 +31,7 @@ function getlocaltime(time, ret, now, i)
now = systime()
# return date(1)-style output
- ret = strftime(PROCINFO["strftime"], now)
+ ret = strftime("%a %b %e %H:%M:%S %Z %Y", now)
# clear out target array
delete time
diff --git a/awklib/eg/lib/quicksort.awk b/awklib/eg/lib/quicksort.awk
index 43357ac6..3ba2d6e3 100644
--- a/awklib/eg/lib/quicksort.awk
+++ b/awklib/eg/lib/quicksort.awk
@@ -26,7 +26,7 @@ function quicksort(data, left, right, less_than, i, last)
# quicksort_swap --- helper function for quicksort, should really be inline
-function quicksort_swap(data, i, j, temp)
+function quicksort_swap(data, i, j, temp)
{
temp = data[i]
data[i] = data[j]
diff --git a/awklib/eg/lib/strtonum.awk b/awklib/eg/lib/strtonum.awk
index f82c89c5..cd56a449 100644
--- a/awklib/eg/lib/strtonum.awk
+++ b/awklib/eg/lib/strtonum.awk
@@ -51,7 +51,7 @@ function mystrtonum(str, ret, n, i, k, c)
# a[5] = "123.45"
# a[6] = "1.e3"
# a[7] = "1.32"
-# a[7] = "1.32E2"
+# a[8] = "1.32E2"
#
# for (i = 1; i in a; i++)
# print a[i], strtonum(a[i]), mystrtonum(a[i])
diff --git a/awklib/eg/misc/arraymax.awk b/awklib/eg/misc/arraymax.awk
index 20dd1768..64197f56 100644
--- a/awklib/eg/misc/arraymax.awk
+++ b/awklib/eg/misc/arraymax.awk
@@ -1,10 +1,10 @@
{
- if ($1 > max)
- max = $1
- arr[$1] = $0
+ if ($1 > max)
+ max = $1
+ arr[$1] = $0
}
END {
- for (x = 1; x <= max; x++)
- print arr[x]
+ for (x = 1; x <= max; x++)
+ print arr[x]
}
diff --git a/awklib/eg/misc/findpat.awk b/awklib/eg/misc/findpat.awk
index e9bef9ea..9d799434 100644
--- a/awklib/eg/misc/findpat.awk
+++ b/awklib/eg/misc/findpat.awk
@@ -1,10 +1,9 @@
{
- if ($1 == "FIND")
- regex = $2
- else {
- where = match($0, regex)
- if (where != 0)
- print "Match of", regex, "found at",
- where, "in", $0
+ if ($1 == "FIND")
+ regex = $2
+ else {
+ where = match($0, regex)
+ if (where != 0)
+ print "Match of", regex, "found at", where, "in", $0
}
}
diff --git a/doc/ChangeLog b/doc/ChangeLog
index 67462523..c0ecbeaf 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,7 @@
+2014-09-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Continue fixes after reading through the MS.
+
2014-09-21 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Start on fixes after reading through the MS.
diff --git a/doc/gawk.info b/doc/gawk.info
index 2f10b35c..dc347a0f 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -350,12 +350,12 @@ entitled "GNU Free Documentation License".
elements.
* Controlling Scanning:: Controlling the order in which arrays
are scanned.
-* Delete:: The `delete' statement removes an
- element from an array.
* Numeric Array Subscripts:: How to use numbers as subscripts in
`awk'.
* Uninitialized Subscripts:: Using Uninitialized variables as
subscripts.
+* Delete:: The `delete' statement removes an
+ element from an array.
* Multidimensional:: Emulating multidimensional arrays in
`awk'.
* Multiscanning:: Scanning multidimensional arrays.
@@ -7354,7 +7354,7 @@ and:
are exactly equivalent. One rather bizarre consequence of this rule is
that the following Boolean expression is valid, but does not do what
-the user probably intended:
+its author probably intended:
# Note that /foo/ is on the left of the ~
if (/foo/ ~ $1) print "found foo"
@@ -7380,9 +7380,10 @@ of the `match()' function, and as the third argument of the `split()'
and `patsplit()' functions (*note String Functions::). Modern
implementations of `awk', including `gawk', allow the third argument of
`split()' to be a regexp constant, but some older implementations do
-not. (d.c.) This can lead to confusion when attempting to use regexp
-constants as arguments to user-defined functions (*note User-defined::).
-For example:
+not. (d.c.) Because some built-in functions accept regexp constants
+as arguments, it can be confusing when attempting to use regexp
+constants as arguments to user-defined functions (*note
+User-defined::). For example:
function mysub(pat, repl, str, global)
{
@@ -7446,7 +7447,7 @@ variable's current value. Variables are given new values with
"assignment operators", "increment operators", and "decrement
operators". *Note Assignment Ops::. In addition, the `sub()' and
`gsub()' functions can change a variable's value, and the `match()',
-`patsplit()' and `split()' functions can change the contents of their
+`split()' and `patsplit()' functions can change the contents of their
array parameters. *Note String Functions::.
A few variables have special built-in meanings, such as `FS' (the
@@ -7461,8 +7462,8 @@ uppercase.
The kind of value a variable holds can change over the life of a
program. By default, variables are initialized to the empty string,
which is zero if converted to a number. There is no need to explicitly
-"initialize" a variable in `awk', which is what you would do in C and
-in most other traditional languages.
+initialize a variable in `awk', which is what you would do in C and in
+most other traditional languages.

File: gawk.info, Node: Assignment Options, Prev: Using Variables, Up: Variables
@@ -7637,7 +7638,7 @@ difference in behavior, on a GNU/Linux system:
The `en_DK.utf-8' locale is for English in Denmark, where the comma
acts as the decimal point separator. In the normal `"C"' locale, `gawk'
-treats `4,321' as `4', while in the Danish locale, it's treated as the
+treats `4,321' as 4, while in the Danish locale, it's treated as the
full number, 4.321.
Some earlier versions of `gawk' fully complied with this aspect of
@@ -8020,8 +8021,7 @@ A workaround is:
awk '/[=]=/' /dev/null
- `gawk' does not have this problem; BWK `awk' and `mawk' also do not
-(*note Other Versions::).
+ `gawk' does not have this problem; BWK `awk' and `mawk' also do not.

File: gawk.info, Node: Increment Ops, Prev: Assignment Ops, Up: All Operators
@@ -8198,9 +8198,9 @@ determine how they are compared. Variable typing follows these rules:
STRING attribute.
* Fields, `getline' input, `FILENAME', `ARGV' elements, `ENVIRON'
- elements, and the elements of an array created by `patsplit()',
- `split()' and `match()' that are numeric strings have the STRNUM
- attribute. Otherwise, they have the STRING attribute.
+ elements, and the elements of an array created by `match()',
+ `split()' and `patsplit()' that are numeric strings have the
+ STRNUM attribute. Otherwise, they have the STRING attribute.
Uninitialized variables also have the STRNUM attribute.
* Attributes propagate across assignments but are not changed by any
@@ -8250,21 +8250,21 @@ In contrast, the eight characters `" +3.14"' appearing in program text
comprise a string constant. The following examples print `1' when the
comparison between the two different constants is true, `0' otherwise:
- $ echo ' +3.14' | gawk '{ print $0 == " +3.14" }' True
+ $ echo ' +3.14' | awk '{ print($0 == " +3.14") }' True
-| 1
- $ echo ' +3.14' | gawk '{ print $0 == "+3.14" }' False
+ $ echo ' +3.14' | awk '{ print($0 == "+3.14") }' False
-| 0
- $ echo ' +3.14' | gawk '{ print $0 == "3.14" }' False
+ $ echo ' +3.14' | awk '{ print($0 == "3.14") }' False
-| 0
- $ echo ' +3.14' | gawk '{ print $0 == 3.14 }' True
+ $ echo ' +3.14' | awk '{ print($0 == 3.14) }' True
-| 1
- $ echo ' +3.14' | gawk '{ print $1 == " +3.14" }' False
+ $ echo ' +3.14' | awk '{ print($1 == " +3.14") }' False
-| 0
- $ echo ' +3.14' | gawk '{ print $1 == "+3.14" }' True
+ $ echo ' +3.14' | awk '{ print($1 == "+3.14") }' True
-| 1
- $ echo ' +3.14' | gawk '{ print $1 == "3.14" }' False
+ $ echo ' +3.14' | awk '{ print($1 == "3.14") }' False
-| 0
- $ echo ' +3.14' | gawk '{ print $1 == 3.14 }' True
+ $ echo ' +3.14' | awk '{ print($1 == 3.14) }' True
-| 1

@@ -8317,8 +8317,9 @@ Unless `b' happens to be zero or the null string, the `if' part of the
test always succeeds. Because the operators are so similar, this kind
of error is very difficult to spot when scanning the source code.
- The following list of expressions illustrates the kind of comparison
-`gawk' performs, as well as what the result of the comparison is:
+ The following list of expressions illustrates the kinds of
+comparisons `awk' performs, as well as what the result of each
+comparison is:
`1.5 <= 2.0'
numeric comparison (true)
@@ -8369,9 +8370,9 @@ regexp constant (`/'...`/') or an ordinary expression. In the latter
case, the value of the expression as a string is used as a dynamic
regexp (*note Regexp Usage::; also *note Computed Regexps::).
- In modern implementations of `awk', a constant regular expression in
-slashes by itself is also an expression. The regexp `/REGEXP/' is an
-abbreviation for the following comparison expression:
+ A constant regular expression in slashes by itself is also an
+expression. The regexp `/REGEXP/' is an abbreviation for the following
+comparison expression:
$0 ~ /REGEXP/
@@ -8387,9 +8388,9 @@ File: gawk.info, Node: POSIX String Comparison, Prev: Comparison Operators, U
The POSIX standard says that string comparison is performed based on
the locale's "collating order". This is the order in which characters
-sort, as defined by the locale (for more discussion, *note Ranges and
-Locales::). This order is usually very different from the results
-obtained when doing straight character-by-character comparison.(1)
+sort, as defined by the locale (for more discussion, *note Locales::).
+This order is usually very different from the results obtained when
+doing straight character-by-character comparison.(1)
Because this behavior differs considerably from existing practice,
`gawk' only implements it when in POSIX mode (*note Options::). Here
@@ -8446,13 +8447,15 @@ Boolean operators are:
`BOOLEAN1 || BOOLEAN2'
True if at least one of BOOLEAN1 or BOOLEAN2 is true. For
example, the following statement prints all records in the input
- that contain _either_ `edu' or `li' or both:
+ that contain _either_ `edu' or `li':
if ($0 ~ /edu/ || $0 ~ /li/) print
The subexpression BOOLEAN2 is evaluated only if BOOLEAN1 is false.
This can make a difference when BOOLEAN2 contains expressions that
- have side effects.
+ have side effects. (Thus, this test never really distinguishes
+ records that contain both `edu' and `li'--as soon as `edu' is
+ matched, the full test succeeds.)
`! BOOLEAN'
True if BOOLEAN is false. For example, the following program
@@ -8460,7 +8463,7 @@ Boolean operators are:
variable is not defined:
BEGIN { if (! ("HOME" in ENVIRON))
- print "no home!" }
+ print "no home!" }
(The `in' operator is described in *note Reference to Elements::.)
@@ -8758,8 +8761,8 @@ system about the local character set and language. The ISO C standard
defines a default `"C"' locale, which is an environment that is typical
of what many C programmers are used to.
- Once upon a time, the locale setting used to affect regexp matching
-(*note Ranges and Locales::), but this is no longer true.
+ Once upon a time, the locale setting used to affect regexp matching,
+but this is no longer true (*note Ranges and Locales::).
Locales can affect record splitting. For the normal case of `RS =
"\n"', the locale is largely irrelevant. For other single-character
@@ -8811,10 +8814,11 @@ File: gawk.info, Node: Expressions Summary, Prev: Locales, Up: Expressions
* `awk' provides the usual arithmetic operators (addition,
subtraction, multiplication, division, modulus), and unary plus
and minus. It also provides comparison operators, boolean
- operators, and regexp matching operators. String concatenation is
- accomplished by placing two expressions next to each other; there
- is no explicit operator. The three-operand `?:' operator provides
- an "if-else" test within expressions.
+ operators, array membership testing, and regexp matching
+ operators. String concatenation is accomplished by placing two
+ expressions next to each other; there is no explicit operator.
+ The three-operand `?:' operator provides an "if-else" test within
+ expressions.
* Assignment operators provide convenient shorthands for common
arithmetic operations.
@@ -8822,8 +8826,8 @@ File: gawk.info, Node: Expressions Summary, Prev: Locales, Up: Expressions
* In `awk', a value is considered to be true if it is non-zero _or_
non-null. Otherwise, the value is false.
- * A value's type is set upon each assignment and may change over its
- lifetime. The type determines how it behaves in comparisons
+ * A variable's type is set upon each assignment and may change over
+ its lifetime. The type determines how it behaves in comparisons
(string or numeric).
* Function calls return a value which may be used as part of a larger
@@ -8894,7 +8898,7 @@ summary of the types of `awk' patterns:
number) or non-null (if a string). (*Note Expression Patterns::.)
`BEGPAT, ENDPAT'
- A pair of patterns separated by a comma, specifying a range of
+ A pair of patterns separated by a comma, specifying a "range" of
records. The range includes both the initial record that matches
BEGPAT and the final record that matches ENDPAT. (*Note Ranges::.)
@@ -9105,7 +9109,7 @@ input is read. For example:
$ awk '
> BEGIN { print "Analysis of \"li\"" }
- > /li/ { ++n }
+ > /li/ { ++n }
> END { print "\"li\" appears in", n, "records." }' mail-list
-| Analysis of "li"
-| "li" appears in 4 records.
@@ -9174,9 +9178,10 @@ and `NF' were _undefined_ inside an `END' rule. The POSIX standard
specifies that `NF' is available in an `END' rule. It contains the
number of fields from the last input record. Most probably due to an
oversight, the standard does not say that `$0' is also preserved,
-although logically one would think that it should be. In fact, `gawk'
-does preserve the value of `$0' for use in `END' rules. Be aware,
-however, that BWK `awk', and possibly other implementations, do not.
+although logically one would think that it should be. In fact, all of
+BWK `awk', `mawk', and `gawk' preserve the value of `$0' for use in
+`END' rules. Be aware, however, that some other implementations and
+many older versions of Unix `awk' do not.
The third point follows from the first two. The meaning of `print'
inside a `BEGIN' or `END' rule is the same as always: `print $0'. If
@@ -9245,9 +9250,9 @@ makes it possible to catch and process I/O errors at the level of the
`awk' program.
The `next' statement (*note Next Statement::) is not allowed inside
-either a `BEGINFILE' or and `ENDFILE' rule. The `nextfile' statement
-(*note Nextfile Statement::) is allowed only inside a `BEGINFILE' rule,
-but not inside an `ENDFILE' rule.
+either a `BEGINFILE' or an `ENDFILE' rule. The `nextfile' statement is
+allowed only inside a `BEGINFILE' rule, but not inside an `ENDFILE'
+rule.
The `getline' statement (*note Getline::) is restricted inside both
`BEGINFILE' and `ENDFILE': only redirected forms of `getline' are
@@ -9282,9 +9287,9 @@ hold a pattern that the `awk' program searches for. There are two ways
to get the value of the shell variable into the body of the `awk'
program.
- The most common method is to use shell quoting to substitute the
-variable's value into the program inside the script. For example,
-consider the following program:
+ A common method is to use shell quoting to substitute the variable's
+value into the program inside the script. For example, consider the
+following program:
printf "Enter search pattern: "
read pattern
@@ -9475,18 +9480,18 @@ thing the `while' statement does is test the CONDITION. If the
CONDITION is true, it executes the statement BODY. (The CONDITION is
true when the value is not zero and not a null string.) After BODY has
been executed, CONDITION is tested again, and if it is still true, BODY
-is executed again. This process repeats until the CONDITION is no
-longer true. If the CONDITION is initially false, the body of the loop
-is never executed and `awk' continues with the statement following the
-loop. This example prints the first three fields of each record, one
-per line:
-
- awk '{
- i = 1
- while (i <= 3) {
- print $i
- i++
- }
+executes again. This process repeats until the CONDITION is no longer
+true. If the CONDITION is initially false, the body of the loop never
+executes and `awk' continues with the statement following the loop.
+This example prints the first three fields of each record, one per line:
+
+ awk '
+ {
+ i = 1
+ while (i <= 3) {
+ print $i
+ i++
+ }
}' inventory-shipped
The body of this loop is a compound statement enclosed in braces,
@@ -9517,22 +9522,22 @@ the CONDITION is true. It looks like this:
BODY
while (CONDITION)
- Even if the CONDITION is false at the start, the BODY is executed at
+ Even if the CONDITION is false at the start, the BODY executes at
least once (and only once, unless executing BODY makes CONDITION true).
Contrast this with the corresponding `while' statement:
while (CONDITION)
- BODY
+ BODY
This statement does not execute BODY even once if the CONDITION is
false to begin with. The following is an example of a `do' statement:
{
- i = 1
- do {
- print $0
- i++
- } while (i <= 10)
+ i = 1
+ do {
+ print $0
+ i++
+ } while (i <= 10)
}
This program prints each input record 10 times. However, it isn't a
@@ -9561,9 +9566,10 @@ INCREMENT. Typically, INITIALIZATION sets a variable to either zero or
one, INCREMENT adds one to it, and CONDITION compares it against the
desired number of iterations. For example:
- awk '{
- for (i = 1; i <= 3; i++)
- print $i
+ awk '
+ {
+ for (i = 1; i <= 3; i++)
+ print $i
}' inventory-shipped
This prints the first three fields of each input record, with one field
@@ -9587,7 +9593,7 @@ whatsoever. For example, the following statement prints all the powers
of two between 1 and 100:
for (i = 1; i <= 100; i *= 2)
- print i
+ print i
If there is nothing to be done, any of the three expressions in the
parentheses following the `for' keyword may be omitted. Thus,
@@ -9845,11 +9851,11 @@ rules. *Note BEGINFILE/ENDFILE::.
According to the POSIX standard, the behavior is undefined if the
`next' statement is used in a `BEGIN' or `END' rule. `gawk' treats it
-as a syntax error. Although POSIX permits it, most other `awk'
-implementations don't allow the `next' statement inside function bodies
-(*note User-defined::). Just as with any other `next' statement, a
-`next' statement inside a function body reads the next record and
-starts processing it with the first rule in the program.
+as a syntax error. Although POSIX does not disallow it, most other
+`awk' implementations don't allow the `next' statement inside function
+bodies (*note User-defined::). Just as with any other `next'
+statement, a `next' statement inside a function body reads the next
+record and starts processing it with the first rule in the program.

File: gawk.info, Node: Nextfile Statement, Next: Exit Statement, Prev: Next Statement, Up: Statements
@@ -9893,17 +9899,17 @@ files, pipes, and coprocesses that are opened with redirections. It is
not related to the main processing that `awk' does with the files
listed in `ARGV'.
- NOTE: For many years, `nextfile' was a `gawk' extension. As of
+ NOTE: For many years, `nextfile' was a common extension. In
September, 2012, it was accepted for inclusion into the POSIX
standard. See the Austin Group website
(http://austingroupbugs.net/view.php?id=607).
- The current version of BWK `awk', and `mawk' (*note Other
-Versions::) also support `nextfile'. However, they don't allow the
-`nextfile' statement inside function bodies (*note User-defined::).
-`gawk' does; a `nextfile' inside a function body reads the next record
-and starts processing it with the first rule in the program, just as
-any other `nextfile' statement.
+ The current versions of BWK `awk' and `mawk' also support
+`nextfile'. However, they don't allow the `nextfile' statement inside
+function bodies (*note User-defined::). `gawk' does; a `nextfile'
+inside a function body reads the next record and starts processing it
+with the first rule in the program, just as any other `nextfile'
+statement.

File: gawk.info, Node: Exit Statement, Prev: Nextfile Statement, Up: Statements
@@ -9927,8 +9933,8 @@ stop immediately.
An `exit' statement that is not part of a `BEGIN' or `END' rule
stops the execution of any further automatic rules for the current
record, skips reading any remaining input records, and executes the
-`END' rule if there is one. Any `ENDFILE' rules are also skipped; they
-are not executed.
+`END' rule if there is one. `gawk' also skips any `ENDFILE' rules;
+they do not execute.
In such a case, if you don't want the `END' rule to do its job, set
a variable to nonzero before the `exit' statement and check that
@@ -10015,7 +10021,7 @@ description of each variable.)
use binary I/O. Any other string value is treated the same as
`"rw"', but causes `gawk' to generate a warning message.
`BINMODE' is described in more detail in *note PC Using::. `mawk'
- *note Other Versions::), also supports this variable, but only
+ (*note Other Versions::), also supports this variable, but only
using numeric values.
``CONVFMT''
@@ -10098,9 +10104,8 @@ description of each variable.)
printing with the `print' statement. It works by being passed as
the first argument to the `sprintf()' function (*note String
Functions::). Its default value is `"%.6g"'. Earlier versions of
- `awk' also used `OFMT' to specify the format for converting
- numbers to strings in general expressions; this is now done by
- `CONVFMT'.
+ `awk' used `OFMT' to specify the format for converting numbers to
+ strings in general expressions; this is now done by `CONVFMT'.
`OFS'
This is the output field separator (*note Output Separators::).
@@ -10209,8 +10214,8 @@ Options::), they are not special.
the command line.
While you can change the value of `ARGIND' within your `awk'
- program, `gawk' automatically sets it to a new value when the next
- file is opened.
+ program, `gawk' automatically sets it to a new value when it opens
+ the next file.
`ENVIRON'
An associative array containing the values of the environment.
@@ -10260,9 +10265,9 @@ Options::), they are not special.
Getline::) inside a `BEGIN' rule can give `FILENAME' a value.
`FNR'
- The current record number in the current file. `FNR' is
- incremented each time a new record is read (*note Records::). It
- is reinitialized to zero each time a new input file is started.
+ The current record number in the current file. `awk' increments
+ `FNR' each time it reads a new record (*note Records::). `awk'
+ resets `FNR' to zero each time it starts a new input file.
`NF'
The number of fields in the current input record. `NF' is set
@@ -10286,8 +10291,8 @@ Options::), they are not special.
`NR'
The number of input records `awk' has processed since the
- beginning of the program's execution (*note Records::). `NR' is
- incremented each time a new record is read.
+ beginning of the program's execution (*note Records::). `awk'
+ increments `NR' each time it reads a new record.
`PROCINFO #'
The elements of this array provide access to information about the
@@ -10352,7 +10357,7 @@ Options::), they are not special.
`PROCINFO["sorted_in"]'
If this element exists in `PROCINFO', its value controls the
- order in which array indices will be processed by `for (INDEX
+ order in which array indices will be processed by `for (INDX
in ARRAY)' loops. Since this is an advanced feature, we
defer the full description until later; see *note Scanning an
Array::.
@@ -10370,7 +10375,7 @@ Options::), they are not special.
The following additional elements in the array are available to
provide information about the MPFR and GMP libraries if your
- version of `gawk' supports arbitrary precision numbers (*note
+ version of `gawk' supports arbitrary precision arithmetic (*note
Arbitrary Precision Arithmetic::):
`PROCINFO["mpfr_version"]'
@@ -10403,14 +10408,14 @@ Options::), they are not special.
The `PROCINFO' array has the following additional uses:
- * It may be used to cause coprocesses to communicate over
- pseudo-ttys instead of through two-way pipes; this is
- discussed further in *note Two-way I/O::.
-
* It may be used to provide a timeout when reading from any
open input file, pipe, or coprocess. *Note Read Timeout::,
for more information.
+ * It may be used to cause coprocesses to communicate over
+ pseudo-ttys instead of through two-way pipes; this is
+ discussed further in *note Two-way I/O::.
+
`RLENGTH'
The length of the substring matched by the `match()' function
(*note String Functions::). `RLENGTH' is set by invoking the
@@ -10599,6 +10604,12 @@ Because `-q' is not a valid `gawk' option, it and the following `-v'
are passed on to the `awk' program. (*Note Getopt Function::, for an
`awk' library function that parses command-line options.)
+ When designing your program, you should choose options that don't
+conflict with `gawk''s, since it will process any options that it
+accepts before passing the rest of the command line on to your program.
+Using `#!' with the `-E' option may help (*note Executable Scripts::,
+and *note Options::).
+

File: gawk.info, Node: Pattern Action Summary, Prev: Built-in Variables, Up: Patterns and Actions
@@ -10628,8 +10639,8 @@ File: gawk.info, Node: Pattern Action Summary, Prev: Built-in Variables, Up:
* The control statements in `awk' are `if'-`else', `while', `for',
and `do'-`while'. `gawk' adds the `switch' statement. There are
- two flavors of `for' statement: one for for performing general
- looping, and the other iterating through an array.
+ two flavors of `for' statement: one for performing general
+ looping, and the other for iterating through an array.
* `break' and `continue' let you exit early or start the next
iteration of a loop (or get out of a `switch').
@@ -10641,12 +10652,16 @@ File: gawk.info, Node: Pattern Action Summary, Prev: Built-in Variables, Up:
* The `exit' statement terminates your program. When executed from
an action (or function body) it transfers control to the `END'
statements. From an `END' statement body, it exits immediately.
- You may pass an optional numeric value to be used at `awk''s exit
+ You may pass an optional numeric value to be used as `awk''s exit
status.
* Some built-in variables provide control over `awk', mainly for I/O.
Other variables convey information from `awk' to your program.
+ * `ARGC' and `ARGV' make the command-line arguments available to
+ your program. Manipulating them from a `BEGIN' rule lets you
+ control how `awk' will process the provided data files.
+

File: gawk.info, Node: Arrays, Next: Functions, Prev: Patterns and Actions, Up: Top
@@ -10666,26 +10681,21 @@ about array usage. The major node moves on to discuss `gawk''s facility
for sorting arrays, and ends with a brief description of `gawk''s
ability to support true arrays of arrays.
- `awk' maintains a single set of names that may be used for naming
-variables, arrays, and functions (*note User-defined::). Thus, you
-cannot have a variable and an array with the same name in the same
-`awk' program.
-
* Menu:
* Array Basics:: The basics of arrays.
-* Delete:: The `delete' statement removes an element
- from an array.
* Numeric Array Subscripts:: How to use numbers as subscripts in
`awk'.
* Uninitialized Subscripts:: Using Uninitialized variables as subscripts.
+* Delete:: The `delete' statement removes an element
+ from an array.
* Multidimensional:: Emulating multidimensional arrays in
`awk'.
* Arrays of Arrays:: True multidimensional arrays.
* Arrays Summary:: Summary of arrays.

-File: gawk.info, Node: Array Basics, Next: Delete, Up: Arrays
+File: gawk.info, Node: Array Basics, Next: Numeric Array Subscripts, Up: Arrays
8.1 The Basics of Arrays
========================
@@ -10904,14 +10914,14 @@ encountering repeated numbers, gaps, or lines that don't begin with a
number:
{
- if ($1 > max)
- max = $1
- arr[$1] = $0
+ if ($1 > max)
+ max = $1
+ arr[$1] = $0
}
END {
- for (x = 1; x <= max; x++)
- print arr[x]
+ for (x = 1; x <= max; x++)
+ print arr[x]
}
The first rule keeps track of the largest line number seen so far;
@@ -10939,9 +10949,9 @@ overrides the others. Gaps in the line numbers can be handled with an
easy improvement to the program's `END' rule, as follows:
END {
- for (x = 1; x <= max; x++)
- if (x in arr)
- print arr[x]
+ for (x = 1; x <= max; x++)
+ if (x in arr)
+ print arr[x]
}

@@ -10959,7 +10969,7 @@ lowest index up to the highest. This technique won't do the job in
has a special kind of `for' statement for scanning an array:
for (VAR in ARRAY)
- BODY
+ BODY
This loop executes BODY once for each index in ARRAY that the program
has previously used, with the variable VAR set to that index.
@@ -11016,7 +11026,7 @@ all `awk' versions do so. Consider this program, named `loopcheck.awk':
}
}
- Here is what happens when run with `gawk':
+ Here is what happens when run with `gawk' (and `mawk'):
$ gawk -f loopcheck.awk
-| here
@@ -11119,7 +11129,8 @@ available:
to run. Changing `PROCINFO["sorted_in"]' in the loop body does not
affect the loop. For example:
- $ gawk 'BEGIN {
+ $ gawk '
+ > BEGIN {
> a[4] = 4
> a[3] = 3
> for (i in a)
@@ -11127,7 +11138,8 @@ affect the loop. For example:
> }'
-| 4 4
-| 3 3
- $ gawk 'BEGIN {
+ $ gawk '
+ > BEGIN {
> PROCINFO["sorted_in"] = "@ind_str_asc"
> a[4] = 4
> a[3] = 3
@@ -11179,87 +11191,9 @@ ordering when the numeric values are equal ensures that `gawk' behaves
consistently across different environments.

-File: gawk.info, Node: Delete, Next: Numeric Array Subscripts, Prev: Array Basics, Up: Arrays
-
-8.2 The `delete' Statement
-==========================
-
-To remove an individual element of an array, use the `delete' statement:
-
- delete ARRAY[INDEX-EXPRESSION]
+File: gawk.info, Node: Numeric Array Subscripts, Next: Uninitialized Subscripts, Prev: Array Basics, Up: Arrays
- Once an array element has been deleted, any value the element once
-had is no longer available. It is as if the element had never been
-referred to or been given a value. The following is an example of
-deleting elements in an array:
-
- for (i in frequencies)
- delete frequencies[i]
-
-This example removes all the elements from the array `frequencies'.
-Once an element is deleted, a subsequent `for' statement to scan the
-array does not report that element and the `in' operator to check for
-the presence of that element returns zero (i.e., false):
-
- delete foo[4]
- if (4 in foo)
- print "This will never be printed"
-
- It is important to note that deleting an element is _not_ the same
-as assigning it a null value (the empty string, `""'). For example:
-
- foo[4] = ""
- if (4 in foo)
- print "This is printed, even though foo[4] is empty"
-
- It is not an error to delete an element that does not exist.
-However, if `--lint' is provided on the command line (*note Options::),
-`gawk' issues a warning message when an element that is not in the
-array is deleted.
-
- All the elements of an array may be deleted with a single statement
-by leaving off the subscript in the `delete' statement, as follows:
-
- delete ARRAY
-
- Using this version of the `delete' statement is about three times
-more efficient than the equivalent loop that deletes each element one
-at a time.
-
- NOTE: For many years, using `delete' without a subscript was a
- `gawk' extension. As of September, 2012, it was accepted for
- inclusion into the POSIX standard. See the Austin Group website
- (http://austingroupbugs.net/view.php?id=544). This form of the
- `delete' statement is also supported by BWK `awk' and `mawk', as
- well as by a number of other implementations (*note Other
- Versions::).
-
- The following statement provides a portable but nonobvious way to
-clear out an array:(1)
-
- split("", array)
-
- The `split()' function (*note String Functions::) clears out the
-target array first. This call asks it to split apart the null string.
-Because there is no data to split out, the function simply clears the
-array and then returns.
-
- CAUTION: Deleting an array does not change its type; you cannot
- delete an array and then use the array's name as a scalar (i.e., a
- regular variable). For example, the following does not work:
-
- a[1] = 3
- delete a
- a = 3
-
- ---------- Footnotes ----------
-
- (1) Thanks to Michael Brennan for pointing this out.
-
-
-File: gawk.info, Node: Numeric Array Subscripts, Next: Uninitialized Subscripts, Prev: Delete, Up: Arrays
-
-8.3 Using Numbers to Subscript Arrays
+8.2 Using Numbers to Subscript Arrays
=====================================
An important aspect to remember about arrays is that _array subscripts
@@ -11288,9 +11222,9 @@ two significant digits. This test fails, since `"12.15"' is different
from `"12.153"'.
According to the rules for conversions (*note Conversion::), integer
-values are always converted to strings as integers, no matter what the
-value of `CONVFMT' may happen to be. So the usual case of the
-following works:
+values always convert to strings as integers, no matter what the value
+of `CONVFMT' may happen to be. So the usual case of the following
+works:
for (i = 1; i <= maxsub; i++)
do something with array[i]
@@ -11303,14 +11237,14 @@ example, that `array[17]', `array[021]', and `array[0x11]' all refer to
the same element!
As with many things in `awk', the majority of the time things work
-as one would expect them to. But it is useful to have a precise
+as you would expect them to. But it is useful to have a precise
knowledge of the actual rules since they can sometimes have a subtle
effect on your programs.

-File: gawk.info, Node: Uninitialized Subscripts, Next: Multidimensional, Prev: Numeric Array Subscripts, Up: Arrays
+File: gawk.info, Node: Uninitialized Subscripts, Next: Delete, Prev: Numeric Array Subscripts, Up: Arrays
-8.4 Using Uninitialized Variables as Subscripts
+8.3 Using Uninitialized Variables as Subscripts
===============================================
Suppose it's necessary to write a program to print the input data in
@@ -11356,7 +11290,86 @@ string as a subscript if `--lint' is provided on the command line
(*note Options::).

-File: gawk.info, Node: Multidimensional, Next: Arrays of Arrays, Prev: Uninitialized Subscripts, Up: Arrays
+File: gawk.info, Node: Delete, Next: Multidimensional, Prev: Uninitialized Subscripts, Up: Arrays
+
+8.4 The `delete' Statement
+==========================
+
+To remove an individual element of an array, use the `delete' statement:
+
+ delete ARRAY[INDEX-EXPRESSION]
+
+ Once an array element has been deleted, any value the element once
+had is no longer available. It is as if the element had never been
+referred to or been given a value. The following is an example of
+deleting elements in an array:
+
+ for (i in frequencies)
+ delete frequencies[i]
+
+This example removes all the elements from the array `frequencies'.
+Once an element is deleted, a subsequent `for' statement to scan the
+array does not report that element and the `in' operator to check for
+the presence of that element returns zero (i.e., false):
+
+ delete foo[4]
+ if (4 in foo)
+ print "This will never be printed"
+
+ It is important to note that deleting an element is _not_ the same
+as assigning it a null value (the empty string, `""'). For example:
+
+ foo[4] = ""
+ if (4 in foo)
+ print "This is printed, even though foo[4] is empty"
+
+ It is not an error to delete an element that does not exist.
+However, if `--lint' is provided on the command line (*note Options::),
+`gawk' issues a warning message when an element that is not in the
+array is deleted.
+
+ All the elements of an array may be deleted with a single statement
+by leaving off the subscript in the `delete' statement, as follows:
+
+ delete ARRAY
+
+ Using this version of the `delete' statement is about three times
+more efficient than the equivalent loop that deletes each element one
+at a time.
+
+ This form of the `delete' statement is also supported by BWK `awk'
+and `mawk', as well as by a number of other implementations.
+
+ NOTE: For many years, using `delete' without a subscript was a
+ common extension. In September, 2012, it was accepted for
+ inclusion into the POSIX standard. See the Austin Group website
+ (http://austingroupbugs.net/view.php?id=544).
+
+ The following statement provides a portable but nonobvious way to
+clear out an array:(1)
+
+ split("", array)
+
+ The `split()' function (*note String Functions::) clears out the
+target array first. This call asks it to split apart the null string.
+Because there is no data to split out, the function simply clears the
+array and then returns.
+
+ CAUTION: Deleting all the elements from an array does not change
+ its type; you cannot clear an array and then use the array's name
+ as a scalar (i.e., a regular variable). For example, the following
+ does not work:
+
+ a[1] = 3
+ delete a
+ a = 3
+
+ ---------- Footnotes ----------
+
+ (1) Thanks to Michael Brennan for pointing this out.
+
+
+File: gawk.info, Node: Multidimensional, Next: Arrays of Arrays, Prev: Delete, Up: Arrays
8.5 Multidimensional Arrays
===========================
@@ -11368,7 +11381,7 @@ File: gawk.info, Node: Multidimensional, Next: Arrays of Arrays, Prev: Uninit
A multidimensional array is an array in which an element is
identified by a sequence of indices instead of a single index. For
example, a two-dimensional array requires two indices. The usual way
-(in most languages, including `awk') to refer to an element of a
+(in many languages, including `awk') to refer to an element of a
two-dimensional array named `grid' is with `grid[X,Y]'.
Multidimensional arrays are supported in `awk' through concatenation
@@ -11509,8 +11522,9 @@ multidimensional subscript). So the following is valid in `gawk':
Each subarray and the main array can be of different length. In
fact, the elements of an array or its subarray do not all have to have
the same type. This means that the main array and any of its subarrays
-can be non-rectangular, or jagged in structure. One can assign a scalar
-value to the index `4' of the main array `a':
+can be non-rectangular, or jagged in structure. You can assign a scalar
+value to the index `4' of the main array `a', even though `a[1]' is
+itself an array and not a scalar:
a[4] = "An element in a jagged array"
@@ -11571,6 +11585,8 @@ an array element is itself an array:
print array[i][j]
}
}
+ else
+ print array[i]
}
If the structure of a jagged array of arrays is known in advance,
@@ -11801,8 +11817,9 @@ brackets ([ ]):
user-defined function that can be used to obtain a random
non-negative integer less than N:
- function randint(n) {
- return int(n * rand())
+ function randint(n)
+ {
+ return int(n * rand())
}
The multiplication produces a random number greater than zero and
@@ -11819,8 +11836,7 @@ brackets ([ ]):
# Roll 3 six-sided dice and
# print total number of points.
{
- printf("%d points\n",
- roll(6)+roll(6)+roll(6))
+ printf("%d points\n", roll(6) + roll(6) + roll(6))
}
CAUTION: In most `awk' implementations, including `gawk',
@@ -11907,8 +11923,7 @@ with character indices, and not byte indices.
In the following list, optional parameters are enclosed in square
brackets ([ ]). Several functions perform string substitution; the
full discussion is provided in the description of the `sub()' function,
-which comes towards the end since the list is presented in alphabetic
-order.
+which comes towards the end since the list is presented alphabetically.
Those functions that are specific to `gawk' are marked with a pound
sign (`#'). They are not available in compatibility mode (*note
@@ -11941,7 +11956,8 @@ Options::):
When comparing strings, `IGNORECASE' affects the sorting (*note
Array Sorting Functions::). If the SOURCE array contains
subarrays as values (*note Arrays of Arrays::), they will come
- last, after all scalar values.
+ last, after all scalar values. Subarrays are _not_ recursively
+ sorted.
For example, if the contents of `a' are as follows:
@@ -12044,7 +12060,10 @@ Options::):
If FIND is not found, `index()' returns zero.
- It is a fatal error to use a regexp constant for FIND.
+ With BWK `awk' and `gawk', it is a fatal error to use a regexp
+ constant for FIND. Other implementations allow it, simply
+ treating the regexp constant as an expression meaning `$0 ~
+ /regexp/'.
`length('[STRING]`)'
Return the number of characters in STRING. If STRING is a number,
@@ -12112,13 +12131,12 @@ Options::):
For example:
{
- if ($1 == "FIND")
- regex = $2
- else {
- where = match($0, regex)
- if (where != 0)
- print "Match of", regex, "found at",
- where, "in", $0
+ if ($1 == "FIND")
+ regex = $2
+ else {
+ where = match($0, regex)
+ if (where != 0)
+ print "Match of", regex, "found at", where, "in", $0
}
}
@@ -12187,7 +12205,7 @@ Options::):
The `patsplit()' function splits strings into pieces in a manner
similar to the way input lines are split into fields using `FPAT'
- (*note Splitting By Content::.
+ (*note Splitting By Content::).
Before splitting the string, `patsplit()' deletes any previously
existing elements in the arrays ARRAY and SEPS.
@@ -12198,15 +12216,14 @@ Options::):
first piece is stored in `ARRAY[1]', the second piece in
`ARRAY[2]', and so forth. The string value of the third argument,
FIELDSEP, is a regexp describing where to split STRING (much as
- `FS' can be a regexp describing where to split input records;
- *note Regexp Field Splitting::). If FIELDSEP is omitted, the
- value of `FS' is used. `split()' returns the number of elements
- created. SEPS is a `gawk' extension with `SEPS[I]' being the
- separator string between `ARRAY[I]' and `ARRAY[I+1]'. If FIELDSEP
- is a single space then any leading whitespace goes into `SEPS[0]'
- and any trailing whitespace goes into `SEPS[N]' where N is the
- return value of `split()' (that is, the number of elements in
- ARRAY).
+ `FS' can be a regexp describing where to split input records). If
+ FIELDSEP is omitted, the value of `FS' is used. `split()' returns
+ the number of elements created. SEPS is a `gawk' extension with
+ `SEPS[I]' being the separator string between `ARRAY[I]' and
+ `ARRAY[I+1]'. If FIELDSEP is a single space then any leading
+ whitespace goes into `SEPS[0]' and any trailing whitespace goes
+ into `SEPS[N]' where N is the return value of `split()' (that is,
+ the number of elements in ARRAY).
The `split()' function splits strings into pieces in a manner
similar to the way input lines are split into fields. For example:
@@ -12412,6 +12429,17 @@ Options::):
Nonalphabetic characters are left unchanged. For example,
`toupper("MiXeD cAsE 123")' returns `"MIXED CASE 123"'.
+ Matching the Null String
+
+ In `awk', the `*' operator can match the null string. This is
+particularly important for the `sub()', `gsub()', and `gensub()'
+functions. For example:
+
+ $ echo abc | awk '{ gsub(/m*/, "X"); print }'
+ -| XaXbXcX
+
+Although this makes a certain amount of sense, it can be surprising.
+
---------- Footnotes ----------
(1) Unless you use the `--non-decimal-data' option, which isn't
@@ -12431,8 +12459,8 @@ File: gawk.info, Node: Gory Details, Up: String Functions
9.1.3.1 More About `\' and `&' with `sub()', `gsub()', and `gensub()'
.....................................................................
- CAUTION: This section has been known to cause headaches. You
- might want to skip it upon first reading.
+ CAUTION: This subsubsection has been reported to cause headaches.
+ You might want to skip it upon first reading.
When using `sub()', `gsub()', or `gensub()', and trying to get
literal backslashes and ampersands into the replacement text, you need
@@ -12566,17 +12594,6 @@ Table 9.4: Escape Sequence Processing For `gensub()'
and the special cases for `sub()' and `gsub()', we recommend the use of
`gawk' and `gensub()' when you have to do substitutions.
- Matching the Null String
-
- In `awk', the `*' operator can match the null string. This is
-particularly important for the `sub()', `gsub()', and `gensub()'
-functions. For example:
-
- $ echo abc | awk '{ gsub(/m*/, "X"); print }'
- -| XaXbXcX
-
-Although this makes a certain amount of sense, it can be surprising.
-
---------- Footnotes ----------
(1) This was rather naive of him, despite there being a note in this
@@ -12626,11 +12643,10 @@ parameters are enclosed in square brackets ([ ]):
function--`gawk' also buffers its output and the `fflush()'
function forces `gawk' to flush its buffers.
- `fflush()' was added to BWK `awk' in April of 1992. For two
- decades, it was not part of the POSIX standard. As of December,
- 2012, it was accepted for inclusion into the POSIX standard. See
- the Austin Group website
- (http://austingroupbugs.net/view.php?id=634).
+ Brian Kernighan added `fflush()' to his `awk' in April of 1992.
+ For two decades, it was a common extension. In December, 2012, it
+ was accepted for inclusion into the POSIX standard. See the
+ Austin Group website (http://austingroupbugs.net/view.php?id=634).
POSIX standardizes `fflush()' as follows: If there is no argument,
or if the argument is the null string (`""'), then `awk' flushes
@@ -12817,7 +12833,7 @@ enclosed in square brackets ([ ]):
If DATESPEC does not contain enough elements or if the resulting
time is out of range, `mktime()' returns -1.
-`strftime(' [FORMAT [`,' TIMESTAMP [`,' UTC-FLAG] ] ]`)'
+`strftime('[FORMAT [`,' TIMESTAMP [`,' UTC-FLAG] ] ]`)'
Format the time specified by TIMESTAMP based on the contents of
the FORMAT string and return the result. It is similar to the
function of the same name in ISO C. If UTC-FLAG is present and is
@@ -13032,7 +13048,7 @@ to the standard output and interprets the current time according to the
format specifiers in the string. For example:
$ date '+Today is %A, %B %d, %Y.'
- -| Today is Monday, May 05, 2014.
+ -| Today is Monday, September 22, 2014.
Here is the `gawk' version of the `date' utility. It has a shell
"wrapper" to handle the `-u' option, which requires that `date' run as
@@ -13121,12 +13137,13 @@ a given value.
Finally, two other common operations are to shift the bits left or
right. For example, if you have a bit string `10111001' and you shift
-it right by three bits, you end up with `00010111'.(1) If you start over
-again with `10111001' and shift it left by three bits, you end up with
-`11001000'. `gawk' provides built-in functions that implement the
-bitwise operations just described. They are:
+it right by three bits, you end up with `00010111'.(1) If you start
+over again with `10111001' and shift it left by three bits, you end up
+with `11001000'. The following list describes `gawk''s built-in
+functions that implement the bitwise operations. Optional parameters
+are enclosed in square brackets ([ ]):
-``and(V1, V2' [`,' ...]`)''
+``and('V1`,' V2 [`,' ...]`)''
Return the bitwise AND of the arguments. There must be at least
two.
@@ -13136,13 +13153,13 @@ bitwise operations just described. They are:
``lshift(VAL, COUNT)''
Return the value of VAL, shifted left by COUNT bits.
-``or(V1, V2' [`,' ...]`)''
+``or('V1`,' V2 [`,' ...]`)''
Return the bitwise OR of the arguments. There must be at least two.
``rshift(VAL, COUNT)''
Return the value of VAL, shifted right by COUNT bits.
-``xor(V1, V2' [`,' ...]`)''
+``xor('V1`,' V2 [`,' ...]`)''
Return the bitwise XOR of the arguments. There must be at least
two.
@@ -13227,7 +13244,7 @@ File: gawk.info, Node: Type Functions, Next: I18N Functions, Prev: Bitwise Fu
`gawk' provides a single function that lets you distinguish an array
from a scalar variable. This is necessary for writing code that
-traverses every element of an array of arrays. (*note Arrays of
+traverses every element of an array of arrays (*note Arrays of
Arrays::).
`isarray(X)'
@@ -13239,12 +13256,12 @@ itself an array or not. The second is inside the body of a
user-defined function (not discussed yet; *note User-defined::), to
test if a parameter is an array or not.
- Note, however, that using `isarray()' at the global level to test
-variables makes no sense. Since you are the one writing the program, you
-are supposed to know if your variables are arrays or not. And in fact,
-due to the way `gawk' works, if you pass the name of a variable that
-has not been previously used to `isarray()', `gawk' will end up turning
-it into a scalar.
+ NOTE: Using `isarray()' at the global level to test variables
+ makes no sense. Since you are the one writing the program, you are
+ supposed to know if your variables are arrays or not. And in fact,
+ due to the way `gawk' works, if you pass the name of a variable
+ that has not been previously used to `isarray()', `gawk' ends up
+ turning it into a scalar.

File: gawk.info, Node: I18N Functions, Prev: Type Functions, Up: Built-in
@@ -13455,7 +13472,7 @@ extra whitespace signifies the start of the local variable list):
function delarray(a, i)
{
for (i in a)
- delete a[i]
+ delete a[i]
}
When working with arrays, it is often necessary to delete all the
@@ -13463,8 +13480,8 @@ elements in an array and start over with a new list of elements (*note
Delete::). Instead of having to repeat this loop everywhere that you
need to clear out an array, your program can just call `delarray'.
(This guarantees portability. The use of `delete ARRAY' to delete the
-contents of an entire array is a recent(1) addition to the POSIX
-standard.)
+contents of an entire array is a relatively recent(1) addition to the
+POSIX standard.)
The following is an example of a recursive function. It takes a
string as an input parameter and returns the string in backwards order.
@@ -13487,7 +13504,7 @@ way:
> gawk -e '{ print rev($0) }' -f rev.awk
-| !cinaP t'noD
- The C `ctime()' function takes a timestamp and returns it in a
+ The C `ctime()' function takes a timestamp and returns it as a
string, formatted in a well-known fashion. The following example uses
the built-in `strftime()' function (*note Time Functions::) to create
an `awk' version of `ctime()':
@@ -13498,12 +13515,18 @@ an `awk' version of `ctime()':
function ctime(ts, format)
{
- format = PROCINFO["strftime"]
+ format = "%a %b %e %H:%M:%S %Z %Y"
+
if (ts == 0)
ts = systime() # use current time as default
return strftime(format, ts)
}
+ You might think that `ctime()' could use `PROCINFO["strftime"]' for
+its format string. That would be a mistake, since `ctime()' is supposed
+to return the time formatted in a standard fashion, and user-level code
+could have changed `PROCINFO["strftime"]'.
+
---------- Footnotes ----------
(1) Late in 2012.
@@ -14045,7 +14068,7 @@ mechanism allows you to sort arbitrary data in an arbitrary fashion.
# quicksort_swap --- helper function for quicksort, should really be inline
- function quicksort_swap(data, i, j, temp)
+ function quicksort_swap(data, i, j, temp)
{
temp = data[i]
data[i] = data[j]
@@ -14180,11 +14203,12 @@ File: gawk.info, Node: Functions Summary, Prev: Indirect Calls, Up: Functions
functions.
* POSIX `awk' provides three kinds of built-in functions: numeric,
- string, and I/O. `gawk' provides functions that work with values
- representing time, do bit manipulation, sort arrays, and
- internationalize and localize programs. `gawk' also provides
- several extensions to some of standard functions, typically in the
- form of additional arguments.
+ string, and I/O. `gawk' provides functions that sort arrays, work
+ with values representing time, do bit manipulation, determine
+ variable type (array vs. scalar), and internationalize and
+ localize programs. `gawk' also provides several extensions to
+ some of the standard functions, typically in the form of additional
+ arguments.
* Functions accept zero or more arguments and return a value. The
expressions that provide the argument values are completely
@@ -14369,8 +14393,9 @@ program, leading to bugs that are very difficult to track down:
function lib_func(x, y, l1, l2)
{
...
- USE VARIABLE some_var # some_var should be local
- ... # but is not by oversight
+ # some_var should be local but by oversight is not
+ USE VARIABLE some_var
+ ...
}
A different convention, common in the Tcl community, is to use a
@@ -14478,7 +14503,7 @@ versions of `awk':
# a[5] = "123.45"
# a[6] = "1.e3"
# a[7] = "1.32"
- # a[7] = "1.32E2"
+ # a[8] = "1.32E2"
#
# for (i = 1; i in a; i++)
# print a[i], strtonum(a[i]), mystrtonum(a[i])
@@ -14487,9 +14512,11 @@ versions of `awk':
The function first looks for C-style octal numbers (base 8). If the
input string matches a regular expression describing octal numbers,
then `mystrtonum()' loops through each character in the string. It
-sets `k' to the index in `"01234567"' of the current octal digit.
-Since the return value is one-based, the `k--' adjusts `k' so it can be
-used in computing the return value.
+sets `k' to the index in `"1234567"' of the current octal digit. The
+return value will either be the same number as the digit, or zero if
+the character is not there, which will be true for a `0'. This is
+safe, since the regexp test in the `if' ensures that only octal values
+are converted.
Similar logic applies to the code that checks for and converts a
hexadecimal value, which starts with `0x' or `0X'. The use of
@@ -14515,7 +14542,7 @@ condition or set of conditions is true. Before proceeding with a
particular computation, you make a statement about what you believe to
be the case. Such a statement is known as an "assertion". The C
language provides an `<assert.h>' header file and corresponding
-`assert()' macro that the programmer can use to make assertions. If an
+`assert()' macro that a programmer can use to make assertions. If an
assertion fails, the `assert()' macro arranges to print a diagnostic
message describing the condition that should have been true but was
not, and then it kills the program. In C, using `assert()' looks like this:
@@ -14855,7 +14882,7 @@ current time formatted in the same way as the `date' utility:
now = systime()
# return date(1)-style output
- ret = strftime(PROCINFO["strftime"], now)
+ ret = strftime("%a %b %e %H:%M:%S %Z %Y", now)
# clear out target array
delete time
@@ -14951,6 +14978,9 @@ string. Thus calling code may use something like:
This tests the result to see if it is empty or not. An equivalent
test would be `contents == ""'.
+ *Note Extension Sample Readfile::, for an extension function that
+also reads an entire file into memory.
+

File: gawk.info, Node: Data File Management, Next: Getopt Function, Prev: General Functions, Up: Library Functions
@@ -15000,15 +15030,14 @@ does so _portably_; this works with any implementation of `awk':
# that each take the name of the file being started or
# finished, respectively.
- FILENAME != _oldfilename \
- {
+ FILENAME != _oldfilename {
if (_oldfilename != "")
endfile(_oldfilename)
_oldfilename = FILENAME
beginfile(FILENAME)
}
- END { endfile(FILENAME) }
+ END { endfile(FILENAME) }
This file must be loaded before the user's "main" program, so that
the rule it supplies is executed first.
@@ -15046,7 +15075,7 @@ solves the problem:
beginfile(FILENAME)
}
- END { endfile(_filename_) }
+ END { endfile(_filename_) }
*note Wc Program::, shows how this library function can be used and
how it simplifies writing the main program.
@@ -31039,7 +31068,7 @@ Index
* Menu:
-* ! (exclamation point), ! operator: Boolean Ops. (line 67)
+* ! (exclamation point), ! operator: Boolean Ops. (line 69)
* ! (exclamation point), ! operator <1>: Egrep Program. (line 175)
* ! (exclamation point), ! operator <2>: Ranges. (line 48)
* ! (exclamation point), ! operator: Precedence. (line 52)
@@ -31069,7 +31098,7 @@ Index
* % (percent sign), %= operator <1>: Precedence. (line 95)
* % (percent sign), %= operator: Assignment Ops. (line 130)
* & (ampersand), && operator <1>: Precedence. (line 86)
-* & (ampersand), && operator: Boolean Ops. (line 57)
+* & (ampersand), && operator: Boolean Ops. (line 59)
* & (ampersand), gsub()/gensub()/sub() functions and: Gory Details.
(line 6)
* ' (single quote): One-shot. (line 15)
@@ -31083,8 +31112,8 @@ Index
(line 55)
* * (asterisk), * operator, as regexp operator: Regexp Operators.
(line 89)
-* * (asterisk), * operator, null strings, matching: Gory Details.
- (line 143)
+* * (asterisk), * operator, null strings, matching: String Functions.
+ (line 535)
* * (asterisk), ** operator <1>: Precedence. (line 49)
* * (asterisk), ** operator: Arithmetic Ops. (line 81)
* * (asterisk), **= operator <1>: Precedence. (line 95)
@@ -31143,7 +31172,7 @@ Index
* --re-interval option: Options. (line 277)
* --sandbox option: Options. (line 284)
* --sandbox option, disabling system() function: I/O Functions.
- (line 97)
+ (line 96)
* --sandbox option, input redirection with getline: Getline. (line 19)
* --sandbox option, output redirection with print, printf: Redirection.
(line 6)
@@ -31341,12 +31370,12 @@ Index
* ambiguity, syntactic: /= operator vs. /=.../ regexp constant: Assignment Ops.
(line 148)
* ampersand (&), && operator <1>: Precedence. (line 86)
-* ampersand (&), && operator: Boolean Ops. (line 57)
+* ampersand (&), && operator: Boolean Ops. (line 59)
* ampersand (&), gsub()/gensub()/sub() functions and: Gory Details.
(line 6)
* anagram.awk program: Anagram Program. (line 22)
* anagrams, finding: Anagram Program. (line 6)
-* and: Bitwise Functions. (line 39)
+* and: Bitwise Functions. (line 40)
* AND bitwise operation: Bitwise Functions. (line 6)
* and Boolean-logic operator: Boolean Ops. (line 6)
* ANSI: Glossary. (line 34)
@@ -31380,7 +31409,7 @@ Index
(line 6)
* array scanning order, controlling: Controlling Scanning.
(line 14)
-* array, number of elements: String Functions. (line 197)
+* array, number of elements: String Functions. (line 200)
* arrays: Arrays. (line 6)
* arrays of arrays: Arrays of Arrays. (line 6)
* arrays, an example of using: Array Example. (line 6)
@@ -31388,7 +31417,7 @@ Index
* arrays, as parameters to functions: Pass By Value/Reference.
(line 47)
* arrays, associative: Array Intro. (line 50)
-* arrays, associative, library functions and: Library Names. (line 57)
+* arrays, associative, library functions and: Library Names. (line 58)
* arrays, deleting entire contents: Delete. (line 39)
* arrays, elements that don't exist: Reference to Elements.
(line 23)
@@ -31396,13 +31425,12 @@ Index
* arrays, elements, deleting: Delete. (line 6)
* arrays, elements, order of access by in operator: Scanning an Array.
(line 48)
-* arrays, elements, retrieving number of: String Functions. (line 42)
+* arrays, elements, retrieving number of: String Functions. (line 41)
* arrays, for statement and: Scanning an Array. (line 20)
* arrays, indexing: Array Intro. (line 50)
* arrays, merging into strings: Join Function. (line 6)
* arrays, multidimensional: Multidimensional. (line 10)
* arrays, multidimensional, scanning: Multiscanning. (line 11)
-* arrays, names of, and names of functions/variables: Arrays. (line 18)
* arrays, numeric subscripts: Numeric Array Subscripts.
(line 6)
* arrays, referencing elements: Reference to Elements.
@@ -31423,12 +31451,12 @@ Index
* ASCII: Ordinal Functions. (line 45)
* asort <1>: Array Sorting Functions.
(line 6)
-* asort: String Functions. (line 42)
+* asort: String Functions. (line 41)
* asort() function (gawk), arrays, sorting: Array Sorting Functions.
(line 6)
* asorti <1>: Array Sorting Functions.
(line 6)
-* asorti: String Functions. (line 42)
+* asorti: String Functions. (line 41)
* asorti() function (gawk), arrays, sorting: Array Sorting Functions.
(line 6)
* assert() function (C library): Assert Function. (line 6)
@@ -31445,8 +31473,8 @@ Index
(line 55)
* asterisk (*), * operator, as regexp operator: Regexp Operators.
(line 89)
-* asterisk (*), * operator, null strings, matching: Gory Details.
- (line 143)
+* asterisk (*), * operator, null strings, matching: String Functions.
+ (line 535)
* asterisk (*), ** operator <1>: Precedence. (line 49)
* asterisk (*), ** operator: Arithmetic Ops. (line 81)
* asterisk (*), **= operator <1>: Precedence. (line 95)
@@ -31493,7 +31521,7 @@ Index
* awk, POSIX and: Preface. (line 21)
* awk, POSIX and, See Also POSIX awk: Preface. (line 21)
* awk, regexp constants and: Comparison Operators.
- (line 102)
+ (line 103)
* awk, See Also gawk: Preface. (line 34)
* awk, terms describing: This Manual. (line 6)
* awk, uses for <1>: When. (line 6)
@@ -31579,7 +31607,7 @@ Index
* BEGIN pattern, next/nextfile statements and <1>: Next Statement.
(line 44)
* BEGIN pattern, next/nextfile statements and: I/O And BEGIN/END.
- (line 36)
+ (line 37)
* BEGIN pattern, OFS/ORS variables, assigning values to: Output Separators.
(line 20)
* BEGIN pattern, operators and: Using BEGIN/END. (line 17)
@@ -31590,7 +31618,7 @@ Index
* BEGINFILE pattern: BEGINFILE/ENDFILE. (line 6)
* BEGINFILE pattern, Boolean patterns and: Expression Patterns.
(line 70)
-* beginfile() user-defined function: Filetrans Function. (line 62)
+* beginfile() user-defined function: Filetrans Function. (line 61)
* Bentley, Jon: Glossary. (line 143)
* Benzinger, Michael: Contributors. (line 97)
* Berry, Karl <1>: Ranges and Locales. (line 74)
@@ -31604,11 +31632,11 @@ Index
* BINMODE variable <1>: PC Using. (line 33)
* BINMODE variable: User-modified. (line 15)
* bit-manipulation functions: Bitwise Functions. (line 6)
-* bits2str() user-defined function: Bitwise Functions. (line 70)
-* bitwise AND: Bitwise Functions. (line 39)
-* bitwise complement: Bitwise Functions. (line 43)
-* bitwise OR: Bitwise Functions. (line 49)
-* bitwise XOR: Bitwise Functions. (line 55)
+* bits2str() user-defined function: Bitwise Functions. (line 71)
+* bitwise AND: Bitwise Functions. (line 40)
+* bitwise complement: Bitwise Functions. (line 44)
+* bitwise OR: Bitwise Functions. (line 50)
+* bitwise XOR: Bitwise Functions. (line 56)
* bitwise, complement: Bitwise Functions. (line 25)
* bitwise, operations: Bitwise Functions. (line 6)
* bitwise, shift: Bitwise Functions. (line 32)
@@ -31652,8 +31680,8 @@ Index
* Brennan, Michael: Foreword. (line 83)
* Brian Kernighan's awk <1>: I/O Functions. (line 43)
* Brian Kernighan's awk <2>: Gory Details. (line 19)
-* Brian Kernighan's awk <3>: String Functions. (line 490)
-* Brian Kernighan's awk <4>: Delete. (line 48)
+* Brian Kernighan's awk <3>: String Functions. (line 491)
+* Brian Kernighan's awk <4>: Delete. (line 51)
* Brian Kernighan's awk <5>: Nextfile Statement. (line 47)
* Brian Kernighan's awk <6>: Continue Statement. (line 44)
* Brian Kernighan's awk <7>: Break Statement. (line 51)
@@ -31678,8 +31706,8 @@ Index
* Buening, Andreas <2>: Contributors. (line 92)
* Buening, Andreas: Acknowledgments. (line 60)
* buffering, input/output <1>: Two-way I/O. (line 52)
-* buffering, input/output: I/O Functions. (line 140)
-* buffering, interactive vs. noninteractive: I/O Functions. (line 109)
+* buffering, input/output: I/O Functions. (line 139)
+* buffering, interactive vs. noninteractive: I/O Functions. (line 108)
* buffers, flushing: I/O Functions. (line 32)
* buffers, operators for: GNU Regexp Operators.
(line 48)
@@ -31709,7 +31737,7 @@ Index
* case sensitivity, and regexps: User-modified. (line 76)
* case sensitivity, and string comparisons: User-modified. (line 76)
* case sensitivity, array indices and: Array Intro. (line 94)
-* case sensitivity, converting case: String Functions. (line 520)
+* case sensitivity, converting case: String Functions. (line 521)
* case sensitivity, example programs: Library Functions. (line 53)
* case sensitivity, gawk: Case-sensitivity. (line 26)
* case sensitivity, regexps and: Case-sensitivity. (line 6)
@@ -31789,7 +31817,7 @@ Index
* common extensions, delete to delete entire arrays: Delete. (line 39)
* common extensions, func keyword: Definition Syntax. (line 93)
* common extensions, length() applied to an array: String Functions.
- (line 197)
+ (line 200)
* common extensions, RS as a regexp: gawk split records. (line 6)
* common extensions, single character fields: Single Character Fields.
(line 6)
@@ -31798,7 +31826,7 @@ Index
(line 9)
* comparison expressions, as patterns: Expression Patterns. (line 14)
* comparison expressions, string vs. regexp: Comparison Operators.
- (line 78)
+ (line 79)
* compatibility mode (gawk), extensions: POSIX/GNU. (line 6)
* compatibility mode (gawk), file names: Special Caveats. (line 9)
* compatibility mode (gawk), hexadecimal numbers: Nondecimal-numbers.
@@ -31812,7 +31840,7 @@ Index
* compiling gawk for MS-DOS and MS-Windows: PC Compiling. (line 13)
* compiling gawk for VMS: VMS Compilation. (line 6)
* compiling gawk with EMX for OS/2: PC Compiling. (line 28)
-* compl: Bitwise Functions. (line 43)
+* compl: Bitwise Functions. (line 44)
* complement, bitwise: Bitwise Functions. (line 25)
* compound statements, control statements and: Statements. (line 10)
* concatenating: Concatenation. (line 8)
@@ -31838,15 +31866,15 @@ Index
* control statements: Statements. (line 6)
* controlling array scanning order: Controlling Scanning.
(line 14)
-* convert string to lower case: String Functions. (line 521)
-* convert string to number: String Functions. (line 388)
-* convert string to upper case: String Functions. (line 527)
+* convert string to lower case: String Functions. (line 522)
+* convert string to number: String Functions. (line 389)
+* convert string to upper case: String Functions. (line 528)
* converting integer array subscripts: Numeric Array Subscripts.
(line 31)
* converting, dates to timestamps: Time Functions. (line 76)
-* converting, numbers to strings <1>: Bitwise Functions. (line 109)
+* converting, numbers to strings <1>: Bitwise Functions. (line 110)
* converting, numbers to strings: Strings And Numbers. (line 6)
-* converting, strings to numbers <1>: Bitwise Functions. (line 109)
+* converting, strings to numbers <1>: Bitwise Functions. (line 110)
* converting, strings to numbers: Strings And Numbers. (line 6)
* CONVFMT variable <1>: User-modified. (line 30)
* CONVFMT variable: Strings And Numbers. (line 29)
@@ -31905,7 +31933,7 @@ Index
(line 20)
* dark corner, input files: awk split records. (line 111)
* dark corner, invoking awk: Command Line. (line 16)
-* dark corner, length() function: String Functions. (line 183)
+* dark corner, length() function: String Functions. (line 186)
* dark corner, locale's decimal point character: Locale influences conversions.
(line 17)
* dark corner, multiline records: Multiple Line. (line 35)
@@ -31917,7 +31945,7 @@ Index
(line 148)
* dark corner, regexp constants, as arguments to user-defined functions: Using Constant Regexps.
(line 43)
-* dark corner, split() function: String Functions. (line 359)
+* dark corner, split() function: String Functions. (line 360)
* dark corner, strings, storing: gawk split records. (line 83)
* dark corner, value of ARGV[0]: Auto-set. (line 39)
* data, fixed-width: Constant Size. (line 10)
@@ -32063,7 +32091,7 @@ Index
* deleting entire arrays: Delete. (line 39)
* Demaille, Akim: Acknowledgments. (line 60)
* describe call stack frame, in debugger: Debugger Info. (line 27)
-* differences between gawk and awk: String Functions. (line 197)
+* differences between gawk and awk: String Functions. (line 200)
* differences in awk and gawk, ARGC/ARGV variables: ARGC and ARGV.
(line 90)
* differences in awk and gawk, ARGIND variable: Auto-set. (line 44)
@@ -32110,7 +32138,7 @@ Index
(line 34)
* differences in awk and gawk, LINT variable: User-modified. (line 88)
* differences in awk and gawk, match() function: String Functions.
- (line 260)
+ (line 262)
* differences in awk and gawk, print/printf statements: Format Modifiers.
(line 13)
* differences in awk and gawk, PROCINFO array: Auto-set. (line 137)
@@ -32127,13 +32155,13 @@ Index
* differences in awk and gawk, single-character fields: Single Character Fields.
(line 6)
* differences in awk and gawk, split() function: String Functions.
- (line 347)
+ (line 348)
* differences in awk and gawk, strings: Scalar Constants. (line 20)
* differences in awk and gawk, strings, storing: gawk split records.
(line 77)
* differences in awk and gawk, SYMTAB variable: Auto-set. (line 276)
* differences in awk and gawk, TEXTDOMAIN variable: User-modified.
- (line 152)
+ (line 151)
* differences in awk and gawk, trunc-mod operation: Arithmetic Ops.
(line 66)
* directories, command-line: Command-line directories.
@@ -32205,12 +32233,12 @@ Index
* END pattern, next/nextfile statements and <1>: Next Statement.
(line 44)
* END pattern, next/nextfile statements and: I/O And BEGIN/END.
- (line 36)
+ (line 37)
* END pattern, operators and: Using BEGIN/END. (line 17)
* END pattern, print statement and: I/O And BEGIN/END. (line 16)
* ENDFILE pattern: BEGINFILE/ENDFILE. (line 6)
* ENDFILE pattern, Boolean patterns and: Expression Patterns. (line 70)
-* endfile() user-defined function: Filetrans Function. (line 62)
+* endfile() user-defined function: Filetrans Function. (line 61)
* endgrent() function (C library): Group Functions. (line 212)
* endgrent() user-defined function: Group Functions. (line 215)
* endpwent() function (C library): Passwd Functions. (line 210)
@@ -32248,7 +32276,7 @@ Index
* examining fields: Fields. (line 6)
* exclamation point (!), ! operator <1>: Egrep Program. (line 175)
* exclamation point (!), ! operator <2>: Precedence. (line 52)
-* exclamation point (!), ! operator: Boolean Ops. (line 67)
+* exclamation point (!), ! operator: Boolean Ops. (line 69)
* exclamation point (!), != operator <1>: Precedence. (line 65)
* exclamation point (!), != operator: Comparison Operators.
(line 11)
@@ -32308,7 +32336,7 @@ Index
* extensions, common, fflush() function: I/O Functions. (line 43)
* extensions, common, func keyword: Definition Syntax. (line 93)
* extensions, common, length() applied to an array: String Functions.
- (line 197)
+ (line 200)
* extensions, common, RS as a regexp: gawk split records. (line 6)
* extensions, common, single character fields: Single Character Fields.
(line 6)
@@ -32430,7 +32458,7 @@ Index
* Fish, Fred: Contributors. (line 50)
* fixed-width data: Constant Size. (line 10)
* flag variables <1>: Tee Program. (line 20)
-* flag variables: Boolean Ops. (line 67)
+* flag variables: Boolean Ops. (line 69)
* floating-point, numbers, arbitrary precision: Arbitrary Precision Arithmetic.
(line 6)
* floating-point, VAX/VMS: VMS Running. (line 51)
@@ -32453,7 +32481,7 @@ Index
* format time string: Time Functions. (line 48)
* formats, numeric output: OFMT. (line 6)
* formatting output: Printf. (line 6)
-* formatting strings: String Functions. (line 381)
+* formatting strings: String Functions. (line 382)
* forward slash (/) to enclose regular expressions: Regexp. (line 10)
* forward slash (/), / operator: Precedence. (line 55)
* forward slash (/), /= operator <1>: Precedence. (line 95)
@@ -32503,7 +32531,7 @@ Index
* functions, defining: Definition Syntax. (line 9)
* functions, library: Library Functions. (line 6)
* functions, library, assertions: Assert Function. (line 6)
-* functions, library, associative arrays and: Library Names. (line 57)
+* functions, library, associative arrays and: Library Names. (line 58)
* functions, library, C library: Getopt Function. (line 6)
* functions, library, character values as numbers: Ordinal Functions.
(line 6)
@@ -32523,8 +32551,7 @@ Index
* functions, library, rounding numbers: Round Function. (line 6)
* functions, library, user database, reading: Passwd Functions.
(line 6)
-* functions, names of <1>: Definition Syntax. (line 23)
-* functions, names of: Arrays. (line 18)
+* functions, names of: Definition Syntax. (line 23)
* functions, recursive: Definition Syntax. (line 83)
* functions, string-translation: I18N Functions. (line 6)
* functions, undefined: Pass By Value/Reference.
@@ -32544,15 +32571,13 @@ Index
* gawk, ARGIND variable in: Other Arguments. (line 15)
* gawk, awk and <1>: This Manual. (line 14)
* gawk, awk and: Preface. (line 21)
-* gawk, bitwise operations in: Bitwise Functions. (line 39)
+* gawk, bitwise operations in: Bitwise Functions. (line 40)
* gawk, break statement in: Break Statement. (line 51)
* gawk, built-in variables and: Built-in Variables. (line 14)
* gawk, character classes and: Bracket Expressions. (line 100)
* gawk, coding style in: Adding Code. (line 39)
* gawk, command-line options, and regular expressions: GNU Regexp Operators.
(line 70)
-* gawk, comparison operators and: Comparison Operators.
- (line 50)
* gawk, configuring: Configuration Philosophy.
(line 6)
* gawk, configuring, options: Additional Configuration Options.
@@ -32583,7 +32608,7 @@ Index
* gawk, hexadecimal numbers and: Nondecimal-numbers. (line 42)
* gawk, IGNORECASE variable in <1>: Array Sorting Functions.
(line 83)
-* gawk, IGNORECASE variable in <2>: String Functions. (line 58)
+* gawk, IGNORECASE variable in <2>: String Functions. (line 57)
* gawk, IGNORECASE variable in <3>: Array Intro. (line 94)
* gawk, IGNORECASE variable in <4>: User-modified. (line 76)
* gawk, IGNORECASE variable in: Case-sensitivity. (line 26)
@@ -32625,7 +32650,7 @@ Index
* gawk, splitting fields and: Constant Size. (line 88)
* gawk, string-translation functions: I18N Functions. (line 6)
* gawk, SYMTAB array in: Auto-set. (line 276)
-* gawk, TEXTDOMAIN variable in: User-modified. (line 152)
+* gawk, TEXTDOMAIN variable in: User-modified. (line 151)
* gawk, timestamps: Time Functions. (line 6)
* gawk, uses for: Preface. (line 34)
* gawk, versions of, information about, printing: Options. (line 298)
@@ -32715,7 +32740,7 @@ Index
* gsub <1>: String Functions. (line 139)
* gsub: Using Constant Regexps.
(line 43)
-* gsub() function, arguments of: String Functions. (line 460)
+* gsub() function, arguments of: String Functions. (line 461)
* gsub() function, escape processing: Gory Details. (line 6)
* h debugger command (alias for help): Miscellaneous Debugger Commands.
(line 66)
@@ -32763,7 +32788,7 @@ Index
* implementation issues, gawk, debugging: Compatibility Mode. (line 6)
* implementation issues, gawk, limits <1>: Redirection. (line 129)
* implementation issues, gawk, limits: Getline Notes. (line 14)
-* in operator <1>: For Statement. (line 75)
+* in operator <1>: For Statement. (line 76)
* in operator <2>: Precedence. (line 83)
* in operator: Comparison Operators.
(line 11)
@@ -32794,7 +32819,7 @@ Index
* input files, running awk without: Read Terminal. (line 6)
* input files, variable assignments and: Other Arguments. (line 26)
* input pipeline: Getline/Pipe. (line 9)
-* input record, length of: String Functions. (line 174)
+* input record, length of: String Functions. (line 177)
* input redirection: Getline/File. (line 6)
* input, data, nondecimal: Nondecimal Data. (line 6)
* input, explicit: Getline. (line 6)
@@ -32818,12 +32843,12 @@ Index
* integers, arbitrary precision: Arbitrary Precision Integers.
(line 6)
* integers, unsigned: Computer Arithmetic. (line 41)
-* interacting with other programs: I/O Functions. (line 75)
+* interacting with other programs: I/O Functions. (line 74)
* internationalization <1>: I18N and L10N. (line 6)
* internationalization: I18N Functions. (line 6)
* internationalization, localization <1>: Internationalization.
(line 13)
-* internationalization, localization: User-modified. (line 152)
+* internationalization, localization: User-modified. (line 151)
* internationalization, localization, character classes: Bracket Expressions.
(line 100)
* internationalization, localization, gawk and: Internationalization.
@@ -32839,7 +32864,7 @@ Index
* interpreted programs: Basic High Level. (line 15)
* interval expressions, regexp operator: Regexp Operators. (line 116)
* inventory-shipped file: Sample Data Files. (line 32)
-* invoke shell command: I/O Functions. (line 75)
+* invoke shell command: I/O Functions. (line 74)
* isarray: Type Functions. (line 11)
* ISO: Glossary. (line 367)
* ISO 8859-1: Glossary. (line 133)
@@ -32892,19 +32917,19 @@ Index
* left angle bracket (<), <= operator <1>: Precedence. (line 65)
* left angle bracket (<), <= operator: Comparison Operators.
(line 11)
-* left shift: Bitwise Functions. (line 46)
+* left shift: Bitwise Functions. (line 47)
* left shift, bitwise: Bitwise Functions. (line 32)
* leftmost longest match: Multiple Line. (line 26)
-* length: String Functions. (line 167)
-* length of input record: String Functions. (line 174)
-* length of string: String Functions. (line 167)
+* length: String Functions. (line 170)
+* length of input record: String Functions. (line 177)
+* length of string: String Functions. (line 170)
* Lesser General Public License (LGPL): Glossary. (line 396)
* LGPL (Lesser General Public License): Glossary. (line 396)
* libmawk: Other Versions. (line 121)
* libraries of awk functions: Library Functions. (line 6)
* libraries of awk functions, assertions: Assert Function. (line 6)
* libraries of awk functions, associative arrays and: Library Names.
- (line 57)
+ (line 58)
* libraries of awk functions, character values as numbers: Ordinal Functions.
(line 6)
* libraries of awk functions, command-line options: Getopt Function.
@@ -32924,7 +32949,7 @@ Index
* libraries of awk functions, user database, reading: Passwd Functions.
(line 6)
* line breaks: Statements/Lines. (line 6)
-* line continuations: Boolean Ops. (line 62)
+* line continuations: Boolean Ops. (line 64)
* line continuations, gawk: Conditional Exp. (line 34)
* line continuations, in print statement: Print Examples. (line 76)
* line continuations, with C shell: More Complex. (line 30)
@@ -32970,7 +32995,7 @@ Index
* long options: Command Line. (line 13)
* loops: While Statement. (line 6)
* loops, break statement and: Break Statement. (line 6)
-* loops, continue statements and: For Statement. (line 64)
+* loops, continue statements and: For Statement. (line 65)
* loops, count for header, in a profile: Profiling. (line 131)
* loops, do-while: Do Statement. (line 6)
* loops, exiting: Break Statement. (line 6)
@@ -32979,7 +33004,7 @@ Index
* loops, See Also while statement: While Statement. (line 6)
* loops, while: While Statement. (line 6)
* ls utility: More Complex. (line 15)
-* lshift: Bitwise Functions. (line 46)
+* lshift: Bitwise Functions. (line 47)
* lvalues/rvalues: Assignment Ops. (line 32)
* mail-list file: Sample Data Files. (line 6)
* mailing labels, printing: Labels Program. (line 6)
@@ -32991,14 +33016,14 @@ Index
(line 6)
* marked strings, extracting: String Extraction. (line 6)
* Marx, Groucho: Increment Ops. (line 60)
-* match: String Functions. (line 207)
-* match regexp in string: String Functions. (line 207)
+* match: String Functions. (line 210)
+* match regexp in string: String Functions. (line 210)
* match() function, RSTART/RLENGTH variables: String Functions.
- (line 224)
+ (line 227)
* matching, expressions, See comparison expressions: Typing and Comparison.
(line 9)
* matching, leftmost longest: Multiple Line. (line 26)
-* matching, null strings: Gory Details. (line 143)
+* matching, null strings: String Functions. (line 535)
* mawk utility <1>: Other Versions. (line 44)
* mawk utility <2>: Nextfile Statement. (line 47)
* mawk utility <3>: Concatenation. (line 36)
@@ -33028,17 +33053,15 @@ Index
* multiple-line records: Multiple Line. (line 6)
* n debugger command (alias for next): Debugger Execution Control.
(line 43)
-* names, arrays/variables <1>: Library Names. (line 6)
-* names, arrays/variables: Arrays. (line 18)
+* names, arrays/variables: Library Names. (line 6)
* names, functions <1>: Library Names. (line 6)
* names, functions: Definition Syntax. (line 23)
-* namespace issues <1>: Library Names. (line 6)
-* namespace issues: Arrays. (line 18)
+* namespace issues: Library Names. (line 6)
* namespace issues, functions: Definition Syntax. (line 23)
* NetBSD: Glossary. (line 611)
* networks, programming: TCP/IP Networking. (line 6)
* networks, support for: Special Network. (line 6)
-* newlines <1>: Boolean Ops. (line 67)
+* newlines <1>: Boolean Ops. (line 69)
* newlines <2>: Options. (line 258)
* newlines: Statements/Lines. (line 6)
* newlines, as field separators: Default Field Splitting.
@@ -33054,14 +33077,14 @@ Index
(line 43)
* next file statement: Feature History. (line 169)
* next statement <1>: Next Statement. (line 6)
-* next statement: Boolean Ops. (line 93)
-* next statement, BEGIN/END patterns and: I/O And BEGIN/END. (line 36)
+* next statement: Boolean Ops. (line 95)
+* next statement, BEGIN/END patterns and: I/O And BEGIN/END. (line 37)
* next statement, BEGINFILE/ENDFILE patterns and: BEGINFILE/ENDFILE.
(line 49)
* next statement, user-defined functions and: Next Statement. (line 44)
* nextfile statement: Nextfile Statement. (line 6)
* nextfile statement, BEGIN/END patterns and: I/O And BEGIN/END.
- (line 36)
+ (line 37)
* nextfile statement, BEGINFILE/ENDFILE patterns and: BEGINFILE/ENDFILE.
(line 26)
* nextfile statement, user-defined functions and: Nextfile Statement.
@@ -33091,9 +33114,9 @@ Index
(line 43)
* null strings, converting numbers to strings: Strings And Numbers.
(line 21)
-* null strings, matching: Gory Details. (line 143)
-* number as string of bits: Bitwise Functions. (line 109)
-* number of array elements: String Functions. (line 197)
+* null strings, matching: String Functions. (line 535)
+* number as string of bits: Bitwise Functions. (line 110)
+* number of array elements: String Functions. (line 200)
* number sign (#), #! (executable scripts): Executable Scripts.
(line 6)
* number sign (#), commenting: Comments. (line 6)
@@ -33102,7 +33125,7 @@ Index
* numbers, as values of characters: Ordinal Functions. (line 6)
* numbers, Cliff random: Cliff Random Function.
(line 6)
-* numbers, converting <1>: Bitwise Functions. (line 109)
+* numbers, converting <1>: Bitwise Functions. (line 110)
* numbers, converting: Strings And Numbers. (line 6)
* numbers, converting, to strings: User-modified. (line 30)
* numbers, hexadecimal: Nondecimal-numbers. (line 6)
@@ -33120,7 +33143,7 @@ Index
* OFMT variable <2>: Strings And Numbers. (line 57)
* OFMT variable: OFMT. (line 15)
* OFMT variable, POSIX awk and: OFMT. (line 27)
-* OFS variable <1>: User-modified. (line 114)
+* OFS variable <1>: User-modified. (line 113)
* OFS variable <2>: Output Separators. (line 6)
* OFS variable: Changing Fields. (line 64)
* OpenBSD: Glossary. (line 611)
@@ -33150,7 +33173,7 @@ Index
* operators, precedence: Increment Ops. (line 60)
* operators, relational, See operators, comparison: Typing and Comparison.
(line 9)
-* operators, short-circuit: Boolean Ops. (line 57)
+* operators, short-circuit: Boolean Ops. (line 59)
* operators, string: Concatenation. (line 8)
* operators, string-matching: Regexp Usage. (line 19)
* operators, string-matching, for buffers: GNU Regexp Operators.
@@ -33166,14 +33189,14 @@ Index
* options, long <1>: Options. (line 6)
* options, long: Command Line. (line 13)
* options, printing list of: Options. (line 154)
-* or: Bitwise Functions. (line 49)
+* or: Bitwise Functions. (line 50)
* OR bitwise operation: Bitwise Functions. (line 6)
* or Boolean-logic operator: Boolean Ops. (line 6)
* ord() extension function: Extension Sample Ord.
(line 12)
* ord() user-defined function: Ordinal Functions. (line 16)
* order of evaluation, concatenation: Concatenation. (line 41)
-* ORS variable <1>: User-modified. (line 119)
+* ORS variable <1>: User-modified. (line 118)
* ORS variable: Output Separators. (line 20)
* output field separator, See OFS variable: Changing Fields. (line 64)
* output record separator, See ORS variable: Output Separators.
@@ -33197,7 +33220,7 @@ Index
* parentheses (), in a profile: Profiling. (line 146)
* parentheses (), regexp operator: Regexp Operators. (line 81)
* password file: Passwd Functions. (line 16)
-* patsplit: String Functions. (line 294)
+* patsplit: String Functions. (line 296)
* patterns: Patterns and Actions.
(line 6)
* patterns, comparison expressions as: Expression Patterns. (line 14)
@@ -33253,7 +33276,7 @@ Index
* portability, gawk: New Ports. (line 6)
* portability, gettext library and: Explaining gettext. (line 11)
* portability, internationalization and: I18N Portability. (line 6)
-* portability, length() function: String Functions. (line 176)
+* portability, length() function: String Functions. (line 179)
* portability, new awk vs. old awk: Strings And Numbers. (line 57)
* portability, next statement in user-defined functions: Pass By Value/Reference.
(line 91)
@@ -33261,7 +33284,7 @@ Index
* portability, operators: Increment Ops. (line 60)
* portability, operators, not in POSIX awk: Precedence. (line 98)
* portability, POSIXLY_CORRECT environment variable: Options. (line 357)
-* portability, substr() function: String Functions. (line 510)
+* portability, substr() function: String Functions. (line 511)
* portable object files <1>: Translator i18n. (line 6)
* portable object files: Explaining gettext. (line 37)
* portable object files, converting to message object files: I18N Example.
@@ -33297,7 +33320,7 @@ Index
* POSIX awk, FS variable and: User-modified. (line 60)
* POSIX awk, function keyword in: Definition Syntax. (line 93)
* POSIX awk, functions and, gsub()/sub(): Gory Details. (line 90)
-* POSIX awk, functions and, length(): String Functions. (line 176)
+* POSIX awk, functions and, length(): String Functions. (line 179)
* POSIX awk, GNU long options and: Options. (line 15)
* POSIX awk, interval expressions in: Regexp Operators. (line 135)
* POSIX awk, next/nextfile statements and: Next Statement. (line 44)
@@ -33314,7 +33337,7 @@ Index
* POSIX, gawk extensions not included in: POSIX/GNU. (line 6)
* POSIX, programs, implementing in awk: Clones. (line 6)
* POSIXLY_CORRECT environment variable: Options. (line 337)
-* PREC variable: User-modified. (line 124)
+* PREC variable: User-modified. (line 123)
* precedence <1>: Precedence. (line 6)
* precedence: Increment Ops. (line 60)
* precedence, regexp operators: Regexp Operators. (line 156)
@@ -33325,7 +33348,7 @@ Index
* print statement, commas, omitting: Print Examples. (line 31)
* print statement, I/O operators in: Precedence. (line 71)
* print statement, line continuations and: Print Examples. (line 76)
-* print statement, OFMT variable and: User-modified. (line 114)
+* print statement, OFMT variable and: User-modified. (line 113)
* print statement, See Also redirection, of output: Redirection.
(line 17)
* print statement, sprintf() function and: Round Function. (line 6)
@@ -33441,7 +33464,7 @@ Index
* readfile() user-defined function: Readfile Function. (line 30)
* reading input files: Reading Files. (line 6)
* recipe for a programming language: History. (line 6)
-* record separators <1>: User-modified. (line 133)
+* record separators <1>: User-modified. (line 132)
* record separators: awk split records. (line 6)
* record separators, changing: awk split records. (line 85)
* record separators, regular expressions as: awk split records.
@@ -33462,7 +33485,7 @@ Index
(line 77)
* regexp: Regexp. (line 6)
* regexp constants <1>: Comparison Operators.
- (line 102)
+ (line 103)
* regexp constants <2>: Regexp Constants. (line 6)
* regexp constants: Regexp Usage. (line 57)
* regexp constants, /=.../, /= operator and: Assignment Ops. (line 148)
@@ -33508,7 +33531,7 @@ Index
* regular expressions, searching for: Egrep Program. (line 6)
* relational operators, See comparison operators: Typing and Comparison.
(line 9)
-* replace in string: String Functions. (line 406)
+* replace in string: String Functions. (line 407)
* return debugger command: Debugger Execution Control.
(line 54)
* return statement, user-defined functions: Return Statement. (line 6)
@@ -33529,11 +33552,11 @@ Index
(line 11)
* right angle bracket (>), >> operator (I/O) <1>: Precedence. (line 65)
* right angle bracket (>), >> operator (I/O): Redirection. (line 50)
-* right shift: Bitwise Functions. (line 52)
+* right shift: Bitwise Functions. (line 53)
* right shift, bitwise: Bitwise Functions. (line 32)
* Ritchie, Dennis: Basic Data Typing. (line 54)
* RLENGTH variable: Auto-set. (line 259)
-* RLENGTH variable, match() function and: String Functions. (line 224)
+* RLENGTH variable, match() function and: String Functions. (line 227)
* Robbins, Arnold <1>: Future Extensions. (line 6)
* Robbins, Arnold <2>: Bugs. (line 32)
* Robbins, Arnold <3>: Contributors. (line 141)
@@ -33553,13 +33576,13 @@ Index
* round to nearest integer: Numeric Functions. (line 38)
* round() user-defined function: Round Function. (line 16)
* rounding numbers: Round Function. (line 6)
-* ROUNDMODE variable: User-modified. (line 128)
-* RS variable <1>: User-modified. (line 133)
+* ROUNDMODE variable: User-modified. (line 127)
+* RS variable <1>: User-modified. (line 132)
* RS variable: awk split records. (line 12)
* RS variable, multiline records and: Multiple Line. (line 17)
-* rshift: Bitwise Functions. (line 52)
+* rshift: Bitwise Functions. (line 53)
* RSTART variable: Auto-set. (line 265)
-* RSTART variable, match() function and: String Functions. (line 224)
+* RSTART variable, match() function and: String Functions. (line 227)
* RT variable <1>: Auto-set. (line 272)
* RT variable <2>: Multiple Line. (line 129)
* RT variable: awk split records. (line 125)
@@ -33612,12 +33635,12 @@ Index
* separators, field, FIELDWIDTHS variable and: User-modified. (line 37)
* separators, field, FPAT variable and: User-modified. (line 43)
* separators, field, POSIX and: Fields. (line 6)
-* separators, for records <1>: User-modified. (line 133)
+* separators, for records <1>: User-modified. (line 132)
* separators, for records: awk split records. (line 6)
* separators, for records, regular expressions as: awk split records.
(line 125)
* separators, for statements in actions: Action Overview. (line 19)
-* separators, subscript: User-modified. (line 146)
+* separators, subscript: User-modified. (line 145)
* set breakpoint: Breakpoint Control. (line 11)
* set debugger command: Viewing And Changing Data.
(line 59)
@@ -33635,7 +33658,7 @@ Index
* shells, variables: Using Shell Variables.
(line 6)
* shift, bitwise: Bitwise Functions. (line 32)
-* short-circuit operators: Boolean Ops. (line 57)
+* short-circuit operators: Boolean Ops. (line 59)
* show all source files, in debugger: Debugger Info. (line 45)
* show breakpoints: Debugger Info. (line 21)
* show function arguments, in debugger: Debugger Info. (line 18)
@@ -33666,14 +33689,14 @@ Index
(line 38)
* sidebar, Changing NR and FNR: Auto-set. (line 314)
* sidebar, Controlling Output Buffering with system(): I/O Functions.
- (line 138)
+ (line 137)
* sidebar, Escape Sequences for Metacharacters: Escape Sequences.
(line 136)
* sidebar, FS and IGNORECASE: Field Splitting Summary.
(line 64)
* sidebar, Interactive Versus Noninteractive Buffering: I/O Functions.
- (line 107)
-* sidebar, Matching the Null String: Gory Details. (line 141)
+ (line 106)
+* sidebar, Matching the Null String: String Functions. (line 533)
* sidebar, Operator Evaluation Order: Increment Ops. (line 58)
* sidebar, Piping into sh: Redirection. (line 134)
* sidebar, Pre-POSIX awk Used OFMT For String Conversion: Strings And Numbers.
@@ -33681,7 +33704,7 @@ Index
* sidebar, Recipe For A Programming Language: History. (line 6)
* sidebar, RS = "\0" Is Not Portable: gawk split records. (line 63)
* sidebar, So Why Does gawk have BEGINFILE and ENDFILE?: Filetrans Function.
- (line 83)
+ (line 82)
* sidebar, Syntactic Ambiguities Between /= and Regular Expressions: Assignment Ops.
(line 146)
* sidebar, Understanding #!: Executable Scripts. (line 31)
@@ -33717,8 +33740,8 @@ Index
* sleep() extension function: Extension Sample Time.
(line 22)
* Solaris, POSIX-compliant awk: Other Versions. (line 96)
-* sort array: String Functions. (line 42)
-* sort array indices: String Functions. (line 42)
+* sort array: String Functions. (line 41)
+* sort array indices: String Functions. (line 41)
* sort function, arrays, sorting: Array Sorting Functions.
(line 6)
* sort utility: Word Sorting. (line 50)
@@ -33742,14 +33765,14 @@ Index
* source files, search path for: Programs Exercises. (line 70)
* sparse arrays: Array Intro. (line 72)
* Spencer, Henry: Glossary. (line 11)
-* split: String Functions. (line 313)
-* split string into array: String Functions. (line 294)
+* split: String Functions. (line 315)
+* split string into array: String Functions. (line 296)
* split utility: Split Program. (line 6)
* split() function, array elements, deleting: Delete. (line 61)
* split.awk program: Split Program. (line 30)
-* sprintf <1>: String Functions. (line 381)
+* sprintf <1>: String Functions. (line 382)
* sprintf: OFMT. (line 15)
-* sprintf() function, OFMT variable and: User-modified. (line 114)
+* sprintf() function, OFMT variable and: User-modified. (line 113)
* sprintf() function, print/printf statements and: Round Function.
(line 6)
* sqrt: Numeric Functions. (line 94)
@@ -33785,16 +33808,16 @@ Index
* string constants, vs. regexp constants: Computed Regexps. (line 39)
* string extraction (internationalization): String Extraction.
(line 6)
-* string length: String Functions. (line 167)
+* string length: String Functions. (line 170)
* string operators: Concatenation. (line 8)
-* string, regular expression match: String Functions. (line 207)
+* string, regular expression match: String Functions. (line 210)
* string-manipulation functions: String Functions. (line 6)
* string-matching operators: Regexp Usage. (line 19)
* string-translation functions: I18N Functions. (line 6)
-* strings splitting, example: String Functions. (line 333)
-* strings, converting <1>: Bitwise Functions. (line 109)
+* strings splitting, example: String Functions. (line 334)
+* strings, converting <1>: Bitwise Functions. (line 110)
* strings, converting: Strings And Numbers. (line 6)
-* strings, converting letter case: String Functions. (line 520)
+* strings, converting letter case: String Functions. (line 521)
* strings, converting, numbers to: User-modified. (line 30)
* strings, empty, See null strings: awk split records. (line 115)
* strings, extracting: String Extraction. (line 6)
@@ -33804,15 +33827,15 @@ Index
* strings, null: Regexp Field Splitting.
(line 43)
* strings, numeric: Variable Typing. (line 6)
-* strtonum: String Functions. (line 388)
+* strtonum: String Functions. (line 389)
* strtonum() function (gawk), --non-decimal-data option and: Nondecimal Data.
(line 36)
-* sub <1>: String Functions. (line 406)
+* sub <1>: String Functions. (line 407)
* sub: Using Constant Regexps.
(line 43)
-* sub() function, arguments of: String Functions. (line 460)
+* sub() function, arguments of: String Functions. (line 461)
* sub() function, escape processing: Gory Details. (line 6)
-* subscript separators: User-modified. (line 146)
+* subscript separators: User-modified. (line 145)
* subscripts in arrays, multidimensional: Multidimensional. (line 10)
* subscripts in arrays, multidimensional, scanning: Multiscanning.
(line 11)
@@ -33820,30 +33843,30 @@ Index
(line 6)
* subscripts in arrays, uninitialized variables as: Uninitialized Subscripts.
(line 6)
-* SUBSEP variable: User-modified. (line 146)
+* SUBSEP variable: User-modified. (line 145)
* SUBSEP variable, and multidimensional arrays: Multidimensional.
(line 16)
* substitute in string: String Functions. (line 89)
-* substr: String Functions. (line 479)
-* substring: String Functions. (line 479)
+* substr: String Functions. (line 480)
+* substring: String Functions. (line 480)
* Sumner, Andrew: Other Versions. (line 64)
* supplementary groups of gawk process: Auto-set. (line 244)
* switch statement: Switch Statement. (line 6)
* SYMTAB array: Auto-set. (line 276)
* syntactic ambiguity: /= operator vs. /=.../ regexp constant: Assignment Ops.
(line 148)
-* system: I/O Functions. (line 75)
+* system: I/O Functions. (line 74)
* systime: Time Functions. (line 66)
* t debugger command (alias for tbreak): Breakpoint Control. (line 90)
* tbreak debugger command: Breakpoint Control. (line 90)
-* Tcl: Library Names. (line 57)
+* Tcl: Library Names. (line 58)
* TCP/IP: TCP/IP Networking. (line 6)
* TCP/IP, support for: Special Network. (line 6)
* tee utility: Tee Program. (line 6)
* tee.awk program: Tee Program. (line 26)
* temporary breakpoint: Breakpoint Control. (line 90)
* terminating records: awk split records. (line 125)
-* testbits.awk program: Bitwise Functions. (line 70)
+* testbits.awk program: Bitwise Functions. (line 71)
* testext extension: Extension Sample API Tests.
(line 6)
* Texinfo <1>: Adding Code. (line 100)
@@ -33859,7 +33882,7 @@ Index
* text, printing: Print. (line 22)
* text, printing, unduplicated lines of: Uniq Program. (line 6)
* TEXTDOMAIN variable <1>: Programmer i18n. (line 9)
-* TEXTDOMAIN variable: User-modified. (line 152)
+* TEXTDOMAIN variable: User-modified. (line 151)
* TEXTDOMAIN variable, BEGIN pattern and: Programmer i18n. (line 60)
* TEXTDOMAIN variable, portability and: I18N Portability. (line 20)
* textdomain() function (C library): Explaining gettext. (line 28)
@@ -33882,8 +33905,8 @@ Index
* timestamps, converting dates to: Time Functions. (line 76)
* timestamps, formatted: Getlocaltime Function.
(line 6)
-* tolower: String Functions. (line 521)
-* toupper: String Functions. (line 527)
+* tolower: String Functions. (line 522)
+* toupper: String Functions. (line 528)
* tr utility: Translate Program. (line 6)
* trace debugger command: Miscellaneous Debugger Commands.
(line 108)
@@ -33902,15 +33925,15 @@ Index
(line 23)
* troubleshooting, fatal errors, printf format strings: Format Modifiers.
(line 158)
-* troubleshooting, fflush() function: I/O Functions. (line 63)
+* troubleshooting, fflush() function: I/O Functions. (line 62)
* troubleshooting, function call syntax: Function Calls. (line 30)
* troubleshooting, gawk: Compatibility Mode. (line 6)
* troubleshooting, gawk, bug reports: Bugs. (line 9)
* troubleshooting, gawk, fatal errors, function arguments: Calling Built-in.
(line 16)
* troubleshooting, getline function: File Checking. (line 25)
-* troubleshooting, gsub()/sub() functions: String Functions. (line 470)
-* troubleshooting, match() function: String Functions. (line 289)
+* troubleshooting, gsub()/sub() functions: String Functions. (line 471)
+* troubleshooting, match() function: String Functions. (line 291)
* troubleshooting, print statement, omitting commas: Print Examples.
(line 31)
* troubleshooting, printing: Redirection. (line 112)
@@ -33919,8 +33942,8 @@ Index
* troubleshooting, regexp constants vs. string constants: Computed Regexps.
(line 39)
* troubleshooting, string concatenation: Concatenation. (line 26)
-* troubleshooting, substr() function: String Functions. (line 497)
-* troubleshooting, system() function: I/O Functions. (line 97)
+* troubleshooting, substr() function: String Functions. (line 498)
+* troubleshooting, system() function: I/O Functions. (line 96)
* troubleshooting, typographical errors, global variables: Options.
(line 98)
* true, logical: Truth Values. (line 6)
@@ -33985,7 +34008,7 @@ Index
* variables, built-in: Using Variables. (line 23)
* variables, built-in, -v option, setting with: Options. (line 40)
* variables, built-in, conveying information: Auto-set. (line 6)
-* variables, flag: Boolean Ops. (line 67)
+* variables, flag: Boolean Ops. (line 69)
* variables, getline command into, using <1>: Getline/Variable/Coprocess.
(line 6)
* variables, getline command into, using <2>: Getline/Variable/Pipe.
@@ -33997,7 +34020,6 @@ Index
* variables, global, printing list of: Options. (line 93)
* variables, initializing: Using Variables. (line 23)
* variables, local to a function: Variable Scope. (line 6)
-* variables, names of: Arrays. (line 18)
* variables, private: Library Names. (line 11)
* variables, setting: Options. (line 32)
* variables, shadowing: Definition Syntax. (line 71)
@@ -34018,7 +34040,7 @@ Index
* vertical bar (|), |& operator (I/O) <2>: Precedence. (line 65)
* vertical bar (|), |& operator (I/O): Getline/Coprocess. (line 6)
* vertical bar (|), || operator <1>: Precedence. (line 89)
-* vertical bar (|), || operator: Boolean Ops. (line 57)
+* vertical bar (|), || operator: Boolean Ops. (line 59)
* Vinschen, Corinna: Acknowledgments. (line 60)
* w debugger command (alias for watch): Viewing And Changing Data.
(line 67)
@@ -34061,7 +34083,7 @@ Index
* writea() extension function: Extension Sample Read write array.
(line 9)
* xgettext utility: String Extraction. (line 13)
-* xor: Bitwise Functions. (line 55)
+* xor: Bitwise Functions. (line 56)
* XOR bitwise operation: Bitwise Functions. (line 6)
* Yawitz, Efraim: Contributors. (line 131)
* Zaretskii, Eli <1>: Bugs. (line 71)
@@ -34083,7 +34105,7 @@ Index
* | (vertical bar), |& operator (I/O), pipes, closing: Close Files And Pipes.
(line 120)
* | (vertical bar), || operator <1>: Precedence. (line 89)
-* | (vertical bar), || operator: Boolean Ops. (line 57)
+* | (vertical bar), || operator: Boolean Ops. (line 59)
* ~ (tilde), ~ operator <1>: Expression Patterns. (line 24)
* ~ (tilde), ~ operator <2>: Precedence. (line 80)
* ~ (tilde), ~ operator <3>: Comparison Operators.
@@ -34237,413 +34259,413 @@ Ref: Scalar Constants-Footnote-1315459
Node: Nondecimal-numbers315709
Node: Regexp Constants318709
Node: Using Constant Regexps319234
-Node: Variables322306
-Node: Using Variables322961
-Node: Assignment Options324867
-Node: Conversion326742
-Node: Strings And Numbers327266
-Ref: Strings And Numbers-Footnote-1330328
-Node: Locale influences conversions330437
-Ref: table-locale-affects333154
-Node: All Operators333742
-Node: Arithmetic Ops334372
-Node: Concatenation336877
-Ref: Concatenation-Footnote-1339696
-Node: Assignment Ops339802
-Ref: table-assign-ops344785
-Node: Increment Ops346088
-Node: Truth Values and Conditions349526
-Node: Truth Values350609
-Node: Typing and Comparison351658
-Node: Variable Typing352451
-Node: Comparison Operators356103
-Ref: table-relational-ops356513
-Node: POSIX String Comparison360062
-Ref: POSIX String Comparison-Footnote-1361146
-Node: Boolean Ops361284
-Ref: Boolean Ops-Footnote-1365623
-Node: Conditional Exp365714
-Node: Function Calls367441
-Node: Precedence371321
-Node: Locales374989
-Node: Expressions Summary376620
-Node: Patterns and Actions379161
-Node: Pattern Overview380277
-Node: Regexp Patterns381954
-Node: Expression Patterns382497
-Node: Ranges386277
-Node: BEGIN/END389383
-Node: Using BEGIN/END390145
-Ref: Using BEGIN/END-Footnote-1392881
-Node: I/O And BEGIN/END392987
-Node: BEGINFILE/ENDFILE395258
-Node: Empty398189
-Node: Using Shell Variables398506
-Node: Action Overview400789
-Node: Statements403116
-Node: If Statement404964
-Node: While Statement406462
-Node: Do Statement408506
-Node: For Statement409662
-Node: Switch Statement412814
-Node: Break Statement415202
-Node: Continue Statement417243
-Node: Next Statement419068
-Node: Nextfile Statement421438
-Node: Exit Statement424095
-Node: Built-in Variables426499
-Node: User-modified427626
-Ref: User-modified-Footnote-1435315
-Node: Auto-set435377
-Ref: Auto-set-Footnote-1448566
-Ref: Auto-set-Footnote-2448771
-Node: ARGC and ARGV448827
-Node: Pattern Action Summary452731
-Node: Arrays454954
-Node: Array Basics456503
-Node: Array Intro457329
-Ref: figure-array-elements459302
-Ref: Array Intro-Footnote-1461826
-Node: Reference to Elements461954
-Node: Assigning Elements464404
-Node: Array Example464895
-Node: Scanning an Array466627
-Node: Controlling Scanning469628
-Ref: Controlling Scanning-Footnote-1474801
-Node: Delete475117
-Ref: Delete-Footnote-1477868
-Node: Numeric Array Subscripts477925
-Node: Uninitialized Subscripts480108
-Node: Multidimensional481735
-Node: Multiscanning484848
-Node: Arrays of Arrays486437
-Node: Arrays Summary491100
-Node: Functions493205
-Node: Built-in494078
-Node: Calling Built-in495156
-Node: Numeric Functions497144
-Ref: Numeric Functions-Footnote-1501980
-Ref: Numeric Functions-Footnote-2502337
-Ref: Numeric Functions-Footnote-3502385
-Node: String Functions502654
-Ref: String Functions-Footnote-1525651
-Ref: String Functions-Footnote-2525780
-Ref: String Functions-Footnote-3526028
-Node: Gory Details526115
-Ref: table-sub-escapes527888
-Ref: table-sub-proposed529408
-Ref: table-posix-sub530772
-Ref: table-gensub-escapes532312
-Ref: Gory Details-Footnote-1533488
-Node: I/O Functions533639
-Ref: I/O Functions-Footnote-1540749
-Node: Time Functions540896
-Ref: Time Functions-Footnote-1551360
-Ref: Time Functions-Footnote-2551428
-Ref: Time Functions-Footnote-3551586
-Ref: Time Functions-Footnote-4551697
-Ref: Time Functions-Footnote-5551809
-Ref: Time Functions-Footnote-6552036
-Node: Bitwise Functions552302
-Ref: table-bitwise-ops552864
-Ref: Bitwise Functions-Footnote-1557109
-Node: Type Functions557278
-Node: I18N Functions558420
-Node: User-defined560065
-Node: Definition Syntax560869
-Ref: Definition Syntax-Footnote-1566273
-Node: Function Example566342
-Ref: Function Example-Footnote-1568982
-Node: Function Caveats569004
-Node: Calling A Function569522
-Node: Variable Scope570477
-Node: Pass By Value/Reference573465
-Node: Return Statement576975
-Node: Dynamic Typing579959
-Node: Indirect Calls580888
-Ref: Indirect Calls-Footnote-1590604
-Node: Functions Summary590732
-Node: Library Functions593382
-Ref: Library Functions-Footnote-1597000
-Ref: Library Functions-Footnote-2597143
-Node: Library Names597314
-Ref: Library Names-Footnote-1600787
-Ref: Library Names-Footnote-2601007
-Node: General Functions601093
-Node: Strtonum Function602121
-Node: Assert Function605023
-Node: Round Function608349
-Node: Cliff Random Function609890
-Node: Ordinal Functions610906
-Ref: Ordinal Functions-Footnote-1613971
-Ref: Ordinal Functions-Footnote-2614223
-Node: Join Function614434
-Ref: Join Function-Footnote-1616205
-Node: Getlocaltime Function616405
-Node: Readfile Function620141
-Node: Data File Management621980
-Node: Filetrans Function622612
-Node: Rewind Function626681
-Node: File Checking628239
-Ref: File Checking-Footnote-1629371
-Node: Empty Files629572
-Node: Ignoring Assigns631551
-Node: Getopt Function633105
-Ref: Getopt Function-Footnote-1644369
-Node: Passwd Functions644572
-Ref: Passwd Functions-Footnote-1653551
-Node: Group Functions653639
-Ref: Group Functions-Footnote-1661570
-Node: Walking Arrays661783
-Node: Library Functions Summary663386
-Node: Library Exercises664774
-Node: Sample Programs666054
-Node: Running Examples666824
-Node: Clones667552
-Node: Cut Program668776
-Node: Egrep Program678634
-Ref: Egrep Program-Footnote-1686221
-Node: Id Program686331
-Node: Split Program689985
-Ref: Split Program-Footnote-1693523
-Node: Tee Program693651
-Node: Uniq Program696438
-Node: Wc Program703861
-Ref: Wc Program-Footnote-1708126
-Node: Miscellaneous Programs708218
-Node: Dupword Program709431
-Node: Alarm Program711462
-Node: Translate Program716266
-Ref: Translate Program-Footnote-1720839
-Ref: Translate Program-Footnote-2721109
-Node: Labels Program721248
-Ref: Labels Program-Footnote-1724609
-Node: Word Sorting724693
-Node: History Sorting728736
-Node: Extract Program730572
-Node: Simple Sed738108
-Node: Igawk Program741170
-Ref: Igawk Program-Footnote-1755474
-Ref: Igawk Program-Footnote-2755675
-Node: Anagram Program755797
-Node: Signature Program758865
-Node: Programs Summary760112
-Node: Programs Exercises761327
-Ref: Programs Exercises-Footnote-1765458
-Node: Advanced Features765549
-Node: Nondecimal Data767497
-Node: Array Sorting769074
-Node: Controlling Array Traversal769771
-Node: Array Sorting Functions778051
-Ref: Array Sorting Functions-Footnote-1781943
-Node: Two-way I/O782137
-Ref: Two-way I/O-Footnote-1787081
-Ref: Two-way I/O-Footnote-2787260
-Node: TCP/IP Networking787342
-Node: Profiling790184
-Node: Advanced Features Summary797735
-Node: Internationalization799596
-Node: I18N and L10N801076
-Node: Explaining gettext801762
-Ref: Explaining gettext-Footnote-1806788
-Ref: Explaining gettext-Footnote-2806972
-Node: Programmer i18n807137
-Ref: Programmer i18n-Footnote-1811931
-Node: Translator i18n811980
-Node: String Extraction812774
-Ref: String Extraction-Footnote-1813907
-Node: Printf Ordering813993
-Ref: Printf Ordering-Footnote-1816775
-Node: I18N Portability816839
-Ref: I18N Portability-Footnote-1819288
-Node: I18N Example819351
-Ref: I18N Example-Footnote-1822057
-Node: Gawk I18N822129
-Node: I18N Summary822767
-Node: Debugger824106
-Node: Debugging825128
-Node: Debugging Concepts825569
-Node: Debugging Terms827425
-Node: Awk Debugging830022
-Node: Sample Debugging Session830914
-Node: Debugger Invocation831434
-Node: Finding The Bug832770
-Node: List of Debugger Commands839249
-Node: Breakpoint Control840581
-Node: Debugger Execution Control844245
-Node: Viewing And Changing Data847605
-Node: Execution Stack850963
-Node: Debugger Info852476
-Node: Miscellaneous Debugger Commands856470
-Node: Readline Support861654
-Node: Limitations862546
-Node: Debugging Summary864819
-Node: Arbitrary Precision Arithmetic865987
-Node: Computer Arithmetic867474
-Ref: Computer Arithmetic-Footnote-1871861
-Node: Math Definitions871918
-Ref: table-ieee-formats875207
-Ref: Math Definitions-Footnote-1875747
-Node: MPFR features875850
-Node: FP Math Caution877467
-Ref: FP Math Caution-Footnote-1878517
-Node: Inexactness of computations878886
-Node: Inexact representation879834
-Node: Comparing FP Values881189
-Node: Errors accumulate882153
-Node: Getting Accuracy883586
-Node: Try To Round886245
-Node: Setting precision887144
-Ref: table-predefined-precision-strings887826
-Node: Setting the rounding mode889619
-Ref: table-gawk-rounding-modes889983
-Ref: Setting the rounding mode-Footnote-1893437
-Node: Arbitrary Precision Integers893616
-Ref: Arbitrary Precision Integers-Footnote-1897389
-Node: POSIX Floating Point Problems897538
-Ref: POSIX Floating Point Problems-Footnote-1901414
-Node: Floating point summary901452
-Node: Dynamic Extensions903656
-Node: Extension Intro905208
-Node: Plugin License906473
-Node: Extension Mechanism Outline907158
-Ref: figure-load-extension907582
-Ref: figure-load-new-function909067
-Ref: figure-call-new-function910069
-Node: Extension API Description912053
-Node: Extension API Functions Introduction913503
-Node: General Data Types918370
-Ref: General Data Types-Footnote-1924063
-Node: Requesting Values924362
-Ref: table-value-types-returned925099
-Node: Memory Allocation Functions926057
-Ref: Memory Allocation Functions-Footnote-1928804
-Node: Constructor Functions928900
-Node: Registration Functions930658
-Node: Extension Functions931343
-Node: Exit Callback Functions933645
-Node: Extension Version String934893
-Node: Input Parsers935543
-Node: Output Wrappers945357
-Node: Two-way processors949873
-Node: Printing Messages952077
-Ref: Printing Messages-Footnote-1953154
-Node: Updating `ERRNO'953306
-Node: Accessing Parameters954045
-Node: Symbol Table Access955275
-Node: Symbol table by name955789
-Node: Symbol table by cookie957765
-Ref: Symbol table by cookie-Footnote-1961898
-Node: Cached values961961
-Ref: Cached values-Footnote-1965465
-Node: Array Manipulation965556
-Ref: Array Manipulation-Footnote-1966654
-Node: Array Data Types966693
-Ref: Array Data Types-Footnote-1969396
-Node: Array Functions969488
-Node: Flattening Arrays973362
-Node: Creating Arrays980214
-Node: Extension API Variables984945
-Node: Extension Versioning985581
-Node: Extension API Informational Variables987482
-Node: Extension API Boilerplate988568
-Node: Finding Extensions992372
-Node: Extension Example992932
-Node: Internal File Description993662
-Node: Internal File Ops997753
-Ref: Internal File Ops-Footnote-11009185
-Node: Using Internal File Ops1009325
-Ref: Using Internal File Ops-Footnote-11011672
-Node: Extension Samples1011940
-Node: Extension Sample File Functions1013464
-Node: Extension Sample Fnmatch1021032
-Node: Extension Sample Fork1022514
-Node: Extension Sample Inplace1023727
-Node: Extension Sample Ord1025402
-Node: Extension Sample Readdir1026238
-Ref: table-readdir-file-types1027094
-Node: Extension Sample Revout1027893
-Node: Extension Sample Rev2way1028484
-Node: Extension Sample Read write array1029225
-Node: Extension Sample Readfile1031104
-Node: Extension Sample API Tests1032204
-Node: Extension Sample Time1032729
-Node: gawkextlib1034044
-Node: Extension summary1036857
-Node: Extension Exercises1040550
-Node: Language History1041272
-Node: V7/SVR3.11042915
-Node: SVR41045235
-Node: POSIX1046677
-Node: BTL1048063
-Node: POSIX/GNU1048797
-Node: Feature History1054573
-Node: Common Extensions1067664
-Node: Ranges and Locales1068976
-Ref: Ranges and Locales-Footnote-11073593
-Ref: Ranges and Locales-Footnote-21073620
-Ref: Ranges and Locales-Footnote-31073854
-Node: Contributors1074075
-Node: History summary1079500
-Node: Installation1080869
-Node: Gawk Distribution1081820
-Node: Getting1082304
-Node: Extracting1083128
-Node: Distribution contents1084770
-Node: Unix Installation1090540
-Node: Quick Installation1091157
-Node: Additional Configuration Options1093599
-Node: Configuration Philosophy1095337
-Node: Non-Unix Installation1097688
-Node: PC Installation1098146
-Node: PC Binary Installation1099457
-Node: PC Compiling1101305
-Ref: PC Compiling-Footnote-11104304
-Node: PC Testing1104409
-Node: PC Using1105585
-Node: Cygwin1109737
-Node: MSYS1110546
-Node: VMS Installation1111044
-Node: VMS Compilation1111840
-Ref: VMS Compilation-Footnote-11113062
-Node: VMS Dynamic Extensions1113120
-Node: VMS Installation Details1114493
-Node: VMS Running1116745
-Node: VMS GNV1119579
-Node: VMS Old Gawk1120302
-Node: Bugs1120772
-Node: Other Versions1124776
-Node: Installation summary1131000
-Node: Notes1132056
-Node: Compatibility Mode1132921
-Node: Additions1133703
-Node: Accessing The Source1134628
-Node: Adding Code1136064
-Node: New Ports1142242
-Node: Derived Files1146723
-Ref: Derived Files-Footnote-11152198
-Ref: Derived Files-Footnote-21152232
-Ref: Derived Files-Footnote-31152828
-Node: Future Extensions1152942
-Node: Implementation Limitations1153548
-Node: Extension Design1154796
-Node: Old Extension Problems1155950
-Ref: Old Extension Problems-Footnote-11157467
-Node: Extension New Mechanism Goals1157524
-Ref: Extension New Mechanism Goals-Footnote-11160884
-Node: Extension Other Design Decisions1161073
-Node: Extension Future Growth1163179
-Node: Old Extension Mechanism1164015
-Node: Notes summary1165777
-Node: Basic Concepts1166963
-Node: Basic High Level1167644
-Ref: figure-general-flow1167916
-Ref: figure-process-flow1168515
-Ref: Basic High Level-Footnote-11171744
-Node: Basic Data Typing1171929
-Node: Glossary1175257
-Node: Copying1200409
-Node: GNU Free Documentation License1237965
-Node: Index1263101
+Node: Variables322372
+Node: Using Variables323027
+Node: Assignment Options324931
+Node: Conversion326806
+Node: Strings And Numbers327330
+Ref: Strings And Numbers-Footnote-1330392
+Node: Locale influences conversions330501
+Ref: table-locale-affects333216
+Node: All Operators333804
+Node: Arithmetic Ops334434
+Node: Concatenation336939
+Ref: Concatenation-Footnote-1339758
+Node: Assignment Ops339864
+Ref: table-assign-ops344847
+Node: Increment Ops346125
+Node: Truth Values and Conditions349563
+Node: Truth Values350646
+Node: Typing and Comparison351695
+Node: Variable Typing352488
+Node: Comparison Operators356140
+Ref: table-relational-ops356550
+Node: POSIX String Comparison360065
+Ref: POSIX String Comparison-Footnote-1361137
+Node: Boolean Ops361275
+Ref: Boolean Ops-Footnote-1365754
+Node: Conditional Exp365845
+Node: Function Calls367572
+Node: Precedence371452
+Node: Locales375120
+Node: Expressions Summary376751
+Node: Patterns and Actions379325
+Node: Pattern Overview380441
+Node: Regexp Patterns382120
+Node: Expression Patterns382663
+Node: Ranges386443
+Node: BEGIN/END389549
+Node: Using BEGIN/END390311
+Ref: Using BEGIN/END-Footnote-1393048
+Node: I/O And BEGIN/END393154
+Node: BEGINFILE/ENDFILE395468
+Node: Empty398369
+Node: Using Shell Variables398686
+Node: Action Overview400962
+Node: Statements403289
+Node: If Statement405137
+Node: While Statement406635
+Node: Do Statement408663
+Node: For Statement409805
+Node: Switch Statement412960
+Node: Break Statement415348
+Node: Continue Statement417389
+Node: Next Statement419214
+Node: Nextfile Statement421594
+Node: Exit Statement424224
+Node: Built-in Variables426627
+Node: User-modified427754
+Ref: User-modified-Footnote-1435434
+Node: Auto-set435496
+Ref: Auto-set-Footnote-1448690
+Ref: Auto-set-Footnote-2448895
+Node: ARGC and ARGV448951
+Node: Pattern Action Summary453155
+Node: Arrays455574
+Node: Array Basics456903
+Node: Array Intro457747
+Ref: figure-array-elements459720
+Ref: Array Intro-Footnote-1462244
+Node: Reference to Elements462372
+Node: Assigning Elements464822
+Node: Array Example465313
+Node: Scanning an Array467071
+Node: Controlling Scanning470087
+Ref: Controlling Scanning-Footnote-1475276
+Node: Numeric Array Subscripts475592
+Node: Uninitialized Subscripts477775
+Node: Delete479392
+Ref: Delete-Footnote-1482136
+Node: Multidimensional482193
+Node: Multiscanning485288
+Node: Arrays of Arrays486877
+Node: Arrays Summary491638
+Node: Functions493743
+Node: Built-in494616
+Node: Calling Built-in495694
+Node: Numeric Functions497682
+Ref: Numeric Functions-Footnote-1502506
+Ref: Numeric Functions-Footnote-2502863
+Ref: Numeric Functions-Footnote-3502911
+Node: String Functions503180
+Ref: String Functions-Footnote-1526640
+Ref: String Functions-Footnote-2526769
+Ref: String Functions-Footnote-3527017
+Node: Gory Details527104
+Ref: table-sub-escapes528885
+Ref: table-sub-proposed530405
+Ref: table-posix-sub531769
+Ref: table-gensub-escapes533309
+Ref: Gory Details-Footnote-1534141
+Node: I/O Functions534292
+Ref: I/O Functions-Footnote-1541393
+Node: Time Functions541540
+Ref: Time Functions-Footnote-1552009
+Ref: Time Functions-Footnote-2552077
+Ref: Time Functions-Footnote-3552235
+Ref: Time Functions-Footnote-4552346
+Ref: Time Functions-Footnote-5552458
+Ref: Time Functions-Footnote-6552685
+Node: Bitwise Functions552951
+Ref: table-bitwise-ops553513
+Ref: Bitwise Functions-Footnote-1557821
+Node: Type Functions557990
+Node: I18N Functions559139
+Node: User-defined560784
+Node: Definition Syntax561588
+Ref: Definition Syntax-Footnote-1566992
+Node: Function Example567061
+Ref: Function Example-Footnote-1569978
+Node: Function Caveats570000
+Node: Calling A Function570518
+Node: Variable Scope571473
+Node: Pass By Value/Reference574461
+Node: Return Statement577971
+Node: Dynamic Typing580955
+Node: Indirect Calls581884
+Ref: Indirect Calls-Footnote-1591605
+Node: Functions Summary591733
+Node: Library Functions594432
+Ref: Library Functions-Footnote-1598050
+Ref: Library Functions-Footnote-2598193
+Node: Library Names598364
+Ref: Library Names-Footnote-1601822
+Ref: Library Names-Footnote-2602042
+Node: General Functions602128
+Node: Strtonum Function603156
+Node: Assert Function606176
+Node: Round Function609500
+Node: Cliff Random Function611041
+Node: Ordinal Functions612057
+Ref: Ordinal Functions-Footnote-1615122
+Ref: Ordinal Functions-Footnote-2615374
+Node: Join Function615585
+Ref: Join Function-Footnote-1617356
+Node: Getlocaltime Function617556
+Node: Readfile Function621297
+Node: Data File Management623245
+Node: Filetrans Function623877
+Node: Rewind Function627936
+Node: File Checking629494
+Ref: File Checking-Footnote-1630626
+Node: Empty Files630827
+Node: Ignoring Assigns632806
+Node: Getopt Function634360
+Ref: Getopt Function-Footnote-1645624
+Node: Passwd Functions645827
+Ref: Passwd Functions-Footnote-1654806
+Node: Group Functions654894
+Ref: Group Functions-Footnote-1662825
+Node: Walking Arrays663038
+Node: Library Functions Summary664641
+Node: Library Exercises666029
+Node: Sample Programs667309
+Node: Running Examples668079
+Node: Clones668807
+Node: Cut Program670031
+Node: Egrep Program679889
+Ref: Egrep Program-Footnote-1687476
+Node: Id Program687586
+Node: Split Program691240
+Ref: Split Program-Footnote-1694778
+Node: Tee Program694906
+Node: Uniq Program697693
+Node: Wc Program705116
+Ref: Wc Program-Footnote-1709381
+Node: Miscellaneous Programs709473
+Node: Dupword Program710686
+Node: Alarm Program712717
+Node: Translate Program717521
+Ref: Translate Program-Footnote-1722094
+Ref: Translate Program-Footnote-2722364
+Node: Labels Program722503
+Ref: Labels Program-Footnote-1725864
+Node: Word Sorting725948
+Node: History Sorting729991
+Node: Extract Program731827
+Node: Simple Sed739363
+Node: Igawk Program742425
+Ref: Igawk Program-Footnote-1756729
+Ref: Igawk Program-Footnote-2756930
+Node: Anagram Program757052
+Node: Signature Program760120
+Node: Programs Summary761367
+Node: Programs Exercises762582
+Ref: Programs Exercises-Footnote-1766713
+Node: Advanced Features766804
+Node: Nondecimal Data768752
+Node: Array Sorting770329
+Node: Controlling Array Traversal771026
+Node: Array Sorting Functions779306
+Ref: Array Sorting Functions-Footnote-1783198
+Node: Two-way I/O783392
+Ref: Two-way I/O-Footnote-1788336
+Ref: Two-way I/O-Footnote-2788515
+Node: TCP/IP Networking788597
+Node: Profiling791439
+Node: Advanced Features Summary798990
+Node: Internationalization800851
+Node: I18N and L10N802331
+Node: Explaining gettext803017
+Ref: Explaining gettext-Footnote-1808043
+Ref: Explaining gettext-Footnote-2808227
+Node: Programmer i18n808392
+Ref: Programmer i18n-Footnote-1813186
+Node: Translator i18n813235
+Node: String Extraction814029
+Ref: String Extraction-Footnote-1815162
+Node: Printf Ordering815248
+Ref: Printf Ordering-Footnote-1818030
+Node: I18N Portability818094
+Ref: I18N Portability-Footnote-1820543
+Node: I18N Example820606
+Ref: I18N Example-Footnote-1823312
+Node: Gawk I18N823384
+Node: I18N Summary824022
+Node: Debugger825361
+Node: Debugging826383
+Node: Debugging Concepts826824
+Node: Debugging Terms828680
+Node: Awk Debugging831277
+Node: Sample Debugging Session832169
+Node: Debugger Invocation832689
+Node: Finding The Bug834025
+Node: List of Debugger Commands840504
+Node: Breakpoint Control841836
+Node: Debugger Execution Control845500
+Node: Viewing And Changing Data848860
+Node: Execution Stack852218
+Node: Debugger Info853731
+Node: Miscellaneous Debugger Commands857725
+Node: Readline Support862909
+Node: Limitations863801
+Node: Debugging Summary866074
+Node: Arbitrary Precision Arithmetic867242
+Node: Computer Arithmetic868729
+Ref: Computer Arithmetic-Footnote-1873116
+Node: Math Definitions873173
+Ref: table-ieee-formats876462
+Ref: Math Definitions-Footnote-1877002
+Node: MPFR features877105
+Node: FP Math Caution878722
+Ref: FP Math Caution-Footnote-1879772
+Node: Inexactness of computations880141
+Node: Inexact representation881089
+Node: Comparing FP Values882444
+Node: Errors accumulate883408
+Node: Getting Accuracy884841
+Node: Try To Round887500
+Node: Setting precision888399
+Ref: table-predefined-precision-strings889081
+Node: Setting the rounding mode890874
+Ref: table-gawk-rounding-modes891238
+Ref: Setting the rounding mode-Footnote-1894692
+Node: Arbitrary Precision Integers894871
+Ref: Arbitrary Precision Integers-Footnote-1898644
+Node: POSIX Floating Point Problems898793
+Ref: POSIX Floating Point Problems-Footnote-1902669
+Node: Floating point summary902707
+Node: Dynamic Extensions904911
+Node: Extension Intro906463
+Node: Plugin License907728
+Node: Extension Mechanism Outline908413
+Ref: figure-load-extension908837
+Ref: figure-load-new-function910322
+Ref: figure-call-new-function911324
+Node: Extension API Description913308
+Node: Extension API Functions Introduction914758
+Node: General Data Types919625
+Ref: General Data Types-Footnote-1925318
+Node: Requesting Values925617
+Ref: table-value-types-returned926354
+Node: Memory Allocation Functions927312
+Ref: Memory Allocation Functions-Footnote-1930059
+Node: Constructor Functions930155
+Node: Registration Functions931913
+Node: Extension Functions932598
+Node: Exit Callback Functions934900
+Node: Extension Version String936148
+Node: Input Parsers936798
+Node: Output Wrappers946612
+Node: Two-way processors951128
+Node: Printing Messages953332
+Ref: Printing Messages-Footnote-1954409
+Node: Updating `ERRNO'954561
+Node: Accessing Parameters955300
+Node: Symbol Table Access956530
+Node: Symbol table by name957044
+Node: Symbol table by cookie959020
+Ref: Symbol table by cookie-Footnote-1963153
+Node: Cached values963216
+Ref: Cached values-Footnote-1966720
+Node: Array Manipulation966811
+Ref: Array Manipulation-Footnote-1967909
+Node: Array Data Types967948
+Ref: Array Data Types-Footnote-1970651
+Node: Array Functions970743
+Node: Flattening Arrays974617
+Node: Creating Arrays981469
+Node: Extension API Variables986200
+Node: Extension Versioning986836
+Node: Extension API Informational Variables988737
+Node: Extension API Boilerplate989823
+Node: Finding Extensions993627
+Node: Extension Example994187
+Node: Internal File Description994917
+Node: Internal File Ops999008
+Ref: Internal File Ops-Footnote-11010440
+Node: Using Internal File Ops1010580
+Ref: Using Internal File Ops-Footnote-11012927
+Node: Extension Samples1013195
+Node: Extension Sample File Functions1014719
+Node: Extension Sample Fnmatch1022287
+Node: Extension Sample Fork1023769
+Node: Extension Sample Inplace1024982
+Node: Extension Sample Ord1026657
+Node: Extension Sample Readdir1027493
+Ref: table-readdir-file-types1028349
+Node: Extension Sample Revout1029148
+Node: Extension Sample Rev2way1029739
+Node: Extension Sample Read write array1030480
+Node: Extension Sample Readfile1032359
+Node: Extension Sample API Tests1033459
+Node: Extension Sample Time1033984
+Node: gawkextlib1035299
+Node: Extension summary1038112
+Node: Extension Exercises1041805
+Node: Language History1042527
+Node: V7/SVR3.11044170
+Node: SVR41046490
+Node: POSIX1047932
+Node: BTL1049318
+Node: POSIX/GNU1050052
+Node: Feature History1055828
+Node: Common Extensions1068919
+Node: Ranges and Locales1070231
+Ref: Ranges and Locales-Footnote-11074848
+Ref: Ranges and Locales-Footnote-21074875
+Ref: Ranges and Locales-Footnote-31075109
+Node: Contributors1075330
+Node: History summary1080755
+Node: Installation1082124
+Node: Gawk Distribution1083075
+Node: Getting1083559
+Node: Extracting1084383
+Node: Distribution contents1086025
+Node: Unix Installation1091795
+Node: Quick Installation1092412
+Node: Additional Configuration Options1094854
+Node: Configuration Philosophy1096592
+Node: Non-Unix Installation1098943
+Node: PC Installation1099401
+Node: PC Binary Installation1100712
+Node: PC Compiling1102560
+Ref: PC Compiling-Footnote-11105559
+Node: PC Testing1105664
+Node: PC Using1106840
+Node: Cygwin1110992
+Node: MSYS1111801
+Node: VMS Installation1112299
+Node: VMS Compilation1113095
+Ref: VMS Compilation-Footnote-11114317
+Node: VMS Dynamic Extensions1114375
+Node: VMS Installation Details1115748
+Node: VMS Running1118000
+Node: VMS GNV1120834
+Node: VMS Old Gawk1121557
+Node: Bugs1122027
+Node: Other Versions1126031
+Node: Installation summary1132255
+Node: Notes1133311
+Node: Compatibility Mode1134176
+Node: Additions1134958
+Node: Accessing The Source1135883
+Node: Adding Code1137319
+Node: New Ports1143497
+Node: Derived Files1147978
+Ref: Derived Files-Footnote-11153453
+Ref: Derived Files-Footnote-21153487
+Ref: Derived Files-Footnote-31154083
+Node: Future Extensions1154197
+Node: Implementation Limitations1154803
+Node: Extension Design1156051
+Node: Old Extension Problems1157205
+Ref: Old Extension Problems-Footnote-11158722
+Node: Extension New Mechanism Goals1158779
+Ref: Extension New Mechanism Goals-Footnote-11162139
+Node: Extension Other Design Decisions1162328
+Node: Extension Future Growth1164434
+Node: Old Extension Mechanism1165270
+Node: Notes summary1167032
+Node: Basic Concepts1168218
+Node: Basic High Level1168899
+Ref: figure-general-flow1169171
+Ref: figure-process-flow1169770
+Ref: Basic High Level-Footnote-11172999
+Node: Basic Data Typing1173184
+Node: Glossary1176512
+Node: Copying1201664
+Node: GNU Free Documentation License1239220
+Node: Index1264356

End Tag Table
diff --git a/doc/gawk.texi b/doc/gawk.texi
index 652c6981..718afe09 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -726,12 +726,12 @@ particular records in a file and perform operations upon them.
elements.
* Controlling Scanning:: Controlling the order in which arrays
are scanned.
-* Delete:: The @code{delete} statement removes an
- element from an array.
* Numeric Array Subscripts:: How to use numbers as subscripts in
@command{awk}.
* Uninitialized Subscripts:: Using uninitialized variables as
subscripts.
+* Delete:: The @code{delete} statement removes an
+ element from an array.
* Multidimensional:: Emulating multidimensional arrays in
@command{awk}.
* Multiscanning:: Scanning multidimensional arrays.
@@ -10629,7 +10629,7 @@ if (/barfly/ || /camelot/)
@noindent
are exactly equivalent.
One rather bizarre consequence of this rule is that the following
-Boolean expression is valid, but does not do what the user probably
+Boolean expression is valid, but does not do what its author probably
intended:
@example
@@ -10675,10 +10675,9 @@ Modern implementations of @command{awk}, including @command{gawk}, allow
the third argument of @code{split()} to be a regexp constant, but some
older implementations do not.
@value{DARKCORNER}
-This can lead to confusion when attempting to use regexp constants
-as arguments to user-defined functions
-(@pxref{User-defined}).
-For example:
+Because some built-in functions accept regexp constants as arguments,
+confusion can arise when attempting to use regexp constants as arguments
+to user-defined functions (@pxref{User-defined}). For example:
@example
function mysub(pat, repl, str, global)
@@ -10746,8 +10745,8 @@ variable's current value. Variables are given new values with
@dfn{decrement operators}.
@xref{Assignment Ops}.
In addition, the @code{sub()} and @code{gsub()} functions can
-change a variable's value, and the @code{match()}, @code{patsplit()}
-and @code{split()} functions can change the contents of their
+change a variable's value, and the @code{match()}, @code{split()}
+and @code{patsplit()} functions can change the contents of their
array parameters. @xref{String Functions}.
@cindex variables, built-in
@@ -10763,7 +10762,7 @@ Variables in @command{awk} can be assigned either numeric or string values.
The kind of value a variable holds can change over the life of a program.
By default, variables are initialized to the empty string, which
is zero if converted to a number. There is no need to explicitly
-``initialize'' a variable in @command{awk},
+initialize a variable in @command{awk},
which is what you would do in C and in most other traditional languages.
@node Assignment Options
@@ -11000,7 +10999,7 @@ $ @kbd{echo 4,321 | LC_ALL=en_DK.utf-8 gawk '@{ print $1 + 1 @}'}
@noindent
The @code{en_DK.utf-8} locale is for English in Denmark, where the comma acts as
the decimal point separator. In the normal @code{"C"} locale, @command{gawk}
-treats @samp{4,321} as @samp{4}, while in the Danish locale, it's treated
+treats @samp{4,321} as 4, while in the Danish locale, it's treated
as the full number, 4.321.
Some earlier versions of @command{gawk} fully complied with this aspect
@@ -11557,7 +11556,7 @@ awk '/[=]=/' /dev/null
@end example
@command{gawk} does not have this problem; BWK @command{awk}
-and @command{mawk} also do not (@pxref{Other Versions}).
+and @command{mawk} also do not.
@docbook
</sidebar>
@@ -11603,7 +11602,7 @@ awk '/[=]=/' /dev/null
@end example
@command{gawk} does not have this problem; BWK @command{awk}
-and @command{mawk} also do not (@pxref{Other Versions}).
+and @command{mawk} also do not.
@end cartouche
@end ifnotdocbook
@c ENDOFRANGE exas
@@ -11915,7 +11914,7 @@ attribute.
@item
Fields, @code{getline} input, @code{FILENAME}, @code{ARGV} elements,
@code{ENVIRON} elements, and the elements of an array created by
-@code{patsplit()}, @code{split()} and @code{match()} that are numeric
+@code{match()}, @code{split()} and @code{patsplit()} that are numeric
strings have the @var{strnum} attribute. Otherwise, they have
the @var{string} attribute. Uninitialized variables also have the
@var{strnum} attribute.
@@ -12070,22 +12069,23 @@ Thus, the six-character input string @w{@samp{ +3.14}} receives the
The following examples print @samp{1} when the comparison between
the two different constants is true, @samp{0} otherwise:
+@c 22.9.2014: Tested with mawk and BWK awk, got same results.
@example
-$ @kbd{echo ' +3.14' | gawk '@{ print $0 == " +3.14" @}'} @ii{True}
+$ @kbd{echo ' +3.14' | awk '@{ print($0 == " +3.14") @}'} @ii{True}
@print{} 1
-$ @kbd{echo ' +3.14' | gawk '@{ print $0 == "+3.14" @}'} @ii{False}
+$ @kbd{echo ' +3.14' | awk '@{ print($0 == "+3.14") @}'} @ii{False}
@print{} 0
-$ @kbd{echo ' +3.14' | gawk '@{ print $0 == "3.14" @}'} @ii{False}
+$ @kbd{echo ' +3.14' | awk '@{ print($0 == "3.14") @}'} @ii{False}
@print{} 0
-$ @kbd{echo ' +3.14' | gawk '@{ print $0 == 3.14 @}'} @ii{True}
+$ @kbd{echo ' +3.14' | awk '@{ print($0 == 3.14) @}'} @ii{True}
@print{} 1
-$ @kbd{echo ' +3.14' | gawk '@{ print $1 == " +3.14" @}'} @ii{False}
+$ @kbd{echo ' +3.14' | awk '@{ print($1 == " +3.14") @}'} @ii{False}
@print{} 0
-$ @kbd{echo ' +3.14' | gawk '@{ print $1 == "+3.14" @}'} @ii{True}
+$ @kbd{echo ' +3.14' | awk '@{ print($1 == "+3.14") @}'} @ii{True}
@print{} 1
-$ @kbd{echo ' +3.14' | gawk '@{ print $1 == "3.14" @}'} @ii{False}
+$ @kbd{echo ' +3.14' | awk '@{ print($1 == "3.14") @}'} @ii{False}
@print{} 0
-$ @kbd{echo ' +3.14' | gawk '@{ print $1 == 3.14 @}'} @ii{True}
+$ @kbd{echo ' +3.14' | awk '@{ print($1 == 3.14) @}'} @ii{True}
@print{} 1
@end example
@@ -12159,9 +12159,8 @@ part of the test always succeeds. Because the operators are
so similar, this kind of error is very difficult to spot when
scanning the source code.
-@cindex @command{gawk}, comparison operators and
-The following list of expressions illustrates the kind of comparison
-@command{gawk} performs, as well as what the result of the comparison is:
+The following list of expressions illustrates the kinds of comparisons
+@command{awk} performs, as well as what the result of each comparison is:
@table @code
@item 1.5 <= 2.0
@@ -12234,7 +12233,7 @@ dynamic regexp (@pxref{Regexp Usage}; also
@cindex @command{awk}, regexp constants and
@cindex regexp constants
-In modern implementations of @command{awk}, a constant regular
+A constant regular
expression in slashes by itself is also an expression. The regexp
@code{/@var{regexp}/} is an abbreviation for the following comparison expression:
@@ -12254,7 +12253,7 @@ where this is discussed in more detail.
The POSIX standard says that string comparison is performed based
on the locale's @dfn{collating order}. This is the order in which
characters sort, as defined by the locale (for more discussion,
-@pxref{Ranges and Locales}). This order is usually very different
+@pxref{Locales}). This order is usually very different
from the results obtained when doing straight character-by-character
comparison.@footnote{Technically, string comparison is supposed
to behave the same way as if the strings are compared with the C
@@ -12334,7 +12333,7 @@ no substring @samp{foo} in the record.
True if at least one of @var{boolean1} or @var{boolean2} is true.
For example, the following statement prints all records in the input
that contain @emph{either} @samp{edu} or
-@samp{li} or both:
+@samp{li}:
@example
if ($0 ~ /edu/ || $0 ~ /li/) print
@@ -12343,6 +12342,9 @@ if ($0 ~ /edu/ || $0 ~ /li/) print
The subexpression @var{boolean2} is evaluated only if @var{boolean1}
is false. This can make a difference when @var{boolean2} contains
expressions that have side effects.
+(Thus, this test never really distinguishes records that contain both
+@samp{edu} and @samp{li}---as soon as @samp{edu} is matched,
+the full test succeeds.)
@item ! @var{boolean}
True if @var{boolean} is false. For example,
@@ -12352,7 +12354,7 @@ variable is not defined:
@example
BEGIN @{ if (! ("HOME" in ENVIRON))
- print "no home!" @}
+ print "no home!" @}
@end example
(The @code{in} operator is described in
@@ -12808,8 +12810,8 @@ system about the local character set and language. The ISO C standard
defines a default @code{"C"} locale, which is an environment that is
typical of what many C programmers are used to.
-Once upon a time, the locale setting used to affect regexp matching
-(@pxref{Ranges and Locales}), but this is no longer true.
+Once upon a time, the locale setting used to affect regexp matching,
+but this is no longer true (@pxref{Ranges and Locales}).
Locales can affect record splitting. For the normal case of @samp{RS =
"\n"}, the locale is largely irrelevant. For other single-character
@@ -12863,7 +12865,8 @@ Locales can influence the conversions.
@item
@command{awk} provides the usual arithmetic operators (addition,
subtraction, multiplication, division, modulus), and unary plus and minus.
-It also provides comparison operators, boolean operators, and regexp
+It also provides comparison operators, Boolean operators, array membership
+testing, and regexp
matching operators. String concatenation is accomplished by placing
two expressions next to each other; there is no explicit operator.
The three-operand @samp{?:} operator provides an ``if-else'' test within
@@ -12878,7 +12881,7 @@ In @command{awk}, a value is considered to be true if it is non-zero
@emph{or} non-null. Otherwise, the value is false.
@item
-A value's type is set upon each assignment and may change over its
+A variable's type is set upon each assignment and may change over its
lifetime. The type determines how it behaves in comparisons (string
or numeric).
@@ -12958,7 +12961,7 @@ is nonzero (if a number) or non-null (if a string).
(@xref{Expression Patterns}.)
@item @var{begpat}, @var{endpat}
-A pair of patterns separated by a comma, specifying a range of records.
+A pair of patterns separated by a comma, specifying a @dfn{range} of records.
The range includes both the initial record that matches @var{begpat} and
the final record that matches @var{endpat}.
(@xref{Ranges}.)
@@ -13048,8 +13051,8 @@ $ @kbd{awk '$1 ~ /li/ @{ print $2 @}' mail-list}
@cindex regexp constants, as patterns
@cindex patterns, regexp constants as
A regexp constant as a pattern is also a special case of an expression
-pattern. The expression @code{/li/} has the value one if @samp{li}
-appears in the current input record. Thus, as a pattern, @code{/li/}
+pattern. The expression @samp{/li/} has the value one if @samp{li}
+appears in the current input record. Thus, as a pattern, @samp{/li/}
matches any record containing @samp{li}.
@cindex Boolean expressions, as patterns
@@ -13231,7 +13234,7 @@ input is read. For example:
@example
$ @kbd{awk '}
> @kbd{BEGIN @{ print "Analysis of \"li\"" @}}
-> @kbd{/li/ @{ ++n @}}
+> @kbd{/li/ @{ ++n @}}
> @kbd{END @{ print "\"li\" appears in", n, "records." @}' mail-list}
@print{} Analysis of "li"
@print{} "li" appears in 4 records.
@@ -13311,9 +13314,10 @@ The POSIX standard specifies that @code{NF} is available in an @code{END}
rule. It contains the number of fields from the last input record.
Most probably due to an oversight, the standard does not say that @code{$0}
is also preserved, although logically one would think that it should be.
-In fact, @command{gawk} does preserve the value of @code{$0} for use in
-@code{END} rules. Be aware, however, that BWK @command{awk}, and possibly
-other implementations, do not.
+In fact, all of BWK @command{awk}, @command{mawk}, and @command{gawk}
+preserve the value of @code{$0} for use in @code{END} rules. Be aware,
+however, that some other implementations and many older versions
+of Unix @command{awk} do not.
The third point follows from the first two. The meaning of @samp{print}
inside a @code{BEGIN} or @code{END} rule is the same as always:
@@ -13408,8 +13412,8 @@ level of the @command{awk} program.
@cindex @code{next} statement, @code{BEGINFILE}/@code{ENDFILE} patterns and
The @code{next} statement (@pxref{Next Statement}) is not allowed inside
-either a @code{BEGINFILE} or and @code{ENDFILE} rule. The @code{nextfile}
-statement (@pxref{Nextfile Statement}) is allowed only inside a
+either a @code{BEGINFILE} or an @code{ENDFILE} rule. The @code{nextfile}
+statement is allowed only inside a
@code{BEGINFILE} rule, but not inside an @code{ENDFILE} rule.
@cindex @code{getline} statement, @code{BEGINFILE}/@code{ENDFILE} patterns and
@@ -13473,7 +13477,7 @@ There are two ways to get the value of the shell variable
into the body of the @command{awk} program.
@cindex shells, quoting
-The most common method is to use shell quoting to substitute
+A common method is to use shell quoting to substitute
the variable's value into the program inside the script.
For example, consider the following program:
@@ -13730,20 +13734,21 @@ If the @var{condition} is true, it executes the statement @var{body}.
is not zero and not a null string.)
@end ifinfo
After @var{body} has been executed,
-@var{condition} is tested again, and if it is still true, @var{body} is
-executed again. This process repeats until the @var{condition} is no longer
-true. If the @var{condition} is initially false, the body of the loop is
-never executed and @command{awk} continues with the statement following
+@var{condition} is tested again, and if it is still true, @var{body}
+executes again. This process repeats until the @var{condition} is no longer
+true. If the @var{condition} is initially false, the body of the loop
+never executes and @command{awk} continues with the statement following
the loop.
This example prints the first three fields of each record, one per line:
@example
-awk '@{
- i = 1
- while (i <= 3) @{
- print $i
- i++
- @}
+awk '
+@{
+ i = 1
+ while (i <= 3) @{
+ print $i
+ i++
+ @}
@}' inventory-shipped
@end example
@@ -13777,14 +13782,14 @@ do
while (@var{condition})
@end example
-Even if the @var{condition} is false at the start, the @var{body} is
-executed at least once (and only once, unless executing @var{body}
+Even if the @var{condition} is false at the start, the @var{body}
+executes at least once (and only once, unless executing @var{body}
makes @var{condition} true). Contrast this with the corresponding
@code{while} statement:
@example
while (@var{condition})
- @var{body}
+ @var{body}
@end example
@noindent
@@ -13794,11 +13799,11 @@ The following is an example of a @code{do} statement:
@example
@{
- i = 1
- do @{
- print $0
- i++
- @} while (i <= 10)
+ i = 1
+ do @{
+ print $0
+ i++
+ @} while (i <= 10)
@}
@end example
@@ -13835,9 +13840,10 @@ compares it against the desired number of iterations.
For example:
@example
-awk '@{
- for (i = 1; i <= 3; i++)
- print $i
+awk '
+@{
+ for (i = 1; i <= 3; i++)
+ print $i
@}' inventory-shipped
@end example
@@ -13865,7 +13871,7 @@ between 1 and 100:
@example
for (i = 1; i <= 100; i *= 2)
- print i
+ print i
@end example
If there is nothing to be done, any of the three expressions in the
@@ -14185,7 +14191,7 @@ The @code{next} statement is not allowed inside @code{BEGINFILE} and
@cindex functions, user-defined, @code{next}/@code{nextfile} statements and
According to the POSIX standard, the behavior is undefined if the
@code{next} statement is used in a @code{BEGIN} or @code{END} rule.
-@command{gawk} treats it as a syntax error. Although POSIX permits it,
+@command{gawk} treats it as a syntax error. Although POSIX does not disallow it,
most other @command{awk} implementations don't allow the @code{next}
statement inside function bodies (@pxref{User-defined}). Just as with any
other @code{next} statement, a @code{next} statement inside a function
@@ -14240,7 +14246,7 @@ opened with redirections. It is not related to the main processing that
@quotation NOTE
For many years, @code{nextfile} was a
-@command{gawk} extension. As of September, 2012, it was accepted for
+common extension. In September, 2012, it was accepted for
inclusion into the POSIX standard.
See @uref{http://austingroupbugs.net/view.php?id=607, the Austin Group website}.
@end quotation
@@ -14249,8 +14255,8 @@ See @uref{http://austingroupbugs.net/view.php?id=607, the Austin Group website}.
@cindex @code{nextfile} statement, user-defined functions and
@cindex Brian Kernighan's @command{awk}
@cindex @command{mawk} utility
-The current version of BWK @command{awk}, and @command{mawk} (@pxref{Other
-Versions}) also support @code{nextfile}. However, they don't allow the
+The current versions of BWK @command{awk} and @command{mawk}
+also support @code{nextfile}. However, they don't allow the
@code{nextfile} statement inside function bodies (@pxref{User-defined}).
@command{gawk} does; a @code{nextfile} inside a function body reads the
first record from the next file and starts processing it with the first rule in the program,
@@ -14282,8 +14288,8 @@ the program to stop immediately.
An @code{exit} statement that is not part of a @code{BEGIN} or @code{END}
rule stops the execution of any further automatic rules for the current
record, skips reading any remaining input records, and executes the
-@code{END} rule if there is one.
-Any @code{ENDFILE} rules are also skipped; they are not executed.
+@code{END} rule if there is one. @command{gawk} also skips
+any @code{ENDFILE} rules; they do not execute.
In such a case,
if you don't want the @code{END} rule to do its job, set a variable
@@ -14391,7 +14397,7 @@ respectively, should use binary I/O. A string value of @code{"rw"} or
@code{"wr"} indicates that all files should use binary I/O. Any other
string value is treated the same as @code{"rw"}, but causes @command{gawk}
to generate a warning message. @code{BINMODE} is described in more
-detail in @ref{PC Using}. @command{mawk} @pxref{Other Versions}),
+detail in @ref{PC Using}. @command{mawk} (@pxref{Other Versions})
also supports this variable, but only using numeric values.
@cindex @code{CONVFMT} variable
@@ -14518,7 +14524,7 @@ printing with the @code{print} statement. It works by being passed
as the first argument to the @code{sprintf()} function
(@pxref{String Functions}).
Its default value is @code{"%.6g"}. Earlier versions of @command{awk}
-also used @code{OFMT} to specify the format for converting numbers to
+used @code{OFMT} to specify the format for converting numbers to
strings in general expressions; this is now done by @code{CONVFMT}.
@cindex @code{sprintf()} function, @code{OFMT} variable and
@@ -14670,8 +14676,8 @@ successive instances of the same @value{FN} on the command line.
@cindex file names, distinguishing
While you can change the value of @code{ARGIND} within your @command{awk}
-program, @command{gawk} automatically sets it to a new value when the
-next file is opened.
+program, @command{gawk} automatically sets it to a new value when it
+opens the next file.
@cindex @code{ENVIRON} array
@cindex environment variables, in @code{ENVIRON} array
@@ -14736,10 +14742,10 @@ can give @code{FILENAME} a value.
@cindex @code{FNR} variable
@item @code{FNR}
-The current record number in the current file. @code{FNR} is
-incremented each time a new record is read
-(@pxref{Records}). It is reinitialized
-to zero each time a new input file is started.
+The current record number in the current file. @command{awk} increments
+@code{FNR} each time it reads a new record (@pxref{Records}).
+@command{awk} resets @code{FNR} to zero each time it starts a new
+input file.
@cindex @code{NF} variable
@item @code{NF}
@@ -14771,7 +14777,7 @@ array causes a fatal error. Any attempt to assign to an element of
The number of input records @command{awk} has processed since
the beginning of the program's execution
(@pxref{Records}).
-@code{NR} is incremented each time a new record is read.
+@command{awk} increments @code{NR} each time it reads a new record.
@cindex @command{gawk}, @code{PROCINFO} array in
@cindex @code{PROCINFO} array
@@ -14851,7 +14857,7 @@ The parent process ID of the current process.
@item PROCINFO["sorted_in"]
If this element exists in @code{PROCINFO}, its value controls the
order in which array indices will be processed by
-@samp{for (@var{index} in @var{array})} loops.
+@samp{for (@var{indx} in @var{array})} loops.
Since this is an advanced feature, we defer the
full description until later; see
@ref{Scanning an Array}.
@@ -14872,7 +14878,7 @@ The version of @command{gawk}.
The following additional elements in the array
are available to provide information about the MPFR and GMP libraries
-if your version of @command{gawk} supports arbitrary precision numbers
+if your version of @command{gawk} supports arbitrary precision arithmetic
(@pxref{Arbitrary Precision Arithmetic}):
@table @code
@@ -14921,14 +14927,14 @@ The @code{PROCINFO} array has the following additional uses:
@itemize @value{BULLET}
@item
-It may be used to cause coprocesses to communicate over pseudo-ttys
-instead of through two-way pipes; this is discussed further in
-@ref{Two-way I/O}.
-
-@item
It may be used to provide a timeout when reading from any
open input file, pipe, or coprocess.
@xref{Read Timeout}, for more information.
+
+@item
+It may be used to cause coprocesses to communicate over pseudo-ttys
+instead of through two-way pipes; this is discussed further in
+@ref{Two-way I/O}.
@end itemize
@cindex @code{RLENGTH} variable
@@ -15216,6 +15222,12 @@ following @option{-v} are passed on to the @command{awk} program.
(@xref{Getopt Function}, for an @command{awk} library function that
parses command-line options.)
+When designing your program, you should choose options that don't
+conflict with @command{gawk}'s, since it will process any options
+that it accepts before passing the rest of the command line on to
+your program. Using @samp{#!} with the @option{-E} option may help
+(@pxref{Executable Scripts}, and @pxref{Options}).
+
@node Pattern Action Summary
@section Summary
@@ -15250,7 +15262,7 @@ input and output statements, and deletion statements.
The control statements in @command{awk} are @code{if}-@code{else},
@code{while}, @code{for}, and @code{do}-@code{while}. @command{gawk}
adds the @code{switch} statement. There are two flavors of @code{for}
-statement: one for for performing general looping, and the other iterating
+statement: one for performing general looping, and the other for iterating
through an array.
@item
@@ -15267,12 +15279,17 @@ The @code{exit} statement terminates your program. When executed
from an action (or function body), it transfers control to the
@code{END} statements. From an @code{END} statement body, it exits
immediately. You may pass an optional numeric value to be used
-at @command{awk}'s exit status.
+as @command{awk}'s exit status.
@item
Some built-in variables provide control over @command{awk}, mainly for I/O.
Other variables convey information from @command{awk} to your program.
+@item
+@code{ARGC} and @code{ARGV} make the command-line arguments available
+to your program. Manipulating them from a @code{BEGIN} rule lets you
+control how @command{awk} will process the provided @value{DF}s.
+
@end itemize
@node Arrays
@@ -15293,24 +15310,13 @@ The @value{CHAPTER} moves on to discuss @command{gawk}'s facility
for sorting arrays, and ends with a brief description of @command{gawk}'s
ability to support true arrays of arrays.
-@cindex variables, names of
-@cindex functions, names of
-@cindex arrays, names of, and names of functions/variables
-@cindex names, arrays/variables
-@cindex namespace issues
-@command{awk} maintains a single set
-of names that may be used for naming variables, arrays, and functions
-(@pxref{User-defined}).
-Thus, you cannot have a variable and an array with the same name in the
-same @command{awk} program.
-
@menu
* Array Basics:: The basics of arrays.
-* Delete:: The @code{delete} statement removes an element
- from an array.
* Numeric Array Subscripts:: How to use numbers as subscripts in
@command{awk}.
* Uninitialized Subscripts:: Using uninitialized variables as subscripts.
+* Delete:: The @code{delete} statement removes an element
+ from an array.
* Multidimensional:: Emulating multidimensional arrays in
@command{awk}.
* Arrays of Arrays:: True multidimensional arrays.
@@ -15738,14 +15744,14 @@ begin with a number:
@example
@c file eg/misc/arraymax.awk
@{
- if ($1 > max)
- max = $1
- arr[$1] = $0
+ if ($1 > max)
+ max = $1
+ arr[$1] = $0
@}
END @{
- for (x = 1; x <= max; x++)
- print arr[x]
+ for (x = 1; x <= max; x++)
+ print arr[x]
@}
@c endfile
@end example
@@ -15785,9 +15791,9 @@ program's @code{END} rule, as follows:
@example
END @{
- for (x = 1; x <= max; x++)
- if (x in arr)
- print arr[x]
+ for (x = 1; x <= max; x++)
+ if (x in arr)
+ print arr[x]
@}
@end example
@@ -15809,7 +15815,7 @@ an array:
@example
for (@var{var} in @var{array})
- @var{body}
+ @var{body}
@end example
@noindent
@@ -15882,7 +15888,7 @@ BEGIN @{
@}
@end example
-Here is what happens when run with @command{gawk}:
+Here is what happens when run with @command{gawk} (and @command{mawk}):
@example
$ @kbd{gawk -f loopcheck.awk}
@@ -16000,7 +16006,8 @@ does not affect the loop.
For example:
@example
-$ @kbd{gawk 'BEGIN @{}
+$ @kbd{gawk '}
+> @kbd{BEGIN @{}
> @kbd{ a[4] = 4}
> @kbd{ a[3] = 3}
> @kbd{ for (i in a)}
@@ -16008,7 +16015,8 @@ $ @kbd{gawk 'BEGIN @{}
> @kbd{@}'}
@print{} 4 4
@print{} 3 3
-$ @kbd{gawk 'BEGIN @{}
+$ @kbd{gawk '}
+> @kbd{BEGIN @{}
> @kbd{ PROCINFO["sorted_in"] = "@@ind_str_asc"}
> @kbd{ a[4] = 4}
> @kbd{ a[3] = 3}
@@ -16057,118 +16065,6 @@ the @code{delete} statement.
In addition, @command{gawk} provides built-in functions for
sorting arrays; see @ref{Array Sorting Functions}.
-@node Delete
-@section The @code{delete} Statement
-@cindex @code{delete} statement
-@cindex deleting elements in arrays
-@cindex arrays, elements, deleting
-@cindex elements in arrays, deleting
-
-To remove an individual element of an array, use the @code{delete}
-statement:
-
-@example
-delete @var{array}[@var{index-expression}]
-@end example
-
-Once an array element has been deleted, any value the element once
-had is no longer available. It is as if the element had never
-been referred to or been given a value.
-The following is an example of deleting elements in an array:
-
-@example
-for (i in frequencies)
- delete frequencies[i]
-@end example
-
-@noindent
-This example removes all the elements from the array @code{frequencies}.
-Once an element is deleted, a subsequent @code{for} statement to scan the array
-does not report that element and the @code{in} operator to check for
-the presence of that element returns zero (i.e., false):
-
-@example
-delete foo[4]
-if (4 in foo)
- print "This will never be printed"
-@end example
-
-@cindex null strings, and deleting array elements
-It is important to note that deleting an element is @emph{not} the
-same as assigning it a null value (the empty string, @code{""}).
-For example:
-
-@example
-foo[4] = ""
-if (4 in foo)
- print "This is printed, even though foo[4] is empty"
-@end example
-
-@cindex lint checking, array elements
-It is not an error to delete an element that does not exist.
-However, if @option{--lint} is provided on the command line
-(@pxref{Options}),
-@command{gawk} issues a warning message when an element that
-is not in the array is deleted.
-
-@cindex common extensions, @code{delete} to delete entire arrays
-@cindex extensions, common@comma{} @code{delete} to delete entire arrays
-@cindex arrays, deleting entire contents
-@cindex deleting entire arrays
-@cindex @code{delete} @var{array}
-@cindex differences in @command{awk} and @command{gawk}, array elements, deleting
-All the elements of an array may be deleted with a single statement
-by leaving off the subscript in the @code{delete} statement,
-as follows:
-
-
-@example
-delete @var{array}
-@end example
-
-Using this version of the @code{delete} statement is about three times
-more efficient than the equivalent loop that deletes each element one
-at a time.
-
-@cindex Brian Kernighan's @command{awk}
-@quotation NOTE
-For many years,
-using @code{delete} without a subscript was a @command{gawk} extension.
-As of September, 2012, it was accepted for
-inclusion into the POSIX standard. See @uref{http://austingroupbugs.net/view.php?id=544,
-the Austin Group website}. This form of the @code{delete} statement is also supported
-by BWK @command{awk} and @command{mawk}, as well as
-by a number of other implementations (@pxref{Other Versions}).
-@end quotation
-
-@cindex portability, deleting array elements
-@cindex Brennan, Michael
-The following statement provides a portable but nonobvious way to clear
-out an array:@footnote{Thanks to Michael Brennan for pointing this out.}
-
-@example
-split("", array)
-@end example
-
-@cindex @code{split()} function, array elements@comma{} deleting
-The @code{split()} function
-(@pxref{String Functions})
-clears out the target array first. This call asks it to split
-apart the null string. Because there is no data to split out, the
-function simply clears the array and then returns.
-
-@quotation CAUTION
-Deleting an array does not change its type; you cannot
-delete an array and then use the array's name as a scalar
-(i.e., a regular variable). For example, the following does not work:
-
-@example
-a[1] = 3
-delete a
-a = 3
-@end example
-@end quotation
-
@node Numeric Array Subscripts
@section Using Numbers to Subscript Arrays
@@ -16209,7 +16105,7 @@ since @code{"12.15"} is different from @code{"12.153"}.
@cindex integer array indices
According to the rules for conversions
(@pxref{Conversion}), integer
-values are always converted to strings as integers, no matter what the
+values always convert to strings as integers, no matter what the
value of @code{CONVFMT} may happen to be. So the usual case of
the following works:
@@ -16232,7 +16128,7 @@ and
all refer to the same element!
As with many things in @command{awk}, the majority of the time
-things work as one would expect them to. But it is useful to have a precise
+things work as you would expect them to. But it is useful to have a precise
knowledge of the actual rules since they can sometimes have a subtle
effect on your programs.
@@ -16296,6 +16192,119 @@ Even though it is somewhat unusual, the null string
if @option{--lint} is provided
on the command line (@pxref{Options}).
+@node Delete
+@section The @code{delete} Statement
+@cindex @code{delete} statement
+@cindex deleting elements in arrays
+@cindex arrays, elements, deleting
+@cindex elements in arrays, deleting
+
+To remove an individual element of an array, use the @code{delete}
+statement:
+
+@example
+delete @var{array}[@var{index-expression}]
+@end example
+
+Once an array element has been deleted, any value the element once
+had is no longer available. It is as if the element had never
+been referred to or been given a value.
+The following is an example of deleting elements in an array:
+
+@example
+for (i in frequencies)
+ delete frequencies[i]
+@end example
+
+@noindent
+This example removes all the elements from the array @code{frequencies}.
+Once an element is deleted, a subsequent @code{for} statement to scan the array
+does not report that element, and using the @code{in} operator to check for
+the presence of that element returns zero (i.e., false):
+
+@example
+delete foo[4]
+if (4 in foo)
+ print "This will never be printed"
+@end example
+
+@cindex null strings, and deleting array elements
+It is important to note that deleting an element is @emph{not} the
+same as assigning it a null value (the empty string, @code{""}).
+For example:
+
+@example
+foo[4] = ""
+if (4 in foo)
+ print "This is printed, even though foo[4] is empty"
+@end example
+
+@cindex lint checking, array elements
+It is not an error to delete an element that does not exist.
+However, if @option{--lint} is provided on the command line
+(@pxref{Options}),
+@command{gawk} issues a warning message when an element that
+is not in the array is deleted.
+
+@cindex common extensions, @code{delete} to delete entire arrays
+@cindex extensions, common@comma{} @code{delete} to delete entire arrays
+@cindex arrays, deleting entire contents
+@cindex deleting entire arrays
+@cindex @code{delete} @var{array}
+@cindex differences in @command{awk} and @command{gawk}, array elements, deleting
+All the elements of an array may be deleted with a single statement
+by leaving off the subscript in the @code{delete} statement,
+as follows:
+
+
+@example
+delete @var{array}
+@end example
+
+Using this version of the @code{delete} statement is about three times
+more efficient than the equivalent loop that deletes each element one
+at a time.
+
+This form of the @code{delete} statement is also supported
+by BWK @command{awk} and @command{mawk}, as well as
+by a number of other implementations.
+
+@cindex Brian Kernighan's @command{awk}
+@quotation NOTE
+For many years, using @code{delete} without a subscript was a common
+extension. In September, 2012, it was accepted for inclusion into the
+POSIX standard. See @uref{http://austingroupbugs.net/view.php?id=544,
+the Austin Group website}.
+@end quotation
+
+@cindex portability, deleting array elements
+@cindex Brennan, Michael
+The following statement provides a portable but nonobvious way to clear
+out an array:@footnote{Thanks to Michael Brennan for pointing this out.}
+
+@example
+split("", array)
+@end example
+
+@cindex @code{split()} function, array elements@comma{} deleting
+The @code{split()} function
+(@pxref{String Functions})
+clears out the target array first. This call asks it to split
+apart the null string. Because there is no data to split out, the
+function simply clears the array and then returns.
+
+@quotation CAUTION
+Deleting all the elements from an array does not change its type; you cannot
+clear an array and then use the array's name as a scalar
+(i.e., a regular variable). For example, the following does not work:
+
+@example
+a[1] = 3
+delete a
+a = 3
+@end example
+@end quotation
+
@node Multidimensional
@section Multidimensional Arrays
@@ -16307,7 +16316,7 @@ on the command line (@pxref{Options}).
@cindex arrays, multidimensional
A multidimensional array is an array in which an element is identified
by a sequence of indices instead of a single index. For example, a
-two-dimensional array requires two indices. The usual way (in most
+two-dimensional array requires two indices. The usual way (in many
languages, including @command{awk}) to refer to an element of a
two-dimensional array named @code{grid} is with
@code{grid[@var{x},@var{y}]}.
@@ -16482,8 +16491,9 @@ a[1][3][1, "name"] = "barney"
Each subarray and the main array can be of different length. In fact, the
elements of an array or its subarray do not all have to have the same
type. This means that the main array and any of its subarrays can be
-non-rectangular, or jagged in structure. One can assign a scalar value to
-the index @code{4} of the main array @code{a}:
+non-rectangular, or jagged in structure. You can assign a scalar value to
+the index @code{4} of the main array @code{a}, even though @code{a[1]}
+is itself an array and not a scalar:
@example
a[4] = "An element in a jagged array"
@@ -16565,6 +16575,8 @@ for (i in array) @{
print array[i][j]
@}
@}
+ else
+ print array[i]
@}
@end example
@@ -16849,8 +16861,9 @@ Often random integers are needed instead. Following is a user-defined function
that can be used to obtain a random non-negative integer less than @var{n}:
@example
-function randint(n) @{
- return int(n * rand())
+function randint(n)
+@{
+ return int(n * rand())
@}
@end example
@@ -16870,8 +16883,7 @@ function roll(n) @{ return 1 + int(rand() * n) @}
# Roll 3 six-sided dice and
# print total number of points.
@{
- printf("%d points\n",
- roll(6)+roll(6)+roll(6))
+ printf("%d points\n", roll(6) + roll(6) + roll(6))
@}
@end example
@@ -16960,7 +16972,7 @@ doing index calculations, particularly if you are used to C.
In the following list, optional parameters are enclosed in square brackets@w{ ([ ]).}
Several functions perform string substitution; the full discussion is
provided in the description of the @code{sub()} function, which comes
-towards the end since the list is presented in alphabetic order.
+towards the end since the list is presented alphabetically.
Those functions that are specific to @command{gawk} are marked with a
pound sign (@samp{#}). They are not available in compatibility mode
@@ -17004,6 +17016,7 @@ When comparing strings, @code{IGNORECASE} affects the sorting
(@pxref{Array Sorting Functions}). If the
@var{source} array contains subarrays as values (@pxref{Arrays of
Arrays}), they will come last, after all scalar values.
+Subarrays are @emph{not} recursively sorted.
For example, if the contents of @code{a} are as follows:
@@ -17140,7 +17153,10 @@ $ @kbd{awk 'BEGIN @{ print index("peanut", "an") @}'}
@noindent
If @var{find} is not found, @code{index()} returns zero.
-It is a fatal error to use a regexp constant for @var{find}.
+With BWK @command{awk} and @command{gawk},
+it is a fatal error to use a regexp constant for @var{find}.
+Other implementations allow it, simply treating the regexp
+constant as an expression meaning @samp{$0 ~ /regexp/}.
@item @code{length(}[@var{string}]@code{)}
@cindexawkfunc{length}
@@ -17254,13 +17270,12 @@ For example:
@example
@c file eg/misc/findpat.awk
@{
- if ($1 == "FIND")
- regex = $2
- else @{
- where = match($0, regex)
- if (where != 0)
- print "Match of", regex, "found at",
- where, "in", $0
+ if ($1 == "FIND")
+ regex = $2
+ else @{
+ where = match($0, regex)
+ if (where != 0)
+ print "Match of", regex, "found at", where, "in", $0
@}
@}
@c endfile
@@ -17356,7 +17371,7 @@ Any leading separator will be in @code{@var{seps}[0]}.
The @code{patsplit()} function splits strings into pieces in a
manner similar to the way input lines are split into fields using @code{FPAT}
-(@pxref{Splitting By Content}.
+(@pxref{Splitting By Content}).
Before splitting the string, @code{patsplit()} deletes any previously existing
elements in the arrays @var{array} and @var{seps}.
@@ -17369,8 +17384,7 @@ and store the pieces in @var{array} and the separator strings in the
@code{@var{array}[1]}, the second piece in @code{@var{array}[2]}, and so
forth. The string value of the third argument, @var{fieldsep}, is
a regexp describing where to split @var{string} (much as @code{FS} can
-be a regexp describing where to split input records;
-@pxref{Regexp Field Splitting}).
+be a regexp describing where to split input records).
If @var{fieldsep} is omitted, the value of @code{FS} is used.
@code{split()} returns the number of elements created.
@var{seps} is a @command{gawk} extension with @code{@var{seps}[@var{i}]}
@@ -17665,6 +17679,59 @@ Nonalphabetic characters are left unchanged. For example,
@code{toupper("MiXeD cAsE 123")} returns @code{"MIXED CASE 123"}.
@end table
+@cindex sidebar, Matching the Null String
+@ifdocbook
+@docbook
+<sidebar><title>Matching the Null String</title>
+@end docbook
+
+@cindex matching, null strings
+@cindex null strings, matching
+@cindex @code{*} (asterisk), @code{*} operator, null strings@comma{} matching
+@cindex asterisk (@code{*}), @code{*} operator, null strings@comma{} matching
+
+In @command{awk}, the @samp{*} operator can match the null string.
+This is particularly important for the @code{sub()}, @code{gsub()},
+and @code{gensub()} functions. For example:
+
+@example
+$ @kbd{echo abc | awk '@{ gsub(/m*/, "X"); print @}'}
+@print{} XaXbXcX
+@end example
+
+@noindent
+Although this makes a certain amount of sense, it can be surprising.
+
+@docbook
+</sidebar>
+@end docbook
+@end ifdocbook
+
+@ifnotdocbook
+@cartouche
+@center @b{Matching the Null String}
+
+
+@cindex matching, null strings
+@cindex null strings, matching
+@cindex @code{*} (asterisk), @code{*} operator, null strings@comma{} matching
+@cindex asterisk (@code{*}), @code{*} operator, null strings@comma{} matching
+
+In @command{awk}, the @samp{*} operator can match the null string.
+This is particularly important for the @code{sub()}, @code{gsub()},
+and @code{gensub()} functions. For example:
+
+@example
+$ @kbd{echo abc | awk '@{ gsub(/m*/, "X"); print @}'}
+@print{} XaXbXcX
+@end example
+
+@noindent
+Although this makes a certain amount of sense, it can be surprising.
+@end cartouche
+@end ifnotdocbook
+
+
@node Gory Details
@subsubsection More About @samp{\} and @samp{&} with @code{sub()}, @code{gsub()}, and @code{gensub()}
@@ -17678,7 +17745,7 @@ Nonalphabetic characters are left unchanged. For example,
@cindex ampersand (@code{&}), @code{gsub()}/@code{gensub()}/@code{sub()} functions and
@quotation CAUTION
-This section has been known to cause headaches.
+This subsubsection has been reported to cause headaches.
You might want to skip it upon first reading.
@end quotation
@@ -17969,58 +18036,6 @@ and the special cases for @code{sub()} and @code{gsub()},
we recommend the use of @command{gawk} and @code{gensub()} when you have
to do substitutions.
-@cindex sidebar, Matching the Null String
-@ifdocbook
-@docbook
-<sidebar><title>Matching the Null String</title>
-@end docbook
-
-@cindex matching, null strings
-@cindex null strings, matching
-@cindex @code{*} (asterisk), @code{*} operator, null strings@comma{} matching
-@cindex asterisk (@code{*}), @code{*} operator, null strings@comma{} matching
-
-In @command{awk}, the @samp{*} operator can match the null string.
-This is particularly important for the @code{sub()}, @code{gsub()},
-and @code{gensub()} functions. For example:
-
-@example
-$ @kbd{echo abc | awk '@{ gsub(/m*/, "X"); print @}'}
-@print{} XaXbXcX
-@end example
-
-@noindent
-Although this makes a certain amount of sense, it can be surprising.
-
-@docbook
-</sidebar>
-@end docbook
-@end ifdocbook
-
-@ifnotdocbook
-@cartouche
-@center @b{Matching the Null String}
-
-
-@cindex matching, null strings
-@cindex null strings, matching
-@cindex @code{*} (asterisk), @code{*} operator, null strings@comma{} matching
-@cindex asterisk (@code{*}), @code{*} operator, null strings@comma{} matching
-
-In @command{awk}, the @samp{*} operator can match the null string.
-This is particularly important for the @code{sub()}, @code{gsub()},
-and @code{gensub()} functions. For example:
-
-@example
-$ @kbd{echo abc | awk '@{ gsub(/m*/, "X"); print @}'}
-@print{} XaXbXcX
-@end example
-
-@noindent
-Although this makes a certain amount of sense, it can be surprising.
-@end cartouche
-@end ifnotdocbook
-
@node I/O Functions
@subsection Input/Output Functions
@cindex input/output functions
@@ -18073,10 +18088,9 @@ buffers its output and the @code{fflush()} function forces
@cindex extensions, common@comma{} @code{fflush()} function
@cindex Brian Kernighan's @command{awk}
-@code{fflush()} was added to BWK @command{awk} in
-April of 1992. For two decades, it was not part of the POSIX standard.
-As of December, 2012, it was accepted for inclusion into the POSIX
-standard.
+Brian Kernighan added @code{fflush()} to his @command{awk} in April
+of 1992. For two decades, it was a common extension. In December,
+2012, it was accepted for inclusion into the POSIX standard.
See @uref{http://austingroupbugs.net/view.php?id=634, the Austin Group website}.
POSIX standardizes @code{fflush()} as follows: If there
@@ -18473,7 +18487,7 @@ is out of range, @code{mktime()} returns @minus{}1.
@cindex @command{gawk}, @code{PROCINFO} array in
@cindex @code{PROCINFO} array
-@item @code{strftime(} [@var{format} [@code{,} @var{timestamp} [@code{,} @var{utc-flag}] ] ]@code{)}
+@item @code{strftime(}[@var{format} [@code{,} @var{timestamp} [@code{,} @var{utc-flag}] ] ]@code{)}
@c STARTOFRANGE strf
@cindexgawkfunc{strftime}
@cindex format time string
@@ -18740,7 +18754,7 @@ the string. For example:
@example
$ date '+Today is %A, %B %d, %Y.'
-@print{} Today is Monday, May 05, 2014.
+@print{} Today is Monday, September 22, 2014.
@end example
Here is the @command{gawk} version of the @command{date} utility.
@@ -18934,17 +18948,16 @@ shows that 0's come in on the left side. For @command{gawk}, this is
always true, but in some languages, it's possible to have the left side
fill with 1's.}
@c Purposely decided to use 0's and 1's here. 2/2001.
-If you start over
-again with @samp{10111001} and shift it left by three bits, you end up
-with @samp{11001000}.
-@command{gawk} provides built-in functions that implement the
-bitwise operations just described. They are:
+If you start over again with @samp{10111001} and shift it left by three
+bits, you end up with @samp{11001000}. The following list describes
+@command{gawk}'s built-in functions that implement the bitwise operations.
+Optional parameters are enclosed in square brackets ([ ]):
@cindex @command{gawk}, bitwise operations in
@table @code
@cindexgawkfunc{and}
@cindex bitwise AND
-@item @code{and(@var{v1}, @var{v2}} [@code{,} @dots{}]@code{)}
+@item @code{and(}@var{v1}@code{,} @var{v2} [@code{,} @dots{}]@code{)}
Return the bitwise AND of the arguments. There must be at least two.
@cindexgawkfunc{compl}
@@ -18959,7 +18972,7 @@ Return the value of @var{val}, shifted left by @var{count} bits.
@cindexgawkfunc{or}
@cindex bitwise OR
-@item @code{or(@var{v1}, @var{v2}} [@code{,} @dots{}]@code{)}
+@item @code{or(}@var{v1}@code{,} @var{v2} [@code{,} @dots{}]@code{)}
Return the bitwise OR of the arguments. There must be at least two.
@cindexgawkfunc{rshift}
@@ -18969,7 +18982,7 @@ Return the value of @var{val}, shifted right by @var{count} bits.
@cindexgawkfunc{xor}
@cindex bitwise XOR
-@item @code{xor(@var{v1}, @var{v2}} [@code{,} @dots{}]@code{)}
+@item @code{xor(}@var{v1}@code{,} @var{v2} [@code{,} @dots{}]@code{)}
Return the bitwise XOR of the arguments. There must be at least two.
@end table
@@ -19092,7 +19105,7 @@ results of the @code{compl()}, @code{lshift()}, and @code{rshift()} functions.
@command{gawk} provides a single function that lets you distinguish
an array from a scalar variable. This is necessary for writing code
-that traverses every element of an array of arrays.
+that traverses every element of an array of arrays
(@pxref{Arrays of Arrays}).
@table @code
@@ -19108,12 +19121,14 @@ an array or not. The second is inside the body of a user-defined function
(not discussed yet; @pxref{User-defined}), to test if a parameter is an
array or not.
-Note, however, that using @code{isarray()} at the global level to test
+@quotation NOTE
+Using @code{isarray()} at the global level to test
variables makes no sense. Since you are the one writing the program, you
are supposed to know if your variables are arrays or not. And in fact,
due to the way @command{gawk} works, if you pass the name of a variable
that has not been previously used to @code{isarray()}, @command{gawk}
-will end up turning it into a scalar.
+ends up turning it into a scalar.
+@end quotation
@node I18N Functions
@subsection String-Translation Functions
@@ -19374,7 +19389,7 @@ extra whitespace signifies the start of the local variable list):
function delarray(a, i)
@{
for (i in a)
- delete a[i]
+ delete a[i]
@}
@end example
@@ -19385,7 +19400,7 @@ Instead of having
to repeat this loop everywhere that you need to clear out
an array, your program can just call @code{delarray}.
(This guarantees portability. The use of @samp{delete @var{array}} to delete
-the contents of an entire array is a recent@footnote{Late in 2012.}
+the contents of an entire array is a relatively recent@footnote{Late in 2012.}
addition to the POSIX standard.)
The following is an example of a recursive function. It takes a string
@@ -19415,7 +19430,7 @@ $ @kbd{echo "Don't Panic!" |}
@print{} !cinaP t'noD
@end example
-The C @code{ctime()} function takes a timestamp and returns it in a string,
+The C @code{ctime()} function takes a timestamp and returns it as a string,
formatted in a well-known fashion.
The following example uses the built-in @code{strftime()} function
(@pxref{Time Functions})
@@ -19430,13 +19445,19 @@ to create an @command{awk} version of @code{ctime()}:
function ctime(ts, format)
@{
- format = PROCINFO["strftime"]
+ format = "%a %b %e %H:%M:%S %Z %Y"
+
if (ts == 0)
ts = systime() # use current time as default
return strftime(format, ts)
@}
@c endfile
@end example
+
+You might think that @code{ctime()} could use @code{PROCINFO["strftime"]}
+for its format string. That would be a mistake, since @code{ctime()} is
+supposed to return the time formatted in a standard fashion, and user-level
+code could have changed @code{PROCINFO["strftime"]}.
@c ENDOFRANGE fdef
@node Function Caveats
@@ -20085,7 +20106,7 @@ function quicksort(data, left, right, less_than, i, last)
# quicksort_swap --- helper function for quicksort, should really be inline
-function quicksort_swap(data, i, j, temp)
+function quicksort_swap(data, i, j, temp)
@{
temp = data[i]
data[i] = data[j]
@@ -20236,10 +20257,11 @@ functions.
@item
POSIX @command{awk} provides three kinds of built-in functions: numeric,
-string, and I/O. @command{gawk} provides functions that work with values
-representing time, do bit manipulation, sort arrays, and internationalize
-and localize programs. @command{gawk} also provides several extensions to
-some of standard functions, typically in the form of additional arguments.
+string, and I/O. @command{gawk} provides functions that sort arrays, work
+with values representing time, do bit manipulation, determine variable
+type (array vs.@: scalar), and internationalize and localize programs.
+@command{gawk} also provides several extensions to some of the standard
+functions, typically in the form of additional arguments.
@item
Functions accept zero or more arguments and return a value. The
@@ -20490,8 +20512,9 @@ are very difficult to track down:
function lib_func(x, y, l1, l2)
@{
@dots{}
- @var{use variable} some_var # some_var should be local
- @dots{} # but is not by oversight
+ # some_var should be local but by oversight is not
+ @var{use variable} some_var
+ @dots{}
@}
@end example
@@ -20602,7 +20625,7 @@ function mystrtonum(str, ret, n, i, k, c)
# a[5] = "123.45"
# a[6] = "1.e3"
# a[7] = "1.32"
-# a[7] = "1.32E2"
+# a[8] = "1.32E2"
#
# for (i = 1; i in a; i++)
# print a[i], strtonum(a[i]), mystrtonum(a[i])
@@ -20613,9 +20636,12 @@ function mystrtonum(str, ret, n, i, k, c)
The function first looks for C-style octal numbers (base 8).
If the input string matches a regular expression describing octal
numbers, then @code{mystrtonum()} loops through each character in the
-string. It sets @code{k} to the index in @code{"01234567"} of the current
-octal digit. Since the return value is one-based, the @samp{k--}
-adjusts @code{k} so it can be used in computing the return value.
+string. It sets @code{k} to the index in @code{"1234567"} of the current
+octal digit.
+The return value will either be the same number as the digit, or zero
+if the character is not there, which will be true for a @samp{0}.
+This is safe, since the regexp test in the @code{if} ensures that
+only octal values are converted.
Similar logic applies to the code that checks for and converts a
hexadecimal value, which starts with @samp{0x} or @samp{0X}.
@@ -20648,7 +20674,7 @@ that a condition or set of conditions is true. Before proceeding with a
particular computation, you make a statement about what you believe to be
the case. Such a statement is known as an
@dfn{assertion}. The C language provides an @code{<assert.h>} header file
-and corresponding @code{assert()} macro that the programmer can use to make
+and corresponding @code{assert()} macro that a programmer can use to make
assertions. If an assertion fails, the @code{assert()} macro arranges to
print a diagnostic message describing the condition that should have
been true but was not, and then it kills the program. In C, using
@@ -21118,7 +21144,7 @@ function getlocaltime(time, ret, now, i)
now = systime()
# return date(1)-style output
- ret = strftime(PROCINFO["strftime"], now)
+ ret = strftime("%a %b %e %H:%M:%S %Z %Y", now)
# clear out target array
delete time
@@ -21233,6 +21259,9 @@ if (length(contents) == 0)
This tests the result to see if it is empty or not. An equivalent
test would be @samp{contents == ""}.
+@xref{Extension Sample Readfile}, for an extension function that
+also reads an entire file into memory.
+
@node Data File Management
@section @value{DDF} Management
@@ -21290,15 +21319,14 @@ Besides solving the problem in only nine(!) lines of code, it does so
@c # Arnold Robbins, arnold@@skeeve.com, Public Domain
@c # January 1992
-FILENAME != _oldfilename \
-@{
+FILENAME != _oldfilename @{
if (_oldfilename != "")
endfile(_oldfilename)
_oldfilename = FILENAME
beginfile(FILENAME)
@}
-END @{ endfile(FILENAME) @}
+END @{ endfile(FILENAME) @}
@end example
This file must be loaded before the user's ``main'' program, so that the
@@ -21351,7 +21379,7 @@ FNR == 1 @{
beginfile(FILENAME)
@}
-END @{ endfile(_filename_) @}
+END @{ endfile(_filename_) @}
@c endfile
@end example
diff --git a/doc/gawktexi.in b/doc/gawktexi.in
index 595771d9..3bea4ba2 100644
--- a/doc/gawktexi.in
+++ b/doc/gawktexi.in
@@ -721,12 +721,12 @@ particular records in a file and perform operations upon them.
elements.
* Controlling Scanning:: Controlling the order in which arrays
are scanned.
-* Delete:: The @code{delete} statement removes an
- element from an array.
* Numeric Array Subscripts:: How to use numbers as subscripts in
@command{awk}.
* Uninitialized Subscripts:: Using Uninitialized variables as
subscripts.
+* Delete:: The @code{delete} statement removes an
+ element from an array.
* Multidimensional:: Emulating multidimensional arrays in
@command{awk}.
* Multiscanning:: Scanning multidimensional arrays.
@@ -10101,7 +10101,7 @@ if (/barfly/ || /camelot/)
@noindent
are exactly equivalent.
One rather bizarre consequence of this rule is that the following
-Boolean expression is valid, but does not do what the user probably
+Boolean expression is valid, but does not do what its author probably
intended:
@example
@@ -10147,10 +10147,9 @@ Modern implementations of @command{awk}, including @command{gawk}, allow
the third argument of @code{split()} to be a regexp constant, but some
older implementations do not.
@value{DARKCORNER}
-This can lead to confusion when attempting to use regexp constants
-as arguments to user-defined functions
-(@pxref{User-defined}).
-For example:
+Because some built-in functions accept regexp constants as arguments,
+confusion can arise when attempting to use regexp constants as arguments
+to user-defined functions (@pxref{User-defined}). For example:
@example
function mysub(pat, repl, str, global)
@@ -10218,8 +10217,8 @@ variable's current value. Variables are given new values with
@dfn{decrement operators}.
@xref{Assignment Ops}.
In addition, the @code{sub()} and @code{gsub()} functions can
-change a variable's value, and the @code{match()}, @code{patsplit()}
-and @code{split()} functions can change the contents of their
+change a variable's value, and the @code{match()}, @code{split()},
+and @code{patsplit()} functions can change the contents of their
array parameters. @xref{String Functions}.
@cindex variables, built-in
@@ -10235,7 +10234,7 @@ Variables in @command{awk} can be assigned either numeric or string values.
The kind of value a variable holds can change over the life of a program.
By default, variables are initialized to the empty string, which
is zero if converted to a number. There is no need to explicitly
-``initialize'' a variable in @command{awk},
+initialize a variable in @command{awk},
which is what you would do in C and in most other traditional languages.
@node Assignment Options
@@ -10443,7 +10442,7 @@ $ @kbd{echo 4,321 | LC_ALL=en_DK.utf-8 gawk '@{ print $1 + 1 @}'}
@noindent
The @code{en_DK.utf-8} locale is for English in Denmark, where the comma acts as
the decimal point separator. In the normal @code{"C"} locale, @command{gawk}
-treats @samp{4,321} as @samp{4}, while in the Danish locale, it's treated
+treats @samp{4,321} as 4, while in the Danish locale, it's treated
as the full number, 4.321.
Some earlier versions of @command{gawk} fully complied with this aspect
@@ -10995,7 +10994,7 @@ awk '/[=]=/' /dev/null
@end example
@command{gawk} does not have this problem; BWK @command{awk}
-and @command{mawk} also do not (@pxref{Other Versions}).
+and @command{mawk} also do not.
@end sidebar
@c ENDOFRANGE exas
@c ENDOFRANGE opas
@@ -11248,7 +11247,7 @@ attribute.
@item
Fields, @code{getline} input, @code{FILENAME}, @code{ARGV} elements,
@code{ENVIRON} elements, and the elements of an array created by
-@code{patsplit()}, @code{split()} and @code{match()} that are numeric
+@code{match()}, @code{split()}, and @code{patsplit()} that are numeric
strings have the @var{strnum} attribute. Otherwise, they have
the @var{string} attribute. Uninitialized variables also have the
@var{strnum} attribute.
@@ -11403,22 +11402,23 @@ Thus, the six-character input string @w{@samp{ +3.14}} receives the
The following examples print @samp{1} when the comparison between
the two different constants is true, @samp{0} otherwise:
+@c 22.9.2014: Tested with mawk and BWK awk, got same results.
@example
-$ @kbd{echo ' +3.14' | gawk '@{ print $0 == " +3.14" @}'} @ii{True}
+$ @kbd{echo ' +3.14' | awk '@{ print($0 == " +3.14") @}'} @ii{True}
@print{} 1
-$ @kbd{echo ' +3.14' | gawk '@{ print $0 == "+3.14" @}'} @ii{False}
+$ @kbd{echo ' +3.14' | awk '@{ print($0 == "+3.14") @}'} @ii{False}
@print{} 0
-$ @kbd{echo ' +3.14' | gawk '@{ print $0 == "3.14" @}'} @ii{False}
+$ @kbd{echo ' +3.14' | awk '@{ print($0 == "3.14") @}'} @ii{False}
@print{} 0
-$ @kbd{echo ' +3.14' | gawk '@{ print $0 == 3.14 @}'} @ii{True}
+$ @kbd{echo ' +3.14' | awk '@{ print($0 == 3.14) @}'} @ii{True}
@print{} 1
-$ @kbd{echo ' +3.14' | gawk '@{ print $1 == " +3.14" @}'} @ii{False}
+$ @kbd{echo ' +3.14' | awk '@{ print($1 == " +3.14") @}'} @ii{False}
@print{} 0
-$ @kbd{echo ' +3.14' | gawk '@{ print $1 == "+3.14" @}'} @ii{True}
+$ @kbd{echo ' +3.14' | awk '@{ print($1 == "+3.14") @}'} @ii{True}
@print{} 1
-$ @kbd{echo ' +3.14' | gawk '@{ print $1 == "3.14" @}'} @ii{False}
+$ @kbd{echo ' +3.14' | awk '@{ print($1 == "3.14") @}'} @ii{False}
@print{} 0
-$ @kbd{echo ' +3.14' | gawk '@{ print $1 == 3.14 @}'} @ii{True}
+$ @kbd{echo ' +3.14' | awk '@{ print($1 == 3.14) @}'} @ii{True}
@print{} 1
@end example
@@ -11492,9 +11492,8 @@ part of the test always succeeds. Because the operators are
so similar, this kind of error is very difficult to spot when
scanning the source code.
-@cindex @command{gawk}, comparison operators and
-The following list of expressions illustrates the kind of comparison
-@command{gawk} performs, as well as what the result of the comparison is:
+The following list of expressions illustrates the kinds of comparisons
+@command{awk} performs, as well as what the result of each comparison is:
@table @code
@item 1.5 <= 2.0
@@ -11567,7 +11566,7 @@ dynamic regexp (@pxref{Regexp Usage}; also
@cindex @command{awk}, regexp constants and
@cindex regexp constants
-In modern implementations of @command{awk}, a constant regular
+A constant regular
expression in slashes by itself is also an expression. The regexp
@code{/@var{regexp}/} is an abbreviation for the following comparison expression:
@@ -11587,7 +11586,7 @@ where this is discussed in more detail.
The POSIX standard says that string comparison is performed based
on the locale's @dfn{collating order}. This is the order in which
characters sort, as defined by the locale (for more discussion,
-@pxref{Ranges and Locales}). This order is usually very different
+@pxref{Locales}). This order is usually very different
from the results obtained when doing straight character-by-character
comparison.@footnote{Technically, string comparison is supposed
to behave the same way as if the strings are compared with the C
@@ -11667,7 +11666,7 @@ no substring @samp{foo} in the record.
True if at least one of @var{boolean1} or @var{boolean2} is true.
For example, the following statement prints all records in the input
that contain @emph{either} @samp{edu} or
-@samp{li} or both:
+@samp{li}:
@example
if ($0 ~ /edu/ || $0 ~ /li/) print
@@ -11676,6 +11675,9 @@ if ($0 ~ /edu/ || $0 ~ /li/) print
The subexpression @var{boolean2} is evaluated only if @var{boolean1}
is false. This can make a difference when @var{boolean2} contains
expressions that have side effects.
+(Thus, this test never really distinguishes records that contain both
+@samp{edu} and @samp{li}---as soon as @samp{edu} is matched,
+the full test succeeds.)
@item ! @var{boolean}
True if @var{boolean} is false. For example,
@@ -11685,7 +11687,7 @@ variable is not defined:
@example
BEGIN @{ if (! ("HOME" in ENVIRON))
- print "no home!" @}
+ print "no home!" @}
@end example
(The @code{in} operator is described in
@@ -12141,8 +12143,8 @@ system about the local character set and language. The ISO C standard
defines a default @code{"C"} locale, which is an environment that is
typical of what many C programmers are used to.
-Once upon a time, the locale setting used to affect regexp matching
-(@pxref{Ranges and Locales}), but this is no longer true.
+Once upon a time, the locale setting used to affect regexp matching,
+but this is no longer true (@pxref{Ranges and Locales}).
Locales can affect record splitting. For the normal case of @samp{RS =
"\n"}, the locale is largely irrelevant. For other single-character
@@ -12196,7 +12198,8 @@ Locales can influence the conversions.
@item
@command{awk} provides the usual arithmetic operators (addition,
subtraction, multiplication, division, modulus), and unary plus and minus.
-It also provides comparison operators, boolean operators, and regexp
+It also provides comparison operators, Boolean operators, array membership
+testing, and regexp
matching operators. String concatenation is accomplished by placing
two expressions next to each other; there is no explicit operator.
The three-operand @samp{?:} operator provides an ``if-else'' test within
@@ -12211,7 +12214,7 @@ In @command{awk}, a value is considered to be true if it is non-zero
@emph{or} non-null. Otherwise, the value is false.
@item
-A value's type is set upon each assignment and may change over its
+A variable's type is set upon each assignment and may change over its
lifetime. The type determines how it behaves in comparisons (string
or numeric).
@@ -12291,7 +12294,7 @@ is nonzero (if a number) or non-null (if a string).
(@xref{Expression Patterns}.)
@item @var{begpat}, @var{endpat}
-A pair of patterns separated by a comma, specifying a range of records.
+A pair of patterns separated by a comma, specifying a @dfn{range} of records.
The range includes both the initial record that matches @var{begpat} and
the final record that matches @var{endpat}.
(@xref{Ranges}.)
@@ -12381,8 +12384,8 @@ $ @kbd{awk '$1 ~ /li/ @{ print $2 @}' mail-list}
@cindex regexp constants, as patterns
@cindex patterns, regexp constants as
A regexp constant as a pattern is also a special case of an expression
-pattern. The expression @code{/li/} has the value one if @samp{li}
-appears in the current input record. Thus, as a pattern, @code{/li/}
+pattern. The expression @samp{/li/} has the value one if @samp{li}
+appears in the current input record. Thus, as a pattern, @samp{/li/}
matches any record containing @samp{li}.
@cindex Boolean expressions, as patterns
@@ -12564,7 +12567,7 @@ input is read. For example:
@example
$ @kbd{awk '}
> @kbd{BEGIN @{ print "Analysis of \"li\"" @}}
-> @kbd{/li/ @{ ++n @}}
+> @kbd{/li/ @{ ++n @}}
> @kbd{END @{ print "\"li\" appears in", n, "records." @}' mail-list}
@print{} Analysis of "li"
@print{} "li" appears in 4 records.
@@ -12644,9 +12647,10 @@ The POSIX standard specifies that @code{NF} is available in an @code{END}
rule. It contains the number of fields from the last input record.
Most probably due to an oversight, the standard does not say that @code{$0}
is also preserved, although logically one would think that it should be.
-In fact, @command{gawk} does preserve the value of @code{$0} for use in
-@code{END} rules. Be aware, however, that BWK @command{awk}, and possibly
-other implementations, do not.
+In fact, all of BWK @command{awk}, @command{mawk}, and @command{gawk}
+preserve the value of @code{$0} for use in @code{END} rules. Be aware,
+however, that some other implementations and many older versions
+of Unix @command{awk} do not.
The third point follows from the first two. The meaning of @samp{print}
inside a @code{BEGIN} or @code{END} rule is the same as always:
@@ -12741,8 +12745,8 @@ level of the @command{awk} program.
@cindex @code{next} statement, @code{BEGINFILE}/@code{ENDFILE} patterns and
The @code{next} statement (@pxref{Next Statement}) is not allowed inside
-either a @code{BEGINFILE} or and @code{ENDFILE} rule. The @code{nextfile}
-statement (@pxref{Nextfile Statement}) is allowed only inside a
+either a @code{BEGINFILE} or an @code{ENDFILE} rule. The @code{nextfile}
+statement is allowed only inside a
@code{BEGINFILE} rule, but not inside an @code{ENDFILE} rule.
@cindex @code{getline} statement, @code{BEGINFILE}/@code{ENDFILE} patterns and
@@ -12806,7 +12810,7 @@ There are two ways to get the value of the shell variable
into the body of the @command{awk} program.
@cindex shells, quoting
-The most common method is to use shell quoting to substitute
+A common method is to use shell quoting to substitute
the variable's value into the program inside the script.
For example, consider the following program:
@@ -13063,20 +13067,21 @@ If the @var{condition} is true, it executes the statement @var{body}.
is not zero and not a null string.)
@end ifinfo
After @var{body} has been executed,
-@var{condition} is tested again, and if it is still true, @var{body} is
-executed again. This process repeats until the @var{condition} is no longer
-true. If the @var{condition} is initially false, the body of the loop is
-never executed and @command{awk} continues with the statement following
+@var{condition} is tested again, and if it is still true, @var{body}
+executes again. This process repeats until the @var{condition} is no longer
+true. If the @var{condition} is initially false, the body of the loop
+never executes and @command{awk} continues with the statement following
the loop.
This example prints the first three fields of each record, one per line:
@example
-awk '@{
- i = 1
- while (i <= 3) @{
- print $i
- i++
- @}
+awk '
+@{
+ i = 1
+ while (i <= 3) @{
+ print $i
+ i++
+ @}
@}' inventory-shipped
@end example
@@ -13110,14 +13115,14 @@ do
while (@var{condition})
@end example
-Even if the @var{condition} is false at the start, the @var{body} is
-executed at least once (and only once, unless executing @var{body}
+Even if the @var{condition} is false at the start, the @var{body}
+executes at least once (and only once, unless executing @var{body}
makes @var{condition} true). Contrast this with the corresponding
@code{while} statement:
@example
while (@var{condition})
- @var{body}
+ @var{body}
@end example
@noindent
@@ -13127,11 +13132,11 @@ The following is an example of a @code{do} statement:
@example
@{
- i = 1
- do @{
- print $0
- i++
- @} while (i <= 10)
+ i = 1
+ do @{
+ print $0
+ i++
+ @} while (i <= 10)
@}
@end example
@@ -13168,9 +13173,10 @@ compares it against the desired number of iterations.
For example:
@example
-awk '@{
- for (i = 1; i <= 3; i++)
- print $i
+awk '
+@{
+ for (i = 1; i <= 3; i++)
+ print $i
@}' inventory-shipped
@end example
@@ -13198,7 +13204,7 @@ between 1 and 100:
@example
for (i = 1; i <= 100; i *= 2)
- print i
+ print i
@end example
If there is nothing to be done, any of the three expressions in the
@@ -13518,7 +13524,7 @@ The @code{next} statement is not allowed inside @code{BEGINFILE} and
@cindex functions, user-defined, @code{next}/@code{nextfile} statements and
According to the POSIX standard, the behavior is undefined if the
@code{next} statement is used in a @code{BEGIN} or @code{END} rule.
-@command{gawk} treats it as a syntax error. Although POSIX permits it,
+@command{gawk} treats it as a syntax error. Although POSIX does not disallow it,
most other @command{awk} implementations don't allow the @code{next}
statement inside function bodies (@pxref{User-defined}). Just as with any
other @code{next} statement, a @code{next} statement inside a function
@@ -13573,7 +13579,7 @@ opened with redirections. It is not related to the main processing that
@quotation NOTE
For many years, @code{nextfile} was a
-@command{gawk} extension. As of September, 2012, it was accepted for
+common extension. In September, 2012, it was accepted for
inclusion into the POSIX standard.
See @uref{http://austingroupbugs.net/view.php?id=607, the Austin Group website}.
@end quotation
@@ -13582,8 +13588,8 @@ See @uref{http://austingroupbugs.net/view.php?id=607, the Austin Group website}.
@cindex @code{nextfile} statement, user-defined functions and
@cindex Brian Kernighan's @command{awk}
@cindex @command{mawk} utility
-The current version of BWK @command{awk}, and @command{mawk} (@pxref{Other
-Versions}) also support @code{nextfile}. However, they don't allow the
+The current version of BWK @command{awk} and @command{mawk}
+also support @code{nextfile}. However, they don't allow the
@code{nextfile} statement inside function bodies (@pxref{User-defined}).
@command{gawk} does; a @code{nextfile} inside a function body reads the
next record and starts processing it with the first rule in the program,
@@ -13615,8 +13621,8 @@ the program to stop immediately.
An @code{exit} statement that is not part of a @code{BEGIN} or @code{END}
rule stops the execution of any further automatic rules for the current
record, skips reading any remaining input records, and executes the
-@code{END} rule if there is one.
-Any @code{ENDFILE} rules are also skipped; they are not executed.
+@code{END} rule if there is one. @command{gawk} also skips
+any @code{ENDFILE} rules; they do not execute.
In such a case,
if you don't want the @code{END} rule to do its job, set a variable
@@ -13724,7 +13730,7 @@ respectively, should use binary I/O. A string value of @code{"rw"} or
@code{"wr"} indicates that all files should use binary I/O. Any other
string value is treated the same as @code{"rw"}, but causes @command{gawk}
to generate a warning message. @code{BINMODE} is described in more
-detail in @ref{PC Using}. @command{mawk} @pxref{Other Versions}),
+detail in @ref{PC Using}. @command{mawk} (@pxref{Other Versions})
also supports this variable, but only using numeric values.
@cindex @code{CONVFMT} variable
@@ -13851,7 +13857,7 @@ printing with the @code{print} statement. It works by being passed
as the first argument to the @code{sprintf()} function
(@pxref{String Functions}).
Its default value is @code{"%.6g"}. Earlier versions of @command{awk}
-also used @code{OFMT} to specify the format for converting numbers to
+used @code{OFMT} to specify the format for converting numbers to
strings in general expressions; this is now done by @code{CONVFMT}.
@cindex @code{sprintf()} function, @code{OFMT} variable and
@@ -14003,8 +14009,8 @@ successive instances of the same @value{FN} on the command line.
@cindex file names, distinguishing
While you can change the value of @code{ARGIND} within your @command{awk}
-program, @command{gawk} automatically sets it to a new value when the
-next file is opened.
+program, @command{gawk} automatically sets it to a new value when it
+opens the next file.
@cindex @code{ENVIRON} array
@cindex environment variables, in @code{ENVIRON} array
@@ -14069,10 +14075,10 @@ can give @code{FILENAME} a value.
@cindex @code{FNR} variable
@item @code{FNR}
-The current record number in the current file. @code{FNR} is
-incremented each time a new record is read
-(@pxref{Records}). It is reinitialized
-to zero each time a new input file is started.
+The current record number in the current file. @command{awk} increments
+@code{FNR} each time it reads a new record (@pxref{Records}).
+@command{awk} resets @code{FNR} to zero each time it starts a new
+input file.
@cindex @code{NF} variable
@item @code{NF}
@@ -14104,7 +14110,7 @@ array causes a fatal error. Any attempt to assign to an element of
The number of input records @command{awk} has processed since
the beginning of the program's execution
(@pxref{Records}).
-@code{NR} is incremented each time a new record is read.
+@command{awk} increments @code{NR} each time it reads a new record.
@cindex @command{gawk}, @code{PROCINFO} array in
@cindex @code{PROCINFO} array
@@ -14184,7 +14190,7 @@ The parent process ID of the current process.
@item PROCINFO["sorted_in"]
If this element exists in @code{PROCINFO}, its value controls the
order in which array indices will be processed by
-@samp{for (@var{index} in @var{array})} loops.
+@samp{for (@var{indx} in @var{array})} loops.
Since this is an advanced feature, we defer the
full description until later; see
@ref{Scanning an Array}.
@@ -14205,7 +14211,7 @@ The version of @command{gawk}.
The following additional elements in the array
are available to provide information about the MPFR and GMP libraries
-if your version of @command{gawk} supports arbitrary precision numbers
+if your version of @command{gawk} supports arbitrary precision arithmetic
(@pxref{Arbitrary Precision Arithmetic}):
@table @code
@@ -14254,14 +14260,14 @@ The @code{PROCINFO} array has the following additional uses:
@itemize @value{BULLET}
@item
-It may be used to cause coprocesses to communicate over pseudo-ttys
-instead of through two-way pipes; this is discussed further in
-@ref{Two-way I/O}.
-
-@item
It may be used to provide a timeout when reading from any
open input file, pipe, or coprocess.
@xref{Read Timeout}, for more information.
+
+@item
+It may be used to cause coprocesses to communicate over pseudo-ttys
+instead of through two-way pipes; this is discussed further in
+@ref{Two-way I/O}.
@end itemize
@cindex @code{RLENGTH} variable
@@ -14503,6 +14509,12 @@ following @option{-v} are passed on to the @command{awk} program.
(@xref{Getopt Function}, for an @command{awk} library function that
parses command-line options.)
+When designing your program, you should choose options that don't
+conflict with @command{gawk}'s, since it will process any options
+that it accepts before passing the rest of the command line on to
+your program. Using @samp{#!} with the @option{-E} option may help
+(@pxref{Executable Scripts}, and @pxref{Options}).
+
@node Pattern Action Summary
@section Summary
@@ -14537,7 +14549,7 @@ input and output statements, and deletion statements.
The control statements in @command{awk} are @code{if}-@code{else},
@code{while}, @code{for}, and @code{do}-@code{while}. @command{gawk}
adds the @code{switch} statement. There are two flavors of @code{for}
-statement: one for for performing general looping, and the other iterating
+statement: one for performing general looping, and the other for iterating
through an array.
@item
@@ -14554,12 +14566,17 @@ The @code{exit} statement terminates your program. When executed
from an action (or function body) it transfers control to the
@code{END} statements. From an @code{END} statement body, it exits
immediately. You may pass an optional numeric value to be used
-at @command{awk}'s exit status.
+as @command{awk}'s exit status.
@item
Some built-in variables provide control over @command{awk}, mainly for I/O.
Other variables convey information from @command{awk} to your program.
+@item
+@code{ARGC} and @code{ARGV} make the command-line arguments available
+to your program. Manipulating them from a @code{BEGIN} rule lets you
+control how @command{awk} will process the provided @value{DF}s.
+
@end itemize
@node Arrays
@@ -14580,24 +14597,13 @@ The @value{CHAPTER} moves on to discuss @command{gawk}'s facility
for sorting arrays, and ends with a brief description of @command{gawk}'s
ability to support true arrays of arrays.
-@cindex variables, names of
-@cindex functions, names of
-@cindex arrays, names of, and names of functions/variables
-@cindex names, arrays/variables
-@cindex namespace issues
-@command{awk} maintains a single set
-of names that may be used for naming variables, arrays, and functions
-(@pxref{User-defined}).
-Thus, you cannot have a variable and an array with the same name in the
-same @command{awk} program.
-
@menu
* Array Basics:: The basics of arrays.
-* Delete:: The @code{delete} statement removes an element
- from an array.
* Numeric Array Subscripts:: How to use numbers as subscripts in
@command{awk}.
* Uninitialized Subscripts:: Using uninitialized variables as subscripts.
+* Delete:: The @code{delete} statement removes an element
+ from an array.
* Multidimensional:: Emulating multidimensional arrays in
@command{awk}.
* Arrays of Arrays:: True multidimensional arrays.
@@ -15025,14 +15031,14 @@ begin with a number:
@example
@c file eg/misc/arraymax.awk
@{
- if ($1 > max)
- max = $1
- arr[$1] = $0
+ if ($1 > max)
+ max = $1
+ arr[$1] = $0
@}
END @{
- for (x = 1; x <= max; x++)
- print arr[x]
+ for (x = 1; x <= max; x++)
+ print arr[x]
@}
@c endfile
@end example
@@ -15072,9 +15078,9 @@ program's @code{END} rule, as follows:
@example
END @{
- for (x = 1; x <= max; x++)
- if (x in arr)
- print arr[x]
+ for (x = 1; x <= max; x++)
+ if (x in arr)
+ print arr[x]
@}
@end example
@@ -15096,7 +15102,7 @@ an array:
@example
for (@var{var} in @var{array})
- @var{body}
+ @var{body}
@end example
@noindent
@@ -15169,7 +15175,7 @@ BEGIN @{
@}
@end example
-Here is what happens when run with @command{gawk}:
+Here is what happens when run with @command{gawk} (and @command{mawk}):
@example
$ @kbd{gawk -f loopcheck.awk}
@@ -15287,7 +15293,8 @@ does not affect the loop.
For example:
@example
-$ @kbd{gawk 'BEGIN @{}
+$ @kbd{gawk '}
+> @kbd{BEGIN @{}
> @kbd{ a[4] = 4}
> @kbd{ a[3] = 3}
> @kbd{ for (i in a)}
@@ -15295,7 +15302,8 @@ $ @kbd{gawk 'BEGIN @{}
> @kbd{@}'}
@print{} 4 4
@print{} 3 3
-$ @kbd{gawk 'BEGIN @{}
+$ @kbd{gawk '}
+> @kbd{BEGIN @{}
> @kbd{ PROCINFO["sorted_in"] = "@@ind_str_asc"}
> @kbd{ a[4] = 4}
> @kbd{ a[3] = 3}
@@ -15344,118 +15352,6 @@ the @code{delete} statement.
In addition, @command{gawk} provides built-in functions for
sorting arrays; see @ref{Array Sorting Functions}.
-@node Delete
-@section The @code{delete} Statement
-@cindex @code{delete} statement
-@cindex deleting elements in arrays
-@cindex arrays, elements, deleting
-@cindex elements in arrays, deleting
-
-To remove an individual element of an array, use the @code{delete}
-statement:
-
-@example
-delete @var{array}[@var{index-expression}]
-@end example
-
-Once an array element has been deleted, any value the element once
-had is no longer available. It is as if the element had never
-been referred to or been given a value.
-The following is an example of deleting elements in an array:
-
-@example
-for (i in frequencies)
- delete frequencies[i]
-@end example
-
-@noindent
-This example removes all the elements from the array @code{frequencies}.
-Once an element is deleted, a subsequent @code{for} statement to scan the array
-does not report that element and the @code{in} operator to check for
-the presence of that element returns zero (i.e., false):
-
-@example
-delete foo[4]
-if (4 in foo)
- print "This will never be printed"
-@end example
-
-@cindex null strings, and deleting array elements
-It is important to note that deleting an element is @emph{not} the
-same as assigning it a null value (the empty string, @code{""}).
-For example:
-
-@example
-foo[4] = ""
-if (4 in foo)
- print "This is printed, even though foo[4] is empty"
-@end example
-
-@cindex lint checking, array elements
-It is not an error to delete an element that does not exist.
-However, if @option{--lint} is provided on the command line
-(@pxref{Options}),
-@command{gawk} issues a warning message when an element that
-is not in the array is deleted.
-
-@cindex common extensions, @code{delete} to delete entire arrays
-@cindex extensions, common@comma{} @code{delete} to delete entire arrays
-@cindex arrays, deleting entire contents
-@cindex deleting entire arrays
-@cindex @code{delete} @var{array}
-@cindex differences in @command{awk} and @command{gawk}, array elements, deleting
-All the elements of an array may be deleted with a single statement
-by leaving off the subscript in the @code{delete} statement,
-as follows:
-
-
-@example
-delete @var{array}
-@end example
-
-Using this version of the @code{delete} statement is about three times
-more efficient than the equivalent loop that deletes each element one
-at a time.
-
-@cindex Brian Kernighan's @command{awk}
-@quotation NOTE
-For many years,
-using @code{delete} without a subscript was a @command{gawk} extension.
-As of September, 2012, it was accepted for
-inclusion into the POSIX standard. See @uref{http://austingroupbugs.net/view.php?id=544,
-the Austin Group website}. This form of the @code{delete} statement is also supported
-by BWK @command{awk} and @command{mawk}, as well as
-by a number of other implementations (@pxref{Other Versions}).
-@end quotation
-
-@cindex portability, deleting array elements
-@cindex Brennan, Michael
-The following statement provides a portable but nonobvious way to clear
-out an array:@footnote{Thanks to Michael Brennan for pointing this out.}
-
-@example
-split("", array)
-@end example
-
-@cindex @code{split()} function, array elements@comma{} deleting
-The @code{split()} function
-(@pxref{String Functions})
-clears out the target array first. This call asks it to split
-apart the null string. Because there is no data to split out, the
-function simply clears the array and then returns.
-
-@quotation CAUTION
-Deleting an array does not change its type; you cannot
-delete an array and then use the array's name as a scalar
-(i.e., a regular variable). For example, the following does not work:
-
-@example
-a[1] = 3
-delete a
-a = 3
-@end example
-@end quotation
-
@node Numeric Array Subscripts
@section Using Numbers to Subscript Arrays
@@ -15496,7 +15392,7 @@ since @code{"12.15"} is different from @code{"12.153"}.
@cindex integer array indices
According to the rules for conversions
(@pxref{Conversion}), integer
-values are always converted to strings as integers, no matter what the
+values always convert to strings as integers, no matter what the
value of @code{CONVFMT} may happen to be. So the usual case of
the following works:
@@ -15519,7 +15415,7 @@ and
all refer to the same element!
As with many things in @command{awk}, the majority of the time
-things work as one would expect them to. But it is useful to have a precise
+things work as you would expect them to. But it is useful to have a precise
knowledge of the actual rules since they can sometimes have a subtle
effect on your programs.
@@ -15583,6 +15479,119 @@ Even though it is somewhat unusual, the null string
if @option{--lint} is provided
on the command line (@pxref{Options}).
+@node Delete
+@section The @code{delete} Statement
+@cindex @code{delete} statement
+@cindex deleting elements in arrays
+@cindex arrays, elements, deleting
+@cindex elements in arrays, deleting
+
+To remove an individual element of an array, use the @code{delete}
+statement:
+
+@example
+delete @var{array}[@var{index-expression}]
+@end example
+
+Once an array element has been deleted, any value the element once
+had is no longer available. It is as if the element had never
+been referred to or been given a value.
+The following is an example of deleting elements in an array:
+
+@example
+for (i in frequencies)
+ delete frequencies[i]
+@end example
+
+@noindent
+This example removes all the elements from the array @code{frequencies}.
+Once an element is deleted, a subsequent @code{for} statement to scan the array
+does not report that element and the @code{in} operator to check for
+the presence of that element returns zero (i.e., false):
+
+@example
+delete foo[4]
+if (4 in foo)
+ print "This will never be printed"
+@end example
+
+@cindex null strings, and deleting array elements
+It is important to note that deleting an element is @emph{not} the
+same as assigning it a null value (the empty string, @code{""}).
+For example:
+
+@example
+foo[4] = ""
+if (4 in foo)
+ print "This is printed, even though foo[4] is empty"
+@end example
+
+@cindex lint checking, array elements
+It is not an error to delete an element that does not exist.
+However, if @option{--lint} is provided on the command line
+(@pxref{Options}),
+@command{gawk} issues a warning message when an element that
+is not in the array is deleted.
+
+@cindex common extensions, @code{delete} to delete entire arrays
+@cindex extensions, common@comma{} @code{delete} to delete entire arrays
+@cindex arrays, deleting entire contents
+@cindex deleting entire arrays
+@cindex @code{delete} @var{array}
+@cindex differences in @command{awk} and @command{gawk}, array elements, deleting
+All the elements of an array may be deleted with a single statement
+by leaving off the subscript in the @code{delete} statement,
+as follows:
+
+
+@example
+delete @var{array}
+@end example
+
+Using this version of the @code{delete} statement is about three times
+more efficient than the equivalent loop that deletes each element one
+at a time.
+
+This form of the @code{delete} statement is also supported
+by BWK @command{awk} and @command{mawk}, as well as
+by a number of other implementations.
+
+@cindex Brian Kernighan's @command{awk}
+@quotation NOTE
+For many years, using @code{delete} without a subscript was a common
+extension. In September, 2012, it was accepted for inclusion into the
+POSIX standard. See @uref{http://austingroupbugs.net/view.php?id=544,
+the Austin Group website}.
+@end quotation
+
+@cindex portability, deleting array elements
+@cindex Brennan, Michael
+The following statement provides a portable but nonobvious way to clear
+out an array:@footnote{Thanks to Michael Brennan for pointing this out.}
+
+@example
+split("", array)
+@end example
+
+@cindex @code{split()} function, array elements@comma{} deleting
+The @code{split()} function
+(@pxref{String Functions})
+clears out the target array first. This call asks it to split
+apart the null string. Because there is no data to split out, the
+function simply clears the array and then returns.
+
+@quotation CAUTION
+Deleting all the elements from an array does not change its type; you cannot
+clear an array and then use the array's name as a scalar
+(i.e., a regular variable). For example, the following does not work:
+
+@example
+a[1] = 3
+delete a
+a = 3
+@end example
+@end quotation
+
@node Multidimensional
@section Multidimensional Arrays
@@ -15594,7 +15603,7 @@ on the command line (@pxref{Options}).
@cindex arrays, multidimensional
A multidimensional array is an array in which an element is identified
by a sequence of indices instead of a single index. For example, a
-two-dimensional array requires two indices. The usual way (in most
+two-dimensional array requires two indices. The usual way (in many
languages, including @command{awk}) to refer to an element of a
two-dimensional array named @code{grid} is with
@code{grid[@var{x},@var{y}]}.
@@ -15769,8 +15778,9 @@ a[1][3][1, "name"] = "barney"
Each subarray and the main array can be of different length. In fact, the
elements of an array or its subarray do not all have to have the same
type. This means that the main array and any of its subarrays can be
-non-rectangular, or jagged in structure. One can assign a scalar value to
-the index @code{4} of the main array @code{a}:
+non-rectangular, or jagged in structure. You can assign a scalar value to
+the index @code{4} of the main array @code{a}, even though @code{a[1]}
+is itself an array and not a scalar:
@example
a[4] = "An element in a jagged array"
@@ -15852,6 +15862,8 @@ for (i in array) @{
print array[i][j]
@}
@}
+ else
+ print array[i]
@}
@end example
@@ -16136,8 +16148,9 @@ Often random integers are needed instead. Following is a user-defined function
that can be used to obtain a random non-negative integer less than @var{n}:
@example
-function randint(n) @{
- return int(n * rand())
+function randint(n)
+@{
+ return int(n * rand())
@}
@end example
@@ -16157,8 +16170,7 @@ function roll(n) @{ return 1 + int(rand() * n) @}
# Roll 3 six-sided dice and
# print total number of points.
@{
- printf("%d points\n",
- roll(6)+roll(6)+roll(6))
+ printf("%d points\n", roll(6) + roll(6) + roll(6))
@}
@end example
@@ -16247,7 +16259,7 @@ doing index calculations, particularly if you are used to C.
In the following list, optional parameters are enclosed in square brackets@w{ ([ ]).}
Several functions perform string substitution; the full discussion is
provided in the description of the @code{sub()} function, which comes
-towards the end since the list is presented in alphabetic order.
+towards the end since the list is presented alphabetically.
Those functions that are specific to @command{gawk} are marked with a
pound sign (@samp{#}). They are not available in compatibility mode
@@ -16291,6 +16303,7 @@ When comparing strings, @code{IGNORECASE} affects the sorting
(@pxref{Array Sorting Functions}). If the
@var{source} array contains subarrays as values (@pxref{Arrays of
Arrays}), they will come last, after all scalar values.
+Subarrays are @emph{not} recursively sorted.
For example, if the contents of @code{a} are as follows:
@@ -16427,7 +16440,10 @@ $ @kbd{awk 'BEGIN @{ print index("peanut", "an") @}'}
@noindent
If @var{find} is not found, @code{index()} returns zero.
-It is a fatal error to use a regexp constant for @var{find}.
+With BWK @command{awk} and @command{gawk},
+it is a fatal error to use a regexp constant for @var{find}.
+Other implementations allow it, simply treating the regexp
+constant as an expression meaning @samp{$0 ~ /regexp/}.
@item @code{length(}[@var{string}]@code{)}
@cindexawkfunc{length}
@@ -16541,13 +16557,12 @@ For example:
@example
@c file eg/misc/findpat.awk
@{
- if ($1 == "FIND")
- regex = $2
- else @{
- where = match($0, regex)
- if (where != 0)
- print "Match of", regex, "found at",
- where, "in", $0
+ if ($1 == "FIND")
+ regex = $2
+ else @{
+ where = match($0, regex)
+ if (where != 0)
+ print "Match of", regex, "found at", where, "in", $0
@}
@}
@c endfile
@@ -16643,7 +16658,7 @@ Any leading separator will be in @code{@var{seps}[0]}.
The @code{patsplit()} function splits strings into pieces in a
manner similar to the way input lines are split into fields using @code{FPAT}
-(@pxref{Splitting By Content}.
+(@pxref{Splitting By Content}).
Before splitting the string, @code{patsplit()} deletes any previously existing
elements in the arrays @var{array} and @var{seps}.
@@ -16656,8 +16671,7 @@ and store the pieces in @var{array} and the separator strings in the
@code{@var{array}[1]}, the second piece in @code{@var{array}[2]}, and so
forth. The string value of the third argument, @var{fieldsep}, is
a regexp describing where to split @var{string} (much as @code{FS} can
-be a regexp describing where to split input records;
-@pxref{Regexp Field Splitting}).
+be a regexp describing where to split input records).
If @var{fieldsep} is omitted, the value of @code{FS} is used.
@code{split()} returns the number of elements created.
@var{seps} is a @command{gawk} extension with @code{@var{seps}[@var{i}]}
@@ -16952,6 +16966,26 @@ Nonalphabetic characters are left unchanged. For example,
@code{toupper("MiXeD cAsE 123")} returns @code{"MIXED CASE 123"}.
@end table
+@sidebar Matching the Null String
+@cindex matching, null strings
+@cindex null strings, matching
+@cindex @code{*} (asterisk), @code{*} operator, null strings@comma{} matching
+@cindex asterisk (@code{*}), @code{*} operator, null strings@comma{} matching
+
+In @command{awk}, the @samp{*} operator can match the null string.
+This is particularly important for the @code{sub()}, @code{gsub()},
+and @code{gensub()} functions. For example:
+
+@example
+$ @kbd{echo abc | awk '@{ gsub(/m*/, "X"); print @}'}
+@print{} XaXbXcX
+@end example
+
+@noindent
+Although this makes a certain amount of sense, it can be surprising.
+@end sidebar
+
+
@node Gory Details
@subsubsection More About @samp{\} and @samp{&} with @code{sub()}, @code{gsub()}, and @code{gensub()}
@@ -16965,7 +16999,7 @@ Nonalphabetic characters are left unchanged. For example,
@cindex ampersand (@code{&}), @code{gsub()}/@code{gensub()}/@code{sub()} functions and
@quotation CAUTION
-This section has been known to cause headaches.
+This subsubsection has been reported to cause headaches.
You might want to skip it upon first reading.
@end quotation
@@ -17256,25 +17290,6 @@ and the special cases for @code{sub()} and @code{gsub()},
we recommend the use of @command{gawk} and @code{gensub()} when you have
to do substitutions.
-@sidebar Matching the Null String
-@cindex matching, null strings
-@cindex null strings, matching
-@cindex @code{*} (asterisk), @code{*} operator, null strings@comma{} matching
-@cindex asterisk (@code{*}), @code{*} operator, null strings@comma{} matching
-
-In @command{awk}, the @samp{*} operator can match the null string.
-This is particularly important for the @code{sub()}, @code{gsub()},
-and @code{gensub()} functions. For example:
-
-@example
-$ @kbd{echo abc | awk '@{ gsub(/m*/, "X"); print @}'}
-@print{} XaXbXcX
-@end example
-
-@noindent
-Although this makes a certain amount of sense, it can be surprising.
-@end sidebar
-
@node I/O Functions
@subsection Input/Output Functions
@cindex input/output functions
@@ -17327,10 +17342,9 @@ buffers its output and the @code{fflush()} function forces
@cindex extensions, common@comma{} @code{fflush()} function
@cindex Brian Kernighan's @command{awk}
-@code{fflush()} was added to BWK @command{awk} in
-April of 1992. For two decades, it was not part of the POSIX standard.
-As of December, 2012, it was accepted for inclusion into the POSIX
-standard.
+Brian Kernighan added @code{fflush()} to his @command{awk} in April
+of 1992. For two decades, it was a common extension. In December,
+2012, it was accepted for inclusion into the POSIX standard.
See @uref{http://austingroupbugs.net/view.php?id=634, the Austin Group website}.
POSIX standardizes @code{fflush()} as follows: If there
@@ -17599,7 +17613,7 @@ is out of range, @code{mktime()} returns @minus{}1.
@cindex @command{gawk}, @code{PROCINFO} array in
@cindex @code{PROCINFO} array
-@item @code{strftime(} [@var{format} [@code{,} @var{timestamp} [@code{,} @var{utc-flag}] ] ]@code{)}
+@item @code{strftime(}[@var{format} [@code{,} @var{timestamp} [@code{,} @var{utc-flag}] ] ]@code{)}
@c STARTOFRANGE strf
@cindexgawkfunc{strftime}
@cindex format time string
@@ -17866,7 +17880,7 @@ the string. For example:
@example
$ date '+Today is %A, %B %d, %Y.'
-@print{} Today is Monday, May 05, 2014.
+@print{} Today is Monday, September 22, 2014.
@end example
Here is the @command{gawk} version of the @command{date} utility.
@@ -18060,17 +18074,16 @@ shows that 0's come in on the left side. For @command{gawk}, this is
always true, but in some languages, it's possible to have the left side
fill with 1's.}
@c Purposely decided to use 0's and 1's here. 2/2001.
-If you start over
-again with @samp{10111001} and shift it left by three bits, you end up
-with @samp{11001000}.
-@command{gawk} provides built-in functions that implement the
-bitwise operations just described. They are:
+If you start over again with @samp{10111001} and shift it left by three
+bits, you end up with @samp{11001000}. The following list describes
+@command{gawk}'s built-in functions that implement the bitwise operations.
+Optional parameters are enclosed in square brackets ([ ]):
@cindex @command{gawk}, bitwise operations in
@table @code
@cindexgawkfunc{and}
@cindex bitwise AND
-@item @code{and(@var{v1}, @var{v2}} [@code{,} @dots{}]@code{)}
+@item @code{and(}@var{v1}@code{,} @var{v2} [@code{,} @dots{}]@code{)}
Return the bitwise AND of the arguments. There must be at least two.
@cindexgawkfunc{compl}
@@ -18085,7 +18098,7 @@ Return the value of @var{val}, shifted left by @var{count} bits.
@cindexgawkfunc{or}
@cindex bitwise OR
-@item @code{or(@var{v1}, @var{v2}} [@code{,} @dots{}]@code{)}
+@item @code{or(}@var{v1}@code{,} @var{v2} [@code{,} @dots{}]@code{)}
Return the bitwise OR of the arguments. There must be at least two.
@cindexgawkfunc{rshift}
@@ -18095,7 +18108,7 @@ Return the value of @var{val}, shifted right by @var{count} bits.
@cindexgawkfunc{xor}
@cindex bitwise XOR
-@item @code{xor(@var{v1}, @var{v2}} [@code{,} @dots{}]@code{)}
+@item @code{xor(}@var{v1}@code{,} @var{v2} [@code{,} @dots{}]@code{)}
Return the bitwise XOR of the arguments. There must be at least two.
@end table
@@ -18218,7 +18231,7 @@ results of the @code{compl()}, @code{lshift()}, and @code{rshift()} functions.
@command{gawk} provides a single function that lets you distinguish
an array from a scalar variable. This is necessary for writing code
-that traverses every element of an array of arrays.
+that traverses every element of an array of arrays
(@pxref{Arrays of Arrays}).
@table @code
@@ -18234,12 +18247,14 @@ an array or not. The second is inside the body of a user-defined function
(not discussed yet; @pxref{User-defined}), to test if a parameter is an
array or not.
-Note, however, that using @code{isarray()} at the global level to test
+@quotation NOTE
+Using @code{isarray()} at the global level to test
variables makes no sense. Since you are the one writing the program, you
are supposed to know if your variables are arrays or not. And in fact,
due to the way @command{gawk} works, if you pass the name of a variable
that has not been previously used to @code{isarray()}, @command{gawk}
-will end up turning it into a scalar.
+ends up turning it into a scalar.
+@end quotation
@node I18N Functions
@subsection String-Translation Functions
@@ -18500,7 +18515,7 @@ extra whitespace signifies the start of the local variable list):
function delarray(a, i)
@{
for (i in a)
- delete a[i]
+ delete a[i]
@}
@end example
@@ -18511,7 +18526,7 @@ Instead of having
to repeat this loop everywhere that you need to clear out
an array, your program can just call @code{delarray}.
(This guarantees portability. The use of @samp{delete @var{array}} to delete
-the contents of an entire array is a recent@footnote{Late in 2012.}
+the contents of an entire array is a relatively recent@footnote{Late in 2012.}
addition to the POSIX standard.)
The following is an example of a recursive function. It takes a string
@@ -18541,7 +18556,7 @@ $ @kbd{echo "Don't Panic!" |}
@print{} !cinaP t'noD
@end example
-The C @code{ctime()} function takes a timestamp and returns it in a string,
+The C @code{ctime()} function takes a timestamp and returns it as a string,
formatted in a well-known fashion.
The following example uses the built-in @code{strftime()} function
(@pxref{Time Functions})
@@ -18556,13 +18571,19 @@ to create an @command{awk} version of @code{ctime()}:
function ctime(ts, format)
@{
- format = PROCINFO["strftime"]
+ format = "%a %b %e %H:%M:%S %Z %Y"
+
if (ts == 0)
ts = systime() # use current time as default
return strftime(format, ts)
@}
@c endfile
@end example
+
+You might think that @code{ctime()} could use @code{PROCINFO["strftime"]}
+for its format string. That would be a mistake, since @code{ctime()} is
+supposed to return the time formatted in a standard fashion, and user-level
+code could have changed @code{PROCINFO["strftime"]}.
@c ENDOFRANGE fdef
@node Function Caveats
@@ -19211,7 +19232,7 @@ function quicksort(data, left, right, less_than, i, last)
# quicksort_swap --- helper function for quicksort, should really be inline
-function quicksort_swap(data, i, j, temp)
+function quicksort_swap(data, i, j, temp)
@{
temp = data[i]
data[i] = data[j]
@@ -19362,10 +19383,11 @@ functions.
@item
POSIX @command{awk} provides three kinds of built-in functions: numeric,
-string, and I/O. @command{gawk} provides functions that work with values
-representing time, do bit manipulation, sort arrays, and internationalize
-and localize programs. @command{gawk} also provides several extensions to
-some of standard functions, typically in the form of additional arguments.
+string, and I/O. @command{gawk} provides functions that sort arrays, work
+with values representing time, do bit manipulation, determine variable
+type (array vs.@: scalar), and internationalize and localize programs.
+@command{gawk} also provides several extensions to some of the standard
+functions, typically in the form of additional arguments.
@item
Functions accept zero or more arguments and return a value. The
@@ -19616,8 +19638,9 @@ are very difficult to track down:
function lib_func(x, y, l1, l2)
@{
@dots{}
- @var{use variable} some_var # some_var should be local
- @dots{} # but is not by oversight
+ # some_var should be local but by oversight is not
+ @var{use variable} some_var
+ @dots{}
@}
@end example
@@ -19728,7 +19751,7 @@ function mystrtonum(str, ret, n, i, k, c)
# a[5] = "123.45"
# a[6] = "1.e3"
# a[7] = "1.32"
-# a[7] = "1.32E2"
+# a[8] = "1.32E2"
#
# for (i = 1; i in a; i++)
# print a[i], strtonum(a[i]), mystrtonum(a[i])
@@ -19739,9 +19762,12 @@ function mystrtonum(str, ret, n, i, k, c)
The function first looks for C-style octal numbers (base 8).
If the input string matches a regular expression describing octal
numbers, then @code{mystrtonum()} loops through each character in the
-string. It sets @code{k} to the index in @code{"01234567"} of the current
-octal digit. Since the return value is one-based, the @samp{k--}
-adjusts @code{k} so it can be used in computing the return value.
+string. It sets @code{k} to the index in @code{"1234567"} of the current
+octal digit.
+That index is either the same number as the digit, or zero if the
+character is not in the string, which is the case for a @samp{0}.
+This is safe, since the regexp test in the @code{if} ensures that
+only octal values are converted.
Similar logic applies to the code that checks for and converts a
hexadecimal value, which starts with @samp{0x} or @samp{0X}.
@@ -19774,7 +19800,7 @@ that a condition or set of conditions is true. Before proceeding with a
particular computation, you make a statement about what you believe to be
the case. Such a statement is known as an
@dfn{assertion}. The C language provides an @code{<assert.h>} header file
-and corresponding @code{assert()} macro that the programmer can use to make
+and corresponding @code{assert()} macro that a programmer can use to make
assertions. If an assertion fails, the @code{assert()} macro arranges to
print a diagnostic message describing the condition that should have
been true but was not, and then it kills the program. In C, using
@@ -20244,7 +20270,7 @@ function getlocaltime(time, ret, now, i)
now = systime()
# return date(1)-style output
- ret = strftime(PROCINFO["strftime"], now)
+ ret = strftime("%a %b %e %H:%M:%S %Z %Y", now)
# clear out target array
delete time
@@ -20359,6 +20385,9 @@ if (length(contents) == 0)
This tests the result to see if it is empty or not. An equivalent
test would be @samp{contents == ""}.
+@xref{Extension Sample Readfile}, for an extension function that
+also reads an entire file into memory.
+
@node Data File Management
@section @value{DDF} Management
@@ -20416,15 +20445,14 @@ Besides solving the problem in only nine(!) lines of code, it does so
@c # Arnold Robbins, arnold@@skeeve.com, Public Domain
@c # January 1992
-FILENAME != _oldfilename \
-@{
+FILENAME != _oldfilename @{
if (_oldfilename != "")
endfile(_oldfilename)
_oldfilename = FILENAME
beginfile(FILENAME)
@}
-END @{ endfile(FILENAME) @}
+END @{ endfile(FILENAME) @}
@end example
This file must be loaded before the user's ``main'' program, so that the
@@ -20477,7 +20505,7 @@ FNR == 1 @{
beginfile(FILENAME)
@}
-END @{ endfile(_filename_) @}
+END @{ endfile(_filename_) @}
@c endfile
@end example