aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Arnold D. Robbins <arnold@skeeve.com> 2014-08-29 13:11:45 +0300
committer Arnold D. Robbins <arnold@skeeve.com> 2014-08-29 13:11:45 +0300
commit 6c541fd0f75cd328dd80afec757ecccc833719af (patch)
tree 163707da9efd9c8d0c7a7e9d0ef3887222c88bf3
parent ff28c07f95ff2400eb0ad1becc0eae1eab9dc93d (diff)
download egawk-6c541fd0f75cd328dd80afec757ecccc833719af.tar.gz
egawk-6c541fd0f75cd328dd80afec757ecccc833719af.tar.bz2
egawk-6c541fd0f75cd328dd80afec757ecccc833719af.zip
More doc updates.
-rw-r--r-- awklib/eg/lib/getopt.awk 3
-rw-r--r-- awklib/eg/lib/strtonum.awk 7
-rw-r--r-- doc/ChangeLog 5
-rw-r--r-- doc/gawk.info 1507
-rw-r--r-- doc/gawk.texi 549
-rw-r--r-- doc/gawktexi.in 461
6 files changed, 1326 insertions, 1206 deletions
diff --git a/awklib/eg/lib/getopt.awk b/awklib/eg/lib/getopt.awk
index db957ceb..6b1f4c50 100644
--- a/awklib/eg/lib/getopt.awk
+++ b/awklib/eg/lib/getopt.awk
@@ -38,8 +38,7 @@ function getopt(argc, argv, options, thisopt, i)
i = index(options, thisopt)
if (i == 0) {
if (Opterr)
- printf("%c -- invalid option\n",
- thisopt) > "/dev/stderr"
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
if (_opti >= length(argv[Optind])) {
Optind++
_opti = 0
diff --git a/awklib/eg/lib/strtonum.awk b/awklib/eg/lib/strtonum.awk
index 9342e789..5e20626b 100644
--- a/awklib/eg/lib/strtonum.awk
+++ b/awklib/eg/lib/strtonum.awk
@@ -13,8 +13,9 @@ function mystrtonum(str, ret, n, i, k, c)
ret = 0
for (i = 1; i <= n; i++) {
c = substr(str, i, 1)
- if ((k = index("01234567", c)) > 0)
- k-- # adjust for 1-basing in awk
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("1234567", c)
ret = ret * 8 + k
}
@@ -26,6 +27,8 @@ function mystrtonum(str, ret, n, i, k, c)
for (i = 1; i <= n; i++) {
c = substr(str, i, 1)
c = tolower(c)
+ # index() returns 0 if c not in string,
+ # includes c == "0"
k = index("123456789abcdef", c)
ret = ret * 16 + k
diff --git a/doc/ChangeLog b/doc/ChangeLog
index 980eb023..79c69a30 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,8 @@
+2014-08-29 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Continuing on reviewer comments, and other
+ bug fixes, miscellaneous improvements.
+
2014-08-26 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Use a different mechanism to exclude
diff --git a/doc/gawk.info b/doc/gawk.info
index 7e6e1b89..2841d21e 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -170,10 +170,10 @@ entitled "GNU Free Documentation License".
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between `[...]'.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
* Regexp Summary:: Regular expressions summary.
* Records:: Controlling how data is split into
records.
@@ -1455,15 +1455,17 @@ end-of-file character may be different. For example, on OS/2, it is
As an example, the following program prints a friendly piece of
advice (from Douglas Adams's `The Hitchhiker's Guide to the Galaxy'),
to keep you from worrying about the complexities of computer
-programming (`BEGIN' is a feature we haven't discussed yet):
+programming:
- $ awk "BEGIN { print \"Don't Panic!\" }"
+ $ awk 'BEGIN { print "Don\47t Panic!" }'
-| Don't Panic!
- This program does not read any input. The `\' before each of the
-inner double quotes is necessary because of the shell's quoting
-rules--in particular because it mixes both single quotes and double
-quotes.(1)
+ `awk' executes statements associated with `BEGIN' before reading any
+input. If there are no other statements in your program, as is the
+case here, `awk' just stops, instead of trying to read input it doesn't
+know how to process. The `\47' is a magic way of getting a single
+quote into the program, without having to engage in ugly shell quoting
+tricks.
NOTE: As a side note, if you use Bash as your shell, you should
execute the command `set +H' before running this program
@@ -1486,12 +1488,6 @@ works is explained shortly).
-| What, me worry?
Ctrl-d
- ---------- Footnotes ----------
-
- (1) Although we generally recommend the use of single quotes around
-the program text, double quotes are needed here in order to put the
-single quote into the message.
-

File: gawk.info, Node: Long, Next: Executable Scripts, Prev: Read Terminal, Up: Running gawk
@@ -1937,6 +1933,9 @@ different ways to do the same things shown here:
awk '{ if (length($0) > max) max = length($0) }
END { print max }' data
+ The code associated with `END' executes after all input has been
+ read; it's the other side of the coin to `BEGIN'.
+
* Print the length of the longest line in `data':
expand data | awk '{ if (x < length($0)) x = length($0) }
@@ -2731,6 +2730,10 @@ arguments, including variable assignments, are included. As each
element of `ARGV' is processed, `gawk' sets the variable `ARGIND' to
the index in `ARGV' of the current element.
+ Changing `ARGC' and `ARGV' in your `awk' program lets you control
+how `awk' processes the input files; this is described in more detail
+in *note ARGC and ARGV::.
+
The distinction between file name arguments and variable-assignment
arguments is made when `awk' is about to open the next input file. At
that point in execution, it checks the file name to see whether it is
@@ -3225,10 +3228,10 @@ you specify more complicated classes of strings.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between `[...]'.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
* Regexp Summary:: Regular expressions summary.

@@ -3368,17 +3371,19 @@ apply to both string constants and regexp constants:
`\/'
A literal slash (necessary for regexp constants only). This
sequence is used when you want to write a regexp constant that
- contains a slash. Because the regexp is delimited by slashes, you
- need to escape the slash that is part of the pattern, in order to
+ contains a slash (such as `/.*:\/home\/[[:alnum:]]+:.*/'; the
+ `[[:alnum:]]' notation is discussed shortly, in *note Bracket
+ Expressions::). Because the regexp is delimited by slashes, you
+ need to escape any slash that is part of the pattern, in order to
tell `awk' to keep processing the rest of the regexp.
`\"'
A literal double quote (necessary for string constants only).
This sequence is used when you want to write a string constant
- that contains a double quote. Because the string is delimited by
- double quotes, you need to escape the quote that is part of the
- string, in order to tell `awk' to keep processing the rest of the
- string.
+ that contains a double quote (such as `"He said \"hi!\" to her."').
+ Because the string is delimited by double quotes, you need to
+ escape any quote that is part of the string, in order to tell
+ `awk' to keep processing the rest of the string.
In `gawk', a number of additional two-character sequences that begin
with a backslash have special meaning in regexps. *Note GNU Regexp
@@ -3616,7 +3621,7 @@ list".
regexp operator or function.

-File: gawk.info, Node: Bracket Expressions, Next: GNU Regexp Operators, Prev: Regexp Operators, Up: Regexp
+File: gawk.info, Node: Bracket Expressions, Next: Leftmost Longest, Prev: Regexp Operators, Up: Regexp
3.4 Using Bracket Expressions
=============================
@@ -3721,9 +3726,118 @@ Equivalence classes
classes.

-File: gawk.info, Node: GNU Regexp Operators, Next: Case-sensitivity, Prev: Bracket Expressions, Up: Regexp
+File: gawk.info, Node: Leftmost Longest, Next: Computed Regexps, Prev: Bracket Expressions, Up: Regexp
+
+3.5 How Much Text Matches?
+==========================
+
+Consider the following:
+
+ echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }'
+
+ This example uses the `sub()' function (which we haven't discussed
+yet; *note String Functions::) to make a change to the input record.
+Here, the regexp `/a+/' indicates "one or more `a' characters," and the
+replacement text is `<A>'.
+
+ The input contains four `a' characters. `awk' (and POSIX) regular
+expressions always match the leftmost, _longest_ sequence of input
+characters that can match. Thus, all four `a' characters are replaced
+with `<A>' in this example:
+
+ $ echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }'
+ -| <A>bcd
+
+ For simple match/no-match tests, this is not so important. But when
+doing text matching and substitutions with the `match()', `sub()',
+`gsub()', and `gensub()' functions, it is very important. *Note String
+Functions::, for more information on these functions. Understanding
+this principle is also important for regexp-based record and field
+splitting (*note Records::, and also *note Field Separators::).
+
+
+File: gawk.info, Node: Computed Regexps, Next: GNU Regexp Operators, Prev: Leftmost Longest, Up: Regexp
+
+3.6 Using Dynamic Regexps
+=========================
+
+The righthand side of a `~' or `!~' operator need not be a regexp
+constant (i.e., a string of characters between slashes). It may be any
+expression. The expression is evaluated and converted to a string if
+necessary; the contents of the string are then used as the regexp. A
+regexp computed in this way is called a "dynamic regexp" or a "computed
+regexp":
+
+ BEGIN { digits_regexp = "[[:digit:]]+" }
+ $0 ~ digits_regexp { print }
+
+This sets `digits_regexp' to a regexp that describes one or more digits,
+and tests whether the input record matches this regexp.
+
+ NOTE: When using the `~' and `!~' operators, there is a difference
+ between a regexp constant enclosed in slashes and a string
+ constant enclosed in double quotes. If you are going to use a
+ string constant, you have to understand that the string is, in
+ essence, scanned _twice_: the first time when `awk' reads your
+ program, and the second time when it goes to match the string on
+ the lefthand side of the operator with the pattern on the right.
+ This is true of any string-valued expression (such as
+ `digits_regexp', shown previously), not just string constants.
+
+ What difference does it make if the string is scanned twice? The
+answer has to do with escape sequences, and particularly with
+backslashes. To get a backslash into a regular expression inside a
+string, you have to type two backslashes.
+
+ For example, `/\*/' is a regexp constant for a literal `*'. Only
+one backslash is needed. To do the same thing with a string, you have
+to type `"\\*"'. The first backslash escapes the second one so that
+the string actually contains the two characters `\' and `*'.
+
+ Given that you can use both regexp and string constants to describe
+regular expressions, which should you use? The answer is "regexp
+constants," for several reasons:
+
+ * String constants are more complicated to write and more difficult
+ to read. Using regexp constants makes your programs less
+ error-prone. Not understanding the difference between the two
+ kinds of constants is a common source of errors.
+
+ * It is more efficient to use regexp constants. `awk' can note that
+ you have supplied a regexp and store it internally in a form that
+ makes pattern matching more efficient. When using a string
+ constant, `awk' must first convert the string into this internal
+ form and then perform the pattern matching.
+
+ * Using regexp constants is better form; it shows clearly that you
+ intend a regexp match.
+
+ Using `\n' in Bracket Expressions of Dynamic Regexps
+
+ Some versions of `awk' do not allow the newline character to be used
+inside a bracket expression for a dynamic regexp:
+
+ $ awk '$0 ~ "[ \t\n]"'
+ error--> awk: newline in character class [
+ error--> ]...
+ error--> source line number 1
+ error--> context is
+ error--> >>> <<<
+
+ But a newline in a regexp constant works with no problem:
+
+ $ awk '$0 ~ /[ \t\n]/'
+ here is a sample line
+ -| here is a sample line
+ Ctrl-d
+
+ `gawk' does not have this problem, and it isn't likely to occur
+often in practice, but it's worth noting for future reference.
+
+
+File: gawk.info, Node: GNU Regexp Operators, Next: Case-sensitivity, Prev: Computed Regexps, Up: Regexp
-3.5 `gawk'-Specific Regexp Operators
+3.7 `gawk'-Specific Regexp Operators
====================================
GNU software that deals with regular expressions provides a number of
@@ -3817,9 +3931,9 @@ No options
default.

-File: gawk.info, Node: Case-sensitivity, Next: Leftmost Longest, Prev: GNU Regexp Operators, Up: Regexp
+File: gawk.info, Node: Case-sensitivity, Next: Regexp Summary, Prev: GNU Regexp Operators, Up: Regexp
-3.6 Case Sensitivity in Matching
+3.8 Case Sensitivity in Matching
================================
Case is normally significant in regular expressions, both when matching
@@ -3892,116 +4006,7 @@ obscure and we don't recommend it.
means that `gawk' does the right thing.

-File: gawk.info, Node: Leftmost Longest, Next: Computed Regexps, Prev: Case-sensitivity, Up: Regexp
-
-3.7 How Much Text Matches?
-==========================
-
-Consider the following:
-
- echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }'
-
- This example uses the `sub()' function (which we haven't discussed
-yet; *note String Functions::) to make a change to the input record.
-Here, the regexp `/a+/' indicates "one or more `a' characters," and the
-replacement text is `<A>'.
-
- The input contains four `a' characters. `awk' (and POSIX) regular
-expressions always match the leftmost, _longest_ sequence of input
-characters that can match. Thus, all four `a' characters are replaced
-with `<A>' in this example:
-
- $ echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }'
- -| <A>bcd
-
- For simple match/no-match tests, this is not so important. But when
-doing text matching and substitutions with the `match()', `sub()',
-`gsub()', and `gensub()' functions, it is very important. *Note String
-Functions::, for more information on these functions. Understanding
-this principle is also important for regexp-based record and field
-splitting (*note Records::, and also *note Field Separators::).
-
-
-File: gawk.info, Node: Computed Regexps, Next: Regexp Summary, Prev: Leftmost Longest, Up: Regexp
-
-3.8 Using Dynamic Regexps
-=========================
-
-The righthand side of a `~' or `!~' operator need not be a regexp
-constant (i.e., a string of characters between slashes). It may be any
-expression. The expression is evaluated and converted to a string if
-necessary; the contents of the string are then used as the regexp. A
-regexp computed in this way is called a "dynamic regexp" or a "computed
-regexp":
-
- BEGIN { digits_regexp = "[[:digit:]]+" }
- $0 ~ digits_regexp { print }
-
-This sets `digits_regexp' to a regexp that describes one or more digits,
-and tests whether the input record matches this regexp.
-
- NOTE: When using the `~' and `!~' operators, there is a difference
- between a regexp constant enclosed in slashes and a string
- constant enclosed in double quotes. If you are going to use a
- string constant, you have to understand that the string is, in
- essence, scanned _twice_: the first time when `awk' reads your
- program, and the second time when it goes to match the string on
- the lefthand side of the operator with the pattern on the right.
- This is true of any string-valued expression (such as
- `digits_regexp', shown previously), not just string constants.
-
- What difference does it make if the string is scanned twice? The
-answer has to do with escape sequences, and particularly with
-backslashes. To get a backslash into a regular expression inside a
-string, you have to type two backslashes.
-
- For example, `/\*/' is a regexp constant for a literal `*'. Only
-one backslash is needed. To do the same thing with a string, you have
-to type `"\\*"'. The first backslash escapes the second one so that
-the string actually contains the two characters `\' and `*'.
-
- Given that you can use both regexp and string constants to describe
-regular expressions, which should you use? The answer is "regexp
-constants," for several reasons:
-
- * String constants are more complicated to write and more difficult
- to read. Using regexp constants makes your programs less
- error-prone. Not understanding the difference between the two
- kinds of constants is a common source of errors.
-
- * It is more efficient to use regexp constants. `awk' can note that
- you have supplied a regexp and store it internally in a form that
- makes pattern matching more efficient. When using a string
- constant, `awk' must first convert the string into this internal
- form and then perform the pattern matching.
-
- * Using regexp constants is better form; it shows clearly that you
- intend a regexp match.
-
- Using `\n' in Bracket Expressions of Dynamic Regexps
-
- Some versions of `awk' do not allow the newline character to be used
-inside a bracket expression for a dynamic regexp:
-
- $ awk '$0 ~ "[ \t\n]"'
- error--> awk: newline in character class [
- error--> ]...
- error--> source line number 1
- error--> context is
- error--> >>> <<<
-
- But a newline in a regexp constant works with no problem:
-
- $ awk '$0 ~ /[ \t\n]/'
- here is a sample line
- -| here is a sample line
- Ctrl-d
-
- `gawk' does not have this problem, and it isn't likely to occur
-often in practice, but it's worth noting for future reference.
-
-
-File: gawk.info, Node: Regexp Summary, Prev: Computed Regexps, Up: Regexp
+File: gawk.info, Node: Regexp Summary, Prev: Case-sensitivity, Up: Regexp
3.9 Summary
===========
@@ -5388,35 +5393,47 @@ input record and split it up into fields. This is useful if you've
finished processing the current record, but want to do some special
processing on the next record _right now_. For example:
+ # Remove text between /* and */, inclusive
{
- if ((t = index($0, "/*")) != 0) {
- # value of `tmp' will be "" if t is 1
- tmp = substr($0, 1, t - 1)
- u = index(substr($0, t + 2), "*/")
- offset = t + 2
- while (u == 0) {
- if (getline <= 0) {
+ if ((i = index($0, "/*")) != 0) {
+ out = substr($0, 1, i - 1) # leading part of the string
+ rest = substr($0, i + 2) # ... */ ...
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j > 0) {
+ rest = substr(rest, j + 2) # remove comment
+ } else {
+ while (j == 0) {
+ # get more text
+ if (getline <= 0) {
m = "unexpected EOF or error"
m = (m ": " ERRNO)
print m > "/dev/stderr"
exit
- }
- u = index($0, "*/")
- offset = 0
- }
- # substr() expression will be "" if */
- # occurred at end of line
- $0 = tmp substr($0, offset + u + 2)
- }
- print $0
+ }
+ # build up the line using string concatenation
+ rest = rest $0
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j != 0) {
+ rest = substr(rest, j + 2)
+ break
+ }
+ }
+ }
+ # build up the output line using string concatenation
+ $0 = out rest
+ }
+ print $0
}
This `awk' program deletes C-style comments (`/* ... */') from the
-input. By replacing the `print $0' with other statements, you could
-perform more complicated processing on the decommented input, such as
-searching for matches of a regular expression. (This program has a
-subtle problem--it does not work if one comment ends and another begins
-on the same line.)
+input. It uses a number of features we haven't covered yet, including
+string concatenation (*note Concatenation::) and the `index()' and
+`substr()' built-in functions (*note String Functions::). By replacing
+the `print $0' with other statements, you could perform more
+complicated processing on the decommented input, such as searching for
+matches of a regular expression. (This program has a subtle
+problem--it does not work if one comment ends and another begins on the
+same line.)
This form of the `getline' command sets `NF', `NR', `FNR', `RT', and
the value of `$0'.
@@ -5980,8 +5997,8 @@ File: gawk.info, Node: Input Exercises, Prev: Input Summary, Up: Reading File
2. *note Plain Getline::, presented a program to remove C-style
comments (`/* ... */') from the input. That program does not work
if one comment ends on one line and another one starts later on
- the same line. Write a program that does handle multiple comments
- on the line.
+ the same line. That can be fixed by making one simple change.
+ What is it?

@@ -7312,8 +7329,9 @@ File: gawk.info, Node: Regexp Constants, Prev: Nondecimal-numbers, Up: Consta
A regexp constant is a regular expression description enclosed in
slashes, such as `/^beginning and end$/'. Most regexps used in `awk'
programs are constant, but the `~' and `!~' matching operators can also
-match computed or dynamic regexps (which are just ordinary strings or
-variables that contain a regexp).
+match computed or dynamic regexps (which are typically just ordinary
+strings or variables that contain a regexp, but could be a more complex
+expression).

File: gawk.info, Node: Using Constant Regexps, Next: Variables, Prev: Constants, Up: Values
@@ -8463,7 +8481,7 @@ following program is one way to print lines in between special
bracketing lines:
$1 == "START" { interested = ! interested; next }
- interested == 1 { print }
+ interested { print }
$1 == "END" { interested = ! interested; next }
The variable `interested', as with all `awk' variables, starts out
@@ -8473,6 +8491,14 @@ using `!'. The next rule prints lines as long as `interested' is true.
When a line is seen whose first field is `END', `interested' is toggled
back to false.(1)
+ Most commonly, the `!' operator is used in the conditions of `if'
+and `while' statements, where it often makes more sense to phrase the
+logic in the negative:
+
+ if (! SOME CONDITION || SOME OTHER CONDITION) {
+ ... DO WHATEVER PROCESSING ...
+ }
+
NOTE: The `next' statement is discussed in *note Next Statement::.
`next' tells `awk' to skip the rest of the rules, get the next
record, and start processing the rules over again at the top. The
@@ -9841,7 +9867,7 @@ reset to one, and processing starts over with the first rule in the
program. If the `nextfile' statement causes the end of the input to be
reached, then the code in any `END' rules is executed. An exception to
this is when `nextfile' is invoked during execution of any statement in
-an `END' rule; In this case, it causes the program to stop immediately.
+an `END' rule; in this case, it causes the program to stop immediately.
*Note BEGIN/END::.
The `nextfile' statement is useful when there are many data files to
@@ -9851,10 +9877,10 @@ would have to continue scanning the unwanted records. The `nextfile'
statement accomplishes this much more efficiently.
In `gawk', execution of `nextfile' causes additional things to
-happen: any `ENDFILE' rules are executed except in the case as
-mentioned below, `ARGIND' is incremented, and any `BEGINFILE' rules are
-executed. (`ARGIND' hasn't been introduced yet. *Note Built-in
-Variables::.)
+happen: any `ENDFILE' rules are executed if `gawk' is not currently in
+an `END' or `BEGINFILE' rule, `ARGIND' is incremented, and any
+`BEGINFILE' rules are executed. (`ARGIND' hasn't been introduced yet.
+*Note Built-in Variables::.)
With `gawk', `nextfile' is useful inside a `BEGINFILE' rule to skip
over a file that would otherwise cause `gawk' to exit with a fatal
@@ -11280,7 +11306,7 @@ might look like this:
> line 2
> line 3' | awk '{ l[lines] = $0; ++lines }
> END {
- > for (i = lines-1; i >= 0; --i)
+ > for (i = lines - 1; i >= 0; i--)
> print l[i]
> }'
-| line 3
@@ -11301,7 +11327,7 @@ following version of the program works correctly:
{ l[lines++] = $0 }
END {
- for (i = lines - 1; i >= 0; --i)
+ for (i = lines - 1; i >= 0; i--)
print l[i]
}
@@ -14378,8 +14404,9 @@ versions of `awk':
ret = 0
for (i = 1; i <= n; i++) {
c = substr(str, i, 1)
- if ((k = index("01234567", c)) > 0)
- k-- # adjust for 1-basing in awk
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("1234567", c)
ret = ret * 8 + k
}
@@ -14391,6 +14418,8 @@ versions of `awk':
for (i = 1; i <= n; i++) {
c = substr(str, i, 1)
c = tolower(c)
+ # index() returns 0 if c not in string,
+ # includes c == "0"
k = index("123456789abcdef", c)
ret = ret * 16 + k
@@ -14851,7 +14880,7 @@ that might be as follows:
This function reads from `file' one record at a time, building up
the full contents of the file in the local variable `contents'. It
-works, but is not necessarily efficient.
+works, but is not necessarily efficient.(1)
The following function, based on a suggestion by Denis Shirokov,
reads the entire contents of the named file in one shot:
@@ -14886,6 +14915,13 @@ string. Thus calling code may use something like:
This tests the result to see if it is empty or not. An equivalent
test would be `contents == ""'.
+ ---------- Footnotes ----------
+
+ (1) Execution time grows quadratically in the size of the input; for
+each record, `awk' has to allocate a bigger internal buffer for
+`contents', copy the old contents into it, and then append the contents
+of the new record.
+

File: gawk.info, Node: Data File Management, Next: Getopt Function, Prev: General Functions, Up: Library Functions
@@ -15339,8 +15375,7 @@ not an option, and it ends option processing. Continuing on:
i = index(options, thisopt)
if (i == 0) {
if (Opterr)
- printf("%c -- invalid option\n",
- thisopt) > "/dev/stderr"
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
if (_opti >= length(argv[Optind])) {
Optind++
_opti = 0
@@ -30938,10 +30973,9 @@ Index
* ! (exclamation point), !~ operator <3>: Comparison Operators.
(line 11)
* ! (exclamation point), !~ operator <4>: Regexp Constants. (line 6)
-* ! (exclamation point), !~ operator <5>: Computed Regexps. (line 6)
-* ! (exclamation point), !~ operator <6>: Case-sensitivity. (line 26)
+* ! (exclamation point), !~ operator <5>: Case-sensitivity. (line 26)
+* ! (exclamation point), !~ operator <6>: Computed Regexps. (line 6)
* ! (exclamation point), !~ operator: Regexp Usage. (line 19)
-* " (double quote) in shell commands: Read Terminal. (line 25)
* " (double quote), in regexp constants: Computed Regexps. (line 29)
* " (double quote), in shell commands: Quoting. (line 54)
* # (number sign), #! (executable scripts): Executable Scripts.
@@ -31129,8 +31163,7 @@ Index
* ? (question mark), regexp operator: Regexp Operators. (line 111)
* [] (square brackets), regexp operator: Regexp Operators. (line 56)
* \ (backslash): Comments. (line 50)
-* \ (backslash) in shell commands: Read Terminal. (line 25)
-* \ (backslash), \" escape sequence: Escape Sequences. (line 80)
+* \ (backslash), \" escape sequence: Escape Sequences. (line 82)
* \ (backslash), \' operator (gawk): GNU Regexp Operators.
(line 56)
* \ (backslash), \/ escape sequence: Escape Sequences. (line 73)
@@ -31173,7 +31206,7 @@ Index
* \ (backslash), in bracket expressions: Bracket Expressions. (line 17)
* \ (backslash), in escape sequences: Escape Sequences. (line 6)
* \ (backslash), in escape sequences, POSIX and: Escape Sequences.
- (line 116)
+ (line 118)
* \ (backslash), in regexp constants: Computed Regexps. (line 29)
* \ (backslash), in shell commands: Quoting. (line 48)
* \ (backslash), regexp operator: Regexp Operators. (line 18)
@@ -31400,8 +31433,7 @@ Index
* awkvars.out file: Options. (line 93)
* b debugger command (alias for break): Breakpoint Control. (line 11)
* backslash (\): Comments. (line 50)
-* backslash (\) in shell commands: Read Terminal. (line 25)
-* backslash (\), \" escape sequence: Escape Sequences. (line 80)
+* backslash (\), \" escape sequence: Escape Sequences. (line 82)
* backslash (\), \' operator (gawk): GNU Regexp Operators.
(line 56)
* backslash (\), \/ escape sequence: Escape Sequences. (line 73)
@@ -31444,7 +31476,7 @@ Index
* backslash (\), in bracket expressions: Bracket Expressions. (line 17)
* backslash (\), in escape sequences: Escape Sequences. (line 6)
* backslash (\), in escape sequences, POSIX and: Escape Sequences.
- (line 116)
+ (line 118)
* backslash (\), in regexp constants: Computed Regexps. (line 29)
* backslash (\), in shell commands: Quoting. (line 48)
* backslash (\), regexp operator: Regexp Operators. (line 18)
@@ -31549,7 +31581,7 @@ Index
(line 67)
* Brian Kernighan's awk <12>: GNU Regexp Operators.
(line 83)
-* Brian Kernighan's awk <13>: Escape Sequences. (line 120)
+* Brian Kernighan's awk <13>: Escape Sequences. (line 122)
* Brian Kernighan's awk: When. (line 21)
* Brian Kernighan's awk, extensions: BTL. (line 6)
* Brian Kernighan's awk, source code: Other Versions. (line 13)
@@ -31577,6 +31609,7 @@ Index
* built-in variables, conveying information: Auto-set. (line 6)
* built-in variables, user-modifiable: User-modified. (line 6)
* Busybox Awk: Other Versions. (line 88)
+* c.e., See common extensions: Conventions. (line 51)
* call by reference: Pass By Value/Reference.
(line 47)
* call by value: Pass By Value/Reference.
@@ -31775,9 +31808,9 @@ Index
* dark corner, command-line arguments: Assignment Options. (line 43)
* dark corner, continue statement: Continue Statement. (line 44)
* dark corner, CONVFMT variable: Strings And Numbers. (line 40)
-* dark corner, escape sequences: Other Arguments. (line 31)
+* dark corner, escape sequences: Other Arguments. (line 35)
* dark corner, escape sequences, for metacharacters: Escape Sequences.
- (line 138)
+ (line 140)
* dark corner, exit statement: Exit Statement. (line 30)
* dark corner, field separators: Field Splitting Summary.
(line 46)
@@ -32042,7 +32075,6 @@ Index
* dollar sign ($), incrementing fields and arrays: Increment Ops.
(line 30)
* dollar sign ($), regexp operator: Regexp Operators. (line 35)
-* double quote (") in shell commands: Read Terminal. (line 25)
* double quote ("), in regexp constants: Computed Regexps. (line 29)
* double quote ("), in shell commands: Quoting. (line 54)
* down debugger command: Execution Stack. (line 21)
@@ -32142,8 +32174,8 @@ Index
* exclamation point (!), !~ operator <3>: Comparison Operators.
(line 11)
* exclamation point (!), !~ operator <4>: Regexp Constants. (line 6)
-* exclamation point (!), !~ operator <5>: Computed Regexps. (line 6)
-* exclamation point (!), !~ operator <6>: Case-sensitivity. (line 26)
+* exclamation point (!), !~ operator <5>: Case-sensitivity. (line 26)
+* exclamation point (!), !~ operator <6>: Computed Regexps. (line 6)
* exclamation point (!), !~ operator: Regexp Usage. (line 19)
* exit statement: Exit Statement. (line 6)
* exit status, of gawk: Exit Status. (line 6)
@@ -32151,7 +32183,7 @@ Index
* exit the debugger: Miscellaneous Debugger Commands.
(line 99)
* exp: Numeric Functions. (line 18)
-* expand utility: Very Simple. (line 69)
+* expand utility: Very Simple. (line 72)
* Expat XML parser library: gawkextlib. (line 35)
* exponent: Numeric Functions. (line 18)
* expressions: Expressions. (line 6)
@@ -32285,7 +32317,7 @@ Index
(line 47)
* files, message object, specifying directory of: Explaining gettext.
(line 54)
-* files, multiple passes over: Other Arguments. (line 49)
+* files, multiple passes over: Other Arguments. (line 53)
* files, multiple, duplicating output into: Tee Program. (line 6)
* files, output, See output files: Close Files And Pipes.
(line 6)
@@ -32446,7 +32478,7 @@ Index
* gawk, ERRNO variable in <4>: Close Files And Pipes.
(line 139)
* gawk, ERRNO variable in: Getline. (line 19)
-* gawk, escape sequences: Escape Sequences. (line 128)
+* gawk, escape sequences: Escape Sequences. (line 130)
* gawk, extensions, disabling: Options. (line 254)
* gawk, features, adding: Adding Code. (line 6)
* gawk, features, advanced: Advanced Features. (line 6)
@@ -32670,7 +32702,7 @@ Index
* input files, examples: Sample Data Files. (line 6)
* input files, reading: Reading Files. (line 6)
* input files, running awk without: Read Terminal. (line 6)
-* input files, variable assignments and: Other Arguments. (line 19)
+* input files, variable assignments and: Other Arguments. (line 23)
* input pipeline: Getline/Pipe. (line 9)
* input record, length of: String Functions. (line 174)
* input redirection: Getline/File. (line 6)
@@ -32879,7 +32911,7 @@ Index
* mawk utility <2>: Nextfile Statement. (line 47)
* mawk utility <3>: Concatenation. (line 36)
* mawk utility <4>: Getline/Pipe. (line 62)
-* mawk utility: Escape Sequences. (line 128)
+* mawk utility: Escape Sequences. (line 130)
* maximum precision supported by MPFR library: Auto-set. (line 213)
* McIlroy, Doug: Glossary. (line 149)
* McPhee, Patrick: Contributors. (line 100)
@@ -32892,7 +32924,7 @@ Index
(line 54)
* messages from extensions: Printing Messages. (line 6)
* metacharacters in regular expressions: Regexp Operators. (line 6)
-* metacharacters, escape sequences for: Escape Sequences. (line 134)
+* metacharacters, escape sequences for: Escape Sequences. (line 136)
* minimum precision supported by MPFR library: Auto-set. (line 216)
* mktime: Time Functions. (line 25)
* modifiers, in format specifiers: Format Modifiers. (line 6)
@@ -32930,7 +32962,7 @@ Index
(line 43)
* next file statement: Feature History. (line 169)
* next statement <1>: Next Statement. (line 6)
-* next statement: Boolean Ops. (line 85)
+* next statement: Boolean Ops. (line 93)
* next statement, BEGIN/END patterns and: I/O And BEGIN/END. (line 36)
* next statement, BEGINFILE/ENDFILE patterns and: BEGINFILE/ENDFILE.
(line 49)
@@ -33111,14 +33143,14 @@ Index
* plus sign (+), += operator: Assignment Ops. (line 82)
* plus sign (+), regexp operator: Regexp Operators. (line 105)
* pointers to functions: Indirect Calls. (line 6)
-* portability: Escape Sequences. (line 98)
+* portability: Escape Sequences. (line 100)
* portability, #! (executable scripts): Executable Scripts. (line 33)
* portability, ** operator and: Arithmetic Ops. (line 81)
* portability, **= operator and: Assignment Ops. (line 143)
* portability, ARGV variable: Executable Scripts. (line 59)
* portability, backslash continuation and: Statements/Lines. (line 30)
* portability, backslash in escape sequences: Escape Sequences.
- (line 116)
+ (line 118)
* portability, close() function and: Close Files And Pipes.
(line 81)
* portability, data files as single record: gawk split records.
@@ -33157,7 +33189,7 @@ Index
* POSIX awk, < operator and: Getline/File. (line 26)
* POSIX awk, arithmetic operators and: Arithmetic Ops. (line 30)
* POSIX awk, backslashes in string constants: Escape Sequences.
- (line 116)
+ (line 118)
* POSIX awk, BEGIN/END patterns: I/O And BEGIN/END. (line 16)
* POSIX awk, bracket expressions and: Bracket Expressions. (line 26)
* POSIX awk, bracket expressions and, character classes: Bracket Expressions.
@@ -33501,7 +33533,6 @@ Index
* set watchpoint: Viewing And Changing Data.
(line 67)
* shadowing of variable values: Definition Syntax. (line 70)
-* shell quoting, double quote: Read Terminal. (line 25)
* shell quoting, rules for: Quoting. (line 6)
* shells, piping commands into: Redirection. (line 142)
* shells, quoting: Using Shell Variables.
@@ -33538,14 +33569,14 @@ Index
* sidebar, A Constant's Base Does Not Affect Its Value: Nondecimal-numbers.
(line 64)
* sidebar, Backslash Before Regular Characters: Escape Sequences.
- (line 114)
+ (line 116)
* sidebar, Changing FS Does Not Affect the Fields: Field Splitting Summary.
(line 38)
* sidebar, Changing NR and FNR: Auto-set. (line 299)
* sidebar, Controlling Output Buffering with system(): I/O Functions.
(line 138)
* sidebar, Escape Sequences for Metacharacters: Escape Sequences.
- (line 132)
+ (line 134)
* sidebar, FS and IGNORECASE: Field Splitting Summary.
(line 64)
* sidebar, Interactive Versus Noninteractive Buffering: I/O Functions.
@@ -33745,8 +33776,8 @@ Index
* tilde (~), ~ operator <3>: Comparison Operators.
(line 11)
* tilde (~), ~ operator <4>: Regexp Constants. (line 6)
-* tilde (~), ~ operator <5>: Computed Regexps. (line 6)
-* tilde (~), ~ operator <6>: Case-sensitivity. (line 26)
+* tilde (~), ~ operator <5>: Case-sensitivity. (line 26)
+* tilde (~), ~ operator <6>: Computed Regexps. (line 6)
* tilde (~), ~ operator: Regexp Usage. (line 19)
* time functions: Time Functions. (line 6)
* time, alarm clock example program: Alarm Program. (line 11)
@@ -33773,7 +33804,7 @@ Index
(line 37)
* troubleshooting, awk uses FS not IFS: Field Separators. (line 30)
* troubleshooting, backslash before nonspecial character: Escape Sequences.
- (line 116)
+ (line 118)
* troubleshooting, division: Arithmetic Ops. (line 44)
* troubleshooting, fatal errors, field widths, specifying: Constant Size.
(line 23)
@@ -33829,7 +33860,7 @@ Index
* uniq.awk program: Uniq Program. (line 65)
* Unix: Glossary. (line 611)
* Unix awk, backslashes in escape sequences: Escape Sequences.
- (line 128)
+ (line 130)
* Unix awk, close() function and: Close Files And Pipes.
(line 131)
* Unix awk, password files, field separators and: Command Line Field Separator.
@@ -33852,7 +33883,7 @@ Index
* USR1 signal, for dynamic profiling: Profiling. (line 188)
* values, numeric: Basic Data Typing. (line 13)
* values, string: Basic Data Typing. (line 13)
-* variable assignments and input files: Other Arguments. (line 19)
+* variable assignments and input files: Other Arguments. (line 23)
* variable typing: Typing and Comparison.
(line 9)
* variables <1>: Basic Data Typing. (line 6)
@@ -33966,8 +33997,8 @@ Index
* ~ (tilde), ~ operator <3>: Comparison Operators.
(line 11)
* ~ (tilde), ~ operator <4>: Regexp Constants. (line 6)
-* ~ (tilde), ~ operator <5>: Computed Regexps. (line 6)
-* ~ (tilde), ~ operator <6>: Case-sensitivity. (line 26)
+* ~ (tilde), ~ operator <5>: Case-sensitivity. (line 26)
+* ~ (tilde), ~ operator <6>: Computed Regexps. (line 6)
* ~ (tilde), ~ operator: Regexp Usage. (line 19)
@@ -33993,533 +34024,533 @@ Node: Getting Started70581
Node: Running gawk73015
Node: One-shot74205
Node: Read Terminal75430
-Ref: Read Terminal-Footnote-177393
-Node: Long77564
-Node: Executable Scripts78958
-Ref: Executable Scripts-Footnote-181759
-Node: Comments81861
-Node: Quoting84334
-Node: DOS Quoting89647
-Node: Sample Data Files90322
-Node: Very Simple92929
-Node: Two Rules97688
-Node: More Complex99582
-Ref: More Complex-Footnote-1102496
-Node: Statements/Lines102581
-Ref: Statements/Lines-Footnote-1107037
-Node: Other Features107302
-Node: When108230
-Ref: When-Footnote-1109986
-Node: Intro Summary110051
-Node: Invoking Gawk110934
-Node: Command Line112449
-Node: Options113240
-Ref: Options-Footnote-1129016
-Node: Other Arguments129041
-Node: Naming Standard Input131703
-Node: Environment Variables132796
-Node: AWKPATH Variable133354
-Ref: AWKPATH Variable-Footnote-1136220
-Ref: AWKPATH Variable-Footnote-2136265
-Node: AWKLIBPATH Variable136525
-Node: Other Environment Variables137284
-Node: Exit Status140941
-Node: Include Files141616
-Node: Loading Shared Libraries145194
-Node: Obsolete146578
-Node: Undocumented147275
-Node: Invoking Summary147542
-Node: Regexp149142
-Node: Regexp Usage150601
-Node: Escape Sequences152634
-Node: Regexp Operators158451
-Ref: Regexp Operators-Footnote-1165882
-Ref: Regexp Operators-Footnote-2166029
-Node: Bracket Expressions166127
-Ref: table-char-classes168149
-Node: GNU Regexp Operators171089
-Node: Case-sensitivity174798
-Ref: Case-sensitivity-Footnote-1177690
-Ref: Case-sensitivity-Footnote-2177925
-Node: Leftmost Longest178033
-Node: Computed Regexps179234
-Node: Regexp Summary182606
-Node: Reading Files184075
-Node: Records186167
-Node: awk split records186889
-Node: gawk split records191747
-Ref: gawk split records-Footnote-1196268
-Node: Fields196305
-Ref: Fields-Footnote-1199269
-Node: Nonconstant Fields199355
-Ref: Nonconstant Fields-Footnote-1201585
-Node: Changing Fields201787
-Node: Field Separators207741
-Node: Default Field Splitting210443
-Node: Regexp Field Splitting211560
-Node: Single Character Fields214887
-Node: Command Line Field Separator215946
-Node: Full Line Fields219372
-Ref: Full Line Fields-Footnote-1219880
-Node: Field Splitting Summary219926
-Ref: Field Splitting Summary-Footnote-1223058
-Node: Constant Size223159
-Node: Splitting By Content227765
-Ref: Splitting By Content-Footnote-1231838
-Node: Multiple Line231878
-Ref: Multiple Line-Footnote-1237734
-Node: Getline237913
-Node: Plain Getline240124
-Node: Getline/Variable242219
-Node: Getline/File243366
-Node: Getline/Variable/File244750
-Ref: Getline/Variable/File-Footnote-1246349
-Node: Getline/Pipe246436
-Node: Getline/Variable/Pipe249122
-Node: Getline/Coprocess250229
-Node: Getline/Variable/Coprocess251481
-Node: Getline Notes252218
-Node: Getline Summary255022
-Ref: table-getline-variants255430
-Node: Read Timeout256342
-Ref: Read Timeout-Footnote-1260169
-Node: Command-line directories260227
-Node: Input Summary261131
-Node: Input Exercises264268
-Node: Printing265001
-Node: Print266723
-Node: Print Examples268216
-Node: Output Separators270995
-Node: OFMT273011
-Node: Printf274369
-Node: Basic Printf275275
-Node: Control Letters276814
-Node: Format Modifiers280805
-Node: Printf Examples286832
-Node: Redirection289296
-Node: Special Files296268
-Node: Special FD296801
-Ref: Special FD-Footnote-1300398
-Node: Special Network300472
-Node: Special Caveats301322
-Node: Close Files And Pipes302118
-Ref: Close Files And Pipes-Footnote-1309279
-Ref: Close Files And Pipes-Footnote-2309427
-Node: Output Summary309577
-Node: Output Exercises310574
-Node: Expressions311254
-Node: Values312439
-Node: Constants313115
-Node: Scalar Constants313795
-Ref: Scalar Constants-Footnote-1314654
-Node: Nondecimal-numbers314904
-Node: Regexp Constants317904
-Node: Using Constant Regexps318379
-Node: Variables321451
-Node: Using Variables322106
-Node: Assignment Options323830
-Node: Conversion325705
-Node: Strings And Numbers326229
-Ref: Strings And Numbers-Footnote-1329291
-Node: Locale influences conversions329400
-Ref: table-locale-affects332117
-Node: All Operators332705
-Node: Arithmetic Ops333335
-Node: Concatenation335840
-Ref: Concatenation-Footnote-1338659
-Node: Assignment Ops338765
-Ref: table-assign-ops343748
-Node: Increment Ops345051
-Node: Truth Values and Conditions348489
-Node: Truth Values349572
-Node: Typing and Comparison350621
-Node: Variable Typing351414
-Node: Comparison Operators355066
-Ref: table-relational-ops355476
-Node: POSIX String Comparison359026
-Ref: POSIX String Comparison-Footnote-1360110
-Node: Boolean Ops360248
-Ref: Boolean Ops-Footnote-1364323
-Node: Conditional Exp364414
-Node: Function Calls366141
-Node: Precedence370021
-Node: Locales373690
-Node: Expressions Summary375321
-Node: Patterns and Actions377862
-Node: Pattern Overview378978
-Node: Regexp Patterns380655
-Node: Expression Patterns381198
-Node: Ranges384978
-Node: BEGIN/END388084
-Node: Using BEGIN/END388846
-Ref: Using BEGIN/END-Footnote-1391582
-Node: I/O And BEGIN/END391688
-Node: BEGINFILE/ENDFILE393959
-Node: Empty396890
-Node: Using Shell Variables397207
-Node: Action Overview399490
-Node: Statements401817
-Node: If Statement403665
-Node: While Statement405163
-Node: Do Statement407207
-Node: For Statement408363
-Node: Switch Statement411515
-Node: Break Statement413903
-Node: Continue Statement415944
-Node: Next Statement417769
-Node: Nextfile Statement420159
-Node: Exit Statement422795
-Node: Built-in Variables425199
-Node: User-modified426326
-Ref: User-modified-Footnote-1434015
-Node: Auto-set434077
-Ref: Auto-set-Footnote-1446659
-Ref: Auto-set-Footnote-2446864
-Node: ARGC and ARGV446920
-Node: Pattern Action Summary450824
-Node: Arrays453047
-Node: Array Basics454596
-Node: Array Intro455422
-Ref: figure-array-elements457395
-Ref: Array Intro-Footnote-1459919
-Node: Reference to Elements460047
-Node: Assigning Elements462497
-Node: Array Example462988
-Node: Scanning an Array464720
-Node: Controlling Scanning467721
-Ref: Controlling Scanning-Footnote-1472894
-Node: Delete473210
-Ref: Delete-Footnote-1475961
-Node: Numeric Array Subscripts476018
-Node: Uninitialized Subscripts478201
-Node: Multidimensional479826
-Node: Multiscanning482939
-Node: Arrays of Arrays484528
-Node: Arrays Summary489191
-Node: Functions491296
-Node: Built-in492169
-Node: Calling Built-in493247
-Node: Numeric Functions495235
-Ref: Numeric Functions-Footnote-1499269
-Ref: Numeric Functions-Footnote-2499626
-Ref: Numeric Functions-Footnote-3499674
-Node: String Functions499943
-Ref: String Functions-Footnote-1522940
-Ref: String Functions-Footnote-2523069
-Ref: String Functions-Footnote-3523317
-Node: Gory Details523404
-Ref: table-sub-escapes525177
-Ref: table-sub-proposed526697
-Ref: table-posix-sub528061
-Ref: table-gensub-escapes529601
-Ref: Gory Details-Footnote-1530777
-Node: I/O Functions530928
-Ref: I/O Functions-Footnote-1538038
-Node: Time Functions538185
-Ref: Time Functions-Footnote-1548649
-Ref: Time Functions-Footnote-2548717
-Ref: Time Functions-Footnote-3548875
-Ref: Time Functions-Footnote-4548986
-Ref: Time Functions-Footnote-5549098
-Ref: Time Functions-Footnote-6549325
-Node: Bitwise Functions549591
-Ref: table-bitwise-ops550153
-Ref: Bitwise Functions-Footnote-1554398
-Node: Type Functions554582
-Node: I18N Functions555724
-Node: User-defined557369
-Node: Definition Syntax558173
-Ref: Definition Syntax-Footnote-1563486
-Node: Function Example563555
-Ref: Function Example-Footnote-1566195
-Node: Function Caveats566217
-Node: Calling A Function566735
-Node: Variable Scope567690
-Node: Pass By Value/Reference570678
-Node: Return Statement574188
-Node: Dynamic Typing577172
-Node: Indirect Calls578101
-Node: Functions Summary587814
-Node: Library Functions590353
-Ref: Library Functions-Footnote-1593971
-Ref: Library Functions-Footnote-2594114
-Node: Library Names594285
-Ref: Library Names-Footnote-1597758
-Ref: Library Names-Footnote-2597978
-Node: General Functions598064
-Node: Strtonum Function599092
-Node: Assert Function601872
-Node: Round Function605198
-Node: Cliff Random Function606739
-Node: Ordinal Functions607755
-Ref: Ordinal Functions-Footnote-1610820
-Ref: Ordinal Functions-Footnote-2611072
-Node: Join Function611283
-Ref: Join Function-Footnote-1613054
-Node: Getlocaltime Function613254
-Node: Readfile Function616990
-Node: Data File Management618829
-Node: Filetrans Function619461
-Node: Rewind Function623530
-Node: File Checking625088
-Ref: File Checking-Footnote-1626220
-Node: Empty Files626421
-Node: Ignoring Assigns628400
-Node: Getopt Function629954
-Ref: Getopt Function-Footnote-1641257
-Node: Passwd Functions641460
-Ref: Passwd Functions-Footnote-1650439
-Node: Group Functions650527
-Ref: Group Functions-Footnote-1658458
-Node: Walking Arrays658671
-Node: Library Functions Summary660274
-Node: Library Exercises661662
-Node: Sample Programs662942
-Node: Running Examples663712
-Node: Clones664440
-Node: Cut Program665664
-Node: Egrep Program675522
-Ref: Egrep Program-Footnote-1683109
-Node: Id Program683219
-Node: Split Program686873
-Ref: Split Program-Footnote-1690411
-Node: Tee Program690539
-Node: Uniq Program693326
-Node: Wc Program700747
-Ref: Wc Program-Footnote-1705012
-Node: Miscellaneous Programs705104
-Node: Dupword Program706317
-Node: Alarm Program708348
-Node: Translate Program713152
-Ref: Translate Program-Footnote-1717543
-Ref: Translate Program-Footnote-2717813
-Node: Labels Program717947
-Ref: Labels Program-Footnote-1721308
-Node: Word Sorting721392
-Node: History Sorting725435
-Node: Extract Program727271
-Node: Simple Sed734807
-Node: Igawk Program737869
-Ref: Igawk Program-Footnote-1752173
-Ref: Igawk Program-Footnote-2752374
-Node: Anagram Program752512
-Node: Signature Program755580
-Node: Programs Summary756827
-Node: Programs Exercises758042
-Node: Advanced Features761693
-Node: Nondecimal Data763641
-Node: Array Sorting765218
-Node: Controlling Array Traversal765915
-Node: Array Sorting Functions774195
-Ref: Array Sorting Functions-Footnote-1778102
-Node: Two-way I/O778296
-Ref: Two-way I/O-Footnote-1783240
-Ref: Two-way I/O-Footnote-2783419
-Node: TCP/IP Networking783501
-Node: Profiling786346
-Node: Advanced Features Summary793888
-Node: Internationalization795752
-Node: I18N and L10N797232
-Node: Explaining gettext797918
-Ref: Explaining gettext-Footnote-1802944
-Ref: Explaining gettext-Footnote-2803128
-Node: Programmer i18n803293
-Ref: Programmer i18n-Footnote-1808087
-Node: Translator i18n808136
-Node: String Extraction808930
-Ref: String Extraction-Footnote-1810063
-Node: Printf Ordering810149
-Ref: Printf Ordering-Footnote-1812931
-Node: I18N Portability812995
-Ref: I18N Portability-Footnote-1815444
-Node: I18N Example815507
-Ref: I18N Example-Footnote-1818213
-Node: Gawk I18N818285
-Node: I18N Summary818923
-Node: Debugger820262
-Node: Debugging821284
-Node: Debugging Concepts821725
-Node: Debugging Terms823581
-Node: Awk Debugging826178
-Node: Sample Debugging Session827070
-Node: Debugger Invocation827590
-Node: Finding The Bug828923
-Node: List of Debugger Commands835405
-Node: Breakpoint Control836737
-Node: Debugger Execution Control840401
-Node: Viewing And Changing Data843761
-Node: Execution Stack847119
-Node: Debugger Info848632
-Node: Miscellaneous Debugger Commands852626
-Node: Readline Support857810
-Node: Limitations858702
-Node: Debugging Summary860976
-Node: Arbitrary Precision Arithmetic862144
-Node: Computer Arithmetic863631
-Ref: Computer Arithmetic-Footnote-1868018
-Node: Math Definitions868075
-Ref: table-ieee-formats871364
-Ref: Math Definitions-Footnote-1871904
-Node: MPFR features872007
-Node: FP Math Caution873624
-Ref: FP Math Caution-Footnote-1874674
-Node: Inexactness of computations875043
-Node: Inexact representation875991
-Node: Comparing FP Values877346
-Node: Errors accumulate878310
-Node: Getting Accuracy879743
-Node: Try To Round882402
-Node: Setting precision883301
-Ref: table-predefined-precision-strings883983
-Node: Setting the rounding mode885776
-Ref: table-gawk-rounding-modes886140
-Ref: Setting the rounding mode-Footnote-1889594
-Node: Arbitrary Precision Integers889773
-Ref: Arbitrary Precision Integers-Footnote-1892754
-Node: POSIX Floating Point Problems892903
-Ref: POSIX Floating Point Problems-Footnote-1896779
-Node: Floating point summary896817
-Node: Dynamic Extensions899021
-Node: Extension Intro900573
-Node: Plugin License901838
-Node: Extension Mechanism Outline902523
-Ref: figure-load-extension902947
-Ref: figure-load-new-function904432
-Ref: figure-call-new-function905434
-Node: Extension API Description907418
-Node: Extension API Functions Introduction908868
-Node: General Data Types913735
-Ref: General Data Types-Footnote-1919428
-Node: Requesting Values919727
-Ref: table-value-types-returned920464
-Node: Memory Allocation Functions921422
-Ref: Memory Allocation Functions-Footnote-1924169
-Node: Constructor Functions924265
-Node: Registration Functions926023
-Node: Extension Functions926708
-Node: Exit Callback Functions929010
-Node: Extension Version String930258
-Node: Input Parsers930908
-Node: Output Wrappers940722
-Node: Two-way processors945238
-Node: Printing Messages947442
-Ref: Printing Messages-Footnote-1948519
-Node: Updating `ERRNO'948671
-Node: Accessing Parameters949410
-Node: Symbol Table Access950640
-Node: Symbol table by name951154
-Node: Symbol table by cookie953130
-Ref: Symbol table by cookie-Footnote-1957263
-Node: Cached values957326
-Ref: Cached values-Footnote-1960830
-Node: Array Manipulation960921
-Ref: Array Manipulation-Footnote-1962019
-Node: Array Data Types962058
-Ref: Array Data Types-Footnote-1964761
-Node: Array Functions964853
-Node: Flattening Arrays968727
-Node: Creating Arrays975579
-Node: Extension API Variables980310
-Node: Extension Versioning980946
-Node: Extension API Informational Variables982847
-Node: Extension API Boilerplate983933
-Node: Finding Extensions987737
-Node: Extension Example988297
-Node: Internal File Description989027
-Node: Internal File Ops993118
-Ref: Internal File Ops-Footnote-11004550
-Node: Using Internal File Ops1004690
-Ref: Using Internal File Ops-Footnote-11007037
-Node: Extension Samples1007305
-Node: Extension Sample File Functions1008829
-Node: Extension Sample Fnmatch1016397
-Node: Extension Sample Fork1017879
-Node: Extension Sample Inplace1019092
-Node: Extension Sample Ord1020767
-Node: Extension Sample Readdir1021603
-Ref: table-readdir-file-types1022459
-Node: Extension Sample Revout1023258
-Node: Extension Sample Rev2way1023849
-Node: Extension Sample Read write array1024590
-Node: Extension Sample Readfile1026469
-Node: Extension Sample API Tests1027569
-Node: Extension Sample Time1028094
-Node: gawkextlib1029409
-Node: Extension summary1032222
-Node: Extension Exercises1035915
-Node: Language History1036637
-Node: V7/SVR3.11038280
-Node: SVR41040600
-Node: POSIX1042042
-Node: BTL1043428
-Node: POSIX/GNU1044162
-Node: Feature History1049878
-Node: Common Extensions1062969
-Node: Ranges and Locales1064281
-Ref: Ranges and Locales-Footnote-11068898
-Ref: Ranges and Locales-Footnote-21068925
-Ref: Ranges and Locales-Footnote-31069159
-Node: Contributors1069380
-Node: History summary1074805
-Node: Installation1076174
-Node: Gawk Distribution1077125
-Node: Getting1077609
-Node: Extracting1078433
-Node: Distribution contents1080075
-Node: Unix Installation1085792
-Node: Quick Installation1086409
-Node: Additional Configuration Options1088851
-Node: Configuration Philosophy1090589
-Node: Non-Unix Installation1092940
-Node: PC Installation1093398
-Node: PC Binary Installation1094709
-Node: PC Compiling1096557
-Ref: PC Compiling-Footnote-11099556
-Node: PC Testing1099661
-Node: PC Using1100837
-Node: Cygwin1104989
-Node: MSYS1105798
-Node: VMS Installation1106312
-Node: VMS Compilation1107108
-Ref: VMS Compilation-Footnote-11108330
-Node: VMS Dynamic Extensions1108388
-Node: VMS Installation Details1109761
-Node: VMS Running1112013
-Node: VMS GNV1114847
-Node: VMS Old Gawk1115570
-Node: Bugs1116040
-Node: Other Versions1120044
-Node: Installation summary1126271
-Node: Notes1127327
-Node: Compatibility Mode1128192
-Node: Additions1128974
-Node: Accessing The Source1129899
-Node: Adding Code1131335
-Node: New Ports1137513
-Node: Derived Files1141994
-Ref: Derived Files-Footnote-11147075
-Ref: Derived Files-Footnote-21147109
-Ref: Derived Files-Footnote-31147705
-Node: Future Extensions1147819
-Node: Implementation Limitations1148425
-Node: Extension Design1149673
-Node: Old Extension Problems1150827
-Ref: Old Extension Problems-Footnote-11152344
-Node: Extension New Mechanism Goals1152401
-Ref: Extension New Mechanism Goals-Footnote-11155761
-Node: Extension Other Design Decisions1155950
-Node: Extension Future Growth1158056
-Node: Old Extension Mechanism1158892
-Node: Notes summary1160654
-Node: Basic Concepts1161840
-Node: Basic High Level1162521
-Ref: figure-general-flow1162793
-Ref: figure-process-flow1163392
-Ref: Basic High Level-Footnote-11166621
-Node: Basic Data Typing1166806
-Node: Glossary1170134
-Node: Copying1195286
-Node: GNU Free Documentation License1232842
-Node: Index1257978
+Node: Long77455
+Node: Executable Scripts78849
+Ref: Executable Scripts-Footnote-181650
+Node: Comments81752
+Node: Quoting84225
+Node: DOS Quoting89538
+Node: Sample Data Files90213
+Node: Very Simple92820
+Node: Two Rules97705
+Node: More Complex99599
+Ref: More Complex-Footnote-1102513
+Node: Statements/Lines102598
+Ref: Statements/Lines-Footnote-1107054
+Node: Other Features107319
+Node: When108247
+Ref: When-Footnote-1110003
+Node: Intro Summary110068
+Node: Invoking Gawk110951
+Node: Command Line112466
+Node: Options113257
+Ref: Options-Footnote-1129033
+Node: Other Arguments129058
+Node: Naming Standard Input131886
+Node: Environment Variables132979
+Node: AWKPATH Variable133537
+Ref: AWKPATH Variable-Footnote-1136403
+Ref: AWKPATH Variable-Footnote-2136448
+Node: AWKLIBPATH Variable136708
+Node: Other Environment Variables137467
+Node: Exit Status141124
+Node: Include Files141799
+Node: Loading Shared Libraries145377
+Node: Obsolete146761
+Node: Undocumented147458
+Node: Invoking Summary147725
+Node: Regexp149325
+Node: Regexp Usage150784
+Node: Escape Sequences152817
+Node: Regexp Operators158805
+Ref: Regexp Operators-Footnote-1166236
+Ref: Regexp Operators-Footnote-2166383
+Node: Bracket Expressions166481
+Ref: table-char-classes168499
+Node: Leftmost Longest171439
+Node: Computed Regexps172643
+Node: GNU Regexp Operators176021
+Node: Case-sensitivity179727
+Ref: Case-sensitivity-Footnote-1182617
+Ref: Case-sensitivity-Footnote-2182852
+Node: Regexp Summary182960
+Node: Reading Files184429
+Node: Records186521
+Node: awk split records187243
+Node: gawk split records192101
+Ref: gawk split records-Footnote-1196622
+Node: Fields196659
+Ref: Fields-Footnote-1199623
+Node: Nonconstant Fields199709
+Ref: Nonconstant Fields-Footnote-1201939
+Node: Changing Fields202141
+Node: Field Separators208095
+Node: Default Field Splitting210797
+Node: Regexp Field Splitting211914
+Node: Single Character Fields215241
+Node: Command Line Field Separator216300
+Node: Full Line Fields219726
+Ref: Full Line Fields-Footnote-1220234
+Node: Field Splitting Summary220280
+Ref: Field Splitting Summary-Footnote-1223412
+Node: Constant Size223513
+Node: Splitting By Content228119
+Ref: Splitting By Content-Footnote-1232192
+Node: Multiple Line232232
+Ref: Multiple Line-Footnote-1238088
+Node: Getline238267
+Node: Plain Getline240478
+Node: Getline/Variable243184
+Node: Getline/File244331
+Node: Getline/Variable/File245715
+Ref: Getline/Variable/File-Footnote-1247314
+Node: Getline/Pipe247401
+Node: Getline/Variable/Pipe250087
+Node: Getline/Coprocess251194
+Node: Getline/Variable/Coprocess252446
+Node: Getline Notes253183
+Node: Getline Summary255987
+Ref: table-getline-variants256395
+Node: Read Timeout257307
+Ref: Read Timeout-Footnote-1261134
+Node: Command-line directories261192
+Node: Input Summary262096
+Node: Input Exercises265233
+Node: Printing265961
+Node: Print267683
+Node: Print Examples269176
+Node: Output Separators271955
+Node: OFMT273971
+Node: Printf275329
+Node: Basic Printf276235
+Node: Control Letters277774
+Node: Format Modifiers281765
+Node: Printf Examples287792
+Node: Redirection290256
+Node: Special Files297228
+Node: Special FD297761
+Ref: Special FD-Footnote-1301358
+Node: Special Network301432
+Node: Special Caveats302282
+Node: Close Files And Pipes303078
+Ref: Close Files And Pipes-Footnote-1310239
+Ref: Close Files And Pipes-Footnote-2310387
+Node: Output Summary310537
+Node: Output Exercises311534
+Node: Expressions312214
+Node: Values313399
+Node: Constants314075
+Node: Scalar Constants314755
+Ref: Scalar Constants-Footnote-1315614
+Node: Nondecimal-numbers315864
+Node: Regexp Constants318864
+Node: Using Constant Regexps319389
+Node: Variables322461
+Node: Using Variables323116
+Node: Assignment Options324840
+Node: Conversion326715
+Node: Strings And Numbers327239
+Ref: Strings And Numbers-Footnote-1330301
+Node: Locale influences conversions330410
+Ref: table-locale-affects333127
+Node: All Operators333715
+Node: Arithmetic Ops334345
+Node: Concatenation336850
+Ref: Concatenation-Footnote-1339669
+Node: Assignment Ops339775
+Ref: table-assign-ops344758
+Node: Increment Ops346061
+Node: Truth Values and Conditions349499
+Node: Truth Values350582
+Node: Typing and Comparison351631
+Node: Variable Typing352424
+Node: Comparison Operators356076
+Ref: table-relational-ops356486
+Node: POSIX String Comparison360036
+Ref: POSIX String Comparison-Footnote-1361120
+Node: Boolean Ops361258
+Ref: Boolean Ops-Footnote-1365597
+Node: Conditional Exp365688
+Node: Function Calls367415
+Node: Precedence371295
+Node: Locales374964
+Node: Expressions Summary376595
+Node: Patterns and Actions379136
+Node: Pattern Overview380252
+Node: Regexp Patterns381929
+Node: Expression Patterns382472
+Node: Ranges386252
+Node: BEGIN/END389358
+Node: Using BEGIN/END390120
+Ref: Using BEGIN/END-Footnote-1392856
+Node: I/O And BEGIN/END392962
+Node: BEGINFILE/ENDFILE395233
+Node: Empty398164
+Node: Using Shell Variables398481
+Node: Action Overview400764
+Node: Statements403091
+Node: If Statement404939
+Node: While Statement406437
+Node: Do Statement408481
+Node: For Statement409637
+Node: Switch Statement412789
+Node: Break Statement415177
+Node: Continue Statement417218
+Node: Next Statement419043
+Node: Nextfile Statement421433
+Node: Exit Statement424090
+Node: Built-in Variables426494
+Node: User-modified427621
+Ref: User-modified-Footnote-1435310
+Node: Auto-set435372
+Ref: Auto-set-Footnote-1447954
+Ref: Auto-set-Footnote-2448159
+Node: ARGC and ARGV448215
+Node: Pattern Action Summary452119
+Node: Arrays454342
+Node: Array Basics455891
+Node: Array Intro456717
+Ref: figure-array-elements458690
+Ref: Array Intro-Footnote-1461214
+Node: Reference to Elements461342
+Node: Assigning Elements463792
+Node: Array Example464283
+Node: Scanning an Array466015
+Node: Controlling Scanning469016
+Ref: Controlling Scanning-Footnote-1474189
+Node: Delete474505
+Ref: Delete-Footnote-1477256
+Node: Numeric Array Subscripts477313
+Node: Uninitialized Subscripts479496
+Node: Multidimensional481123
+Node: Multiscanning484236
+Node: Arrays of Arrays485825
+Node: Arrays Summary490488
+Node: Functions492593
+Node: Built-in493466
+Node: Calling Built-in494544
+Node: Numeric Functions496532
+Ref: Numeric Functions-Footnote-1500566
+Ref: Numeric Functions-Footnote-2500923
+Ref: Numeric Functions-Footnote-3500971
+Node: String Functions501240
+Ref: String Functions-Footnote-1524237
+Ref: String Functions-Footnote-2524366
+Ref: String Functions-Footnote-3524614
+Node: Gory Details524701
+Ref: table-sub-escapes526474
+Ref: table-sub-proposed527994
+Ref: table-posix-sub529358
+Ref: table-gensub-escapes530898
+Ref: Gory Details-Footnote-1532074
+Node: I/O Functions532225
+Ref: I/O Functions-Footnote-1539335
+Node: Time Functions539482
+Ref: Time Functions-Footnote-1549946
+Ref: Time Functions-Footnote-2550014
+Ref: Time Functions-Footnote-3550172
+Ref: Time Functions-Footnote-4550283
+Ref: Time Functions-Footnote-5550395
+Ref: Time Functions-Footnote-6550622
+Node: Bitwise Functions550888
+Ref: table-bitwise-ops551450
+Ref: Bitwise Functions-Footnote-1555695
+Node: Type Functions555879
+Node: I18N Functions557021
+Node: User-defined558666
+Node: Definition Syntax559470
+Ref: Definition Syntax-Footnote-1564783
+Node: Function Example564852
+Ref: Function Example-Footnote-1567492
+Node: Function Caveats567514
+Node: Calling A Function568032
+Node: Variable Scope568987
+Node: Pass By Value/Reference571975
+Node: Return Statement575485
+Node: Dynamic Typing578469
+Node: Indirect Calls579398
+Node: Functions Summary589111
+Node: Library Functions591650
+Ref: Library Functions-Footnote-1595268
+Ref: Library Functions-Footnote-2595411
+Node: Library Names595582
+Ref: Library Names-Footnote-1599055
+Ref: Library Names-Footnote-2599275
+Node: General Functions599361
+Node: Strtonum Function600389
+Node: Assert Function603263
+Node: Round Function606589
+Node: Cliff Random Function608130
+Node: Ordinal Functions609146
+Ref: Ordinal Functions-Footnote-1612211
+Ref: Ordinal Functions-Footnote-2612463
+Node: Join Function612674
+Ref: Join Function-Footnote-1614445
+Node: Getlocaltime Function614645
+Node: Readfile Function618381
+Ref: Readfile Function-Footnote-1620259
+Node: Data File Management620487
+Node: Filetrans Function621119
+Node: Rewind Function625188
+Node: File Checking626746
+Ref: File Checking-Footnote-1627878
+Node: Empty Files628079
+Node: Ignoring Assigns630058
+Node: Getopt Function631612
+Ref: Getopt Function-Footnote-1642876
+Node: Passwd Functions643079
+Ref: Passwd Functions-Footnote-1652058
+Node: Group Functions652146
+Ref: Group Functions-Footnote-1660077
+Node: Walking Arrays660290
+Node: Library Functions Summary661893
+Node: Library Exercises663281
+Node: Sample Programs664561
+Node: Running Examples665331
+Node: Clones666059
+Node: Cut Program667283
+Node: Egrep Program677141
+Ref: Egrep Program-Footnote-1684728
+Node: Id Program684838
+Node: Split Program688492
+Ref: Split Program-Footnote-1692030
+Node: Tee Program692158
+Node: Uniq Program694945
+Node: Wc Program702366
+Ref: Wc Program-Footnote-1706631
+Node: Miscellaneous Programs706723
+Node: Dupword Program707936
+Node: Alarm Program709967
+Node: Translate Program714771
+Ref: Translate Program-Footnote-1719162
+Ref: Translate Program-Footnote-2719432
+Node: Labels Program719566
+Ref: Labels Program-Footnote-1722927
+Node: Word Sorting723011
+Node: History Sorting727054
+Node: Extract Program728890
+Node: Simple Sed736426
+Node: Igawk Program739488
+Ref: Igawk Program-Footnote-1753792
+Ref: Igawk Program-Footnote-2753993
+Node: Anagram Program754131
+Node: Signature Program757199
+Node: Programs Summary758446
+Node: Programs Exercises759661
+Node: Advanced Features763312
+Node: Nondecimal Data765260
+Node: Array Sorting766837
+Node: Controlling Array Traversal767534
+Node: Array Sorting Functions775814
+Ref: Array Sorting Functions-Footnote-1779721
+Node: Two-way I/O779915
+Ref: Two-way I/O-Footnote-1784859
+Ref: Two-way I/O-Footnote-2785038
+Node: TCP/IP Networking785120
+Node: Profiling787965
+Node: Advanced Features Summary795507
+Node: Internationalization797371
+Node: I18N and L10N798851
+Node: Explaining gettext799537
+Ref: Explaining gettext-Footnote-1804563
+Ref: Explaining gettext-Footnote-2804747
+Node: Programmer i18n804912
+Ref: Programmer i18n-Footnote-1809706
+Node: Translator i18n809755
+Node: String Extraction810549
+Ref: String Extraction-Footnote-1811682
+Node: Printf Ordering811768
+Ref: Printf Ordering-Footnote-1814550
+Node: I18N Portability814614
+Ref: I18N Portability-Footnote-1817063
+Node: I18N Example817126
+Ref: I18N Example-Footnote-1819832
+Node: Gawk I18N819904
+Node: I18N Summary820542
+Node: Debugger821881
+Node: Debugging822903
+Node: Debugging Concepts823344
+Node: Debugging Terms825200
+Node: Awk Debugging827797
+Node: Sample Debugging Session828689
+Node: Debugger Invocation829209
+Node: Finding The Bug830542
+Node: List of Debugger Commands837024
+Node: Breakpoint Control838356
+Node: Debugger Execution Control842020
+Node: Viewing And Changing Data845380
+Node: Execution Stack848738
+Node: Debugger Info850251
+Node: Miscellaneous Debugger Commands854245
+Node: Readline Support859429
+Node: Limitations860321
+Node: Debugging Summary862595
+Node: Arbitrary Precision Arithmetic863763
+Node: Computer Arithmetic865250
+Ref: Computer Arithmetic-Footnote-1869637
+Node: Math Definitions869694
+Ref: table-ieee-formats872983
+Ref: Math Definitions-Footnote-1873523
+Node: MPFR features873626
+Node: FP Math Caution875243
+Ref: FP Math Caution-Footnote-1876293
+Node: Inexactness of computations876662
+Node: Inexact representation877610
+Node: Comparing FP Values878965
+Node: Errors accumulate879929
+Node: Getting Accuracy881362
+Node: Try To Round884021
+Node: Setting precision884920
+Ref: table-predefined-precision-strings885602
+Node: Setting the rounding mode887395
+Ref: table-gawk-rounding-modes887759
+Ref: Setting the rounding mode-Footnote-1891213
+Node: Arbitrary Precision Integers891392
+Ref: Arbitrary Precision Integers-Footnote-1894373
+Node: POSIX Floating Point Problems894522
+Ref: POSIX Floating Point Problems-Footnote-1898398
+Node: Floating point summary898436
+Node: Dynamic Extensions900640
+Node: Extension Intro902192
+Node: Plugin License903457
+Node: Extension Mechanism Outline904142
+Ref: figure-load-extension904566
+Ref: figure-load-new-function906051
+Ref: figure-call-new-function907053
+Node: Extension API Description909037
+Node: Extension API Functions Introduction910487
+Node: General Data Types915354
+Ref: General Data Types-Footnote-1921047
+Node: Requesting Values921346
+Ref: table-value-types-returned922083
+Node: Memory Allocation Functions923041
+Ref: Memory Allocation Functions-Footnote-1925788
+Node: Constructor Functions925884
+Node: Registration Functions927642
+Node: Extension Functions928327
+Node: Exit Callback Functions930629
+Node: Extension Version String931877
+Node: Input Parsers932527
+Node: Output Wrappers942341
+Node: Two-way processors946857
+Node: Printing Messages949061
+Ref: Printing Messages-Footnote-1950138
+Node: Updating `ERRNO'950290
+Node: Accessing Parameters951029
+Node: Symbol Table Access952259
+Node: Symbol table by name952773
+Node: Symbol table by cookie954749
+Ref: Symbol table by cookie-Footnote-1958882
+Node: Cached values958945
+Ref: Cached values-Footnote-1962449
+Node: Array Manipulation962540
+Ref: Array Manipulation-Footnote-1963638
+Node: Array Data Types963677
+Ref: Array Data Types-Footnote-1966380
+Node: Array Functions966472
+Node: Flattening Arrays970346
+Node: Creating Arrays977198
+Node: Extension API Variables981929
+Node: Extension Versioning982565
+Node: Extension API Informational Variables984466
+Node: Extension API Boilerplate985552
+Node: Finding Extensions989356
+Node: Extension Example989916
+Node: Internal File Description990646
+Node: Internal File Ops994737
+Ref: Internal File Ops-Footnote-11006169
+Node: Using Internal File Ops1006309
+Ref: Using Internal File Ops-Footnote-11008656
+Node: Extension Samples1008924
+Node: Extension Sample File Functions1010448
+Node: Extension Sample Fnmatch1018016
+Node: Extension Sample Fork1019498
+Node: Extension Sample Inplace1020711
+Node: Extension Sample Ord1022386
+Node: Extension Sample Readdir1023222
+Ref: table-readdir-file-types1024078
+Node: Extension Sample Revout1024877
+Node: Extension Sample Rev2way1025468
+Node: Extension Sample Read write array1026209
+Node: Extension Sample Readfile1028088
+Node: Extension Sample API Tests1029188
+Node: Extension Sample Time1029713
+Node: gawkextlib1031028
+Node: Extension summary1033841
+Node: Extension Exercises1037534
+Node: Language History1038256
+Node: V7/SVR3.11039899
+Node: SVR41042219
+Node: POSIX1043661
+Node: BTL1045047
+Node: POSIX/GNU1045781
+Node: Feature History1051497
+Node: Common Extensions1064588
+Node: Ranges and Locales1065900
+Ref: Ranges and Locales-Footnote-11070517
+Ref: Ranges and Locales-Footnote-21070544
+Ref: Ranges and Locales-Footnote-31070778
+Node: Contributors1070999
+Node: History summary1076424
+Node: Installation1077793
+Node: Gawk Distribution1078744
+Node: Getting1079228
+Node: Extracting1080052
+Node: Distribution contents1081694
+Node: Unix Installation1087411
+Node: Quick Installation1088028
+Node: Additional Configuration Options1090470
+Node: Configuration Philosophy1092208
+Node: Non-Unix Installation1094559
+Node: PC Installation1095017
+Node: PC Binary Installation1096328
+Node: PC Compiling1098176
+Ref: PC Compiling-Footnote-11101175
+Node: PC Testing1101280
+Node: PC Using1102456
+Node: Cygwin1106608
+Node: MSYS1107417
+Node: VMS Installation1107931
+Node: VMS Compilation1108727
+Ref: VMS Compilation-Footnote-11109949
+Node: VMS Dynamic Extensions1110007
+Node: VMS Installation Details1111380
+Node: VMS Running1113632
+Node: VMS GNV1116466
+Node: VMS Old Gawk1117189
+Node: Bugs1117659
+Node: Other Versions1121663
+Node: Installation summary1127890
+Node: Notes1128946
+Node: Compatibility Mode1129811
+Node: Additions1130593
+Node: Accessing The Source1131518
+Node: Adding Code1132954
+Node: New Ports1139132
+Node: Derived Files1143613
+Ref: Derived Files-Footnote-11148694
+Ref: Derived Files-Footnote-21148728
+Ref: Derived Files-Footnote-31149324
+Node: Future Extensions1149438
+Node: Implementation Limitations1150044
+Node: Extension Design1151292
+Node: Old Extension Problems1152446
+Ref: Old Extension Problems-Footnote-11153963
+Node: Extension New Mechanism Goals1154020
+Ref: Extension New Mechanism Goals-Footnote-11157380
+Node: Extension Other Design Decisions1157569
+Node: Extension Future Growth1159675
+Node: Old Extension Mechanism1160511
+Node: Notes summary1162273
+Node: Basic Concepts1163459
+Node: Basic High Level1164140
+Ref: figure-general-flow1164412
+Ref: figure-process-flow1165011
+Ref: Basic High Level-Footnote-11168240
+Node: Basic Data Typing1168425
+Node: Glossary1171753
+Node: Copying1196905
+Node: GNU Free Documentation License1234461
+Node: Index1259597

End Tag Table
diff --git a/doc/gawk.texi b/doc/gawk.texi
index 2e5dc9bd..53b159f1 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -526,10 +526,10 @@ particular records in a file and perform operations upon them.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between @samp{[...]}.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
* Regexp Summary:: Regular expressions summary.
* Records:: Controlling how data is split into
records.
@@ -1774,6 +1774,7 @@ They also appear in the index under the heading ``dark corner.''
As noted by the opening quote, though, any coverage of dark corners is,
by definition, incomplete.
+@cindex c.e., See common extensions
Extensions to the standard @command{awk} language that are supported by
more than one @command{awk} implementation are marked
@ifclear FOR_PRINT
@@ -2341,24 +2342,19 @@ For example, on OS/2, it is @kbd{Ctrl-z}.)
As an example, the following program prints a friendly piece of advice
(from Douglas Adams's @cite{The Hitchhiker's Guide to the Galaxy}),
to keep you from worrying about the complexities of computer
-programming (@code{BEGIN} is a feature we haven't discussed yet):
+programming:
@example
-$ @kbd{awk "BEGIN @{ print \"Don't Panic!\" @}"}
+$ @kbd{awk 'BEGIN @{ print "Don\47t Panic!" @}'}
@print{} Don't Panic!
@end example
-@cindex shell quoting, double quote
-@cindex double quote (@code{"}) in shell commands
-@cindex @code{"} (double quote) in shell commands
-@cindex @code{\} (backslash) in shell commands
-@cindex backslash (@code{\}) in shell commands
-This program does not read any input. The @samp{\} before each of the
-inner double quotes is necessary because of the shell's quoting
-rules---in particular because it mixes both single quotes and
-double quotes.@footnote{Although we generally recommend the use of single
-quotes around the program text, double quotes are needed here in order to
-put the single quote into the message.}
+@command{awk} executes statements associated with @code{BEGIN} before
+reading any input. If there are no other statements in your program,
+as is the case here, @command{awk} just stops, instead of trying to read
+input it doesn't know how to process.
+The @samp{\47} is the octal escape sequence for a single quote
+(@pxref{Escape Sequences}); it gets one into the program without ugly shell quoting tricks.
@quotation NOTE
As a side note, if you use Bash as your shell, you should execute the
@@ -3046,6 +3042,9 @@ awk '@{ if (length($0) > max) max = length($0) @}
END @{ print max @}' data
@end example
+The code associated with @code{END} executes after all
+input has been read; it's the other side of the coin to @code{BEGIN}.
+
@cindex @command{expand} utility
@item
Print the length of the longest line in @file{data}:
@@ -4132,6 +4131,11 @@ included. As each element of @code{ARGV} is processed, @command{gawk}
sets the variable @code{ARGIND} to the index in @code{ARGV} of the
current element.
+@c FIXME: One day, move the ARGC and ARGV node closer to here.
+Changing @code{ARGC} and @code{ARGV} in your @command{awk} program lets
+you control how @command{awk} processes the input files; this is described
+in more detail in @ref{ARGC and ARGV}.
+
@cindex input files, variable assignments and
@cindex variable assignments and input files
The distinction between @value{FN} arguments and variable-assignment
@@ -4772,10 +4776,10 @@ regular expressions work, we present more complicated instances.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between @samp{[...]}.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
* Regexp Summary:: Regular expressions summary.
@end menu
@@ -4985,8 +4989,11 @@ that a maximum of two hexadecimal digits following the
@item \/
A literal slash (necessary for regexp constants only).
This sequence is used when you want to write a regexp
-constant that contains a slash. Because the regexp is delimited by
-slashes, you need to escape the slash that is part of the pattern,
+constant that contains a slash
+(such as @code{/.*:\/home\/[[:alnum:]]+:.*/}; the @samp{[[:alnum:]]}
+notation is discussed shortly, in @ref{Bracket Expressions}).
+Because the regexp is delimited by
+slashes, you need to escape any slash that is part of the pattern,
in order to tell @command{awk} to keep processing the rest of the regexp.
@cindex @code{\} (backslash), @code{\"} escape sequence
@@ -4994,8 +5001,10 @@ in order to tell @command{awk} to keep processing the rest of the regexp.
@item \"
A literal double quote (necessary for string constants only).
This sequence is used when you want to write a string
-constant that contains a double quote. Because the string is delimited by
-double quotes, you need to escape the quote that is part of the string,
+constant that contains a double quote
+(such as @code{"He said \"hi!\" to her."}).
+Because the string is delimited by
+double quotes, you need to escape any quote that is part of the string,
in order to tell @command{awk} to keep processing the rest of the string.
@end table
@@ -5556,6 +5565,204 @@ they do not recognize collating symbols or equivalence classes.
@c maybe one day ...
@c ENDOFRANGE charlist
+@node Leftmost Longest
+@section How Much Text Matches?
+
+@cindex regular expressions, leftmost longest match
+@c @cindex matching, leftmost longest
+Consider the following:
+
+@example
+echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
+@end example
+
+This example uses the @code{sub()} function (which we haven't discussed yet;
+@pxref{String Functions})
+to make a change to the input record. Here, the regexp @code{/a+/}
+indicates ``one or more @samp{a} characters,'' and the replacement
+text is @samp{<A>}.
+
+The input contains four @samp{a} characters.
+@command{awk} (and POSIX) regular expressions always match
+the leftmost, @emph{longest} sequence of input characters that can
+match. Thus, all four @samp{a} characters are
+replaced with @samp{<A>} in this example:
+
+@example
+$ @kbd{echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'}
+@print{} <A>bcd
+@end example
+
+For simple match/no-match tests, this is not so important. But when doing
+text matching and substitutions with the @code{match()}, @code{sub()}, @code{gsub()},
+and @code{gensub()} functions, it is very important.
+@ifinfo
+@xref{String Functions},
+for more information on these functions.
+@end ifinfo
+Understanding this principle is also important for regexp-based record
+and field splitting (@pxref{Records},
+and also @pxref{Field Separators}).
+
+@node Computed Regexps
+@section Using Dynamic Regexps
+
+@c STARTOFRANGE dregexp
+@cindex regular expressions, computed
+@c STARTOFRANGE regexpd
+@cindex regular expressions, dynamic
+@cindex @code{~} (tilde), @code{~} operator
+@cindex tilde (@code{~}), @code{~} operator
+@cindex @code{!} (exclamation point), @code{!~} operator
+@cindex exclamation point (@code{!}), @code{!~} operator
+@c @cindex operators, @code{~}
+@c @cindex operators, @code{!~}
+The righthand side of a @samp{~} or @samp{!~} operator need not be a
+regexp constant (i.e., a string of characters between slashes). It may
+be any expression. The expression is evaluated and converted to a string
+if necessary; the contents of the string are then used as the
+regexp. A regexp computed in this way is called a @dfn{dynamic
+regexp} or a @dfn{computed regexp}:
+
+@example
+BEGIN @{ digits_regexp = "[[:digit:]]+" @}
+$0 ~ digits_regexp @{ print @}
+@end example
+
+@noindent
+This sets @code{digits_regexp} to a regexp that describes one or more digits,
+and tests whether the input record matches this regexp.
+
+@quotation NOTE
+When using the @samp{~} and @samp{!~}
+operators, there is a difference between a regexp constant
+enclosed in slashes and a string constant enclosed in double quotes.
+If you are going to use a string constant, you have to understand that
+the string is, in essence, scanned @emph{twice}: the first time when
+@command{awk} reads your program, and the second time when it goes to
+match the string on the lefthand side of the operator with the pattern
+on the right. This is true of any string-valued expression (such as
+@code{digits_regexp}, shown previously), not just string constants.
+@end quotation
+
+@cindex regexp constants, slashes vs.@: quotes
+@cindex @code{\} (backslash), in regexp constants
+@cindex backslash (@code{\}), in regexp constants
+@cindex @code{"} (double quote), in regexp constants
+@cindex double quote (@code{"}), in regexp constants
+What difference does it make if the string is
+scanned twice? The answer has to do with escape sequences, and particularly
+with backslashes. To get a backslash into a regular expression inside a
+string, you have to type two backslashes.
+
+For example, @code{/\*/} is a regexp constant for a literal @samp{*}.
+Only one backslash is needed. To do the same thing with a string,
+you have to type @code{"\\*"}. The first backslash escapes the
+second one so that the string actually contains the
+two characters @samp{\} and @samp{*}.
+
+@cindex troubleshooting, regexp constants vs.@: string constants
+@cindex regexp constants, vs.@: string constants
+@cindex string constants, vs.@: regexp constants
+Given that you can use both regexp and string constants to describe
+regular expressions, which should you use? The answer is ``regexp
+constants,'' for several reasons:
+
+@itemize @value{BULLET}
+@item
+String constants are more complicated to write and
+more difficult to read. Using regexp constants makes your programs
+less error-prone. Not understanding the difference between the two
+kinds of constants is a common source of errors.
+
+@item
+It is more efficient to use regexp constants. @command{awk} can note
+that you have supplied a regexp and store it internally in a form that
+makes pattern matching more efficient. When using a string constant,
+@command{awk} must first convert the string into this internal form and
+then perform the pattern matching.
+
+@item
+Using regexp constants is better form; it shows clearly that you
+intend a regexp match.
+@end itemize
+
+@cindex sidebar, Using @code{\n} in Bracket Expressions of Dynamic Regexps
+@ifdocbook
+@docbook
+<sidebar><title>Using @code{\n} in Bracket Expressions of Dynamic Regexps</title>
+@end docbook
+
+@cindex regular expressions, dynamic, with embedded newlines
+@cindex newlines, in dynamic regexps
+
+Some versions of @command{awk} do not allow the newline
+character to be used inside a bracket expression for a dynamic regexp:
+
+@example
+$ @kbd{awk '$0 ~ "[ \t\n]"'}
+@error{} awk: newline in character class [
+@error{} ]...
+@error{} source line number 1
+@error{} context is
+@error{} >>> <<<
+@end example
+
+@cindex newlines, in regexp constants
+But a newline in a regexp constant works with no problem:
+
+@example
+$ @kbd{awk '$0 ~ /[ \t\n]/'}
+@kbd{here is a sample line}
+@print{} here is a sample line
+@kbd{Ctrl-d}
+@end example
+
+@command{gawk} does not have this problem, and it isn't likely to
+occur often in practice, but it's worth noting for future reference.
+
+@docbook
+</sidebar>
+@end docbook
+@end ifdocbook
+
+@ifnotdocbook
+@cartouche
+@center @b{Using @code{\n} in Bracket Expressions of Dynamic Regexps}
+
+
+@cindex regular expressions, dynamic, with embedded newlines
+@cindex newlines, in dynamic regexps
+
+Some versions of @command{awk} do not allow the newline
+character to be used inside a bracket expression for a dynamic regexp:
+
+@example
+$ @kbd{awk '$0 ~ "[ \t\n]"'}
+@error{} awk: newline in character class [
+@error{} ]...
+@error{} source line number 1
+@error{} context is
+@error{} >>> <<<
+@end example
+
+@cindex newlines, in regexp constants
+But a newline in a regexp constant works with no problem:
+
+@example
+$ @kbd{awk '$0 ~ /[ \t\n]/'}
+@kbd{here is a sample line}
+@print{} here is a sample line
+@kbd{Ctrl-d}
+@end example
+
+@command{gawk} does not have this problem, and it isn't likely to
+occur often in practice, but it's worth noting for future reference.
+@end cartouche
+@end ifnotdocbook
+@c ENDOFRANGE dregexp
+@c ENDOFRANGE regexpd
+
@node GNU Regexp Operators
@section @command{gawk}-Specific Regexp Operators
@@ -5831,204 +6038,6 @@ Case is always significant in compatibility mode.
@c ENDOFRANGE csregexp
@c ENDOFRANGE regexpcs
-@node Leftmost Longest
-@section How Much Text Matches?
-
-@cindex regular expressions, leftmost longest match
-@c @cindex matching, leftmost longest
-Consider the following:
-
-@example
-echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
-@end example
-
-This example uses the @code{sub()} function (which we haven't discussed yet;
-@pxref{String Functions})
-to make a change to the input record. Here, the regexp @code{/a+/}
-indicates ``one or more @samp{a} characters,'' and the replacement
-text is @samp{<A>}.
-
-The input contains four @samp{a} characters.
-@command{awk} (and POSIX) regular expressions always match
-the leftmost, @emph{longest} sequence of input characters that can
-match. Thus, all four @samp{a} characters are
-replaced with @samp{<A>} in this example:
-
-@example
-$ @kbd{echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'}
-@print{} <A>bcd
-@end example
-
-For simple match/no-match tests, this is not so important. But when doing
-text matching and substitutions with the @code{match()}, @code{sub()}, @code{gsub()},
-and @code{gensub()} functions, it is very important.
-@ifinfo
-@xref{String Functions},
-for more information on these functions.
-@end ifinfo
-Understanding this principle is also important for regexp-based record
-and field splitting (@pxref{Records},
-and also @pxref{Field Separators}).
-
-@node Computed Regexps
-@section Using Dynamic Regexps
-
-@c STARTOFRANGE dregexp
-@cindex regular expressions, computed
-@c STARTOFRANGE regexpd
-@cindex regular expressions, dynamic
-@cindex @code{~} (tilde), @code{~} operator
-@cindex tilde (@code{~}), @code{~} operator
-@cindex @code{!} (exclamation point), @code{!~} operator
-@cindex exclamation point (@code{!}), @code{!~} operator
-@c @cindex operators, @code{~}
-@c @cindex operators, @code{!~}
-The righthand side of a @samp{~} or @samp{!~} operator need not be a
-regexp constant (i.e., a string of characters between slashes). It may
-be any expression. The expression is evaluated and converted to a string
-if necessary; the contents of the string are then used as the
-regexp. A regexp computed in this way is called a @dfn{dynamic
-regexp} or a @dfn{computed regexp}:
-
-@example
-BEGIN @{ digits_regexp = "[[:digit:]]+" @}
-$0 ~ digits_regexp @{ print @}
-@end example
-
-@noindent
-This sets @code{digits_regexp} to a regexp that describes one or more digits,
-and tests whether the input record matches this regexp.
-
-@quotation NOTE
-When using the @samp{~} and @samp{!~}
-operators, there is a difference between a regexp constant
-enclosed in slashes and a string constant enclosed in double quotes.
-If you are going to use a string constant, you have to understand that
-the string is, in essence, scanned @emph{twice}: the first time when
-@command{awk} reads your program, and the second time when it goes to
-match the string on the lefthand side of the operator with the pattern
-on the right. This is true of any string-valued expression (such as
-@code{digits_regexp}, shown previously), not just string constants.
-@end quotation
-
-@cindex regexp constants, slashes vs.@: quotes
-@cindex @code{\} (backslash), in regexp constants
-@cindex backslash (@code{\}), in regexp constants
-@cindex @code{"} (double quote), in regexp constants
-@cindex double quote (@code{"}), in regexp constants
-What difference does it make if the string is
-scanned twice? The answer has to do with escape sequences, and particularly
-with backslashes. To get a backslash into a regular expression inside a
-string, you have to type two backslashes.
-
-For example, @code{/\*/} is a regexp constant for a literal @samp{*}.
-Only one backslash is needed. To do the same thing with a string,
-you have to type @code{"\\*"}. The first backslash escapes the
-second one so that the string actually contains the
-two characters @samp{\} and @samp{*}.
-
-@cindex troubleshooting, regexp constants vs.@: string constants
-@cindex regexp constants, vs.@: string constants
-@cindex string constants, vs.@: regexp constants
-Given that you can use both regexp and string constants to describe
-regular expressions, which should you use? The answer is ``regexp
-constants,'' for several reasons:
-
-@itemize @value{BULLET}
-@item
-String constants are more complicated to write and
-more difficult to read. Using regexp constants makes your programs
-less error-prone. Not understanding the difference between the two
-kinds of constants is a common source of errors.
-
-@item
-It is more efficient to use regexp constants. @command{awk} can note
-that you have supplied a regexp and store it internally in a form that
-makes pattern matching more efficient. When using a string constant,
-@command{awk} must first convert the string into this internal form and
-then perform the pattern matching.
-
-@item
-Using regexp constants is better form; it shows clearly that you
-intend a regexp match.
-@end itemize
-
-@cindex sidebar, Using @code{\n} in Bracket Expressions of Dynamic Regexps
-@ifdocbook
-@docbook
-<sidebar><title>Using @code{\n} in Bracket Expressions of Dynamic Regexps</title>
-@end docbook
-
-@cindex regular expressions, dynamic, with embedded newlines
-@cindex newlines, in dynamic regexps
-
-Some versions of @command{awk} do not allow the newline
-character to be used inside a bracket expression for a dynamic regexp:
-
-@example
-$ @kbd{awk '$0 ~ "[ \t\n]"'}
-@error{} awk: newline in character class [
-@error{} ]...
-@error{} source line number 1
-@error{} context is
-@error{} >>> <<<
-@end example
-
-@cindex newlines, in regexp constants
-But a newline in a regexp constant works with no problem:
-
-@example
-$ @kbd{awk '$0 ~ /[ \t\n]/'}
-@kbd{here is a sample line}
-@print{} here is a sample line
-@kbd{Ctrl-d}
-@end example
-
-@command{gawk} does not have this problem, and it isn't likely to
-occur often in practice, but it's worth noting for future reference.
-
-@docbook
-</sidebar>
-@end docbook
-@end ifdocbook
-
-@ifnotdocbook
-@cartouche
-@center @b{Using @code{\n} in Bracket Expressions of Dynamic Regexps}
-
-
-@cindex regular expressions, dynamic, with embedded newlines
-@cindex newlines, in dynamic regexps
-
-Some versions of @command{awk} do not allow the newline
-character to be used inside a bracket expression for a dynamic regexp:
-
-@example
-$ @kbd{awk '$0 ~ "[ \t\n]"'}
-@error{} awk: newline in character class [
-@error{} ]...
-@error{} source line number 1
-@error{} context is
-@error{} >>> <<<
-@end example
-
-@cindex newlines, in regexp constants
-But a newline in a regexp constant works with no problem:
-
-@example
-$ @kbd{awk '$0 ~ /[ \t\n]/'}
-@kbd{here is a sample line}
-@print{} here is a sample line
-@kbd{Ctrl-d}
-@end example
-
-@command{gawk} does not have this problem, and it isn't likely to
-occur often in practice, but it's worth noting for future reference.
-@end cartouche
-@end ifnotdocbook
-@c ENDOFRANGE dregexp
-@c ENDOFRANGE regexpd
-
@node Regexp Summary
@section Summary
@@ -7971,32 +7980,48 @@ finished processing the current record, but want to do some special
processing on the next record @emph{right now}. For example:
@example
+# Remove text between /* and */, inclusive
@{
- if ((t = index($0, "/*")) != 0) @{
- # value of `tmp' will be "" if t is 1
- tmp = substr($0, 1, t - 1)
- u = index(substr($0, t + 2), "*/")
- offset = t + 2
- while (u == 0) @{
- if (getline <= 0) @{
+ if ((i = index($0, "/*")) != 0) @{
+ out = substr($0, 1, i - 1) # leading part of the string
+ rest = substr($0, i + 2) # ... */ ...
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j > 0) @{
+ rest = substr(rest, j + 2) # remove comment
+ @} else @{
+ while (j == 0) @{
+ # get more text
+ if (getline <= 0) @{
m = "unexpected EOF or error"
m = (m ": " ERRNO)
print m > "/dev/stderr"
exit
- @}
- u = index($0, "*/")
- offset = 0
- @}
- # substr() expression will be "" if */
- # occurred at end of line
- $0 = tmp substr($0, offset + u + 2)
- @}
- print $0
+ @}
+ # build up the line using string concatenation
+ rest = rest $0
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j != 0) @{
+ rest = substr(rest, j + 2)
+ break
+ @}
+ @}
+ @}
+ # build up the output line using string concatenation
+ $0 = out rest
+ @}
+ print $0
@}
@end example
This @command{awk} program deletes C-style comments (@samp{/* @dots{}
-*/}) from the input. By replacing the @samp{print $0} with other
+*/}) from the input.
+It uses a number of features we haven't covered yet, including
+string concatenation
+(@pxref{Concatenation})
+and the @code{index()} and @code{substr()} built-in
+functions
+(@pxref{String Functions}).
+By replacing the @samp{print $0} with other
statements, you could perform more complicated processing on the
decommented input, such as searching for matches of a regular
expression. (This program has a subtle problem---it does not work if one
@@ -8687,7 +8712,7 @@ including abstentions, for each item.
comments (@samp{/* @dots{} */}) from the input. That program
does not work if one comment ends on one line and another one
starts later on the same line.
-Write a program that does handle multiple comments on the line.
+That can be fixed by making one simple change. What is it?
@end enumerate
@c EXCLUDE END
@@ -10517,7 +10542,8 @@ A regexp constant is a regular expression description enclosed in
slashes, such as @code{@w{/^beginning and end$/}}. Most regexps used in
@command{awk} programs are constant, but the @samp{~} and @samp{!~}
matching operators can also match computed or dynamic regexps
-(which are just ordinary strings or variables that contain a regexp).
+(which are typically just ordinary strings or variables that contain a regexp,
+but could also be more complex expressions).
@c ENDOFRANGE cnst
@node Using Constant Regexps
@@ -12308,7 +12334,7 @@ program is one way to print lines in between special bracketing lines:
@example
$1 == "START" @{ interested = ! interested; next @}
-interested == 1 @{ print @}
+interested @{ print @}
$1 == "END" @{ interested = ! interested; next @}
@end example
@@ -12328,6 +12354,16 @@ bogus input data, but the point is to illustrate the use of `!',
so we'll leave well enough alone.
@end ignore
+Most commonly, the @samp{!} operator is used in the conditions of
+@code{if} and @code{while} statements, where it often makes more
+sense to phrase the logic in the negative:
+
+@example
+if (! @var{some condition} || @var{some other condition}) @{
+ @var{@dots{} do whatever processing @dots{}}
+@}
+@end example
+
@cindex @code{next} statement
@quotation NOTE
The @code{next} statement is discussed in
@@ -14120,7 +14156,8 @@ starts over with the first rule in the program.
If the @code{nextfile} statement causes the end of the input to be reached,
then the code in any @code{END} rules is executed. An exception to this is
when @code{nextfile} is invoked during execution of any statement in an
-@code{END} rule; In this case, it causes the program to stop immediately. @xref{BEGIN/END}.
+@code{END} rule; in this case, it causes the program to stop immediately.
+@xref{BEGIN/END}.
The @code{nextfile} statement is useful when there are many @value{DF}s
to process but it isn't necessary to process every record in every file.
@@ -14130,13 +14167,10 @@ would have to continue scanning the unwanted records. The @code{nextfile}
statement accomplishes this much more efficiently.
In @command{gawk}, execution of @code{nextfile} causes additional things
-to happen:
-any @code{ENDFILE} rules are executed except in the case as
-mentioned below,
-@code{ARGIND} is incremented,
-and
-any @code{BEGINFILE} rules are executed.
-(@code{ARGIND} hasn't been introduced yet. @xref{Built-in Variables}.)
+to happen: any @code{ENDFILE} rules are executed if @command{gawk} is
+not currently in an @code{END} or @code{BEGINFILE} rule, @code{ARGIND} is
+incremented, and any @code{BEGINFILE} rules are executed. (@code{ARGIND}
+hasn't been introduced yet. @xref{Built-in Variables}.)
With @command{gawk}, @code{nextfile} is useful inside a @code{BEGINFILE}
rule to skip over a file that would otherwise cause @command{gawk}
@@ -16150,7 +16184,7 @@ $ @kbd{echo 'line 1}
> @kbd{line 2}
> @kbd{line 3' | awk '@{ l[lines] = $0; ++lines @}}
> @kbd{END @{}
-> @kbd{for (i = lines-1; i >= 0; --i)}
+> @kbd{for (i = lines - 1; i >= 0; i--)}
> @kbd{print l[i]}
> @kbd{@}'}
@print{} line 3
@@ -16174,7 +16208,7 @@ The following version of the program works correctly:
@example
@{ l[lines++] = $0 @}
END @{
- for (i = lines - 1; i >= 0; --i)
+ for (i = lines - 1; i >= 0; i--)
print l[i]
@}
@end example
@@ -20436,8 +20470,9 @@ function mystrtonum(str, ret, n, i, k, c)
ret = 0
for (i = 1; i <= n; i++) @{
c = substr(str, i, 1)
- if ((k = index("01234567", c)) > 0)
- k-- # adjust for 1-basing in awk
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("1234567", c)
ret = ret * 8 + k
@}
@@ -20449,6 +20484,8 @@ function mystrtonum(str, ret, n, i, k, c)
for (i = 1; i <= n; i++) @{
c = substr(str, i, 1)
c = tolower(c)
+ # index() returns 0 if c not in string,
+ # includes c == "0"
k = index("123456789abcdef", c)
ret = ret * 16 + k
@@ -21051,7 +21088,12 @@ function readfile(file, tmp, contents)
This function reads from @code{file} one record at a time, building
up the full contents of the file in the local variable @code{contents}.
-It works, but is not necessarily efficient.
+It works, but is not necessarily
+@c 8/2014. Thanks to BWK for pointing this out:
+efficient.@footnote{Execution time grows quadratically in the size of
+the input; for each record, @command{awk} has to allocate a bigger
+internal buffer for @code{contents}, copy the old contents into it,
+and then append the contents of the new record.}
The following function, based on a suggestion by Denis Shirokov,
reads the entire contents of the named file in one shot:
@@ -21724,8 +21766,7 @@ it is not an option, and it ends option processing. Continuing on:
i = index(options, thisopt)
if (i == 0) @{
if (Opterr)
- printf("%c -- invalid option\n",
- thisopt) > "/dev/stderr"
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
if (_opti >= length(argv[Optind])) @{
Optind++
_opti = 0
diff --git a/doc/gawktexi.in b/doc/gawktexi.in
index 954b7db0..d9846cbe 100644
--- a/doc/gawktexi.in
+++ b/doc/gawktexi.in
@@ -521,10 +521,10 @@ particular records in a file and perform operations upon them.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between @samp{[...]}.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
* Regexp Summary:: Regular expressions summary.
* Records:: Controlling how data is split into
records.
@@ -1741,6 +1741,7 @@ They also appear in the index under the heading ``dark corner.''
As noted by the opening quote, though, any coverage of dark corners is,
by definition, incomplete.
+@cindex c.e., See common extensions
Extensions to the standard @command{awk} language that are supported by
more than one @command{awk} implementation are marked
@ifclear FOR_PRINT
@@ -2308,24 +2309,19 @@ For example, on OS/2, it is @kbd{Ctrl-z}.)
As an example, the following program prints a friendly piece of advice
(from Douglas Adams's @cite{The Hitchhiker's Guide to the Galaxy}),
to keep you from worrying about the complexities of computer
-programming (@code{BEGIN} is a feature we haven't discussed yet):
+programming:
@example
-$ @kbd{awk "BEGIN @{ print \"Don't Panic!\" @}"}
+$ @kbd{awk 'BEGIN @{ print "Don\47t Panic!" @}'}
@print{} Don't Panic!
@end example
-@cindex shell quoting, double quote
-@cindex double quote (@code{"}) in shell commands
-@cindex @code{"} (double quote) in shell commands
-@cindex @code{\} (backslash) in shell commands
-@cindex backslash (@code{\}) in shell commands
-This program does not read any input. The @samp{\} before each of the
-inner double quotes is necessary because of the shell's quoting
-rules---in particular because it mixes both single quotes and
-double quotes.@footnote{Although we generally recommend the use of single
-quotes around the program text, double quotes are needed here in order to
-put the single quote into the message.}
+@command{awk} executes statements associated with @code{BEGIN} before
+reading any input. If there are no other statements in your program,
+as is the case here, @command{awk} just stops, instead of trying to read
+input it doesn't know how to process.
+The @samp{\47} is a magic way of getting a single quote into
+the program, without having to engage in ugly shell quoting tricks.
@quotation NOTE
As a side note, if you use Bash as your shell, you should execute the
@@ -2957,6 +2953,9 @@ awk '@{ if (length($0) > max) max = length($0) @}
END @{ print max @}' data
@end example
+The code associated with @code{END} executes after all
+input has been read; it's the other side of the coin to @code{BEGIN}.
+
@cindex @command{expand} utility
@item
Print the length of the longest line in @file{data}:
@@ -4043,6 +4042,11 @@ included. As each element of @code{ARGV} is processed, @command{gawk}
sets the variable @code{ARGIND} to the index in @code{ARGV} of the
current element.
+@c FIXME: One day, move the ARGC and ARGV node closer to here.
+Changing @code{ARGC} and @code{ARGV} in your @command{awk} program lets
+you control how @command{awk} processes the input files; this is described
+in more detail in @ref{ARGC and ARGV}.
+
@cindex input files, variable assignments and
@cindex variable assignments and input files
The distinction between @value{FN} arguments and variable-assignment
@@ -4683,10 +4687,10 @@ regular expressions work, we present more complicated instances.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between @samp{[...]}.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
* Regexp Summary:: Regular expressions summary.
@end menu
@@ -4896,8 +4900,11 @@ that a maximum of two hexadecimal digits following the
@item \/
A literal slash (necessary for regexp constants only).
This sequence is used when you want to write a regexp
-constant that contains a slash. Because the regexp is delimited by
-slashes, you need to escape the slash that is part of the pattern,
+constant that contains a slash
+(such as @code{/.*:\/home\/[[:alnum:]]+:.*/}; the @samp{[[:alnum:]]}
+notation is discussed shortly, in @ref{Bracket Expressions}).
+Because the regexp is delimited by
+slashes, you need to escape any slash that is part of the pattern,
in order to tell @command{awk} to keep processing the rest of the regexp.
@cindex @code{\} (backslash), @code{\"} escape sequence
@@ -4905,8 +4912,10 @@ in order to tell @command{awk} to keep processing the rest of the regexp.
@item \"
A literal double quote (necessary for string constants only).
This sequence is used when you want to write a string
-constant that contains a double quote. Because the string is delimited by
-double quotes, you need to escape the quote that is part of the string,
+constant that contains a double quote
+(such as @code{"He said \"hi!\" to her."}).
+Because the string is delimited by
+double quotes, you need to escape any quote that is part of the string,
in order to tell @command{awk} to keep processing the rest of the string.
@end table
@@ -5384,6 +5393,160 @@ they do not recognize collating symbols or equivalence classes.
@c maybe one day ...
@c ENDOFRANGE charlist
+@node Leftmost Longest
+@section How Much Text Matches?
+
+@cindex regular expressions, leftmost longest match
+@c @cindex matching, leftmost longest
+Consider the following:
+
+@example
+echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
+@end example
+
+This example uses the @code{sub()} function (which we haven't discussed yet;
+@pxref{String Functions})
+to make a change to the input record. Here, the regexp @code{/a+/}
+indicates ``one or more @samp{a} characters,'' and the replacement
+text is @samp{<A>}.
+
+The input contains four @samp{a} characters.
+@command{awk} (and POSIX) regular expressions always match
+the leftmost, @emph{longest} sequence of input characters that can
+match. Thus, all four @samp{a} characters are
+replaced with @samp{<A>} in this example:
+
+@example
+$ @kbd{echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'}
+@print{} <A>bcd
+@end example
+
+For simple match/no-match tests, this is not so important. But when doing
+text matching and substitutions with the @code{match()}, @code{sub()}, @code{gsub()},
+and @code{gensub()} functions, it is very important.
+@ifinfo
+@xref{String Functions},
+for more information on these functions.
+@end ifinfo
+Understanding this principle is also important for regexp-based record
+and field splitting (@pxref{Records},
+and also @pxref{Field Separators}).
+
+@node Computed Regexps
+@section Using Dynamic Regexps
+
+@c STARTOFRANGE dregexp
+@cindex regular expressions, computed
+@c STARTOFRANGE regexpd
+@cindex regular expressions, dynamic
+@cindex @code{~} (tilde), @code{~} operator
+@cindex tilde (@code{~}), @code{~} operator
+@cindex @code{!} (exclamation point), @code{!~} operator
+@cindex exclamation point (@code{!}), @code{!~} operator
+@c @cindex operators, @code{~}
+@c @cindex operators, @code{!~}
+The righthand side of a @samp{~} or @samp{!~} operator need not be a
+regexp constant (i.e., a string of characters between slashes). It may
+be any expression. The expression is evaluated and converted to a string
+if necessary; the contents of the string are then used as the
+regexp. A regexp computed in this way is called a @dfn{dynamic
+regexp} or a @dfn{computed regexp}:
+
+@example
+BEGIN @{ digits_regexp = "[[:digit:]]+" @}
+$0 ~ digits_regexp @{ print @}
+@end example
+
+@noindent
+This sets @code{digits_regexp} to a regexp that describes one or more digits,
+and tests whether the input record matches this regexp.
+
+@quotation NOTE
+When using the @samp{~} and @samp{!~}
+operators, there is a difference between a regexp constant
+enclosed in slashes and a string constant enclosed in double quotes.
+If you are going to use a string constant, you have to understand that
+the string is, in essence, scanned @emph{twice}: the first time when
+@command{awk} reads your program, and the second time when it goes to
+match the string on the lefthand side of the operator with the pattern
+on the right. This is true of any string-valued expression (such as
+@code{digits_regexp}, shown previously), not just string constants.
+@end quotation
+
+@cindex regexp constants, slashes vs.@: quotes
+@cindex @code{\} (backslash), in regexp constants
+@cindex backslash (@code{\}), in regexp constants
+@cindex @code{"} (double quote), in regexp constants
+@cindex double quote (@code{"}), in regexp constants
+What difference does it make if the string is
+scanned twice? The answer has to do with escape sequences, and particularly
+with backslashes. To get a backslash into a regular expression inside a
+string, you have to type two backslashes.
+
+For example, @code{/\*/} is a regexp constant for a literal @samp{*}.
+Only one backslash is needed. To do the same thing with a string,
+you have to type @code{"\\*"}. The first backslash escapes the
+second one so that the string actually contains the
+two characters @samp{\} and @samp{*}.
+
+@cindex troubleshooting, regexp constants vs.@: string constants
+@cindex regexp constants, vs.@: string constants
+@cindex string constants, vs.@: regexp constants
+Given that you can use both regexp and string constants to describe
+regular expressions, which should you use? The answer is ``regexp
+constants,'' for several reasons:
+
+@itemize @value{BULLET}
+@item
+String constants are more complicated to write and
+more difficult to read. Using regexp constants makes your programs
+less error-prone. Not understanding the difference between the two
+kinds of constants is a common source of errors.
+
+@item
+It is more efficient to use regexp constants. @command{awk} can note
+that you have supplied a regexp and store it internally in a form that
+makes pattern matching more efficient. When using a string constant,
+@command{awk} must first convert the string into this internal form and
+then perform the pattern matching.
+
+@item
+Using regexp constants is better form; it shows clearly that you
+intend a regexp match.
+@end itemize
+
+@sidebar Using @code{\n} in Bracket Expressions of Dynamic Regexps
+@cindex regular expressions, dynamic, with embedded newlines
+@cindex newlines, in dynamic regexps
+
+Some versions of @command{awk} do not allow the newline
+character to be used inside a bracket expression for a dynamic regexp:
+
+@example
+$ @kbd{awk '$0 ~ "[ \t\n]"'}
+@error{} awk: newline in character class [
+@error{} ]...
+@error{} source line number 1
+@error{} context is
+@error{} >>> <<<
+@end example
+
+@cindex newlines, in regexp constants
+But a newline in a regexp constant works with no problem:
+
+@example
+$ @kbd{awk '$0 ~ /[ \t\n]/'}
+@kbd{here is a sample line}
+@print{} here is a sample line
+@kbd{Ctrl-d}
+@end example
+
+@command{gawk} does not have this problem, and it isn't likely to
+occur often in practice, but it's worth noting for future reference.
+@end sidebar
+@c ENDOFRANGE dregexp
+@c ENDOFRANGE regexpd
+
@node GNU Regexp Operators
@section @command{gawk}-Specific Regexp Operators
@@ -5659,160 +5822,6 @@ Case is always significant in compatibility mode.
@c ENDOFRANGE csregexp
@c ENDOFRANGE regexpcs
-@node Leftmost Longest
-@section How Much Text Matches?
-
-@cindex regular expressions, leftmost longest match
-@c @cindex matching, leftmost longest
-Consider the following:
-
-@example
-echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
-@end example
-
-This example uses the @code{sub()} function (which we haven't discussed yet;
-@pxref{String Functions})
-to make a change to the input record. Here, the regexp @code{/a+/}
-indicates ``one or more @samp{a} characters,'' and the replacement
-text is @samp{<A>}.
-
-The input contains four @samp{a} characters.
-@command{awk} (and POSIX) regular expressions always match
-the leftmost, @emph{longest} sequence of input characters that can
-match. Thus, all four @samp{a} characters are
-replaced with @samp{<A>} in this example:
-
-@example
-$ @kbd{echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'}
-@print{} <A>bcd
-@end example
-
-For simple match/no-match tests, this is not so important. But when doing
-text matching and substitutions with the @code{match()}, @code{sub()}, @code{gsub()},
-and @code{gensub()} functions, it is very important.
-@ifinfo
-@xref{String Functions},
-for more information on these functions.
-@end ifinfo
-Understanding this principle is also important for regexp-based record
-and field splitting (@pxref{Records},
-and also @pxref{Field Separators}).
-
-@node Computed Regexps
-@section Using Dynamic Regexps
-
-@c STARTOFRANGE dregexp
-@cindex regular expressions, computed
-@c STARTOFRANGE regexpd
-@cindex regular expressions, dynamic
-@cindex @code{~} (tilde), @code{~} operator
-@cindex tilde (@code{~}), @code{~} operator
-@cindex @code{!} (exclamation point), @code{!~} operator
-@cindex exclamation point (@code{!}), @code{!~} operator
-@c @cindex operators, @code{~}
-@c @cindex operators, @code{!~}
-The righthand side of a @samp{~} or @samp{!~} operator need not be a
-regexp constant (i.e., a string of characters between slashes). It may
-be any expression. The expression is evaluated and converted to a string
-if necessary; the contents of the string are then used as the
-regexp. A regexp computed in this way is called a @dfn{dynamic
-regexp} or a @dfn{computed regexp}:
-
-@example
-BEGIN @{ digits_regexp = "[[:digit:]]+" @}
-$0 ~ digits_regexp @{ print @}
-@end example
-
-@noindent
-This sets @code{digits_regexp} to a regexp that describes one or more digits,
-and tests whether the input record matches this regexp.
-
-@quotation NOTE
-When using the @samp{~} and @samp{!~}
-operators, there is a difference between a regexp constant
-enclosed in slashes and a string constant enclosed in double quotes.
-If you are going to use a string constant, you have to understand that
-the string is, in essence, scanned @emph{twice}: the first time when
-@command{awk} reads your program, and the second time when it goes to
-match the string on the lefthand side of the operator with the pattern
-on the right. This is true of any string-valued expression (such as
-@code{digits_regexp}, shown previously), not just string constants.
-@end quotation
-
-@cindex regexp constants, slashes vs.@: quotes
-@cindex @code{\} (backslash), in regexp constants
-@cindex backslash (@code{\}), in regexp constants
-@cindex @code{"} (double quote), in regexp constants
-@cindex double quote (@code{"}), in regexp constants
-What difference does it make if the string is
-scanned twice? The answer has to do with escape sequences, and particularly
-with backslashes. To get a backslash into a regular expression inside a
-string, you have to type two backslashes.
-
-For example, @code{/\*/} is a regexp constant for a literal @samp{*}.
-Only one backslash is needed. To do the same thing with a string,
-you have to type @code{"\\*"}. The first backslash escapes the
-second one so that the string actually contains the
-two characters @samp{\} and @samp{*}.
-
-@cindex troubleshooting, regexp constants vs.@: string constants
-@cindex regexp constants, vs.@: string constants
-@cindex string constants, vs.@: regexp constants
-Given that you can use both regexp and string constants to describe
-regular expressions, which should you use? The answer is ``regexp
-constants,'' for several reasons:
-
-@itemize @value{BULLET}
-@item
-String constants are more complicated to write and
-more difficult to read. Using regexp constants makes your programs
-less error-prone. Not understanding the difference between the two
-kinds of constants is a common source of errors.
-
-@item
-It is more efficient to use regexp constants. @command{awk} can note
-that you have supplied a regexp and store it internally in a form that
-makes pattern matching more efficient. When using a string constant,
-@command{awk} must first convert the string into this internal form and
-then perform the pattern matching.
-
-@item
-Using regexp constants is better form; it shows clearly that you
-intend a regexp match.
-@end itemize
-
-@sidebar Using @code{\n} in Bracket Expressions of Dynamic Regexps
-@cindex regular expressions, dynamic, with embedded newlines
-@cindex newlines, in dynamic regexps
-
-Some versions of @command{awk} do not allow the newline
-character to be used inside a bracket expression for a dynamic regexp:
-
-@example
-$ @kbd{awk '$0 ~ "[ \t\n]"'}
-@error{} awk: newline in character class [
-@error{} ]...
-@error{} source line number 1
-@error{} context is
-@error{} >>> <<<
-@end example
-
-@cindex newlines, in regexp constants
-But a newline in a regexp constant works with no problem:
-
-@example
-$ @kbd{awk '$0 ~ /[ \t\n]/'}
-@kbd{here is a sample line}
-@print{} here is a sample line
-@kbd{Ctrl-d}
-@end example
-
-@command{gawk} does not have this problem, and it isn't likely to
-occur often in practice, but it's worth noting for future reference.
-@end sidebar
-@c ENDOFRANGE dregexp
-@c ENDOFRANGE regexpd
-
@node Regexp Summary
@section Summary
@@ -7573,32 +7582,48 @@ finished processing the current record, but want to do some special
processing on the next record @emph{right now}. For example:
@example
+# Remove text between /* and */, inclusive
@{
- if ((t = index($0, "/*")) != 0) @{
- # value of `tmp' will be "" if t is 1
- tmp = substr($0, 1, t - 1)
- u = index(substr($0, t + 2), "*/")
- offset = t + 2
- while (u == 0) @{
- if (getline <= 0) @{
+ if ((i = index($0, "/*")) != 0) @{
+ out = substr($0, 1, i - 1) # leading part of the string
+ rest = substr($0, i + 2) # ... */ ...
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j > 0) @{
+ rest = substr(rest, j + 2) # remove comment
+ @} else @{
+ while (j == 0) @{
+ # get more text
+ if (getline <= 0) @{
m = "unexpected EOF or error"
m = (m ": " ERRNO)
print m > "/dev/stderr"
exit
- @}
- u = index($0, "*/")
- offset = 0
- @}
- # substr() expression will be "" if */
- # occurred at end of line
- $0 = tmp substr($0, offset + u + 2)
- @}
- print $0
+ @}
+ # build up the line using string concatenation
+ rest = rest $0
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j != 0) @{
+ rest = substr(rest, j + 2)
+ break
+ @}
+ @}
+ @}
+ # build up the output line using string concatenation
+ $0 = out rest
+ @}
+ print $0
@}
@end example
This @command{awk} program deletes C-style comments (@samp{/* @dots{}
-*/}) from the input. By replacing the @samp{print $0} with other
+*/}) from the input.
+It uses a number of features we haven't covered yet, including
+string concatenation
+(@pxref{Concatenation})
+and the @code{index()} and @code{substr()} built-in
+functions
+(@pxref{String Functions}).
+By replacing the @samp{print $0} with other
statements, you could perform more complicated processing on the
decommented input, such as searching for matches of a regular
expression. (This program has a subtle problem---it does not work if one
@@ -8289,7 +8314,7 @@ including abstentions, for each item.
comments (@samp{/* @dots{} */}) from the input. That program
does not work if one comment ends on one line and another one
starts later on the same line.
-Write a program that does handle multiple comments on the line.
+That can be fixed by making one simple change. What is it?
@end enumerate
@c EXCLUDE END
@@ -9990,7 +10015,8 @@ A regexp constant is a regular expression description enclosed in
slashes, such as @code{@w{/^beginning and end$/}}. Most regexps used in
@command{awk} programs are constant, but the @samp{~} and @samp{!~}
matching operators can also match computed or dynamic regexps
-(which are just ordinary strings or variables that contain a regexp).
+(which are typically just ordinary strings or variables that contain a regexp,
+but could be more complex expressions).
@c ENDOFRANGE cnst
@node Using Constant Regexps
@@ -11642,7 +11668,7 @@ program is one way to print lines in between special bracketing lines:
@example
$1 == "START" @{ interested = ! interested; next @}
-interested == 1 @{ print @}
+interested @{ print @}
$1 == "END" @{ interested = ! interested; next @}
@end example
@@ -11662,6 +11688,16 @@ bogus input data, but the point is to illustrate the use of `!',
so we'll leave well enough alone.
@end ignore
+Most commonly, the @samp{!} operator is used in the conditions of
+@code{if} and @code{while} statements, where it often makes more
+sense to phrase the logic in the negative:
+
+@example
+if (! @var{some condition} || @var{some other condition}) @{
+ @var{@dots{} do whatever processing @dots{}}
+@}
+@end example
+
@cindex @code{next} statement
@quotation NOTE
The @code{next} statement is discussed in
@@ -13454,7 +13490,8 @@ starts over with the first rule in the program.
If the @code{nextfile} statement causes the end of the input to be reached,
then the code in any @code{END} rules is executed. An exception to this is
when @code{nextfile} is invoked during execution of any statement in an
-@code{END} rule; In this case, it causes the program to stop immediately. @xref{BEGIN/END}.
+@code{END} rule; in this case, it causes the program to stop immediately.
+@xref{BEGIN/END}.
The @code{nextfile} statement is useful when there are many @value{DF}s
to process but it isn't necessary to process every record in every file.
@@ -13464,13 +13501,10 @@ would have to continue scanning the unwanted records. The @code{nextfile}
statement accomplishes this much more efficiently.
In @command{gawk}, execution of @code{nextfile} causes additional things
-to happen:
-any @code{ENDFILE} rules are executed except in the case as
-mentioned below,
-@code{ARGIND} is incremented,
-and
-any @code{BEGINFILE} rules are executed.
-(@code{ARGIND} hasn't been introduced yet. @xref{Built-in Variables}.)
+to happen: any @code{ENDFILE} rules are executed if @command{gawk} is
+not currently in an @code{END} or @code{BEGINFILE} rule, @code{ARGIND} is
+incremented, and any @code{BEGINFILE} rules are executed. (@code{ARGIND}
+hasn't been introduced yet. @xref{Built-in Variables}.)
With @command{gawk}, @code{nextfile} is useful inside a @code{BEGINFILE}
rule to skip over a file that would otherwise cause @command{gawk}
@@ -15438,7 +15472,7 @@ $ @kbd{echo 'line 1}
> @kbd{line 2}
> @kbd{line 3' | awk '@{ l[lines] = $0; ++lines @}}
> @kbd{END @{}
-> @kbd{for (i = lines-1; i >= 0; --i)}
+> @kbd{for (i = lines - 1; i >= 0; i--)}
> @kbd{print l[i]}
> @kbd{@}'}
@print{} line 3
@@ -15462,7 +15496,7 @@ The following version of the program works correctly:
@example
@{ l[lines++] = $0 @}
END @{
- for (i = lines - 1; i >= 0; --i)
+ for (i = lines - 1; i >= 0; i--)
print l[i]
@}
@end example
@@ -19563,8 +19597,9 @@ function mystrtonum(str, ret, n, i, k, c)
ret = 0
for (i = 1; i <= n; i++) @{
c = substr(str, i, 1)
- if ((k = index("01234567", c)) > 0)
- k-- # adjust for 1-basing in awk
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("1234567", c)
ret = ret * 8 + k
@}
@@ -19576,6 +19611,8 @@ function mystrtonum(str, ret, n, i, k, c)
for (i = 1; i <= n; i++) @{
c = substr(str, i, 1)
c = tolower(c)
+ # index() returns 0 if c not in string,
+ # includes c == "0"
k = index("123456789abcdef", c)
ret = ret * 16 + k
@@ -20178,7 +20215,12 @@ function readfile(file, tmp, contents)
This function reads from @code{file} one record at a time, building
up the full contents of the file in the local variable @code{contents}.
-It works, but is not necessarily efficient.
+It works, but is not necessarily
+@c 8/2014. Thanks to BWK for pointing this out:
+efficient.@footnote{Execution time grows quadratically in the size of
+the input; for each record, @command{awk} has to allocate a bigger
+internal buffer for @code{contents}, copy the old contents into it,
+and then append the contents of the new record.}
The following function, based on a suggestion by Denis Shirokov,
reads the entire contents of the named file in one shot:
@@ -20822,8 +20864,7 @@ it is not an option, and it ends option processing. Continuing on:
i = index(options, thisopt)
if (i == 0) @{
if (Opterr)
- printf("%c -- invalid option\n",
- thisopt) > "/dev/stderr"
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
if (_opti >= length(argv[Optind])) @{
Optind++
_opti = 0