diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2014-08-29 13:11:45 +0300 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2014-08-29 13:11:45 +0300 |
commit | 6c541fd0f75cd328dd80afec757ecccc833719af (patch) | |
tree | 163707da9efd9c8d0c7a7e9d0ef3887222c88bf3 | |
parent | ff28c07f95ff2400eb0ad1becc0eae1eab9dc93d (diff) | |
download | egawk-6c541fd0f75cd328dd80afec757ecccc833719af.tar.gz egawk-6c541fd0f75cd328dd80afec757ecccc833719af.tar.bz2 egawk-6c541fd0f75cd328dd80afec757ecccc833719af.zip |
More doc updates.
-rw-r--r-- | awklib/eg/lib/getopt.awk | 3 | ||||
-rw-r--r-- | awklib/eg/lib/strtonum.awk | 7 | ||||
-rw-r--r-- | doc/ChangeLog | 5 | ||||
-rw-r--r-- | doc/gawk.info | 1507 | ||||
-rw-r--r-- | doc/gawk.texi | 549 | ||||
-rw-r--r-- | doc/gawktexi.in | 461 |
6 files changed, 1326 insertions, 1206 deletions
diff --git a/awklib/eg/lib/getopt.awk b/awklib/eg/lib/getopt.awk index db957ceb..6b1f4c50 100644 --- a/awklib/eg/lib/getopt.awk +++ b/awklib/eg/lib/getopt.awk @@ -38,8 +38,7 @@ function getopt(argc, argv, options, thisopt, i) i = index(options, thisopt) if (i == 0) { if (Opterr) - printf("%c -- invalid option\n", - thisopt) > "/dev/stderr" + printf("%c -- invalid option\n", thisopt) > "/dev/stderr" if (_opti >= length(argv[Optind])) { Optind++ _opti = 0 diff --git a/awklib/eg/lib/strtonum.awk b/awklib/eg/lib/strtonum.awk index 9342e789..5e20626b 100644 --- a/awklib/eg/lib/strtonum.awk +++ b/awklib/eg/lib/strtonum.awk @@ -13,8 +13,9 @@ function mystrtonum(str, ret, n, i, k, c) ret = 0 for (i = 1; i <= n; i++) { c = substr(str, i, 1) - if ((k = index("01234567", c)) > 0) - k-- # adjust for 1-basing in awk + # index() returns 0 if c not in string, + # includes c == "0" + k = index("1234567", c) ret = ret * 8 + k } @@ -26,6 +27,8 @@ function mystrtonum(str, ret, n, i, k, c) for (i = 1; i <= n; i++) { c = substr(str, i, 1) c = tolower(c) + # index() returns 0 if c not in string, + # includes c == "0" k = index("123456789abcdef", c) ret = ret * 16 + k diff --git a/doc/ChangeLog b/doc/ChangeLog index 980eb023..79c69a30 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,8 @@ +2014-08-29 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: Continuing on reviewer comments, and other + bug fixes, miscellaneous improvements. + 2014-08-26 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in: Use a different mechanism to exclude diff --git a/doc/gawk.info b/doc/gawk.info index 7e6e1b89..2841d21e 100644 --- a/doc/gawk.info +++ b/doc/gawk.info @@ -170,10 +170,10 @@ entitled "GNU Free Documentation License". * Escape Sequences:: How to write nonprinting characters. * Regexp Operators:: Regular Expression Operators. * Bracket Expressions:: What can go between `[...]'. -* GNU Regexp Operators:: Operators specific to GNU software. 
-* Case-sensitivity:: How to do case-insensitive matching. * Leftmost Longest:: How much text matches. * Computed Regexps:: Using Dynamic Regexps. +* GNU Regexp Operators:: Operators specific to GNU software. +* Case-sensitivity:: How to do case-insensitive matching. * Regexp Summary:: Regular expressions summary. * Records:: Controlling how data is split into records. @@ -1455,15 +1455,17 @@ end-of-file character may be different. For example, on OS/2, it is As an example, the following program prints a friendly piece of advice (from Douglas Adams's `The Hitchhiker's Guide to the Galaxy'), to keep you from worrying about the complexities of computer -programming (`BEGIN' is a feature we haven't discussed yet): +programming: - $ awk "BEGIN { print \"Don't Panic!\" }" + $ awk 'BEGIN { print "Don\47t Panic!" }' -| Don't Panic! - This program does not read any input. The `\' before each of the -inner double quotes is necessary because of the shell's quoting -rules--in particular because it mixes both single quotes and double -quotes.(1) + `awk' executes statements associated with `BEGIN' before reading any +input. If there are no other statements in your program, as is the +case here, `awk' just stops, instead of trying to read input it doesn't +know how to process. The `\47' is a magic way of getting a single +quote into the program, without having to engage in ugly shell quoting +tricks. NOTE: As a side note, if you use Bash as your shell, you should execute the command `set +H' before running this program @@ -1486,12 +1488,6 @@ works is explained shortly). -| What, me worry? Ctrl-d - ---------- Footnotes ---------- - - (1) Although we generally recommend the use of single quotes around -the program text, double quotes are needed here in order to put the -single quote into the message. 
- File: gawk.info, Node: Long, Next: Executable Scripts, Prev: Read Terminal, Up: Running gawk @@ -1937,6 +1933,9 @@ different ways to do the same things shown here: awk '{ if (length($0) > max) max = length($0) } END { print max }' data + The code associated with `END' executes after all input has been + read; it's the other side of the coin to `BEGIN'. + * Print the length of the longest line in `data': expand data | awk '{ if (x < length($0)) x = length($0) } @@ -2731,6 +2730,10 @@ arguments, including variable assignments, are included. As each element of `ARGV' is processed, `gawk' sets the variable `ARGIND' to the index in `ARGV' of the current element. + Changing `ARGC' and `ARGV' in your `awk' program lets you control +how `awk' processes the input files; this is described in more detail +in *note ARGC and ARGV::. + The distinction between file name arguments and variable-assignment arguments is made when `awk' is about to open the next input file. At that point in execution, it checks the file name to see whether it is @@ -3225,10 +3228,10 @@ you specify more complicated classes of strings. * Escape Sequences:: How to write nonprinting characters. * Regexp Operators:: Regular Expression Operators. * Bracket Expressions:: What can go between `[...]'. -* GNU Regexp Operators:: Operators specific to GNU software. -* Case-sensitivity:: How to do case-insensitive matching. * Leftmost Longest:: How much text matches. * Computed Regexps:: Using Dynamic Regexps. +* GNU Regexp Operators:: Operators specific to GNU software. +* Case-sensitivity:: How to do case-insensitive matching. * Regexp Summary:: Regular expressions summary. @@ -3368,17 +3371,19 @@ apply to both string constants and regexp constants: `\/' A literal slash (necessary for regexp constants only). This sequence is used when you want to write a regexp constant that - contains a slash. 
Because the regexp is delimited by slashes, you - need to escape the slash that is part of the pattern, in order to + contains a slash (such as `/.*:\/home\/[[:alnum:]]+:.*/'; the + `[[:alnum:]]' notation is discussed shortly, in *note Bracket + Expressions::). Because the regexp is delimited by slashes, you + need to escape any slash that is part of the pattern, in order to tell `awk' to keep processing the rest of the regexp. `\"' A literal double quote (necessary for string constants only). This sequence is used when you want to write a string constant - that contains a double quote. Because the string is delimited by - double quotes, you need to escape the quote that is part of the - string, in order to tell `awk' to keep processing the rest of the - string. + that contains a double quote (such as `"He said \"hi!\" to her."'). + Because the string is delimited by double quotes, you need to + escape any quote that is part of the string, in order to tell + `awk' to keep processing the rest of the string. In `gawk', a number of additional two-character sequences that begin with a backslash have special meaning in regexps. *Note GNU Regexp @@ -3616,7 +3621,7 @@ list". regexp operator or function. -File: gawk.info, Node: Bracket Expressions, Next: GNU Regexp Operators, Prev: Regexp Operators, Up: Regexp +File: gawk.info, Node: Bracket Expressions, Next: Leftmost Longest, Prev: Regexp Operators, Up: Regexp 3.4 Using Bracket Expressions ============================= @@ -3721,9 +3726,118 @@ Equivalence classes classes. -File: gawk.info, Node: GNU Regexp Operators, Next: Case-sensitivity, Prev: Bracket Expressions, Up: Regexp +File: gawk.info, Node: Leftmost Longest, Next: Computed Regexps, Prev: Bracket Expressions, Up: Regexp + +3.5 How Much Text Matches? 
+========================== + +Consider the following: + + echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }' + + This example uses the `sub()' function (which we haven't discussed +yet; *note String Functions::) to make a change to the input record. +Here, the regexp `/a+/' indicates "one or more `a' characters," and the +replacement text is `<A>'. + + The input contains four `a' characters. `awk' (and POSIX) regular +expressions always match the leftmost, _longest_ sequence of input +characters that can match. Thus, all four `a' characters are replaced +with `<A>' in this example: + + $ echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }' + -| <A>bcd + + For simple match/no-match tests, this is not so important. But when +doing text matching and substitutions with the `match()', `sub()', +`gsub()', and `gensub()' functions, it is very important. *Note String +Functions::, for more information on these functions. Understanding +this principle is also important for regexp-based record and field +splitting (*note Records::, and also *note Field Separators::). + + +File: gawk.info, Node: Computed Regexps, Next: GNU Regexp Operators, Prev: Leftmost Longest, Up: Regexp + +3.6 Using Dynamic Regexps +========================= + +The righthand side of a `~' or `!~' operator need not be a regexp +constant (i.e., a string of characters between slashes). It may be any +expression. The expression is evaluated and converted to a string if +necessary; the contents of the string are then used as the regexp. A +regexp computed in this way is called a "dynamic regexp" or a "computed +regexp": + + BEGIN { digits_regexp = "[[:digit:]]+" } + $0 ~ digits_regexp { print } + +This sets `digits_regexp' to a regexp that describes one or more digits, +and tests whether the input record matches this regexp. + + NOTE: When using the `~' and `!~' operators, there is a difference + between a regexp constant enclosed in slashes and a string + constant enclosed in double quotes. 
If you are going to use a + string constant, you have to understand that the string is, in + essence, scanned _twice_: the first time when `awk' reads your + program, and the second time when it goes to match the string on + the lefthand side of the operator with the pattern on the right. + This is true of any string-valued expression (such as + `digits_regexp', shown previously), not just string constants. + + What difference does it make if the string is scanned twice? The +answer has to do with escape sequences, and particularly with +backslashes. To get a backslash into a regular expression inside a +string, you have to type two backslashes. + + For example, `/\*/' is a regexp constant for a literal `*'. Only +one backslash is needed. To do the same thing with a string, you have +to type `"\\*"'. The first backslash escapes the second one so that +the string actually contains the two characters `\' and `*'. + + Given that you can use both regexp and string constants to describe +regular expressions, which should you use? The answer is "regexp +constants," for several reasons: + + * String constants are more complicated to write and more difficult + to read. Using regexp constants makes your programs less + error-prone. Not understanding the difference between the two + kinds of constants is a common source of errors. + + * It is more efficient to use regexp constants. `awk' can note that + you have supplied a regexp and store it internally in a form that + makes pattern matching more efficient. When using a string + constant, `awk' must first convert the string into this internal + form and then perform the pattern matching. + + * Using regexp constants is better form; it shows clearly that you + intend a regexp match. 
+ + Using `\n' in Bracket Expressions of Dynamic Regexps + + Some versions of `awk' do not allow the newline character to be used +inside a bracket expression for a dynamic regexp: + + $ awk '$0 ~ "[ \t\n]"' + error--> awk: newline in character class [ + error--> ]... + error--> source line number 1 + error--> context is + error--> >>> <<< + + But a newline in a regexp constant works with no problem: + + $ awk '$0 ~ /[ \t\n]/' + here is a sample line + -| here is a sample line + Ctrl-d + + `gawk' does not have this problem, and it isn't likely to occur +often in practice, but it's worth noting for future reference. + + +File: gawk.info, Node: GNU Regexp Operators, Next: Case-sensitivity, Prev: Computed Regexps, Up: Regexp -3.5 `gawk'-Specific Regexp Operators +3.7 `gawk'-Specific Regexp Operators ==================================== GNU software that deals with regular expressions provides a number of @@ -3817,9 +3931,9 @@ No options default. -File: gawk.info, Node: Case-sensitivity, Next: Leftmost Longest, Prev: GNU Regexp Operators, Up: Regexp +File: gawk.info, Node: Case-sensitivity, Next: Regexp Summary, Prev: GNU Regexp Operators, Up: Regexp -3.6 Case Sensitivity in Matching +3.8 Case Sensitivity in Matching ================================ Case is normally significant in regular expressions, both when matching @@ -3892,116 +4006,7 @@ obscure and we don't recommend it. means that `gawk' does the right thing. -File: gawk.info, Node: Leftmost Longest, Next: Computed Regexps, Prev: Case-sensitivity, Up: Regexp - -3.7 How Much Text Matches? -========================== - -Consider the following: - - echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }' - - This example uses the `sub()' function (which we haven't discussed -yet; *note String Functions::) to make a change to the input record. -Here, the regexp `/a+/' indicates "one or more `a' characters," and the -replacement text is `<A>'. - - The input contains four `a' characters. 
`awk' (and POSIX) regular -expressions always match the leftmost, _longest_ sequence of input -characters that can match. Thus, all four `a' characters are replaced -with `<A>' in this example: - - $ echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }' - -| <A>bcd - - For simple match/no-match tests, this is not so important. But when -doing text matching and substitutions with the `match()', `sub()', -`gsub()', and `gensub()' functions, it is very important. *Note String -Functions::, for more information on these functions. Understanding -this principle is also important for regexp-based record and field -splitting (*note Records::, and also *note Field Separators::). - - -File: gawk.info, Node: Computed Regexps, Next: Regexp Summary, Prev: Leftmost Longest, Up: Regexp - -3.8 Using Dynamic Regexps -========================= - -The righthand side of a `~' or `!~' operator need not be a regexp -constant (i.e., a string of characters between slashes). It may be any -expression. The expression is evaluated and converted to a string if -necessary; the contents of the string are then used as the regexp. A -regexp computed in this way is called a "dynamic regexp" or a "computed -regexp": - - BEGIN { digits_regexp = "[[:digit:]]+" } - $0 ~ digits_regexp { print } - -This sets `digits_regexp' to a regexp that describes one or more digits, -and tests whether the input record matches this regexp. - - NOTE: When using the `~' and `!~' operators, there is a difference - between a regexp constant enclosed in slashes and a string - constant enclosed in double quotes. If you are going to use a - string constant, you have to understand that the string is, in - essence, scanned _twice_: the first time when `awk' reads your - program, and the second time when it goes to match the string on - the lefthand side of the operator with the pattern on the right. - This is true of any string-valued expression (such as - `digits_regexp', shown previously), not just string constants. 
- - What difference does it make if the string is scanned twice? The -answer has to do with escape sequences, and particularly with -backslashes. To get a backslash into a regular expression inside a -string, you have to type two backslashes. - - For example, `/\*/' is a regexp constant for a literal `*'. Only -one backslash is needed. To do the same thing with a string, you have -to type `"\\*"'. The first backslash escapes the second one so that -the string actually contains the two characters `\' and `*'. - - Given that you can use both regexp and string constants to describe -regular expressions, which should you use? The answer is "regexp -constants," for several reasons: - - * String constants are more complicated to write and more difficult - to read. Using regexp constants makes your programs less - error-prone. Not understanding the difference between the two - kinds of constants is a common source of errors. - - * It is more efficient to use regexp constants. `awk' can note that - you have supplied a regexp and store it internally in a form that - makes pattern matching more efficient. When using a string - constant, `awk' must first convert the string into this internal - form and then perform the pattern matching. - - * Using regexp constants is better form; it shows clearly that you - intend a regexp match. - - Using `\n' in Bracket Expressions of Dynamic Regexps - - Some versions of `awk' do not allow the newline character to be used -inside a bracket expression for a dynamic regexp: - - $ awk '$0 ~ "[ \t\n]"' - error--> awk: newline in character class [ - error--> ]... - error--> source line number 1 - error--> context is - error--> >>> <<< - - But a newline in a regexp constant works with no problem: - - $ awk '$0 ~ /[ \t\n]/' - here is a sample line - -| here is a sample line - Ctrl-d - - `gawk' does not have this problem, and it isn't likely to occur -often in practice, but it's worth noting for future reference. 
- - -File: gawk.info, Node: Regexp Summary, Prev: Computed Regexps, Up: Regexp +File: gawk.info, Node: Regexp Summary, Prev: Case-sensitivity, Up: Regexp 3.9 Summary =========== @@ -5388,35 +5393,47 @@ input record and split it up into fields. This is useful if you've finished processing the current record, but want to do some special processing on the next record _right now_. For example: + # Remove text between /* and */, inclusive { - if ((t = index($0, "/*")) != 0) { - # value of `tmp' will be "" if t is 1 - tmp = substr($0, 1, t - 1) - u = index(substr($0, t + 2), "*/") - offset = t + 2 - while (u == 0) { - if (getline <= 0) { + if ((i = index($0, "/*")) != 0) { + out = substr($0, 1, i - 1) # leading part of the string + rest = substr($0, i + 2) # ... */ ... + j = index(rest, "*/") # is */ in trailing part? + if (j > 0) { + rest = substr(rest, j + 2) # remove comment + } else { + while (j == 0) { + # get more text + if (getline <= 0) { m = "unexpected EOF or error" m = (m ": " ERRNO) print m > "/dev/stderr" exit - } - u = index($0, "*/") - offset = 0 - } - # substr() expression will be "" if */ - # occurred at end of line - $0 = tmp substr($0, offset + u + 2) - } - print $0 + } + # build up the line using string concatenation + rest = rest $0 + j = index(rest, "*/") # is */ in trailing part? + if (j != 0) { + rest = substr(rest, j + 2) + break + } + } + } + # build up the output line using string concatenation + $0 = out rest + } + print $0 } This `awk' program deletes C-style comments (`/* ... */') from the -input. By replacing the `print $0' with other statements, you could -perform more complicated processing on the decommented input, such as -searching for matches of a regular expression. (This program has a -subtle problem--it does not work if one comment ends and another begins -on the same line.) +input. 
It uses a number of features we haven't covered yet, including +string concatenation (*note Concatenation::) and the `index()' and +`substr()' built-in functions (*note String Functions::). By replacing +the `print $0' with other statements, you could perform more +complicated processing on the decommented input, such as searching for +matches of a regular expression. (This program has a subtle +problem--it does not work if one comment ends and another begins on the +same line.) This form of the `getline' command sets `NF', `NR', `FNR', `RT', and the value of `$0'. @@ -5980,8 +5997,8 @@ File: gawk.info, Node: Input Exercises, Prev: Input Summary, Up: Reading File 2. *note Plain Getline::, presented a program to remove C-style comments (`/* ... */') from the input. That program does not work if one comment ends on one line and another one starts later on - the same line. Write a program that does handle multiple comments - on the line. + the same line. That can be fixed by making one simple change. + What is it? @@ -7312,8 +7329,9 @@ File: gawk.info, Node: Regexp Constants, Prev: Nondecimal-numbers, Up: Consta A regexp constant is a regular expression description enclosed in slashes, such as `/^beginning and end$/'. Most regexps used in `awk' programs are constant, but the `~' and `!~' matching operators can also -match computed or dynamic regexps (which are just ordinary strings or -variables that contain a regexp). +match computed or dynamic regexps (which are typically just ordinary +strings or variables that contain a regexp, but could be a more complex +expression). File: gawk.info, Node: Using Constant Regexps, Next: Variables, Prev: Constants, Up: Values @@ -8463,7 +8481,7 @@ following program is one way to print lines in between special bracketing lines: $1 == "START" { interested = ! interested; next } - interested == 1 { print } + interested { print } $1 == "END" { interested = ! 
interested; next } The variable `interested', as with all `awk' variables, starts out @@ -8473,6 +8491,14 @@ using `!'. The next rule prints lines as long as `interested' is true. When a line is seen whose first field is `END', `interested' is toggled back to false.(1) + Most commonly, the `!' operator is used in the conditions of `if' +and `while' statements, where it often makes more sense to phrase the +logic in the negative: + + if (! SOME CONDITION || SOME OTHER CONDITION) { + ... DO WHATEVER PROCESSING ... + } + NOTE: The `next' statement is discussed in *note Next Statement::. `next' tells `awk' to skip the rest of the rules, get the next record, and start processing the rules over again at the top. The @@ -9841,7 +9867,7 @@ reset to one, and processing starts over with the first rule in the program. If the `nextfile' statement causes the end of the input to be reached, then the code in any `END' rules is executed. An exception to this is when `nextfile' is invoked during execution of any statement in -an `END' rule; In this case, it causes the program to stop immediately. +an `END' rule; in this case, it causes the program to stop immediately. *Note BEGIN/END::. The `nextfile' statement is useful when there are many data files to @@ -9851,10 +9877,10 @@ would have to continue scanning the unwanted records. The `nextfile' statement accomplishes this much more efficiently. In `gawk', execution of `nextfile' causes additional things to -happen: any `ENDFILE' rules are executed except in the case as -mentioned below, `ARGIND' is incremented, and any `BEGINFILE' rules are -executed. (`ARGIND' hasn't been introduced yet. *Note Built-in -Variables::.) +happen: any `ENDFILE' rules are executed if `gawk' is not currently in +an `END' or `BEGINFILE' rule, `ARGIND' is incremented, and any +`BEGINFILE' rules are executed. (`ARGIND' hasn't been introduced yet. +*Note Built-in Variables::.) 
With `gawk', `nextfile' is useful inside a `BEGINFILE' rule to skip over a file that would otherwise cause `gawk' to exit with a fatal @@ -11280,7 +11306,7 @@ might look like this: > line 2 > line 3' | awk '{ l[lines] = $0; ++lines } > END { - > for (i = lines-1; i >= 0; --i) + > for (i = lines - 1; i >= 0; i--) > print l[i] > }' -| line 3 @@ -11301,7 +11327,7 @@ following version of the program works correctly: { l[lines++] = $0 } END { - for (i = lines - 1; i >= 0; --i) + for (i = lines - 1; i >= 0; i--) print l[i] } @@ -14378,8 +14404,9 @@ versions of `awk': ret = 0 for (i = 1; i <= n; i++) { c = substr(str, i, 1) - if ((k = index("01234567", c)) > 0) - k-- # adjust for 1-basing in awk + # index() returns 0 if c not in string, + # includes c == "0" + k = index("1234567", c) ret = ret * 8 + k } @@ -14391,6 +14418,8 @@ versions of `awk': for (i = 1; i <= n; i++) { c = substr(str, i, 1) c = tolower(c) + # index() returns 0 if c not in string, + # includes c == "0" k = index("123456789abcdef", c) ret = ret * 16 + k @@ -14851,7 +14880,7 @@ that might be as follows: This function reads from `file' one record at a time, building up the full contents of the file in the local variable `contents'. It -works, but is not necessarily efficient. +works, but is not necessarily efficient.(1) The following function, based on a suggestion by Denis Shirokov, reads the entire contents of the named file in one shot: @@ -14886,6 +14915,13 @@ string. Thus calling code may use something like: This tests the result to see if it is empty or not. An equivalent test would be `contents == ""'. + ---------- Footnotes ---------- + + (1) Execution time grows quadratically in the size of the input; for +each record, `awk' has to allocate a bigger internal buffer for +`contents', copy the old contents into it, and then append the contents +of the new record. 
+ File: gawk.info, Node: Data File Management, Next: Getopt Function, Prev: General Functions, Up: Library Functions @@ -15339,8 +15375,7 @@ not an option, and it ends option processing. Continuing on: i = index(options, thisopt) if (i == 0) { if (Opterr) - printf("%c -- invalid option\n", - thisopt) > "/dev/stderr" + printf("%c -- invalid option\n", thisopt) > "/dev/stderr" if (_opti >= length(argv[Optind])) { Optind++ _opti = 0 @@ -30938,10 +30973,9 @@ Index * ! (exclamation point), !~ operator <3>: Comparison Operators. (line 11) * ! (exclamation point), !~ operator <4>: Regexp Constants. (line 6) -* ! (exclamation point), !~ operator <5>: Computed Regexps. (line 6) -* ! (exclamation point), !~ operator <6>: Case-sensitivity. (line 26) +* ! (exclamation point), !~ operator <5>: Case-sensitivity. (line 26) +* ! (exclamation point), !~ operator <6>: Computed Regexps. (line 6) * ! (exclamation point), !~ operator: Regexp Usage. (line 19) -* " (double quote) in shell commands: Read Terminal. (line 25) * " (double quote), in regexp constants: Computed Regexps. (line 29) * " (double quote), in shell commands: Quoting. (line 54) * # (number sign), #! (executable scripts): Executable Scripts. @@ -31129,8 +31163,7 @@ Index * ? (question mark), regexp operator: Regexp Operators. (line 111) * [] (square brackets), regexp operator: Regexp Operators. (line 56) * \ (backslash): Comments. (line 50) -* \ (backslash) in shell commands: Read Terminal. (line 25) -* \ (backslash), \" escape sequence: Escape Sequences. (line 80) +* \ (backslash), \" escape sequence: Escape Sequences. (line 82) * \ (backslash), \' operator (gawk): GNU Regexp Operators. (line 56) * \ (backslash), \/ escape sequence: Escape Sequences. (line 73) @@ -31173,7 +31206,7 @@ Index * \ (backslash), in bracket expressions: Bracket Expressions. (line 17) * \ (backslash), in escape sequences: Escape Sequences. (line 6) * \ (backslash), in escape sequences, POSIX and: Escape Sequences. 
- (line 116) + (line 118) * \ (backslash), in regexp constants: Computed Regexps. (line 29) * \ (backslash), in shell commands: Quoting. (line 48) * \ (backslash), regexp operator: Regexp Operators. (line 18) @@ -31400,8 +31433,7 @@ Index * awkvars.out file: Options. (line 93) * b debugger command (alias for break): Breakpoint Control. (line 11) * backslash (\): Comments. (line 50) -* backslash (\) in shell commands: Read Terminal. (line 25) -* backslash (\), \" escape sequence: Escape Sequences. (line 80) +* backslash (\), \" escape sequence: Escape Sequences. (line 82) * backslash (\), \' operator (gawk): GNU Regexp Operators. (line 56) * backslash (\), \/ escape sequence: Escape Sequences. (line 73) @@ -31444,7 +31476,7 @@ Index * backslash (\), in bracket expressions: Bracket Expressions. (line 17) * backslash (\), in escape sequences: Escape Sequences. (line 6) * backslash (\), in escape sequences, POSIX and: Escape Sequences. - (line 116) + (line 118) * backslash (\), in regexp constants: Computed Regexps. (line 29) * backslash (\), in shell commands: Quoting. (line 48) * backslash (\), regexp operator: Regexp Operators. (line 18) @@ -31549,7 +31581,7 @@ Index (line 67) * Brian Kernighan's awk <12>: GNU Regexp Operators. (line 83) -* Brian Kernighan's awk <13>: Escape Sequences. (line 120) +* Brian Kernighan's awk <13>: Escape Sequences. (line 122) * Brian Kernighan's awk: When. (line 21) * Brian Kernighan's awk, extensions: BTL. (line 6) * Brian Kernighan's awk, source code: Other Versions. (line 13) @@ -31577,6 +31609,7 @@ Index * built-in variables, conveying information: Auto-set. (line 6) * built-in variables, user-modifiable: User-modified. (line 6) * Busybox Awk: Other Versions. (line 88) +* c.e., See common extensions: Conventions. (line 51) * call by reference: Pass By Value/Reference. (line 47) * call by value: Pass By Value/Reference. @@ -31775,9 +31808,9 @@ Index * dark corner, command-line arguments: Assignment Options. 
(line 43) * dark corner, continue statement: Continue Statement. (line 44) * dark corner, CONVFMT variable: Strings And Numbers. (line 40) -* dark corner, escape sequences: Other Arguments. (line 31) +* dark corner, escape sequences: Other Arguments. (line 35) * dark corner, escape sequences, for metacharacters: Escape Sequences. - (line 138) + (line 140) * dark corner, exit statement: Exit Statement. (line 30) * dark corner, field separators: Field Splitting Summary. (line 46) @@ -32042,7 +32075,6 @@ Index * dollar sign ($), incrementing fields and arrays: Increment Ops. (line 30) * dollar sign ($), regexp operator: Regexp Operators. (line 35) -* double quote (") in shell commands: Read Terminal. (line 25) * double quote ("), in regexp constants: Computed Regexps. (line 29) * double quote ("), in shell commands: Quoting. (line 54) * down debugger command: Execution Stack. (line 21) @@ -32142,8 +32174,8 @@ Index * exclamation point (!), !~ operator <3>: Comparison Operators. (line 11) * exclamation point (!), !~ operator <4>: Regexp Constants. (line 6) -* exclamation point (!), !~ operator <5>: Computed Regexps. (line 6) -* exclamation point (!), !~ operator <6>: Case-sensitivity. (line 26) +* exclamation point (!), !~ operator <5>: Case-sensitivity. (line 26) +* exclamation point (!), !~ operator <6>: Computed Regexps. (line 6) * exclamation point (!), !~ operator: Regexp Usage. (line 19) * exit statement: Exit Statement. (line 6) * exit status, of gawk: Exit Status. (line 6) @@ -32151,7 +32183,7 @@ Index * exit the debugger: Miscellaneous Debugger Commands. (line 99) * exp: Numeric Functions. (line 18) -* expand utility: Very Simple. (line 69) +* expand utility: Very Simple. (line 72) * Expat XML parser library: gawkextlib. (line 35) * exponent: Numeric Functions. (line 18) * expressions: Expressions. (line 6) @@ -32285,7 +32317,7 @@ Index (line 47) * files, message object, specifying directory of: Explaining gettext. 
(line 54) -* files, multiple passes over: Other Arguments. (line 49) +* files, multiple passes over: Other Arguments. (line 53) * files, multiple, duplicating output into: Tee Program. (line 6) * files, output, See output files: Close Files And Pipes. (line 6) @@ -32446,7 +32478,7 @@ Index * gawk, ERRNO variable in <4>: Close Files And Pipes. (line 139) * gawk, ERRNO variable in: Getline. (line 19) -* gawk, escape sequences: Escape Sequences. (line 128) +* gawk, escape sequences: Escape Sequences. (line 130) * gawk, extensions, disabling: Options. (line 254) * gawk, features, adding: Adding Code. (line 6) * gawk, features, advanced: Advanced Features. (line 6) @@ -32670,7 +32702,7 @@ Index * input files, examples: Sample Data Files. (line 6) * input files, reading: Reading Files. (line 6) * input files, running awk without: Read Terminal. (line 6) -* input files, variable assignments and: Other Arguments. (line 19) +* input files, variable assignments and: Other Arguments. (line 23) * input pipeline: Getline/Pipe. (line 9) * input record, length of: String Functions. (line 174) * input redirection: Getline/File. (line 6) @@ -32879,7 +32911,7 @@ Index * mawk utility <2>: Nextfile Statement. (line 47) * mawk utility <3>: Concatenation. (line 36) * mawk utility <4>: Getline/Pipe. (line 62) -* mawk utility: Escape Sequences. (line 128) +* mawk utility: Escape Sequences. (line 130) * maximum precision supported by MPFR library: Auto-set. (line 213) * McIlroy, Doug: Glossary. (line 149) * McPhee, Patrick: Contributors. (line 100) @@ -32892,7 +32924,7 @@ Index (line 54) * messages from extensions: Printing Messages. (line 6) * metacharacters in regular expressions: Regexp Operators. (line 6) -* metacharacters, escape sequences for: Escape Sequences. (line 134) +* metacharacters, escape sequences for: Escape Sequences. (line 136) * minimum precision supported by MPFR library: Auto-set. (line 216) * mktime: Time Functions. 
(line 25) * modifiers, in format specifiers: Format Modifiers. (line 6) @@ -32930,7 +32962,7 @@ Index (line 43) * next file statement: Feature History. (line 169) * next statement <1>: Next Statement. (line 6) -* next statement: Boolean Ops. (line 85) +* next statement: Boolean Ops. (line 93) * next statement, BEGIN/END patterns and: I/O And BEGIN/END. (line 36) * next statement, BEGINFILE/ENDFILE patterns and: BEGINFILE/ENDFILE. (line 49) @@ -33111,14 +33143,14 @@ Index * plus sign (+), += operator: Assignment Ops. (line 82) * plus sign (+), regexp operator: Regexp Operators. (line 105) * pointers to functions: Indirect Calls. (line 6) -* portability: Escape Sequences. (line 98) +* portability: Escape Sequences. (line 100) * portability, #! (executable scripts): Executable Scripts. (line 33) * portability, ** operator and: Arithmetic Ops. (line 81) * portability, **= operator and: Assignment Ops. (line 143) * portability, ARGV variable: Executable Scripts. (line 59) * portability, backslash continuation and: Statements/Lines. (line 30) * portability, backslash in escape sequences: Escape Sequences. - (line 116) + (line 118) * portability, close() function and: Close Files And Pipes. (line 81) * portability, data files as single record: gawk split records. @@ -33157,7 +33189,7 @@ Index * POSIX awk, < operator and: Getline/File. (line 26) * POSIX awk, arithmetic operators and: Arithmetic Ops. (line 30) * POSIX awk, backslashes in string constants: Escape Sequences. - (line 116) + (line 118) * POSIX awk, BEGIN/END patterns: I/O And BEGIN/END. (line 16) * POSIX awk, bracket expressions and: Bracket Expressions. (line 26) * POSIX awk, bracket expressions and, character classes: Bracket Expressions. @@ -33501,7 +33533,6 @@ Index * set watchpoint: Viewing And Changing Data. (line 67) * shadowing of variable values: Definition Syntax. (line 70) -* shell quoting, double quote: Read Terminal. (line 25) * shell quoting, rules for: Quoting. 
(line 6) * shells, piping commands into: Redirection. (line 142) * shells, quoting: Using Shell Variables. @@ -33538,14 +33569,14 @@ Index * sidebar, A Constant's Base Does Not Affect Its Value: Nondecimal-numbers. (line 64) * sidebar, Backslash Before Regular Characters: Escape Sequences. - (line 114) + (line 116) * sidebar, Changing FS Does Not Affect the Fields: Field Splitting Summary. (line 38) * sidebar, Changing NR and FNR: Auto-set. (line 299) * sidebar, Controlling Output Buffering with system(): I/O Functions. (line 138) * sidebar, Escape Sequences for Metacharacters: Escape Sequences. - (line 132) + (line 134) * sidebar, FS and IGNORECASE: Field Splitting Summary. (line 64) * sidebar, Interactive Versus Noninteractive Buffering: I/O Functions. @@ -33745,8 +33776,8 @@ Index * tilde (~), ~ operator <3>: Comparison Operators. (line 11) * tilde (~), ~ operator <4>: Regexp Constants. (line 6) -* tilde (~), ~ operator <5>: Computed Regexps. (line 6) -* tilde (~), ~ operator <6>: Case-sensitivity. (line 26) +* tilde (~), ~ operator <5>: Case-sensitivity. (line 26) +* tilde (~), ~ operator <6>: Computed Regexps. (line 6) * tilde (~), ~ operator: Regexp Usage. (line 19) * time functions: Time Functions. (line 6) * time, alarm clock example program: Alarm Program. (line 11) @@ -33773,7 +33804,7 @@ Index (line 37) * troubleshooting, awk uses FS not IFS: Field Separators. (line 30) * troubleshooting, backslash before nonspecial character: Escape Sequences. - (line 116) + (line 118) * troubleshooting, division: Arithmetic Ops. (line 44) * troubleshooting, fatal errors, field widths, specifying: Constant Size. (line 23) @@ -33829,7 +33860,7 @@ Index * uniq.awk program: Uniq Program. (line 65) * Unix: Glossary. (line 611) * Unix awk, backslashes in escape sequences: Escape Sequences. - (line 128) + (line 130) * Unix awk, close() function and: Close Files And Pipes. (line 131) * Unix awk, password files, field separators and: Command Line Field Separator. 
@@ -33852,7 +33883,7 @@ Index * USR1 signal, for dynamic profiling: Profiling. (line 188) * values, numeric: Basic Data Typing. (line 13) * values, string: Basic Data Typing. (line 13) -* variable assignments and input files: Other Arguments. (line 19) +* variable assignments and input files: Other Arguments. (line 23) * variable typing: Typing and Comparison. (line 9) * variables <1>: Basic Data Typing. (line 6) @@ -33966,8 +33997,8 @@ Index * ~ (tilde), ~ operator <3>: Comparison Operators. (line 11) * ~ (tilde), ~ operator <4>: Regexp Constants. (line 6) -* ~ (tilde), ~ operator <5>: Computed Regexps. (line 6) -* ~ (tilde), ~ operator <6>: Case-sensitivity. (line 26) +* ~ (tilde), ~ operator <5>: Case-sensitivity. (line 26) +* ~ (tilde), ~ operator <6>: Computed Regexps. (line 6) * ~ (tilde), ~ operator: Regexp Usage. (line 19) @@ -33993,533 +34024,533 @@ Node: Getting Started70581 Node: Running gawk73015 Node: One-shot74205 Node: Read Terminal75430 -Ref: Read Terminal-Footnote-177393 -Node: Long77564 -Node: Executable Scripts78958 -Ref: Executable Scripts-Footnote-181759 -Node: Comments81861 -Node: Quoting84334 -Node: DOS Quoting89647 -Node: Sample Data Files90322 -Node: Very Simple92929 -Node: Two Rules97688 -Node: More Complex99582 -Ref: More Complex-Footnote-1102496 -Node: Statements/Lines102581 -Ref: Statements/Lines-Footnote-1107037 -Node: Other Features107302 -Node: When108230 -Ref: When-Footnote-1109986 -Node: Intro Summary110051 -Node: Invoking Gawk110934 -Node: Command Line112449 -Node: Options113240 -Ref: Options-Footnote-1129016 -Node: Other Arguments129041 -Node: Naming Standard Input131703 -Node: Environment Variables132796 -Node: AWKPATH Variable133354 -Ref: AWKPATH Variable-Footnote-1136220 -Ref: AWKPATH Variable-Footnote-2136265 -Node: AWKLIBPATH Variable136525 -Node: Other Environment Variables137284 -Node: Exit Status140941 -Node: Include Files141616 -Node: Loading Shared Libraries145194 -Node: Obsolete146578 -Node: Undocumented147275 -Node: 
Invoking Summary147542 -Node: Regexp149142 -Node: Regexp Usage150601 -Node: Escape Sequences152634 -Node: Regexp Operators158451 -Ref: Regexp Operators-Footnote-1165882 -Ref: Regexp Operators-Footnote-2166029 -Node: Bracket Expressions166127 -Ref: table-char-classes168149 -Node: GNU Regexp Operators171089 -Node: Case-sensitivity174798 -Ref: Case-sensitivity-Footnote-1177690 -Ref: Case-sensitivity-Footnote-2177925 -Node: Leftmost Longest178033 -Node: Computed Regexps179234 -Node: Regexp Summary182606 -Node: Reading Files184075 -Node: Records186167 -Node: awk split records186889 -Node: gawk split records191747 -Ref: gawk split records-Footnote-1196268 -Node: Fields196305 -Ref: Fields-Footnote-1199269 -Node: Nonconstant Fields199355 -Ref: Nonconstant Fields-Footnote-1201585 -Node: Changing Fields201787 -Node: Field Separators207741 -Node: Default Field Splitting210443 -Node: Regexp Field Splitting211560 -Node: Single Character Fields214887 -Node: Command Line Field Separator215946 -Node: Full Line Fields219372 -Ref: Full Line Fields-Footnote-1219880 -Node: Field Splitting Summary219926 -Ref: Field Splitting Summary-Footnote-1223058 -Node: Constant Size223159 -Node: Splitting By Content227765 -Ref: Splitting By Content-Footnote-1231838 -Node: Multiple Line231878 -Ref: Multiple Line-Footnote-1237734 -Node: Getline237913 -Node: Plain Getline240124 -Node: Getline/Variable242219 -Node: Getline/File243366 -Node: Getline/Variable/File244750 -Ref: Getline/Variable/File-Footnote-1246349 -Node: Getline/Pipe246436 -Node: Getline/Variable/Pipe249122 -Node: Getline/Coprocess250229 -Node: Getline/Variable/Coprocess251481 -Node: Getline Notes252218 -Node: Getline Summary255022 -Ref: table-getline-variants255430 -Node: Read Timeout256342 -Ref: Read Timeout-Footnote-1260169 -Node: Command-line directories260227 -Node: Input Summary261131 -Node: Input Exercises264268 -Node: Printing265001 -Node: Print266723 -Node: Print Examples268216 -Node: Output Separators270995 -Node: OFMT273011 
-Node: Printf274369 -Node: Basic Printf275275 -Node: Control Letters276814 -Node: Format Modifiers280805 -Node: Printf Examples286832 -Node: Redirection289296 -Node: Special Files296268 -Node: Special FD296801 -Ref: Special FD-Footnote-1300398 -Node: Special Network300472 -Node: Special Caveats301322 -Node: Close Files And Pipes302118 -Ref: Close Files And Pipes-Footnote-1309279 -Ref: Close Files And Pipes-Footnote-2309427 -Node: Output Summary309577 -Node: Output Exercises310574 -Node: Expressions311254 -Node: Values312439 -Node: Constants313115 -Node: Scalar Constants313795 -Ref: Scalar Constants-Footnote-1314654 -Node: Nondecimal-numbers314904 -Node: Regexp Constants317904 -Node: Using Constant Regexps318379 -Node: Variables321451 -Node: Using Variables322106 -Node: Assignment Options323830 -Node: Conversion325705 -Node: Strings And Numbers326229 -Ref: Strings And Numbers-Footnote-1329291 -Node: Locale influences conversions329400 -Ref: table-locale-affects332117 -Node: All Operators332705 -Node: Arithmetic Ops333335 -Node: Concatenation335840 -Ref: Concatenation-Footnote-1338659 -Node: Assignment Ops338765 -Ref: table-assign-ops343748 -Node: Increment Ops345051 -Node: Truth Values and Conditions348489 -Node: Truth Values349572 -Node: Typing and Comparison350621 -Node: Variable Typing351414 -Node: Comparison Operators355066 -Ref: table-relational-ops355476 -Node: POSIX String Comparison359026 -Ref: POSIX String Comparison-Footnote-1360110 -Node: Boolean Ops360248 -Ref: Boolean Ops-Footnote-1364323 -Node: Conditional Exp364414 -Node: Function Calls366141 -Node: Precedence370021 -Node: Locales373690 -Node: Expressions Summary375321 -Node: Patterns and Actions377862 -Node: Pattern Overview378978 -Node: Regexp Patterns380655 -Node: Expression Patterns381198 -Node: Ranges384978 -Node: BEGIN/END388084 -Node: Using BEGIN/END388846 -Ref: Using BEGIN/END-Footnote-1391582 -Node: I/O And BEGIN/END391688 -Node: BEGINFILE/ENDFILE393959 -Node: Empty396890 -Node: Using Shell 
Variables397207 -Node: Action Overview399490 -Node: Statements401817 -Node: If Statement403665 -Node: While Statement405163 -Node: Do Statement407207 -Node: For Statement408363 -Node: Switch Statement411515 -Node: Break Statement413903 -Node: Continue Statement415944 -Node: Next Statement417769 -Node: Nextfile Statement420159 -Node: Exit Statement422795 -Node: Built-in Variables425199 -Node: User-modified426326 -Ref: User-modified-Footnote-1434015 -Node: Auto-set434077 -Ref: Auto-set-Footnote-1446659 -Ref: Auto-set-Footnote-2446864 -Node: ARGC and ARGV446920 -Node: Pattern Action Summary450824 -Node: Arrays453047 -Node: Array Basics454596 -Node: Array Intro455422 -Ref: figure-array-elements457395 -Ref: Array Intro-Footnote-1459919 -Node: Reference to Elements460047 -Node: Assigning Elements462497 -Node: Array Example462988 -Node: Scanning an Array464720 -Node: Controlling Scanning467721 -Ref: Controlling Scanning-Footnote-1472894 -Node: Delete473210 -Ref: Delete-Footnote-1475961 -Node: Numeric Array Subscripts476018 -Node: Uninitialized Subscripts478201 -Node: Multidimensional479826 -Node: Multiscanning482939 -Node: Arrays of Arrays484528 -Node: Arrays Summary489191 -Node: Functions491296 -Node: Built-in492169 -Node: Calling Built-in493247 -Node: Numeric Functions495235 -Ref: Numeric Functions-Footnote-1499269 -Ref: Numeric Functions-Footnote-2499626 -Ref: Numeric Functions-Footnote-3499674 -Node: String Functions499943 -Ref: String Functions-Footnote-1522940 -Ref: String Functions-Footnote-2523069 -Ref: String Functions-Footnote-3523317 -Node: Gory Details523404 -Ref: table-sub-escapes525177 -Ref: table-sub-proposed526697 -Ref: table-posix-sub528061 -Ref: table-gensub-escapes529601 -Ref: Gory Details-Footnote-1530777 -Node: I/O Functions530928 -Ref: I/O Functions-Footnote-1538038 -Node: Time Functions538185 -Ref: Time Functions-Footnote-1548649 -Ref: Time Functions-Footnote-2548717 -Ref: Time Functions-Footnote-3548875 -Ref: Time Functions-Footnote-4548986 -Ref: 
Time Functions-Footnote-5549098 -Ref: Time Functions-Footnote-6549325 -Node: Bitwise Functions549591 -Ref: table-bitwise-ops550153 -Ref: Bitwise Functions-Footnote-1554398 -Node: Type Functions554582 -Node: I18N Functions555724 -Node: User-defined557369 -Node: Definition Syntax558173 -Ref: Definition Syntax-Footnote-1563486 -Node: Function Example563555 -Ref: Function Example-Footnote-1566195 -Node: Function Caveats566217 -Node: Calling A Function566735 -Node: Variable Scope567690 -Node: Pass By Value/Reference570678 -Node: Return Statement574188 -Node: Dynamic Typing577172 -Node: Indirect Calls578101 -Node: Functions Summary587814 -Node: Library Functions590353 -Ref: Library Functions-Footnote-1593971 -Ref: Library Functions-Footnote-2594114 -Node: Library Names594285 -Ref: Library Names-Footnote-1597758 -Ref: Library Names-Footnote-2597978 -Node: General Functions598064 -Node: Strtonum Function599092 -Node: Assert Function601872 -Node: Round Function605198 -Node: Cliff Random Function606739 -Node: Ordinal Functions607755 -Ref: Ordinal Functions-Footnote-1610820 -Ref: Ordinal Functions-Footnote-2611072 -Node: Join Function611283 -Ref: Join Function-Footnote-1613054 -Node: Getlocaltime Function613254 -Node: Readfile Function616990 -Node: Data File Management618829 -Node: Filetrans Function619461 -Node: Rewind Function623530 -Node: File Checking625088 -Ref: File Checking-Footnote-1626220 -Node: Empty Files626421 -Node: Ignoring Assigns628400 -Node: Getopt Function629954 -Ref: Getopt Function-Footnote-1641257 -Node: Passwd Functions641460 -Ref: Passwd Functions-Footnote-1650439 -Node: Group Functions650527 -Ref: Group Functions-Footnote-1658458 -Node: Walking Arrays658671 -Node: Library Functions Summary660274 -Node: Library Exercises661662 -Node: Sample Programs662942 -Node: Running Examples663712 -Node: Clones664440 -Node: Cut Program665664 -Node: Egrep Program675522 -Ref: Egrep Program-Footnote-1683109 -Node: Id Program683219 -Node: Split Program686873 -Ref: Split 
Program-Footnote-1690411 -Node: Tee Program690539 -Node: Uniq Program693326 -Node: Wc Program700747 -Ref: Wc Program-Footnote-1705012 -Node: Miscellaneous Programs705104 -Node: Dupword Program706317 -Node: Alarm Program708348 -Node: Translate Program713152 -Ref: Translate Program-Footnote-1717543 -Ref: Translate Program-Footnote-2717813 -Node: Labels Program717947 -Ref: Labels Program-Footnote-1721308 -Node: Word Sorting721392 -Node: History Sorting725435 -Node: Extract Program727271 -Node: Simple Sed734807 -Node: Igawk Program737869 -Ref: Igawk Program-Footnote-1752173 -Ref: Igawk Program-Footnote-2752374 -Node: Anagram Program752512 -Node: Signature Program755580 -Node: Programs Summary756827 -Node: Programs Exercises758042 -Node: Advanced Features761693 -Node: Nondecimal Data763641 -Node: Array Sorting765218 -Node: Controlling Array Traversal765915 -Node: Array Sorting Functions774195 -Ref: Array Sorting Functions-Footnote-1778102 -Node: Two-way I/O778296 -Ref: Two-way I/O-Footnote-1783240 -Ref: Two-way I/O-Footnote-2783419 -Node: TCP/IP Networking783501 -Node: Profiling786346 -Node: Advanced Features Summary793888 -Node: Internationalization795752 -Node: I18N and L10N797232 -Node: Explaining gettext797918 -Ref: Explaining gettext-Footnote-1802944 -Ref: Explaining gettext-Footnote-2803128 -Node: Programmer i18n803293 -Ref: Programmer i18n-Footnote-1808087 -Node: Translator i18n808136 -Node: String Extraction808930 -Ref: String Extraction-Footnote-1810063 -Node: Printf Ordering810149 -Ref: Printf Ordering-Footnote-1812931 -Node: I18N Portability812995 -Ref: I18N Portability-Footnote-1815444 -Node: I18N Example815507 -Ref: I18N Example-Footnote-1818213 -Node: Gawk I18N818285 -Node: I18N Summary818923 -Node: Debugger820262 -Node: Debugging821284 -Node: Debugging Concepts821725 -Node: Debugging Terms823581 -Node: Awk Debugging826178 -Node: Sample Debugging Session827070 -Node: Debugger Invocation827590 -Node: Finding The Bug828923 -Node: List of Debugger 
Commands835405 -Node: Breakpoint Control836737 -Node: Debugger Execution Control840401 -Node: Viewing And Changing Data843761 -Node: Execution Stack847119 -Node: Debugger Info848632 -Node: Miscellaneous Debugger Commands852626 -Node: Readline Support857810 -Node: Limitations858702 -Node: Debugging Summary860976 -Node: Arbitrary Precision Arithmetic862144 -Node: Computer Arithmetic863631 -Ref: Computer Arithmetic-Footnote-1868018 -Node: Math Definitions868075 -Ref: table-ieee-formats871364 -Ref: Math Definitions-Footnote-1871904 -Node: MPFR features872007 -Node: FP Math Caution873624 -Ref: FP Math Caution-Footnote-1874674 -Node: Inexactness of computations875043 -Node: Inexact representation875991 -Node: Comparing FP Values877346 -Node: Errors accumulate878310 -Node: Getting Accuracy879743 -Node: Try To Round882402 -Node: Setting precision883301 -Ref: table-predefined-precision-strings883983 -Node: Setting the rounding mode885776 -Ref: table-gawk-rounding-modes886140 -Ref: Setting the rounding mode-Footnote-1889594 -Node: Arbitrary Precision Integers889773 -Ref: Arbitrary Precision Integers-Footnote-1892754 -Node: POSIX Floating Point Problems892903 -Ref: POSIX Floating Point Problems-Footnote-1896779 -Node: Floating point summary896817 -Node: Dynamic Extensions899021 -Node: Extension Intro900573 -Node: Plugin License901838 -Node: Extension Mechanism Outline902523 -Ref: figure-load-extension902947 -Ref: figure-load-new-function904432 -Ref: figure-call-new-function905434 -Node: Extension API Description907418 -Node: Extension API Functions Introduction908868 -Node: General Data Types913735 -Ref: General Data Types-Footnote-1919428 -Node: Requesting Values919727 -Ref: table-value-types-returned920464 -Node: Memory Allocation Functions921422 -Ref: Memory Allocation Functions-Footnote-1924169 -Node: Constructor Functions924265 -Node: Registration Functions926023 -Node: Extension Functions926708 -Node: Exit Callback Functions929010 -Node: Extension Version String930258 
-Node: Input Parsers930908 -Node: Output Wrappers940722 -Node: Two-way processors945238 -Node: Printing Messages947442 -Ref: Printing Messages-Footnote-1948519 -Node: Updating `ERRNO'948671 -Node: Accessing Parameters949410 -Node: Symbol Table Access950640 -Node: Symbol table by name951154 -Node: Symbol table by cookie953130 -Ref: Symbol table by cookie-Footnote-1957263 -Node: Cached values957326 -Ref: Cached values-Footnote-1960830 -Node: Array Manipulation960921 -Ref: Array Manipulation-Footnote-1962019 -Node: Array Data Types962058 -Ref: Array Data Types-Footnote-1964761 -Node: Array Functions964853 -Node: Flattening Arrays968727 -Node: Creating Arrays975579 -Node: Extension API Variables980310 -Node: Extension Versioning980946 -Node: Extension API Informational Variables982847 -Node: Extension API Boilerplate983933 -Node: Finding Extensions987737 -Node: Extension Example988297 -Node: Internal File Description989027 -Node: Internal File Ops993118 -Ref: Internal File Ops-Footnote-11004550 -Node: Using Internal File Ops1004690 -Ref: Using Internal File Ops-Footnote-11007037 -Node: Extension Samples1007305 -Node: Extension Sample File Functions1008829 -Node: Extension Sample Fnmatch1016397 -Node: Extension Sample Fork1017879 -Node: Extension Sample Inplace1019092 -Node: Extension Sample Ord1020767 -Node: Extension Sample Readdir1021603 -Ref: table-readdir-file-types1022459 -Node: Extension Sample Revout1023258 -Node: Extension Sample Rev2way1023849 -Node: Extension Sample Read write array1024590 -Node: Extension Sample Readfile1026469 -Node: Extension Sample API Tests1027569 -Node: Extension Sample Time1028094 -Node: gawkextlib1029409 -Node: Extension summary1032222 -Node: Extension Exercises1035915 -Node: Language History1036637 -Node: V7/SVR3.11038280 -Node: SVR41040600 -Node: POSIX1042042 -Node: BTL1043428 -Node: POSIX/GNU1044162 -Node: Feature History1049878 -Node: Common Extensions1062969 -Node: Ranges and Locales1064281 -Ref: Ranges and 
Locales-Footnote-11068898 -Ref: Ranges and Locales-Footnote-21068925 -Ref: Ranges and Locales-Footnote-31069159 -Node: Contributors1069380 -Node: History summary1074805 -Node: Installation1076174 -Node: Gawk Distribution1077125 -Node: Getting1077609 -Node: Extracting1078433 -Node: Distribution contents1080075 -Node: Unix Installation1085792 -Node: Quick Installation1086409 -Node: Additional Configuration Options1088851 -Node: Configuration Philosophy1090589 -Node: Non-Unix Installation1092940 -Node: PC Installation1093398 -Node: PC Binary Installation1094709 -Node: PC Compiling1096557 -Ref: PC Compiling-Footnote-11099556 -Node: PC Testing1099661 -Node: PC Using1100837 -Node: Cygwin1104989 -Node: MSYS1105798 -Node: VMS Installation1106312 -Node: VMS Compilation1107108 -Ref: VMS Compilation-Footnote-11108330 -Node: VMS Dynamic Extensions1108388 -Node: VMS Installation Details1109761 -Node: VMS Running1112013 -Node: VMS GNV1114847 -Node: VMS Old Gawk1115570 -Node: Bugs1116040 -Node: Other Versions1120044 -Node: Installation summary1126271 -Node: Notes1127327 -Node: Compatibility Mode1128192 -Node: Additions1128974 -Node: Accessing The Source1129899 -Node: Adding Code1131335 -Node: New Ports1137513 -Node: Derived Files1141994 -Ref: Derived Files-Footnote-11147075 -Ref: Derived Files-Footnote-21147109 -Ref: Derived Files-Footnote-31147705 -Node: Future Extensions1147819 -Node: Implementation Limitations1148425 -Node: Extension Design1149673 -Node: Old Extension Problems1150827 -Ref: Old Extension Problems-Footnote-11152344 -Node: Extension New Mechanism Goals1152401 -Ref: Extension New Mechanism Goals-Footnote-11155761 -Node: Extension Other Design Decisions1155950 -Node: Extension Future Growth1158056 -Node: Old Extension Mechanism1158892 -Node: Notes summary1160654 -Node: Basic Concepts1161840 -Node: Basic High Level1162521 -Ref: figure-general-flow1162793 -Ref: figure-process-flow1163392 -Ref: Basic High Level-Footnote-11166621 -Node: Basic Data Typing1166806 -Node: 
Glossary1170134 -Node: Copying1195286 -Node: GNU Free Documentation License1232842 -Node: Index1257978 +Node: Long77455 +Node: Executable Scripts78849 +Ref: Executable Scripts-Footnote-181650 +Node: Comments81752 +Node: Quoting84225 +Node: DOS Quoting89538 +Node: Sample Data Files90213 +Node: Very Simple92820 +Node: Two Rules97705 +Node: More Complex99599 +Ref: More Complex-Footnote-1102513 +Node: Statements/Lines102598 +Ref: Statements/Lines-Footnote-1107054 +Node: Other Features107319 +Node: When108247 +Ref: When-Footnote-1110003 +Node: Intro Summary110068 +Node: Invoking Gawk110951 +Node: Command Line112466 +Node: Options113257 +Ref: Options-Footnote-1129033 +Node: Other Arguments129058 +Node: Naming Standard Input131886 +Node: Environment Variables132979 +Node: AWKPATH Variable133537 +Ref: AWKPATH Variable-Footnote-1136403 +Ref: AWKPATH Variable-Footnote-2136448 +Node: AWKLIBPATH Variable136708 +Node: Other Environment Variables137467 +Node: Exit Status141124 +Node: Include Files141799 +Node: Loading Shared Libraries145377 +Node: Obsolete146761 +Node: Undocumented147458 +Node: Invoking Summary147725 +Node: Regexp149325 +Node: Regexp Usage150784 +Node: Escape Sequences152817 +Node: Regexp Operators158805 +Ref: Regexp Operators-Footnote-1166236 +Ref: Regexp Operators-Footnote-2166383 +Node: Bracket Expressions166481 +Ref: table-char-classes168499 +Node: Leftmost Longest171439 +Node: Computed Regexps172643 +Node: GNU Regexp Operators176021 +Node: Case-sensitivity179727 +Ref: Case-sensitivity-Footnote-1182617 +Ref: Case-sensitivity-Footnote-2182852 +Node: Regexp Summary182960 +Node: Reading Files184429 +Node: Records186521 +Node: awk split records187243 +Node: gawk split records192101 +Ref: gawk split records-Footnote-1196622 +Node: Fields196659 +Ref: Fields-Footnote-1199623 +Node: Nonconstant Fields199709 +Ref: Nonconstant Fields-Footnote-1201939 +Node: Changing Fields202141 +Node: Field Separators208095 +Node: Default Field Splitting210797 +Node: Regexp Field 
Splitting211914 +Node: Single Character Fields215241 +Node: Command Line Field Separator216300 +Node: Full Line Fields219726 +Ref: Full Line Fields-Footnote-1220234 +Node: Field Splitting Summary220280 +Ref: Field Splitting Summary-Footnote-1223412 +Node: Constant Size223513 +Node: Splitting By Content228119 +Ref: Splitting By Content-Footnote-1232192 +Node: Multiple Line232232 +Ref: Multiple Line-Footnote-1238088 +Node: Getline238267 +Node: Plain Getline240478 +Node: Getline/Variable243184 +Node: Getline/File244331 +Node: Getline/Variable/File245715 +Ref: Getline/Variable/File-Footnote-1247314 +Node: Getline/Pipe247401 +Node: Getline/Variable/Pipe250087 +Node: Getline/Coprocess251194 +Node: Getline/Variable/Coprocess252446 +Node: Getline Notes253183 +Node: Getline Summary255987 +Ref: table-getline-variants256395 +Node: Read Timeout257307 +Ref: Read Timeout-Footnote-1261134 +Node: Command-line directories261192 +Node: Input Summary262096 +Node: Input Exercises265233 +Node: Printing265961 +Node: Print267683 +Node: Print Examples269176 +Node: Output Separators271955 +Node: OFMT273971 +Node: Printf275329 +Node: Basic Printf276235 +Node: Control Letters277774 +Node: Format Modifiers281765 +Node: Printf Examples287792 +Node: Redirection290256 +Node: Special Files297228 +Node: Special FD297761 +Ref: Special FD-Footnote-1301358 +Node: Special Network301432 +Node: Special Caveats302282 +Node: Close Files And Pipes303078 +Ref: Close Files And Pipes-Footnote-1310239 +Ref: Close Files And Pipes-Footnote-2310387 +Node: Output Summary310537 +Node: Output Exercises311534 +Node: Expressions312214 +Node: Values313399 +Node: Constants314075 +Node: Scalar Constants314755 +Ref: Scalar Constants-Footnote-1315614 +Node: Nondecimal-numbers315864 +Node: Regexp Constants318864 +Node: Using Constant Regexps319389 +Node: Variables322461 +Node: Using Variables323116 +Node: Assignment Options324840 +Node: Conversion326715 +Node: Strings And Numbers327239 +Ref: Strings And 
Numbers-Footnote-1330301 +Node: Locale influences conversions330410 +Ref: table-locale-affects333127 +Node: All Operators333715 +Node: Arithmetic Ops334345 +Node: Concatenation336850 +Ref: Concatenation-Footnote-1339669 +Node: Assignment Ops339775 +Ref: table-assign-ops344758 +Node: Increment Ops346061 +Node: Truth Values and Conditions349499 +Node: Truth Values350582 +Node: Typing and Comparison351631 +Node: Variable Typing352424 +Node: Comparison Operators356076 +Ref: table-relational-ops356486 +Node: POSIX String Comparison360036 +Ref: POSIX String Comparison-Footnote-1361120 +Node: Boolean Ops361258 +Ref: Boolean Ops-Footnote-1365597 +Node: Conditional Exp365688 +Node: Function Calls367415 +Node: Precedence371295 +Node: Locales374964 +Node: Expressions Summary376595 +Node: Patterns and Actions379136 +Node: Pattern Overview380252 +Node: Regexp Patterns381929 +Node: Expression Patterns382472 +Node: Ranges386252 +Node: BEGIN/END389358 +Node: Using BEGIN/END390120 +Ref: Using BEGIN/END-Footnote-1392856 +Node: I/O And BEGIN/END392962 +Node: BEGINFILE/ENDFILE395233 +Node: Empty398164 +Node: Using Shell Variables398481 +Node: Action Overview400764 +Node: Statements403091 +Node: If Statement404939 +Node: While Statement406437 +Node: Do Statement408481 +Node: For Statement409637 +Node: Switch Statement412789 +Node: Break Statement415177 +Node: Continue Statement417218 +Node: Next Statement419043 +Node: Nextfile Statement421433 +Node: Exit Statement424090 +Node: Built-in Variables426494 +Node: User-modified427621 +Ref: User-modified-Footnote-1435310 +Node: Auto-set435372 +Ref: Auto-set-Footnote-1447954 +Ref: Auto-set-Footnote-2448159 +Node: ARGC and ARGV448215 +Node: Pattern Action Summary452119 +Node: Arrays454342 +Node: Array Basics455891 +Node: Array Intro456717 +Ref: figure-array-elements458690 +Ref: Array Intro-Footnote-1461214 +Node: Reference to Elements461342 +Node: Assigning Elements463792 +Node: Array Example464283 +Node: Scanning an Array466015 +Node: 
Controlling Scanning469016 +Ref: Controlling Scanning-Footnote-1474189 +Node: Delete474505 +Ref: Delete-Footnote-1477256 +Node: Numeric Array Subscripts477313 +Node: Uninitialized Subscripts479496 +Node: Multidimensional481123 +Node: Multiscanning484236 +Node: Arrays of Arrays485825 +Node: Arrays Summary490488 +Node: Functions492593 +Node: Built-in493466 +Node: Calling Built-in494544 +Node: Numeric Functions496532 +Ref: Numeric Functions-Footnote-1500566 +Ref: Numeric Functions-Footnote-2500923 +Ref: Numeric Functions-Footnote-3500971 +Node: String Functions501240 +Ref: String Functions-Footnote-1524237 +Ref: String Functions-Footnote-2524366 +Ref: String Functions-Footnote-3524614 +Node: Gory Details524701 +Ref: table-sub-escapes526474 +Ref: table-sub-proposed527994 +Ref: table-posix-sub529358 +Ref: table-gensub-escapes530898 +Ref: Gory Details-Footnote-1532074 +Node: I/O Functions532225 +Ref: I/O Functions-Footnote-1539335 +Node: Time Functions539482 +Ref: Time Functions-Footnote-1549946 +Ref: Time Functions-Footnote-2550014 +Ref: Time Functions-Footnote-3550172 +Ref: Time Functions-Footnote-4550283 +Ref: Time Functions-Footnote-5550395 +Ref: Time Functions-Footnote-6550622 +Node: Bitwise Functions550888 +Ref: table-bitwise-ops551450 +Ref: Bitwise Functions-Footnote-1555695 +Node: Type Functions555879 +Node: I18N Functions557021 +Node: User-defined558666 +Node: Definition Syntax559470 +Ref: Definition Syntax-Footnote-1564783 +Node: Function Example564852 +Ref: Function Example-Footnote-1567492 +Node: Function Caveats567514 +Node: Calling A Function568032 +Node: Variable Scope568987 +Node: Pass By Value/Reference571975 +Node: Return Statement575485 +Node: Dynamic Typing578469 +Node: Indirect Calls579398 +Node: Functions Summary589111 +Node: Library Functions591650 +Ref: Library Functions-Footnote-1595268 +Ref: Library Functions-Footnote-2595411 +Node: Library Names595582 +Ref: Library Names-Footnote-1599055 +Ref: Library Names-Footnote-2599275 +Node: General 
Functions599361 +Node: Strtonum Function600389 +Node: Assert Function603263 +Node: Round Function606589 +Node: Cliff Random Function608130 +Node: Ordinal Functions609146 +Ref: Ordinal Functions-Footnote-1612211 +Ref: Ordinal Functions-Footnote-2612463 +Node: Join Function612674 +Ref: Join Function-Footnote-1614445 +Node: Getlocaltime Function614645 +Node: Readfile Function618381 +Ref: Readfile Function-Footnote-1620259 +Node: Data File Management620487 +Node: Filetrans Function621119 +Node: Rewind Function625188 +Node: File Checking626746 +Ref: File Checking-Footnote-1627878 +Node: Empty Files628079 +Node: Ignoring Assigns630058 +Node: Getopt Function631612 +Ref: Getopt Function-Footnote-1642876 +Node: Passwd Functions643079 +Ref: Passwd Functions-Footnote-1652058 +Node: Group Functions652146 +Ref: Group Functions-Footnote-1660077 +Node: Walking Arrays660290 +Node: Library Functions Summary661893 +Node: Library Exercises663281 +Node: Sample Programs664561 +Node: Running Examples665331 +Node: Clones666059 +Node: Cut Program667283 +Node: Egrep Program677141 +Ref: Egrep Program-Footnote-1684728 +Node: Id Program684838 +Node: Split Program688492 +Ref: Split Program-Footnote-1692030 +Node: Tee Program692158 +Node: Uniq Program694945 +Node: Wc Program702366 +Ref: Wc Program-Footnote-1706631 +Node: Miscellaneous Programs706723 +Node: Dupword Program707936 +Node: Alarm Program709967 +Node: Translate Program714771 +Ref: Translate Program-Footnote-1719162 +Ref: Translate Program-Footnote-2719432 +Node: Labels Program719566 +Ref: Labels Program-Footnote-1722927 +Node: Word Sorting723011 +Node: History Sorting727054 +Node: Extract Program728890 +Node: Simple Sed736426 +Node: Igawk Program739488 +Ref: Igawk Program-Footnote-1753792 +Ref: Igawk Program-Footnote-2753993 +Node: Anagram Program754131 +Node: Signature Program757199 +Node: Programs Summary758446 +Node: Programs Exercises759661 +Node: Advanced Features763312 +Node: Nondecimal Data765260 +Node: Array Sorting766837 
+Node: Controlling Array Traversal767534 +Node: Array Sorting Functions775814 +Ref: Array Sorting Functions-Footnote-1779721 +Node: Two-way I/O779915 +Ref: Two-way I/O-Footnote-1784859 +Ref: Two-way I/O-Footnote-2785038 +Node: TCP/IP Networking785120 +Node: Profiling787965 +Node: Advanced Features Summary795507 +Node: Internationalization797371 +Node: I18N and L10N798851 +Node: Explaining gettext799537 +Ref: Explaining gettext-Footnote-1804563 +Ref: Explaining gettext-Footnote-2804747 +Node: Programmer i18n804912 +Ref: Programmer i18n-Footnote-1809706 +Node: Translator i18n809755 +Node: String Extraction810549 +Ref: String Extraction-Footnote-1811682 +Node: Printf Ordering811768 +Ref: Printf Ordering-Footnote-1814550 +Node: I18N Portability814614 +Ref: I18N Portability-Footnote-1817063 +Node: I18N Example817126 +Ref: I18N Example-Footnote-1819832 +Node: Gawk I18N819904 +Node: I18N Summary820542 +Node: Debugger821881 +Node: Debugging822903 +Node: Debugging Concepts823344 +Node: Debugging Terms825200 +Node: Awk Debugging827797 +Node: Sample Debugging Session828689 +Node: Debugger Invocation829209 +Node: Finding The Bug830542 +Node: List of Debugger Commands837024 +Node: Breakpoint Control838356 +Node: Debugger Execution Control842020 +Node: Viewing And Changing Data845380 +Node: Execution Stack848738 +Node: Debugger Info850251 +Node: Miscellaneous Debugger Commands854245 +Node: Readline Support859429 +Node: Limitations860321 +Node: Debugging Summary862595 +Node: Arbitrary Precision Arithmetic863763 +Node: Computer Arithmetic865250 +Ref: Computer Arithmetic-Footnote-1869637 +Node: Math Definitions869694 +Ref: table-ieee-formats872983 +Ref: Math Definitions-Footnote-1873523 +Node: MPFR features873626 +Node: FP Math Caution875243 +Ref: FP Math Caution-Footnote-1876293 +Node: Inexactness of computations876662 +Node: Inexact representation877610 +Node: Comparing FP Values878965 +Node: Errors accumulate879929 +Node: Getting Accuracy881362 +Node: Try To Round884021 +Node: 
Setting precision884920 +Ref: table-predefined-precision-strings885602 +Node: Setting the rounding mode887395 +Ref: table-gawk-rounding-modes887759 +Ref: Setting the rounding mode-Footnote-1891213 +Node: Arbitrary Precision Integers891392 +Ref: Arbitrary Precision Integers-Footnote-1894373 +Node: POSIX Floating Point Problems894522 +Ref: POSIX Floating Point Problems-Footnote-1898398 +Node: Floating point summary898436 +Node: Dynamic Extensions900640 +Node: Extension Intro902192 +Node: Plugin License903457 +Node: Extension Mechanism Outline904142 +Ref: figure-load-extension904566 +Ref: figure-load-new-function906051 +Ref: figure-call-new-function907053 +Node: Extension API Description909037 +Node: Extension API Functions Introduction910487 +Node: General Data Types915354 +Ref: General Data Types-Footnote-1921047 +Node: Requesting Values921346 +Ref: table-value-types-returned922083 +Node: Memory Allocation Functions923041 +Ref: Memory Allocation Functions-Footnote-1925788 +Node: Constructor Functions925884 +Node: Registration Functions927642 +Node: Extension Functions928327 +Node: Exit Callback Functions930629 +Node: Extension Version String931877 +Node: Input Parsers932527 +Node: Output Wrappers942341 +Node: Two-way processors946857 +Node: Printing Messages949061 +Ref: Printing Messages-Footnote-1950138 +Node: Updating `ERRNO'950290 +Node: Accessing Parameters951029 +Node: Symbol Table Access952259 +Node: Symbol table by name952773 +Node: Symbol table by cookie954749 +Ref: Symbol table by cookie-Footnote-1958882 +Node: Cached values958945 +Ref: Cached values-Footnote-1962449 +Node: Array Manipulation962540 +Ref: Array Manipulation-Footnote-1963638 +Node: Array Data Types963677 +Ref: Array Data Types-Footnote-1966380 +Node: Array Functions966472 +Node: Flattening Arrays970346 +Node: Creating Arrays977198 +Node: Extension API Variables981929 +Node: Extension Versioning982565 +Node: Extension API Informational Variables984466 +Node: Extension API Boilerplate985552 
+Node: Finding Extensions989356 +Node: Extension Example989916 +Node: Internal File Description990646 +Node: Internal File Ops994737 +Ref: Internal File Ops-Footnote-11006169 +Node: Using Internal File Ops1006309 +Ref: Using Internal File Ops-Footnote-11008656 +Node: Extension Samples1008924 +Node: Extension Sample File Functions1010448 +Node: Extension Sample Fnmatch1018016 +Node: Extension Sample Fork1019498 +Node: Extension Sample Inplace1020711 +Node: Extension Sample Ord1022386 +Node: Extension Sample Readdir1023222 +Ref: table-readdir-file-types1024078 +Node: Extension Sample Revout1024877 +Node: Extension Sample Rev2way1025468 +Node: Extension Sample Read write array1026209 +Node: Extension Sample Readfile1028088 +Node: Extension Sample API Tests1029188 +Node: Extension Sample Time1029713 +Node: gawkextlib1031028 +Node: Extension summary1033841 +Node: Extension Exercises1037534 +Node: Language History1038256 +Node: V7/SVR3.11039899 +Node: SVR41042219 +Node: POSIX1043661 +Node: BTL1045047 +Node: POSIX/GNU1045781 +Node: Feature History1051497 +Node: Common Extensions1064588 +Node: Ranges and Locales1065900 +Ref: Ranges and Locales-Footnote-11070517 +Ref: Ranges and Locales-Footnote-21070544 +Ref: Ranges and Locales-Footnote-31070778 +Node: Contributors1070999 +Node: History summary1076424 +Node: Installation1077793 +Node: Gawk Distribution1078744 +Node: Getting1079228 +Node: Extracting1080052 +Node: Distribution contents1081694 +Node: Unix Installation1087411 +Node: Quick Installation1088028 +Node: Additional Configuration Options1090470 +Node: Configuration Philosophy1092208 +Node: Non-Unix Installation1094559 +Node: PC Installation1095017 +Node: PC Binary Installation1096328 +Node: PC Compiling1098176 +Ref: PC Compiling-Footnote-11101175 +Node: PC Testing1101280 +Node: PC Using1102456 +Node: Cygwin1106608 +Node: MSYS1107417 +Node: VMS Installation1107931 +Node: VMS Compilation1108727 +Ref: VMS Compilation-Footnote-11109949 +Node: VMS Dynamic 
Extensions1110007 +Node: VMS Installation Details1111380 +Node: VMS Running1113632 +Node: VMS GNV1116466 +Node: VMS Old Gawk1117189 +Node: Bugs1117659 +Node: Other Versions1121663 +Node: Installation summary1127890 +Node: Notes1128946 +Node: Compatibility Mode1129811 +Node: Additions1130593 +Node: Accessing The Source1131518 +Node: Adding Code1132954 +Node: New Ports1139132 +Node: Derived Files1143613 +Ref: Derived Files-Footnote-11148694 +Ref: Derived Files-Footnote-21148728 +Ref: Derived Files-Footnote-31149324 +Node: Future Extensions1149438 +Node: Implementation Limitations1150044 +Node: Extension Design1151292 +Node: Old Extension Problems1152446 +Ref: Old Extension Problems-Footnote-11153963 +Node: Extension New Mechanism Goals1154020 +Ref: Extension New Mechanism Goals-Footnote-11157380 +Node: Extension Other Design Decisions1157569 +Node: Extension Future Growth1159675 +Node: Old Extension Mechanism1160511 +Node: Notes summary1162273 +Node: Basic Concepts1163459 +Node: Basic High Level1164140 +Ref: figure-general-flow1164412 +Ref: figure-process-flow1165011 +Ref: Basic High Level-Footnote-11168240 +Node: Basic Data Typing1168425 +Node: Glossary1171753 +Node: Copying1196905 +Node: GNU Free Documentation License1234461 +Node: Index1259597 End Tag Table diff --git a/doc/gawk.texi b/doc/gawk.texi index 2e5dc9bd..53b159f1 100644 --- a/doc/gawk.texi +++ b/doc/gawk.texi @@ -526,10 +526,10 @@ particular records in a file and perform operations upon them. * Escape Sequences:: How to write nonprinting characters. * Regexp Operators:: Regular Expression Operators. * Bracket Expressions:: What can go between @samp{[...]}. -* GNU Regexp Operators:: Operators specific to GNU software. -* Case-sensitivity:: How to do case-insensitive matching. * Leftmost Longest:: How much text matches. * Computed Regexps:: Using Dynamic Regexps. +* GNU Regexp Operators:: Operators specific to GNU software. +* Case-sensitivity:: How to do case-insensitive matching. 
* Regexp Summary:: Regular expressions summary. * Records:: Controlling how data is split into records. @@ -1774,6 +1774,7 @@ They also appear in the index under the heading ``dark corner.'' As noted by the opening quote, though, any coverage of dark corners is, by definition, incomplete. +@cindex c.e., See common extensions Extensions to the standard @command{awk} language that are supported by more than one @command{awk} implementation are marked @ifclear FOR_PRINT @@ -2341,24 +2342,19 @@ For example, on OS/2, it is @kbd{Ctrl-z}.) As an example, the following program prints a friendly piece of advice (from Douglas Adams's @cite{The Hitchhiker's Guide to the Galaxy}), to keep you from worrying about the complexities of computer -programming (@code{BEGIN} is a feature we haven't discussed yet): +programming: @example -$ @kbd{awk "BEGIN @{ print \"Don't Panic!\" @}"} +$ @kbd{awk 'BEGIN @{ print "Don\47t Panic!" @}'} @print{} Don't Panic! @end example -@cindex shell quoting, double quote -@cindex double quote (@code{"}) in shell commands -@cindex @code{"} (double quote) in shell commands -@cindex @code{\} (backslash) in shell commands -@cindex backslash (@code{\}) in shell commands -This program does not read any input. The @samp{\} before each of the -inner double quotes is necessary because of the shell's quoting -rules---in particular because it mixes both single quotes and -double quotes.@footnote{Although we generally recommend the use of single -quotes around the program text, double quotes are needed here in order to -put the single quote into the message.} +@command{awk} executes statements associated with @code{BEGIN} before +reading any input. If there are no other statements in your program, +as is the case here, @command{awk} just stops, instead of trying to read +input it doesn't know how to process. +The @samp{\47} is a magic way of getting a single quote into +the program, without having to engage in ugly shell quoting tricks. 
@quotation NOTE As a side note, if you use Bash as your shell, you should execute the @@ -3046,6 +3042,9 @@ awk '@{ if (length($0) > max) max = length($0) @} END @{ print max @}' data @end example +The code associated with @code{END} executes after all +input has been read; it's the other side of the coin to @code{BEGIN}. + @cindex @command{expand} utility @item Print the length of the longest line in @file{data}: @@ -4132,6 +4131,11 @@ included. As each element of @code{ARGV} is processed, @command{gawk} sets the variable @code{ARGIND} to the index in @code{ARGV} of the current element. +@c FIXME: One day, move the ARGC and ARGV node closer to here. +Changing @code{ARGC} and @code{ARGV} in your @command{awk} program lets +you control how @command{awk} processes the input files; this is described +in more detail in @ref{ARGC and ARGV}. + @cindex input files, variable assignments and @cindex variable assignments and input files The distinction between @value{FN} arguments and variable-assignment @@ -4772,10 +4776,10 @@ regular expressions work, we present more complicated instances. * Escape Sequences:: How to write nonprinting characters. * Regexp Operators:: Regular Expression Operators. * Bracket Expressions:: What can go between @samp{[...]}. -* GNU Regexp Operators:: Operators specific to GNU software. -* Case-sensitivity:: How to do case-insensitive matching. * Leftmost Longest:: How much text matches. * Computed Regexps:: Using Dynamic Regexps. +* GNU Regexp Operators:: Operators specific to GNU software. +* Case-sensitivity:: How to do case-insensitive matching. * Regexp Summary:: Regular expressions summary. @end menu @@ -4985,8 +4989,11 @@ that a maximum of two hexadecimal digits following the @item \/ A literal slash (necessary for regexp constants only). This sequence is used when you want to write a regexp -constant that contains a slash. 
Because the regexp is delimited by -slashes, you need to escape the slash that is part of the pattern, +constant that contains a slash +(such as @code{/.*:\/home\/[[:alnum:]]+:.*/}; the @samp{[[:alnum:]]} +notation is discussed shortly, in @ref{Bracket Expressions}). +Because the regexp is delimited by +slashes, you need to escape any slash that is part of the pattern, in order to tell @command{awk} to keep processing the rest of the regexp. @cindex @code{\} (backslash), @code{\"} escape sequence @@ -4994,8 +5001,10 @@ in order to tell @command{awk} to keep processing the rest of the regexp. @item \" A literal double quote (necessary for string constants only). This sequence is used when you want to write a string -constant that contains a double quote. Because the string is delimited by -double quotes, you need to escape the quote that is part of the string, +constant that contains a double quote +(such as @code{"He said \"hi!\" to her."}). +Because the string is delimited by +double quotes, you need to escape any quote that is part of the string, in order to tell @command{awk} to keep processing the rest of the string. @end table @@ -5556,6 +5565,204 @@ they do not recognize collating symbols or equivalence classes. @c maybe one day ... @c ENDOFRANGE charlist +@node Leftmost Longest +@section How Much Text Matches? + +@cindex regular expressions, leftmost longest match +@c @cindex matching, leftmost longest +Consider the following: + +@example +echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}' +@end example + +This example uses the @code{sub()} function (which we haven't discussed yet; +@pxref{String Functions}) +to make a change to the input record. Here, the regexp @code{/a+/} +indicates ``one or more @samp{a} characters,'' and the replacement +text is @samp{<A>}. + +The input contains four @samp{a} characters. +@command{awk} (and POSIX) regular expressions always match +the leftmost, @emph{longest} sequence of input characters that can +match. 
Thus, all four @samp{a} characters are +replaced with @samp{<A>} in this example: + +@example +$ @kbd{echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'} +@print{} <A>bcd +@end example + +For simple match/no-match tests, this is not so important. But when doing +text matching and substitutions with the @code{match()}, @code{sub()}, @code{gsub()}, +and @code{gensub()} functions, it is very important. +@ifinfo +@xref{String Functions}, +for more information on these functions. +@end ifinfo +Understanding this principle is also important for regexp-based record +and field splitting (@pxref{Records}, +and also @pxref{Field Separators}). + +@node Computed Regexps +@section Using Dynamic Regexps + +@c STARTOFRANGE dregexp +@cindex regular expressions, computed +@c STARTOFRANGE regexpd +@cindex regular expressions, dynamic +@cindex @code{~} (tilde), @code{~} operator +@cindex tilde (@code{~}), @code{~} operator +@cindex @code{!} (exclamation point), @code{!~} operator +@cindex exclamation point (@code{!}), @code{!~} operator +@c @cindex operators, @code{~} +@c @cindex operators, @code{!~} +The righthand side of a @samp{~} or @samp{!~} operator need not be a +regexp constant (i.e., a string of characters between slashes). It may +be any expression. The expression is evaluated and converted to a string +if necessary; the contents of the string are then used as the +regexp. A regexp computed in this way is called a @dfn{dynamic +regexp} or a @dfn{computed regexp}: + +@example +BEGIN @{ digits_regexp = "[[:digit:]]+" @} +$0 ~ digits_regexp @{ print @} +@end example + +@noindent +This sets @code{digits_regexp} to a regexp that describes one or more digits, +and tests whether the input record matches this regexp. + +@quotation NOTE +When using the @samp{~} and @samp{!~} +operators, there is a difference between a regexp constant +enclosed in slashes and a string constant enclosed in double quotes. 
+If you are going to use a string constant, you have to understand that +the string is, in essence, scanned @emph{twice}: the first time when +@command{awk} reads your program, and the second time when it goes to +match the string on the lefthand side of the operator with the pattern +on the right. This is true of any string-valued expression (such as +@code{digits_regexp}, shown previously), not just string constants. +@end quotation + +@cindex regexp constants, slashes vs.@: quotes +@cindex @code{\} (backslash), in regexp constants +@cindex backslash (@code{\}), in regexp constants +@cindex @code{"} (double quote), in regexp constants +@cindex double quote (@code{"}), in regexp constants +What difference does it make if the string is +scanned twice? The answer has to do with escape sequences, and particularly +with backslashes. To get a backslash into a regular expression inside a +string, you have to type two backslashes. + +For example, @code{/\*/} is a regexp constant for a literal @samp{*}. +Only one backslash is needed. To do the same thing with a string, +you have to type @code{"\\*"}. The first backslash escapes the +second one so that the string actually contains the +two characters @samp{\} and @samp{*}. + +@cindex troubleshooting, regexp constants vs.@: string constants +@cindex regexp constants, vs.@: string constants +@cindex string constants, vs.@: regexp constants +Given that you can use both regexp and string constants to describe +regular expressions, which should you use? The answer is ``regexp +constants,'' for several reasons: + +@itemize @value{BULLET} +@item +String constants are more complicated to write and +more difficult to read. Using regexp constants makes your programs +less error-prone. Not understanding the difference between the two +kinds of constants is a common source of errors. + +@item +It is more efficient to use regexp constants. 
@command{awk} can note +that you have supplied a regexp and store it internally in a form that +makes pattern matching more efficient. When using a string constant, +@command{awk} must first convert the string into this internal form and +then perform the pattern matching. + +@item +Using regexp constants is better form; it shows clearly that you +intend a regexp match. +@end itemize + +@cindex sidebar, Using @code{\n} in Bracket Expressions of Dynamic Regexps +@ifdocbook +@docbook +<sidebar><title>Using @code{\n} in Bracket Expressions of Dynamic Regexps</title> +@end docbook + +@cindex regular expressions, dynamic, with embedded newlines +@cindex newlines, in dynamic regexps + +Some versions of @command{awk} do not allow the newline +character to be used inside a bracket expression for a dynamic regexp: + +@example +$ @kbd{awk '$0 ~ "[ \t\n]"'} +@error{} awk: newline in character class [ +@error{} ]... +@error{} source line number 1 +@error{} context is +@error{} >>> <<< +@end example + +@cindex newlines, in regexp constants +But a newline in a regexp constant works with no problem: + +@example +$ @kbd{awk '$0 ~ /[ \t\n]/'} +@kbd{here is a sample line} +@print{} here is a sample line +@kbd{Ctrl-d} +@end example + +@command{gawk} does not have this problem, and it isn't likely to +occur often in practice, but it's worth noting for future reference. + +@docbook +</sidebar> +@end docbook +@end ifdocbook + +@ifnotdocbook +@cartouche +@center @b{Using @code{\n} in Bracket Expressions of Dynamic Regexps} + + +@cindex regular expressions, dynamic, with embedded newlines +@cindex newlines, in dynamic regexps + +Some versions of @command{awk} do not allow the newline +character to be used inside a bracket expression for a dynamic regexp: + +@example +$ @kbd{awk '$0 ~ "[ \t\n]"'} +@error{} awk: newline in character class [ +@error{} ]... 
+@error{} source line number 1 +@error{} context is +@error{} >>> <<< +@end example + +@cindex newlines, in regexp constants +But a newline in a regexp constant works with no problem: + +@example +$ @kbd{awk '$0 ~ /[ \t\n]/'} +@kbd{here is a sample line} +@print{} here is a sample line +@kbd{Ctrl-d} +@end example + +@command{gawk} does not have this problem, and it isn't likely to +occur often in practice, but it's worth noting for future reference. +@end cartouche +@end ifnotdocbook +@c ENDOFRANGE dregexp +@c ENDOFRANGE regexpd + @node GNU Regexp Operators @section @command{gawk}-Specific Regexp Operators @@ -5831,204 +6038,6 @@ Case is always significant in compatibility mode. @c ENDOFRANGE csregexp @c ENDOFRANGE regexpcs -@node Leftmost Longest -@section How Much Text Matches? - -@cindex regular expressions, leftmost longest match -@c @cindex matching, leftmost longest -Consider the following: - -@example -echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}' -@end example - -This example uses the @code{sub()} function (which we haven't discussed yet; -@pxref{String Functions}) -to make a change to the input record. Here, the regexp @code{/a+/} -indicates ``one or more @samp{a} characters,'' and the replacement -text is @samp{<A>}. - -The input contains four @samp{a} characters. -@command{awk} (and POSIX) regular expressions always match -the leftmost, @emph{longest} sequence of input characters that can -match. Thus, all four @samp{a} characters are -replaced with @samp{<A>} in this example: - -@example -$ @kbd{echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'} -@print{} <A>bcd -@end example - -For simple match/no-match tests, this is not so important. But when doing -text matching and substitutions with the @code{match()}, @code{sub()}, @code{gsub()}, -and @code{gensub()} functions, it is very important. -@ifinfo -@xref{String Functions}, -for more information on these functions. 
-@end ifinfo -Understanding this principle is also important for regexp-based record -and field splitting (@pxref{Records}, -and also @pxref{Field Separators}). - -@node Computed Regexps -@section Using Dynamic Regexps - -@c STARTOFRANGE dregexp -@cindex regular expressions, computed -@c STARTOFRANGE regexpd -@cindex regular expressions, dynamic -@cindex @code{~} (tilde), @code{~} operator -@cindex tilde (@code{~}), @code{~} operator -@cindex @code{!} (exclamation point), @code{!~} operator -@cindex exclamation point (@code{!}), @code{!~} operator -@c @cindex operators, @code{~} -@c @cindex operators, @code{!~} -The righthand side of a @samp{~} or @samp{!~} operator need not be a -regexp constant (i.e., a string of characters between slashes). It may -be any expression. The expression is evaluated and converted to a string -if necessary; the contents of the string are then used as the -regexp. A regexp computed in this way is called a @dfn{dynamic -regexp} or a @dfn{computed regexp}: - -@example -BEGIN @{ digits_regexp = "[[:digit:]]+" @} -$0 ~ digits_regexp @{ print @} -@end example - -@noindent -This sets @code{digits_regexp} to a regexp that describes one or more digits, -and tests whether the input record matches this regexp. - -@quotation NOTE -When using the @samp{~} and @samp{!~} -operators, there is a difference between a regexp constant -enclosed in slashes and a string constant enclosed in double quotes. -If you are going to use a string constant, you have to understand that -the string is, in essence, scanned @emph{twice}: the first time when -@command{awk} reads your program, and the second time when it goes to -match the string on the lefthand side of the operator with the pattern -on the right. This is true of any string-valued expression (such as -@code{digits_regexp}, shown previously), not just string constants. 
-@end quotation - -@cindex regexp constants, slashes vs.@: quotes -@cindex @code{\} (backslash), in regexp constants -@cindex backslash (@code{\}), in regexp constants -@cindex @code{"} (double quote), in regexp constants -@cindex double quote (@code{"}), in regexp constants -What difference does it make if the string is -scanned twice? The answer has to do with escape sequences, and particularly -with backslashes. To get a backslash into a regular expression inside a -string, you have to type two backslashes. - -For example, @code{/\*/} is a regexp constant for a literal @samp{*}. -Only one backslash is needed. To do the same thing with a string, -you have to type @code{"\\*"}. The first backslash escapes the -second one so that the string actually contains the -two characters @samp{\} and @samp{*}. - -@cindex troubleshooting, regexp constants vs.@: string constants -@cindex regexp constants, vs.@: string constants -@cindex string constants, vs.@: regexp constants -Given that you can use both regexp and string constants to describe -regular expressions, which should you use? The answer is ``regexp -constants,'' for several reasons: - -@itemize @value{BULLET} -@item -String constants are more complicated to write and -more difficult to read. Using regexp constants makes your programs -less error-prone. Not understanding the difference between the two -kinds of constants is a common source of errors. - -@item -It is more efficient to use regexp constants. @command{awk} can note -that you have supplied a regexp and store it internally in a form that -makes pattern matching more efficient. When using a string constant, -@command{awk} must first convert the string into this internal form and -then perform the pattern matching. - -@item -Using regexp constants is better form; it shows clearly that you -intend a regexp match. 
-@end itemize - -@cindex sidebar, Using @code{\n} in Bracket Expressions of Dynamic Regexps -@ifdocbook -@docbook -<sidebar><title>Using @code{\n} in Bracket Expressions of Dynamic Regexps</title> -@end docbook - -@cindex regular expressions, dynamic, with embedded newlines -@cindex newlines, in dynamic regexps - -Some versions of @command{awk} do not allow the newline -character to be used inside a bracket expression for a dynamic regexp: - -@example -$ @kbd{awk '$0 ~ "[ \t\n]"'} -@error{} awk: newline in character class [ -@error{} ]... -@error{} source line number 1 -@error{} context is -@error{} >>> <<< -@end example - -@cindex newlines, in regexp constants -But a newline in a regexp constant works with no problem: - -@example -$ @kbd{awk '$0 ~ /[ \t\n]/'} -@kbd{here is a sample line} -@print{} here is a sample line -@kbd{Ctrl-d} -@end example - -@command{gawk} does not have this problem, and it isn't likely to -occur often in practice, but it's worth noting for future reference. - -@docbook -</sidebar> -@end docbook -@end ifdocbook - -@ifnotdocbook -@cartouche -@center @b{Using @code{\n} in Bracket Expressions of Dynamic Regexps} - - -@cindex regular expressions, dynamic, with embedded newlines -@cindex newlines, in dynamic regexps - -Some versions of @command{awk} do not allow the newline -character to be used inside a bracket expression for a dynamic regexp: - -@example -$ @kbd{awk '$0 ~ "[ \t\n]"'} -@error{} awk: newline in character class [ -@error{} ]... -@error{} source line number 1 -@error{} context is -@error{} >>> <<< -@end example - -@cindex newlines, in regexp constants -But a newline in a regexp constant works with no problem: - -@example -$ @kbd{awk '$0 ~ /[ \t\n]/'} -@kbd{here is a sample line} -@print{} here is a sample line -@kbd{Ctrl-d} -@end example - -@command{gawk} does not have this problem, and it isn't likely to -occur often in practice, but it's worth noting for future reference. 
-@end cartouche -@end ifnotdocbook -@c ENDOFRANGE dregexp -@c ENDOFRANGE regexpd - @node Regexp Summary @section Summary @@ -7971,32 +7980,48 @@ finished processing the current record, but want to do some special processing on the next record @emph{right now}. For example: @example +# Remove text between /* and */, inclusive @{ - if ((t = index($0, "/*")) != 0) @{ - # value of `tmp' will be "" if t is 1 - tmp = substr($0, 1, t - 1) - u = index(substr($0, t + 2), "*/") - offset = t + 2 - while (u == 0) @{ - if (getline <= 0) @{ + if ((i = index($0, "/*")) != 0) @{ + out = substr($0, 1, i - 1) # leading part of the string + rest = substr($0, i + 2) # ... */ ... + j = index(rest, "*/") # is */ in trailing part? + if (j > 0) @{ + rest = substr(rest, j + 2) # remove comment + @} else @{ + while (j == 0) @{ + # get more text + if (getline <= 0) @{ m = "unexpected EOF or error" m = (m ": " ERRNO) print m > "/dev/stderr" exit - @} - u = index($0, "*/") - offset = 0 - @} - # substr() expression will be "" if */ - # occurred at end of line - $0 = tmp substr($0, offset + u + 2) - @} - print $0 + @} + # build up the line using string concatenation + rest = rest $0 + j = index(rest, "*/") # is */ in trailing part? + if (j != 0) @{ + rest = substr(rest, j + 2) + break + @} + @} + @} + # build up the output line using string concatenation + $0 = out rest + @} + print $0 @} @end example This @command{awk} program deletes C-style comments (@samp{/* @dots{} -*/}) from the input. By replacing the @samp{print $0} with other +*/}) from the input. +It uses a number of features we haven't covered yet, including +string concatenation +(@pxref{Concatenation}) +and the @code{index()} and @code{substr()} built-in +functions +(@pxref{String Functions}). +By replacing the @samp{print $0} with other statements, you could perform more complicated processing on the decommented input, such as searching for matches of a regular expression. 
(This program has a subtle problem---it does not work if one @@ -8687,7 +8712,7 @@ including abstentions, for each item. comments (@samp{/* @dots{} */}) from the input. That program does not work if one comment ends on one line and another one starts later on the same line. -Write a program that does handle multiple comments on the line. +That can be fixed by making one simple change. What is it? @end enumerate @c EXCLUDE END @@ -10517,7 +10542,8 @@ A regexp constant is a regular expression description enclosed in slashes, such as @code{@w{/^beginning and end$/}}. Most regexps used in @command{awk} programs are constant, but the @samp{~} and @samp{!~} matching operators can also match computed or dynamic regexps -(which are just ordinary strings or variables that contain a regexp). +(which are typically just ordinary strings or variables that contain a regexp, +but could be a more complex expression). @c ENDOFRANGE cnst @node Using Constant Regexps @@ -12308,7 +12334,7 @@ program is one way to print lines in between special bracketing lines: @example $1 == "START" @{ interested = ! interested; next @} -interested == 1 @{ print @} +interested @{ print @} $1 == "END" @{ interested = ! interested; next @} @end example @@ -12328,6 +12354,16 @@ bogus input data, but the point is to illustrate the use of `!', so we'll leave well enough alone. @end ignore +Most commonly, the @samp{!} operator is used in the conditions of +@code{if} and @code{while} statements, where it often makes more +sense to phrase the logic in the negative: + +@example +if (! @var{some condition} || @var{some other condition}) @{ + @var{@dots{} do whatever processing @dots{}} +@} +@end example + @cindex @code{next} statement @quotation NOTE The @code{next} statement is discussed in @@ -14120,7 +14156,8 @@ starts over with the first rule in the program. If the @code{nextfile} statement causes the end of the input to be reached, then the code in any @code{END} rules is executed. 
An exception to this is when @code{nextfile} is invoked during execution of any statement in an -@code{END} rule; In this case, it causes the program to stop immediately. @xref{BEGIN/END}. +@code{END} rule; in this case, it causes the program to stop immediately. +@xref{BEGIN/END}. The @code{nextfile} statement is useful when there are many @value{DF}s to process but it isn't necessary to process every record in every file. @@ -14130,13 +14167,10 @@ would have to continue scanning the unwanted records. The @code{nextfile} statement accomplishes this much more efficiently. In @command{gawk}, execution of @code{nextfile} causes additional things -to happen: -any @code{ENDFILE} rules are executed except in the case as -mentioned below, -@code{ARGIND} is incremented, -and -any @code{BEGINFILE} rules are executed. -(@code{ARGIND} hasn't been introduced yet. @xref{Built-in Variables}.) +to happen: any @code{ENDFILE} rules are executed if @command{gawk} is +not currently in an @code{END} or @code{BEGINFILE} rule, @code{ARGIND} is +incremented, and any @code{BEGINFILE} rules are executed. (@code{ARGIND} +hasn't been introduced yet. @xref{Built-in Variables}.) 
With @command{gawk}, @code{nextfile} is useful inside a @code{BEGINFILE} rule to skip over a file that would otherwise cause @command{gawk} @@ -16150,7 +16184,7 @@ $ @kbd{echo 'line 1} > @kbd{line 2} > @kbd{line 3' | awk '@{ l[lines] = $0; ++lines @}} > @kbd{END @{} -> @kbd{for (i = lines-1; i >= 0; --i)} +> @kbd{for (i = lines - 1; i >= 0; i--)} > @kbd{print l[i]} > @kbd{@}'} @print{} line 3 @@ -16174,7 +16208,7 @@ The following version of the program works correctly: @example @{ l[lines++] = $0 @} END @{ - for (i = lines - 1; i >= 0; --i) + for (i = lines - 1; i >= 0; i--) print l[i] @} @end example @@ -20436,8 +20470,9 @@ function mystrtonum(str, ret, n, i, k, c) ret = 0 for (i = 1; i <= n; i++) @{ c = substr(str, i, 1) - if ((k = index("01234567", c)) > 0) - k-- # adjust for 1-basing in awk + # index() returns 0 if c not in string, + # includes c == "0" + k = index("1234567", c) ret = ret * 8 + k @} @@ -20449,6 +20484,8 @@ function mystrtonum(str, ret, n, i, k, c) for (i = 1; i <= n; i++) @{ c = substr(str, i, 1) c = tolower(c) + # index() returns 0 if c not in string, + # includes c == "0" k = index("123456789abcdef", c) ret = ret * 16 + k @@ -21051,7 +21088,12 @@ function readfile(file, tmp, contents) This function reads from @code{file} one record at a time, building up the full contents of the file in the local variable @code{contents}. -It works, but is not necessarily efficient. +It works, but is not necessarily +@c 8/2014. Thanks to BWK for pointing this out: +efficient.@footnote{Execution time grows quadratically in the size of +the input; for each record, @command{awk} has to allocate a bigger +internal buffer for @code{contents}, copy the old contents into it, +and then append the contents of the new record.} The following function, based on a suggestion by Denis Shirokov, reads the entire contents of the named file in one shot: @@ -21724,8 +21766,7 @@ it is not an option, and it ends option processing. 
Continuing on: i = index(options, thisopt) if (i == 0) @{ if (Opterr) - printf("%c -- invalid option\n", - thisopt) > "/dev/stderr" + printf("%c -- invalid option\n", thisopt) > "/dev/stderr" if (_opti >= length(argv[Optind])) @{ Optind++ _opti = 0 diff --git a/doc/gawktexi.in b/doc/gawktexi.in index 954b7db0..d9846cbe 100644 --- a/doc/gawktexi.in +++ b/doc/gawktexi.in @@ -521,10 +521,10 @@ particular records in a file and perform operations upon them. * Escape Sequences:: How to write nonprinting characters. * Regexp Operators:: Regular Expression Operators. * Bracket Expressions:: What can go between @samp{[...]}. -* GNU Regexp Operators:: Operators specific to GNU software. -* Case-sensitivity:: How to do case-insensitive matching. * Leftmost Longest:: How much text matches. * Computed Regexps:: Using Dynamic Regexps. +* GNU Regexp Operators:: Operators specific to GNU software. +* Case-sensitivity:: How to do case-insensitive matching. * Regexp Summary:: Regular expressions summary. * Records:: Controlling how data is split into records. @@ -1741,6 +1741,7 @@ They also appear in the index under the heading ``dark corner.'' As noted by the opening quote, though, any coverage of dark corners is, by definition, incomplete. +@cindex c.e., See common extensions Extensions to the standard @command{awk} language that are supported by more than one @command{awk} implementation are marked @ifclear FOR_PRINT @@ -2308,24 +2309,19 @@ For example, on OS/2, it is @kbd{Ctrl-z}.) As an example, the following program prints a friendly piece of advice (from Douglas Adams's @cite{The Hitchhiker's Guide to the Galaxy}), to keep you from worrying about the complexities of computer -programming (@code{BEGIN} is a feature we haven't discussed yet): +programming: @example -$ @kbd{awk "BEGIN @{ print \"Don't Panic!\" @}"} +$ @kbd{awk 'BEGIN @{ print "Don\47t Panic!" @}'} @print{} Don't Panic! 
@end example -@cindex shell quoting, double quote -@cindex double quote (@code{"}) in shell commands -@cindex @code{"} (double quote) in shell commands -@cindex @code{\} (backslash) in shell commands -@cindex backslash (@code{\}) in shell commands -This program does not read any input. The @samp{\} before each of the -inner double quotes is necessary because of the shell's quoting -rules---in particular because it mixes both single quotes and -double quotes.@footnote{Although we generally recommend the use of single -quotes around the program text, double quotes are needed here in order to -put the single quote into the message.} +@command{awk} executes statements associated with @code{BEGIN} before +reading any input. If there are no other statements in your program, +as is the case here, @command{awk} just stops, instead of trying to read +input it doesn't know how to process. +The @samp{\47} is a magic way of getting a single quote into +the program, without having to engage in ugly shell quoting tricks. @quotation NOTE As a side note, if you use Bash as your shell, you should execute the @@ -2957,6 +2953,9 @@ awk '@{ if (length($0) > max) max = length($0) @} END @{ print max @}' data @end example +The code associated with @code{END} executes after all +input has been read; it's the other side of the coin to @code{BEGIN}. + @cindex @command{expand} utility @item Print the length of the longest line in @file{data}: @@ -4043,6 +4042,11 @@ included. As each element of @code{ARGV} is processed, @command{gawk} sets the variable @code{ARGIND} to the index in @code{ARGV} of the current element. +@c FIXME: One day, move the ARGC and ARGV node closer to here. +Changing @code{ARGC} and @code{ARGV} in your @command{awk} program lets +you control how @command{awk} processes the input files; this is described +in more detail in @ref{ARGC and ARGV}. 
+ @cindex input files, variable assignments and @cindex variable assignments and input files The distinction between @value{FN} arguments and variable-assignment @@ -4683,10 +4687,10 @@ regular expressions work, we present more complicated instances. * Escape Sequences:: How to write nonprinting characters. * Regexp Operators:: Regular Expression Operators. * Bracket Expressions:: What can go between @samp{[...]}. -* GNU Regexp Operators:: Operators specific to GNU software. -* Case-sensitivity:: How to do case-insensitive matching. * Leftmost Longest:: How much text matches. * Computed Regexps:: Using Dynamic Regexps. +* GNU Regexp Operators:: Operators specific to GNU software. +* Case-sensitivity:: How to do case-insensitive matching. * Regexp Summary:: Regular expressions summary. @end menu @@ -4896,8 +4900,11 @@ that a maximum of two hexadecimal digits following the @item \/ A literal slash (necessary for regexp constants only). This sequence is used when you want to write a regexp -constant that contains a slash. Because the regexp is delimited by -slashes, you need to escape the slash that is part of the pattern, +constant that contains a slash +(such as @code{/.*:\/home\/[[:alnum:]]+:.*/}; the @samp{[[:alnum:]]} +notation is discussed shortly, in @ref{Bracket Expressions}). +Because the regexp is delimited by +slashes, you need to escape any slash that is part of the pattern, in order to tell @command{awk} to keep processing the rest of the regexp. @cindex @code{\} (backslash), @code{\"} escape sequence @@ -4905,8 +4912,10 @@ in order to tell @command{awk} to keep processing the rest of the regexp. @item \" A literal double quote (necessary for string constants only). This sequence is used when you want to write a string -constant that contains a double quote. Because the string is delimited by -double quotes, you need to escape the quote that is part of the string, +constant that contains a double quote +(such as @code{"He said \"hi!\" to her."}). 
+Because the string is delimited by +double quotes, you need to escape any quote that is part of the string, in order to tell @command{awk} to keep processing the rest of the string. @end table @@ -5384,6 +5393,160 @@ they do not recognize collating symbols or equivalence classes. @c maybe one day ... @c ENDOFRANGE charlist +@node Leftmost Longest +@section How Much Text Matches? + +@cindex regular expressions, leftmost longest match +@c @cindex matching, leftmost longest +Consider the following: + +@example +echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}' +@end example + +This example uses the @code{sub()} function (which we haven't discussed yet; +@pxref{String Functions}) +to make a change to the input record. Here, the regexp @code{/a+/} +indicates ``one or more @samp{a} characters,'' and the replacement +text is @samp{<A>}. + +The input contains four @samp{a} characters. +@command{awk} (and POSIX) regular expressions always match +the leftmost, @emph{longest} sequence of input characters that can +match. Thus, all four @samp{a} characters are +replaced with @samp{<A>} in this example: + +@example +$ @kbd{echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'} +@print{} <A>bcd +@end example + +For simple match/no-match tests, this is not so important. But when doing +text matching and substitutions with the @code{match()}, @code{sub()}, @code{gsub()}, +and @code{gensub()} functions, it is very important. +@ifinfo +@xref{String Functions}, +for more information on these functions. +@end ifinfo +Understanding this principle is also important for regexp-based record +and field splitting (@pxref{Records}, +and also @pxref{Field Separators}). 
+ +@node Computed Regexps +@section Using Dynamic Regexps + +@c STARTOFRANGE dregexp +@cindex regular expressions, computed +@c STARTOFRANGE regexpd +@cindex regular expressions, dynamic +@cindex @code{~} (tilde), @code{~} operator +@cindex tilde (@code{~}), @code{~} operator +@cindex @code{!} (exclamation point), @code{!~} operator +@cindex exclamation point (@code{!}), @code{!~} operator +@c @cindex operators, @code{~} +@c @cindex operators, @code{!~} +The righthand side of a @samp{~} or @samp{!~} operator need not be a +regexp constant (i.e., a string of characters between slashes). It may +be any expression. The expression is evaluated and converted to a string +if necessary; the contents of the string are then used as the +regexp. A regexp computed in this way is called a @dfn{dynamic +regexp} or a @dfn{computed regexp}: + +@example +BEGIN @{ digits_regexp = "[[:digit:]]+" @} +$0 ~ digits_regexp @{ print @} +@end example + +@noindent +This sets @code{digits_regexp} to a regexp that describes one or more digits, +and tests whether the input record matches this regexp. + +@quotation NOTE +When using the @samp{~} and @samp{!~} +operators, there is a difference between a regexp constant +enclosed in slashes and a string constant enclosed in double quotes. +If you are going to use a string constant, you have to understand that +the string is, in essence, scanned @emph{twice}: the first time when +@command{awk} reads your program, and the second time when it goes to +match the string on the lefthand side of the operator with the pattern +on the right. This is true of any string-valued expression (such as +@code{digits_regexp}, shown previously), not just string constants. 
+@end quotation + +@cindex regexp constants, slashes vs.@: quotes +@cindex @code{\} (backslash), in regexp constants +@cindex backslash (@code{\}), in regexp constants +@cindex @code{"} (double quote), in regexp constants +@cindex double quote (@code{"}), in regexp constants +What difference does it make if the string is +scanned twice? The answer has to do with escape sequences, and particularly +with backslashes. To get a backslash into a regular expression inside a +string, you have to type two backslashes. + +For example, @code{/\*/} is a regexp constant for a literal @samp{*}. +Only one backslash is needed. To do the same thing with a string, +you have to type @code{"\\*"}. The first backslash escapes the +second one so that the string actually contains the +two characters @samp{\} and @samp{*}. + +@cindex troubleshooting, regexp constants vs.@: string constants +@cindex regexp constants, vs.@: string constants +@cindex string constants, vs.@: regexp constants +Given that you can use both regexp and string constants to describe +regular expressions, which should you use? The answer is ``regexp +constants,'' for several reasons: + +@itemize @value{BULLET} +@item +String constants are more complicated to write and +more difficult to read. Using regexp constants makes your programs +less error-prone. Not understanding the difference between the two +kinds of constants is a common source of errors. + +@item +It is more efficient to use regexp constants. @command{awk} can note +that you have supplied a regexp and store it internally in a form that +makes pattern matching more efficient. When using a string constant, +@command{awk} must first convert the string into this internal form and +then perform the pattern matching. + +@item +Using regexp constants is better form; it shows clearly that you +intend a regexp match. 
+@end itemize + +@sidebar Using @code{\n} in Bracket Expressions of Dynamic Regexps +@cindex regular expressions, dynamic, with embedded newlines +@cindex newlines, in dynamic regexps + +Some versions of @command{awk} do not allow the newline +character to be used inside a bracket expression for a dynamic regexp: + +@example +$ @kbd{awk '$0 ~ "[ \t\n]"'} +@error{} awk: newline in character class [ +@error{} ]... +@error{} source line number 1 +@error{} context is +@error{} >>> <<< +@end example + +@cindex newlines, in regexp constants +But a newline in a regexp constant works with no problem: + +@example +$ @kbd{awk '$0 ~ /[ \t\n]/'} +@kbd{here is a sample line} +@print{} here is a sample line +@kbd{Ctrl-d} +@end example + +@command{gawk} does not have this problem, and it isn't likely to +occur often in practice, but it's worth noting for future reference. +@end sidebar +@c ENDOFRANGE dregexp +@c ENDOFRANGE regexpd + @node GNU Regexp Operators @section @command{gawk}-Specific Regexp Operators @@ -5659,160 +5822,6 @@ Case is always significant in compatibility mode. @c ENDOFRANGE csregexp @c ENDOFRANGE regexpcs -@node Leftmost Longest -@section How Much Text Matches? - -@cindex regular expressions, leftmost longest match -@c @cindex matching, leftmost longest -Consider the following: - -@example -echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}' -@end example - -This example uses the @code{sub()} function (which we haven't discussed yet; -@pxref{String Functions}) -to make a change to the input record. Here, the regexp @code{/a+/} -indicates ``one or more @samp{a} characters,'' and the replacement -text is @samp{<A>}. - -The input contains four @samp{a} characters. -@command{awk} (and POSIX) regular expressions always match -the leftmost, @emph{longest} sequence of input characters that can -match. 
Thus, all four @samp{a} characters are -replaced with @samp{<A>} in this example: - -@example -$ @kbd{echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'} -@print{} <A>bcd -@end example - -For simple match/no-match tests, this is not so important. But when doing -text matching and substitutions with the @code{match()}, @code{sub()}, @code{gsub()}, -and @code{gensub()} functions, it is very important. -@ifinfo -@xref{String Functions}, -for more information on these functions. -@end ifinfo -Understanding this principle is also important for regexp-based record -and field splitting (@pxref{Records}, -and also @pxref{Field Separators}). - -@node Computed Regexps -@section Using Dynamic Regexps - -@c STARTOFRANGE dregexp -@cindex regular expressions, computed -@c STARTOFRANGE regexpd -@cindex regular expressions, dynamic -@cindex @code{~} (tilde), @code{~} operator -@cindex tilde (@code{~}), @code{~} operator -@cindex @code{!} (exclamation point), @code{!~} operator -@cindex exclamation point (@code{!}), @code{!~} operator -@c @cindex operators, @code{~} -@c @cindex operators, @code{!~} -The righthand side of a @samp{~} or @samp{!~} operator need not be a -regexp constant (i.e., a string of characters between slashes). It may -be any expression. The expression is evaluated and converted to a string -if necessary; the contents of the string are then used as the -regexp. A regexp computed in this way is called a @dfn{dynamic -regexp} or a @dfn{computed regexp}: - -@example -BEGIN @{ digits_regexp = "[[:digit:]]+" @} -$0 ~ digits_regexp @{ print @} -@end example - -@noindent -This sets @code{digits_regexp} to a regexp that describes one or more digits, -and tests whether the input record matches this regexp. - -@quotation NOTE -When using the @samp{~} and @samp{!~} -operators, there is a difference between a regexp constant -enclosed in slashes and a string constant enclosed in double quotes. 
-If you are going to use a string constant, you have to understand that -the string is, in essence, scanned @emph{twice}: the first time when -@command{awk} reads your program, and the second time when it goes to -match the string on the lefthand side of the operator with the pattern -on the right. This is true of any string-valued expression (such as -@code{digits_regexp}, shown previously), not just string constants. -@end quotation - -@cindex regexp constants, slashes vs.@: quotes -@cindex @code{\} (backslash), in regexp constants -@cindex backslash (@code{\}), in regexp constants -@cindex @code{"} (double quote), in regexp constants -@cindex double quote (@code{"}), in regexp constants -What difference does it make if the string is -scanned twice? The answer has to do with escape sequences, and particularly -with backslashes. To get a backslash into a regular expression inside a -string, you have to type two backslashes. - -For example, @code{/\*/} is a regexp constant for a literal @samp{*}. -Only one backslash is needed. To do the same thing with a string, -you have to type @code{"\\*"}. The first backslash escapes the -second one so that the string actually contains the -two characters @samp{\} and @samp{*}. - -@cindex troubleshooting, regexp constants vs.@: string constants -@cindex regexp constants, vs.@: string constants -@cindex string constants, vs.@: regexp constants -Given that you can use both regexp and string constants to describe -regular expressions, which should you use? The answer is ``regexp -constants,'' for several reasons: - -@itemize @value{BULLET} -@item -String constants are more complicated to write and -more difficult to read. Using regexp constants makes your programs -less error-prone. Not understanding the difference between the two -kinds of constants is a common source of errors. - -@item -It is more efficient to use regexp constants. 
@command{awk} can note -that you have supplied a regexp and store it internally in a form that -makes pattern matching more efficient. When using a string constant, -@command{awk} must first convert the string into this internal form and -then perform the pattern matching. - -@item -Using regexp constants is better form; it shows clearly that you -intend a regexp match. -@end itemize - -@sidebar Using @code{\n} in Bracket Expressions of Dynamic Regexps -@cindex regular expressions, dynamic, with embedded newlines -@cindex newlines, in dynamic regexps - -Some versions of @command{awk} do not allow the newline -character to be used inside a bracket expression for a dynamic regexp: - -@example -$ @kbd{awk '$0 ~ "[ \t\n]"'} -@error{} awk: newline in character class [ -@error{} ]... -@error{} source line number 1 -@error{} context is -@error{} >>> <<< -@end example - -@cindex newlines, in regexp constants -But a newline in a regexp constant works with no problem: - -@example -$ @kbd{awk '$0 ~ /[ \t\n]/'} -@kbd{here is a sample line} -@print{} here is a sample line -@kbd{Ctrl-d} -@end example - -@command{gawk} does not have this problem, and it isn't likely to -occur often in practice, but it's worth noting for future reference. -@end sidebar -@c ENDOFRANGE dregexp -@c ENDOFRANGE regexpd - @node Regexp Summary @section Summary @@ -7573,32 +7582,48 @@ finished processing the current record, but want to do some special processing on the next record @emph{right now}. For example: @example +# Remove text between /* and */, inclusive @{ - if ((t = index($0, "/*")) != 0) @{ - # value of `tmp' will be "" if t is 1 - tmp = substr($0, 1, t - 1) - u = index(substr($0, t + 2), "*/") - offset = t + 2 - while (u == 0) @{ - if (getline <= 0) @{ + if ((i = index($0, "/*")) != 0) @{ + out = substr($0, 1, i - 1) # leading part of the string + rest = substr($0, i + 2) # ... */ ... + j = index(rest, "*/") # is */ in trailing part? 
+ if (j > 0) @{ + rest = substr(rest, j + 2) # remove comment + @} else @{ + while (j == 0) @{ + # get more text + if (getline <= 0) @{ m = "unexpected EOF or error" m = (m ": " ERRNO) print m > "/dev/stderr" exit - @} - u = index($0, "*/") - offset = 0 - @} - # substr() expression will be "" if */ - # occurred at end of line - $0 = tmp substr($0, offset + u + 2) - @} - print $0 + @} + # build up the line using string concatenation + rest = rest $0 + j = index(rest, "*/") # is */ in trailing part? + if (j != 0) @{ + rest = substr(rest, j + 2) + break + @} + @} + @} + # build up the output line using string concatenation + $0 = out rest + @} + print $0 @} @end example This @command{awk} program deletes C-style comments (@samp{/* @dots{} -*/}) from the input. By replacing the @samp{print $0} with other +*/}) from the input. +It uses a number of features we haven't covered yet, including +string concatenation +(@pxref{Concatenation}) +and the @code{index()} and @code{substr()} built-in +functions +(@pxref{String Functions}). +By replacing the @samp{print $0} with other statements, you could perform more complicated processing on the decommented input, such as searching for matches of a regular expression. (This program has a subtle problem---it does not work if one @@ -8289,7 +8314,7 @@ including abstentions, for each item. comments (@samp{/* @dots{} */}) from the input. That program does not work if one comment ends on one line and another one starts later on the same line. -Write a program that does handle multiple comments on the line. +That can be fixed by making one simple change. What is it? @end enumerate @c EXCLUDE END @@ -9990,7 +10015,8 @@ A regexp constant is a regular expression description enclosed in slashes, such as @code{@w{/^beginning and end$/}}. 
Most regexps used in @command{awk} programs are constant, but the @samp{~} and @samp{!~} matching operators can also match computed or dynamic regexps -(which are just ordinary strings or variables that contain a regexp). +(which are typically just ordinary strings or variables that contain a regexp, +but could be a more complex expression). @c ENDOFRANGE cnst @node Using Constant Regexps @@ -11642,7 +11668,7 @@ program is one way to print lines in between special bracketing lines: @example $1 == "START" @{ interested = ! interested; next @} -interested == 1 @{ print @} +interested @{ print @} $1 == "END" @{ interested = ! interested; next @} @end example @@ -11662,6 +11688,16 @@ bogus input data, but the point is to illustrate the use of `!', so we'll leave well enough alone. @end ignore +Most commonly, the @samp{!} operator is used in the conditions of +@code{if} and @code{while} statements, where it often makes more +sense to phrase the logic in the negative: + +@example +if (! @var{some condition} || @var{some other condition}) @{ + @var{@dots{} do whatever processing @dots{}} +@} +@end example + @cindex @code{next} statement @quotation NOTE The @code{next} statement is discussed in @@ -13454,7 +13490,8 @@ starts over with the first rule in the program. If the @code{nextfile} statement causes the end of the input to be reached, then the code in any @code{END} rules is executed. An exception to this is when @code{nextfile} is invoked during execution of any statement in an -@code{END} rule; In this case, it causes the program to stop immediately. @xref{BEGIN/END}. +@code{END} rule; in this case, it causes the program to stop immediately. +@xref{BEGIN/END}. The @code{nextfile} statement is useful when there are many @value{DF}s to process but it isn't necessary to process every record in every file. @@ -13464,13 +13501,10 @@ would have to continue scanning the unwanted records. The @code{nextfile} statement accomplishes this much more efficiently. 
In @command{gawk}, execution of @code{nextfile} causes additional things -to happen: -any @code{ENDFILE} rules are executed except in the case as -mentioned below, -@code{ARGIND} is incremented, -and -any @code{BEGINFILE} rules are executed. -(@code{ARGIND} hasn't been introduced yet. @xref{Built-in Variables}.) +to happen: any @code{ENDFILE} rules are executed if @command{gawk} is +not currently in an @code{END} or @code{BEGINFILE} rule, @code{ARGIND} is +incremented, and any @code{BEGINFILE} rules are executed. (@code{ARGIND} +hasn't been introduced yet. @xref{Built-in Variables}.) With @command{gawk}, @code{nextfile} is useful inside a @code{BEGINFILE} rule to skip over a file that would otherwise cause @command{gawk} @@ -15438,7 +15472,7 @@ $ @kbd{echo 'line 1} > @kbd{line 2} > @kbd{line 3' | awk '@{ l[lines] = $0; ++lines @}} > @kbd{END @{} -> @kbd{for (i = lines-1; i >= 0; --i)} +> @kbd{for (i = lines - 1; i >= 0; i--)} > @kbd{print l[i]} > @kbd{@}'} @print{} line 3 @@ -15462,7 +15496,7 @@ The following version of the program works correctly: @example @{ l[lines++] = $0 @} END @{ - for (i = lines - 1; i >= 0; --i) + for (i = lines - 1; i >= 0; i--) print l[i] @} @end example @@ -19563,8 +19597,9 @@ function mystrtonum(str, ret, n, i, k, c) ret = 0 for (i = 1; i <= n; i++) @{ c = substr(str, i, 1) - if ((k = index("01234567", c)) > 0) - k-- # adjust for 1-basing in awk + # index() returns 0 if c not in string, + # includes c == "0" + k = index("1234567", c) ret = ret * 8 + k @} @@ -19576,6 +19611,8 @@ function mystrtonum(str, ret, n, i, k, c) for (i = 1; i <= n; i++) @{ c = substr(str, i, 1) c = tolower(c) + # index() returns 0 if c not in string, + # includes c == "0" k = index("123456789abcdef", c) ret = ret * 16 + k @@ -20178,7 +20215,12 @@ function readfile(file, tmp, contents) This function reads from @code{file} one record at a time, building up the full contents of the file in the local variable @code{contents}. 
-It works, but is not necessarily efficient. +It works, but is not necessarily +@c 8/2014. Thanks to BWK for pointing this out: +efficient.@footnote{Execution time grows quadratically in the size of +the input; for each record, @command{awk} has to allocate a bigger +internal buffer for @code{contents}, copy the old contents into it, +and then append the contents of the new record.} The following function, based on a suggestion by Denis Shirokov, reads the entire contents of the named file in one shot: @@ -20822,8 +20864,7 @@ it is not an option, and it ends option processing. Continuing on: i = index(options, thisopt) if (i == 0) @{ if (Opterr) - printf("%c -- invalid option\n", - thisopt) > "/dev/stderr" + printf("%c -- invalid option\n", thisopt) > "/dev/stderr" if (_opti >= length(argv[Optind])) @{ Optind++ _opti = 0 |