diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2014-09-04 09:49:44 +0300 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2014-09-04 09:49:44 +0300 |
commit | 7448f28d356fc5cd8d9117111baea3a549e0930e (patch) | |
tree | f24d82d8b121d8321cfc0366dabad236d3f3dac3 | |
parent | a205df7903bce201577df4f7049c190e283f1ea4 (diff) | |
parent | 8beb9796b17b6ca48eb62df8fd3d31421e43c761 (diff) | |
download | egawk-7448f28d356fc5cd8d9117111baea3a549e0930e.tar.gz egawk-7448f28d356fc5cd8d9117111baea3a549e0930e.tar.bz2 egawk-7448f28d356fc5cd8d9117111baea3a549e0930e.zip |
Merge branch 'gawk-4.1-stable'
-rw-r--r-- | ChangeLog | 14 | ||||
-rw-r--r-- | NEWS | 3 | ||||
-rw-r--r-- | awk.h | 2 | ||||
-rw-r--r-- | awkgram.c | 16 | ||||
-rw-r--r-- | awkgram.y | 16 | ||||
-rw-r--r-- | awklib/eg/prog/uniq.awk | 2 | ||||
-rw-r--r-- | doc/ChangeLog | 10 | ||||
-rw-r--r-- | doc/gawk.1 | 6 | ||||
-rw-r--r-- | doc/gawk.info | 1128 | ||||
-rw-r--r-- | doc/gawk.texi | 133 | ||||
-rw-r--r-- | doc/gawktexi.in | 133 | ||||
-rw-r--r-- | helpers/ChangeLog | 4 | ||||
-rw-r--r-- | helpers/chlistref.awk | 31 | ||||
-rw-r--r-- | interpret.h | 42 | ||||
-rw-r--r-- | profile.c | 30 | ||||
-rw-r--r-- | test/ChangeLog | 6 | ||||
-rw-r--r-- | test/functab4.ok | 3 | ||||
-rw-r--r-- | test/profile2.ok | 4 |
18 files changed, 939 insertions, 644 deletions
@@ -1,3 +1,17 @@ +2014-09-04 Arnold D. Robbins <arnold@skeeve.com> + + * profile.c (pprint): Case Op_K_for: Improve printing of + empty for loop header. + + Unrelated: Make indirect function calls work for built-in and + extension functions. + + * awkgram.y (lookup_builtin): New function. + * awk.h (builtin_func_t): New typedef. + (lookup_builtin): Declare it. + * interpret.h (r_interpret): For indirect calls, add code to + find and call builtin functions, and call extension functions. + 2014-09-01 Arnold D. Robbins <arnold@skeeve.com> * builtin.c (do_substr): Return "" instead of null string in case @@ -50,6 +50,9 @@ Changes from 4.1.1 to 4.1.2 4. A number of bugs have been fixed in the MPFR code. +5. Indirect function calls now work for both built-in and + extension functions. + XX. A number of bugs have been fixed. See the ChangeLog. Changes from 4.1.0 to 4.1.1 @@ -1378,6 +1378,8 @@ extern void register_deferred_variable(const char *name, NODE *(*load_func)(void extern int files_are_same(char *path, SRCFILE *src); extern void valinfo(NODE *n, Func_print print_func, FILE *fp); extern void negate_num(NODE *n); +typedef NODE *(*builtin_func_t)(int); /* function that implements a built-in */ +extern builtin_func_t lookup_builtin(const char *name); /* builtin.c */ extern double double_to_int(double d); extern NODE *do_exp(int nargs); @@ -8054,3 +8054,19 @@ one_line_close(int fd) } +/* lookup_builtin --- find a builtin function or return NULL */ + +builtin_func_t +lookup_builtin(const char *name) +{ + int mid = check_special(name); + + if (mid == -1 || tokentab[mid].class != LEX_BUILTIN) + return NULL; +#ifdef HAVE_MPFR + if (do_mpfr) + return tokentab[mid].ptr2; +#endif + + return tokentab[mid].ptr; +} @@ -5715,3 +5715,19 @@ one_line_close(int fd) } +/* lookup_builtin --- find a builtin function or return NULL */ + +builtin_func_t +lookup_builtin(const char *name) +{ + int mid = check_special(name); + + if (mid == -1 || tokentab[mid].class != LEX_BUILTIN) + return 
NULL; +#ifdef HAVE_MPFR + if (do_mpfr) + return tokentab[mid].ptr2; +#endif + + return tokentab[mid].ptr; +} diff --git a/awklib/eg/prog/uniq.awk b/awklib/eg/prog/uniq.awk index effc8f6c..2a2cf63e 100644 --- a/awklib/eg/prog/uniq.awk +++ b/awklib/eg/prog/uniq.awk @@ -30,7 +30,7 @@ BEGIN { else if (c == "c") do_count++ else if (index("0123456789", c) != 0) { - # getopt requires args to options + # getopt() requires args to options # this messes us up for things like -5 if (Optarg ~ /^[[:digit:]]+$/) fcount = (c Optarg) + 0 diff --git a/doc/ChangeLog b/doc/ChangeLog index 47d74797..95327540 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,13 @@ +2014-09-04 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: Document that indirect calls now work on built-in + and extension functions. + * gawk.1: Same. + +2014-09-03 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: Further fixes from reviews and bug reports. + 2014-09-02 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in: Corrections to walkthrough in debugger chapter. @@ -13,7 +13,7 @@ . if \w'\(rq' .ds rq "\(rq . \} .\} -.TH GAWK 1 "Jul 10 2014" "Free Software Foundation" "Utility Commands" +.TH GAWK 1 "Aug 03 2014" "Free Software Foundation" "Utility Commands" .SH NAME gawk \- pattern scanning and processing language .SH SYNOPSIS @@ -3287,7 +3287,7 @@ sign, like so: .RS .ft B .nf -function myfunc() +function myfunc() { print "myfunc called" \&.\|.\|. @@ -3301,6 +3301,8 @@ function myfunc() .fi .ft R .RE +As of version 4.1.2, this works with user-defined functions, +built-in functions, and extension functions. .PP If .B \-\^\-lint diff --git a/doc/gawk.info b/doc/gawk.info index bff22dc9..1d8022f0 100644 --- a/doc/gawk.info +++ b/doc/gawk.info @@ -2222,7 +2222,7 @@ determining the type of a variable, and array sorting. As we develop our presentation of the `awk' language, we introduce most of the variables and many of the functions. 
They are described -systematically in *note Built-in Variables::, and *note Built-in::. +systematically in *note Built-in Variables::, and in *note Built-in::. File: gawk.info, Node: When, Next: Intro Summary, Prev: Other Features, Up: Getting Started @@ -13871,7 +13871,7 @@ File: gawk.info, Node: Indirect Calls, Next: Functions Summary, Prev: User-de 9.3 Indirect Function Calls =========================== -This section describes a `gawk'-specific extension. +This section describes an advanced, `gawk'-specific extension. Often, you may wish to defer the choice of function to call until runtime. For example, you may have different kinds of records, each of @@ -13910,7 +13910,7 @@ your test scores: This style of programming works, but can be awkward. With "indirect" function calls, you tell `gawk' to use the _value_ of a variable as the -name of the function to call. +_name_ of the function to call. The syntax is similar to that of a regular function call: an identifier immediately followed by a left parenthesis, any arguments, @@ -13952,7 +13952,6 @@ using indirect function calls. Otherwise they perform the expected computations and are not unusual. # For each record, print the class name and the requested statistics - { class_name = $1 gsub(/_/, " ", class_name) # Replace _ with spaces @@ -14139,11 +14138,11 @@ names of the two comparison functions: Remember that you must supply a leading `@' in front of an indirect function call. - Unfortunately, indirect function calls cannot be used with the -built-in functions. However, you can generally write "wrapper" -functions which call the built-in ones, and those can be called -indirectly. (Other than, perhaps, the mathematical functions, there is -not a lot of reason to try to call the built-in functions indirectly.) + Starting with version 4.1.2 of `gawk', indirect function calls may +also be used with built-in functions and with extension functions +(*note Dynamic Extensions::). 
The only thing you cannot do is pass a +regular expression constant to a built-in function through an indirect +function call.(1) `gawk' does its best to make indirect function calls efficient. For example, in the following case: @@ -14151,7 +14150,12 @@ example, in the following case: for (i = 1; i <= n; i++) @the_func() -`gawk' will look up the actual function to call only once. +`gawk' looks up the actual function to call only once. + + ---------- Footnotes ---------- + + (1) This may change in a future version; recheck the documentation +that comes with your version of `gawk' to see if it has. File: gawk.info, Node: Functions Summary, Prev: Indirect Calls, Up: Functions @@ -14188,7 +14192,9 @@ File: gawk.info, Node: Functions Summary, Prev: Indirect Calls, Up: Functions * User-defined functions may call other user-defined (and built-in) functions and may call themselves recursively. Function parameters - "hide" any global variables of the same names. + "hide" any global variables of the same names. You cannot use the + name of a reserved variable (such as `ARGC') as the name of a + parameter in user-defined functions. * Scalar values are passed to user-defined functions by value. Array parameters are passed by reference; any changes made by the @@ -14204,10 +14210,9 @@ File: gawk.info, Node: Functions Summary, Prev: Indirect Calls, Up: Functions either scalar or array. * `gawk' provides indirect function calls using a special syntax. - By setting a variable to the name of a user-defined function, you - can determine at runtime what function will be called at that - point in the program. This is equivalent to function pointers in C - and C++. + By setting a variable to the name of a function, you can determine + at runtime what function will be called at that point in the + program. This is equivalent to function pointers in C and C++. 
@@ -14898,7 +14903,7 @@ that might be as follows: This function reads from `file' one record at a time, building up the full contents of the file in the local variable `contents'. It -works, but is not necessarily efficient.(1) +works, but is not necessarily efficient. The following function, based on a suggestion by Denis Shirokov, reads the entire contents of the named file in one shot: @@ -14933,13 +14938,6 @@ string. Thus calling code may use something like: This tests the result to see if it is empty or not. An equivalent test would be `contents == ""'. - ---------- Footnotes ---------- - - (1) Execution time grows quadratically in the size of the input; for -each record, `awk' has to allocate a bigger internal buffer for -`contents', copy the old contents into it, and then append the contents -of the new record. - File: gawk.info, Node: Data File Management, Next: Getopt Function, Prev: General Functions, Up: Library Functions @@ -17072,7 +17070,7 @@ standard output, `/dev/stdout': else if (c == "c") do_count++ else if (index("0123456789", c) != 0) { - # getopt requires args to options + # getopt() requires args to options # this messes us up for things like -5 if (Optarg ~ /^[[:digit:]]+$/) fcount = (c Optarg) + 0 @@ -17669,8 +17667,8 @@ program. enclosed in square brackets (`[a-z]') and quoted, to prevent the shell from attempting a file name expansion. This is not a feature. - (2) This program was written before `gawk' acquired the ability to -split each character in a string into separate array elements. + (2) This program was also written before `gawk' acquired the ability +to split each character in a string into separate array elements. File: gawk.info, Node: Labels Program, Next: Word Sorting, Prev: Translate Program, Up: Miscellaneous Programs @@ -18763,23 +18761,35 @@ File: gawk.info, Node: Programs Exercises, Prev: Programs Summary, Up: Sample 5. 
The `split.awk' program (*note Split Program::) assumes that letters are contiguous in the character set, which isn't true for - EBCDIC systems. Fix this problem. - - 6. Why can't the `wc.awk' program (*note Wc Program::) just use the + EBCDIC systems. Fix this problem. (Hint: Consider a different + way to work through the alphabet, without relying on `ord()' and + `chr()'.) + + 6. In `uniq.awk' (*note Uniq Program::), the logic for choosing which + lines to print represents a "state machine", which is "a device + that can be in one of a set number of stable conditions depending + on its previous condition and on the present values of its + inputs."(1) Brian Kernighan suggests that "an alternative approach + to state machines is to just read the input into an array, then + use indexing. It's almost always easier code, and for most inputs + where you would use this, just as fast." Rewrite the logic to + follow this suggestion. + + 7. Why can't the `wc.awk' program (*note Wc Program::) just use the value of `FNR' in `endfile()'? Hint: Examine the code in *note Filetrans Function::. - 7. Manipulation of individual characters in the `translate' program + 8. Manipulation of individual characters in the `translate' program (*note Translate Program::) is painful using standard `awk' functions. Given that `gawk' can split strings into individual characters using `""' as the separator, how might you use this feature to simplify the program? - 8. The `extract.awk' program (*note Extract Program::) was written + 9. The `extract.awk' program (*note Extract Program::) was written before `gawk' had the `gensub()' function. Use it to simplify the code. - 9. Compare the performance of the `awksed.awk' program (*note Simple + 10. Compare the performance of the `awksed.awk' program (*note Simple Sed::) with the more straightforward: BEGIN { @@ -18790,16 +18800,16 @@ File: gawk.info, Node: Programs Exercises, Prev: Programs Summary, Up: Sample { gsub(pat, repl); print } - 10. 
What are the advantages and disadvantages of `awksed.awk' versus + 11. What are the advantages and disadvantages of `awksed.awk' versus the real `sed' utility? - 11. In *note Igawk Program::, we mentioned that not trying to save the + 12. In *note Igawk Program::, we mentioned that not trying to save the line read with `getline' in the `pathto()' function when testing for the file's accessibility for use with the main program simplifies things considerably. What problem does this engender though? - 12. As an additional example of the idea that it is not always + 13. As an additional example of the idea that it is not always necessary to add new features to a program, consider the idea of having two files in a directory in the search path: @@ -18822,10 +18832,15 @@ File: gawk.info, Node: Programs Exercises, Prev: Programs Summary, Up: Sample `@include' statements for the desired library functions. Make this change. - 13. Modify `anagram.awk' (*note Anagram Program::), to avoid the use + 14. Modify `anagram.awk' (*note Anagram Program::), to avoid the use of the external `sort' utility. + ---------- Footnotes ---------- + + (1) This is the definition returned from entering `define: state +machine' into Google. + File: gawk.info, Node: Advanced Features, Next: Internationalization, Prev: Sample Programs, Up: Top @@ -21376,7 +21391,7 @@ some limitations. A few which are worth being aware of are: what your mistake was, though, you'll feel like a real guru. * If you perused the dump of opcodes in *note Miscellaneous Debugger - Commands::, (or if you are already familiar with `gawk' internals), + Commands:: (or if you are already familiar with `gawk' internals), you will realize that much of the internal manipulation of data in `gawk', as in many interpreters, is done on a stack. `Op_push', `Op_pop', etc., are the "bread and butter" of most `gawk' code. @@ -32104,7 +32119,7 @@ Index (line 66) * directories, command-line: Command-line directories. 
(line 6) -* directories, searching: Programs Exercises. (line 63) +* directories, searching: Programs Exercises. (line 75) * directories, searching for loadable extensions: AWKLIBPATH Variable. (line 6) * directories, searching for source files: AWKPATH Variable. (line 6) @@ -32386,7 +32401,7 @@ Index * files, reading, multiline records: Multiple Line. (line 6) * files, searching for regular expressions: Egrep Program. (line 6) * files, skipping: File Checking. (line 6) -* files, source, search path for: Programs Exercises. (line 63) +* files, source, search path for: Programs Exercises. (line 75) * files, splitting: Split Program. (line 6) * files, Texinfo, extracting programs from: Extract Program. (line 6) * find substring in string: String Functions. (line 155) @@ -33554,11 +33569,11 @@ Index * search in string: String Functions. (line 155) * search paths <1>: VMS Running. (line 58) * search paths <2>: PC Using. (line 10) -* search paths: Programs Exercises. (line 63) +* search paths: Programs Exercises. (line 75) * search paths, for loadable extensions: AWKLIBPATH Variable. (line 6) * search paths, for source files <1>: VMS Running. (line 58) * search paths, for source files <2>: PC Using. (line 10) -* search paths, for source files <3>: Programs Exercises. (line 63) +* search paths, for source files <3>: Programs Exercises. (line 75) * search paths, for source files: AWKPATH Variable. (line 6) * searching, files for regular expressions: Egrep Program. (line 6) * searching, for words: Dupword Program. (line 6) @@ -33705,7 +33720,7 @@ Index * source code, QSE Awk: Other Versions. (line 131) * source code, QuikTrim Awk: Other Versions. (line 135) * source code, Solaris awk: Other Versions. (line 96) -* source files, search path for: Programs Exercises. (line 63) +* source files, search path for: Programs Exercises. (line 75) * sparse arrays: Array Intro. (line 72) * Spencer, Henry: Glossary. (line 11) * split: String Functions. 
(line 313) @@ -34096,519 +34111,520 @@ Ref: More Complex-Footnote-1102513 Node: Statements/Lines102598 Ref: Statements/Lines-Footnote-1107054 Node: Other Features107319 -Node: When108247 -Ref: When-Footnote-1110003 -Node: Intro Summary110068 -Node: Invoking Gawk110951 -Node: Command Line112466 -Node: Options113257 -Ref: Options-Footnote-1128904 -Node: Other Arguments128929 -Node: Naming Standard Input131757 -Node: Environment Variables132850 -Node: AWKPATH Variable133408 -Ref: AWKPATH Variable-Footnote-1136274 -Ref: AWKPATH Variable-Footnote-2136319 -Node: AWKLIBPATH Variable136579 -Node: Other Environment Variables137338 -Node: Exit Status140790 -Node: Include Files141465 -Node: Loading Shared Libraries145043 -Node: Obsolete146427 -Node: Undocumented147124 -Node: Invoking Summary147391 -Node: Regexp148991 -Node: Regexp Usage150450 -Node: Escape Sequences152483 -Node: Regexp Operators158554 -Ref: Regexp Operators-Footnote-1165985 -Ref: Regexp Operators-Footnote-2166132 -Node: Bracket Expressions166230 -Ref: table-char-classes168248 -Node: Leftmost Longest171188 -Node: Computed Regexps172392 -Node: GNU Regexp Operators175770 -Node: Case-sensitivity179476 -Ref: Case-sensitivity-Footnote-1182366 -Ref: Case-sensitivity-Footnote-2182601 -Node: Regexp Summary182709 -Node: Reading Files184178 -Node: Records186270 -Node: awk split records186992 -Node: gawk split records191850 -Ref: gawk split records-Footnote-1196371 -Node: Fields196408 -Ref: Fields-Footnote-1199372 -Node: Nonconstant Fields199458 -Ref: Nonconstant Fields-Footnote-1201688 -Node: Changing Fields201890 -Node: Field Separators207844 -Node: Default Field Splitting210546 -Node: Regexp Field Splitting211663 -Node: Single Character Fields214990 -Node: Command Line Field Separator216049 -Node: Full Line Fields219475 -Ref: Full Line Fields-Footnote-1219983 -Node: Field Splitting Summary220029 -Ref: Field Splitting Summary-Footnote-1223161 -Node: Constant Size223262 -Node: Splitting By Content227868 -Ref: Splitting 
By Content-Footnote-1231941 -Node: Multiple Line231981 -Ref: Multiple Line-Footnote-1237837 -Node: Getline238016 -Node: Plain Getline240227 -Node: Getline/Variable242933 -Node: Getline/File244080 -Node: Getline/Variable/File245464 -Ref: Getline/Variable/File-Footnote-1247063 -Node: Getline/Pipe247150 -Node: Getline/Variable/Pipe249836 -Node: Getline/Coprocess250943 -Node: Getline/Variable/Coprocess252195 -Node: Getline Notes252932 -Node: Getline Summary255736 -Ref: table-getline-variants256144 -Node: Read Timeout257056 -Ref: Read Timeout-Footnote-1260883 -Node: Command-line directories260941 -Node: Input Summary261845 -Node: Input Exercises264982 -Node: Printing265710 -Node: Print267432 -Node: Print Examples268925 -Node: Output Separators271704 -Node: OFMT273720 -Node: Printf275078 -Node: Basic Printf275984 -Node: Control Letters277523 -Node: Format Modifiers281514 -Node: Printf Examples287541 -Node: Redirection290005 -Node: Special Files296977 -Node: Special FD297510 -Ref: Special FD-Footnote-1301107 -Node: Special Network301181 -Node: Special Caveats302031 -Node: Close Files And Pipes302827 -Ref: Close Files And Pipes-Footnote-1309988 -Ref: Close Files And Pipes-Footnote-2310136 -Node: Output Summary310286 -Node: Output Exercises311283 -Node: Expressions311963 -Node: Values313148 -Node: Constants313824 -Node: Scalar Constants314504 -Ref: Scalar Constants-Footnote-1315363 -Node: Nondecimal-numbers315613 -Node: Regexp Constants318613 -Node: Using Constant Regexps319138 -Node: Variables322210 -Node: Using Variables322865 -Node: Assignment Options324589 -Node: Conversion326464 -Node: Strings And Numbers326988 -Ref: Strings And Numbers-Footnote-1330050 -Node: Locale influences conversions330159 -Ref: table-locale-affects332876 -Node: All Operators333464 -Node: Arithmetic Ops334094 -Node: Concatenation336599 -Ref: Concatenation-Footnote-1339418 -Node: Assignment Ops339524 -Ref: table-assign-ops344507 -Node: Increment Ops345810 -Node: Truth Values and Conditions349248 
-Node: Truth Values350331 -Node: Typing and Comparison351380 -Node: Variable Typing352173 -Node: Comparison Operators355825 -Ref: table-relational-ops356235 -Node: POSIX String Comparison359785 -Ref: POSIX String Comparison-Footnote-1360869 -Node: Boolean Ops361007 -Ref: Boolean Ops-Footnote-1365346 -Node: Conditional Exp365437 -Node: Function Calls367164 -Node: Precedence371044 -Node: Locales374713 -Node: Expressions Summary376344 -Node: Patterns and Actions378885 -Node: Pattern Overview380001 -Node: Regexp Patterns381678 -Node: Expression Patterns382221 -Node: Ranges386001 -Node: BEGIN/END389107 -Node: Using BEGIN/END389869 -Ref: Using BEGIN/END-Footnote-1392605 -Node: I/O And BEGIN/END392711 -Node: BEGINFILE/ENDFILE394982 -Node: Empty397913 -Node: Using Shell Variables398230 -Node: Action Overview400513 -Node: Statements402840 -Node: If Statement404688 -Node: While Statement406186 -Node: Do Statement408230 -Node: For Statement409386 -Node: Switch Statement412538 -Node: Break Statement414926 -Node: Continue Statement416967 -Node: Next Statement418792 -Node: Nextfile Statement421182 -Node: Exit Statement423839 -Node: Built-in Variables426243 -Node: User-modified427370 -Ref: User-modified-Footnote-1435059 -Node: Auto-set435121 -Ref: Auto-set-Footnote-1448040 -Ref: Auto-set-Footnote-2448245 -Node: ARGC and ARGV448301 -Node: Pattern Action Summary452205 -Node: Arrays454428 -Node: Array Basics455977 -Node: Array Intro456803 -Ref: figure-array-elements458776 -Ref: Array Intro-Footnote-1461300 -Node: Reference to Elements461428 -Node: Assigning Elements463878 -Node: Array Example464369 -Node: Scanning an Array466101 -Node: Controlling Scanning469102 -Ref: Controlling Scanning-Footnote-1474275 -Node: Delete474591 -Ref: Delete-Footnote-1477342 -Node: Numeric Array Subscripts477399 -Node: Uninitialized Subscripts479582 -Node: Multidimensional481209 -Node: Multiscanning484322 -Node: Arrays of Arrays485911 -Node: Arrays Summary490574 -Node: Functions492679 -Node: 
Built-in493552 -Node: Calling Built-in494630 -Node: Numeric Functions496618 -Ref: Numeric Functions-Footnote-1501454 -Ref: Numeric Functions-Footnote-2501811 -Ref: Numeric Functions-Footnote-3501859 -Node: String Functions502128 -Ref: String Functions-Footnote-1525125 -Ref: String Functions-Footnote-2525254 -Ref: String Functions-Footnote-3525502 -Node: Gory Details525589 -Ref: table-sub-escapes527362 -Ref: table-sub-proposed528882 -Ref: table-posix-sub530246 -Ref: table-gensub-escapes531786 -Ref: Gory Details-Footnote-1532962 -Node: I/O Functions533113 -Ref: I/O Functions-Footnote-1540223 -Node: Time Functions540370 -Ref: Time Functions-Footnote-1550834 -Ref: Time Functions-Footnote-2550902 -Ref: Time Functions-Footnote-3551060 -Ref: Time Functions-Footnote-4551171 -Ref: Time Functions-Footnote-5551283 -Ref: Time Functions-Footnote-6551510 -Node: Bitwise Functions551776 -Ref: table-bitwise-ops552338 -Ref: Bitwise Functions-Footnote-1556583 -Node: Type Functions556767 -Node: I18N Functions557909 -Node: User-defined559554 -Node: Definition Syntax560358 -Ref: Definition Syntax-Footnote-1565671 -Node: Function Example565740 -Ref: Function Example-Footnote-1568380 -Node: Function Caveats568402 -Node: Calling A Function568920 -Node: Variable Scope569875 -Node: Pass By Value/Reference572863 -Node: Return Statement576373 -Node: Dynamic Typing579357 -Node: Indirect Calls580286 -Node: Functions Summary589999 -Node: Library Functions592538 -Ref: Library Functions-Footnote-1596156 -Ref: Library Functions-Footnote-2596299 -Node: Library Names596470 -Ref: Library Names-Footnote-1599943 -Ref: Library Names-Footnote-2600163 -Node: General Functions600249 -Node: Strtonum Function601277 -Node: Assert Function604151 -Node: Round Function607477 -Node: Cliff Random Function609018 -Node: Ordinal Functions610034 -Ref: Ordinal Functions-Footnote-1613099 -Ref: Ordinal Functions-Footnote-2613351 -Node: Join Function613562 -Ref: Join Function-Footnote-1615333 -Node: Getlocaltime 
Function615533 -Node: Readfile Function619269 -Ref: Readfile Function-Footnote-1621147 -Node: Data File Management621375 -Node: Filetrans Function622007 -Node: Rewind Function626076 -Node: File Checking627634 -Ref: File Checking-Footnote-1628766 -Node: Empty Files628967 -Node: Ignoring Assigns630946 -Node: Getopt Function632500 -Ref: Getopt Function-Footnote-1643764 -Node: Passwd Functions643967 -Ref: Passwd Functions-Footnote-1652946 -Node: Group Functions653034 -Ref: Group Functions-Footnote-1660965 -Node: Walking Arrays661178 -Node: Library Functions Summary662781 -Node: Library Exercises664169 -Node: Sample Programs665449 -Node: Running Examples666219 -Node: Clones666947 -Node: Cut Program668171 -Node: Egrep Program678029 -Ref: Egrep Program-Footnote-1685616 -Node: Id Program685726 -Node: Split Program689380 -Ref: Split Program-Footnote-1692918 -Node: Tee Program693046 -Node: Uniq Program695833 -Node: Wc Program703254 -Ref: Wc Program-Footnote-1707519 -Node: Miscellaneous Programs707611 -Node: Dupword Program708824 -Node: Alarm Program710855 -Node: Translate Program715659 -Ref: Translate Program-Footnote-1720050 -Ref: Translate Program-Footnote-2720320 -Node: Labels Program720454 -Ref: Labels Program-Footnote-1723815 -Node: Word Sorting723899 -Node: History Sorting727942 -Node: Extract Program729778 -Node: Simple Sed737314 -Node: Igawk Program740376 -Ref: Igawk Program-Footnote-1754680 -Ref: Igawk Program-Footnote-2754881 -Node: Anagram Program755019 -Node: Signature Program758087 -Node: Programs Summary759334 -Node: Programs Exercises760549 -Node: Advanced Features764200 -Node: Nondecimal Data766148 -Node: Array Sorting767725 -Node: Controlling Array Traversal768422 -Node: Array Sorting Functions776702 -Ref: Array Sorting Functions-Footnote-1780609 -Node: Two-way I/O780803 -Ref: Two-way I/O-Footnote-1785747 -Ref: Two-way I/O-Footnote-2785926 -Node: TCP/IP Networking786008 -Node: Profiling788853 -Node: Advanced Features Summary796404 -Node: 
Internationalization798268 -Node: I18N and L10N799748 -Node: Explaining gettext800434 -Ref: Explaining gettext-Footnote-1805460 -Ref: Explaining gettext-Footnote-2805644 -Node: Programmer i18n805809 -Ref: Programmer i18n-Footnote-1810603 -Node: Translator i18n810652 -Node: String Extraction811446 -Ref: String Extraction-Footnote-1812579 -Node: Printf Ordering812665 -Ref: Printf Ordering-Footnote-1815447 -Node: I18N Portability815511 -Ref: I18N Portability-Footnote-1817960 -Node: I18N Example818023 -Ref: I18N Example-Footnote-1820729 -Node: Gawk I18N820801 -Node: I18N Summary821439 -Node: Debugger822778 -Node: Debugging823800 -Node: Debugging Concepts824241 -Node: Debugging Terms826097 -Node: Awk Debugging828694 -Node: Sample Debugging Session829586 -Node: Debugger Invocation830106 -Node: Finding The Bug831442 -Node: List of Debugger Commands837921 -Node: Breakpoint Control839253 -Node: Debugger Execution Control842917 -Node: Viewing And Changing Data846277 -Node: Execution Stack849635 -Node: Debugger Info851148 -Node: Miscellaneous Debugger Commands855142 -Node: Readline Support860326 -Node: Limitations861218 -Node: Debugging Summary863492 -Node: Arbitrary Precision Arithmetic864660 -Node: Computer Arithmetic866147 -Ref: Computer Arithmetic-Footnote-1870534 -Node: Math Definitions870591 -Ref: table-ieee-formats873880 -Ref: Math Definitions-Footnote-1874420 -Node: MPFR features874523 -Node: FP Math Caution876140 -Ref: FP Math Caution-Footnote-1877190 -Node: Inexactness of computations877559 -Node: Inexact representation878507 -Node: Comparing FP Values879862 -Node: Errors accumulate880826 -Node: Getting Accuracy882259 -Node: Try To Round884918 -Node: Setting precision885817 -Ref: table-predefined-precision-strings886499 -Node: Setting the rounding mode888292 -Ref: table-gawk-rounding-modes888656 -Ref: Setting the rounding mode-Footnote-1892110 -Node: Arbitrary Precision Integers892289 -Ref: Arbitrary Precision Integers-Footnote-1896062 -Node: POSIX Floating Point 
Problems896211 -Ref: POSIX Floating Point Problems-Footnote-1900087 -Node: Floating point summary900125 -Node: Dynamic Extensions902329 -Node: Extension Intro903881 -Node: Plugin License905146 -Node: Extension Mechanism Outline905831 -Ref: figure-load-extension906255 -Ref: figure-load-new-function907740 -Ref: figure-call-new-function908742 -Node: Extension API Description910726 -Node: Extension API Functions Introduction912176 -Node: General Data Types917043 -Ref: General Data Types-Footnote-1922736 -Node: Requesting Values923035 -Ref: table-value-types-returned923772 -Node: Memory Allocation Functions924730 -Ref: Memory Allocation Functions-Footnote-1927477 -Node: Constructor Functions927573 -Node: Registration Functions929331 -Node: Extension Functions930016 -Node: Exit Callback Functions932318 -Node: Extension Version String933566 -Node: Input Parsers934216 -Node: Output Wrappers944030 -Node: Two-way processors948546 -Node: Printing Messages950750 -Ref: Printing Messages-Footnote-1951827 -Node: Updating `ERRNO'951979 -Node: Accessing Parameters952718 -Node: Symbol Table Access953948 -Node: Symbol table by name954462 -Node: Symbol table by cookie956438 -Ref: Symbol table by cookie-Footnote-1960571 -Node: Cached values960634 -Ref: Cached values-Footnote-1964138 -Node: Array Manipulation964229 -Ref: Array Manipulation-Footnote-1965327 -Node: Array Data Types965366 -Ref: Array Data Types-Footnote-1968069 -Node: Array Functions968161 -Node: Flattening Arrays972035 -Node: Creating Arrays978887 -Node: Extension API Variables983618 -Node: Extension Versioning984254 -Node: Extension API Informational Variables986155 -Node: Extension API Boilerplate987241 -Node: Finding Extensions991045 -Node: Extension Example991605 -Node: Internal File Description992335 -Node: Internal File Ops996426 -Ref: Internal File Ops-Footnote-11007858 -Node: Using Internal File Ops1007998 -Ref: Using Internal File Ops-Footnote-11010345 -Node: Extension Samples1010613 -Node: Extension Sample File 
Functions1012137 -Node: Extension Sample Fnmatch1019705 -Node: Extension Sample Fork1021187 -Node: Extension Sample Inplace1022400 -Node: Extension Sample Ord1024075 -Node: Extension Sample Readdir1024911 -Ref: table-readdir-file-types1025767 -Node: Extension Sample Revout1026566 -Node: Extension Sample Rev2way1027157 -Node: Extension Sample Read write array1027898 -Node: Extension Sample Readfile1029777 -Node: Extension Sample API Tests1030877 -Node: Extension Sample Time1031402 -Node: gawkextlib1032717 -Node: Extension summary1035530 -Node: Extension Exercises1039223 -Node: Language History1039945 -Node: V7/SVR3.11041588 -Node: SVR41043908 -Node: POSIX1045350 -Node: BTL1046736 -Node: POSIX/GNU1047470 -Node: Feature History1053246 -Node: Common Extensions1066337 -Node: Ranges and Locales1067649 -Ref: Ranges and Locales-Footnote-11072266 -Ref: Ranges and Locales-Footnote-21072293 -Ref: Ranges and Locales-Footnote-31072527 -Node: Contributors1072748 -Node: History summary1078173 -Node: Installation1079542 -Node: Gawk Distribution1080493 -Node: Getting1080977 -Node: Extracting1081801 -Node: Distribution contents1083443 -Node: Unix Installation1089213 -Node: Quick Installation1089830 -Node: Additional Configuration Options1092272 -Node: Configuration Philosophy1094010 -Node: Non-Unix Installation1096361 -Node: PC Installation1096819 -Node: PC Binary Installation1098130 -Node: PC Compiling1099978 -Ref: PC Compiling-Footnote-11102977 -Node: PC Testing1103082 -Node: PC Using1104258 -Node: Cygwin1108410 -Node: MSYS1109219 -Node: VMS Installation1109733 -Node: VMS Compilation1110529 -Ref: VMS Compilation-Footnote-11111751 -Node: VMS Dynamic Extensions1111809 -Node: VMS Installation Details1113182 -Node: VMS Running1115434 -Node: VMS GNV1118268 -Node: VMS Old Gawk1118991 -Node: Bugs1119461 -Node: Other Versions1123465 -Node: Installation summary1129692 -Node: Notes1130748 -Node: Compatibility Mode1131613 -Node: Additions1132395 -Node: Accessing The Source1133320 -Node: 
Adding Code1134756 -Node: New Ports1140934 -Node: Derived Files1145415 -Ref: Derived Files-Footnote-11150496 -Ref: Derived Files-Footnote-21150530 -Ref: Derived Files-Footnote-31151126 -Node: Future Extensions1151240 -Node: Implementation Limitations1151846 -Node: Extension Design1153094 -Node: Old Extension Problems1154248 -Ref: Old Extension Problems-Footnote-11155765 -Node: Extension New Mechanism Goals1155822 -Ref: Extension New Mechanism Goals-Footnote-11159182 -Node: Extension Other Design Decisions1159371 -Node: Extension Future Growth1161477 -Node: Old Extension Mechanism1162313 -Node: Notes summary1164075 -Node: Basic Concepts1165261 -Node: Basic High Level1165942 -Ref: figure-general-flow1166214 -Ref: figure-process-flow1166813 -Ref: Basic High Level-Footnote-11170042 -Node: Basic Data Typing1170227 -Node: Glossary1173555 -Node: Copying1198707 -Node: GNU Free Documentation License1236263 -Node: Index1261399 +Node: When108250 +Ref: When-Footnote-1110006 +Node: Intro Summary110071 +Node: Invoking Gawk110954 +Node: Command Line112469 +Node: Options113260 +Ref: Options-Footnote-1128907 +Node: Other Arguments128932 +Node: Naming Standard Input131760 +Node: Environment Variables132853 +Node: AWKPATH Variable133411 +Ref: AWKPATH Variable-Footnote-1136277 +Ref: AWKPATH Variable-Footnote-2136322 +Node: AWKLIBPATH Variable136582 +Node: Other Environment Variables137341 +Node: Exit Status140793 +Node: Include Files141468 +Node: Loading Shared Libraries145046 +Node: Obsolete146430 +Node: Undocumented147127 +Node: Invoking Summary147394 +Node: Regexp148994 +Node: Regexp Usage150453 +Node: Escape Sequences152486 +Node: Regexp Operators158557 +Ref: Regexp Operators-Footnote-1165988 +Ref: Regexp Operators-Footnote-2166135 +Node: Bracket Expressions166233 +Ref: table-char-classes168251 +Node: Leftmost Longest171191 +Node: Computed Regexps172395 +Node: GNU Regexp Operators175773 +Node: Case-sensitivity179479 +Ref: Case-sensitivity-Footnote-1182369 +Ref: 
Case-sensitivity-Footnote-2182604 +Node: Regexp Summary182712 +Node: Reading Files184181 +Node: Records186273 +Node: awk split records186995 +Node: gawk split records191853 +Ref: gawk split records-Footnote-1196374 +Node: Fields196411 +Ref: Fields-Footnote-1199375 +Node: Nonconstant Fields199461 +Ref: Nonconstant Fields-Footnote-1201691 +Node: Changing Fields201893 +Node: Field Separators207847 +Node: Default Field Splitting210549 +Node: Regexp Field Splitting211666 +Node: Single Character Fields214993 +Node: Command Line Field Separator216052 +Node: Full Line Fields219478 +Ref: Full Line Fields-Footnote-1219986 +Node: Field Splitting Summary220032 +Ref: Field Splitting Summary-Footnote-1223164 +Node: Constant Size223265 +Node: Splitting By Content227871 +Ref: Splitting By Content-Footnote-1231944 +Node: Multiple Line231984 +Ref: Multiple Line-Footnote-1237840 +Node: Getline238019 +Node: Plain Getline240230 +Node: Getline/Variable242936 +Node: Getline/File244083 +Node: Getline/Variable/File245467 +Ref: Getline/Variable/File-Footnote-1247066 +Node: Getline/Pipe247153 +Node: Getline/Variable/Pipe249839 +Node: Getline/Coprocess250946 +Node: Getline/Variable/Coprocess252198 +Node: Getline Notes252935 +Node: Getline Summary255739 +Ref: table-getline-variants256147 +Node: Read Timeout257059 +Ref: Read Timeout-Footnote-1260886 +Node: Command-line directories260944 +Node: Input Summary261848 +Node: Input Exercises264985 +Node: Printing265713 +Node: Print267435 +Node: Print Examples268928 +Node: Output Separators271707 +Node: OFMT273723 +Node: Printf275081 +Node: Basic Printf275987 +Node: Control Letters277526 +Node: Format Modifiers281517 +Node: Printf Examples287544 +Node: Redirection290008 +Node: Special Files296980 +Node: Special FD297513 +Ref: Special FD-Footnote-1301110 +Node: Special Network301184 +Node: Special Caveats302034 +Node: Close Files And Pipes302830 +Ref: Close Files And Pipes-Footnote-1309991 +Ref: Close Files And Pipes-Footnote-2310139 +Node: Output 
Summary310289 +Node: Output Exercises311286 +Node: Expressions311966 +Node: Values313151 +Node: Constants313827 +Node: Scalar Constants314507 +Ref: Scalar Constants-Footnote-1315366 +Node: Nondecimal-numbers315616 +Node: Regexp Constants318616 +Node: Using Constant Regexps319141 +Node: Variables322213 +Node: Using Variables322868 +Node: Assignment Options324592 +Node: Conversion326467 +Node: Strings And Numbers326991 +Ref: Strings And Numbers-Footnote-1330053 +Node: Locale influences conversions330162 +Ref: table-locale-affects332879 +Node: All Operators333467 +Node: Arithmetic Ops334097 +Node: Concatenation336602 +Ref: Concatenation-Footnote-1339421 +Node: Assignment Ops339527 +Ref: table-assign-ops344510 +Node: Increment Ops345813 +Node: Truth Values and Conditions349251 +Node: Truth Values350334 +Node: Typing and Comparison351383 +Node: Variable Typing352176 +Node: Comparison Operators355828 +Ref: table-relational-ops356238 +Node: POSIX String Comparison359788 +Ref: POSIX String Comparison-Footnote-1360872 +Node: Boolean Ops361010 +Ref: Boolean Ops-Footnote-1365349 +Node: Conditional Exp365440 +Node: Function Calls367167 +Node: Precedence371047 +Node: Locales374716 +Node: Expressions Summary376347 +Node: Patterns and Actions378888 +Node: Pattern Overview380004 +Node: Regexp Patterns381681 +Node: Expression Patterns382224 +Node: Ranges386004 +Node: BEGIN/END389110 +Node: Using BEGIN/END389872 +Ref: Using BEGIN/END-Footnote-1392608 +Node: I/O And BEGIN/END392714 +Node: BEGINFILE/ENDFILE394985 +Node: Empty397916 +Node: Using Shell Variables398233 +Node: Action Overview400516 +Node: Statements402843 +Node: If Statement404691 +Node: While Statement406189 +Node: Do Statement408233 +Node: For Statement409389 +Node: Switch Statement412541 +Node: Break Statement414929 +Node: Continue Statement416970 +Node: Next Statement418795 +Node: Nextfile Statement421185 +Node: Exit Statement423842 +Node: Built-in Variables426246 +Node: User-modified427373 +Ref: 
User-modified-Footnote-1435062 +Node: Auto-set435124 +Ref: Auto-set-Footnote-1448043 +Ref: Auto-set-Footnote-2448248 +Node: ARGC and ARGV448304 +Node: Pattern Action Summary452208 +Node: Arrays454431 +Node: Array Basics455980 +Node: Array Intro456806 +Ref: figure-array-elements458779 +Ref: Array Intro-Footnote-1461303 +Node: Reference to Elements461431 +Node: Assigning Elements463881 +Node: Array Example464372 +Node: Scanning an Array466104 +Node: Controlling Scanning469105 +Ref: Controlling Scanning-Footnote-1474278 +Node: Delete474594 +Ref: Delete-Footnote-1477345 +Node: Numeric Array Subscripts477402 +Node: Uninitialized Subscripts479585 +Node: Multidimensional481212 +Node: Multiscanning484325 +Node: Arrays of Arrays485914 +Node: Arrays Summary490577 +Node: Functions492682 +Node: Built-in493555 +Node: Calling Built-in494633 +Node: Numeric Functions496621 +Ref: Numeric Functions-Footnote-1501457 +Ref: Numeric Functions-Footnote-2501814 +Ref: Numeric Functions-Footnote-3501862 +Node: String Functions502131 +Ref: String Functions-Footnote-1525128 +Ref: String Functions-Footnote-2525257 +Ref: String Functions-Footnote-3525505 +Node: Gory Details525592 +Ref: table-sub-escapes527365 +Ref: table-sub-proposed528885 +Ref: table-posix-sub530249 +Ref: table-gensub-escapes531789 +Ref: Gory Details-Footnote-1532965 +Node: I/O Functions533116 +Ref: I/O Functions-Footnote-1540226 +Node: Time Functions540373 +Ref: Time Functions-Footnote-1550837 +Ref: Time Functions-Footnote-2550905 +Ref: Time Functions-Footnote-3551063 +Ref: Time Functions-Footnote-4551174 +Ref: Time Functions-Footnote-5551286 +Ref: Time Functions-Footnote-6551513 +Node: Bitwise Functions551779 +Ref: table-bitwise-ops552341 +Ref: Bitwise Functions-Footnote-1556586 +Node: Type Functions556770 +Node: I18N Functions557912 +Node: User-defined559557 +Node: Definition Syntax560361 +Ref: Definition Syntax-Footnote-1565674 +Node: Function Example565743 +Ref: Function Example-Footnote-1568383 +Node: Function 
Caveats568405 +Node: Calling A Function568923 +Node: Variable Scope569878 +Node: Pass By Value/Reference572866 +Node: Return Statement576376 +Node: Dynamic Typing579360 +Node: Indirect Calls580289 +Ref: Indirect Calls-Footnote-1590005 +Node: Functions Summary590133 +Node: Library Functions592783 +Ref: Library Functions-Footnote-1596401 +Ref: Library Functions-Footnote-2596544 +Node: Library Names596715 +Ref: Library Names-Footnote-1600188 +Ref: Library Names-Footnote-2600408 +Node: General Functions600494 +Node: Strtonum Function601522 +Node: Assert Function604396 +Node: Round Function607722 +Node: Cliff Random Function609263 +Node: Ordinal Functions610279 +Ref: Ordinal Functions-Footnote-1613344 +Ref: Ordinal Functions-Footnote-2613596 +Node: Join Function613807 +Ref: Join Function-Footnote-1615578 +Node: Getlocaltime Function615778 +Node: Readfile Function619514 +Node: Data File Management621353 +Node: Filetrans Function621985 +Node: Rewind Function626054 +Node: File Checking627612 +Ref: File Checking-Footnote-1628744 +Node: Empty Files628945 +Node: Ignoring Assigns630924 +Node: Getopt Function632478 +Ref: Getopt Function-Footnote-1643742 +Node: Passwd Functions643945 +Ref: Passwd Functions-Footnote-1652924 +Node: Group Functions653012 +Ref: Group Functions-Footnote-1660943 +Node: Walking Arrays661156 +Node: Library Functions Summary662759 +Node: Library Exercises664147 +Node: Sample Programs665427 +Node: Running Examples666197 +Node: Clones666925 +Node: Cut Program668149 +Node: Egrep Program678007 +Ref: Egrep Program-Footnote-1685594 +Node: Id Program685704 +Node: Split Program689358 +Ref: Split Program-Footnote-1692896 +Node: Tee Program693024 +Node: Uniq Program695811 +Node: Wc Program703234 +Ref: Wc Program-Footnote-1707499 +Node: Miscellaneous Programs707591 +Node: Dupword Program708804 +Node: Alarm Program710835 +Node: Translate Program715639 +Ref: Translate Program-Footnote-1720030 +Ref: Translate Program-Footnote-2720300 +Node: Labels Program720439 +Ref: 
Labels Program-Footnote-1723800 +Node: Word Sorting723884 +Node: History Sorting727927 +Node: Extract Program729763 +Node: Simple Sed737299 +Node: Igawk Program740361 +Ref: Igawk Program-Footnote-1754665 +Ref: Igawk Program-Footnote-2754866 +Node: Anagram Program755004 +Node: Signature Program758072 +Node: Programs Summary759319 +Node: Programs Exercises760534 +Ref: Programs Exercises-Footnote-1764921 +Node: Advanced Features765012 +Node: Nondecimal Data766960 +Node: Array Sorting768537 +Node: Controlling Array Traversal769234 +Node: Array Sorting Functions777514 +Ref: Array Sorting Functions-Footnote-1781421 +Node: Two-way I/O781615 +Ref: Two-way I/O-Footnote-1786559 +Ref: Two-way I/O-Footnote-2786738 +Node: TCP/IP Networking786820 +Node: Profiling789665 +Node: Advanced Features Summary797216 +Node: Internationalization799080 +Node: I18N and L10N800560 +Node: Explaining gettext801246 +Ref: Explaining gettext-Footnote-1806272 +Ref: Explaining gettext-Footnote-2806456 +Node: Programmer i18n806621 +Ref: Programmer i18n-Footnote-1811415 +Node: Translator i18n811464 +Node: String Extraction812258 +Ref: String Extraction-Footnote-1813391 +Node: Printf Ordering813477 +Ref: Printf Ordering-Footnote-1816259 +Node: I18N Portability816323 +Ref: I18N Portability-Footnote-1818772 +Node: I18N Example818835 +Ref: I18N Example-Footnote-1821541 +Node: Gawk I18N821613 +Node: I18N Summary822251 +Node: Debugger823590 +Node: Debugging824612 +Node: Debugging Concepts825053 +Node: Debugging Terms826909 +Node: Awk Debugging829506 +Node: Sample Debugging Session830398 +Node: Debugger Invocation830918 +Node: Finding The Bug832254 +Node: List of Debugger Commands838733 +Node: Breakpoint Control840065 +Node: Debugger Execution Control843729 +Node: Viewing And Changing Data847089 +Node: Execution Stack850447 +Node: Debugger Info851960 +Node: Miscellaneous Debugger Commands855954 +Node: Readline Support861138 +Node: Limitations862030 +Node: Debugging Summary864303 +Node: Arbitrary Precision 
Arithmetic865471 +Node: Computer Arithmetic866958 +Ref: Computer Arithmetic-Footnote-1871345 +Node: Math Definitions871402 +Ref: table-ieee-formats874691 +Ref: Math Definitions-Footnote-1875231 +Node: MPFR features875334 +Node: FP Math Caution876951 +Ref: FP Math Caution-Footnote-1878001 +Node: Inexactness of computations878370 +Node: Inexact representation879318 +Node: Comparing FP Values880673 +Node: Errors accumulate881637 +Node: Getting Accuracy883070 +Node: Try To Round885729 +Node: Setting precision886628 +Ref: table-predefined-precision-strings887310 +Node: Setting the rounding mode889103 +Ref: table-gawk-rounding-modes889467 +Ref: Setting the rounding mode-Footnote-1892921 +Node: Arbitrary Precision Integers893100 +Ref: Arbitrary Precision Integers-Footnote-1896873 +Node: POSIX Floating Point Problems897022 +Ref: POSIX Floating Point Problems-Footnote-1900898 +Node: Floating point summary900936 +Node: Dynamic Extensions903140 +Node: Extension Intro904692 +Node: Plugin License905957 +Node: Extension Mechanism Outline906642 +Ref: figure-load-extension907066 +Ref: figure-load-new-function908551 +Ref: figure-call-new-function909553 +Node: Extension API Description911537 +Node: Extension API Functions Introduction912987 +Node: General Data Types917854 +Ref: General Data Types-Footnote-1923547 +Node: Requesting Values923846 +Ref: table-value-types-returned924583 +Node: Memory Allocation Functions925541 +Ref: Memory Allocation Functions-Footnote-1928288 +Node: Constructor Functions928384 +Node: Registration Functions930142 +Node: Extension Functions930827 +Node: Exit Callback Functions933129 +Node: Extension Version String934377 +Node: Input Parsers935027 +Node: Output Wrappers944841 +Node: Two-way processors949357 +Node: Printing Messages951561 +Ref: Printing Messages-Footnote-1952638 +Node: Updating `ERRNO'952790 +Node: Accessing Parameters953529 +Node: Symbol Table Access954759 +Node: Symbol table by name955273 +Node: Symbol table by cookie957249 +Ref: Symbol 
table by cookie-Footnote-1961382 +Node: Cached values961445 +Ref: Cached values-Footnote-1964949 +Node: Array Manipulation965040 +Ref: Array Manipulation-Footnote-1966138 +Node: Array Data Types966177 +Ref: Array Data Types-Footnote-1968880 +Node: Array Functions968972 +Node: Flattening Arrays972846 +Node: Creating Arrays979698 +Node: Extension API Variables984429 +Node: Extension Versioning985065 +Node: Extension API Informational Variables986966 +Node: Extension API Boilerplate988052 +Node: Finding Extensions991856 +Node: Extension Example992416 +Node: Internal File Description993146 +Node: Internal File Ops997237 +Ref: Internal File Ops-Footnote-11008669 +Node: Using Internal File Ops1008809 +Ref: Using Internal File Ops-Footnote-11011156 +Node: Extension Samples1011424 +Node: Extension Sample File Functions1012948 +Node: Extension Sample Fnmatch1020516 +Node: Extension Sample Fork1021998 +Node: Extension Sample Inplace1023211 +Node: Extension Sample Ord1024886 +Node: Extension Sample Readdir1025722 +Ref: table-readdir-file-types1026578 +Node: Extension Sample Revout1027377 +Node: Extension Sample Rev2way1027968 +Node: Extension Sample Read write array1028709 +Node: Extension Sample Readfile1030588 +Node: Extension Sample API Tests1031688 +Node: Extension Sample Time1032213 +Node: gawkextlib1033528 +Node: Extension summary1036341 +Node: Extension Exercises1040034 +Node: Language History1040756 +Node: V7/SVR3.11042399 +Node: SVR41044719 +Node: POSIX1046161 +Node: BTL1047547 +Node: POSIX/GNU1048281 +Node: Feature History1054057 +Node: Common Extensions1067148 +Node: Ranges and Locales1068460 +Ref: Ranges and Locales-Footnote-11073077 +Ref: Ranges and Locales-Footnote-21073104 +Ref: Ranges and Locales-Footnote-31073338 +Node: Contributors1073559 +Node: History summary1078984 +Node: Installation1080353 +Node: Gawk Distribution1081304 +Node: Getting1081788 +Node: Extracting1082612 +Node: Distribution contents1084254 +Node: Unix Installation1090024 +Node: Quick 
Installation1090641 +Node: Additional Configuration Options1093083 +Node: Configuration Philosophy1094821 +Node: Non-Unix Installation1097172 +Node: PC Installation1097630 +Node: PC Binary Installation1098941 +Node: PC Compiling1100789 +Ref: PC Compiling-Footnote-11103788 +Node: PC Testing1103893 +Node: PC Using1105069 +Node: Cygwin1109221 +Node: MSYS1110030 +Node: VMS Installation1110544 +Node: VMS Compilation1111340 +Ref: VMS Compilation-Footnote-11112562 +Node: VMS Dynamic Extensions1112620 +Node: VMS Installation Details1113993 +Node: VMS Running1116245 +Node: VMS GNV1119079 +Node: VMS Old Gawk1119802 +Node: Bugs1120272 +Node: Other Versions1124276 +Node: Installation summary1130503 +Node: Notes1131559 +Node: Compatibility Mode1132424 +Node: Additions1133206 +Node: Accessing The Source1134131 +Node: Adding Code1135567 +Node: New Ports1141745 +Node: Derived Files1146226 +Ref: Derived Files-Footnote-11151307 +Ref: Derived Files-Footnote-21151341 +Ref: Derived Files-Footnote-31151937 +Node: Future Extensions1152051 +Node: Implementation Limitations1152657 +Node: Extension Design1153905 +Node: Old Extension Problems1155059 +Ref: Old Extension Problems-Footnote-11156576 +Node: Extension New Mechanism Goals1156633 +Ref: Extension New Mechanism Goals-Footnote-11159993 +Node: Extension Other Design Decisions1160182 +Node: Extension Future Growth1162288 +Node: Old Extension Mechanism1163124 +Node: Notes summary1164886 +Node: Basic Concepts1166072 +Node: Basic High Level1166753 +Ref: figure-general-flow1167025 +Ref: figure-process-flow1167624 +Ref: Basic High Level-Footnote-11170853 +Node: Basic Data Typing1171038 +Node: Glossary1174366 +Node: Copying1199518 +Node: GNU Free Documentation License1237074 +Node: Index1262210 End Tag Table diff --git a/doc/gawk.texi b/doc/gawk.texi index 81b36ae5..7fc342c3 100644 --- a/doc/gawk.texi +++ b/doc/gawk.texi @@ -165,6 +165,19 @@ @end macro @end ifdocbook +@c hack for docbook, where comma shouldn't always follow an @ref{} 
+@ifdocbook +@macro DBREF{text} +@ref{\text\} +@end macro +@end ifdocbook + +@ifnotdocbook +@macro DBREF{text} +@ref{\text\}, +@end macro +@end ifnotdocbook + @ifclear FOR_PRINT @set FN file name @set FFN File Name @@ -1622,7 +1635,7 @@ available @command{awk} implementations. @ifset FOR_PRINT -@ref{Copying}, +@DBREF{Copying} presents the license that covers the @command{gawk} source code. The version of this @value{DOCUMENT} distributed with @command{gawk} @@ -3403,7 +3416,7 @@ and array sorting. As we develop our presentation of the @command{awk} language, we introduce most of the variables and many of the functions. They are described -systematically in @ref{Built-in Variables}, and +systematically in @ref{Built-in Variables}, and in @ref{Built-in}. @node When @@ -5196,7 +5209,7 @@ The escape sequences described @ifnotinfo earlier @end ifnotinfo -in @ref{Escape Sequences}, +in @DBREF{Escape Sequences} are valid inside a regexp. They are introduced by a @samp{\} and are recognized and converted into corresponding real characters as the very first step in processing regexps. @@ -5432,7 +5445,7 @@ Within a bracket expression, a @dfn{range expression} consists of two characters separated by a hyphen. It matches any single character that sorts between the two characters, based upon the system's native character set. For example, @samp{[0-9]} is equivalent to @samp{[0123456789]}. -(See @ref{Ranges and Locales}, for an explanation of how the POSIX +(See @DBREF{Ranges and Locales} for an explanation of how the POSIX standard and @command{gawk} have changed over time. This is mainly of historical interest.) @@ -8013,6 +8026,16 @@ processing on the next record @emph{right now}. 
For example: @} @end example +@c 8/2014: Here is some sample input: +@ignore +mon/*comment*/key +rab/*commen +t*/bit +horse /*comment*/more text +part 1 /*comment*/part 2 /*comment*/part 3 +no comment +@end ignore + This @command{awk} program deletes C-style comments (@samp{/* @dots{} */}) from the input. It uses a number of features we haven't covered yet, including @@ -8428,7 +8451,7 @@ probably by accident, and you should reconsider what it is you're trying to accomplish. @item -@ref{Getline Summary}, presents a table summarizing the +@DBREF{Getline Summary} presents a table summarizing the @code{getline} variants and which variables they can affect. It is worth noting that those variants which do not use redirection can cause @code{FILENAME} to be updated if they cause @@ -15033,7 +15056,7 @@ changed. @cindex arguments, command-line @cindex command line, arguments -@ref{Auto-set}, +@DBREF{Auto-set} presented the following program describing the information contained in @code{ARGC} and @code{ARGV}: @@ -19809,7 +19832,7 @@ being aware of them. @cindex pointers to functions @cindex differences in @command{awk} and @command{gawk}, indirect function calls -This section describes a @command{gawk}-specific extension. +This section describes an advanced, @command{gawk}-specific extension. Often, you may wish to defer the choice of function to call until runtime. For example, you may have different kinds of records, each of which @@ -19855,7 +19878,7 @@ To process the data, you might write initially: @noindent This style of programming works, but can be awkward. With @dfn{indirect} function calls, you tell @command{gawk} to use the @emph{value} of a -variable as the name of the function to call. +variable as the @emph{name} of the function to call. @cindex @code{@@}-notation for indirect function calls @cindex indirect function calls, @code{@@}-notation @@ -19917,7 +19940,6 @@ Otherwise they perform the expected computations and are not unusual. 
@example @c file eg/prog/indirectcall.awk # For each record, print the class name and the requested statistics - @{ class_name = $1 gsub(/_/, " ", class_name) # Replace _ with spaces @@ -20146,10 +20168,12 @@ $ @kbd{gawk -f quicksort.awk -f indirectcall.awk class_data2} Remember that you must supply a leading @samp{@@} in front of an indirect function call. -Unfortunately, indirect function calls cannot be used with the built-in functions. However, -you can generally write ``wrapper'' functions which call the built-in ones, and those can -be called indirectly. (Other than, perhaps, the mathematical functions, there is not a lot -of reason to try to call the built-in functions indirectly.) +Starting with @value{PVERSION} 4.1.2 of @command{gawk}, indirect function +calls may also be used with built-in functions and with extension functions +(@pxref{Dynamic Extensions}). The only thing you cannot do is pass a regular +expression constant to a built-in function through an indirect function +call.@footnote{This may change in a future version; recheck the documentation that +comes with your version of @command{gawk} to see if it has.} @command{gawk} does its best to make indirect function calls efficient. For example, in the following case: @@ -20160,7 +20184,7 @@ for (i = 1; i <= n; i++) @end example @noindent -@code{gawk} will look up the actual function to call only once. +@code{gawk} looks up the actual function to call only once. @node Functions Summary @section Summary @@ -20200,6 +20224,8 @@ from the real parameters by extra whitespace. User-defined functions may call other user-defined (and built-in) functions and may call themselves recursively. Function parameters ``hide'' any global variables of the same names. +You cannot use the name of a reserved variable (such as @code{ARGC}) +as the name of a parameter in user-defined functions. @item Scalar values are passed to user-defined functions by value. Array @@ -20218,7 +20244,7 @@ either scalar or array. 
@item @command{gawk} provides indirect function calls using a special syntax. -By setting a variable to the name of a user-defined function, you can +By setting a variable to the name of a function, you can determine at runtime what function will be called at that point in the program. This is equivalent to function pointers in C and C++. @@ -20253,7 +20279,7 @@ It contains the following chapters: @c STARTOFRANGE fudlib @cindex functions, user-defined, library of -@ref{User-defined}, describes how to write +@DBREF{User-defined} describes how to write your own @command{awk} functions. Writing functions is important, because it allows you to encapsulate algorithms and program tasks in a single place. It simplifies programming, making program development more @@ -20286,7 +20312,7 @@ use these functions. The functions are presented here in a progression from simple to complex. @cindex Texinfo -@ref{Extract Program}, +@DBREF{Extract Program} presents a program that you can use to extract the source code for these example library functions and programs from the Texinfo source for this @value{DOCUMENT}. @@ -20437,7 +20463,7 @@ A different convention, common in the Tcl community, is to use a single associative array to hold the values needed by the library function(s), or ``package.'' This significantly decreases the number of actual global names in use. 
For example, the functions described in -@ref{Passwd Functions}, +@DBREF{Passwd Functions} might have used array elements @code{@w{PW_data["inited"]}}, @code{@w{PW_data["total"]}}, @code{@w{PW_data["count"]}}, and @code{@w{PW_data["awklib"]}}, instead of @code{@w{_pw_inited}}, @code{@w{_pw_awklib}}, @code{@w{_pw_total}}, @@ -21000,7 +21026,7 @@ more difficult than they really need to be.} @cindex timestamps, formatted @cindex time, managing The @code{systime()} and @code{strftime()} functions described in -@ref{Time Functions}, +@DBREF{Time Functions} provide the minimum functionality necessary for dealing with the time of day in human readable form. While @code{strftime()} is extensive, the control formats are not necessarily easy to remember or intuitively obvious when @@ -21086,7 +21112,7 @@ function getlocaltime(time, ret, now, i) The string indices are easier to use and read than the various formats required by @code{strftime()}. The @code{alarm} program presented in -@ref{Alarm Program}, +@DBREF{Alarm Program} uses this function. A more general design for the @code{getlocaltime()} function would have allowed the user to supply an optional timestamp value to use instead @@ -21118,10 +21144,13 @@ This function reads from @code{file} one record at a time, building up the full contents of the file in the local variable @code{contents}. It works, but is not necessarily @c 8/2014. Thanks to BWK for pointing this out: -efficient.@footnote{Execution time grows quadratically in the size of +efficient. 
+@ignore +@footnote{Execution time grows quadratically in the size of the input; for each record, @command{awk} has to allocate a bigger internal buffer for @code{contents}, copy the old contents into it, and then append the contents of the new record.} +@end ignore The following function, based on a suggestion by Denis Shirokov, reads the entire contents of the named file in one shot: @@ -21294,7 +21323,7 @@ END @{ endfile(_filename_) @} @c endfile @end example -@ref{Wc Program}, +@DBREF{Wc Program} shows how this library function can be used and how it simplifies writing the main program. @@ -22297,7 +22326,7 @@ once. If you are worried about squeezing every last cycle out of your this is not necessary, since most @command{awk} programs are I/O-bound, and such a change would clutter up the code. -The @command{id} program in @ref{Id Program}, +The @command{id} program in @DBREF{Id Program} uses these functions. @c ENDOFRANGE libfudata @c ENDOFRANGE flibudata @@ -22323,7 +22352,7 @@ uses these functions. @cindex group file @cindex files, group Much of the discussion presented in -@ref{Passwd Functions}, +@DBREF{Passwd Functions} applies to the group database as well. Although there has traditionally been a well-known file (@file{/etc/group}) in a well-known format, the POSIX standard only provides a set of C library routines @@ -22662,13 +22691,13 @@ Most of the work is in scanning the database and building the various associative arrays. The functions that the user calls are themselves very simple, relying on @command{awk}'s associative arrays to do work. -The @command{id} program in @ref{Id Program}, +The @command{id} program in @DBREF{Id Program} uses these functions. @node Walking Arrays @section Traversing Arrays of Arrays -@ref{Arrays of Arrays}, described how @command{gawk} +@DBREF{Arrays of Arrays} described how @command{gawk} provides arrays of arrays. In particular, any element of an array may be either a scalar, or another array. 
The @code{isarray()} function (@pxref{Type Functions}) @@ -22823,7 +22852,7 @@ As a related challenge, revise that code to handle the case where an intervening value in @code{ARGV} is a variable assignment. @item -@ref{Walking Arrays}, presented a function that walked a multidimensional +@DBREF{Walking Arrays} presented a function that walked a multidimensional array to print it out. However, walking an array and processing each element is a general-purpose operation. Generalize the @code{walk_array()} function by adding an additional parameter named @@ -23836,6 +23865,11 @@ This program is a bit sloppy; it relies on @command{awk} to automatically close instead of doing it in an @code{END} rule. It also assumes that letters are contiguous in the character set, which isn't true for EBCDIC systems. +@ifset FOR_PRINT +You might want to consider how to eliminate the use of +@code{ord()} and @code{chr()}; this can be done in such a +way as to solve the EBCDIC issue as well. +@end ifset @c ENDOFRANGE filspl @c ENDOFRANGE split @@ -24081,7 +24115,7 @@ BEGIN @{ else if (c == "c") do_count++ else if (index("0123456789", c) != 0) @{ - # getopt requires args to options + # getopt() requires args to options # this messes us up for things like -5 if (Optarg ~ /^[[:digit:]]+$/) fcount = (c Optarg) + 0 @@ -24218,6 +24252,22 @@ END @{ @} @c endfile @end example + +@ifset FOR_PRINT +The logic for choosing which lines to print represents a @dfn{state +machine}, which is ``a device that can be in one of a set number of stable +conditions depending on its previous condition and on the present values +of its inputs.''@footnote{This is the definition returned from entering +@code{define: state machine} into Google.} +Brian Kernighan suggests that +``an alternative approach to state machines is to just read +the input into an array, then use indexing. 
It's almost always +easier code, and for most inputs where you would use this, just +as fast.'' Consider how to rewrite the logic to follow this +suggestion. +@end ifset + + @c ENDOFRANGE prunt @c ENDOFRANGE tpul @c ENDOFRANGE uniq @@ -24743,7 +24793,7 @@ of standard @command{awk}: dealing with individual characters is very painful, requiring repeated use of the @code{substr()}, @code{index()}, and @code{gsub()} built-in functions (@pxref{String Functions}).@footnote{This -program was written before @command{gawk} acquired the ability to +program was also written before @command{gawk} acquired the ability to split each character in a string into separate array elements.} There are two functions. The first, @code{stranslate()}, takes three arguments: @@ -26357,6 +26407,23 @@ The @code{split.awk} program (@pxref{Split Program}) assumes that letters are contiguous in the character set, which isn't true for EBCDIC systems. Fix this problem. +(Hint: Consider a different way to work through the alphabet, +without relying on @code{ord()} and @code{chr()}.) + +@item +In @file{uniq.awk} (@pxref{Uniq Program}), the +logic for choosing which lines to print represents a @dfn{state +machine}, which is ``a device that can be in one of a set number of stable +conditions depending on its previous condition and on the present values +of its inputs.''@footnote{This is the definition returned from entering +@code{define: state machine} into Google.} +Brian Kernighan suggests that +``an alternative approach to state machines is to just read +the input into an array, then use indexing. It's almost always +easier code, and for most inputs where you would use this, just +as fast.'' Rewrite the logic to follow this +suggestion. + @item Why can't the @file{wc.awk} program (@pxref{Wc Program}) just @@ -26634,7 +26701,7 @@ Often, though, it is desirable to be able to loop over the elements in a particular order that you, the programmer, choose. @command{gawk} lets you do this. 
-@ref{Controlling Scanning}, describes how you can assign special, +@DBREF{Controlling Scanning} describes how you can assign special, pre-defined values to @code{PROCINFO["sorted_in"]} in order to control the order in which @command{gawk} traverses an array during a @code{for} loop. @@ -29790,7 +29857,9 @@ responds @samp{syntax error}. When you do figure out what your mistake was, though, you'll feel like a real guru. @item -If you perused the dump of opcodes in @ref{Miscellaneous Debugger Commands}, +@c NOTE: no comma after the ref{} on purpose, due to following +@c parenthetical remark. +If you perused the dump of opcodes in @ref{Miscellaneous Debugger Commands} (or if you are already familiar with @command{gawk} internals), you will realize that much of the internal manipulation of data in @command{gawk}, as in many interpreters, is done on a stack. @@ -38251,7 +38320,7 @@ as well as any considerations you should bear in mind. @appendixsubsec Accessing The @command{gawk} Git Repository As @command{gawk} is Free Software, the source code is always available. -@ref{Gawk Distribution}, describes how to get and build the formal, +@DBREF{Gawk Distribution} describes how to get and build the formal, released versions of @command{gawk}. @cindex @command{git} utility diff --git a/doc/gawktexi.in b/doc/gawktexi.in index 3a443b55..9bef7907 100644 --- a/doc/gawktexi.in +++ b/doc/gawktexi.in @@ -160,6 +160,19 @@ @end macro @end ifdocbook +@c hack for docbook, where comma shouldn't always follow an @ref{} +@ifdocbook +@macro DBREF{text} +@ref{\text\} +@end macro +@end ifdocbook + +@ifnotdocbook +@macro DBREF{text} +@ref{\text\}, +@end macro +@end ifnotdocbook + @ifclear FOR_PRINT @set FN file name @set FFN File Name @@ -1589,7 +1602,7 @@ available @command{awk} implementations. @ifset FOR_PRINT -@ref{Copying}, +@DBREF{Copying} presents the license that covers the @command{gawk} source code. 
The version of this @value{DOCUMENT} distributed with @command{gawk} @@ -3314,7 +3327,7 @@ and array sorting. As we develop our presentation of the @command{awk} language, we introduce most of the variables and many of the functions. They are described -systematically in @ref{Built-in Variables}, and +systematically in @ref{Built-in Variables}, and in @ref{Built-in}. @node When @@ -5024,7 +5037,7 @@ The escape sequences described @ifnotinfo earlier @end ifnotinfo -in @ref{Escape Sequences}, +in @DBREF{Escape Sequences} are valid inside a regexp. They are introduced by a @samp{\} and are recognized and converted into corresponding real characters as the very first step in processing regexps. @@ -5260,7 +5273,7 @@ Within a bracket expression, a @dfn{range expression} consists of two characters separated by a hyphen. It matches any single character that sorts between the two characters, based upon the system's native character set. For example, @samp{[0-9]} is equivalent to @samp{[0123456789]}. -(See @ref{Ranges and Locales}, for an explanation of how the POSIX +(See @DBREF{Ranges and Locales} for an explanation of how the POSIX standard and @command{gawk} have changed over time. This is mainly of historical interest.) @@ -7615,6 +7628,16 @@ processing on the next record @emph{right now}. For example: @} @end example +@c 8/2014: Here is some sample input: +@ignore +mon/*comment*/key +rab/*commen +t*/bit +horse /*comment*/more text +part 1 /*comment*/part 2 /*comment*/part 3 +no comment +@end ignore + This @command{awk} program deletes C-style comments (@samp{/* @dots{} */}) from the input. It uses a number of features we haven't covered yet, including @@ -8030,7 +8053,7 @@ probably by accident, and you should reconsider what it is you're trying to accomplish. @item -@ref{Getline Summary}, presents a table summarizing the +@DBREF{Getline Summary} presents a table summarizing the @code{getline} variants and which variables they can affect. 
It is worth noting that those variants which do not use redirection can cause @code{FILENAME} to be updated if they cause @@ -14321,7 +14344,7 @@ changed. @cindex arguments, command-line @cindex command line, arguments -@ref{Auto-set}, +@DBREF{Auto-set} presented the following program describing the information contained in @code{ARGC} and @code{ARGV}: @@ -18936,7 +18959,7 @@ being aware of them. @cindex pointers to functions @cindex differences in @command{awk} and @command{gawk}, indirect function calls -This section describes a @command{gawk}-specific extension. +This section describes an advanced, @command{gawk}-specific extension. Often, you may wish to defer the choice of function to call until runtime. For example, you may have different kinds of records, each of which @@ -18982,7 +19005,7 @@ To process the data, you might write initially: @noindent This style of programming works, but can be awkward. With @dfn{indirect} function calls, you tell @command{gawk} to use the @emph{value} of a -variable as the name of the function to call. +variable as the @emph{name} of the function to call. @cindex @code{@@}-notation for indirect function calls @cindex indirect function calls, @code{@@}-notation @@ -19044,7 +19067,6 @@ Otherwise they perform the expected computations and are not unusual. @example @c file eg/prog/indirectcall.awk # For each record, print the class name and the requested statistics - @{ class_name = $1 gsub(/_/, " ", class_name) # Replace _ with spaces @@ -19273,10 +19295,12 @@ $ @kbd{gawk -f quicksort.awk -f indirectcall.awk class_data2} Remember that you must supply a leading @samp{@@} in front of an indirect function call. -Unfortunately, indirect function calls cannot be used with the built-in functions. However, -you can generally write ``wrapper'' functions which call the built-in ones, and those can -be called indirectly. 
(Other than, perhaps, the mathematical functions, there is not a lot -of reason to try to call the built-in functions indirectly.) +Starting with @value{PVERSION} 4.1.2 of @command{gawk}, indirect function +calls may also be used with built-in functions and with extension functions +(@pxref{Dynamic Extensions}). The only thing you cannot do is pass a regular +expression constant to a built-in function through an indirect function +call.@footnote{This may change in a future version; recheck the documentation that +comes with your version of @command{gawk} to see if it has.} @command{gawk} does its best to make indirect function calls efficient. For example, in the following case: @@ -19287,7 +19311,7 @@ for (i = 1; i <= n; i++) @end example @noindent -@code{gawk} will look up the actual function to call only once. +@code{gawk} looks up the actual function to call only once. @node Functions Summary @section Summary @@ -19327,6 +19351,8 @@ from the real parameters by extra whitespace. User-defined functions may call other user-defined (and built-in) functions and may call themselves recursively. Function parameters ``hide'' any global variables of the same names. +You cannot use the name of a reserved variable (such as @code{ARGC}) +as the name of a parameter in user-defined functions. @item Scalar values are passed to user-defined functions by value. Array @@ -19345,7 +19371,7 @@ either scalar or array. @item @command{gawk} provides indirect function calls using a special syntax. -By setting a variable to the name of a user-defined function, you can +By setting a variable to the name of a function, you can determine at runtime what function will be called at that point in the program. This is equivalent to function pointers in C and C++. 
@@ -19380,7 +19406,7 @@ It contains the following chapters: @c STARTOFRANGE fudlib @cindex functions, user-defined, library of -@ref{User-defined}, describes how to write +@DBREF{User-defined} describes how to write your own @command{awk} functions. Writing functions is important, because it allows you to encapsulate algorithms and program tasks in a single place. It simplifies programming, making program development more @@ -19413,7 +19439,7 @@ use these functions. The functions are presented here in a progression from simple to complex. @cindex Texinfo -@ref{Extract Program}, +@DBREF{Extract Program} presents a program that you can use to extract the source code for these example library functions and programs from the Texinfo source for this @value{DOCUMENT}. @@ -19564,7 +19590,7 @@ A different convention, common in the Tcl community, is to use a single associative array to hold the values needed by the library function(s), or ``package.'' This significantly decreases the number of actual global names in use. For example, the functions described in -@ref{Passwd Functions}, +@DBREF{Passwd Functions} might have used array elements @code{@w{PW_data["inited"]}}, @code{@w{PW_data["total"]}}, @code{@w{PW_data["count"]}}, and @code{@w{PW_data["awklib"]}}, instead of @code{@w{_pw_inited}}, @code{@w{_pw_awklib}}, @code{@w{_pw_total}}, @@ -20127,7 +20153,7 @@ more difficult than they really need to be.} @cindex timestamps, formatted @cindex time, managing The @code{systime()} and @code{strftime()} functions described in -@ref{Time Functions}, +@DBREF{Time Functions} provide the minimum functionality necessary for dealing with the time of day in human readable form. While @code{strftime()} is extensive, the control formats are not necessarily easy to remember or intuitively obvious when @@ -20213,7 +20239,7 @@ function getlocaltime(time, ret, now, i) The string indices are easier to use and read than the various formats required by @code{strftime()}. 
The @code{alarm} program presented in -@ref{Alarm Program}, +@DBREF{Alarm Program} uses this function. A more general design for the @code{getlocaltime()} function would have allowed the user to supply an optional timestamp value to use instead @@ -20245,10 +20271,13 @@ This function reads from @code{file} one record at a time, building up the full contents of the file in the local variable @code{contents}. It works, but is not necessarily @c 8/2014. Thanks to BWK for pointing this out: -efficient.@footnote{Execution time grows quadratically in the size of +efficient. +@ignore +@footnote{Execution time grows quadratically in the size of the input; for each record, @command{awk} has to allocate a bigger internal buffer for @code{contents}, copy the old contents into it, and then append the contents of the new record.} +@end ignore The following function, based on a suggestion by Denis Shirokov, reads the entire contents of the named file in one shot: @@ -20421,7 +20450,7 @@ END @{ endfile(_filename_) @} @c endfile @end example -@ref{Wc Program}, +@DBREF{Wc Program} shows how this library function can be used and how it simplifies writing the main program. @@ -21395,7 +21424,7 @@ once. If you are worried about squeezing every last cycle out of your this is not necessary, since most @command{awk} programs are I/O-bound, and such a change would clutter up the code. -The @command{id} program in @ref{Id Program}, +The @command{id} program in @DBREF{Id Program} uses these functions. @c ENDOFRANGE libfudata @c ENDOFRANGE flibudata @@ -21421,7 +21450,7 @@ uses these functions. @cindex group file @cindex files, group Much of the discussion presented in -@ref{Passwd Functions}, +@DBREF{Passwd Functions} applies to the group database as well. 
Although there has traditionally been a well-known file (@file{/etc/group}) in a well-known format, the POSIX standard only provides a set of C library routines @@ -21760,13 +21789,13 @@ Most of the work is in scanning the database and building the various associative arrays. The functions that the user calls are themselves very simple, relying on @command{awk}'s associative arrays to do work. -The @command{id} program in @ref{Id Program}, +The @command{id} program in @DBREF{Id Program} uses these functions. @node Walking Arrays @section Traversing Arrays of Arrays -@ref{Arrays of Arrays}, described how @command{gawk} +@DBREF{Arrays of Arrays} described how @command{gawk} provides arrays of arrays. In particular, any element of an array may be either a scalar, or another array. The @code{isarray()} function (@pxref{Type Functions}) @@ -21921,7 +21950,7 @@ As a related challenge, revise that code to handle the case where an intervening value in @code{ARGV} is a variable assignment. @item -@ref{Walking Arrays}, presented a function that walked a multidimensional +@DBREF{Walking Arrays} presented a function that walked a multidimensional array to print it out. However, walking an array and processing each element is a general-purpose operation. Generalize the @code{walk_array()} function by adding an additional parameter named @@ -22934,6 +22963,11 @@ This program is a bit sloppy; it relies on @command{awk} to automatically close instead of doing it in an @code{END} rule. It also assumes that letters are contiguous in the character set, which isn't true for EBCDIC systems. +@ifset FOR_PRINT +You might want to consider how to eliminate the use of +@code{ord()} and @code{chr()}; this can be done in such a +way as to solve the EBCDIC issue as well. 
+@end ifset @c ENDOFRANGE filspl @c ENDOFRANGE split @@ -23179,7 +23213,7 @@ BEGIN @{ else if (c == "c") do_count++ else if (index("0123456789", c) != 0) @{ - # getopt requires args to options + # getopt() requires args to options # this messes us up for things like -5 if (Optarg ~ /^[[:digit:]]+$/) fcount = (c Optarg) + 0 @@ -23316,6 +23350,22 @@ END @{ @} @c endfile @end example + +@ifset FOR_PRINT +The logic for choosing which lines to print represents a @dfn{state +machine}, which is ``a device that can be in one of a set number of stable +conditions depending on its previous condition and on the present values +of its inputs.''@footnote{This is the definition returned from entering +@code{define: state machine} into Google.} +Brian Kernighan suggests that +``an alternative approach to state machines is to just read +the input into an array, then use indexing. It's almost always +easier code, and for most inputs where you would use this, just +as fast.'' Consider how to rewrite the logic to follow this +suggestion. +@end ifset + + @c ENDOFRANGE prunt @c ENDOFRANGE tpul @c ENDOFRANGE uniq @@ -23841,7 +23891,7 @@ of standard @command{awk}: dealing with individual characters is very painful, requiring repeated use of the @code{substr()}, @code{index()}, and @code{gsub()} built-in functions (@pxref{String Functions}).@footnote{This -program was written before @command{gawk} acquired the ability to +program was also written before @command{gawk} acquired the ability to split each character in a string into separate array elements.} There are two functions. The first, @code{stranslate()}, takes three arguments: @@ -25455,6 +25505,23 @@ The @code{split.awk} program (@pxref{Split Program}) assumes that letters are contiguous in the character set, which isn't true for EBCDIC systems. Fix this problem. +(Hint: Consider a different way to work through the alphabet, +without relying on @code{ord()} and @code{chr()}.)
+ +@item +In @file{uniq.awk} (@pxref{Uniq Program}), the +logic for choosing which lines to print represents a @dfn{state +machine}, which is ``a device that can be in one of a set number of stable +conditions depending on its previous condition and on the present values +of its inputs.''@footnote{This is the definition returned from entering +@code{define: state machine} into Google.} +Brian Kernighan suggests that +``an alternative approach to state machines is to just read +the input into an array, then use indexing. It's almost always +easier code, and for most inputs where you would use this, just +as fast.'' Rewrite the logic to follow this +suggestion. + @item Why can't the @file{wc.awk} program (@pxref{Wc Program}) just @@ -25732,7 +25799,7 @@ Often, though, it is desirable to be able to loop over the elements in a particular order that you, the programmer, choose. @command{gawk} lets you do this. -@ref{Controlling Scanning}, describes how you can assign special, +@DBREF{Controlling Scanning} describes how you can assign special, pre-defined values to @code{PROCINFO["sorted_in"]} in order to control the order in which @command{gawk} traverses an array during a @code{for} loop. @@ -28888,7 +28955,9 @@ responds @samp{syntax error}. When you do figure out what your mistake was, though, you'll feel like a real guru. @item -If you perused the dump of opcodes in @ref{Miscellaneous Debugger Commands}, +@c NOTE: no comma after the ref{} on purpose, due to following +@c parenthetical remark. +If you perused the dump of opcodes in @ref{Miscellaneous Debugger Commands} (or if you are already familiar with @command{gawk} internals), you will realize that much of the internal manipulation of data in @command{gawk}, as in many interpreters, is done on a stack. @@ -37349,7 +37418,7 @@ as well as any considerations you should bear in mind. @appendixsubsec Accessing The @command{gawk} Git Repository As @command{gawk} is Free Software, the source code is always available.
-@ref{Gawk Distribution}, describes how to get and build the formal, +@DBREF{Gawk Distribution} describes how to get and build the formal, released versions of @command{gawk}. @cindex @command{git} utility diff --git a/helpers/ChangeLog b/helpers/ChangeLog index c9121403..a5bbafb1 100644 --- a/helpers/ChangeLog +++ b/helpers/ChangeLog @@ -1,3 +1,7 @@ +2014-09-04 Arnold D. Robbins <arnold@skeeve.com> + + * chlistref.awk: New file. Finds @ref{} to non-chapters. + 2014-06-08 Arnold D. Robbins <arnold@skeeve.com> * testdfa.c: Minor improvements. diff --git a/helpers/chlistref.awk b/helpers/chlistref.awk new file mode 100644 index 00000000..49f63f59 --- /dev/null +++ b/helpers/chlistref.awk @@ -0,0 +1,31 @@ +BEGIN { + chapters["Getting Started"]++ + chapters["Invoking Gawk"]++ + chapters["Regexp"]++ + chapters["Reading Files"]++ + chapters["Printing"]++ + chapters["Expressions"]++ + chapters["Patterns and Actions"]++ + chapters["Arrays"]++ + chapters["Functions"]++ + chapters["Library Functions"]++ + chapters["Sample Programs"]++ + chapters["Advanced Features"]++ + chapters["Internationalization"]++ + chapters["Debugger"]++ + chapters["Arbitrary Precision Arithmetic"]++ + chapters["Dynamic Extensions"]++ + chapters["Language History"]++ + chapters["Installation"]++ + chapters["Notes"]++ + chapters["Basic Concepts"]++ + + Pattern = ".*@ref\\{([^}]+)\\},.*" +} + +$0 ~ Pattern { + ref = gensub(Pattern, "\\1", 1, $0) + if (! 
(ref in chapters)) + printf("%s:%d: %s\n", FILENAME, FNR, $0) +} + diff --git a/interpret.h b/interpret.h index be017355..fee8136e 100644 --- a/interpret.h +++ b/interpret.h @@ -1039,13 +1039,42 @@ match_re: } if (f == NULL) { - /* FIXME: See if function is a built-in and try to call it */ - fatal(_("`%s' is not a user-defined function, so it cannot be called indirectly"), + int arg_count = (pc + 1)->expr_count; + builtin_func_t the_func = lookup_builtin(t1->stptr); + + if (the_func == NULL) + fatal(_("`%s' is not a user-defined function, so it cannot be called indirectly"), t1->stptr); - } else if(f->type != Node_func) { - if (f->type == Node_ext_func || f->type == Node_old_ext_func) - fatal(_("cannot (yet) call extension functions indirectly")); - else + + /* call it */ + r = the_func(arg_count); + PUSH(r); + break; + } else if (f->type != Node_func) { + if ( f->type == Node_ext_func + || f->type == Node_old_ext_func) { + /* code copied from below, keep in sync */ + INSTRUCTION *bc; + char *fname = pc->func_name; + int arg_count = (pc + 1)->expr_count; + static INSTRUCTION npc[2]; + + npc[0] = *pc; + + bc = f->code_ptr; + assert(bc->opcode == Op_symbol); + if (f->type == Node_ext_func) + npc[0].opcode = Op_ext_builtin; /* self modifying code */ + else + npc[0].opcode = Op_old_ext_builtin; /* self modifying code */ + npc[0].extfunc = bc->extfunc; + npc[0].expr_count = arg_count; /* actual argument count */ + npc[1] = pc[1]; + npc[1].func_name = fname; /* name of the builtin */ + npc[1].expr_count = bc->expr_count; /* defined max # of arguments */ + ni = npc; + JUMPTO(ni); + } else fatal(_("function called indirectly through `%s' does not exist"), pc->func_name); } @@ -1069,6 +1098,7 @@ match_re: } if (f->type == Node_ext_func || f->type == Node_old_ext_func) { + /* keep in sync with indirect call code */ INSTRUCTION *bc; char *fname = pc->func_name; int arg_count = (pc + 1)->expr_count; @@ -731,20 +731,28 @@ cleanup: ip = pc + 1; 
indent(ip->forloop_body->exec_count); fprintf(prof_fp, "%s (", op2str(pc->opcode)); - pprint(pc->nexti, ip->forloop_cond, true); - fprintf(prof_fp, "; "); - if (ip->forloop_cond->opcode == Op_no_op && - ip->forloop_cond->nexti == ip->forloop_body) + /* If empty for loop header, print it a little more nicely. */ + if ( pc->nexti->opcode == Op_no_op + && ip->forloop_cond == pc->nexti + && pc->target_continue->opcode == Op_jmp) { + fprintf(prof_fp, ";;"); + } else { + pprint(pc->nexti, ip->forloop_cond, true); fprintf(prof_fp, "; "); - else { - pprint(ip->forloop_cond, ip->forloop_body, true); - t1 = pp_pop(); - fprintf(prof_fp, "%s; ", t1->pp_str); - pp_free(t1); - } - pprint(pc->target_continue, pc->target_break, true); + if (ip->forloop_cond->opcode == Op_no_op && + ip->forloop_cond->nexti == ip->forloop_body) + fprintf(prof_fp, "; "); + else { + pprint(ip->forloop_cond, ip->forloop_body, true); + t1 = pp_pop(); + fprintf(prof_fp, "%s; ", t1->pp_str); + pp_free(t1); + } + + pprint(pc->target_continue, pc->target_break, true); + } fprintf(prof_fp, ") {\n"); indent_in(); pprint(ip->forloop_body->nexti, pc->target_continue, false); diff --git a/test/ChangeLog b/test/ChangeLog index 8f8c9c31..47835e75 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,9 @@ +2014-09-04 Arnold D. Robbins <arnold@skeeve.com> + + * profile2.ok: Update after code improvement in profiler. + * functab4.ok: Update after making indirect calls of + extension functions work. :-) + 2014-08-15 Arnold D. Robbins <arnold@skeeve.com> * badargs.ok: Adjust after revising text for -L option.
diff --git a/test/functab4.ok b/test/functab4.ok index 70a520b7..8eaab508 100644 --- a/test/functab4.ok +++ b/test/functab4.ok @@ -1,3 +1,2 @@ x = chdir -gawk: functab4.awk:11: fatal: cannot (yet) call extension functions indirectly -EXIT CODE: 2 +we are now in --> /tmp diff --git a/test/profile2.ok b/test/profile2.ok index fe76a2c9..50c7e190 100644 --- a/test/profile2.ok +++ b/test/profile2.ok @@ -7,7 +7,7 @@ 1 asplit("BEGIN:END:atan2:break:close:continue:cos:delete:" "do:else:exit:exp:for:getline:gsub:if:in:index:int:" "length:log:match:next:print:printf:rand:return:sin:" "split:sprintf:sqrt:srand:sub:substr:system:while", keywords, ":") 1 split("00:00:00:00:00:00:00:00:00:00:" "20:10:10:12:12:11:07:00:00:00:" "08:08:08:08:08:33:08:00:00:00:" "08:44:08:36:08:08:08:00:00:00:" "08:44:45:42:42:41:08", machine, ":") 1 state = 1 - 571 for (; ; ) { + 571 for (;;) { 571 symb = lex() 571 nextstate = substr(machine[state symb], 1, 1) 571 act = substr(machine[state symb], 2, 1) @@ -109,7 +109,7 @@ 571 function lex() { - 1702 for (; ; ) { + 1702 for (;;) { 1702 if (tok == "(eof)") { return 7 } |