diff options
Diffstat (limited to 'doc')
-rw-r--r-- | doc/ChangeLog | 82 | ||||
-rw-r--r-- | doc/Makefile.in | 19 | ||||
-rw-r--r-- | doc/awkcard.in | 298 | ||||
-rw-r--r-- | doc/gawk.1 | 547 | ||||
-rw-r--r-- | doc/gawk.info | 3047 | ||||
-rw-r--r-- | doc/gawk.texi | 1678 | ||||
-rw-r--r-- | doc/gawkinet.info | 218 | ||||
-rw-r--r-- | doc/gawkinet.texi | 55 | ||||
-rw-r--r-- | doc/lflashlight.eps | 2 | ||||
-rw-r--r-- | doc/rflashlight.eps | 2 |
10 files changed, 3974 insertions, 1974 deletions
diff --git a/doc/ChangeLog b/doc/ChangeLog index 980c0f26..ec1cc43a 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,85 @@ +Thu Jul 1 21:29:25 2010 Arnold D. Robbins <arnold@skeeve.com> + + * gawk.texi, gawk.1, awkcard.in: Document `/inet4' and `/inet6'. + +Sun Jun 27 21:58:47 2010 Arnold D. Robbins <arnold@skeeve.com> + + * gawk.texi, gawk.1, awkcard.in: Document all short options. + +Wed Jun 2 22:06:22 2010 Arnold D. Robbins <arnold@skeeve.com> + + * gawk.texi, gawk.1, awkcard.in: Document FPAT variable and patsplit + function. + +Fri Jun 12 13:28:24 2009 Arnold D. Robbins <arnold@skeeve.com> + + * gawk.texi, gawk.1: Remove --disable-directories-fatal configuration + option. + +Thu Feb 26 20:36:18 2009 Arnold D. Robbins <arnold@skeeve.com> + + * gawk.texi, gawk.1, awkcard.in: Document BEGINFILE and ENDFILE. + +Mon Feb 16 21:53:22 2009 Arnold D. Robbins <arnold@skeeve.com> + + * gawk.texi: Document switch statements as always available. + * gawk.1: Ditto. + * awkcard.in: Ditto. + +Thu Feb 12 22:36:32 2009 Arnold D. Robbins <arnold@skeeve.com> + + * gawk.texi: Document that interval expressions are now on by default. + Also that --gen-po is now --gen-pot. + * gawk.1: Ditto. + * awkcard.in: Ditto. + +Sat Jan 17 20:03:43 2009 Arnold D. Robbins <arnold@skeeve.com> + + * gawk.texi: Document indirect function calls. + * gawk.1: Ditto. + * awkcard.in: Ditto. + +Tue Dec 30 22:22:04 2008 Assaf Gordon <gordon@cshl.edu> + + * gawk.texi: Document new --sandbox option. + * gawk.1: Ditto. + * awkcard.in: Ditto. + +Tue Dec 30 22:21:11 2008 Arnold D. Robbins <arnold@skeeve.com> + + * gawk.texi: Change --binary to --characters-as-bytes, per Karl Berry. + * gawk.1: Ditto. + * awkcard.in: Ditto. + +Thu Dec 18 05:30:13 2008 Steffen Schuler <schuler.steffen@googlemail.com> + + * gawk.texi: Documented fourth parameter of split(). + * gawk.1: Ditto. + * awkcard.in: Ditto. + +Thu Dec 18 05:16:48 2008 Arnold D. 
Robbins <arnold@skeeve.com> + + * gawk.texi: Minimally document `-b' / --binary. + * gawk.1: Ditto. + * awkcard.in: Ditto. + +Sun Nov 16 22:03:50 2008 Arnold D. Robbins <arnold@skeeve.com> + + * gawk.texi: Fully documented `-r' as synonym for --re-interval. + * gawk.1: Ditto. + * awkcard.in: Ditto. + +Tue Aug 3 13:35:15 2004 Arnold D. Robbins <arnold@skeeve.com> + + * gawk.texi: Document that gawk now uses the 2001 POSIX + rules for `sub' and `gsub'. + +Wed Dec 26 22:15:05 2001 Arnold D. Robbins <arnold@skeeve.com> + + * gawk.texi: Documented that process special files are gone. + * gawk.1: Ditto. + * awkcard.in: Ditto. + Thu May 6 20:55:14 2010 Arnold D. Robbins <arnold@skeeve.com> * Release 3.1.8: Release tar file made. diff --git a/doc/Makefile.in b/doc/Makefile.in index b1533e79..661c90ef 100644 --- a/doc/Makefile.in +++ b/doc/Makefile.in @@ -63,15 +63,16 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ChangeLog \ ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/arch.m4 \ $(top_srcdir)/m4/codeset.m4 $(top_srcdir)/m4/gettext.m4 \ - $(top_srcdir)/m4/iconv.m4 $(top_srcdir)/m4/intmax_t.m4 \ - $(top_srcdir)/m4/inttypes_h.m4 $(top_srcdir)/m4/isc-posix.m4 \ - $(top_srcdir)/m4/lcmessage.m4 $(top_srcdir)/m4/lib-ld.m4 \ - $(top_srcdir)/m4/lib-link.m4 $(top_srcdir)/m4/lib-prefix.m4 \ - $(top_srcdir)/m4/libsigsegv.m4 $(top_srcdir)/m4/longlong.m4 \ - $(top_srcdir)/m4/nls.m4 $(top_srcdir)/m4/po.m4 \ - $(top_srcdir)/m4/progtest.m4 $(top_srcdir)/m4/socket.m4 \ - $(top_srcdir)/m4/stdint_h.m4 $(top_srcdir)/m4/uintmax_t.m4 \ - $(top_srcdir)/m4/ulonglong.m4 $(top_srcdir)/configure.ac + $(top_srcdir)/m4/iconv.m4 $(top_srcdir)/m4/intlmacosx.m4 \ + $(top_srcdir)/m4/intmax_t.m4 $(top_srcdir)/m4/inttypes_h.m4 \ + $(top_srcdir)/m4/isc-posix.m4 $(top_srcdir)/m4/lcmessage.m4 \ + $(top_srcdir)/m4/lib-ld.m4 $(top_srcdir)/m4/lib-link.m4 \ + $(top_srcdir)/m4/lib-prefix.m4 $(top_srcdir)/m4/libsigsegv.m4 \ + $(top_srcdir)/m4/longlong.m4 
$(top_srcdir)/m4/nls.m4 \ + $(top_srcdir)/m4/po.m4 $(top_srcdir)/m4/progtest.m4 \ + $(top_srcdir)/m4/socket.m4 $(top_srcdir)/m4/stdint_h.m4 \ + $(top_srcdir)/m4/uintmax_t.m4 $(top_srcdir)/m4/ulonglong.m4 \ + $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs diff --git a/doc/awkcard.in b/doc/awkcard.in index 2bf5c3d4..0d00c33e 100644 --- a/doc/awkcard.in +++ b/doc/awkcard.in @@ -223,6 +223,11 @@ provide a moment of nostalgia for long time \*(AK users. T} .. +.de TI \" table item +.ti -4n +\\$1 +.br +.. .TS expand, tab(%); l lw(1.3i). @@ -230,61 +235,43 @@ l lw(1.3i). \*(FC\-\^\-field-separator \*(FIfs\*(FR%just like \*(FC\-F\fP. \*(FC\-\^\-file \*(FIprog-file%\*(FRjust like \*(FC\-f\fP. .TE -.TS -expand, tab(%); -ls -l lw(2.2i). -\*(FC\-\^\-compat\*(FR, \*(FC\-\^\-traditional\*(FR -%T{ +.in +4n +.TI "\*(FC\-b\*(FR, \*(FC\-\^\-characters\-as\-bytes\*(FR +Treat all input data as single-byte characters. I.e., +don't attempt to +process strings as multibyte characters. +Overridden by \*(FC\-\^\-posix\*(FR. +.TI "\*(FC\-c\*(FR, \*(FC\-\^\-compat\*(FR, \*(FC\-\^\-traditional\*(FR disable \*(GK-specific extensions (the use of \*(FC\-\^\-traditional\*(FR is preferred). -T} -.T& -ls -l lw(2.2i). -\*(FC\-\^\-copyleft\*(FR, \*(FC\-\^\-copyright\*(FR -%T{ +.TI "\*(FC\-C\*(FR, \*(FC\-\^\-copyleft\*(FR, \*(FC\-\^\-copyright\*(FR print the short version of the GNU copyright information on \*(FCstdout\*(FR. -T} .T& -ls -l lw(2.2i). -\*(FC\-\^\-dump-variables\*(FR[\*(FC=\*(FIfile\*(FR] -%T{ +.TI "\*(FC\-d \*(FIfile\*(FR, \*(FC\-\^\-dump-variables\*(FR[\*(FC=\*(FIfile\*(FR] print a sorted list of global variables, their types and final values to \*(FIfile\*(FR. If no \*(FIfile\*(FR is provided, \*(FCgawk\*(FR uses \*(FCawkvars.out\*(FR. 
-T} -\*(FC\-\^\-exec \*(FIfile\*(FR%T{ +.TI "\*(FC-e '\*(FItext\*(FC'\*(FR, \*(FC\-\^\-source '\*(FItext\*(FC'\*(FR +use \*(FItext\*(FR as AWK program source code. +.TI "\*(FC\-E \*(FIfile\*(FR, \*(FC\-\^\-exec \*(FIfile\*(FR read program text from \*(FIfile\fP. No other options are processed. Also disables command-line variable assignments. Useful with \*(FC#!\fP. -T} -\*(FC\-\^\-gen\-po\*(FR%T{ +.TI "\*(FC\-g\*(FR, \*(FC\-\^\-gen\-pot\*(FR process the program and print a GNU \*(FCgettext\*(FR format \*(FC\&.po\*(FR format file on standard output, containing the text of all strings that were marked for localization. -T} -.T& -ls -l lw(2.2i). -\*(FC\-\^\-help\*(FR, \*(FC\-\^\-usage\*(FR -%T{ +.TI "\*(FC\-h\*(FR, \*(FC\-\^\-help\*(FR, \*(FC\-\^\-usage\*(FR print a short summary of the available options on \*(FCstdout\*(FR, then exit zero. -T} -.T& -ls -l lw(2.2i). -\*(FC\-\^\-lint\*(FR[\*(FC=\*(FIvalue\*(FR] -%T{ -warn about dubios or non-portable constructs. +.TI "\*(FC\-l \*(FR[\*(FC=\*(FIvalue\*(FR], \*(FC\-\^\-lint\*(FR[\*(FC=\*(FIvalue\*(FR] +warn about dubious or non-portable constructs. If \*(FIvalue\*(FR is \*(FCfatal\*(FR, lint warnings become fatal errors. @@ -292,83 +279,43 @@ If \*(FIvalue\*(FR is \*(FCinvalid\*(FR, only issue warnings about things that are actually invalid (not fully implemented yet). -T} -.T& -l lw(2.2i). -\*(FC\-\^\-lint\-old\*(FR%T{ +.TI "\*(FC-L\*(FR, \*(FC\-\^\-lint\-old\*(FR warn about constructs that are not portable to the original version of Unix \*(AK. -T} -.T& -ls -l lw(2.2i). -\*(FC\-\^\-non\-decimal\-data\*(FR -%T{ +.TI "\*(FC\-n\*(FR, \*(FC\-\^\-non\-decimal\-data\*(FR recognize octal and hexadecimal values in input data. \*(FIUse this option with great caution!\*(FR -T} -.T& -ls -l lw(2.2i). -\*(FC\-\^\-optimize\*(FR, \*(FC\-O\*(FR -%T{ +.TI "\*(FC\-N\*(FR, \*(FC\-\^\-use\-lc\-numeric\*(FR +force use of the locale's decimal point character when parsing input data. 
+.TI "\*(FC\-O\*(FR, \*(FC\-\^\-optimize\*(FR enable some internal optimizations. -T} -.T& -l lw(2.2i). -\*(FC\-\^\-posix\*(FR%T{ -disable common and GNU extensions. -Enable \*(FIinterval expressions\*(FR in regular -expression matching (see \fHRegular -Expressions\fP below). -T} -.T& -ls -l lw(2.2i). -\*(FC\-\^\-profile\*(FR[\*(FC=\*(FIprof_file\*(FR] -%T{ -send profiling data to \*(FIprof_file\*(FR +.TI "\*(FC\-p \*(FR[\*(FC\*(FIfile\*(FR], \*(FC\-\^\-profile\*(FR[\*(FC=\*(FIfile\*(FR] +send profiling data to \*(FIfile\*(FR (default: \*(FCawkprof.out\*(FR). With \*(GK, the profile is just a ``pretty printed'' version of the program. With \*(PK, the profile contains execution counts in the left margin of each statement in the program. -T} -.T& -ls -l lw(2.2i). -\*(FC\-\^\-re\-interval\*(FR -%T{ +.TI "\*(FC\-P\*(FR, \*(FC\-\^\-posix\*(FR +disable common and GNU extensions. +.TI "\*(FC\-r\*(FR, \*(FC\-\^\-re\-interval\*(FR enable \*(FIinterval expressions\*(FR in regular expression matching (see \fHRegular Expressions\fP below). Useful if -\*(FC\-\^\-posix\*(FR is not specified. -T} -.T& -ls -l lw(2.2i). -\*(FC\-\^\-source '\*(FItext\*(FC'\*(FR -%T{ -use \*(FItext\*(FR as AWK program source code. -T} -.T& -l lw(2.2i). -\*(FC\-\^\-version\*(FR%T{ +\*(FC\-\^\-traditional\*(FR is specified. +.TI "\*(FC\-S\*(FR, \*(FC\-\^\-sandbox\*(FR +disable the \*(FCsystem\*(FR function, +input redirection with \*(FCgetline\*(FR, +output redirection with \*(FCprint\*(FR and \*(FCprintf\*(FR, +and dynamic extensions loading. +.TI "\*(FC\-V\*(FR, \*(FC\-\^\-version\*(FR print version \" information info on \*(FCstdout\fP and exit zero. -T} -.T& -ls -l lw(2.2i). -\*(FC\-\^\-use\-lc\-numeric -%T{ -force use of the locale's decimal point character when parsing input data. 
-T} -.TE +.in -4n .sp .5 .fi In compatibility mode, @@ -565,6 +512,8 @@ _ \*(FC\eB\*(FR~middle of a word \*(FC\e<\*(FR~beginning of a word \*(FC\e>\*(FR~end of a word +\*(FC\es\*(FR~any whitespace character +\*(FC\eS\*(FR~any non-whitespace character \*(FC\ew\*(FR~any word-constituent character \*(FC\eW\*(FR~any non-word-constituent character \*(FC\e`\*(FR~beginning of a string @@ -579,9 +528,7 @@ _ .fi \*(CRThe \*(FIr\*(FC{\*(FIn\*(FC,\*(FIm\*(FC}\*(FR notation is called an \*(FIinterval expression\fP. POSIX mandates it for AWK regexps, but -most \*(AKs don't implement it. \*(CBUse \*(FC\-\^\-re\-interval\*(FR -or \*(FC\-\^\-posix\*(FR to enable -this feature in \*(GK.\*(CX +most \*(AKs don't implement it.\*(CX .EB "\s+2\f(HBREGULAR EXPRESSIONS\*(FR\s0" @@ -662,8 +609,21 @@ expected to have a fixed width, and \*(GK splits up the record using the specified widths. The value of \*(FCFS\fP is ignored. Assigning a new value to \*(FCFS\fP -overrides the use of \*(FCFIELDWIDTHS\*(FR, -and restores the default behavior.\*(CD +overrides the use of \*(FCFIELDWIDTHS\*(FR, +and restores the default behavior. +.sp .5 +Similarly, if the +\*(FCFPAT\fP +variable is set to a string representing a regular expression, +each field is made up of text that matches that regular expression. In +this case, the regular expression describes the fields themselves, +instead of the text that separates the fields. +Assigning a new value to +\*(FCFS\fP +or +\*(FCFIELDWIDTHS\fP +overrides the use of +\*(FCFPAT\fP.\*(CD .sp .5 Each field in the input record may be referenced by its position, \*(FC$1\*(FR, \*(FC$2\*(FR and so on. @@ -695,6 +655,8 @@ Decreasing the value of \*(FCNF\fP causes the trailing fields to be lost .nf \*(FCBEGIN END + BEGINFILE + ENDFILE \*(FIexpression pat1\*(FC,\*(FIpat2\*(FR .sp .5 @@ -705,6 +667,13 @@ be multiple \*(FCBEGIN\fP and \*(FCEND\fP rules; they are merged and executed as if there had just been one large rule. 
They may occur anywhere in a program, including different source files. .sp .5 +\*(FCBEGINFILE\*(FR and \*(FCENDFILE\*(FR are special patterns that +execute before the first record of each file and after the last record +of each file, respectively. In the \*(FCBEGINFILE\*(FR rule, the \*(FCERRNO\*(FR +variable is non-null if there is a problem with the file; the code should use +\*(FCnextfile\*(FR to skip the file if desired. Otherwise \*(GK exits with +its usual fatal error. +.sp .5 Expression patterns can be any expression, as described under \fHExpressions\fP. .sp .5 @@ -787,8 +756,7 @@ matches the closest \*(FCif\*(FR. .br }\*(FR .br -switch on \*(FIexpression\*(FR, execute \*(FIcase\*(FR if matched, default if not. -For 3.1.x, requires \*(FC\-\^\-enable\-switch\*(FR option to \*(FCconfigure\*(FR.\*(CD +switch on \*(FIexpression\*(FR, execute \*(FIcase\*(FR if matched, default if not.\*(CD .ti -.2i .\" --- End switch statement \*(FCwhile (\*(FIcondition\*(FC) \*(FIstatement \*(FR @@ -884,6 +852,11 @@ T} \*(FCFNR\fP T{ record number in current input file. T} +\*(CB\*(FCFPAT\fP T{ +regular expression describing field contents. +Used to parse the input based on the fields +instead of the field separator.\*(CD +T} \*(FCFS\fP T{ input field separator, a space by default (see \fHFields\fP above). @@ -1003,7 +976,8 @@ numeric strings, they are string constants. The idea of ``numeric string'' only applies to fields, \*(FCgetline\fP input, \*(FCFILENAME\*(FR, \*(FCARGV\fP elements, \*(FCENVIRON\fP elements and the elements of an array created by -\*(FCsplit()\fP that are numeric strings. +\*(FCsplit()\fP or +\*(FCpatsplit()\fP that are numeric strings. The basic idea is that \*(FIuser input\*(FR, and only user input, that looks numeric, should be treated that way. 
@@ -1082,6 +1056,7 @@ i.e., not used on the right-hand side of \*(CB\*(FCgensub()\fP,\*(CD \*(FCgsub()\fP, \*(FCmatch()\fP, +\*(FCpatsplit()\fP, \*(FCsplit()\fP, and \*(FCsub()\fP, @@ -1295,51 +1270,41 @@ File associated with the open file descriptor \*(FIn\*(FR. .ti -.2i \*(FC/inet/tcp/\*(FIlport\*(FC/\*(FIrhost\*(FC/\*(FIrport\*(FR .br -File for TCP/IP connection on local port \*(FIlport\*(FR to +.ti -.2i +\*(FC/inet4/tcp/\*(FIlport\*(FC/\*(FIrhost\*(FC/\*(FIrport\*(FR +.br +.ti -.2i +\*(FC/inet6/tcp/\*(FIlport\*(FC/\*(FIrhost\*(FC/\*(FIrport\*(FR +.br +Files for TCP/IP connections on local port \*(FIlport\*(FR to remote host \*(FIrhost\*(FR on remote port \*(FIrport\*(FR. Use a port of \*(FC0\*(FR to have the system pick a port. +Use \*(FC/inet4\fP to force an IPv4 connection, +and \*(FC/inet6\fP to force an IPv6 connection. +Plain \*(FC/inet\fP uses the system default (probably IPv4). Usable only with the \*(FC|&\*(FR two-way I/O operator. .ti -.2i \*(FC/inet/udp/\*(FIlport\*(FC/\*(FIrhost\*(FC/\*(FIrport\*(FR .br +.ti -.2i +\*(FC/inet4/udp/\*(FIlport\*(FC/\*(FIrhost\*(FC/\*(FIrport\*(FR +.br +.ti -.2i +\*(FC/inet6/udp/\*(FIlport\*(FC/\*(FIrhost\*(FC/\*(FIrport\*(FR +.br Similar, but use UDP/IP instead of TCP/IP. .ti -.2i \*(CR\*(FC/inet/raw/\*(FIlport\*(FC/\*(FIrhost\*(FC/\*(FIrport\*(FR .br +.ti -.2i +\*(FC/inet4/raw/\*(FIlport\*(FC/\*(FIrhost\*(FC/\*(FIrport\*(FR +.br +.ti -.2i +\*(FC/inet6/raw/\*(FIlport\*(FC/\*(FIrhost\*(FC/\*(FIrport\*(FR +.br .\" Similar, but use raw IP sockets. -Reserved for future use.\*(CB +Reserved for future use.\*(CL .in -.2i -.sp .5 -.fi -Other special filenames provide access to information about the running -\*(FCgawk\fP process. -Reading from these files returns a single record. -The filenames and what they return are:\*(FR -.sp .5 -.TS -expand; -l lw(2i). 
-\*(FC/dev/pid\fP process ID of current process -\*(FC/dev/ppid\fP parent process ID of current process -\*(FC/dev/pgrpid\fP process group ID of current process -\*(FC/dev/user\fP T{ -.nf -a single newline-terminated record. -The fields are separated with spaces. -\*(FC$1\fP is the return value of \*(FIgetuid\*(FR(2), -\*(FC$2\fP is the return value of \*(FIgeteuid\*(FR(2), -\*(FC$3\fP is the return value of \*(FIgetgid\*(FR(2) , and -\*(FC$4\fP is the return value of \*(FIgetegid\*(FR(2). -.fi -Any additional fields are the group IDs returned -by \*(FIgetgroups\*(FR(2). Multiple groups may not be -supported on all systems. -T} -.TE -.sp .5 -.fi -\*(CRThese filenames are now obsolete. -Use the \*(FCPROCINFO\fP array to obtain the information they provide.\*(CL .EB "\s+2\f(HBSPECIAL FILENAMES\*(FR\s0" .BT @@ -1540,12 +1505,29 @@ and provide the starting index in the string and length respectively, of each matching substring.\*(CD .ti -.2i -\*(FCsplit(\*(FIs\*(FC, \*(FIa \*(FR[\*(FC, \*(FIr\*(FR]\*(FC)\*(FR +\*(CB\*(FCpatsplit(\*(FIs\*(FC, \*(FIa \*(FR[\*(FC, \*(FIr \*(CB\*(FR[\*(FC, \*(FIseps \*(FR] \*(FR] \*(FC)\*(FR .br splits the string -\*(FIs\fP into the array \*(FIa\fP using the regular expression \*(FIr\*(FR, +\*(FIs\fP into the array \*(FIa\fP +and the array \*(FIseps\fP of separator strings +using the regular expression \*(FIr\*(FR, +and returns the number of fields. +Element values are the portions of \*(FIs\fP that matched \*(FIr\fP. +The value of \*(FIseps\fP[\*(FIi\fP] is the separator that appeared in +front of \*(FIa\fP[\*(FIi\fP+1]. +If \*(FIr\fP is omitted, \*(FCFPAT\fP +is used instead. +The arrays \*(FIa\fP and \*(FIseps\fP are cleared first. 
+Splitting behaves identically to field splitting with \*(FCFPAT\fP.\*(CD +.ti -.2i +\*(FCsplit(\*(FIs\*(FC, \*(FIa \*(FR[\*(FC, \*(FIr \*(CB\*(FR[\*(FC, \*(FIseps \*(FR]\*(CD \*(FR] \*(FC)\*(FR +.br +splits the string +\*(FIs\fP into the array \*(FIa\fP \*(CBand the array \*(FIseps\fP of separator strings\*(CD +using the regular expression \*(FIr\*(FR, and returns the number of fields. If \*(FIr\fP is omitted, \*(FCFS\fP -is used instead. The array \*(FIa\fP is cleared first. +is used instead. +The arrays \*(FIa\fP \*(CBand \*(FIseps\fP\*(CD are cleared first. Splitting behaves identically to field splitting. (See \fHFields\fP, above.) .ti -.2i @@ -1776,7 +1758,7 @@ to use the current domain.\*(CB \*(CDThere are several steps involved in producing and running a localizable \*(AK program. .sp .5 -1. Add a \*(FCBEGIN\*(FR action to assign a value to the +1. Add a \*(FCBEGIN\*(FR action to assign a value to the \*(FCTEXTDOMAIN\*(FR variable to set the text domain for your program. .sp .5 @@ -1788,24 +1770,24 @@ file associated with your program. Without this step, \*(GK uses the \*(FCmessages\*(FR text domain, which probably won't work. .sp .5 -2. Mark all strings that should be translated with leading underscores. +2. Mark all strings that should be translated with leading underscores. .sp .5 -3. Use the +3. Use the \*(FCbindtextdomain()\*(FR, \*(FCdcgettext()\*(FR, and/or \*(FCdcngettext()\*(FR functions in your program, as appropriate. .sp .5 -4. Run +4. Run .sp .5 -.ti +5n -\*(FCgawk \-\^\-gen\-po \-f myprog.awk > myprog.po\*(FR +.ti +3n +\*(FCgawk\0\-\^\-gen\-pot\0\-f\0myprog.awk\0>\0myprog.po\*(FR .sp .5 to generate a \*(FC\&.po\*(FR file for your program. .sp .5 -5. Provide appropriate translations, and build and install a corresponding +5. Provide appropriate translations, and build and install a corresponding \*(FC\&.mo\*(FR file. 
.sp .5 The internationalization features are described in full detail in \*(AM.\*(CB @@ -1855,7 +1837,24 @@ Functions may call each other and may be recursive. Function parameters used as local variables are initialized to the null string and the number zero upon function invocation. .sp .5 -Use \*(FCreturn\fP to return a value from a function. The return value +\*(CBFunctions may be called indirectly. To do this, assign +the name of the function to be called, as a string, to a variable. +Then use the variable as if it were the name of a function, prefixed with +an ``at'' sign, like so:\*(FC +.nf +.sp .5 + function myfunc() + { + print "myfunc called" + } +.sp .3 + { + the_func = "myfunc" + @the_func() + } +.fi +.sp .5 +\*(FR\*(CDUse \*(FCreturn\fP to return a value from a function. The return value is undefined if no value is provided, or if the function returns by ``falling off'' the end. .sp .5 @@ -1886,6 +1885,8 @@ then \*(GK behaves exactly as if the \*(FC\-\^\-posix\fP option had been given.\*(CB .EB "\s+2\f(HBENVIRONMENT VARIABLES (\*(GK\f(HB)\*(FR\s0" +.BT + .\" --- Historical Features .ES .fi @@ -1904,14 +1905,11 @@ equivalent to the \*(FCnext\fP statement. is specified.\*(CB .EB "\s+2\f(HBHISTORICAL FEATURES (\*(GK\f(HB)\*(FR\s0" - -.BT - .\" --- FTP/HTTP Information .ES .nf \*(CDHost: \*(FCftp.gnu.org\*(FR -File: \*(FC/gnu/gawk/gawk-3.1.8.tar.gz\fP +File: \*(FC/gnu/gawk/gawk-4.0.0.tar.gz\fP .in +.2i .fi GNU \*(AK (\*(GK). There may be a later version. @@ -1962,9 +1960,11 @@ translation approved by the Foundation.\*(CX .EB "\s+2\f(HBCOPYING PERMISSIONS\*(FR\s0" .\" Need the BT here to get the final page number +.ig .ES \*(CX -.sp 28 +.sp 10 .EB "\s+2\f(HBNOTES\*(FR\s0" +.. .BT @@ -124,10 +124,33 @@ sign, with no intervening spaces, or they may be provided in the next command line argument. Long options may be abbreviated, as long as the abbreviation remains unique. 
+.PP +Additionally, each long option has a corresponding short +option, so that the option's functionality may be used from +within +.B #! +executable scripts. .SH OPTIONS .PP .I Gawk -accepts the following options, listed by frequency. +accepts the following options. +Standard options are listed first, followed by options for +.I gawk +extensions, listed alphabetically by short option. +.TP +.PD 0 +.BI \-f " program-file" +.TP +.PD +.BI \-\^\-file " program-file" +Read the \*(AK program source from the file +.IR program-file , +instead of from the first command line argument. +Multiple +.B \-f +(or +.BR \-\^\-file ) +options may be used. .TP .PD 0 .BI \-F " fs" @@ -154,20 +177,7 @@ before execution of the program begins. Such variable values are available to the .B BEGIN block of an \*(AK program. -.TP -.PD 0 -.BI \-f " program-file" -.TP -.PD -.BI \-\^\-file " program-file" -Read the \*(AK program source from the file -.IR program-file , -instead of from the first command line argument. -Multiple -.B \-f -(or -.BR \-\^\-file ) -options may be used. +.ig .TP .PD 0 .BI \-mf " NNN" @@ -193,22 +203,22 @@ has no pre-defined limits. (Current versions of the Bell Laboratories .I awk no longer accept them.) +.. .TP .PD 0 -.B \-O +.B \-b .TP .PD -.B \-\^\-optimize -Enable optimizations upon the internal representation of the program. -Currently, this includes just simple constant-folding. The -.I gawk -maintainer hopes to add additional optimizations over time. -.TP -.PD 0 -.B "\-W compat" +.B \-\^\-characters\-as\-bytes +Treat all input data as single-byte characters. In other words, +don't pay any attention to the locale information when attempting to +process strings as multibyte characters. +The +.B "\-\^\-posix" +option overrides this one. .TP .PD 0 -.B "\-W traditional" +.B \-c .TP .PD 0 .B \-\^\-compat @@ -222,18 +232,15 @@ mode. In compatibility mode, behaves identically to \*(UX .IR awk ; none of the \*(GN-specific extensions are recognized. 
-The use of -.B \-\^\-traditional -is preferred over the other forms of this option. +.\" The use of +.\" .B \-\^\-traditional +.\" is preferred over the other forms of this option. See .BR "GNU EXTENSIONS" , below, for more information. .TP .PD 0 -.B "\-W copyleft" -.TP -.PD 0 -.B "\-W copyright" +.B \-C .TP .PD 0 .B \-\^\-copyleft @@ -244,7 +251,7 @@ Print the short version of the \*(GN copyright information message on the standard output and exit successfully. .TP .PD 0 -\fB\-W dump-variables\fR[\fB=\fIfile\fR] +\fB\-d \fR[\fIfile\fR] .TP .PD \fB\-\^\-dump-variables\fR[\fB=\fIfile\fR] @@ -270,7 +277,23 @@ names like and so on.) .TP .PD 0 -.BI "\-W exec " file +.BI "\-e " program-text +.TP +.PD +.BI \-\^\-source " program-text" +Use +.I program-text +as \*(AK program source code. +This option allows the easy intermixing of library functions (used via the +.B \-f +and +.B \-\^\-file +options) with source code entered on the command line. +It is intended primarily for medium to large \*(AK programs used +in shell scripts. +.TP +.PD 0 +.BI "\-E " file .TP .PD .BI \-\^\-exec " file" @@ -285,10 +308,10 @@ from a URL. This option disables command-line variable assignments. .TP .PD 0 -.B "\-W gen\-po" +.B \-g .TP .PD -.B \-\^\-gen\-po +.B \-\^\-gen\-pot Scan and parse the \*(AK program, and generate a \*(GN .B \&.po format file on standard output with entries for all localizable @@ -300,10 +323,7 @@ distribution for more information on files. .TP .PD 0 -.B "\-W help" -.TP -.PD 0 -.B "\-W usage" +.B \-h .TP .PD 0 .B \-\^\-help @@ -317,7 +337,7 @@ the standard output. these options cause an immediate, successful exit.) .TP .PD 0 -.BR "\-W lint" [ =\fIvalue\fR ] +.BR "\-l " [ \fIvalue\fR ] .TP .PD .BR \-\^\-lint [ =\fIvalue\fR ] @@ -334,7 +354,7 @@ only warnings about things that are actually invalid are issued. (This is not fully implemented yet.) 
.TP .PD 0 -.B "\-W lint\-old" +.B \-L .TP .PD .B \-\^\-lint\-old @@ -343,12 +363,31 @@ not portable to the original version of Unix .IR awk . .TP .PD 0 -.B "\-W non\-decimal\-data" +.B \-n .TP .PD .B "\-\^\-non\-decimal\-data" Recognize octal and hexadecimal values in input data. .I "Use this option with great caution!" +.TP +.PD 0 +.B \-N +.TP +.PD +.B \-\^\-use\-lc\-numeric +This forces +.I gawk +to use the locale's decimal point character when parsing input data. +Although the POSIX standard requires this behavior, and +.I gawk +does so when +.B \-\^\-posix +is in effect, the default is to follow traditional behavior and use a +period as the decimal point, even in locales where the period is not the +decimal point character. This option overrides the default behavior, +without the full draconian strictness of the +.B \-\^\-posix +option. .ig .\" This option is left undocumented, on purpose. .TP @@ -363,7 +402,34 @@ users. .. .TP .PD 0 -.B "\-W posix" +.B \-O +.TP +.PD +.B \-\^\-optimize +Enable optimizations upon the internal representation of the program. +Currently, this includes just simple constant-folding. The +.I gawk +maintainer hopes to add additional optimizations over time. +.TP +.PD 0 +\fB\-p \fR[\fIprof_file\fR] +.TP +.PD +\fB\-\^\-profile\fR[\fB=\fIprof_file\fR] +Send profiling data to +.IR prof_file . +The default is +.BR awkprof.out . +When run with +.IR gawk , +the profile is just a \*(lqpretty printed\*(rq version of the program. +When run with +.IR pgawk , +the profile contains execution counts of each statement in the program +in the left margin and function call counts for each user-defined function. +.TP +.PD 0 +.B \-P .TP .PD .B \-\^\-posix @@ -411,24 +477,7 @@ function is not available. .RE .TP .PD 0 -\fB\-W profile\fR[\fB=\fIprof_file\fR] -.TP -.PD -\fB\-\^\-profile\fR[\fB=\fIprof_file\fR] -Send profiling data to -.IR prof_file . -The default is -.BR awkprof.out . 
-When run with -.IR gawk , -the profile is just a \*(lqpretty printed\*(rq version of the program. -When run with -.IR pgawk , -the profile contains execution counts of each statement in the program -in the left margin and function call counts for each user-defined function. -.TP -.PD 0 -.B "\-W re\-interval" +.B \-r .TP .PD .B \-\^\-re\-interval @@ -444,50 +493,26 @@ Interval expressions were not traditionally available in the and .I egrep consistent with each other. -However, their use is likely -to break old \*(AK programs, so -.I gawk -only provides them if they are requested with this option, or when -.B \-\^\-posix -is specified. -.TP -.PD 0 -.BI "\-W source " program-text -.TP -.PD -.BI \-\^\-source " program-text" -Use -.I program-text -as \*(AK program source code. -This option allows the easy intermixing of library functions (used via the -.B \-f -and -.B \-\^\-file -options) with source code entered on the command line. -It is intended primarily for medium to large \*(AK programs used -in shell scripts. .TP .PD 0 -.B "\-W use\-lc\-numeric" +.BI \-S .TP .PD -.B \-\^\-use\-lc\-numeric -This forces -.I gawk -to use the locale's decimal point character when parsing input data. -Although the POSIX standard requires this behavior, and +.BI \-\^\-sandbox +Runs .I gawk -does so when -.B \-\^\-posix -is in effect, the default is to follow traditional behavior and use a -period as the decimal point, even in locales where the period is not the -decimal point character. This option overrides the default behavior, -without the full draconian strictness of the -.B \-\^\-posix -option. +in sandbox mode, disabling the +.B system +function, input redirection with +.BR getline , +output redirection with +.BR print " and " printf , +and dynamic extensions loading. +Command execution (through pipelines) is also disabled. +This effectively blocks a script from accessing local resources (except for the files specified on the command line). 
.TP .PD 0 -.B "\-W version" +.B \-V .TP .PD .B \-\^\-version @@ -621,6 +646,28 @@ Finally, after all the input is exhausted, executes the code in the .B END block(s) (if any). +.SS Command Line Directories +.PP +According to POSIX, files named on the +.I awk +command line must be +text files. The behavior is ``undefined'' if they are not. Most versions +of +.I awk +treat a directory on the command line as a fatal error. +.PP +.\" FIXME: VERSION!! +Starting with version 3.x of +.IR gawk , +a directory on the command line +produces a warning, but is otherwise skipped. If either of the +.B \-\^\-posix +or +.B \-\^\-traditional +options is given, then +.I gawk +reverts to +treating directories on the command line as a fatal error. .SH VARIABLES, RECORDS AND FIELDS \*(AK variables are dynamic; they come into existence when they are first used. Their values are either floating-point numbers or strings, @@ -698,9 +745,23 @@ splits up the record using the specified widths. The value of is ignored. Assigning a new value to .B FS +or +.B FPAT +overrides the use of +.BR FIELDWIDTHS . +.PP +Similarly, if the +.B FPAT +variable is set to a string representing a regular expression, +each field is made up of text that matches that regular expression. In +this case, the regular expression describes the fields themselves, +instead of the text that separates the fields. +Assigning a new value to +.BR FS +or +.B FIELDWIDTHS overrides the use of -.BR FIELDWIDTHS , -and restores the default behavior. +.BR FPAT . .PP Each field in the input record may be referenced by its position, .BR $1 , @@ -838,6 +899,20 @@ block .B FNR The input record number in the current input file. .TP +.B FPAT +A regular expression describing the contents of the +fields in a record. +When set, +.I gawk +parses the input into fields, where the fields match the +regular expression, instead of using the +value of the +.B FS +variable as the field separator. +See +.BR Fields , +above. 
+.TP .B FS The input field separator, a space by default. See .BR Fields , @@ -863,6 +938,7 @@ and the .BR gsub() , .BR index() , .BR match() , +.BR patsplit() , .BR split() , and .B sub() @@ -954,7 +1030,11 @@ system call. \fBPROCINFO["FS"]\fP \fB"FS"\fP if field splitting with .B FS -is in effect, or \fB"FIELDWIDTHS"\fP if field splitting with +is in effect, +\fB"FPAT"\fP if field splitting with +.B FPAT +is in effect, +or \fB"FIELDWIDTHS"\fP if field splitting with .B FIELDWIDTHS is in effect. .TP @@ -1141,6 +1221,8 @@ elements, .B ENVIRON elements and the elements of an array created by .B split() +or +.B patsplit() that are numeric strings. The basic idea is that .IR "user input" , @@ -1271,6 +1353,8 @@ and to the pattern-action statements themselves. .nf .B BEGIN .B END +.B BEGINFILE +.B ENDFILE .BI / "regular expression" / .I "relational expression" .IB pattern " && " pattern @@ -1308,6 +1392,24 @@ and .B END patterns cannot have missing action parts. .PP +.B BEGINFILE +and +.B ENDFILE +are additional special patterns whose bodies are executed +before reading the first record of each command line input file +and after reading the last record of each file. +Inside the +.B BEGINFILE +rule, the value of +.B ERRNO +will be the empty string if the file could be opened successfully. +Otherwise, there is some problem with the file and the code should +use +.B nextfile +to skip it. If that is not done, +.I gawk +will produce its usual fatal error for files that cannot be opened. +.PP For .BI / "regular expression" / patterns, the associated statement is executed for each input record that matches @@ -1432,12 +1534,6 @@ If there is one number followed by a comma, then is repeated at least .I n times. -.sp .5 -Interval expressions are only available if either -.B \-\^\-posix -or -.B \-\^\-re\-interval -is specified on the command line. 
.TP .B \ey matches the empty string at either the beginning or the @@ -1452,6 +1548,12 @@ matches the empty string at the beginning of a word. .B \e> matches the empty string at the end of a word. .TP +.B \es +matches any whitespace character. +.TP +.B \eS +matches any nonwhitespace character. +.TP .B \ew matches any word-constituent character (letter, digit, or underscore). .TP @@ -1594,6 +1696,8 @@ The .BR \eB , .BR \e< , .BR \e> , +.BR \es , +.BR \eS , .BR \ew , .BR \eW , .BR \e` , @@ -1613,7 +1717,6 @@ In the default case, .I gawk provide all the facilities of \*(PX regular expressions and the \*(GN regular expression operators described above. -However, interval expressions are not supported. .TP .B \-\^\-posix Only \*(PX regular expressions are supported, the \*(GN operators are not special. @@ -1621,7 +1724,6 @@ Only \*(PX regular expressions are supported, the \*(GN operators are not specia .B \ew matches a literal .BR w ). -Interval expressions are allowed. .TP .B \-\^\-traditional Traditional Unix @@ -1647,6 +1749,20 @@ Action statements consist of the usual assignment, conditional, and looping statements found in most languages. The operators, control statements, and input/output statements available are patterned after those in C. +.PP +.I gawk +accepts an additional control-flow statement not allowed in other +.I awk +versions: +.RS +.nf +\fBswitch (\fIexpression\fB) { +\fBcase \fIvalue\fB|\fIregex\fB : \fIstatement +\&.\^.\^. +\fR[ \fBdefault: \fIstatement \fR] +\fB}\fR +.fi +.RE .SS Operators .PP The operators in \*(AK, in order of decreasing precedence, are @@ -2192,9 +2308,16 @@ print "You blew it!" | "cat 1>&2" The following special filenames may be used with the .B |& co-process operator for creating TCP/IP network connections. 
-.TP "\w'\fB/inet/tcp/\fIlport\fB/\fIrhost\fB/\fIrport\fR'u+2n" +.TP +.PD 0 .BI /inet/tcp/ lport / rhost / rport -File for TCP/IP connection on local port +.TP +.PD 0 +.BI /inet4/tcp/ lport / rhost / rport +.TP +.PD +.BI /inet6/tcp/ lport / rhost / rport +Files for a TCP/IP connection on local port .I lport to remote host @@ -2204,57 +2327,36 @@ on remote port Use a port of .B 0 to have the system pick a port. +Use +.B /inet4 +to force an IPv4 connection, +and +.B /inet6 +to force an IPv6 connection. +Plain +.B /inet +uses the system default (most likely IPv4). .TP +.PD 0 .BI /inet/udp/ lport / rhost / rport +.TP +.PD 0 +.BI /inet4/udp/ lport / rhost / rport +.TP +.PD +.BI /inet6/udp/ lport / rhost / rport Similar, but use UDP/IP instead of TCP/IP. .TP +.PD 0 .BI /inet/raw/ lport / rhost / rport +.TP +.PD 0 +.BI /inet4/raw/ lport / rhost / rport +.TP +.PD +.BI /inet6/raw/ lport / rhost / rport .\" Similar, but use raw IP sockets. Reserved for future use. -.PP -Other special filenames provide access to information about the running -.I gawk -process. -.B "These filenames are now obsolete." -Use the -.B PROCINFO -array to obtain the information they provide. -The filenames are: -.TP "\w'\fB/dev/stdout\fR'u+1n" -.B /dev/pid -Reading this file returns the process ID of the current process, -in decimal, terminated with a newline. -.TP -.B /dev/ppid -Reading this file returns the parent process ID of the current process, -in decimal, terminated with a newline. -.TP -.B /dev/pgrpid -Reading this file returns the process group ID of the current process, -in decimal, terminated with a newline. -.TP -.B /dev/user -Reading this file returns a single record terminated with a newline. -The fields are separated with spaces. -.B $1 -is the value of the -.IR getuid (2) -system call, -.B $2 -is the value of the -.IR geteuid (2) -system call, -.B $3 -is the value of the -.IR getgid (2) -system call, and -.B $4 -is the value of the -.IR getegid (2) -system call. 
-If there are any additional fields, they are the group IDs returned by -.IR getgroups (2). -Multiple groups may not be supported on all systems. .SS Numeric Functions .PP \*(AK has the following built-in arithmetic functions: @@ -2489,11 +2591,51 @@ and provide the starting index in the string and length respectively, of each matching substring. .TP -\fBsplit(\fIs\fB, \fIa \fR[\fB, \fIr\fR]\fB)\fR +\fBpatsplit(\fIs\fB, \fIa \fR[\fB, \fIr\fR [\fB, \fIseps\fR] ]\fB)\fR +Splits the string +.I s +into the array +.I a +and the separators array +.I seps +on the regular expression +.IR r , +and returns the number of fields. +Element values are the portions of +.I s +that matched +.IR r . +The value of +.I seps[i] +is the separator that appeared in +front of +.IR a[i+1] . +If +.I r +is omitted, +.B FPAT +is used instead. +The arrays +.I a +and +.I seps +are cleared first. +.I seps[i] +is the field separator text between +.I a[i] +and +.IR a[i+1] . +Splitting behaves identically to field splitting with +.BR FPAT , +described above. +.TP +\fBsplit(\fIs\fB, \fIa \fR[\fB, \fIr\fR [\fB, \fIseps\fR] ]\fB)\fR Splits the string .I s into the array .I a +and the separators array +.I seps on the regular expression .IR r , and returns the number of fields. If @@ -2501,9 +2643,30 @@ and returns the number of fields. If is omitted, .B FS is used instead. -The array +The arrays .I a -is cleared first. +and +.I seps +are cleared first. +.I seps[i] +is the field separator matched by +.I r +between +.I a[i] +and +.IR a[i+1] . +If +.I r +is a single space, then leading whitespace in +.I s +goes into the extra array element +.I seps[0] +and trailing whitespace goes into the extra array element +.IR seps[n] , +where +.I n +is the return value of +.IR "split(s, a, r, seps)" . Splitting behaves identically to field splitting, described above. .TP .BI sprintf( fmt , " expr-list" ) @@ -2831,6 +2994,30 @@ to return a value from a function. 
The return value is undefined if no value is provided, or if the function returns by \*(lqfalling off\*(rq the end. .PP +As a +.I gawk +extension, functions may be called indirectly. To do this, assign +the name of the function to be called, as a string, to a variable. +Then use the variable as if it were the name of a function, prefixed with +an ``at'' sign, like so: +.RS +.ft B +.nf +function myfunc() +{ + print "myfunc called" + \&.\|.\|. +} + +{ .\|.\|. + the_func = "myfunc" + @the_func() # call through the_func to myfunc + .\|.\|. +} +.fi +.ft R +.RE +.PP If .B \-\^\-lint has been provided, @@ -2986,7 +3173,7 @@ functions in your program, as appropriate. .TP 4. Run -.B "gawk \-\^\-gen\-po \-f myprog.awk > myprog.po" +.B "gawk \-\^\-gen\-pot \-f myprog.awk > myprog.po" to generate a .B \&.po file for your program. @@ -3198,6 +3385,11 @@ variable and fixed-width field splitting. .TP \(bu The +.B FPAT +variable and field splitting based on field values. +.TP +\(bu +The .B PROCINFO array is not available. .\" I/O stuff @@ -3268,6 +3460,7 @@ The .BR lshift() , .BR mktime() , .BR or() , +.BR patsplit() , .BR rshift() , .BR strftime() , .BR strtonum() , @@ -3347,34 +3540,6 @@ This option should only be of interest to the maintainers, and may not even be compiled into .IR gawk . .. -.PP -If -.I gawk -is -.I configured -with the -.B \-\^\-enable\-switch -option to the -.I configure -command, then it accepts an additional control-flow statement: -.RS -.nf -\fBswitch (\fIexpression\fB) { -\fBcase \fIvalue\fB|\fIregex\fB : \fIstatement -\&.\^.\^. -\fR[ \fBdefault: \fIstatement \fR] -\fB}\fR -.fi -.RE -.PP -If -.I gawk -is configured with the -.B \-\^\-disable\-directories-fatal -option, then it will silently skip directories named on the command line. -Otherwise, it will do so only if invoked with the -.B \-\^\-traditional -option. 
.SH ENVIRONMENT VARIABLES The .B AWKPATH @@ -3493,7 +3658,7 @@ Fred Fish supplied support for the Amiga, and Martin Brown provided the BeOS port. Stephen Davies provided the original Tandem port, and Matthew Woehlke provided changes for Tandem's POSIX-compliant systems. -Ralf Wildenhues now maintains that port. +Ralf Wildenhues now maintains that port. .PP See the .I README @@ -3501,10 +3666,10 @@ file in the .I gawk distribution for current information about maintainers and which ports are currently supported. -.SH VERSION INFORMATION +.SH VERSION INFORMATION This man page documents .IR gawk , -version 3.1.8. +version 4.0. .SH BUG REPORTS If you find a bug in .IR gawk , diff --git a/doc/gawk.info b/doc/gawk.info index 447ecf7c..8b70bd43 100644 --- a/doc/gawk.info +++ b/doc/gawk.info @@ -106,6 +106,7 @@ texts being (a) (see below), and with the Back-Cover Texts being (b) * Copying:: Your right to copy and distribute `gawk'. * GNU Free Documentation License:: The license for this Info file. +* next-edition:: next-edition. * Index:: Concept and Variable Index. * History:: The history of `gawk' and @@ -163,6 +164,7 @@ texts being (a) (see below), and with the Back-Cover Texts being (b) * Command Line Field Separator:: Setting `FS' from the command-line. * Field Splitting Summary:: Some final points and a summary table. * Constant Size:: Reading constant width data. +* Splitting By Content:: Defining Fields By Content * Multiple Line:: Reading multi-line records. * Getline:: Reading files under explicit program control using the `getline' function. @@ -180,6 +182,9 @@ texts being (a) (see below), and with the Back-Cover Texts being (b) * Getline Notes:: Important things to know about `getline'. * Getline Summary:: Summary of `getline' Variants. +* BEGINFILE/ENDFILE:: Two special patterns for advanced control. +* Command line directories:: What happens if you put a directory on the + command line. * Print:: The `print' statement. 
* Print Examples:: Simple examples of `print' statements. * Output Separators:: The output separators and how to change @@ -197,10 +202,11 @@ texts being (a) (see below), and with the Back-Cover Texts being (b) `gawk' allows access to inherited file descriptors. * Special FD:: Special files for I/O. -* Special Process:: Special files for process information. * Special Network:: Special files for network communications. * Special Caveats:: Things to watch out for. * Close Files And Pipes:: Closing Input and Output Files and Pipes. +* Values:: Constants, Variables, and Regular + Expressions. * Constants:: String, numeric and regexp constants. * Scalar Constants:: Numeric and string constants. * Nondecimal-numbers:: What are octal and hex numbers. @@ -214,6 +220,7 @@ texts being (a) (see below), and with the Back-Cover Texts being (b) advanced method of input. * Conversion:: The conversion of strings to numbers and vice versa. +* All Operators:: `gawk''s operators. * Arithmetic Ops:: Arithmetic operations (`+', `-', etc.) * Concatenation:: Concatenating strings. @@ -221,6 +228,7 @@ texts being (a) (see below), and with the Back-Cover Texts being (b) field. * Increment Ops:: Incrementing the numeric value of a variable. +* Truth Values and Conditions:: Testing for true and false. * Truth Values:: What is ``true'' and what is ``false''. * Typing and Comparison:: How variables acquire types and how this affects comparison of numbers and strings @@ -272,6 +280,7 @@ texts being (a) (see below), and with the Back-Cover Texts being (b) * Auto-set:: Built-in variables where `awk' gives you information. * ARGC and ARGV:: Ways to use `ARGC' and `ARGV'. +* Array Basics:: The basics of arrays. * Array Intro:: Introduction to Arrays * Reference to Elements:: How to examine one element of an array. * Assigning Elements:: How to change an element of an array. 
@@ -311,6 +320,7 @@ texts being (a) (see below), and with the Back-Cover Texts being (b) * Function Caveats:: Things to watch out for. * Return Statement:: Specifying the value a function returns. * Dynamic Typing:: How variable types can change at runtime. +* Indirect Calls:: Choosing the function to call at runtime. * I18N and L10N:: Internationalization and Localization. * Explaining gettext:: How GNU `gettext' works. * Programmer i18n:: Features for the programmer. @@ -332,8 +342,8 @@ texts being (a) (see below), and with the Back-Cover Texts being (b) * Other Arguments:: Input file names and variable assignments. * AWKPATH Variable:: Searching directories for `awk' programs. -* Obsolete:: Obsolete Options and/or features. * Exit Status:: `gawk''s exit status. +* Obsolete:: Obsolete Options and/or features. * Undocumented:: Undocumented Options and Features. * Known Bugs:: Known Bugs in `gawk'. * Library Names:: How to best name private global variables @@ -341,6 +351,8 @@ texts being (a) (see below), and with the Back-Cover Texts being (b) * General Functions:: Functions that are of general use. * Nextfile Function:: Two implementations of a `nextfile' function. +* Strtonum Function:: A replacement for the built-in + `strtonum' function. * Assert Function:: A function for assertions in `awk' programs. * Round Function:: A function for rounding if `sprintf' @@ -410,14 +422,16 @@ texts being (a) (see below), and with the Back-Cover Texts being (b) * PC Installation:: Installing and Compiling `gawk' on MS-DOS and OS/2. * PC Binary Installation:: Installing a prepared distribution. -* PC Compiling:: Compiling `gawk' for MS-DOS, Windows32, - and OS/2. -* PC Using:: Running `gawk' on MS-DOS, Windows32 and - OS/2. +* PC Compiling:: Compiling `gawk' for MS-DOS, + Windows32, and OS/2. * PC Dynamic:: Compiling `gawk' for dynamic libraries. +* PC Using:: Running `gawk' on MS-DOS, Windows32 + and OS/2. * Cygwin:: Building and running `gawk' for Cygwin. 
+* MSYS:: Using `gawk' In The MSYS + Environment. * VMS Installation:: Installing `gawk' on VMS. * VMS Compilation:: How to compile `gawk' under VMS. * VMS Installation Details:: How to install `gawk' under VMS. @@ -455,9 +469,12 @@ texts being (a) (see below), and with the Back-Cover Texts being (b) * Basic Data Typing:: A very quick intro to data types. * Floating Point Issues:: Stuff to know about floating-point numbers. * String Conversion Precision:: The String Value Can Lie. -* Unexpected Results:: Floating Point Numbers Are Not - Abstract Numbers. +* Unexpected Results:: Floating Point Numbers Are Not Abstract + Numbers. * POSIX Floating Point Problems:: Standards Versus Existing Practice. +* unresolved:: unresolved. +* revision:: revision. +* consistency:: consistency. To Miriam, for making me complete. @@ -1018,13 +1035,14 @@ a significant pleasure. Ulrich Drepper, provided invaluable help and feedback for the design of the internationalization features. - Nelson Beebe, Antonio Colombo Scott Deifik, John H. DuBois III, -Darrel Hankerson, Michal Jaegermann, Ju"rgen Kahrs, Dave Pitts, Stepan -Kasal, Pat Rankin, Andrew Schorr, Corinna Vinschen, Anders Wallin, and -Eli Zaretskii (in alphabetical order) make up the current `gawk' "crack -portability team." Without their hard work and help, `gawk' would not -be nearly the fine program it is today. It has been and continues to -be a pleasure working with this team of fine people. + Nelson Beebe, Andreas Buening, Antonio Colombo, Scott Deifik, John +H. DuBois III, Darrel Hankerson, Michal Jaegermann, Ju"rgen Kahrs, Dave +Pitts, Stepan Kasal, Pat Rankin, Andrew Schorr, Corinna Vinschen, +Anders Wallin, and Eli Zaretskii (in alphabetical order) make up the +current `gawk' "crack portability team." Without their hard work and +help, `gawk' would not be nearly the fine program it is today. It has +been and continues to be a pleasure working with this team of fine +people. 
David and I would like to thank Brian Kernighan of Bell Laboratories for invaluable assistance during the testing and debugging of `gawk', @@ -1183,8 +1201,8 @@ MS-DOS, it is `Ctrl-z'.) As an example, the following program prints a friendly piece of advice (from Douglas Adams's `The Hitchhiker's Guide to the Galaxy'), -to keep you from worrying about the complexities of computer programming -(`BEGIN' is a feature we haven't discussed yet): +to keep you from worrying about the complexities of computer +programming(1) (`BEGIN' is a feature we haven't discussed yet): $ awk "BEGIN { print \"Don't Panic!\" }" -| Don't Panic! @@ -1192,7 +1210,7 @@ to keep you from worrying about the complexities of computer programming This program does not read any input. The `\' before each of the inner double quotes is necessary because of the shell's quoting rules--in particular because it mixes both single quotes and double -quotes.(1) +quotes.(2) This next simple `awk' program emulates the `cat' utility; it copies whatever you type on the keyboard to its standard output (why this @@ -1211,7 +1229,12 @@ works is explained shortly). ---------- Footnotes ---------- - (1) Although we generally recommend the use of single quotes around + (1) If you use `bash' as your shell, you should execute the command +`set +H' before running this program interactively, to disable the +`csh'-style command history, which treats `!' as a special character. +We recommend putting this command into your personal startup file. + + (2) Although we generally recommend the use of single quotes around the program text, double quotes are needed here in order to put the single quote into the message. @@ -1378,7 +1401,7 @@ File: gawk.info, Node: Quoting, Prev: Comments, Up: Running gawk * Menu: -* DOS Quoting:: Quoting in MS-DOS Batch Files. +* DOS Quoting:: Quoting in MS-DOS Batch Files. For short to medium length `awk' programs, it is most convenient to enter the program on the `awk' command line. 
This is best done by @@ -2360,16 +2383,18 @@ sequences and that are not listed in the table stand for themselves: They were added as part of the POSIX standard to make `awk' and `egrep' consistent with each other. - However, because old programs may use `{' and `}' in regexp - constants, by default `gawk' does _not_ match interval expressions - in regexps. If either `--posix' or `--re-interval' are specified - (*note Options::), then interval expressions are allowed in - regexps. + Initially, because old programs may use `{' and `}' in regexp + constants, `gawk' did _not_ match interval expressions in regexps. + + However, beginning with version 3.2 *(FIXME: version)* `gawk' does + match interval expressions by default. This is because + compatibility with POSIX has become more important to most `gawk' + users than compatibility with old programs. - For new programs that use `{' and `}' in regexp constants, it is - good practice to always escape them with a backslash. Then the - regexp constants are valid and work the way you want them to, using - any version of `awk'.(2) + For programs that use `{' and `}' in regexp constants, it is good + practice to always escape them with a backslash. Then the regexp + constants are valid and work the way you want them to, using any + version of `awk'.(2) In regular expressions, the `*', `+', and `?' operators, as well as the braces `{' and `}', have the highest precedence, followed by @@ -2503,6 +2528,14 @@ minor node and are specific to `gawk'; they are not available in other matching. For our purposes, a "word" is a sequence of one or more letters, digits, or underscores (`_'): +`\s' + Matches any whitespace character. Think of it as shorthand for + `[[:space:]]'. + +`\S' + Matches any character that is not whitespace. Think of it as + shorthand for `[^[:space:]]'. + `\w' Matches any word-constituent character--that is, it matches any letter, digit, or underscore. 
Think of it as shorthand for @@ -2558,7 +2591,7 @@ GNU `\b' appears to be the lesser of two evils. No options In the default case, `gawk' provides all the facilities of POSIX regexps and the GNU regexp operators described in *note Regexp - Operators::. However, interval expressions are not supported. + Operators::. `--posix' Only POSIX regexps are supported; the GNU operators are not special @@ -2574,10 +2607,9 @@ No options `gawk' silently skips directories named on the command line. `--re-interval' - Allow interval expressions in regexps, even if `--traditional' has - been provided. (`--posix' automatically enables interval - expressions, so `--re-interval' is redundant when `--posix' is is - used.) + Allow interval expressions in regexps, if `--traditional' has been + provided. Otherwise, interval expressions are available by + default. File: gawk.info, Node: Case-sensitivity, Next: Leftmost Longest, Prev: GNU Regexp Operators, Up: Regexp @@ -2850,9 +2882,13 @@ have to be named on the `awk' command line (*note Getline::). * Changing Fields:: Changing the Contents of a Field. * Field Separators:: The field separator and how to change it. * Constant Size:: Reading constant width data. +* Splitting By Content:: Defining Fields By Content * Multiple Line:: Reading multi-line records. * Getline:: Reading files under explicit program control using the `getline' function. +* BEGINFILE/ENDFILE:: Two special patterns for advanced control. +* Command line directories:: What happens if you put a directory on the + command line. File: gawk.info, Node: Records, Next: Fields, Up: Reading Files @@ -3277,8 +3313,9 @@ The intervening field, `$5', is created with an empty value (indicated by the second pair of adjacent colons), and `NF' is updated with the value six. - Decrementing `NF' throws away the values of the fields after the new -value of `NF' and recomputes `$0'. (d.c.) Here is an example: + *FIXME:* Verify that this is in POSIX. 
Decrementing `NF' throws +away the values of the fields after the new value of `NF' and +recomputes `$0'. (d.c.) Here is an example: $ echo a b c d e f | awk '{ print "NF =", NF; > NF = 3; print $0 }' @@ -3674,7 +3711,7 @@ effect. defined by the POSIX standard. -File: gawk.info, Node: Constant Size, Next: Multiple Line, Prev: Field Separators, Up: Reading Files +File: gawk.info, Node: Constant Size, Next: Splitting By Content, Prev: Field Separators, Up: Reading Files 3.6 Reading Fixed-Width Data ============================ @@ -3778,9 +3815,97 @@ restore the original settings (*note Passwd Functions::, for an example of such a function). -File: gawk.info, Node: Multiple Line, Next: Getline, Prev: Constant Size, Up: Reading Files +File: gawk.info, Node: Splitting By Content, Next: Multiple Line, Prev: Constant Size, Up: Reading Files + +3.7 Defining Fields By Content +============================== + +(This minor node discusses an advanced feature of `awk'. If you are a +novice `awk' user, you might want to skip it on the first reading.) + +Normally, when using `FS', `gawk' defines the fields as the parts of +the record that occur in between each field separator. In other words, +`FS' defines what a field _is not_, and not what a field _is_. +However, there are times when you really want to define the fields by +what they are, and not by what they are not. + + The most notorious such case is so-called Comma-Separated-Value +(CSV) data. Many spreadsheet programs, for example, can export their +data into text files, where each record is terminated with a newline, +and fields are separated by commas. If only commas separated the data, +there wouldn't be an issue. The problem comes when one of the fields +contains an _embedded_ comma. While there is no formal standard +specification for CSV data(1), in such cases, most programs embed the +field in double quotes. 
So we might have data like this: + + Robbins,Arnold,"1234 A Pretty Street, NE",MyTown,MyState,12345-6789,USA + + The `FPAT' variable offers a solution for cases like this. The +value of `FPAT' should be a string that provides a regular expression. +This regular expression describes the contents of each field. + + In the case of CSV data as presented above, each field is either +"anything that is not a comma," or "a double quote, anything that is +not a double quote, and a closing double quote." If written as a +regular expression constant (*note Regexp::), we would have +`/([^,]+)|("[^"]+")/'. Writing this as a string requires us to escape +the double quotes, leading to: + + FPAT = "([^,]+)|(\"[^\"]+\")" + + Putting this to use, here is a simple program to parse the data: + + BEGIN { + FPAT = "([^,]+)|(\"[^\"]+\")" + } + + { + print "NF = ", NF + for (i = 1; i <= NF; i++) { + printf("$%d = <%s>\n", i, $i) + } + } + + When run, we get the following: + + $ gawk -f simple-csv.awk addresses.csv + NF = 7 + $1 = <Robbins> + $2 = <Arnold> + $3 = <"1234 A Pretty Street, NE"> + $4 = <MyTown> + $5 = <MyState> + $6 = <12345-6789> + $7 = <USA> + + Note the embedded comma in the value of `$3'. + + A straightforward improvement when processing CSV data of this sort +would be to remove the quotes when they occur, with something like this: + + if (substr($i, 1, 1) == "\"") { + len = length($i) + $i = substr($i, 2, len - 2) # Get text within the two quotes + } + + As with `FS', the `IGNORECASE' variable (*note User-modified::) +affects field splitting with `FPAT'. + + NOTE: Some programs export CSV data that contains embedded + newlines between the double quotes. `gawk' provides no way to + deal with this. Since there is no formal specification for CSV + data, there isn't much more to be done; the `FPAT' mechanism + provides an elegant solution for the majority of cases, and the + `gawk' maintainer is satisfied with that. 
-3.7 Multiple-Line Records + ---------- Footnotes ---------- + + (1) At least, we don't know of one. + + +File: gawk.info, Node: Multiple Line, Next: Getline, Prev: Splitting By Content, Up: Reading Files + +3.8 Multiple-Line Records ========================= In some databases, a single line cannot conveniently hold all the @@ -3916,9 +4041,9 @@ feature of `RS' does not apply. It does apply to the default field separator of a single space: `FS = " "'. -File: gawk.info, Node: Getline, Prev: Multiple Line, Up: Reading Files +File: gawk.info, Node: Getline, Next: BEGINFILE/ENDFILE, Prev: Multiple Line, Up: Reading Files -3.8 Explicit Input with `getline' +3.9 Explicit Input with `getline' ================================= So far we have been getting our input data from `awk''s main input @@ -3943,6 +4068,9 @@ describing the error that occurred. In the following examples, COMMAND stands for a string value that represents a shell command. + NOTE: When `--sandbox' is specified, reading lines from files, + pipes and coprocesses is disabled. + * Menu: * Plain Getline:: Using `getline' with no arguments. @@ -3962,7 +4090,7 @@ represents a shell command. File: gawk.info, Node: Plain Getline, Next: Getline/Variable, Up: Getline -3.8.1 Using `getline' with No Arguments +3.9.1 Using `getline' with No Arguments --------------------------------------- The `getline' command can be used without arguments to read input from @@ -4014,7 +4142,7 @@ value of `$0'. File: gawk.info, Node: Getline/Variable, Next: Getline/File, Prev: Plain Getline, Up: Getline -3.8.2 Using `getline' into a Variable +3.9.2 Using `getline' into a Variable ------------------------------------- You can use `getline VAR' to read the next record from `awk''s input @@ -4055,7 +4183,7 @@ not change. 
File: gawk.info, Node: Getline/File, Next: Getline/Variable/File, Prev: Getline/Variable, Up: Getline -3.8.3 Using `getline' from a File +3.9.3 Using `getline' from a File --------------------------------- Use `getline < FILE' to read the next record from FILE. Here FILE is a @@ -4088,7 +4216,7 @@ other `awk' implementations. File: gawk.info, Node: Getline/Variable/File, Next: Getline/Pipe, Prev: Getline/File, Up: Getline -3.8.4 Using `getline' into a Variable from a File +3.9.4 Using `getline' into a Variable from a File ------------------------------------------------- Use `getline VAR < FILE' to read input from the file FILE, and put it @@ -4132,7 +4260,7 @@ regular expression. File: gawk.info, Node: Getline/Pipe, Next: Getline/Variable/Pipe, Prev: Getline/Variable/File, Up: Getline -3.8.5 Using `getline' from a Pipe +3.9.5 Using `getline' from a Pipe --------------------------------- The output of a command can also be piped into `getline', using @@ -4200,7 +4328,7 @@ other `awk' implementations. File: gawk.info, Node: Getline/Variable/Pipe, Next: Getline/Coprocess, Prev: Getline/Pipe, Up: Getline -3.8.6 Using `getline' into a Variable from a Pipe +3.9.6 Using `getline' into a Variable from a Pipe ------------------------------------------------- When you use `COMMAND | getline VAR', the output of COMMAND is sent @@ -4227,7 +4355,7 @@ portable to other `awk' implementations. File: gawk.info, Node: Getline/Coprocess, Next: Getline/Variable/Coprocess, Prev: Getline/Variable/Pipe, Up: Getline -3.8.7 Using `getline' from a Coprocess +3.9.7 Using `getline' from a Coprocess -------------------------------------- Input into `getline' from a pipe is a one-way operation. The command @@ -4257,7 +4385,7 @@ where coprocesses are discussed in more detail. 
File: gawk.info, Node: Getline/Variable/Coprocess, Next: Getline Notes, Prev: Getline/Coprocess, Up: Getline -3.8.8 Using `getline' into a Variable from a Coprocess +3.9.8 Using `getline' into a Variable from a Coprocess ------------------------------------------------------ When you use `COMMAND |& getline VAR', the output from the coprocess @@ -4275,7 +4403,7 @@ where coprocesses are discussed in more detail. File: gawk.info, Node: Getline Notes, Next: Getline Summary, Prev: Getline/Variable/Coprocess, Up: Getline -3.8.9 Points to Remember About `getline' +3.9.9 Points to Remember About `getline' ---------------------------------------- Here are some miscellaneous points about `getline' that you should bear @@ -4309,7 +4437,7 @@ in mind: File: gawk.info, Node: Getline Summary, Prev: Getline Notes, Up: Getline -3.8.10 Summary of `getline' Variants +3.9.10 Summary of `getline' Variants ------------------------------------ *note table-getline-variants:: summarizes the eight variants of @@ -4329,6 +4457,84 @@ COMMAND `|& getline' VAR Sets VAR. This is a `gawk' extension Table 3.1: getline Variants and What They Set +File: gawk.info, Node: BEGINFILE/ENDFILE, Next: Command line directories, Prev: Getline, Up: Reading Files + +3.10 The `BEGINFILE' and `ENDFILE' Special Patterns +=================================================== + +*FIXME:* Get the version right. + + NOTE: This minor node describes a `gawk'-specific feature added in + `gawk' 3.X. + + Two special kinds of rule, `BEGINFILE' and `ENDFILE', give you +"hooks" into `gawk''s command-line file processing loop. As with the +`BEGIN' and `END' rules (*note BEGIN/END::), all `BEGINFILE' rules in a +program are merged, in the order they are read by `gawk', and all +`ENDFILE' rules are merged as well. + + The body of the `BEGINFILE' rules is executed just before `gawk' +reads the first record from a file. `FILENAME' is set to the name of +the current file, and `FNR' is set to zero. 
+ + The `BEGINFILE' rule provides you the opportunity for two tasks that +would otherwise be difficult or impossible to perform: + + 1. You can test if the file is readable. Normally, it is a fatal + error if a file named on the command line cannot be opened for + reading. However, you can bypass the fatal error and move on to + the next file on the command line. + + You do this by checking if the `ERRNO' variable is not the empty + string; if so, then `gawk' was not able to open the file. In this + case, your program can execute the `nextfile' statement (*note + Nextfile Statement::). This causes `gawk' to skip the file + entirely. Otherwise, `gawk' will exit with the usual fatal error. + + 2. If you have written extensions that modify the record handling (by + inserting an "open hook"), you can invoke them at this point, + before `gawk' has started processing the file. (This is a _very_ + advanced feature, currently used only by the XMLgawk project + (http://xgawk.sourceforge.net).) + + The `ENDFILE' rule is called when `gawk' has finished processing the +last record in an input file. It will be called before any `END' rules. + + Normally, when an error occurs when reading input in the normal +input processing loop, the error is fatal. However, if an `ENDFILE' +rule is present, the error becomes non-fatal, and instead `ERRNO' is +set. This makes it possible to catch and process I/O errors at the +level of the `awk' program. + + The `next' statement is not allowed inside either a `BEGINFILE' or +an `ENDFILE' rule. The `nextfile' statement is allowed only inside a +`BEGINFILE' rule, but not inside an `ENDFILE' rule. + + The `getline' statement (*note Getline::) is restricted inside both +`BEGINFILE' and `ENDFILE'. Only the `getline VARIABLE < FILE' form is +allowed. + + `BEGINFILE' and `ENDFILE' are `gawk' extensions. In most other +`awk' implementations, or if `gawk' is in compatibility mode (*note +Options::), they are not special. 
+ + +File: gawk.info, Node: Command line directories, Prev: BEGINFILE/ENDFILE, Up: Reading Files + +3.11 Directories On The Command Line +==================================== + +According to POSIX, files named on the `awk' command line must be text +files. The behavior is "undefined" if they are not. Most versions of +`awk' treat a directory on the command line as a fatal error. + + *FIXME:* Get the version right. Starting with version 3.x of +`gawk', a directory on the command line produces a warning, but is +otherwise skipped. If either of the `--posix' or `--traditional' +options is given, then `gawk' reverts to treating directories on the +command line as a fatal error. + + File: gawk.info, Node: Printing, Next: Expressions, Prev: Reading Files, Up: Top 4 Printing Output @@ -4952,6 +5158,9 @@ So far, the output from `print' and `printf' has gone to the standard output, usually the terminal. Both `print' and `printf' can also send their output to other places. This is called "redirection". + NOTE: When `--sandbox' is specified, redirecting output to files + and pipes is disabled. + A redirection appears after the `print' or `printf' statement. Redirections in `awk' are written just like redirections in shell commands, except that they are written inside the `awk' program. @@ -5110,12 +5319,11 @@ descriptors, process-related information, and TCP/IP networking. * Menu: * Special FD:: Special files for I/O. -* Special Process:: Special files for process information. * Special Network:: Special files for network communications. * Special Caveats:: Things to watch out for. -File: gawk.info, Node: Special FD, Next: Special Process, Up: Special Files +File: gawk.info, Node: Special FD, Next: Special Network, Up: Special Files 4.7.1 Special Files for Standard Descriptors -------------------------------------------- @@ -5181,88 +5389,33 @@ error message in a `gawk' program is to use `/dev/stderr', like this: redirection, the value must be a string. 
It is a common error to omit the quotes, which leads to confusing results. - -File: gawk.info, Node: Special Process, Next: Special Network, Prev: Special FD, Up: Special Files - -4.7.2 Special Files for Process-Related Information ---------------------------------------------------- - -`gawk' also provides special file names that give access to information -about the running `gawk' process. Each of these "files" provides a -single record of information. To read them more than once, they must -first be closed with the `close' function (*note Close Files And -Pipes::). The file names are: - -`/dev/pid' - Reading this file returns the process ID of the current process, - in decimal form, terminated with a newline. - -`/dev/ppid' - Reading this file returns the parent process ID of the current - process, in decimal form, terminated with a newline. - -`/dev/pgrpid' - Reading this file returns the process group ID of the current - process, in decimal form, terminated with a newline. - -`/dev/user' - Reading this file returns a single record terminated with a - newline. The fields are separated with spaces. The fields - represent the following information: - - `$1' - The return value of the `getuid' system call (the real user - ID number). - - `$2' - The return value of the `geteuid' system call (the effective - user ID number). - - `$3' - The return value of the `getgid' system call (the real group - ID number). - - `$4' - The return value of the `getegid' system call (the effective - group ID number). - - If there are any additional fields, they are the group IDs - returned by the `getgroups' system call. (Multiple groups may not - be supported on all systems.) - - These special file names may be used on the command line as data -files, as well as for I/O redirections within an `awk' program. They -may not be used as source files with the `-f' option. 
- - NOTE: The special files that provide process-related information - are now considered obsolete and will disappear entirely in the - next release of `gawk'. `gawk' prints a warning message every - time you use one of these files. To obtain process-related - information, use the `PROCINFO' array. *Note Auto-set::. + Finally, using the `close' function on a file name of the form +`"/dev/fd/N"', for file descriptor numbers above two, will actually +close the given file descriptor. -File: gawk.info, Node: Special Network, Next: Special Caveats, Prev: Special Process, Up: Special Files +File: gawk.info, Node: Special Network, Next: Special Caveats, Prev: Special FD, Up: Special Files -4.7.3 Special Files for Network Communications +4.7.2 Special Files for Network Communications ---------------------------------------------- -Starting with version 3.1 of `gawk', `awk' programs can open a two-way -TCP/IP connection, acting as either a client or a server. This is done -using a special file name of the form: +`awk' programs can open a two-way TCP/IP connection, acting as either a +client or a server. This is done using a special file name of the form: - `/inet/PROTOCOL/LOCAL-PORT/REMOTE-HOST/REMOTE-PORT' + `/NET-TYPE/PROTOCOL/LOCAL-PORT/REMOTE-HOST/REMOTE-PORT' - The PROTOCOL is one of `tcp', `udp', or `raw', and the other fields -represent the other essential pieces of information for making a -networking connection. These file names are used with the `|&' -operator for communicating with a coprocess (*note Two-way I/O::). -This is an advanced feature, mentioned here only for completeness. -Full discussion is delayed until *note TCP/IP Networking::. + The NET-TYPE is one of `inet', `inet4' or `inet6'. The PROTOCOL is +one of `tcp', `udp', or `raw', and the other fields represent the other +essential pieces of information for making a networking connection. +These file names are used with the `|&' operator for communicating with +a coprocess (*note Two-way I/O::).
This is an advanced feature, +mentioned here only for completeness. Full discussion is delayed until +*note TCP/IP Networking::. File: gawk.info, Node: Special Caveats, Prev: Special Network, Up: Special Files -4.7.4 Special File Name Caveats +4.7.3 Special File Name Caveats ------------------------------- Here is a list of things to bear in mind when using the special file @@ -5479,34 +5632,35 @@ operators. * Menu: +* Values:: Constants, Variables, and Regular Expressions. +* All Operators:: `gawk''s operators. +* Truth Values and Conditions:: Testing for true and false. +* Function Calls:: A function call is an expression. +* Precedence:: How various operators nest. + + +File: gawk.info, Node: Values, Next: All Operators, Up: Expressions + +5.1 Constants, Variables and Conversions +======================================== + +Expressions are built up from values and the operations performed upon +them. This minor node describes the elementary objects which provide +values used in expressions. + +* Menu: + * Constants:: String, numeric and regexp constants. * Using Constant Regexps:: When and how to use a regexp constant. * Variables:: Variables give names to values for later use. * Conversion:: The conversion of strings to numbers and vice versa. -* Arithmetic Ops:: Arithmetic operations (`+', `-', - etc.) -* Concatenation:: Concatenating strings. -* Assignment Ops:: Changing the value of a variable or a field. -* Increment Ops:: Incrementing the numeric value of a variable. -* Truth Values:: What is ``true'' and what is ``false''. -* Typing and Comparison:: How variables acquire types and how this - affects comparison of numbers and strings with - `<', etc. -* Boolean Ops:: Combining comparison expressions using boolean - operators `||' (``or''), `&&' - (``and'') and `!' (``not''). -* Conditional Exp:: Conditional expressions select between two - subexpressions under control of a third - subexpression. -* Function Calls:: A function call is an expression. 
-* Precedence:: How various operators nest. -File: gawk.info, Node: Constants, Next: Using Constant Regexps, Up: Expressions +File: gawk.info, Node: Constants, Next: Using Constant Regexps, Up: Values -5.1 Constant Expressions -======================== +5.1.1 Constant Expressions +-------------------------- The simplest type of expression is the "constant", which always has the same value. There are three types of constants: numeric, string, and @@ -5525,8 +5679,8 @@ forms, but are stored identically internally. File: gawk.info, Node: Scalar Constants, Next: Nondecimal-numbers, Up: Constants -5.1.1 Numeric and String Constants ----------------------------------- +5.1.1.1 Numeric and String Constants +.................................... A "numeric constant" stands for a number. This number can be an integer, a decimal fraction, or a number in scientific (exponential) @@ -5557,8 +5711,8 @@ these are in IEEE 754 standard format. File: gawk.info, Node: Nondecimal-numbers, Next: Regexp Constants, Prev: Scalar Constants, Up: Constants -5.1.2 Octal and Hexadecimal Numbers ------------------------------------ +5.1.1.2 Octal and Hexadecimal Numbers +..................................... In `awk', all numbers are in decimal; i.e., base 10. Many other programming languages allow you to specify numbers in other bases, often @@ -5632,20 +5786,20 @@ for conversion of numbers to strings: File: gawk.info, Node: Regexp Constants, Prev: Nondecimal-numbers, Up: Constants -5.1.3 Regular Expression Constants ----------------------------------- +5.1.1.3 Regular Expression Constants +.................................... A regexp constant is a regular expression description enclosed in slashes, such as `/^beginning and end$/'. 
Most regexps used in `awk' programs are constant, but the `~' and `!~' matching operators can also -match computed or "dynamic" regexps (which are just ordinary strings or +match computed or dynamic regexps (which are just ordinary strings or variables that contain a regexp). -File: gawk.info, Node: Using Constant Regexps, Next: Variables, Prev: Constants, Up: Expressions +File: gawk.info, Node: Using Constant Regexps, Next: Variables, Prev: Constants, Up: Values -5.2 Using Regular Expression Constants -====================================== +5.1.2 Using Regular Expression Constants +---------------------------------------- When used on the righthand side of the `~' or `!~' operators, a regexp constant merely stands for the regexp that is to be matched. However, @@ -5719,10 +5873,10 @@ used as a parameter to a user-defined function, since passing a truth value in this way is probably not what was intended. -File: gawk.info, Node: Variables, Next: Conversion, Prev: Using Constant Regexps, Up: Expressions +File: gawk.info, Node: Variables, Next: Conversion, Prev: Using Constant Regexps, Up: Values -5.3 Variables -============= +5.1.3 Variables +--------------- Variables are ways of storing values at one point in your program for use later in another part of your program. They can be manipulated @@ -5739,8 +5893,8 @@ on the `awk' command line. File: gawk.info, Node: Using Variables, Next: Assignment Options, Up: Variables -5.3.1 Using Variables in a Program ----------------------------------- +5.1.3.1 Using Variables in a Program +.................................... Variables let you give names to values and refer to them later. Variables have already been used in many of the examples. The name of @@ -5751,7 +5905,8 @@ it may not begin with a digit. Case is significant in variable names; A variable name is a valid expression by itself; it represents the variable's current value. 
Variables are given new values with "assignment operators", "increment operators", and "decrement -operators". *Note Assignment Ops::. +operators". *Note Assignment Ops::. *FIXME: NEXT ED:* Can also be +changed by sub, gsub, split. A few variables have special built-in meanings, such as `FS' (the field separator), and `NF' (the number of fields in the current input @@ -5771,8 +5926,8 @@ do in C and in most other traditional languages. File: gawk.info, Node: Assignment Options, Prev: Using Variables, Up: Variables -5.3.2 Assigning Variables on the Command Line ---------------------------------------------- +5.1.3.2 Assigning Variables on the Command Line +............................................... Any `awk' variable can be set by including a "variable assignment" among the arguments on the command line when `awk' is invoked (*note @@ -5817,10 +5972,10 @@ processes the values of command-line assignments for escape sequences (*note Escape Sequences::). (d.c.) -File: gawk.info, Node: Conversion, Next: Arithmetic Ops, Prev: Variables, Up: Expressions +File: gawk.info, Node: Conversion, Prev: Variables, Up: Values -5.4 Conversion of Strings and Numbers -===================================== +5.1.4 Conversion of Strings and Numbers +--------------------------------------- Strings are converted to numbers and numbers are converted to strings, if the context of the `awk' program demands it. For example, if the @@ -5948,10 +6103,27 @@ presented in *note POSIX Floating Point Problems::. doubt that you need to worry about this. -File: gawk.info, Node: Arithmetic Ops, Next: Concatenation, Prev: Conversion, Up: Expressions +File: gawk.info, Node: All Operators, Next: Truth Values and Conditions, Prev: Values, Up: Expressions -5.5 Arithmetic Operators -======================== +5.2 Operators: Doing Something With Values +========================================== + +This minor node introduces the "operators" which make use of the values +provided by constants and variables. 
+ +* Menu: + +* Arithmetic Ops:: Arithmetic operations (`+', `-', + etc.) +* Concatenation:: Concatenating strings. +* Assignment Ops:: Changing the value of a variable or a field. +* Increment Ops:: Incrementing the numeric value of a variable. + + +File: gawk.info, Node: Arithmetic Ops, Next: Concatenation, Up: All Operators + +5.2.1 Arithmetic Operators +-------------------------- The `awk' language uses the common arithmetic operators when evaluating expressions. All of these arithmetic operators follow normal @@ -6033,10 +6205,10 @@ be machine-dependent. operator. -File: gawk.info, Node: Concatenation, Next: Assignment Ops, Prev: Arithmetic Ops, Up: Expressions +File: gawk.info, Node: Concatenation, Next: Assignment Ops, Prev: Arithmetic Ops, Up: All Operators -5.6 String Concatenation -======================== +5.2.2 String Concatenation +-------------------------- It seemed like a good idea at the time. Brian Kernighan @@ -6120,10 +6292,10 @@ Otherwise, you're never quite sure what you'll get. not rely on this. -File: gawk.info, Node: Assignment Ops, Next: Increment Ops, Prev: Concatenation, Up: Expressions +File: gawk.info, Node: Assignment Ops, Next: Increment Ops, Prev: Concatenation, Up: All Operators -5.7 Assignment Expressions -========================== +5.2.3 Assignment Expressions +---------------------------- An "assignment" is an expression that stores a (usually different) value into a variable. For example, let's assign the value one to the @@ -6285,10 +6457,10 @@ A workaround is: versions described in *note Other Versions::. -File: gawk.info, Node: Increment Ops, Next: Truth Values, Prev: Assignment Ops, Up: Expressions +File: gawk.info, Node: Increment Ops, Prev: Assignment Ops, Up: All Operators -5.8 Increment and Decrement Operators -===================================== +5.2.4 Increment and Decrement Operators +--------------------------------------- "Increment" and "decrement operators" increase or decrease the value of a variable by one. 
An assignment operator can do the same thing, so @@ -6371,10 +6543,34 @@ not anything that you can rely upon for portability. You should avoid such things in your own programs. -File: gawk.info, Node: Truth Values, Next: Typing and Comparison, Prev: Increment Ops, Up: Expressions +File: gawk.info, Node: Truth Values and Conditions, Next: Function Calls, Prev: All Operators, Up: Expressions -5.9 True and False in `awk' -=========================== +5.3 Truth Values and Conditions +=============================== + +In certain contexts, expression values also serve as "truth values;" +i.e., they determine what should happen next as the program runs. This +minor node describes how `awk' defines "true" and "false" and how +values are compared. + +* Menu: + +* Truth Values:: What is ``true'' and what is ``false''. +* Typing and Comparison:: How variables acquire types and how this + affects comparison of numbers and strings with + `<', etc. +* Boolean Ops:: Combining comparison expressions using boolean + operators `||' (``or''), `&&' + (``and'') and `!' (``not''). +* Conditional Exp:: Conditional expressions select between two + subexpressions under control of a third + subexpression. + + +File: gawk.info, Node: Truth Values, Next: Typing and Comparison, Up: Truth Values and Conditions + +5.3.1 True and False in `awk' +----------------------------- Many programming languages have a special representation for the concepts of "true" and "false." Such languages usually use the special @@ -6399,10 +6595,10 @@ the string constant `"0"' is actually true, because it is non-null. (d.c.) 
-File: gawk.info, Node: Typing and Comparison, Next: Boolean Ops, Prev: Truth Values, Up: Expressions +File: gawk.info, Node: Typing and Comparison, Next: Boolean Ops, Prev: Truth Values, Up: Truth Values and Conditions -5.10 Variable Typing and Comparison Expressions -=============================================== +5.3.2 Variable Typing and Comparison Expressions +------------------------------------------------ The Guide is definitive. Reality is frequently inaccurate. The Hitchhiker's Guide to the Galaxy @@ -6420,8 +6616,8 @@ are typed, and how `awk' compares variables. File: gawk.info, Node: Variable Typing, Next: Comparison Operators, Up: Typing and Comparison -5.10.1 String Type Versus Numeric Type --------------------------------------- +5.3.2.1 String Type Versus Numeric Type +....................................... The 1992 POSIX standard introduced the concept of a "numeric string", which is simply a string that looks like a number--for example, @@ -6516,8 +6712,8 @@ rules for typing and comparison are the same as just described for File: gawk.info, Node: Comparison Operators, Prev: Variable Typing, Up: Typing and Comparison -5.10.2 Comparison Operators ---------------------------- +5.3.2.2 Comparison Operators +............................ "Comparison expressions" compare strings or numbers for relationships such as equality. They are written using "relational operators", which @@ -6626,10 +6822,10 @@ abbreviation for the following comparison expression: Constant Regexps::, where this is discussed in more detail. 
-File: gawk.info, Node: Boolean Ops, Next: Conditional Exp, Prev: Typing and Comparison, Up: Expressions +File: gawk.info, Node: Boolean Ops, Next: Conditional Exp, Prev: Typing and Comparison, Up: Truth Values and Conditions -5.11 Boolean Expressions -======================== +5.3.3 Boolean Expressions +------------------------- A "Boolean expression" is a combination of comparison expressions or matching expressions, using the Boolean operators "or" (`||'), "and" @@ -6722,10 +6918,10 @@ back to false.(1) would you fix it? -File: gawk.info, Node: Conditional Exp, Next: Function Calls, Prev: Boolean Ops, Up: Expressions +File: gawk.info, Node: Conditional Exp, Prev: Boolean Ops, Up: Truth Values and Conditions -5.12 Conditional Expressions -============================ +5.3.4 Conditional Expressions +----------------------------- A "conditional expression" is a special kind of expression that has three operands. It allows you to use one expression's value to select @@ -6762,10 +6958,10 @@ using backslash continuation (*note Statements/Lines::). If `--posix' is specified (*note Options::), then this extension is disabled. -File: gawk.info, Node: Function Calls, Next: Precedence, Prev: Conditional Exp, Up: Expressions +File: gawk.info, Node: Function Calls, Next: Precedence, Prev: Truth Values and Conditions, Up: Expressions -5.13 Function Calls -=================== +5.4 Function Calls +================== A "function" is a name for a particular calculation. This enables you to ask for it by name at any point in the program. For example, the @@ -6810,6 +7006,11 @@ omitted in calls to user-defined functions, then those arguments are treated as local variables and initialized to the empty string (*note User-defined::). + As an advanced feature, `gawk' provides indirect function calls, +which is a way to choose the function to call at runtime, instead of +when you write the source code to your program. 
We defer discussion of +this feature until later; *Note Indirect Calls::. + Like every other expression, the function call has a value, which is computed by the function based on the arguments you give it. In this example, the value of `sqrt(ARGUMENT)' is the square root of ARGUMENT. @@ -6850,8 +7051,8 @@ Here is a sample run: File: gawk.info, Node: Precedence, Prev: Function Calls, Up: Expressions -5.14 Operator Precedence (How Operators Nest) -============================================= +5.5 Operator Precedence (How Operators Nest) +============================================ "Operator precedence" determines how operators are grouped when different operators appear close by in one expression. For example, @@ -7664,15 +7865,9 @@ File: gawk.info, Node: Switch Statement, Next: Break Statement, Prev: For Sta 6.4.5 The `switch' Statement ---------------------------- - NOTE: This node describes an experimental feature added in `gawk' - 3.1.3. It is _not_ enabled by default. To enable it, use the - `--enable-switch' option to `configure' when `gawk' is being - configured and built. *Note Additional Configuration Options::, - for more information. - - The `switch' statement allows the evaluation of an expression and -the execution of statements based on a `case' match. Case statements -are checked for a match in the order they are defined. If no suitable +The `switch' statement allows the evaluation of an expression and the +execution of statements based on a `case' match. Case statements are +checked for a match in the order they are defined. If no suitable `case' is found, the `default' section is executed, if supplied. Each `case' contains a single constant, be it numeric, string, or @@ -7719,6 +7914,9 @@ is executed and then falls through into the `default' section, executing its `print' statement. In turn, the -1 case will also be executed since the `default' does not halt execution. + This feature is a `gawk' extension, and is not available in POSIX +`awk'. 
+ File: gawk.info, Node: Break Statement, Next: Continue Statement, Prev: Switch Statement, Up: Statements @@ -7931,6 +8129,9 @@ appeared after `next', `file' was a keyword; otherwise, it was a regular identifier. The old usage is no longer accepted; `next file' generates a syntax error. + The `nextfile' statement has a special purpose when used inside a +`BEGINFILE' rule; see *note BEGINFILE/ENDFILE::. + File: gawk.info, Node: Exit Statement, Prev: Nextfile Statement, Up: Statements @@ -8049,13 +8250,24 @@ specific to `gawk' are marked with a pound sign (`#'). `FIELDWIDTHS #' This is a space-separated list of columns that tells `gawk' how to split input with fixed columnar boundaries. Assigning a value to - `FIELDWIDTHS' overrides the use of `FS' for field splitting. - *Note Constant Size::, for more information. + `FIELDWIDTHS' overrides the use of `FS' and `FPAT' for field + splitting. *Note Constant Size::, for more information. If `gawk' is in compatibility mode (*note Options::), then `FIELDWIDTHS' has no special meaning, and field-splitting operations occur based exclusively on the value of `FS'. +`FPAT #' + This is a regular expression (as a string) that tells `gawk' to + create the fields based on text that matches the regular + expression. Assigning a value to `FPAT' overrides the use of `FS' + and `FIELDWIDTHS' for field splitting. *Note Splitting By + Content::, for more information. + + If `gawk' is in compatibility mode (*note Options::), then `FPAT' + has no special meaning, and field-splitting operations occur based + exclusively on the value of `FS'. + `FS' This is the input field separator (*note Field Separators::). The value is a single-character string or a multi-character regular @@ -8063,7 +8275,8 @@ specific to `gawk' are marked with a pound sign (`#'). record. If the value is the null string (`""'), then each character in the record becomes a separate field. (This behavior is a `gawk' extension. 
POSIX `awk' does not specify the behavior - when `FS' is the null string.) + when `FS' is the null string.) *FIXME: NEXT ED:* Mark as common + extension. The default value is `" "', a string consisting of a single space. As a special exception, this value means that any sequence of @@ -8244,12 +8457,17 @@ with a pound sign (`#'). during a read for `getline', or during a `close' operation, then `ERRNO' contains a string describing the error. - `ERRNO' works similarly to the C variable `errno'. In particular - `gawk' _never_ clears it (sets it to zero or `""'). Thus, you - should only expect its value to be meaningful when an I/O - operation returns a failure value, such as `getline' returning -1. - You are, of course, free to clear it yourself before doing an I/O - operation. + *FIXME:* Get the version right. Starting with version 3.X, `gawk' + clears `ERRNO' before opening each command line input file. This + enables checking if the file is readable inside a `BEGINFILE' + pattern (*note BEGINFILE/ENDFILE::). + + Otherwise, `ERRNO' works similarly to the C variable `errno'. + Except for the case just mentioned, `gawk' _never_ clears it (sets + it to zero or `""'). Thus, you should only expect its value to be + meaningful when an I/O operation returns a failure value, such as + `getline' returning -1. You are, of course, free to clear it + yourself before doing an I/O operation. This variable is a `gawk' extension. In other `awk' implementations, or if `gawk' is in compatibility mode (*note @@ -8298,9 +8516,10 @@ with a pound sign (`#'). The value of the `geteuid' system call. `PROCINFO["FS"]' - This is `"FS"' if field splitting with `FS' is in effect, or - it is `"FIELDWIDTHS"' if field splitting with `FIELDWIDTHS' - is in effect. + This is `"FS"' if field splitting with `FS' is in effect, + `"FIELDWIDTHS"' if field splitting with `FIELDWIDTHS' is in + effect, or it is `"FPAT"' if field matching with `FPAT' is in + effect. 
`PROCINFO["gid"]' The value of the `getgid' system call. @@ -8441,7 +8660,8 @@ elements from `ARGV' (*note Delete::). actual processing of the input begins. *Note Split Program::, and see *note Tee Program::, for examples of each way of removing elements from `ARGV'. The following fragment processes `ARGV' in order to examine, -and then remove, command-line options: +and then remove, command-line options: *FIXME: NEXT ED:* Add xref to +rewind() function. BEGIN { for (i = 1; i < ARGC; i++) { @@ -8500,13 +8720,7 @@ cannot have a variable and an array with the same name in the same * Menu: -* Array Intro:: Introduction to Arrays -* Reference to Elements:: How to examine one element of an array. -* Assigning Elements:: How to change an element of an array. -* Array Example:: Basic Example of an Array -* Scanning an Array:: A variation of the `for' statement. It - loops through the indices of an array's - existing elements. +* Array Basics:: The basics of arrays. * Delete:: The `delete' statement removes an element from an array. * Numeric Array Subscripts:: How to use numbers as subscripts in @@ -8514,14 +8728,32 @@ cannot have a variable and an array with the same name in the same * Uninitialized Subscripts:: Using Uninitialized variables as subscripts. * Multi-dimensional:: Emulating multidimensional arrays in `awk'. -* Multi-scanning:: Scanning multidimensional arrays. * Array Sorting:: Sorting array values and indices. -File: gawk.info, Node: Array Intro, Next: Reference to Elements, Up: Arrays +File: gawk.info, Node: Array Basics, Next: Delete, Up: Arrays -7.1 Introduction to Arrays -========================== +7.1 The Basics of Arrays +======================== + +This minor node presents the basics: working with elements in arrays +one at a time, and traversing all of the elements in an array. + +* Menu: + +* Array Intro:: Introduction to Arrays +* Reference to Elements:: How to examine one element of an array. 
+* Assigning Elements:: How to change an element of an array. +* Array Example:: Basic Example of an Array +* Scanning an Array:: A variation of the `for' statement. It + loops through the indices of an array's + existing elements. + + +File: gawk.info, Node: Array Intro, Next: Reference to Elements, Up: Array Basics + +7.1.1 Introduction to Arrays +---------------------------- Doing linear scans over an associative array is like trying to club someone to death with a loaded Uzi. @@ -8558,6 +8790,7 @@ declared.) example, conceptually, if the element values are 8, `"foo"', `""', and 30: + *FIXME: NEXT ED:* Use real images here +---------+---------+--------+---------+ | 8 | "foo" | "" | 30 | Value +---------+---------+--------+---------+ @@ -8619,10 +8852,10 @@ starting at one. (*Note String Functions::.) independent of the number of elements in the array. -File: gawk.info, Node: Reference to Elements, Next: Assigning Elements, Prev: Array Intro, Up: Arrays +File: gawk.info, Node: Reference to Elements, Next: Assigning Elements, Prev: Array Intro, Up: Array Basics -7.2 Referring to an Array Element -================================= +7.1.2 Referring to an Array Element +----------------------------------- The principal way to use an array is to refer to one of its elements. An array reference is an expression as follows: @@ -8666,10 +8899,10 @@ except to scan all the elements. Also, this _does not_ create print "Subscript 2 is present." -File: gawk.info, Node: Assigning Elements, Next: Array Example, Prev: Reference to Elements, Up: Arrays +File: gawk.info, Node: Assigning Elements, Next: Array Example, Prev: Reference to Elements, Up: Array Basics -7.3 Assigning Array Elements -============================ +7.1.3 Assigning Array Elements +------------------------------ Array elements can be assigned values just like `awk' variables: @@ -8680,10 +8913,10 @@ index of the element of the array that is assigned a value.
The expression VALUE is the value to assign to that element of the array. -File: gawk.info, Node: Array Example, Next: Scanning an Array, Prev: Assigning Elements, Up: Arrays +File: gawk.info, Node: Array Example, Next: Scanning an Array, Prev: Assigning Elements, Up: Array Basics -7.4 Basic Array Example -======================= +7.1.4 Basic Array Example +------------------------- The following program takes a list of lines, each beginning with a line number, and prints them out in order of line number. The line numbers @@ -8736,10 +8969,10 @@ easy improvement to the program's `END' rule, as follows: } -File: gawk.info, Node: Scanning an Array, Next: Delete, Prev: Array Example, Up: Arrays +File: gawk.info, Node: Scanning an Array, Prev: Array Example, Up: Array Basics -7.5 Scanning All Elements of an Array -===================================== +7.1.5 Scanning All Elements of an Array +--------------------------------------- In programs that use arrays, it is often necessary to use a loop that executes once for each element of an array. In other languages, where @@ -8792,9 +9025,9 @@ them. Similarly, changing VAR inside the loop may produce strange results. It is best to avoid such things. -File: gawk.info, Node: Delete, Next: Numeric Array Subscripts, Prev: Scanning an Array, Up: Arrays +File: gawk.info, Node: Delete, Next: Numeric Array Subscripts, Prev: Array Basics, Up: Arrays -7.6 The `delete' Statement +7.2 The `delete' Statement ========================== To remove an individual element of an array, use the `delete' statement: @@ -8865,7 +9098,7 @@ regular variable). For example, the following does not work: File: gawk.info, Node: Numeric Array Subscripts, Next: Uninitialized Subscripts, Prev: Delete, Up: Arrays -7.7 Using Numbers to Subscript Arrays +7.3 Using Numbers to Subscript Arrays ===================================== An important aspect about arrays to remember is that _array subscripts @@ -8916,7 +9149,7 @@ on your programs. 
File: gawk.info, Node: Uninitialized Subscripts, Next: Multi-dimensional, Prev: Numeric Array Subscripts, Up: Arrays -7.8 Using Uninitialized Variables as Subscripts +7.4 Using Uninitialized Variables as Subscripts =============================================== Suppose it's necessary to write a program to print the input data in @@ -8961,16 +9194,20 @@ string as a subscript if `--lint' is provided on the command line (*note Options::). -File: gawk.info, Node: Multi-dimensional, Next: Multi-scanning, Prev: Uninitialized Subscripts, Up: Arrays +File: gawk.info, Node: Multi-dimensional, Next: Array Sorting, Prev: Uninitialized Subscripts, Up: Arrays -7.9 Multidimensional Arrays +7.5 Multidimensional Arrays =========================== -A multidimensional array is an array in which an element is identified -by a sequence of indices instead of a single index. For example, a -two-dimensional array requires two indices. The usual way (in most -languages, including `awk') to refer to an element of a two-dimensional -array named `grid' is with `grid[X,Y]'. +* Menu: + +* Multi-scanning:: Scanning multidimensional arrays. + + A multidimensional array is an array in which an element is +identified by a sequence of indices instead of a single index. For +example, a two-dimensional array requires two indices. The usual way +(in most languages, including `awk') to refer to an element of a +two-dimensional array named `grid' is with `grid[X,Y]'. Multidimensional arrays are supported in `awk' through concatenation of indices into one string. 
`awk' converts the indices into strings @@ -9043,10 +9280,10 @@ the program produces the following output: 3 2 1 6 -File: gawk.info, Node: Multi-scanning, Next: Array Sorting, Prev: Multi-dimensional, Up: Arrays +File: gawk.info, Node: Multi-scanning, Up: Multi-dimensional -7.10 Scanning Multidimensional Arrays -===================================== +7.5.1 Scanning Multidimensional Arrays +-------------------------------------- There is no special `for' statement for scanning a "multidimensional" array. There cannot be one, because, in truth, there are no @@ -9083,10 +9320,10 @@ The result is to set `separate[1]' to `"1"' and `separate[2]' to recovered. -File: gawk.info, Node: Array Sorting, Prev: Multi-scanning, Up: Arrays +File: gawk.info, Node: Array Sorting, Prev: Multi-dimensional, Up: Arrays -7.11 Sorting Array Values and Indices with `gawk' -================================================= +7.6 Sorting Array Values and Indices with `gawk' +================================================ The order in which an array is scanned with a `for (i in array)' loop is essentially arbitrary. In most `awk' implementations, sorting an @@ -9171,7 +9408,9 @@ to `ind', there is only one copy of the actual index strings. We said previously that comparisons are done using `gawk''s "usual comparison rules." Because `IGNORECASE' affects string comparisons, the value of `IGNORECASE' also affects sorting for both `asort' and -`asorti'. Caveat Emptor. +`asorti'. Note also that the locale's sorting order does _not_ come +into play; comparisons are based on character values only. Caveat +Emptor. File: gawk.info, Node: Functions, Next: Internationalization, Prev: Arrays, Up: Top @@ -9192,6 +9431,7 @@ major node describes these "user-defined" functions. * Built-in:: Summarizes the built-in functions. * User-defined:: Describes User-defined functions in detail. +* Indirect Calls:: Choosing the function to call at runtime. 
File: gawk.info, Node: Built-in, Next: User-defined, Up: Functions @@ -9492,7 +9732,7 @@ with a pound sign (`#'): found, it returns zero. The REGEXP argument may be either a regexp constant (`/.../') or a - string constant ("..."). In the latter case, the string is + string constant (`"..."'). In the latter case, the string is treated as a regexp to be matched. *note Computed Regexps::, for a discussion of the difference between the two forms, and the implications for writing your program correctly. @@ -9572,20 +9812,40 @@ with a pound sign (`#'): compatibility mode (*note Options::), using a third argument is a fatal error. -`split(STRING, ARRAY [, FIELDSEP])' +`patsplit(STRING, ARRAY [, FIELDPAT [, SEPS ] ])' + This function divides STRING into pieces defined by FIELDPAT and + stores the pieces in ARRAY and the separator strings in the SEPS + array. The first piece is stored in `ARRAY[1]', the second piece + in `ARRAY[2]', and so forth. The string value of the third + argument, FIELDPAT, is a regexp describing the fields in STRING + (just as `FPAT' is a regexp describing the fields in input + records). If FIELDPAT is omitted, the value of `FPAT' is used. + `patsplit' returns the number of elements created. `SEPS[I]' is + the separator string between `ARRAY[I]' and `ARRAY[I+1]'. Any + leading separator will be in `SEPS[0]'. + + The `patsplit' function splits strings into pieces in a manner + similar to the way input lines are split into fields using `FPAT'. + +`split(STRING, ARRAY [, FIELDSEP [, SEPS ] ])' This function divides STRING into pieces separated by FIELDSEP and - stores the pieces in ARRAY. The first piece is stored in - `ARRAY[1]', the second piece in `ARRAY[2]', and so forth. The - string value of the third argument, FIELDSEP, is a regexp - describing where to split STRING (much as `FS' can be a regexp - describing where to split input records). If FIELDSEP is omitted, - the value of `FS' is used. `split' returns the number of elements - created. 
+ stores the pieces in ARRAY and the separator strings in the SEPS + array. The first piece is stored in `ARRAY[1]', the second piece + in `ARRAY[2]', and so forth. The string value of the third + argument, FIELDSEP, is a regexp describing where to split STRING + (much as `FS' can be a regexp describing where to split input + records). If FIELDSEP is omitted, the value of `FS' is used. + `split' returns the number of elements created. SEPS is a `gawk' + extension with `SEPS[I]' being the separator string between + `ARRAY[I]' and `ARRAY[I+1]'. If FIELDSEP is a single space then + any leading whitespace goes into `SEPS[0]' and any trailing + whitespace goes into `SEPS[N]' where N is the return value of + `split()' (that is, the number of elements in ARRAY). The `split' function splits strings into pieces in a manner similar to the way input lines are split into fields. For example: - split("cul-de-sac", a, "-") + split("cul-de-sac", a, "-", seps) splits the string `cul-de-sac' into three fields using `-' as the separator. It sets the contents of the array `a' as follows: @@ -9594,14 +9854,19 @@ with a pound sign (`#'): a[2] = "de" a[3] = "sac" + and sets the contents of the array `seps' as follows: + + seps[1] = "-" + seps[2] = "-" + The value returned by this call to `split' is three. As with input field-splitting, when the value of FIELDSEP is - `" "', leading and trailing whitespace is ignored, and the elements - are separated by runs of whitespace. Also as with input - field-splitting, if FIELDSEP is the null string, each individual - character in the string is split into its own array element. - (This is a `gawk'-specific extension.) + `" "', leading and trailing whitespace is ignored in ARRAY but not + in SEPS, and the elements are separated by runs of whitespace. + Also as with input field-splitting, if FIELDSEP is the null + string, each individual character in the string is split into its + own array element. (This is a `gawk'-specific extension.) 
Note, however, that `RS' has no effect on the way `split' works. Even though `RS = ""' causes newline to also be an input field @@ -9615,7 +9880,7 @@ with a pound sign (`#'): writing your program correctly. Before splitting the string, `split' deletes any previously - existing elements in the array ARRAY. + existing elements in the arrays ARRAY and SEPS. If STRING is null, the array has no elements. (So this is a portable way to delete an entire array with one statement. *Note @@ -9662,7 +9927,7 @@ with a pound sign (`#'): REPLACEMENT. The modified string becomes the new value of TARGET. The REGEXP argument may be either a regexp constant (`/.../') or a - string constant ("..."). In the latter case, the string is + string constant (`"..."'). In the latter case, the string is treated as a regexp to be matched. *note Computed Regexps::, for a discussion of the difference between the two forms, and the implications for writing your program correctly. @@ -9992,13 +10257,12 @@ Table 8.4: POSIX 2001 rules for sub The only case where the difference is noticeable is the last one: `\\\\' is seen as `\\' and produces `\' instead of `\\'. - Starting with version 3.1.4, `gawk' follows the POSIX rules when -`--posix' is specified (*note Options::). Otherwise, it continues to -follow the 1996 proposed rules, since, as of this writing, that has -been its behavior for over seven years. + Starting with version 3.1.4, `gawk' followed the POSIX rules when +`--posix' was specified (*note Options::). Otherwise, it continued to +follow the 1996 proposed rules, since that had been its behavior for +many years. - NOTE: At the next major release, `gawk' will switch to using the - POSIX 2001 rules by default. + As of version 3.2, `gawk' uses the POSIX 2001 rules. The rules for `gensub' are considerably simpler. At the runtime level, whenever `gawk' sees a `\', if the following character is a @@ -10131,6 +10395,10 @@ parameters are enclosed in square brackets ([ ]): an editor. 
Some operating systems cannot implement the `system' function. `system' causes a fatal error if it is not supported. + NOTE: When `--sandbox' is specified, the `system' function is + disabled. + + Advanced Notes: Interactive Versus Noninteractive Buffering ----------------------------------------------------------- @@ -10486,7 +10754,7 @@ if the time zone is set to UTC: #! /bin/sh # - # date --- approximate the P1003.2 'date' command + # date --- approximate the POSIX 'date' command case $1 in -u) TZ=UTC0 # use UTC @@ -10495,7 +10763,7 @@ if the time zone is set to UTC: esac gawk 'BEGIN { - format = "%a %b %d %H:%M:%S %Z %Y" + format = "%a %b %e %H:%M:%S %Z %Y" exitval = 0 if (ARGC > 2) @@ -10692,7 +10960,7 @@ brackets ([ ]): binding for the given DOMAIN. -File: gawk.info, Node: User-defined, Prev: Built-in, Up: Functions +File: gawk.info, Node: User-defined, Next: Indirect Calls, Prev: Built-in, Up: Functions 8.2 User-Defined Functions ========================== @@ -10724,7 +10992,8 @@ There is no need to put the definition of a function before all uses of the function. This is because `awk' reads the entire program before starting to execute any of it. - The definition of a function named NAME looks like this: + The definition of a function named NAME looks like this: *FIXME: +NEXT ED:* put [ ] around parameter list. function NAME(PARAMETER-LIST) { @@ -10801,7 +11070,8 @@ of the variable `func' with the return value of the function `foo'. If the resulting string is non-null, the action is executed. This is probably not what is desired. (`awk' accepts this input as syntactically valid, because functions may be used before they are -defined in `awk' programs.) +defined in `awk' programs.) *FIXME: NEXT ED:* This won't actually run, +since foo() is undefined ... To ensure that your `awk' programs are portable, always use the keyword `function' when defining a function. 
@@ -10884,7 +11154,7 @@ built-in `strftime' function (*note Time Functions::) to create an function ctime(ts, format) { - format = "%a %b %d %H:%M:%S %Z %Y" + format = "%a %b %e %H:%M:%S %Z %Y" if (ts == 0) ts = systime() # use current time as default return strftime(format, ts) @@ -11101,6 +11371,293 @@ runtime. Here is an annotated sample program: of them. +File: gawk.info, Node: Indirect Calls, Prev: User-defined, Up: Functions + +8.3 Indirect Function Calls +=========================== + +This section describes a `gawk'-specific extension. + + Often, you may wish to defer the choice of function to call until +runtime. For example, you may have different kinds of records, each of +which should be processed differently. + + Normally, you would have to use a series of `if'-`else' statements +to decide which function to call. By using "indirect" function calls, +you can specify the name of the function to call as a string variable, +and then call the function. Let's look at an example. + + Suppose you have a file with your test scores for the classes you +are taking. The first field is the class name. The following fields +are the functions to call to process the data, up to a "marker" field +`data:'. Following the marker, to the end of the record, are the +various numeric test scores. + + Here is the initial file; you wish to get the sum and the average of +your test scores: + + Biology_101 sum average data: 87.0 92.4 78.5 94.9 + Chemistry_305 sum average data: 75.2 98.3 94.7 88.2 + English_401 sum average data: 100.0 95.6 87.1 93.4 + + To process the data, you might write initially: + + { + class = $1 + for (i = 2; $i != "data:"; i++) { + if ($i == "sum") + sum() # processes the whole record + else if ($i == "average") + average() + ... # and so on + } + } + +This style of programming works, but can be awkward. With "indirect" +function calls, you tell `gawk' to use the _value_ of a variable as the +name of the function to call. 
+ + The syntax is similar to that of a regular function call: an +identifier immediately followed by a left parenthesis, any arguments, +and then a closing right parenthesis, with the addition of a leading `@' +character: + + the_func = "sum" + result = @the_func() # calls the `sum' function + + Here is a full program that processes the previously shown data, +using indirect function calls. + + # indirectcall.awk --- Demonstrate indirect function calls + + # average --- return the average of the values in fields $first - $last + + function average(first, last, sum, i) + { + sum = 0; + for (i = first; i <= last; i++) + sum += $i + + return sum / (last - first + 1) + } + + # sum --- return the sum of the values in fields $first - $last + + function sum(first, last, ret, i) + { + ret = 0; + for (i = first; i <= last; i++) + ret += $i + + return ret + } + + These two functions expect to work on fields; thus the parameters +`first' and `last' indicate where in the fields to start and end. Otherwise +they perform the expected computations and are not unusual. + + # For each record, print the class name and the requested statistics + + { + class_name = $1 + gsub(/_/, " ", class_name) # Replace _ with spaces + + # find start + for (i = 1; i <= NF; i++) { + if ($i == "data:") { + start = i + 1 + break + } + } + + printf("%s:\n", class_name) + for (i = 2; $i != "data:"; i++) { + the_function = $i + printf("\t%s: <%s>\n", $i, @the_function(start, NF) "") + } + print "" + } + + This is the main processing for each record. It prints the class +name (with underscores replaced with spaces). It then finds the start +of the actual data, saving it in `start'. The last part of the code +loops through each function name (from `$2' up to the marker, `data:'), +calling the function named by the field. The indirect function call +itself occurs as a parameter in the call to `printf'. 
(The `printf' +format string uses `%s' as the format specifier so that we can use +functions that return strings, as well as numbers. Note that the result +from the indirect call is concatenated with the empty string, in order +to force it to be a string value.) + + Here is the result of running the program: + + $ gawk -f indirectcall.awk class_data1 + => Biology 101: + => sum: <352.8> + => average: <88.2> + => + => Chemistry 305: + => sum: <356.4> + => average: <89.1> + => + => English 401: + => sum: <376.1> + => average: <94.025> + + The ability to use indirect function calls is more powerful than you +may think at first. The C and C++ languages provide "function +pointers," which are a mechanism for calling a function chosen at +runtime. One of the most well-known uses of this ability is the C +`qsort' function, which sorts an array using the well-known "quick +sort" algorithm (see the Wikipedia article +(http://en.wikipedia.org/wiki/Quick_sort) for more information). To +use this function, you supply a pointer to a comparison function. This +mechanism allows you to sort arbitrary data in an arbitrary fashion. + + We can do something similar using `gawk', like this: + + # quicksort.awk --- Quicksort algorithm, with user-supplied + # comparison function + # quicksort --- C.A.R. Hoare's quick sort algorithm. 
See Wikipedia + # or almost any algorithms or computer science text + + function quicksort(data, left, right, less_than, i, last) + { + if (left >= right) # do nothing if array contains fewer + return # than two elements + + quicksort_swap(data, left, int((left + right) / 2)) + last = left + for (i = left + 1; i <= right; i++) + if (@less_than(data[i], data[left])) + quicksort_swap(data, ++last, i) + quicksort_swap(data, left, last) + quicksort(data, left, last - 1, less_than) + quicksort(data, last + 1, right, less_than) + } + + # quicksort_swap --- helper function for quicksort, should really be inline + + function quicksort_swap(data, i, j, temp) + { + temp = data[i] + data[i] = data[j] + data[j] = temp + } + + The `quicksort' function receives the `data' array, the starting and +ending indices to sort (`left' and `right'), and the name of a function +that performs a "less than" comparison. It then implements the quick +sort algorithm. + + To make use of the sorting function, we return to our previous +example. The first thing to do is write some comparison functions: + + # num_lt --- do a numeric less than comparison + + function num_lt(left, right) + { + return ((left + 0) < (right + 0)) + } + + # num_ge --- do a numeric greater than or equal to comparison + + function num_ge(left, right) + { + return ((left + 0) >= (right + 0)) + } + + The `num_ge' function is needed to perform a descending sort; when +used to perform a "less than" test, it actually does the opposite +(greater than or equal to), which yields data sorted in descending +order. + + Next comes a sorting function. It is parameterized with the +starting and ending field numbers and the comparison function. 
It +builds an array with the data and calls `quicksort' appropriately, and +then formats the results as a single string: + + # do_sort --- sort the data according to `compare' and return it as a string + + function do_sort(first, last, compare, data, i, retval) + { + delete data + for (i = 1; first <= last; first++) { + data[i] = $first + i++ + } + + quicksort(data, 1, i-1, compare) + + retval = data[1] + for (i = 2; i in data; i++) + retval = retval " " data[i] + + return retval + } + + Finally, the two sorting functions call `do_sort', passing in the +names of the two comparison functions: + + # sort --- sort the data in ascending order and return it as a string + + function sort(first, last) + { + return do_sort(first, last, "num_lt") + } + + # rsort --- sort the data in descending order and return it as a string + + function rsort(first, last) + { + return do_sort(first, last, "num_ge") + } + + Here is an extended version of the data file: + + Biology_101 sum average sort rsort data: 87.0 92.4 78.5 94.9 + Chemistry_305 sum average sort rsort data: 75.2 98.3 94.7 88.2 + English_401 sum average sort rsort data: 100.0 95.6 87.1 93.4 + + Finally, here are the results when the enhanced program is run: + + $ gawk -f quicksort.awk -f indirectcall.awk class_data2 + => Biology 101: + => sum: <352.8> + => average: <88.2> + => sort: <78.5 87.0 92.4 94.9> + => rsort: <94.9 92.4 87.0 78.5> + => + => Chemistry 305: + => sum: <356.4> + => average: <89.1> + => sort: <75.2 88.2 94.7 98.3> + => rsort: <98.3 94.7 88.2 75.2> + => + => English 401: + => sum: <376.1> + => average: <94.025> + => sort: <87.1 93.4 95.6 100.0> + => rsort: <100.0 95.6 93.4 87.1> + + Remember that you must supply a leading `@' in front of an indirect +function call. + + Unfortunately, indirect function calls cannot be used with the +built-in functions. However, you can generally write "wrapper" +functions which call the built-in ones, and those can be called +indirectly. 
(Other than, perhaps, the mathematical functions, there is +not a lot of reason to try to call the built-in functions indirectly.) + + `gawk' does its best to make indirect function calls efficient. For +example: + + for (i = 1; i <= n; i++) + @the_func() + +`gawk' will look up the actual function to call only once. + + File: gawk.info, Node: Internationalization, Next: Advanced Features, Prev: Functions, Up: Top 9 Internationalization with `gawk' @@ -11392,9 +11949,9 @@ extracted to create the initial `.po' file. As part of translation, it is often helpful to rearrange the order in which arguments to `printf' are output. - `gawk''s `--gen-po' command-line option extracts the messages and is -discussed next. After that, `printf''s ability to rearrange the order -for `printf' arguments at runtime is covered. + `gawk''s `--gen-pot' command-line option extracts the messages and +is discussed next. After that, `printf''s ability to rearrange the +order for `printf' arguments at runtime is covered. * Menu: @@ -11410,12 +11967,12 @@ File: gawk.info, Node: String Extraction, Next: Printf Ordering, Up: Translat Once your `awk' program is working, and all the strings have been marked and you've set (and perhaps bound) the text domain, it is time -to produce translations. First, use the `--gen-po' command-line option -to create the initial `.po' file: +to produce translations. First, use the `--gen-pot' command-line +option to create the initial `.po' file: - $ gawk --gen-po -f guide.awk > guide.po + $ gawk --gen-pot -f guide.awk > guide.po - When run with `--gen-po', `gawk' does not execute your program. + When run with `--gen-pot', `gawk' does not execute your program. Instead, it parses it as usual and prints all marked strings to standard output in the format of a GNU `gettext' Portable Object file. Also included in the output are any constant strings that appear as the @@ -11587,9 +12144,9 @@ source: print "Pardon me, Zaphod who?" 
} -Run `gawk --gen-po' to create the `.po' file: +Run `gawk --gen-pot' to create the `.po' file: - $ gawk --gen-po -f guide.awk > guide.po + $ gawk --gen-pot -f guide.awk > guide.po This produces: @@ -11902,20 +12459,25 @@ networking connection. You can think of this as just a _very long_ two-way pipeline to a coprocess. The way `gawk' decides that you want to use TCP/IP -networking is by recognizing special file names that begin with -`/inet/'. +networking is by recognizing special file names that begin with one of +`/inet/', `/inet4/' or `/inet6/'. The full syntax of the special file name is -`/inet/PROTOCOL/LOCAL-PORT/REMOTE-HOST/REMOTE-PORT'. The components -are: +`/NET-TYPE/PROTOCOL/LOCAL-PORT/REMOTE-HOST/REMOTE-PORT'. The +components are: + +NET-TYPE + Specifies the kind of Internet connection to make. Use `/inet4/' + to force IPv4, and `/inet6/' to force IPv6. Plain `/inet/' (which + used to be the only option) uses the system default, most likely + IPv4. PROTOCOL The protocol to use over IP. This must be either `tcp', `udp', or `raw', for a TCP, UDP, or raw IP connection, respectively. The use of TCP is recommended for most applications. - *Caution:* The use of raw sockets is not currently supported in - version 3.1 of `gawk'. + *Caution:* The use of raw sockets is not currently supported. LOCAL-PORT The local TCP or UDP port number to use. Use a port number of `0' @@ -12211,8 +12773,8 @@ full details. * Other Arguments:: Input file names and variable assignments. * AWKPATH Variable:: Searching directories for `awk' programs. -* Obsolete:: Obsolete Options and/or features. * Exit Status:: `gawk''s exit status. +* Obsolete:: Obsolete Options and/or features. * Undocumented:: Undocumented Options and Features. * Known Bugs:: Known Bugs in `gawk'. @@ -12283,17 +12845,6 @@ options and their meanings are as follows: those variables as it needs to, possibly ignoring any predefined value you may have given. 
-`-mf N' -`-mr N' - Sets various memory limits to the value N. The `f' flag sets the - maximum number of fields and the `r' flag sets the maximum record - size. These two flags and the `-m' option are from the Bell - Laboratories research version of Unix `awk'. They are provided - for compatibility but otherwise ignored by `gawk', since `gawk' - has no predefined limits. (The Bell Laboratories `awk' no longer - needs these options; it continues to accept them to avoid breaking - old programs.) - `-W GAWK-OPT' Following the POSIX standard, implementation-specific options are supplied as arguments to the `-W' option. These options also have @@ -12311,19 +12862,20 @@ options and their meanings are as follows: shell scripts, if you have file names that will be specified by the user that could start with `-'. - The previous list described options mandated by the POSIX standard, -as well as options available in the Bell Laboratories version of `awk'. + The previous list described options mandated by the POSIX standard. The following list describes `gawk'-specific options: -`-O' -`--optimize' - Enables some optimizations on the internal representation of the - program. At the moment this includes just simple constant - folding. The `gawk' maintainer hopes to add more optimizations - over time. +`-b' +`--characters-as-bytes' + Causes `gawk' to treat all input data as single-byte characters. + Normally, `gawk' follows the POSIX standard and attempts to process + its input data according to the current locale. This can often + involve converting multi-byte characters into wide characters + (internally), and can lead to problems or confusion if the input + data does not contain valid multi-byte characters. This option is + an easy way to tell `gawk': "hands off my data!". 
-`-W compat' -`-W traditional' +`-c' `--compat' `--traditional' Specifies "compatibility mode", in which the GNU extensions to the @@ -12332,17 +12884,13 @@ The following list describes `gawk'-specific options: is the preferred form of this option. *Note POSIX/GNU::, which summarizes the extensions. Also see *note Compatibility Mode::. -`-W copyright' +`-C' `--copyright' +`--copyleft' Print the short version of the General Public License and then exit. -`-W copyleft' -`--copyleft' - Just like `--copyright'. This option may disappear in a future - version of `gawk'. - -`-W dump-variables[=FILE]' +`-d [FILE]' `--dump-variables[=FILE]' Prints a sorted list of global variables, their types, and final values to FILE. If no FILE is provided, `gawk' prints this list @@ -12356,7 +12904,15 @@ The following list describes `gawk'-specific options: particularly easy mistake to make with simple variable names like `i', `j', etc.) -`-W exec FILE' +`-e PROGRAM-TEXT' +`--source PROGRAM-TEXT' + Allows you to mix source code in files with source code that you + enter on the command line. Program source code is taken from the + PROGRAM-TEXT. This is particularly useful when you have library + functions that you want to use from your command-line programs + (*note AWKPATH Variable::). + +`-E FILE' `--exec FILE' Similar to `-f', reads `awk' program text from FILE. There are two differences. The first is that this option also terminates @@ -12371,26 +12927,25 @@ The following list describes `gawk'-specific options: CGI application. This option should be used with `#!' scripts (*note Executable Scripts::), like so: - #! /usr/local/bin/gawk --exec + #! /usr/local/bin/gawk -E AWK PROGRAM HERE ... -`-W gen-po' -`--gen-po' +`-g' +`--gen-pot' Analyzes the source program and generates a GNU `gettext' Portable Object file on standard output for all string constants that have been marked for translation. *Note Internationalization::, for information about this option. 
-`-W help' -`-W usage' +`-h' `--help' `--usage' Prints a "usage" message summarizing the short and long style options that `gawk' accepts and then exit. -`-W lint[=fatal]' -`--lint[=fatal]' +`-l [value]' +`--lint[=value]' Warns about constructs that are dubious or nonportable to other `awk' implementations. Some warnings are issued when `gawk' first reads your program. Others are issued at runtime, as your program @@ -12408,12 +12963,12 @@ The following list describes `gawk'-specific options: inappropriate construct. As `awk' programs are usually short, doing so is not burdensome. -`-W lint-old' +`-L' `--lint-old' Warns about constructs that are not available in the original version of `awk' from Version 7 Unix (*note V7/SVR3.1::). -`-W non-decimal-data' +`-n' `--non-decimal-data' Enable automatic interpretation of octal and hexadecimal values in input data (*note Nondecimal Data::). @@ -12421,7 +12976,31 @@ The following list describes `gawk'-specific options: *Caution:* This option can severely break old programs. Use with care. -`-W posix' +`-N' +`--use-lc-numeric' + This option forces the use of the locale's decimal point character + when parsing numeric input data (*note Locales::). + +`-O' +`--optimize' + Enables some optimizations on the internal representation of the + program. At the moment this includes just simple constant + folding. The `gawk' maintainer hopes to add more optimizations + over time. + +`-p [FILE]' +`--profile[=FILE]' + Enable profiling of `awk' programs (*note Profiling::). By + default, profiles are created in a file named `awkprof.out'. The + optional FILE argument allows you to specify a different file name + for the profile file. + + When run with `gawk', the profile is just a "pretty printed" + version of the program. When run with `pgawk', the profile + contains execution counts for each statement in the program in the + left margin, and function call counts for each function. + +`-P' `--posix' Operates in strict POSIX mode. 
This disables all `gawk' extensions (just like `--traditional') and adds the following @@ -12457,39 +13036,23 @@ The following list describes `gawk'-specific options: line, `--posix' takes precedence. `gawk' also issues a warning if both options are supplied. -`-W profile[=FILE]' -`--profile[=FILE]' - Enable profiling of `awk' programs (*note Profiling::). By - default, profiles are created in a file named `awkprof.out'. The - optional FILE argument allows you to specify a different file name - for the profile file. - - When run with `gawk', the profile is just a "pretty printed" - version of the program. When run with `pgawk', the profile - contains execution counts for each statement in the program in the - left margin, and function call counts for each function. - -`-W re-interval' +`-r' `--re-interval' Allows interval expressions (*note Regexp Operators::) in regexps. - Because interval expressions were traditionally not available in - `awk', `gawk' does not provide them by default. This prevents old - `awk' programs from breaking. - -`-W source PROGRAM-TEXT' -`--source PROGRAM-TEXT' - Allows you to mix source code in files with source code that you - enter on the command line. Program source code is taken from the - PROGRAM-TEXT. This is particularly useful when you have library - functions that you want to use from your command-line programs - (*note AWKPATH Variable::). - -`-W use-lc-numeric' -`--use-lc-numeric' - This option forces the use of the locale's decimal point character - when parsing numeric input data (*note Locales::). - -`-W version' + This is now the default behavior for `gawk'. Nevertheless, this + option remains for both backward compatibility, and for use in + combination with the `--traditional' option. + +`-S' +`--sandbox' + In sandbox mode, the `system' function, input redirections with + `getline', output redirections with `print' and `printf' and + dynamic extensions are disabled. 
This is particularly useful when + you want to run `awk' scripts from questionable sources and need + to make sure the scripts can't access your system (other than the + specified input data file). + +`-V' `--version' Prints version information for this particular copy of `gawk'. This allows you to determine if your copy of `gawk' is up to date @@ -12612,7 +13175,7 @@ the value of `FS' is not strictly necessary. It remains for historical compatibility. -File: gawk.info, Node: AWKPATH Variable, Next: Obsolete, Prev: Other Arguments, Up: Invoking Gawk +File: gawk.info, Node: AWKPATH Variable, Next: Exit Status, Prev: Other Arguments, Up: Invoking Gawk 11.4 The `AWKPATH' Environment Variable ======================================= @@ -12672,7 +13235,7 @@ the value of `$(datadir)' generated when `gawk' was configured. You probably don't need to worry about this, though. -File: gawk.info, Node: Exit Status, Next: Undocumented, Prev: Obsolete, Up: Invoking Gawk +File: gawk.info, Node: Exit Status, Next: Obsolete, Prev: AWKPATH Variable, Up: Invoking Gawk 11.5 `gawk''s Exit Status ========================= @@ -12690,7 +13253,7 @@ with the value of the C constant `EXIT_SUCCESS'. This is usually zero. non-POSIX systems, this value may be mapped to `EXIT_FAILURE'. -File: gawk.info, Node: Obsolete, Next: Exit Status, Prev: AWKPATH Variable, Up: Invoking Gawk +File: gawk.info, Node: Obsolete, Next: Undocumented, Prev: Exit Status, Up: Invoking Gawk 11.6 Obsolete Options and/or Features ===================================== @@ -12701,19 +13264,15 @@ current version or that are still supported but deprecated (meaning that they will _not_ be in the next release). For version 3.1 of `gawk', there are no deprecated command-line -options from the previous version of `gawk'. The use of `next file' -(two words) for `nextfile' was deprecated in `gawk' 3.0 but still -worked. Starting with version 3.1, the two-word usage is no longer -accepted. 
+options from the previous version of `gawk'. - The process-related special files described in *note Special -Process::, work as described, but are now considered deprecated. -`gawk' prints a warning message every time they are used. (Use -`PROCINFO' instead; see *note Auto-set::.) They will be removed from -the next release of `gawk'. + The process-related special files `/dev/pid', `/dev/ppid', +`/dev/pgrpid', and `/dev/user' were deprecated in `gawk' 3.1, but still +worked. As of version 3.2, they are no longer interpreted specially by +`gawk'. (Use `PROCINFO' instead; see *note Auto-set::.) -File: gawk.info, Node: Undocumented, Next: Known Bugs, Prev: Exit Status, Up: Invoking Gawk +File: gawk.info, Node: Undocumented, Next: Known Bugs, Prev: Obsolete, Up: Invoking Gawk 11.7 Undocumented Options and Features ====================================== @@ -14124,6 +14683,7 @@ corresponding to the C functions of the same names: oldrs = RS olddol0 = $0 using_fw = (PROCINFO["FS"] == "FIELDWIDTHS") + using_fpat = (PROCINFO["FS"] == "FPAT") FS = ":" RS = "\n" @@ -14139,6 +14699,8 @@ corresponding to the C functions of the same names: FS = oldfs if (using_fw) FIELDWIDTHS = FIELDWIDTHS + else if (using_fpat) + FPAT = FPAT RS = oldrs $0 = olddol0 } @@ -14167,16 +14729,20 @@ This makes it possible to restore the correct field-splitting mechanism later. The test can only be true for `gawk'. It is false if using `FS' or on some other `awk' implementation. + The code that checks for using `FPAT' is similar. + The main part of the function uses a loop to read database lines, split the line into fields, and then store the line into each array as necessary. When the loop is done, `_pw_init' cleans up by closing the pipeline, setting `_pw_inited' to one, and restoring `FS' (and -`FIELDWIDTHS' if necessary), `RS', and `$0'. The use of `_pw_count' is -explained shortly. +`FIELDWIDTHS' or `FPAT' if necessary), `RS', and `$0'. The use of +`_pw_count' is explained shortly. 
- The `getpwnam' function takes a username as a string argument. If -that user is in the database, it returns the appropriate line. -Otherwise, it returns the null string: + *FIXME: NEXT ED:* All of these functions don't need the ... in ... +test. Just return the array element, which will be "" if not already +there. Duh. The `getpwnam' function takes a username as a string +argument. If that user is in the database, it returns the appropriate +line. Otherwise, it returns the null string: function getpwnam(name) { @@ -14341,6 +14907,7 @@ the same names: oldrs = RS olddol0 = $0 using_fw = (PROCINFO["FS"] == "FIELDWIDTHS") + using_fpat = (PROCINFO["FS"] == "FPAT") FS = ":" RS = "\n" @@ -14371,6 +14938,8 @@ the same names: FS = oldfs if (using_fw) FIELDWIDTHS = FIELDWIDTHS + else if (using_fpat) + FPAT = FPAT RS = oldrs $0 = olddol0 } @@ -14383,9 +14952,8 @@ might want it to be in a different directory on your system. These routines follow the same general outline as the user database routines (*note Passwd Functions::). The `_gr_inited' variable is used to ensure that the database is scanned no more than once. The -`_gr_init' function first saves `FS', `FIELDWIDTHS', `RS', and `$0', -and then sets `FS' and `RS' to the correct values for scanning the -group information. +`_gr_init' function first saves `FS', `RS', and `$0', and then sets +`FS' and `RS' to the correct values for scanning the group information. The group information is stored in several associative arrays. The arrays are indexed by group name (`_gr_byname'), by group ID number @@ -14409,7 +14977,7 @@ time there were no names. This code adds the names with a leading comma. It also doesn't check that there is a `$4'.) Finally, `_gr_init' closes the pipeline to `grcat', restores `FS' -(and `FIELDWIDTHS' if necessary), `RS', and `$0', initializes +(and `FIELDWIDTHS' or `FPAT' if necessary), `RS', and `$0', initializes `_gr_count' to zero (it is used later), and makes `_gr_inited' nonzero. 
The `getgrnam' function takes a group name as its argument, and if @@ -15268,6 +15836,7 @@ less than two, then no file names were supplied and `tee' prints a usage message and exits. Finally, `awk' is forced to read the standard input by setting `ARGV[1]' to `"-"' and `ARGC' to two: + *FIXME: NEXT ED:* Add more leading commentary in this program # tee.awk --- tee in awk BEGIN \ { @@ -15626,7 +16195,7 @@ lines, words, and characters to zero, and saves the current file name in function beginfile(file) { - chars = lines = words = 0 + lines = words = chars = 0 fname = FILENAME } @@ -15637,9 +16206,9 @@ reset the numbers for the following data file: function endfile(file) { - tchars += chars tlines += lines twords += words + tchars += chars if (do_lines) printf "\t%d", lines if (do_words) @@ -15707,8 +16276,8 @@ hope you find them both interesting and enjoyable. * Simple Sed:: A Simple Stream Editor. * Igawk Program:: A wrapper for `awk' that includes files. -* Signature Program:: People do amazing things with too much time - on their hands. +* Signature Program:: People do amazing things with too much time on + their hands. File: gawk.info, Node: Dupword Program, Next: Alarm Program, Up: Miscellaneous Programs @@ -16121,7 +16690,9 @@ File: gawk.info, Node: Word Sorting, Next: History Sorting, Prev: Labels Prog 13.3.5 Generating Word-Usage Counts ----------------------------------- -The following `awk' program prints the number of occurrences of each +*FIXME: NEXT ED:* Rewrite this whole section and example. + + The following `awk' program prints the number of occurrences of each word in its input. It illustrates the associative nature of `awk' arrays by using strings as subscripts. It also demonstrates the `for (i in array)' mechanism. Finally, it shows how `awk' is used in @@ -17233,7 +17804,8 @@ all be disabled with either the `--traditional' or `--posix' options `getline' returns -1 or `close' fails (*note Built-in Variables::). 
* The `/dev/pid', `/dev/ppid', `/dev/pgrpid', and `/dev/user' file - name interpretation (*note Special Files::). + name interpretation. (As of version 3.2, these names are no + longer supported.) * The ability to delete all of an array at once with `delete ARRAY' (*note Delete::). @@ -17359,10 +17931,6 @@ all be disabled with either the `--traditional' or `--posix' options treatment of pathnames that begin with `/p' as BSD portals (*note Portal Files::). - * The `--disable-directories-fatal' configuration option which - causes `gawk' to silently skip directories named on the command - line (*note Additional Configuration Options::). - * The use of GNU Automake to help in standardizing the configuration process (*note Quick Installation::). @@ -17396,6 +17964,48 @@ all be disabled with either the `--traditional' or `--posix' options * The `strftime' function acquired a third argument to enable printing times as UTC (*note Time Functions::). + Version 3.2 of `gawk' introduced the following features: + + * The special files `/dev/pid', `/dev/ppid', `/dev/pgrpid', and + `/dev/user' were removed entirely (*note Obsolete::). + + * The `\s' and `\S' escape sequences in regular expressions (*note + GNU Regexp Operators::). + + * Interval expressions became part of the default matching done if + not in POSIX mode or in compatibility mode. (*note Regexp + Operators::). + + * The `split()' function was given the additional optional fourth + argument which is an array to hold the text of the field + separators. (*note String Functions::). + + * The `BEGINFILE' and `ENDFILE' special patterns. (*note + BEGINFILE/ENDFILE::). + + * The `switch' statement was enabled by default. (*note Switch + Statement::). + + * The `--sandbox' and `--characters-as-bytes' options (*note + Options::). + + * Indirect function calls (*note Indirect Calls::). + + * The `--gen-po' command-line option was renamed `--gen-pot' (*note + String Extraction::). 
+ + * Directories on the command line produce a warning and are skipped + (*note Command line directories::). + + * The `FPAT' variable and its effects (*note Splitting By Content::). + + * The `patsplit' function (*note String Functions::). + + * The `/inet4' and `/inet6' special files for TCP/IP networking + using `|&' to specify which version of the IP protocol to use. + (*note TCP/IP Networking::). + + File: gawk.info, Node: Contributors, Prev: POSIX/GNU, Up: Language History @@ -17469,7 +18079,7 @@ Info file, in approximate chronological order: documentation. * Matthew Woehlke provided improvements for Tandem's POSIX-compliant - systems. Ralf Wildenhues now maintains this port. + systems. * Martin Brown provided the port to BeOS and its documentation. @@ -17814,10 +18424,6 @@ command line when compiling `gawk' from scratch, including: Treat pathnames that begin with `/p' as BSD portal files when doing two-way I/O with the `|&' operator (*note Portal Files::). -`--enable-switch' - Enable the recognition and execution of C-style `switch' statements - in `awk' programs (*note Switch Statement::.) - `--with-whiny-user-strftime' Force use of the included version of the `strftime' function for deficient systems @@ -17842,10 +18448,6 @@ command line when compiling `gawk' from scratch, including: desirable, but it may bring you some slight performance improvement. -`--disable-directories-fatal' - Causes `gawk' to silently skip directories named on the command - line. - As of version 3.1.5, the `--with-included-gettext' configuration option is no longer available, since `gawk' expects the GNU `gettext' library to be installed as an external library. @@ -17934,11 +18536,12 @@ distribution. * Menu: * PC Binary Installation:: Installing a prepared distribution. -* PC Compiling:: Compiling `gawk' for MS-DOS, Windows32, +* PC Compiling:: Compiling `gawk' for MS-DOS, + Windows32, and OS/2. +* PC Dynamic:: Compiling `gawk' for dynamic + libraries. 
+* PC Using:: Running `gawk' on MS-DOS, Windows32 and OS/2. -* PC Dynamic:: Compiling `gawk' for dynamic libraries. -* PC Using:: Running `gawk' on MS-DOS, Windows32 and - OS/2. * Cygwin:: Building and running `gawk' for Cygwin. * MSYS:: Using `gawk' In The MSYS Environment. @@ -17991,11 +18594,11 @@ B.3.1.2 Compiling `gawk' for PC Operating Systems development tools from DJ Delorie (DJGPP; MS-DOS only) or Eberhard Mattes (EMX; MS-DOS, Windows32 and OS/2). Microsoft Visual C/C++ can be used to build a Windows32 version, and Microsoft C/C++ can be used -to build 16-bit versions for MS-DOS and OS/2. (As of `gawk' 3.1.2, the -MSC version doesn't work. However, the maintainer is working on fixing -it.) The file `README_d/README.pc' in the `gawk' distribution contains -additional notes, and `pc/Makefile' contains important information on -compilation options. +to build 16-bit versions for MS-DOS and OS/2. *FIXME:* (As of `gawk' +3.1.2, the MSC version doesn't work. However, the maintainer is working +on fixing it.) The file `README_d/README.pc' in the `gawk' +distribution contains additional notes, and `pc/Makefile' contains +important information on compilation options. To build `gawk' for MS-DOS, Windows32, and OS/2 (16 bit only; for 32 bit (EMX) you can use the `configure' script and skip the following @@ -18726,10 +19329,9 @@ considered authoritative if it conflicts with this Info file. The people maintaining the non-Unix ports of `gawk' are as follows: -MS-DOS Scott Deifik, <scottd.mail@sbcglobal.net>. MS-Windows using MINGW Eli Zaretskii, <eliz@gnu.org>. -Tandem Stephen Davies, <scldad@sdc.com.au>. -Tandem (POSIX-compliant) Ralf Wildenhues <Ralf.Wildenhues@gmx.de> + Scott Deifik, <scottd.mail@sbcglobal.net>. +OS/2 Andreas Buening, <andreas.buening@nexgo.de> VMS Pat Rankin, <rankin@pactechdata.com>. z/OS (OS/390) Dave Pitts, <pitts@cozx.com>. @@ -19170,6 +19772,8 @@ to recompile them for each new `gawk' release. 
There is no guarantee of binary compatibility between different releases, nor will there ever be such a guarantee. + NOTE: When `--sandbox' is specified, extensions are disabled. + * Menu: * Internals:: A brief look at some `gawk' internals. @@ -19882,6 +20486,9 @@ Integrating the DBUG library Following is a list of probable improvements that will make `gawk' perform better: + *FIXME: NEXT ED:* remove this item. awka and mawk do these + respectively. + Compilation of `awk' programs `gawk' uses a Bison (YACC-like) parser to convert the script given it into a syntax tree; the syntax tree is then executed by a @@ -19936,6 +20543,7 @@ D.1 What a Program Does At the most basic level, the job of a program is to process some input data and produce results. + *FIXME: NEXT ED:* Use real images here _______ +------+ / \ +---------+ | Data | -----> < Program > -----> | Results | @@ -19949,6 +20557,7 @@ uses the instructions in your program to process the data. When you write a program, it usually consists of the following, very basic set of steps: + *FIXME: NEXT ED:* Use real images here ______ +----------------+ / More \ No +----------+ | Initialization | -------> < Data > -------> | Clean Up | @@ -20128,10 +20737,10 @@ in computer science. * Menu: -* String Conversion Precision:: The String Value Can Lie. -* Unexpected Results:: Floating Point Numbers Are Not - Abstract Numbers. -* POSIX Floating Point Problems:: Standards Versus Existing Practice. +* String Conversion Precision:: The String Value Can Lie. +* Unexpected Results:: Floating Point Numbers Are Not Abstract + Numbers. +* POSIX Floating Point Problems:: Standards Versus Existing Practice. 
File: gawk.info, Node: String Conversion Precision, Next: Unexpected Results, Up: Floating Point Issues @@ -20422,7 +21031,7 @@ Built-in Variable `ARGC', `ARGV', `CONVFMT', `ENVIRON', `FILENAME', `FNR', `FS', `NF', `NR', `OFMT', `OFS', `ORS', `RLENGTH', `RSTART', `RS', and `SUBSEP' are the variables that have special meaning to `awk'. In - addition, `ARGIND', `BINMODE', `ERRNO', `FIELDWIDTHS', + addition, `ARGIND', `BINMODE', `ERRNO', `FIELDWIDTHS', `FPAT', `IGNORECASE', `LINT', `PROCINFO', `RT', and `TEXTDOMAIN' are the variables that have special meaning to `gawk'. Changing some of them affects `awk''s running environment. (*Note Built-in @@ -20563,8 +21172,11 @@ Field separated by whitespace (or by a separator regexp that you can change by setting the built-in variable `FS'). Such pieces are called fields. If the pieces are of fixed length, you can use the - built-in variable `FIELDWIDTHS' to describe their lengths. (*Note - Field Separators::, and *note Constant Size::.) + built-in variable `FIELDWIDTHS' to describe their lengths. If you + wish to specify the contents of fields instead of the field + separator, you can use the built-in variable `FPAT' to do so. + (*Note Field Separators::, *note Constant Size::, and *note + Splitting By Content::.) Flag A variable whose truth value indicates the existence or @@ -20677,12 +21289,10 @@ Keyword meaning. Keywords are reserved and may not be used as variable names. - `gawk''s keywords are: `BEGIN', `END', `if', `else', `while', - `do...while', `for', `for...in', `break', `continue', `delete', - `next', `nextfile', `function', `func', and `exit'. If `gawk' was - configured with the `--enable-switch' option (*note Switch - Statement::), then `switch', `case', and `default' are also - keywords. + `gawk''s keywords are: `BEGIN', `END', `break', `case', `continue', + `default' `delete', `do...while', `else', `exit', `for...in', + `for', `function', `func', `if', `nextfile', `next', `switch', and + `while'. 
Lesser General Public License This document describes the terms under which binary library @@ -21636,7 +22246,7 @@ GNU Lesser General Public License instead of this License. But first, please read `http://www.gnu.org/philosophy/why-not-lgpl.html'. -File: gawk.info, Node: GNU Free Documentation License, Next: Index, Prev: Copying, Up: Top +File: gawk.info, Node: GNU Free Documentation License, Next: next-edition, Prev: Copying, Up: Top GNU Free Documentation License ****************************** @@ -22122,7 +22732,170 @@ free software license, such as the GNU General Public License, to permit their use in free software. -File: gawk.info, Node: Index, Prev: GNU Free Documentation License, Up: Top +File: gawk.info, Node: next-edition, Next: Index, Prev: GNU Free Documentation License, Up: Top + +Appendix A To Do In The Next Edition +************************************ + +Stuff for working on the manual + +* Menu: + +* unresolved:: unresolved. +* revision:: revision. +* consistency:: consistency. + + +File: gawk.info, Node: unresolved, Next: revision, Up: next-edition + +A.1 Unresolved Issues +===================== + + 1. Robert J. Chassell points out that awk programs should have some + indication of how to use them. It would be useful to perhaps have + a "programming style" section of the manual that would include + this and other tips. + + 2. The default AWKPATH search path should be configurable via + `configure'. The default and how this changes needs to be + documented. + + +File: gawk.info, Node: revision, Next: consistency, Prev: unresolved, Up: next-edition + +A.2 Revisions To Make +===================== + + 1. Talk about common extensions, those in nawk, gawk, mawk. + + 2. Use `foo' for variables and `foo()' for functions. + + 3. Standardize the error messages from the functions and programs in + Chapters 12 and 13. + + 4. Nuke the BBS stuff and use something that won't be obsolete. 
+ + +File: gawk.info, Node: consistency, Prev: revision, Up: next-edition + +A.3 Consistency Issues +====================== + + * /.../ regexps are in @code, not @samp + + * ".." strings are in @code, not @samp + + * no @print before @dots + + * values of expressions in the text (`x' has the value 15), should + be in roman, not @code + + * Use TAB and not tab + + * Use ESC and not ESCAPE + + * Use space and not blank to describe the space bar's + character The term "blank" is thus basically reserved for "blank + lines" etc. + + * To make dark corners work, the @value{DARKCORNER} has to be outside + closing `.' of a sentence and after (@pxref{...}). This is a + change from earlier versions. + + * " " should have an around it + + * Use "non-" only with language names or acronyms, or the words bug + and option + + * Use `ftp' when talking about anonymous ftp + + * Use uppercase and lowercase, not "upper-case" and "lower-case" or + "upper case" and "lower case" + + * Use "single precision" and "double precision", not + "single-precision" or "double-precision" + + * Use alphanumeric, not alpha-numeric + + * Use POSIX-compliant, not POSIX compliant + + * Use -foo, not -Wfoo when describing long options + + * Use "Bell Laboratories", but not "Bell Labs". + + * Use "behavior" instead of "behaviour". + + * Use "zeros" instead of "zeroes". + + * Use "nonzero" not "non-zero". + + * Use "runtime" not "run time" or "run-time". + + * Use "command-line" not "command line". + + * Use "online" not "on-line". + + * Use "whitespace" not "white space". + + * Use "Input/Output", not "input/output". Also "I/O", not "i/o". + + * Use "lefthand"/"righthand", not "left-hand"/"right-hand". + + * Use "workaround", not "work-around". + + * Use "startup"/"cleanup", not "start-up"/"clean-up" + + * Use `do', and not `do'-`while', except where actually discussing + the do-while. + + * Use "versus" in text and "vs." 
in index entries + + * The words "a", "and", "as", "between", "for", "from", "in", "of", + "on", "that", "the", "to", "with", and "without", should not be + capitalized in @chapter, @section etc. "Into" and "How" should. + + * Search for @dfn; make sure important items are also indexed. + + * "e.g." should always be followed by a comma. + + * "i.e." should always be followed by a comma. + + * The numbers zero through ten should be spelled out, except when + talking about file descriptor numbers. > 10 and < 0, it's ok to + use numbers. + + * In tables, put command-line options in @code, while in the text, + put them in @option. + + * When using @strong, use "Note:" or "Caution:" with colons and not + exclamation points. Do not surround the paragraphs with + @quotation ... @end quotation. + + * For most cases, do NOT put a comma before "and", "or" or "but". + But exercise taste with this rule. + + * Don't show the awk command with a program in quotes when it's just + the program. I.e. + + { + ... + } + + and not + awk '{ + ... + }' + + * Do show it when showing command-line arguments, data files, etc, + even if there is no output shown. + + * Use numbered lists only to show a sequential series of steps. + + * Use @code{xxx} for the xxx operator in indexing statements, not + @samp. + + +File: gawk.info, Node: Index, Prev: next-edition, Up: Top Index ***** @@ -22179,11 +22952,11 @@ Index * * (asterisk), * operator, as regexp operator: Regexp Operators. (line 86) * * (asterisk), * operator, null strings, matching: Gory Details. - (line 160) -* * (asterisk), ** operator <1>: Options. (line 199) + (line 159) +* * (asterisk), ** operator <1>: Options. (line 216) * * (asterisk), ** operator <2>: Precedence. (line 49) * * (asterisk), ** operator: Arithmetic Ops. (line 81) -* * (asterisk), **= operator <1>: Options. (line 199) +* * (asterisk), **= operator <1>: Options. (line 216) * * (asterisk), **= operator <2>: Precedence. 
(line 95) * * (asterisk), **= operator: Assignment Ops. (line 129) * * (asterisk), *= operator <1>: Precedence. (line 95) @@ -22202,65 +22975,83 @@ Index * - (hyphen), -- operator: Increment Ops. (line 48) * - (hyphen), -= operator <1>: Precedence. (line 95) * - (hyphen), -= operator: Assignment Ops. (line 129) -* - (hyphen), filenames beginning with: Options. (line 67) +* - (hyphen), filenames beginning with: Options. (line 56) * - (hyphen), in character lists: Character Lists. (line 17) * --assign option: Options. (line 30) -* --compat option: Options. (line 86) -* --copyleft option: Options. (line 99) -* --copyright option: Options. (line 94) -* --disable-directories-fatal configuration option: Additional Configuration Options. - (line 41) +* --c option: Options. (line 76) +* --characters-as-bytes option: Options. (line 65) +* --compat option: Options. (line 76) +* --copyleft option: Options. (line 85) +* --copyright option: Options. (line 85) * --disable-lint configuration option: Additional Configuration Options. - (line 21) + (line 17) * --disable-nls configuration option: Additional Configuration Options. - (line 36) + (line 32) * --dump-variables option <1>: Library Names. (line 45) -* --dump-variables option: Options. (line 102) +* --dump-variables option: Options. (line 90) * --enable-portals configuration option <1>: Additional Configuration Options. (line 9) * --enable-portals configuration option: Portal Files. (line 6) -* --enable-switch configuration option: Additional Configuration Options. - (line 13) -* --exec option: Options. (line 118) +* --exec option: Options. (line 112) * --field-separator option: Options. (line 21) * --file option: Options. (line 25) -* --gen-po option <1>: Options. (line 137) -* --gen-po option: String Extraction. (line 6) -* --help option: Options. (line 146) -* --lint option <1>: Options. (line 151) +* --gen-pot option <1>: Options. (line 131) +* --gen-pot option: String Extraction. (line 6) +* --help option: Options. 
(line 139) +* --L option: Options. (line 163) +* --lint option <1>: Options. (line 144) * --lint option: Command Line. (line 20) -* --lint-old option: Options. (line 170) -* --non-decimal-data option <1>: Options. (line 175) +* --lint-old option: Options. (line 163) +* --non-decimal-data option <1>: Options. (line 168) * --non-decimal-data option: Nondecimal Data. (line 6) * --non-decimal-data option, strtonum function and: Nondecimal Data. (line 36) -* --optimize option: Options. (line 77) -* --posix option: Options. (line 183) -* --posix option, --traditional option and: Options. (line 213) -* --profile option <1>: Options. (line 219) +* --optimize option: Options. (line 181) +* --posix option: Options. (line 200) +* --posix option, --traditional option and: Options. (line 230) +* --profile option <1>: Options. (line 188) * --profile option: Profiling. (line 15) -* --re-interval option: Options. (line 231) -* --source option: Options. (line 238) -* --traditional option: Options. (line 86) -* --traditional option, --posix option and: Options. (line 213) -* --usage option: Options. (line 146) -* --use-lc-numeric option: Options. (line 246) -* --version option: Options. (line 251) +* --re-interval option: Options. (line 236) +* --sandbox option: Options. (line 243) +* --sandbox option, disabling system function: I/O Functions. (line 88) +* --sandbox option, input redirection with getline: Getline. (line 19) +* --sandbox option, output redirection with print, printf: Redirection. + (line 6) +* --source option: Options. (line 104) +* --traditional option: Options. (line 76) +* --traditional option, --posix option and: Options. (line 230) +* --usage option: Options. (line 139) +* --use-lc-numeric option: Options. (line 176) +* --version option: Options. (line 252) * --with-whiny-user-strftime configuration option: Additional Configuration Options. - (line 17) + (line 13) +* -b option: Options. (line 65) +* -C option: Options. (line 85) +* -d option: Options. 
(line 90) +* -E option: Options. (line 112) +* -e option: Options. (line 104) * -f option: Options. (line 25) * -F option <1>: Options. (line 21) * -F option: Command Line Field Separator. (line 6) * -f option: Long. (line 12) -* -F option, -Ft sets FS to TAB: Options. (line 259) -* -f option, on command line: Options. (line 264) +* -F option, -Ft sets FS to TAB: Options. (line 260) +* -f option, on command line: Options. (line 265) * -F option, troubleshooting: Known Bugs. (line 6) -* -mf/-mr options: Options. (line 45) -* -O option: Options. (line 77) +* -g option: Options. (line 131) +* -h option: Options. (line 139) +* -l option: Options. (line 144) +* -N option: Options. (line 176) +* -n option: Options. (line 168) +* -O option: Options. (line 181) +* -P option: Options. (line 200) +* -p option: Options. (line 188) +* -r option: Options. (line 236) +* -S option: Options. (line 243) +* -V option: Options. (line 252) * -v option: Options. (line 30) * -v option, variables, assigning: Assignment Options. (line 12) -* -W option: Options. (line 55) +* -W option: Options. (line 44) * . (period): Regexp Operators. (line 43) * .mo files: Explaining gettext. (line 39) * .mo files, converting from .po: I18N Example. (line 62) @@ -22279,6 +23070,8 @@ Index * /= operator vs. /=.../ regexp constant: Assignment Ops. (line 148) * /dev/... special files (gawk): Special FD. (line 41) * /inet/ files (gawk): TCP/IP Networking. (line 6) +* /inet4/ files (gawk): TCP/IP Networking. (line 6) +* /inet6/ files (gawk): TCP/IP Networking. (line 6) * /p files (gawk): Portal Files. (line 6) * ; (semicolon): Statements/Lines. (line 90) * ; (semicolon), AWKPATH variable and: PC Using. (line 11) @@ -22300,14 +23093,14 @@ Index * > (right angle bracket), > operator <1>: Precedence. (line 65) * > (right angle bracket), > operator: Comparison Operators. (line 11) -* > (right angle bracket), > operator (I/O): Redirection. (line 19) +* > (right angle bracket), > operator (I/O): Redirection. 
(line 22) * > (right angle bracket), >= operator <1>: Precedence. (line 65) * > (right angle bracket), >= operator: Comparison Operators. (line 11) * > (right angle bracket), >> operator (I/O) <1>: Precedence. (line 65) -* > (right angle bracket), >> operator (I/O): Redirection. (line 47) +* > (right angle bracket), >> operator (I/O): Redirection. (line 50) * ? (question mark) <1>: GNU Regexp Operators. - (line 51) + (line 59) * ? (question mark): Regexp Operators. (line 110) * ? (question mark), ?: operator: Precedence. (line 92) * [] (square brackets): Regexp Operators. (line 55) @@ -22317,31 +23110,35 @@ Index * \ (backslash): Read Terminal. (line 25) * \ (backslash), \" escape sequence: Escape Sequences. (line 76) * \ (backslash), \' operator (gawk): GNU Regexp Operators. - (line 48) + (line 56) * \ (backslash), \/ escape sequence: Escape Sequences. (line 69) * \ (backslash), \< operator (gawk): GNU Regexp Operators. - (line 22) + (line 30) * \ (backslash), \> operator (gawk): GNU Regexp Operators. - (line 26) + (line 34) * \ (backslash), \` operator (gawk): GNU Regexp Operators. - (line 46) + (line 54) * \ (backslash), \a escape sequence: Escape Sequences. (line 34) * \ (backslash), \b escape sequence: Escape Sequences. (line 38) * \ (backslash), \B operator (gawk): GNU Regexp Operators. - (line 35) + (line 43) * \ (backslash), \f escape sequence: Escape Sequences. (line 41) * \ (backslash), \n escape sequence: Escape Sequences. (line 44) * \ (backslash), \NNN escape sequence: Escape Sequences. (line 56) * \ (backslash), \r escape sequence: Escape Sequences. (line 47) +* \ (backslash), \S operator (gawk): GNU Regexp Operators. + (line 17) +* \ (backslash), \s operator (gawk): GNU Regexp Operators. + (line 13) * \ (backslash), \t escape sequence: Escape Sequences. (line 50) * \ (backslash), \v escape sequence: Escape Sequences. (line 53) * \ (backslash), \W operator (gawk): GNU Regexp Operators. 
- (line 18) + (line 26) * \ (backslash), \w operator (gawk): GNU Regexp Operators. - (line 13) + (line 21) * \ (backslash), \x escape sequence: Escape Sequences. (line 61) * \ (backslash), \y operator (gawk): GNU Regexp Operators. - (line 30) + (line 38) * \ (backslash), as field separators: Command Line Field Separator. (line 27) * \ (backslash), continuing lines and <1>: Egrep Program. (line 218) @@ -22358,11 +23155,11 @@ Index (line 113) * \ (backslash), regexp constants: Computed Regexps. (line 28) * ^ (caret) <1>: GNU Regexp Operators. - (line 51) + (line 59) * ^ (caret): Regexp Operators. (line 22) -* ^ (caret), ^ operator <1>: Options. (line 199) +* ^ (caret), ^ operator <1>: Options. (line 216) * ^ (caret), ^ operator: Precedence. (line 49) -* ^ (caret), ^= operator <1>: Options. (line 199) +* ^ (caret), ^= operator <1>: Options. (line 216) * ^ (caret), ^= operator <2>: Precedence. (line 95) * ^ (caret), ^= operator: Assignment Ops. (line 129) * ^ (caret), in character lists: Character Lists. (line 17) @@ -22384,14 +23181,14 @@ Index * adding, features to gawk: Adding Code. (line 6) * adding, fields: Changing Fields. (line 53) * adding, functions to gawk: Dynamic Extensions. (line 10) -* advanced features, buffering: I/O Functions. (line 96) +* advanced features, buffering: I/O Functions. (line 100) * advanced features, close function: Close Files And Pipes. (line 130) * advanced features, constants, values of: Nondecimal-numbers. (line 67) * advanced features, data files as single record: Records. (line 170) * advanced features, fixed-width data: Constant Size. (line 9) -* advanced features, FNR/NR variables: Auto-set. (line 187) +* advanced features, FNR/NR variables: Auto-set. (line 193) * advanced features, gawk: Advanced Features. (line 6) * advanced features, gawk, BSD portals: Portal Files. (line 6) * advanced features, gawk, network programming: TCP/IP Networking. 
@@ -22402,15 +23199,17 @@ Index (line 23) * advanced features, network connections, See Also networks, connections: Advanced Features. (line 6) -* advanced features, null strings, matching: Gory Details. (line 160) +* advanced features, null strings, matching: Gory Details. (line 159) * advanced features, operators, precedence: Increment Ops. (line 61) -* advanced features, piping into sh: Redirection. (line 140) +* advanced features, piping into sh: Redirection. (line 143) * advanced features, regexp constants: Assignment Ops. (line 148) +* advanced features, specifying field content: Splitting By Content. + (line 9) * Aho, Alfred <1>: Contributors. (line 12) * Aho, Alfred: History. (line 17) * alarm clock example program: Alarm Program. (line 9) * alarm.awk program: Alarm Program. (line 27) -* algorithms: Basic High Level. (line 66) +* algorithms: Basic High Level. (line 68) * Alpha (DEC): Manual History. (line 28) * amazing awk assembler (aaa): Glossary. (line 12) * amazingly workable formatter (awf): Glossary. (line 20) @@ -22441,7 +23240,7 @@ Index * arithmetic operators: Arithmetic Ops. (line 6) * arrays: Arrays. (line 6) * arrays, as parameters to functions: Function Caveats. (line 55) -* arrays, associative: Array Intro. (line 50) +* arrays, associative: Array Intro. (line 51) * arrays, associative, clearing: Internals. (line 66) * arrays, associative, library functions and: Library Names. (line 57) * arrays, deleting entire contents: Delete. (line 39) @@ -22453,16 +23252,16 @@ Index (line 6) * arrays, elements, retrieving number of: String Functions. (line 18) * arrays, for statement and: Scanning an Array. (line 20) -* arrays, IGNORECASE variable and: Array Intro. (line 92) -* arrays, indexing: Array Intro. (line 50) +* arrays, IGNORECASE variable and: Array Intro. (line 93) +* arrays, indexing: Array Intro. (line 51) * arrays, merging into strings: Join Function. (line 6) -* arrays, multidimensional: Multi-dimensional. 
(line 6) +* arrays, multidimensional: Multi-dimensional. (line 10) * arrays, multidimensional, scanning: Multi-scanning. (line 11) * arrays, names of: Arrays. (line 17) * arrays, scanning: Scanning an Array. (line 6) * arrays, sorting: Array Sorting. (line 6) * arrays, sorting, IGNORECASE variable and: Array Sorting. (line 86) -* arrays, sparse: Array Intro. (line 71) +* arrays, sparse: Array Intro. (line 72) * arrays, subscripts: Numeric Array Subscripts. (line 6) * arrays, subscripts, uninitialized variables as: Uninitialized Subscripts. @@ -22483,17 +23282,17 @@ Index * assignments as filenames: Ignoring Assigns. (line 6) * assoc_clear internal function: Internals. (line 66) * assoc_lookup internal function: Internals. (line 70) -* associative arrays: Array Intro. (line 50) +* associative arrays: Array Intro. (line 51) * asterisk (*), * operator, as multiplication operator: Precedence. (line 55) * asterisk (*), * operator, as regexp operator: Regexp Operators. (line 86) * asterisk (*), * operator, null strings, matching: Gory Details. - (line 160) -* asterisk (*), ** operator <1>: Options. (line 199) + (line 159) +* asterisk (*), ** operator <1>: Options. (line 216) * asterisk (*), ** operator <2>: Precedence. (line 49) * asterisk (*), ** operator: Arithmetic Ops. (line 81) -* asterisk (*), **= operator <1>: Options. (line 199) +* asterisk (*), **= operator <1>: Options. (line 216) * asterisk (*), **= operator <2>: Precedence. (line 95) * asterisk (*), **= operator: Assignment Ops. (line 129) * asterisk (*), *= operator <1>: Precedence. (line 95) @@ -22517,7 +23316,7 @@ Index * awk programs, location of: Options. (line 25) * awk programs, one-line examples: Very Simple. (line 45) * awk programs, profiling: Profiling. (line 6) -* awk programs, profiling, enabling: Options. (line 219) +* awk programs, profiling, enabling: Options. (line 188) * awk programs, running <1>: Long. (line 6) * awk programs, running: Running gawk. 
(line 6) * awk programs, running, from shell scripts: One-shot. (line 22) @@ -22528,7 +23327,7 @@ Index * awk, gawk and <1>: This Manual. (line 13) * awk, gawk and: Preface. (line 22) * awk, history of: History. (line 17) -* awk, implementation issues, pipes: Redirection. (line 132) +* awk, implementation issues, pipes: Redirection. (line 135) * awk, implementations: Other Versions. (line 6) * awk, implementations, limits: Getline Notes. (line 14) * awk, invoking: Command Line. (line 6) @@ -22557,38 +23356,42 @@ Index * AWKPATH environment variable: AWKPATH Variable. (line 6) * awkprof.out file: Profiling. (line 10) * awksed.awk program: Simple Sed. (line 25) -* awkvars.out file: Options. (line 102) +* awkvars.out file: Options. (line 90) * backslash (\) <1>: Regexp Operators. (line 18) * backslash (\) <2>: Quoting. (line 31) * backslash (\) <3>: Comments. (line 50) * backslash (\): Read Terminal. (line 25) * backslash (\), \" escape sequence: Escape Sequences. (line 76) * backslash (\), \' operator (gawk): GNU Regexp Operators. - (line 48) + (line 56) * backslash (\), \/ escape sequence: Escape Sequences. (line 69) * backslash (\), \< operator (gawk): GNU Regexp Operators. - (line 22) + (line 30) * backslash (\), \> operator (gawk): GNU Regexp Operators. - (line 26) + (line 34) * backslash (\), \` operator (gawk): GNU Regexp Operators. - (line 46) + (line 54) * backslash (\), \a escape sequence: Escape Sequences. (line 34) * backslash (\), \b escape sequence: Escape Sequences. (line 38) * backslash (\), \B operator (gawk): GNU Regexp Operators. - (line 35) + (line 43) * backslash (\), \f escape sequence: Escape Sequences. (line 41) * backslash (\), \n escape sequence: Escape Sequences. (line 44) * backslash (\), \NNN escape sequence: Escape Sequences. (line 56) * backslash (\), \r escape sequence: Escape Sequences. (line 47) +* backslash (\), \S operator (gawk): GNU Regexp Operators. + (line 17) +* backslash (\), \s operator (gawk): GNU Regexp Operators. 
+ (line 13) * backslash (\), \t escape sequence: Escape Sequences. (line 50) * backslash (\), \v escape sequence: Escape Sequences. (line 53) * backslash (\), \W operator (gawk): GNU Regexp Operators. - (line 18) + (line 26) * backslash (\), \w operator (gawk): GNU Regexp Operators. - (line 13) + (line 21) * backslash (\), \x escape sequence: Escape Sequences. (line 61) * backslash (\), \y operator (gawk): GNU Regexp Operators. - (line 30) + (line 38) * backslash (\), as field separators: Command Line Field Separator. (line 27) * backslash (\), continuing lines and <1>: Egrep Program. (line 218) @@ -22625,9 +23428,10 @@ Index * BEGIN pattern, operators and: Using BEGIN/END. (line 17) * BEGIN pattern, pgawk program: Profiling. (line 69) * BEGIN pattern, print statement and: I/O And BEGIN/END. (line 16) -* BEGIN pattern, pwcat program: Passwd Functions. (line 125) +* BEGIN pattern, pwcat program: Passwd Functions. (line 128) * BEGIN pattern, running awk programs and: Cut Program. (line 66) * BEGIN pattern, TEXTDOMAIN variable and: Programmer i18n. (line 58) +* BEGINFILE special pattern: BEGINFILE/ENDFILE. (line 6) * beginfile user-defined function: Filetrans Function. (line 60) * Bell Laboratories awk extensions: BTL. (line 6) * Benzinger, Michael: Contributors. (line 89) @@ -22663,14 +23467,16 @@ Index * Broder, Alan J.: Contributors. (line 80) * Brown, Martin: Contributors. (line 75) * BSD portals: Portal Files. (line 6) -* BSD-based operating systems: Glossary. (line 581) -* Buening, Andreas: Contributors. (line 84) +* BSD-based operating systems: Glossary. (line 582) +* Buening, Andreas <1>: Bugs. (line 70) +* Buening, Andreas <2>: Contributors. (line 84) +* Buening, Andreas: Acknowledgments. (line 53) * buffering, input/output <1>: Two-way I/O. (line 71) -* buffering, input/output: I/O Functions. (line 128) -* buffering, interactive vs. noninteractive: I/O Functions. (line 96) +* buffering, input/output: I/O Functions. (line 132) +* buffering, interactive vs. 
noninteractive: I/O Functions. (line 100) * buffers, flushing: I/O Functions. (line 29) * buffers, operators for: GNU Regexp Operators. - (line 40) + (line 48) * bug reports, email address, bug-gawk@gnu.org: Bugs. (line 27) * bug-gawk@gnu.org bug reporting address: Bugs. (line 27) * built-in functions: Functions. (line 6) @@ -22682,23 +23488,23 @@ Index * call by reference: Function Caveats. (line 55) * call by value: Function Caveats. (line 26) * caret (^) <1>: GNU Regexp Operators. - (line 51) + (line 59) * caret (^): Regexp Operators. (line 22) -* caret (^), ^ operator <1>: Options. (line 199) +* caret (^), ^ operator <1>: Options. (line 216) * caret (^), ^ operator: Precedence. (line 49) -* caret (^), ^= operator <1>: Options. (line 199) +* caret (^), ^= operator <1>: Options. (line 216) * caret (^), ^= operator <2>: Precedence. (line 95) * caret (^), ^= operator: Assignment Ops. (line 129) * caret (^), in character lists: Character Lists. (line 17) * case keyword: Switch Statement. (line 6) -* case sensitivity, array indices and: Array Intro. (line 92) -* case sensitivity, converting case: String Functions. (line 467) +* case sensitivity, array indices and: Array Intro. (line 93) +* case sensitivity, converting case: String Functions. (line 492) * case sensitivity, example programs: Library Functions. (line 43) * case sensitivity, gawk: Case-sensitivity. (line 26) -* case sensitivity, regexps and <1>: User-modified. (line 70) +* case sensitivity, regexps and <1>: User-modified. (line 82) * case sensitivity, regexps and: Case-sensitivity. (line 6) -* case sensitivity, string comparisons and: User-modified. (line 70) -* CGI, awk scripts for: Options. (line 118) +* case sensitivity, string comparisons and: User-modified. (line 82) +* CGI, awk scripts for: Options. (line 112) * character encodings: Ordinal Functions. (line 44) * character lists <1>: Character Lists. (line 6) * character lists: Regexp Operators. 
(line 55) @@ -22744,6 +23550,8 @@ Index * command line, arguments <1>: Other Arguments. (line 6) * command line, arguments <2>: ARGC and ARGV. (line 6) * command line, arguments: Auto-set. (line 11) +* command line, directories on: Command line directories. + (line 6) * command line, formats: Running gawk. (line 12) * command line, FS on, setting: Command Line Field Separator. (line 6) @@ -22752,7 +23560,7 @@ Index * command line, options <2>: Command Line Field Separator. (line 6) * command line, options: Long. (line 12) -* command line, options, end of: Options. (line 62) +* command line, options, end of: Options. (line 51) * command line, variables, assigning on: Assignment Options. (line 6) * command-line options, processing: Getopt Function. (line 6) * command-line options, string extraction: String Extraction. (line 6) @@ -22770,33 +23578,29 @@ Index (line 60) * compatibility mode (gawk), octal numbers: Nondecimal-numbers. (line 60) -* compatibility mode (gawk), specifying: Options. (line 86) +* compatibility mode (gawk), specifying: Options. (line 76) * compiled programs <1>: Glossary. (line 155) -* compiled programs: Basic High Level. (line 14) +* compiled programs: Basic High Level. (line 15) * compl function (gawk): Bitwise Functions. (line 43) * complement, bitwise: Bitwise Functions. (line 25) * compound statements, control statements and: Statements. (line 10) * concatenating: Concatenation. (line 9) * conditional expressions: Conditional Exp. (line 6) -* configuration option, --disable-directories-fatal: Additional Configuration Options. - (line 41) * configuration option, --disable-lint: Additional Configuration Options. - (line 21) + (line 17) * configuration option, --disable-nls: Additional Configuration Options. - (line 36) + (line 32) * configuration option, --enable-portals: Additional Configuration Options. (line 9) -* configuration option, --enable-switch: Additional Configuration Options. 
- (line 13) * configuration option, --with-whiny-user-strftime: Additional Configuration Options. - (line 17) + (line 13) * configuration options, gawk: Additional Configuration Options. (line 6) * constants, nondecimal: Nondecimal Data. (line 6) * constants, types of: Constants. (line 6) * continue statement: Continue Statement. (line 6) * control statements: Statements. (line 6) -* converting, case: String Functions. (line 467) +* converting, case: String Functions. (line 492) * converting, dates to timestamps: Time Functions. (line 72) * converting, during subscripting: Numeric Array Subscripts. (line 31) @@ -22808,7 +23612,7 @@ Index * CONVFMT variable, array subscripts and: Numeric Array Subscripts. (line 6) * coprocesses <1>: Two-way I/O. (line 44) -* coprocesses: Redirection. (line 99) +* coprocesses: Redirection. (line 102) * coprocesses, closing: Close Files And Pipes. (line 6) * coprocesses, getline from: Getline/Coprocess. (line 6) @@ -22816,7 +23620,7 @@ Index * counting: Wc Program. (line 6) * csh utility: Statements/Lines. (line 44) * csh utility, backslash continuation and: More Complex. (line 15) -* csh utility, POSIXLY_CORRECT environment variable: Options. (line 302) +* csh utility, POSIXLY_CORRECT environment variable: Options. (line 303) * csh utility, |& operator, comparison with: Two-way I/O. (line 44) * ctime user-defined function: Function Example. (line 72) * currency symbols, localization: Explaining gettext. (line 99) @@ -22845,9 +23649,9 @@ Index * dark corner, exit statement: Exit Statement. (line 29) * dark corner, field separators: Field Splitting Summary. (line 47) -* dark corner, FILENAME variable <1>: Auto-set. (line 88) +* dark corner, FILENAME variable <1>: Auto-set. (line 93) * dark corner, FILENAME variable: Getline Notes. (line 19) -* dark corner, FNR/NR variables: Auto-set. (line 187) +* dark corner, FNR/NR variables: Auto-set. (line 193) * dark corner, format-control characters: Control Letters. 
(line 18) * dark corner, FS as null string: Single Character Fields. (line 20) @@ -22863,10 +23667,10 @@ Index (line 148) * dark corner, regexp constants, as arguments to user-defined functions: Using Constant Regexps. (line 44) -* dark corner, split function: String Functions. (line 234) +* dark corner, split function: String Functions. (line 259) * dark corner, strings, storing: Records. (line 186) * data, fixed-width: Constant Size. (line 9) -* data-driven languages: Basic High Level. (line 83) +* data-driven languages: Basic High Level. (line 85) * database, group, reading: Group Functions. (line 6) * database, users, reading: Passwd Functions. (line 6) * date utility, GNU: Time Functions. (line 17) @@ -22874,7 +23678,6 @@ Index * dates, converting to timestamps: Time Functions. (line 72) * dates, information related to, localization: Explaining gettext. (line 111) -* Davies, Stephen <1>: Bugs. (line 70) * Davies, Stephen: Contributors. (line 69) * dcgettext function (gawk) <1>: Programmer i18n. (line 19) * dcgettext function (gawk): I18N Functions. (line 12) @@ -22887,7 +23690,7 @@ Index * deadlocks: Two-way I/O. (line 71) * debugging gawk: Known Bugs. (line 6) * debugging gawk, bug reports: Bugs. (line 9) -* decimal point character, locale specific: Options. (line 207) +* decimal point character, locale specific: Options. (line 224) * decrement operators: Increment Ops. (line 35) * default keyword: Switch Statement. (line 6) * Deifik, Scott <1>: Bugs. (line 69) @@ -22898,7 +23701,7 @@ Index * deleting entire arrays: Delete. (line 39) * differences between gawk and awk: String Functions. (line 102) * differences in awk and gawk, ARGC/ARGV variables: ARGC and ARGV. - (line 85) + (line 86) * differences in awk and gawk, ARGIND variable: Auto-set. (line 40) * differences in awk and gawk, array elements, deleting: Delete. (line 39) @@ -22916,49 +23719,54 @@ Index * differences in awk and gawk, error messages: Special FD. 
(line 15) * differences in awk and gawk, FIELDWIDTHS variable: User-modified. (line 35) +* differences in awk and gawk, FPAT variable: User-modified. (line 45) * differences in awk and gawk, function arguments (gawk): Calling Built-in. (line 16) * differences in awk and gawk, getline command: Getline. (line 19) * differences in awk and gawk, IGNORECASE variable: User-modified. - (line 70) + (line 82) * differences in awk and gawk, implementation limitations <1>: Redirection. - (line 132) + (line 135) * differences in awk and gawk, implementation limitations: Getline Notes. (line 14) +* differences in awk and gawk, indirect function calls: Indirect Calls. + (line 6) * differences in awk and gawk, input/output operators <1>: Redirection. - (line 99) + (line 102) * differences in awk and gawk, input/output operators: Getline/Coprocess. (line 6) * differences in awk and gawk, line continuations: Conditional Exp. (line 34) -* differences in awk and gawk, LINT variable: User-modified. (line 85) +* differences in awk and gawk, LINT variable: User-modified. (line 97) * differences in awk and gawk, match function: String Functions. (line 165) * differences in awk and gawk, next/nextfile statements: Nextfile Statement. (line 6) * differences in awk and gawk, print/printf statements: Format Modifiers. (line 13) -* differences in awk and gawk, PROCINFO array: Auto-set. (line 119) +* differences in awk and gawk, PROCINFO array: Auto-set. (line 124) * differences in awk and gawk, record separators: Records. (line 112) * differences in awk and gawk, regexp constants: Using Constant Regexps. (line 44) * differences in awk and gawk, regular expressions: Case-sensitivity. (line 26) * differences in awk and gawk, RS/RT variables: Records. (line 162) -* differences in awk and gawk, RT variable: Auto-set. (line 176) +* differences in awk and gawk, RT variable: Auto-set. (line 182) * differences in awk and gawk, single-character fields: Single Character Fields. 
(line 6) * differences in awk and gawk, split function: String Functions. - (line 223) + (line 248) * differences in awk and gawk, strings: Scalar Constants. (line 20) * differences in awk and gawk, strings, storing: Records. (line 182) * differences in awk and gawk, strtonum function (gawk): String Functions. - (line 261) + (line 286) * differences in awk and gawk, TEXTDOMAIN variable: User-modified. - (line 140) + (line 152) * differences in awk and gawk, trunc-mod operation: Arithmetic Ops. (line 66) * directories, changing: Sample Library. (line 6) +* directories, command line: Command line directories. + (line 6) * directories, searching <1>: Igawk Program. (line 358) * directories, searching: AWKPATH Variable. (line 6) * division: Arithmetic Ops. (line 44) @@ -23008,11 +23816,12 @@ Index * END pattern, operators and: Using BEGIN/END. (line 17) * END pattern, pgawk program: Profiling. (line 69) * END pattern, print statement and: I/O And BEGIN/END. (line 16) +* ENDFILE special pattern: BEGINFILE/ENDFILE. (line 6) * endfile user-defined function: Filetrans Function. (line 60) -* endgrent function (C library): Group Functions. (line 213) -* endgrent user-defined function: Group Functions. (line 216) -* endpwent function (C library): Passwd Functions. (line 192) -* endpwent user-defined function: Passwd Functions. (line 195) +* endgrent function (C library): Group Functions. (line 215) +* endgrent user-defined function: Group Functions. (line 218) +* endpwent function (C library): Passwd Functions. (line 199) +* endpwent user-defined function: Passwd Functions. (line 202) * ENVIRON variable <1>: Internals. (line 165) * ENVIRON variable: Auto-set. (line 60) * environment variables: Auto-set. (line 60) @@ -23030,7 +23839,7 @@ Index * error output: Special FD. (line 6) * escape processing, gsub/gensub/sub functions: Gory Details. (line 6) * escape sequences: Escape Sequences. (line 6) -* escape sequences, unrecognized: Options. 
(line 187) +* escape sequences, unrecognized: Options. (line 204) * evaluation order: Increment Ops. (line 61) * evaluation order, concatenation: Concatenation. (line 42) * evaluation order, functions: Calling Built-in. (line 30) @@ -23083,14 +23892,15 @@ Index * Fenlason, Jay <1>: Contributors. (line 19) * Fenlason, Jay: History. (line 30) * fflush function: I/O Functions. (line 25) -* fflush function, unsupported: Options. (line 210) +* fflush function, unsupported: Options. (line 227) * field numbers: Nonconstant Fields. (line 6) * field operator $: Fields. (line 19) * field operators, dollar sign as: Fields. (line 19) -* field separators <1>: User-modified. (line 45) +* field separators <1>: User-modified. (line 56) * field separators: Field Separators. (line 13) * field separators, choice of: Field Separators. (line 49) * field separators, FIELDWIDTHS variable and: User-modified. (line 35) +* field separators, FPAT variable and: User-modified. (line 45) * field separators, in multiline records: Multiple Line. (line 41) * field separators, on command line: Command Line Field Separator. (line 6) @@ -23102,7 +23912,7 @@ Index * field separators, regular expressions as: Field Separators. (line 49) * field separators, See Also OFS: Changing Fields. (line 64) * field separators, spaces as: Cut Program. (line 106) -* fields <1>: Basic High Level. (line 71) +* fields <1>: Basic High Level. (line 73) * fields <2>: Fields. (line 6) * fields: Reading Files. (line 14) * fields, adding: Changing Fields. (line 53) @@ -23121,7 +23931,7 @@ Index * file names, distinguishing: Auto-set. (line 52) * file names, in compatibility mode: Special Caveats. (line 9) * file names, standard streams in gawk: Special FD. (line 41) -* FILENAME variable <1>: Auto-set. (line 88) +* FILENAME variable <1>: Auto-set. (line 93) * FILENAME variable: Reading Files. (line 6) * FILENAME variable, getline, setting with: Getline Notes. (line 19) * filenames, assignments as: Ignoring Assigns. 
(line 6) @@ -23134,14 +23944,15 @@ Index * files, .po, converting to .mo: I18N Example. (line 62) * files, /dev/... special files: Special FD. (line 41) * files, /inet/ (gawk): TCP/IP Networking. (line 6) +* files, /inet4/ (gawk): TCP/IP Networking. (line 6) +* files, /inet6/ (gawk): TCP/IP Networking. (line 6) * files, /p (gawk): Portal Files. (line 6) * files, as single records: Records. (line 191) * files, awk programs in: Long. (line 6) * files, awkprof.out: Profiling. (line 10) -* files, awkvars.out: Options. (line 102) +* files, awkvars.out: Options. (line 90) * files, closing: I/O Functions. (line 10) * files, descriptors, See file descriptors: Special FD. (line 6) -* files, for process information: Special Process. (line 6) * files, group: Group Functions. (line 6) * files, information about, retrieving: Sample Library. (line 6) * files, initialization and cleanup: Filetrans Function. (line 6) @@ -23166,7 +23977,7 @@ Index * files, portable object: Explaining gettext. (line 36) * files, portable object, converting to message object files: I18N Example. (line 62) -* files, portable object, generating: Options. (line 137) +* files, portable object, generating: Options. (line 131) * files, portal: Portal Files. (line 6) * files, processing, ARGIND variable and: Auto-set. (line 47) * files, reading: Rewind Function. (line 6) @@ -23183,9 +23994,9 @@ Index * floating-point: Unexpected Results. (line 6) * floating-point, numbers: Basic Data Typing. (line 21) * floating-point, numbers, AWKNUM internal type: Internals. (line 19) -* FNR variable <1>: Auto-set. (line 98) +* FNR variable <1>: Auto-set. (line 103) * FNR variable: Records. (line 6) -* FNR variable, changing: Auto-set. (line 187) +* FNR variable, changing: Auto-set. (line 193) * for statement: For Statement. (line 6) * for statement, in arrays: Scanning an Array. (line 20) * force_number internal function: Internals. (line 27) @@ -23205,18 +24016,19 @@ Index * forward slash (/), /= operator, vs. 
/=.../ regexp constant: Assignment Ops. (line 148) * forward slash (/), patterns and: Expression Patterns. (line 24) +* FPAT variable: User-modified. (line 45) * Free Documentation License (FDL): GNU Free Documentation License. (line 6) -* Free Software Foundation (FSF) <1>: Glossary. (line 283) +* Free Software Foundation (FSF) <1>: Glossary. (line 286) * Free Software Foundation (FSF): Manual History. (line 6) * free_temp internal macro: Internals. (line 102) -* FreeBSD: Glossary. (line 581) -* FS variable <1>: User-modified. (line 45) +* FreeBSD: Glossary. (line 582) +* FS variable <1>: User-modified. (line 56) * FS variable: Field Separators. (line 13) * FS variable, --field-separator option and: Options. (line 21) * FS variable, as null string: Single Character Fields. (line 20) -* FS variable, as TAB character: Options. (line 203) +* FS variable, as TAB character: Options. (line 220) * FS variable, changing value of <1>: Known Bugs. (line 6) * FS variable, changing value of: Field Separators. (line 33) * FS variable, running awk programs and: Cut Program. (line 66) @@ -23224,9 +24036,11 @@ Index (line 6) * FS, containing ^: Regexp Field Splitting. (line 59) -* FSF (Free Software Foundation) <1>: Glossary. (line 283) +* FSF (Free Software Foundation) <1>: Glossary. (line 286) * FSF (Free Software Foundation): Manual History. (line 6) * function calls: Function Calls. (line 6) +* function calls, indirect: Indirect Calls. (line 6) +* function pointers: Indirect Calls. (line 6) * functions, arrays as parameters to: Function Caveats. (line 55) * functions, built-in <1>: Functions. (line 6) * functions, built-in: Function Calls. (line 10) @@ -23256,9 +24070,9 @@ Index * functions, library, rounding numbers: Round Function. (line 6) * functions, library, user database, reading: Passwd Functions. (line 6) -* functions, names of <1>: Definition Syntax. (line 20) +* functions, names of <1>: Definition Syntax. (line 21) * functions, names of: Arrays. 
(line 17) -* functions, recursive: Definition Syntax. (line 72) +* functions, recursive: Definition Syntax. (line 73) * functions, return values, setting: Internals. (line 146) * functions, string-translation: I18N Functions. (line 6) * functions, undefined: Function Caveats. (line 79) @@ -23270,7 +24084,7 @@ Index (line 39) * functions, user-defined, next/nextfile statements and: Next Statement. (line 39) -* G-d: Acknowledgments. (line 71) +* G-d: Acknowledgments. (line 72) * Garfinkle, Scott: Contributors. (line 37) * gawk, awk and <1>: This Manual. (line 13) * gawk, awk and: Preface. (line 22) @@ -23280,7 +24094,7 @@ Index * gawk, character classes and: Character Lists. (line 92) * gawk, coding style in: Adding Code. (line 32) * gawk, command-line options: GNU Regexp Operators. - (line 62) + (line 70) * gawk, comparison operators and: Comparison Operators. (line 50) * gawk, configuring: Configuration Philosophy. @@ -23292,34 +24106,35 @@ Index * gawk, distribution: Distribution contents. (line 6) * gawk, escape sequences: Escape Sequences. (line 125) -* gawk, extensions, disabling: Options. (line 183) +* gawk, extensions, disabling: Options. (line 200) * gawk, features, adding: Adding Code. (line 6) * gawk, features, advanced: Advanced Features. (line 6) * gawk, fflush function in: I/O Functions. (line 45) -* gawk, field separators and: User-modified. (line 65) +* gawk, field separators and: User-modified. (line 77) * gawk, FIELDWIDTHS variable in: User-modified. (line 41) * gawk, file names in: Special Files. (line 6) * gawk, format-control characters: Control Letters. (line 18) +* gawk, FPAT variable in: User-modified. (line 52) * gawk, function arguments and: Calling Built-in. (line 16) * gawk, functions, adding: Dynamic Extensions. (line 10) * gawk, hexadecimal numbers and: Nondecimal-numbers. (line 42) -* gawk, IGNORECASE variable in: User-modified. (line 81) +* gawk, IGNORECASE variable in: User-modified. (line 93) * gawk, implementation issues: Notes. 
(line 6) * gawk, implementation issues, debugging: Compatibility Mode. (line 6) * gawk, implementation issues, downward compatibility: Compatibility Mode. (line 6) * gawk, implementation issues, limits: Getline Notes. (line 14) -* gawk, implementation issues, pipes: Redirection. (line 132) +* gawk, implementation issues, pipes: Redirection. (line 135) * gawk, installing: Installation. (line 6) * gawk, internals: Internals. (line 6) * gawk, internationalization and, See internationalization: Internationalization. (line 13) -* gawk, interpreter, adding code to <1>: Future Extensions. (line 87) +* gawk, interpreter, adding code to <1>: Future Extensions. (line 90) * gawk, interpreter, adding code to: Using Internal File Ops. (line 6) * gawk, interval expressions and: Regexp Operators. (line 138) * gawk, line continuation in: Conditional Exp. (line 34) -* gawk, LINT variable in: User-modified. (line 94) +* gawk, LINT variable in: User-modified. (line 106) * gawk, list of contributors to: Contributors. (line 6) * gawk, MS-DOS version of: PC Using. (line 11) * gawk, newlines in: Statements/Lines. (line 12) @@ -23334,19 +24149,19 @@ Index (line 26) * gawk, regular expressions, operators: GNU Regexp Operators. (line 6) -* gawk, regular expressions, precedence: Regexp Operators. (line 154) +* gawk, regular expressions, precedence: Regexp Operators. (line 156) * gawk, See Also awk: Preface. (line 35) * gawk, source code, obtaining: Getting. (line 6) * gawk, splitting fields and: Constant Size. (line 87) * gawk, string-translation functions: I18N Functions. (line 6) * gawk, timestamps: Time Functions. (line 6) * gawk, uses for: Preface. (line 35) -* gawk, versions of, information about, printing: Options. (line 251) +* gawk, versions of, information about, printing: Options. (line 252) * gawk, word-boundary operator: GNU Regexp Operators. - (line 55) -* General Public License (GPL): Glossary. (line 292) + (line 63) +* General Public License (GPL): Glossary. 
(line 295) * General Public License, See GPL: Manual History. (line 11) -* gensub function (gawk) <1>: String Functions. (line 375) +* gensub function (gawk) <1>: String Functions. (line 400) * gensub function (gawk): Using Constant Regexps. (line 44) * gensub function (gawk), escape processing: Gory Details. (line 6) @@ -23358,16 +24173,16 @@ Index * get_scalar_argument internal macro: Internals. (line 136) * getgrent function (C library): Group Functions. (line 6) * getgrent user-defined function: Group Functions. (line 6) -* getgrgid function (C library): Group Functions. (line 180) -* getgrgid user-defined function: Group Functions. (line 183) -* getgrnam function (C library): Group Functions. (line 168) -* getgrnam user-defined function: Group Functions. (line 172) -* getgruser function (C library): Group Functions. (line 191) -* getgruser function, user-defined: Group Functions. (line 194) +* getgrgid function (C library): Group Functions. (line 182) +* getgrgid user-defined function: Group Functions. (line 185) +* getgrnam function (C library): Group Functions. (line 170) +* getgrnam user-defined function: Group Functions. (line 174) +* getgruser function (C library): Group Functions. (line 193) +* getgruser function, user-defined: Group Functions. (line 196) * getline command: Reading Files. (line 20) * getline command, _gr_init user-defined function: Group Functions. (line 80) -* getline command, _pw_init function: Passwd Functions. (line 136) +* getline command, _pw_init function: Passwd Functions. (line 139) * getline command, coprocesses, using from <1>: Close Files And Pipes. (line 6) * getline command, coprocesses, using from: Getline/Coprocess. @@ -23381,11 +24196,11 @@ Index * getopt user-defined function: Getopt Function. (line 106) * getpwent function (C library): Passwd Functions. (line 16) * getpwent user-defined function: Passwd Functions. (line 16) -* getpwnam function (C library): Passwd Functions. 
(line 156) -* getpwnam user-defined function: Passwd Functions. (line 160) -* getpwuid function (C library): Passwd Functions. (line 168) -* getpwuid user-defined function: Passwd Functions. (line 172) -* getservbyname function (C library): TCP/IP Networking. (line 34) +* getpwnam function (C library): Passwd Functions. (line 163) +* getpwnam user-defined function: Passwd Functions. (line 167) +* getpwuid function (C library): Passwd Functions. (line 175) +* getpwuid user-defined function: Passwd Functions. (line 179) +* getservbyname function (C library): TCP/IP Networking. (line 39) * gettext function (C library): Explaining gettext. (line 60) * gettext library: Explaining gettext. (line 6) * gettext library, locale categories: Explaining gettext. (line 78) @@ -23395,29 +24210,29 @@ Index * GNU awk, See gawk: Preface. (line 48) * GNU Free Documentation License: GNU Free Documentation License. (line 6) -* GNU General Public License: Glossary. (line 292) -* GNU Lesser General Public License: Glossary. (line 372) +* GNU General Public License: Glossary. (line 295) +* GNU Lesser General Public License: Glossary. (line 373) * GNU long options <1>: Options. (line 6) * GNU long options: Command Line. (line 13) -* GNU long options, printing list of: Options. (line 146) -* GNU Project <1>: Glossary. (line 301) +* GNU long options, printing list of: Options. (line 139) +* GNU Project <1>: Glossary. (line 304) * GNU Project: Manual History. (line 11) -* GNU/Linux <1>: Glossary. (line 581) +* GNU/Linux <1>: Glossary. (line 582) * GNU/Linux <2>: Atari Compiling. (line 16) * GNU/Linux <3>: I18N Example. (line 55) * GNU/Linux: Manual History. (line 28) -* GPL (General Public License) <1>: Glossary. (line 292) +* GPL (General Public License) <1>: Glossary. (line 295) * GPL (General Public License): Manual History. (line 11) -* GPL (General Public License), printing: Options. (line 94) +* GPL (General Public License), printing: Options. (line 85) * grcat program: Group Functions. 
(line 15) * Grigera, Juan: Contributors. (line 55) * group database, reading: Group Functions. (line 6) * group file: Group Functions. (line 6) * groups, information about: Group Functions. (line 6) -* gsub function <1>: String Functions. (line 359) +* gsub function <1>: String Functions. (line 384) * gsub function: Using Constant Regexps. (line 44) -* gsub function, arguments of: String Functions. (line 339) +* gsub function, arguments of: String Functions. (line 364) * gsub function, escape processing: Gory Details. (line 6) * Hankerson, Darrel <1>: Contributors. (line 57) * Hankerson, Darrel: Acknowledgments. (line 53) @@ -23425,7 +24240,7 @@ Index * Hartholz, Marshall: Acknowledgments. (line 35) * Hasegawa, Isamu: Contributors. (line 86) * hexadecimal numbers: Nondecimal-numbers. (line 6) -* hexadecimal values, enabling interpretation of: Options. (line 175) +* hexadecimal values, enabling interpretation of: Options. (line 168) * histsort.awk program: History Sorting. (line 25) * Hughes, Phil: Acknowledgments. (line 40) * HUP signal: Profiling. (line 207) @@ -23434,7 +24249,7 @@ Index * hyphen (-), -- operator: Increment Ops. (line 48) * hyphen (-), -= operator <1>: Precedence. (line 95) * hyphen (-), -= operator: Assignment Ops. (line 129) -* hyphen (-), filenames beginning with: Options. (line 67) +* hyphen (-), filenames beginning with: Options. (line 56) * hyphen (-), in character lists: Character Lists. (line 17) * id utility: Id Program. (line 6) * id.awk program: Id Program. (line 30) @@ -23442,15 +24257,15 @@ Index * if statement: Regexp Usage. (line 19) * if statement, actions, changing: Ranges. (line 25) * igawk.sh program: Igawk Program. (line 118) -* IGNORECASE variable <1>: User-modified. (line 70) +* IGNORECASE variable <1>: User-modified. (line 82) * IGNORECASE variable: Case-sensitivity. (line 26) * IGNORECASE variable, array sorting and: Array Sorting. (line 86) -* IGNORECASE variable, array subscripts and: Array Intro. 
(line 92) +* IGNORECASE variable, array subscripts and: Array Intro. (line 93) * IGNORECASE variable, in example programs: Library Functions. (line 43) * implementation issues, gawk: Notes. (line 6) * implementation issues, gawk, debugging: Compatibility Mode. (line 6) -* implementation issues, gawk, limits <1>: Redirection. (line 132) +* implementation issues, gawk, limits <1>: Redirection. (line 135) * implementation issues, gawk, limits: Getline Notes. (line 14) * in operator <1>: Id Program. (line 93) * in operator <2>: For Statement. (line 74) @@ -23462,7 +24277,8 @@ Index (line 25) * increment operators: Increment Ops. (line 6) * index function: String Functions. (line 60) -* indexing arrays: Array Intro. (line 50) +* indexing arrays: Array Intro. (line 51) +* indirect function calls: Indirect Calls. (line 6) * initialization, automatic: More Complex. (line 38) * input files: Reading Files. (line 6) * input files, closing: Close Files And Pipes. @@ -23500,7 +24316,7 @@ Index * internationalization: I18N Functions. (line 6) * internationalization, localization <1>: Internationalization. (line 13) -* internationalization, localization: User-modified. (line 140) +* internationalization, localization: User-modified. (line 152) * internationalization, localization, character classes: Character Lists. (line 92) * internationalization, localization, gawk and: Internationalization. @@ -23512,13 +24328,13 @@ Index * internationalization, localization, portability and: I18N Portability. (line 6) * internationalizing a program: Explaining gettext. (line 6) -* interpreted programs <1>: Glossary. (line 341) -* interpreted programs: Basic High Level. (line 14) +* interpreted programs <1>: Glossary. (line 344) +* interpreted programs: Basic High Level. (line 15) * interval expressions: Regexp Operators. (line 115) * inventory-shipped file: Sample Data Files. (line 32) * IOBUF internal structure: Internals. (line 178) * iop_alloc internal function: Internals. 
(line 178) -* ISO: Glossary. (line 352) +* ISO: Glossary. (line 355) * ISO 8859-1: Glossary. (line 137) * ISO Latin-1: Glossary. (line 137) * Jacobs, Andrew: Passwd Functions. (line 76) @@ -23537,14 +24353,14 @@ Index * Kernighan, Brian <3>: Contributors. (line 12) * Kernighan, Brian <4>: BTL. (line 6) * Kernighan, Brian <5>: Concatenation. (line 6) -* Kernighan, Brian <6>: Acknowledgments. (line 61) +* Kernighan, Brian <6>: Acknowledgments. (line 62) * Kernighan, Brian <7>: Conventions. (line 33) * Kernighan, Brian: History. (line 17) * kill command, dynamic profiling: Profiling. (line 185) * Knights, jedi: Undocumented. (line 6) * Kwok, Conrad: Contributors. (line 37) * labels.awk program: Labels Program. (line 48) -* languages, data-driven: Basic High Level. (line 83) +* languages, data-driven: Basic High Level. (line 85) * LC_ALL locale category: Explaining gettext. (line 116) * LC_COLLATE locale category: Explaining gettext. (line 89) * LC_CTYPE locale category: Explaining gettext. (line 93) @@ -23565,8 +24381,8 @@ Index * left shift, bitwise: Bitwise Functions. (line 32) * leftmost longest match: Multiple Line. (line 26) * length function: String Functions. (line 71) -* Lesser General Public License (LGPL): Glossary. (line 372) -* LGPL (Lesser General Public License): Glossary. (line 372) +* Lesser General Public License (LGPL): Glossary. (line 373) +* LGPL (Lesser General Public License): Glossary. (line 373) * libraries of awk functions: Library Functions. (line 6) * libraries of awk functions, assertions: Assert Function. (line 6) * libraries of awk functions, associative arrays and: Library Names. @@ -23601,22 +24417,22 @@ Index * lines, duplicate, removing: History Sorting. (line 6) * lines, matching ranges of: Ranges. (line 6) * lines, skipping between markers: Ranges. (line 43) -* lint checking: User-modified. (line 85) +* lint checking: User-modified. (line 97) * lint checking, array elements: Delete. 
(line 34) * lint checking, array subscripts: Uninitialized Subscripts. (line 42) * lint checking, empty programs: Command Line. (line 16) -* lint checking, issuing warnings: Options. (line 151) +* lint checking, issuing warnings: Options. (line 144) * lint checking, POSIXLY_CORRECT environment variable: Options. - (line 289) + (line 290) * lint checking, undefined functions: Function Caveats. (line 96) -* LINT variable: User-modified. (line 85) -* Linux <1>: Glossary. (line 581) +* LINT variable: User-modified. (line 97) +* Linux <1>: Glossary. (line 582) * Linux <2>: Atari Compiling. (line 16) * Linux <3>: I18N Example. (line 55) * Linux: Manual History. (line 28) * locale categories: Explaining gettext. (line 78) -* locale decimal point character: Options. (line 207) +* locale decimal point character: Options. (line 224) * locale, definition of: Locales. (line 6) * localization: I18N and L10N. (line 6) * localization, See internationalization, localization: I18N and L10N. @@ -23651,11 +24467,10 @@ Index * matching, expressions, See comparison expressions: Typing and Comparison. (line 9) * matching, leftmost longest: Multiple Line. (line 26) -* matching, null strings: Gory Details. (line 160) +* matching, null strings: Gory Details. (line 159) * mawk program: Other Versions. (line 34) * McPhee, Patrick: Contributors. (line 92) * memory, releasing: Internals. (line 102) -* memory, setting limits: Options. (line 45) * message object files: Explaining gettext. (line 39) * message object files, converting from portable object files: I18N Example. (line 62) @@ -23671,16 +24486,16 @@ Index * names, arrays/variables <1>: Library Names. (line 6) * names, arrays/variables: Arrays. (line 17) * names, functions <1>: Library Names. (line 6) -* names, functions: Definition Syntax. (line 20) +* names, functions: Definition Syntax. (line 21) * namespace issues <1>: Library Names. (line 6) * namespace issues: Arrays. (line 17) -* namespace issues, functions: Definition Syntax. 
(line 20) +* namespace issues, functions: Definition Syntax. (line 21) * nawk utility: Names. (line 17) * negative zero: Unexpected Results. (line 28) -* NetBSD: Glossary. (line 581) +* NetBSD: Glossary. (line 582) * networks, programming: TCP/IP Networking. (line 6) * networks, support for: Special Network. (line 6) -* newlines <1>: Options. (line 190) +* newlines <1>: Options. (line 207) * newlines <2>: Boolean Ops. (line 67) * newlines: Statements/Lines. (line 6) * newlines, as field separators: Field Separators. (line 63) @@ -23691,8 +24506,7 @@ Index * newlines, separating statements in actions <1>: Statements. (line 10) * newlines, separating statements in actions: Action Overview. (line 19) -* next file statement: POSIX/GNU. (line 153) -* next file statement, deprecated: Obsolete. (line 11) +* next file statement: POSIX/GNU. (line 154) * next file statement, in gawk: Nextfile Statement. (line 46) * next statement <1>: Next Statement. (line 6) * next statement: Boolean Ops. (line 85) @@ -23703,20 +24517,19 @@ Index (line 36) * nextfile statement, implementing: Nextfile Function. (line 6) * nextfile statement, in gawk: Nextfile Statement. (line 46) -* nextfile statement, next file statement and: Obsolete. (line 11) * nextfile statement, user-defined functions and: Nextfile Statement. (line 39) * nextfile user-defined function: Nextfile Function. (line 38) -* NF variable <1>: Auto-set. (line 103) +* NF variable <1>: Auto-set. (line 108) * NF variable: Fields. (line 33) * NF variable, decrementing: Changing Fields. (line 107) * noassign.awk program: Ignoring Assigns. (line 15) * NODE internal type: Internals. (line 23) * nodes, duplicating: Internals. (line 97) * not Boolean-logic operator: Boolean Ops. (line 6) -* NR variable <1>: Auto-set. (line 114) +* NR variable <1>: Auto-set. (line 119) * NR variable: Records. (line 6) -* NR variable, changing: Auto-set. (line 187) +* NR variable, changing: Auto-set. (line 193) * null strings <1>: Basic Data Typing. 
(line 47) * null strings <2>: Truth Values. (line 6) * null strings <3>: Regexp Field Splitting. @@ -23726,7 +24539,7 @@ Index * null strings, as array subscripts: Uninitialized Subscripts. (line 42) * null strings, converting numbers to strings: Conversion. (line 21) -* null strings, matching: Gory Details. (line 160) +* null strings, matching: Gory Details. (line 159) * null strings, quoting and: Quoting. (line 62) * number sign (#), #! (executable scripts): Executable Scripts. (line 6) @@ -23756,15 +24569,15 @@ Index * oawk utility: Names. (line 17) * obsolete features: Obsolete. (line 6) * octal numbers: Nondecimal-numbers. (line 6) -* octal values, enabling interpretation of: Options. (line 175) -* OFMT variable <1>: User-modified. (line 102) +* octal values, enabling interpretation of: Options. (line 168) +* OFMT variable <1>: User-modified. (line 114) * OFMT variable <2>: Conversion. (line 54) * OFMT variable: OFMT. (line 15) * OFMT variable, POSIX awk and: OFMT. (line 27) -* OFS variable <1>: User-modified. (line 111) +* OFS variable <1>: User-modified. (line 123) * OFS variable <2>: Output Separators. (line 6) * OFS variable: Changing Fields. (line 64) -* OpenBSD: Glossary. (line 581) +* OpenBSD: Glossary. (line 582) * OpenSolaris: Other Versions. (line 101) * operating systems, BSD-based <1>: Portal Files. (line 6) * operating systems, BSD-based: Manual History. (line 28) @@ -23783,7 +24596,7 @@ Index * operators, GNU-specific: GNU Regexp Operators. (line 6) * operators, input/output <1>: Precedence. (line 65) -* operators, input/output <2>: Redirection. (line 19) +* operators, input/output <2>: Redirection. (line 22) * operators, input/output <3>: Getline/Coprocess. (line 6) * operators, input/output <4>: Getline/Pipe. (line 6) * operators, input/output: Getline/File. (line 6) @@ -23796,26 +24609,26 @@ Index * operators, string: Concatenation. (line 9) * operators, string-matching: Regexp Usage. 
(line 19) * operators, string-matching, for buffers: GNU Regexp Operators. - (line 40) + (line 48) * operators, word-boundary (gawk): GNU Regexp Operators. - (line 55) + (line 63) * options, command-line <1>: Options. (line 6) * options, command-line <2>: Command Line Field Separator. (line 6) * options, command-line: Long. (line 12) -* options, command-line, end of: Options. (line 62) +* options, command-line, end of: Options. (line 51) * options, command-line, invoking awk: Command Line. (line 6) * options, command-line, processing: Getopt Function. (line 6) * options, deprecated: Obsolete. (line 6) * options, long <1>: Options. (line 6) * options, long: Command Line. (line 13) -* options, printing list of: Options. (line 146) +* options, printing list of: Options. (line 139) * OR bitwise operation: Bitwise Functions. (line 6) * or Boolean-logic operator: Boolean Ops. (line 6) * or function (gawk): Bitwise Functions. (line 39) * ord user-defined function: Ordinal Functions. (line 16) * order of evaluation, concatenation: Concatenation. (line 42) -* ORS variable <1>: User-modified. (line 116) +* ORS variable <1>: User-modified. (line 128) * ORS variable: Output Separators. (line 20) * output field separator, See OFS variable: Changing Fields. (line 64) * output record separator, See ORS variable: Output Separators. @@ -23827,16 +24640,17 @@ Index (line 6) * output, format specifier, OFMT: OFMT. (line 15) * output, formatted: Printf. (line 6) -* output, pipes: Redirection. (line 54) +* output, pipes: Redirection. (line 57) * output, printing, See printing: Printing. (line 6) * output, records: Output Separators. (line 20) * output, standard: Special FD. (line 6) -* P1003.2 POSIX standard: Glossary. (line 425) +* P1003.2 POSIX standard: Glossary. (line 426) * param_cnt internal variable: Internals. (line 46) * parameters, number of: Internals. (line 46) * parentheses (): Regexp Operators. (line 78) * parentheses (), pgawk program: Profiling. 
(line 144) * password file: Passwd Functions. (line 16) +* patsplit function: String Functions. (line 200) * patterns: Patterns and Actions. (line 6) * patterns, comparison expressions as: Expression Patterns. (line 14) @@ -23863,8 +24677,8 @@ Index * pipes, closing: Close Files And Pipes. (line 6) * pipes, input: Getline/Pipe. (line 6) -* pipes, output: Redirection. (line 54) -* Pitts, Dave: Bugs. (line 73) +* pipes, output: Redirection. (line 57) +* Pitts, Dave: Bugs. (line 72) * Pitts, Davi: Acknowledgments. (line 53) * plus sign (+): Regexp Operators. (line 101) * plus sign (+), + operator: Precedence. (line 52) @@ -23874,6 +24688,7 @@ Index * plus sign (+), += operator: Assignment Ops. (line 82) * plus sign (+), decrement/increment operators: Increment Ops. (line 11) +* pointers to functions: Indirect Calls. (line 6) * portability: Escape Sequences. (line 94) * portability, #! (executable scripts): Executable Scripts. (line 34) * portability, ** operator and: Arithmetic Ops. (line 81) @@ -23888,7 +24703,7 @@ Index * portability, deleting array elements: Delete. (line 51) * portability, example programs: Library Functions. (line 31) * portability, fflush function and: I/O Functions. (line 29) -* portability, functions, defining: Definition Syntax. (line 92) +* portability, functions, defining: Definition Syntax. (line 94) * portability, gawk: New Ports. (line 6) * portability, gettext library and: Explaining gettext. (line 10) * portability, internationalization and: I18N Portability. (line 6) @@ -23896,16 +24711,16 @@ Index * portability, new awk vs. old awk: Conversion. (line 54) * portability, next statement in user-defined functions: Function Caveats. (line 99) -* portability, NF variable, decrementing: Changing Fields. (line 115) +* portability, NF variable, decrementing: Changing Fields. (line 116) * portability, operators: Increment Ops. (line 61) * portability, operators, not in POSIX awk: Precedence. 
(line 98) -* portability, POSIXLY_CORRECT environment variable: Options. (line 307) -* portability, substr function: String Functions. (line 457) +* portability, POSIXLY_CORRECT environment variable: Options. (line 308) +* portability, substr function: String Functions. (line 482) * portable object files <1>: Translator i18n. (line 6) * portable object files: Explaining gettext. (line 36) * portable object files, converting to message object files: I18N Example. (line 62) -* portable object files, generating: Options. (line 137) +* portable object files, generating: Options. (line 131) * portal files: Portal Files. (line 6) * porting gawk: New Ports. (line 6) * positional specifiers, printf statement <1>: Printf Ordering. @@ -23933,8 +24748,8 @@ Index * POSIX awk, field separators and <1>: Field Splitting Summary. (line 41) * POSIX awk, field separators and: Fields. (line 6) -* POSIX awk, FS variable and: User-modified. (line 54) -* POSIX awk, function keyword in: Definition Syntax. (line 77) +* POSIX awk, FS variable and: User-modified. (line 66) +* POSIX awk, function keyword in: Definition Syntax. (line 78) * POSIX awk, functions and, gsub/sub: Gory Details. (line 53) * POSIX awk, functions and, length: String Functions. (line 80) * POSIX awk, GNU long options and: Options. (line 15) @@ -23945,25 +24760,25 @@ Index * POSIX awk, OFMT variable and: OFMT. (line 27) * POSIX awk, period (.), using: Regexp Operators. (line 50) * POSIX awk, printf format strings and: Format Modifiers. (line 159) -* POSIX awk, regular expressions and: Regexp Operators. (line 154) +* POSIX awk, regular expressions and: Regexp Operators. (line 156) * POSIX awk, timestamps and: Time Functions. (line 6) * POSIX awk, | I/O operator and: Getline/Pipe. (line 52) -* POSIX mode: Options. (line 183) +* POSIX mode: Options. (line 200) * POSIX, awk and: Preface. (line 22) * POSIX, gawk extensions not included in: POSIX/GNU. (line 6) * POSIX, programs, implementing in awk: Clones. 
(line 6) -* POSIXLY_CORRECT environment variable: Options. (line 289) +* POSIXLY_CORRECT environment variable: Options. (line 290) * precedence <1>: Precedence. (line 6) * precedence: Increment Ops. (line 61) -* precedence, regexp operators: Regexp Operators. (line 149) +* precedence, regexp operators: Regexp Operators. (line 151) * print statement: Printing. (line 16) * print statement, BEGIN/END patterns and: I/O And BEGIN/END. (line 16) * print statement, commas, omitting: Print Examples. (line 31) * print statement, I/O operators in: Precedence. (line 71) * print statement, line continuations and: Print Examples. (line 76) -* print statement, OFMT variable and: User-modified. (line 111) +* print statement, OFMT variable and: User-modified. (line 123) * print statement, See Also redirection, of output: Redirection. - (line 14) + (line 17) * print statement, sprintf function and: Round Function. (line 6) * printf statement <1>: Printf. (line 6) * printf statement: Printing. (line 16) @@ -23978,21 +24793,20 @@ Index * printf statement, positional specifiers, mixing with regular formats: Printf Ordering. (line 57) * printf statement, See Also redirection, of output: Redirection. - (line 14) + (line 17) * printf statement, sprintf function and: Round Function. (line 6) * printf statement, syntax of: Basic Printf. (line 6) * printing: Printing. (line 6) -* printing, list of options: Options. (line 146) +* printing, list of options: Options. (line 139) * printing, mailing labels: Labels Program. (line 6) * printing, unduplicated lines of text: Uniq Program. (line 6) * printing, user information: Id Program. (line 6) * private variables: Library Names. (line 11) -* process information, files for: Special Process. (line 6) * processes, two-way communications with: Two-way I/O. (line 23) * processing data: Basic High Level. (line 6) * PROCINFO array <1>: Group Functions. (line 6) * PROCINFO array <2>: Passwd Functions. (line 6) -* PROCINFO array <3>: Auto-set. 
(line 119) +* PROCINFO array <3>: Auto-set. (line 124) * PROCINFO array: Special Caveats. (line 12) * PROCINFO variable: Internals. (line 165) * profiling awk programs: Profiling. (line 6) @@ -24009,7 +24823,7 @@ Index * programming conventions, functions, calling: Calling Built-in. (line 10) * programming conventions, functions, writing: Definition Syntax. - (line 54) + (line 55) * programming conventions, gawk internals: Internal File Ops. (line 33) * programming conventions, nextfile statement: Nextfile Function. (line 20) @@ -24018,12 +24832,12 @@ Index * programming language, recipe for: History. (line 6) * programming languages, data-driven vs. procedural: Getting Started. (line 12) -* programming, basic steps: Basic High Level. (line 19) +* programming, basic steps: Basic High Level. (line 20) * programming, concepts: Basic Concepts. (line 6) * pwcat program: Passwd Functions. (line 23) * QSE Awk: Other Versions. (line 126) * question mark (?) <1>: GNU Regexp Operators. - (line 51) + (line 59) * question mark (?): Regexp Operators. (line 110) * question mark (?), ?: operator: Precedence. (line 92) * QuikTrim Awk: Other Versions. (line 119) @@ -24041,27 +24855,27 @@ Index * random numbers, seed of: Numeric Functions. (line 70) * range expressions: Character Lists. (line 6) * range patterns: Ranges. (line 6) -* Rankin, Pat <1>: Bugs. (line 72) +* Rankin, Pat <1>: Bugs. (line 71) * Rankin, Pat <2>: Contributors. (line 35) * Rankin, Pat <3>: Assignment Ops. (line 100) * Rankin, Pat: Acknowledgments. (line 53) -* raw sockets: TCP/IP Networking. (line 30) +* raw sockets: TCP/IP Networking. (line 36) * readable data files, checking: File Checking. (line 6) * readable.awk program: File Checking. (line 11) * recipe for a programming language: History. (line 6) -* record separators <1>: User-modified. (line 121) +* record separators <1>: User-modified. (line 133) * record separators: Records. (line 14) * record separators, changing: Records. 
(line 81) * record separators, regular expressions as: Records. (line 112) * record separators, with multiline records: Multiple Line. (line 10) -* records <1>: Basic High Level. (line 71) +* records <1>: Basic High Level. (line 73) * records: Reading Files. (line 14) * records, multiline: Multiple Line. (line 6) * records, printing: Print. (line 22) * records, splitting input into: Records. (line 6) * records, terminating: Records. (line 112) * records, treating files as: Records. (line 191) -* recursive functions: Definition Syntax. (line 72) +* recursive functions: Definition Syntax. (line 73) * redirection of input: Getline/File. (line 6) * redirection of output: Redirection. (line 6) * reference counting, sorting arrays: Array Sorting. (line 79) @@ -24086,7 +24900,7 @@ Index * regular expressions, as patterns <1>: Regexp Patterns. (line 6) * regular expressions, as patterns: Regexp Usage. (line 6) * regular expressions, as record separators: Records. (line 112) -* regular expressions, case sensitivity <1>: User-modified. (line 70) +* regular expressions, case sensitivity <1>: User-modified. (line 82) * regular expressions, case sensitivity: Case-sensitivity. (line 6) * regular expressions, computed: Computed Regexps. (line 6) * regular expressions, constants, See regexp constants: Regexp Usage. @@ -24095,20 +24909,20 @@ Index * regular expressions, dynamic, with embedded newlines: Computed Regexps. (line 59) * regular expressions, gawk, command-line options: GNU Regexp Operators. - (line 62) -* regular expressions, interval expressions and: Options. (line 231) + (line 70) +* regular expressions, interval expressions and: Options. (line 236) * regular expressions, leftmost longest match: Leftmost Longest. (line 6) * regular expressions, operators <1>: Regexp Operators. (line 6) * regular expressions, operators: Regexp Usage. (line 19) * regular expressions, operators, for buffers: GNU Regexp Operators. 
- (line 40) + (line 48) * regular expressions, operators, for words: GNU Regexp Operators. (line 6) * regular expressions, operators, gawk: GNU Regexp Operators. (line 6) * regular expressions, operators, precedence of: Regexp Operators. - (line 149) + (line 151) * regular expressions, searching for: Egrep Program. (line 6) * relational operators, See comparison operators: Typing and Comparison. (line 9) @@ -24120,15 +24934,15 @@ Index * right angle bracket (>), > operator <1>: Precedence. (line 65) * right angle bracket (>), > operator: Comparison Operators. (line 11) -* right angle bracket (>), > operator (I/O): Redirection. (line 19) +* right angle bracket (>), > operator (I/O): Redirection. (line 22) * right angle bracket (>), >= operator <1>: Precedence. (line 65) * right angle bracket (>), >= operator: Comparison Operators. (line 11) * right angle bracket (>), >> operator (I/O) <1>: Precedence. (line 65) -* right angle bracket (>), >> operator (I/O): Redirection. (line 47) +* right angle bracket (>), >> operator (I/O): Redirection. (line 50) * right shift, bitwise: Bitwise Functions. (line 32) * Ritchie, Dennis: Basic Data Typing. (line 71) -* RLENGTH variable: Auto-set. (line 163) +* RLENGTH variable: Auto-set. (line 169) * RLENGTH variable, match function and: String Functions. (line 129) * Robbins, Arnold <1>: Future Extensions. (line 6) * Robbins, Arnold <2>: Bugs. (line 29) @@ -24139,30 +24953,31 @@ Index * Robbins, Arnold: Command Line Field Separator. (line 80) * Robbins, Bill: Getline/Pipe. (line 36) -* Robbins, Harry: Acknowledgments. (line 71) -* Robbins, Jean: Acknowledgments. (line 71) +* Robbins, Harry: Acknowledgments. (line 72) +* Robbins, Jean: Acknowledgments. (line 72) * Robbins, Miriam <1>: Passwd Functions. (line 76) * Robbins, Miriam <2>: Getline/Pipe. (line 36) -* Robbins, Miriam: Acknowledgments. (line 71) +* Robbins, Miriam: Acknowledgments. (line 72) * Robinson, Will: Dynamic Extensions. (line 6) * robot, the: Dynamic Extensions. 
(line 6) * Rommel, Kai Uwe: Contributors. (line 42) * round user-defined function: Round Function. (line 16) * rounding: Round Function. (line 6) * rounding numbers: Round Function. (line 6) -* RS variable <1>: User-modified. (line 121) +* RS variable <1>: User-modified. (line 133) * RS variable: Records. (line 20) * RS variable, multiline records and: Multiple Line. (line 17) * rshift function (gawk): Bitwise Functions. (line 46) -* RSTART variable: Auto-set. (line 169) +* RSTART variable: Auto-set. (line 175) * RSTART variable, match function and: String Functions. (line 129) -* RT variable <1>: Auto-set. (line 176) +* RT variable <1>: Auto-set. (line 182) * RT variable <2>: Multiple Line. (line 129) * RT variable: Records. (line 112) * Rubin, Paul <1>: Contributors. (line 16) * Rubin, Paul: History. (line 30) * rule, definition of: Getting Started. (line 21) * rvalues/lvalues: Assignment Ops. (line 32) +* sandbox mode: Options. (line 243) * scalar values: Basic Data Typing. (line 13) * Schorr, Andrew: Acknowledgments. (line 53) * Schreiber, Bert: Acknowledgments. (line 35) @@ -24185,16 +25000,17 @@ Index (line 10) * semicolon (;), separating statements in actions: Action Overview. (line 19) -* separators, field: User-modified. (line 45) +* separators, field: User-modified. (line 56) * separators, field, FIELDWIDTHS variable and: User-modified. (line 35) +* separators, field, FPAT variable and: User-modified. (line 45) * separators, field, POSIX and: Fields. (line 6) * separators, for records: Records. (line 14) * separators, for records, regular expressions as: Records. (line 112) * separators, for statements in actions: Action Overview. (line 19) -* separators, record: User-modified. (line 121) -* separators, subscript: User-modified. (line 134) +* separators, record: User-modified. (line 133) +* separators, subscript: User-modified. (line 146) * set_value internal function: Internals. (line 146) -* shells, piping commands into: Redirection. 
(line 140) +* shells, piping commands into: Redirection. (line 143) * shells, quoting: Using Shell Variables. (line 12) * shells, quoting, rules for: Quoting. (line 18) @@ -24213,7 +25029,7 @@ Index * side effects, conditional expressions: Conditional Exp. (line 22) * side effects, decrement/increment operators: Increment Ops. (line 11) * side effects, FILENAME variable: Getline Notes. (line 19) -* side effects, function calls: Function Calls. (line 49) +* side effects, function calls: Function Calls. (line 54) * side effects, statements: Action Overview. (line 32) * signals, HUP/SIGHUP: Profiling. (line 207) * signals, INT/SIGINT (MS-DOS): Profiling. (line 210) @@ -24230,10 +25046,10 @@ Index * single-precision floating-point: Basic Data Typing. (line 33) * Skywalker, Luke: Undocumented. (line 6) * sleep utility: Alarm Program. (line 102) -* sockets: TCP/IP Networking. (line 30) +* sockets: TCP/IP Networking. (line 36) * Solaris, POSIX compliant awk: Other Versions. (line 101) * sort function, arrays, sorting: Array Sorting. (line 6) -* sort utility: Word Sorting. (line 54) +* sort utility: Word Sorting. (line 56) * sort utility, coprocesses and: Two-way I/O. (line 84) * sorting characters in different languages: Explaining gettext. (line 89) @@ -24241,23 +25057,23 @@ Index * source code, Bell Laboratories awk: Other Versions. (line 13) * source code, gawk: Gawk Distribution. (line 6) * source code, mawk: Other Versions. (line 34) -* source code, mixing: Options. (line 238) +* source code, mixing: Options. (line 104) * source files, search path for: Igawk Program. (line 358) -* sparse arrays: Array Intro. (line 71) +* sparse arrays: Array Intro. (line 72) * Spencer, Henry: Glossary. (line 12) -* split function: String Functions. (line 200) +* split function: String Functions. (line 215) * split function, array elements, deleting: Delete. (line 56) * split utility: Split Program. (line 6) * split.awk program: Split Program. 
(line 30) -* sprintf function <1>: String Functions. (line 253) +* sprintf function <1>: String Functions. (line 278) * sprintf function: OFMT. (line 15) -* sprintf function, OFMT variable and: User-modified. (line 111) +* sprintf function, OFMT variable and: User-modified. (line 123) * sprintf function, print/printf statements and: Round Function. (line 6) * sqrt function: Numeric Functions. (line 18) * square brackets ([]): Regexp Operators. (line 55) * srand function: Numeric Functions. (line 80) -* Stallman, Richard <1>: Glossary. (line 283) +* Stallman, Richard <1>: Glossary. (line 286) * Stallman, Richard <2>: Contributors. (line 24) * Stallman, Richard <3>: Acknowledgments. (line 18) * Stallman, Richard: Manual History. (line 6) @@ -24293,27 +25109,27 @@ Index * strings, null: Regexp Field Splitting. (line 43) * strings, numeric: Variable Typing. (line 6) -* strings, splitting: String Functions. (line 214) -* strtonum function (gawk): String Functions. (line 261) +* strings, splitting: String Functions. (line 234) +* strtonum function (gawk): String Functions. (line 286) * strtonum function (gawk), --non-decimal-data option and: Nondecimal Data. (line 36) -* sub function <1>: String Functions. (line 282) +* sub function <1>: String Functions. (line 307) * sub function: Using Constant Regexps. (line 44) -* sub function, arguments of: String Functions. (line 339) +* sub function, arguments of: String Functions. (line 364) * sub function, escape processing: Gory Details. (line 6) -* subscript separators: User-modified. (line 134) -* subscripts in arrays, multidimensional: Multi-dimensional. (line 6) +* subscript separators: User-modified. (line 146) +* subscripts in arrays, multidimensional: Multi-dimensional. (line 10) * subscripts in arrays, multidimensional, scanning: Multi-scanning. (line 11) * subscripts in arrays, numbers as: Numeric Array Subscripts. (line 6) * subscripts in arrays, uninitialized variables as: Uninitialized Subscripts. 
(line 6) -* SUBSEP variable: User-modified. (line 134) +* SUBSEP variable: User-modified. (line 146) * SUBSEP variable, multidimensional arrays: Multi-dimensional. - (line 12) -* substr function: String Functions. (line 426) + (line 16) +* substr function: String Functions. (line 451) * Sumner, Andrew: Other Versions. (line 81) * switch statement: Switch Statement. (line 6) * syntactic ambiguity: /= operator vs. /=.../ regexp constant: Assignment Ops. @@ -24343,7 +25159,7 @@ Index * text, printing, unduplicated lines of: Uniq Program. (line 6) * textdomain function (C library): Explaining gettext. (line 27) * TEXTDOMAIN variable <1>: Programmer i18n. (line 9) -* TEXTDOMAIN variable: User-modified. (line 140) +* TEXTDOMAIN variable: User-modified. (line 152) * TEXTDOMAIN variable, BEGIN pattern and: Programmer i18n. (line 58) * TEXTDOMAIN variable, portability and: I18N Portability. (line 20) * tilde (~), ~ operator <1>: Expression Patterns. (line 24) @@ -24365,11 +25181,11 @@ Index (line 6) * tmp_number internal function: Internals. (line 92) * tmp_string internal function: Internals. (line 87) -* tolower function: String Functions. (line 468) -* toupper function: String Functions. (line 474) +* tolower function: String Functions. (line 493) +* toupper function: String Functions. (line 499) * tr utility: Translate Program. (line 6) * translate.awk program: Translate Program. (line 55) -* troubleshooting, --non-decimal-data option: Options. (line 178) +* troubleshooting, --non-decimal-data option: Options. (line 171) * troubleshooting, -F option: Known Bugs. (line 6) * troubleshooting, == operator: Comparison Operators. (line 37) @@ -24389,20 +25205,20 @@ Index * troubleshooting, gawk, fatal errors, function arguments: Calling Built-in. (line 16) * troubleshooting, getline function: File Checking. (line 24) -* troubleshooting, gsub/sub functions: String Functions. (line 349) +* troubleshooting, gsub/sub functions: String Functions. 
(line 374) * troubleshooting, match function: String Functions. (line 195) * troubleshooting, print statement, omitting commas: Print Examples. (line 31) -* troubleshooting, printing: Redirection. (line 115) +* troubleshooting, printing: Redirection. (line 118) * troubleshooting, quotes with file names: Special FD. (line 63) * troubleshooting, readable data files: File Checking. (line 6) * troubleshooting, regexp constants vs. string constants: Computed Regexps. (line 38) * troubleshooting, string concatenation: Concatenation. (line 27) -* troubleshooting, substr function: String Functions. (line 444) +* troubleshooting, substr function: String Functions. (line 469) * troubleshooting, system function: I/O Functions. (line 88) * troubleshooting, typographical errors, global variables: Options. - (line 108) + (line 94) * true, logical: Truth Values. (line 6) * Trueman, David <1>: Contributors. (line 31) * Trueman, David <2>: Acknowledgments. (line 44) @@ -24421,7 +25237,7 @@ Index (line 6) * uniq utility: Uniq Program. (line 6) * uniq.awk program: Uniq Program. (line 65) -* Unix: Glossary. (line 581) +* Unix: Glossary. (line 582) * Unix awk, backslashes in escape sequences: Escape Sequences. (line 125) * Unix awk, close function and: Close Files And Pipes. @@ -24448,7 +25264,7 @@ Index * variables: Other Features. (line 6) * variables, assigning on command line: Assignment Options. (line 6) * variables, built-in <1>: Built-in Variables. (line 6) -* variables, built-in: Using Variables. (line 17) +* variables, built-in: Using Variables. (line 18) * variables, built-in, -v option, setting with: Options. (line 38) * variables, built-in, conveying information: Auto-set. (line 6) * variables, flag: Boolean Ops. (line 67) @@ -24460,12 +25276,12 @@ Index (line 6) * variables, getline command into, using: Getline/Variable. (line 6) * variables, global, for library functions: Library Names. (line 11) -* variables, global, printing list of: Options. 
(line 102) -* variables, initializing: Using Variables. (line 17) +* variables, global, printing list of: Options. (line 90) +* variables, initializing: Using Variables. (line 18) * variables, names of: Arrays. (line 17) * variables, private: Library Names. (line 11) * variables, setting: Options. (line 30) -* variables, shadowing: Definition Syntax. (line 60) +* variables, shadowing: Definition Syntax. (line 61) * variables, types of: Assignment Ops. (line 40) * variables, types of, comparison expressions and: Typing and Comparison. (line 9) @@ -24488,7 +25304,7 @@ Index * Wall, Larry <1>: Future Extensions. (line 6) * Wall, Larry: Array Intro. (line 6) * Wallin, Anders: Acknowledgments. (line 53) -* warnings, issuing: Options. (line 151) +* warnings, issuing: Options. (line 144) * wc utility: Wc Program. (line 6) * wc.awk program: Wc Program. (line 45) * Weinberger, Peter <1>: Contributors. (line 12) @@ -24497,19 +25313,17 @@ Index * while statement: Regexp Usage. (line 19) * whitespace, as field separators: Field Separators. (line 63) * whitespace, functions, calling: Calling Built-in. (line 10) -* whitespace, newlines as: Options. (line 190) -* Wildenhues, Ralf <1>: Bugs. (line 71) -* Wildenhues, Ralf: Contributors. (line 72) +* whitespace, newlines as: Options. (line 207) * Williams, Kent: Contributors. (line 37) * Woehlke, Matthew: Contributors. (line 72) * Woods, John: Contributors. (line 28) * word boundaries, matching: GNU Regexp Operators. - (line 30) + (line 38) * word, regexp definition of: GNU Regexp Operators. (line 6) * word-boundary operator (gawk): GNU Regexp Operators. - (line 55) -* wordfreq.awk program: Word Sorting. (line 60) + (line 63) +* wordfreq.awk program: Word Sorting. (line 62) * words, counting: Wc Program. (line 6) * words, duplicate, searching for: Dupword Program. (line 6) * words, usage counts, generating: Word Sorting. (line 6) @@ -24527,11 +25341,11 @@ Index * {} (braces), statements, grouping: Statements. 
(line 10) * | (vertical bar): Regexp Operators. (line 68) * | (vertical bar), | operator (I/O) <1>: Precedence. (line 65) -* | (vertical bar), | operator (I/O) <2>: Redirection. (line 54) +* | (vertical bar), | operator (I/O) <2>: Redirection. (line 57) * | (vertical bar), | operator (I/O): Getline/Pipe. (line 6) * | (vertical bar), |& operator (I/O) <1>: Two-way I/O. (line 44) * | (vertical bar), |& operator (I/O) <2>: Precedence. (line 65) -* | (vertical bar), |& operator (I/O) <3>: Redirection. (line 99) +* | (vertical bar), |& operator (I/O) <3>: Redirection. (line 102) * | (vertical bar), |& operator (I/O): Getline/Coprocess. (line 6) * | (vertical bar), |& operator (I/O), pipes, closing: Close Files And Pipes. (line 117) @@ -24551,366 +25365,379 @@ Index Tag Table: Node: Top1340 -Node: Foreword27566 -Node: Preface31887 -Ref: Preface-Footnote-134756 -Node: History34988 -Node: Names37204 -Ref: Names-Footnote-138676 -Node: This Manual38748 -Ref: This Manual-Footnote-143503 -Node: Conventions43603 -Node: Manual History45477 -Ref: Manual History-Footnote-148930 -Ref: Manual History-Footnote-248971 -Node: How To Contribute49045 -Node: Acknowledgments50189 -Node: Getting Started54049 -Node: Running gawk56421 -Node: One-shot57607 -Node: Read Terminal58832 -Ref: Read Terminal-Footnote-160490 -Node: Long60661 -Node: Executable Scripts62037 -Ref: Executable Scripts-Footnote-163933 -Ref: Executable Scripts-Footnote-264084 -Node: Comments64535 -Node: Quoting66903 -Node: DOS Quoting71483 -Node: Sample Data Files72155 -Node: Very Simple75187 -Node: Two Rules79792 -Node: More Complex81939 -Ref: More Complex-Footnote-184862 -Ref: More Complex-Footnote-285310 -Node: Statements/Lines85393 -Ref: Statements/Lines-Footnote-189775 -Node: Other Features90040 -Node: When90892 -Node: Regexp93148 -Node: Regexp Usage94602 -Node: Escape Sequences96654 -Node: Regexp Operators102393 -Ref: Regexp Operators-Footnote-1109500 -Ref: Regexp Operators-Footnote-2109647 -Node: Character Lists109745 
-Ref: table-char-classes111702 -Node: GNU Regexp Operators114327 -Node: Case-sensitivity117971 -Ref: Case-sensitivity-Footnote-1121144 -Node: Leftmost Longest121379 -Node: Computed Regexps122570 -Node: Locales125951 -Node: Reading Files128217 -Node: Records129974 -Ref: Records-Footnote-1138532 -Node: Fields138569 -Ref: Fields-Footnote-1141599 -Node: Nonconstant Fields141685 -Node: Changing Fields143887 -Node: Field Separators149168 -Node: Regexp Field Splitting152659 -Node: Single Character Fields156212 -Node: Command Line Field Separator157263 -Node: Field Splitting Summary160702 -Ref: Field Splitting Summary-Footnote-1163888 -Node: Constant Size163989 -Node: Multiple Line168466 -Ref: Multiple Line-Footnote-1174197 -Node: Getline174376 -Node: Plain Getline176444 -Node: Getline/Variable178531 -Node: Getline/File179672 -Node: Getline/Variable/File180996 -Ref: Getline/Variable/File-Footnote-1182593 -Node: Getline/Pipe182680 -Node: Getline/Variable/Pipe185277 -Node: Getline/Coprocess186384 -Node: Getline/Variable/Coprocess187627 -Node: Getline Notes188341 -Node: Getline Summary189984 -Ref: table-getline-variants190268 -Node: Printing190834 -Node: Print192463 -Node: Print Examples193789 -Node: Output Separators196584 -Node: OFMT198345 -Node: Printf199700 -Node: Basic Printf200619 -Node: Control Letters202154 -Node: Format Modifiers206037 -Node: Printf Examples212047 -Node: Redirection214764 -Node: Special Files221661 -Node: Special FD222295 -Node: Special Process225321 -Node: Special Network227556 -Node: Special Caveats228398 -Ref: Special Caveats-Footnote-1229596 -Node: Close Files And Pipes229979 -Ref: Close Files And Pipes-Footnote-1236900 -Ref: Close Files And Pipes-Footnote-2237048 -Node: Expressions237196 -Node: Constants239385 -Node: Scalar Constants240066 -Ref: Scalar Constants-Footnote-1240921 -Node: Nondecimal-numbers241103 -Node: Regexp Constants244161 -Node: Using Constant Regexps244634 -Node: Variables247717 -Node: Using Variables248373 -Node: Assignment 
Options249883 -Node: Conversion251760 -Ref: table-locale-affects257191 -Ref: Conversion-Footnote-1257815 -Node: Arithmetic Ops257924 -Node: Concatenation260436 -Ref: Concatenation-Footnote-1263218 -Node: Assignment Ops263309 -Ref: table-assign-ops268287 -Node: Increment Ops269688 -Node: Truth Values273181 -Node: Typing and Comparison274231 -Node: Variable Typing274934 -Ref: Variable Typing-Footnote-1278620 -Node: Comparison Operators278764 -Ref: table-relational-ops279140 -Node: Boolean Ops282689 -Ref: Boolean Ops-Footnote-1286749 -Node: Conditional Exp286840 -Node: Function Calls288577 -Node: Precedence291859 -Node: Patterns and Actions295511 -Node: Pattern Overview296565 -Node: Regexp Patterns298002 -Node: Expression Patterns298545 -Node: Ranges302095 -Node: BEGIN/END305184 -Node: Using BEGIN/END305934 -Ref: Using BEGIN/END-Footnote-1308666 -Node: I/O And BEGIN/END308780 -Node: Empty311047 -Node: Using Shell Variables311355 -Node: Action Overview313636 -Node: Statements315994 -Node: If Statement317850 -Node: While Statement319349 -Node: Do Statement321381 -Node: For Statement322530 -Node: Switch Statement325670 -Node: Break Statement327946 -Node: Continue Statement330003 -Node: Next Statement331907 -Node: Nextfile Statement334187 -Node: Exit Statement336784 -Node: Built-in Variables339055 -Node: User-modified340150 -Ref: User-modified-Footnote-1347515 -Node: Auto-set347577 -Ref: Auto-set-Footnote-1355917 -Node: ARGC and ARGV356122 -Node: Arrays359834 -Node: Array Intro361742 -Node: Reference to Elements366076 -Node: Assigning Elements367965 -Node: Array Example368446 -Node: Scanning an Array370168 -Node: Delete372448 -Ref: Delete-Footnote-1374841 -Node: Numeric Array Subscripts374898 -Node: Uninitialized Subscripts377085 -Node: Multi-dimensional378691 -Node: Multi-scanning381704 -Node: Array Sorting383319 -Node: Functions386982 -Node: Built-in387717 -Node: Calling Built-in388687 -Node: Numeric Functions390654 -Ref: Numeric Functions-Footnote-1394396 -Ref: Numeric 
Functions-Footnote-2394722 -Node: String Functions394991 -Ref: String Functions-Footnote-1415372 -Ref: String Functions-Footnote-2415501 -Ref: String Functions-Footnote-3415749 -Node: Gory Details415836 -Ref: table-sub-escapes417471 -Ref: table-sub-posix-92418806 -Ref: table-sub-proposed420145 -Ref: table-posix-2001-sub421497 -Ref: table-gensub-escapes422834 -Ref: Gory Details-Footnote-1424020 -Node: I/O Functions424071 -Ref: I/O Functions-Footnote-1430722 -Node: Time Functions430813 -Ref: Time Functions-Footnote-1441605 -Ref: Time Functions-Footnote-2441673 -Ref: Time Functions-Footnote-3441831 -Ref: Time Functions-Footnote-4441942 -Ref: Time Functions-Footnote-5442067 -Ref: Time Functions-Footnote-6442294 -Node: Bitwise Functions442556 -Ref: table-bitwise-ops443134 -Ref: Bitwise Functions-Footnote-1447368 -Node: I18N Functions447552 -Node: User-defined449273 -Node: Definition Syntax450054 -Node: Function Example454630 -Node: Function Caveats457210 -Node: Return Statement461135 -Node: Dynamic Typing463792 -Node: Internationalization464529 -Node: I18N and L10N465948 -Node: Explaining gettext466632 -Ref: Explaining gettext-Footnote-1471539 -Ref: Explaining gettext-Footnote-2471778 -Node: Programmer i18n471947 -Node: Translator i18n476170 -Node: String Extraction476960 -Ref: String Extraction-Footnote-1477910 -Node: Printf Ordering478036 -Ref: Printf Ordering-Footnote-1480814 -Node: I18N Portability480878 -Ref: I18N Portability-Footnote-1483306 -Node: I18N Example483369 -Ref: I18N Example-Footnote-1485981 -Node: Gawk I18N486053 -Node: Advanced Features486631 -Node: Nondecimal Data488030 -Node: Two-way I/O489589 -Ref: Two-way I/O-Footnote-1495070 -Node: TCP/IP Networking495147 -Node: Portal Files497702 -Node: Profiling498346 -Node: Invoking Gawk505802 -Node: Command Line507036 -Node: Options507821 -Ref: Options-Footnote-1520899 -Node: Other Arguments520924 -Node: AWKPATH Variable523605 -Ref: AWKPATH Variable-Footnote-1526377 -Node: Exit Status526637 -Node: 
Obsolete527300 -Node: Undocumented528299 -Node: Known Bugs528564 -Node: Library Functions529166 -Ref: Library Functions-Footnote-1532147 -Node: Library Names532318 -Ref: Library Names-Footnote-1535791 -Ref: Library Names-Footnote-2536010 -Node: General Functions536096 -Node: Nextfile Function537155 -Node: Strtonum Function541519 -Node: Assert Function544454 -Node: Round Function547758 -Node: Cliff Random Function549291 -Node: Ordinal Functions550304 -Ref: Ordinal Functions-Footnote-1553364 -Node: Join Function553580 -Ref: Join Function-Footnote-1555340 -Node: Gettimeofday Function555540 -Node: Data File Management559243 -Node: Filetrans Function559875 -Node: Rewind Function563301 -Node: File Checking564747 -Node: Empty Files565777 -Node: Ignoring Assigns568002 -Node: Getopt Function569550 -Ref: Getopt Function-Footnote-1580828 -Node: Passwd Functions581029 -Ref: Passwd Functions-Footnote-1589690 -Node: Group Functions589778 -Node: Sample Programs597776 -Node: Running Examples598453 -Node: Clones599181 -Node: Cut Program600313 -Node: Egrep Program610070 -Ref: Egrep Program-Footnote-1617820 -Node: Id Program617930 -Node: Split Program621537 -Node: Tee Program625001 -Node: Uniq Program627678 -Node: Wc Program635046 -Ref: Wc Program-Footnote-1639290 -Node: Miscellaneous Programs639486 -Node: Dupword Program640606 -Node: Alarm Program642637 -Node: Translate Program647177 -Ref: Translate Program-Footnote-1651545 -Ref: Translate Program-Footnote-2651782 -Node: Labels Program651916 -Ref: Labels Program-Footnote-1655207 -Node: Word Sorting655291 -Node: History Sorting659572 -Node: Extract Program661410 -Node: Simple Sed668762 -Node: Igawk Program671817 -Ref: Igawk Program-Footnote-1686548 -Ref: Igawk Program-Footnote-2686749 -Node: Signature Program686887 -Node: Language History687967 -Node: V7/SVR3.1689351 -Node: SVR4691624 -Node: POSIX693063 -Node: BTL694671 -Node: POSIX/GNU696350 -Node: Contributors704265 -Node: Installation707906 -Node: Gawk Distribution708877 -Node: 
Getting709361 -Node: Extracting710187 -Node: Distribution contents711575 -Node: Unix Installation716656 -Node: Quick Installation717247 -Node: Additional Configuration Options718949 -Node: Configuration Philosophy721121 -Node: Non-Unix Installation723485 -Node: PC Installation723950 -Node: PC Binary Installation725224 -Node: PC Compiling727067 -Node: PC Dynamic731563 -Node: PC Using733924 -Node: Cygwin738474 -Node: MSYS739470 -Node: VMS Installation739976 -Node: VMS Compilation740580 -Node: VMS Installation Details742157 -Node: VMS Running743787 -Node: VMS POSIX745384 -Node: VMS Old Gawk746682 -Node: Unsupported747151 -Node: Atari Installation747613 -Node: Atari Compiling748900 -Node: Atari Using750785 -Node: BeOS Installation753630 -Node: Tandem Installation754777 -Node: Bugs756456 -Node: Other Versions760349 -Node: Notes765567 -Node: Compatibility Mode766259 -Node: Additions767053 -Node: Adding Code767803 -Node: New Ports773853 -Node: Dynamic Extensions777985 -Node: Internals779242 -Node: Sample Library790245 -Node: Internal File Description790904 -Node: Internal File Ops794597 -Ref: Internal File Ops-Footnote-1799923 -Node: Using Internal File Ops800071 -Node: Future Extensions802094 -Node: Basic Concepts806047 -Node: Basic High Level806804 -Ref: Basic High Level-Footnote-1810836 -Node: Basic Data Typing811030 -Node: Floating Point Issues815467 -Node: String Conversion Precision816560 -Ref: String Conversion Precision-Footnote-1818254 -Node: Unexpected Results818363 -Node: POSIX Floating Point Problems820189 -Ref: POSIX Floating Point Problems-Footnote-1823663 -Node: Glossary823701 -Node: Copying847401 -Node: GNU Free Documentation License884958 -Node: Index910095 +Node: Foreword28547 +Node: Preface32868 +Ref: Preface-Footnote-135737 +Node: History35969 +Node: Names38185 +Ref: Names-Footnote-139657 +Node: This Manual39729 +Ref: This Manual-Footnote-144484 +Node: Conventions44584 +Node: Manual History46458 +Ref: Manual History-Footnote-149911 +Ref: Manual 
History-Footnote-249952 +Node: How To Contribute50026 +Node: Acknowledgments51170 +Node: Getting Started55048 +Node: Running gawk57420 +Node: One-shot58606 +Node: Read Terminal59831 +Ref: Read Terminal-Footnote-161492 +Ref: Read Terminal-Footnote-261768 +Node: Long61939 +Node: Executable Scripts63315 +Ref: Executable Scripts-Footnote-165211 +Ref: Executable Scripts-Footnote-265362 +Node: Comments65813 +Node: Quoting68181 +Node: DOS Quoting72758 +Node: Sample Data Files73430 +Node: Very Simple76462 +Node: Two Rules81067 +Node: More Complex83214 +Ref: More Complex-Footnote-186137 +Ref: More Complex-Footnote-286585 +Node: Statements/Lines86668 +Ref: Statements/Lines-Footnote-191050 +Node: Other Features91315 +Node: When92167 +Node: Regexp94423 +Node: Regexp Usage95877 +Node: Escape Sequences97929 +Node: Regexp Operators103668 +Ref: Regexp Operators-Footnote-1110877 +Ref: Regexp Operators-Footnote-2111024 +Node: Character Lists111122 +Ref: table-char-classes113079 +Node: GNU Regexp Operators115704 +Node: Case-sensitivity119434 +Ref: Case-sensitivity-Footnote-1122607 +Node: Leftmost Longest122842 +Node: Computed Regexps124033 +Node: Locales127414 +Node: Reading Files129680 +Node: Records131692 +Ref: Records-Footnote-1140250 +Node: Fields140287 +Ref: Fields-Footnote-1143317 +Node: Nonconstant Fields143403 +Node: Changing Fields145605 +Node: Field Separators150926 +Node: Regexp Field Splitting154417 +Node: Single Character Fields157970 +Node: Command Line Field Separator159021 +Node: Field Splitting Summary162460 +Ref: Field Splitting Summary-Footnote-1165646 +Node: Constant Size165747 +Node: Splitting By Content170231 +Ref: Splitting By Content-Footnote-1173460 +Node: Multiple Line173500 +Ref: Multiple Line-Footnote-1179238 +Node: Getline179417 +Node: Plain Getline181620 +Node: Getline/Variable183707 +Node: Getline/File184848 +Node: Getline/Variable/File186172 +Ref: Getline/Variable/File-Footnote-1187769 +Node: Getline/Pipe187856 +Node: Getline/Variable/Pipe190453 +Node: 
Getline/Coprocess191560 +Node: Getline/Variable/Coprocess192803 +Node: Getline Notes193517 +Node: Getline Summary195160 +Ref: table-getline-variants195444 +Node: BEGINFILE/ENDFILE196010 +Node: Command line directories198848 +Node: Printing199525 +Node: Print201154 +Node: Print Examples202480 +Node: Output Separators205275 +Node: OFMT207036 +Node: Printf208391 +Node: Basic Printf209310 +Node: Control Letters210845 +Node: Format Modifiers214728 +Node: Printf Examples220738 +Node: Redirection223455 +Node: Special Files230451 +Node: Special FD231014 +Node: Special Network234207 +Node: Special Caveats235062 +Ref: Special Caveats-Footnote-1236260 +Node: Close Files And Pipes236643 +Ref: Close Files And Pipes-Footnote-1243564 +Ref: Close Files And Pipes-Footnote-2243712 +Node: Expressions243860 +Node: Values244929 +Node: Constants245601 +Node: Scalar Constants246281 +Ref: Scalar Constants-Footnote-1247140 +Node: Nondecimal-numbers247322 +Node: Regexp Constants250384 +Node: Using Constant Regexps250859 +Node: Variables253941 +Node: Using Variables254596 +Node: Assignment Options256170 +Node: Conversion258051 +Ref: table-locale-affects263458 +Ref: Conversion-Footnote-1264082 +Node: All Operators264191 +Node: Arithmetic Ops264821 +Node: Concatenation267320 +Ref: Concatenation-Footnote-1270108 +Node: Assignment Ops270199 +Ref: table-assign-ops275183 +Node: Increment Ops276584 +Node: Truth Values and Conditions280062 +Node: Truth Values281145 +Node: Typing and Comparison282193 +Node: Variable Typing282914 +Ref: Variable Typing-Footnote-1286602 +Node: Comparison Operators286746 +Ref: table-relational-ops287124 +Node: Boolean Ops290673 +Ref: Boolean Ops-Footnote-1294751 +Node: Conditional Exp294842 +Node: Function Calls296574 +Node: Precedence300125 +Node: Patterns and Actions303775 +Node: Pattern Overview304829 +Node: Regexp Patterns306266 +Node: Expression Patterns306809 +Node: Ranges310359 +Node: BEGIN/END313448 +Node: Using BEGIN/END314198 +Ref: Using 
BEGIN/END-Footnote-1316930 +Node: I/O And BEGIN/END317044 +Node: Empty319311 +Node: Using Shell Variables319619 +Node: Action Overview321900 +Node: Statements324258 +Node: If Statement326114 +Node: While Statement327613 +Node: Do Statement329645 +Node: For Statement330794 +Node: Switch Statement333934 +Node: Break Statement335982 +Node: Continue Statement338039 +Node: Next Statement339943 +Node: Nextfile Statement342223 +Node: Exit Statement344939 +Node: Built-in Variables347210 +Node: User-modified348305 +Ref: User-modified-Footnote-1356230 +Node: Auto-set356292 +Ref: Auto-set-Footnote-1364980 +Node: ARGC and ARGV365185 +Node: Arrays368946 +Node: Array Basics370393 +Node: Array Intro371104 +Node: Reference to Elements375490 +Node: Assigning Elements377389 +Node: Array Example377880 +Node: Scanning an Array379612 +Node: Delete381887 +Ref: Delete-Footnote-1384275 +Node: Numeric Array Subscripts384332 +Node: Uninitialized Subscripts386519 +Node: Multi-dimensional388125 +Node: Multi-scanning391216 +Node: Array Sorting392796 +Node: Functions396578 +Node: Built-in397387 +Node: Calling Built-in398357 +Node: Numeric Functions400324 +Ref: Numeric Functions-Footnote-1404066 +Ref: Numeric Functions-Footnote-2404392 +Node: String Functions404661 +Ref: String Functions-Footnote-1426447 +Ref: String Functions-Footnote-2426576 +Ref: String Functions-Footnote-3426824 +Node: Gory Details426911 +Ref: table-sub-escapes428546 +Ref: table-sub-posix-92429881 +Ref: table-sub-proposed431220 +Ref: table-posix-2001-sub432572 +Ref: table-gensub-escapes433841 +Ref: Gory Details-Footnote-1435027 +Node: I/O Functions435078 +Ref: I/O Functions-Footnote-1441823 +Node: Time Functions441914 +Ref: Time Functions-Footnote-1452704 +Ref: Time Functions-Footnote-2452772 +Ref: Time Functions-Footnote-3452930 +Ref: Time Functions-Footnote-4453041 +Ref: Time Functions-Footnote-5453166 +Ref: Time Functions-Footnote-6453393 +Node: Bitwise Functions453655 +Ref: table-bitwise-ops454233 +Ref: Bitwise 
Functions-Footnote-1458467 +Node: I18N Functions458651 +Node: User-defined460372 +Node: Definition Syntax461176 +Node: Function Example465874 +Node: Function Caveats468454 +Node: Return Statement472379 +Node: Dynamic Typing475036 +Node: Indirect Calls475773 +Node: Internationalization485408 +Node: I18N and L10N486827 +Node: Explaining gettext487511 +Ref: Explaining gettext-Footnote-1492418 +Ref: Explaining gettext-Footnote-2492657 +Node: Programmer i18n492826 +Node: Translator i18n497049 +Node: String Extraction497840 +Ref: String Extraction-Footnote-1498793 +Node: Printf Ordering498919 +Ref: Printf Ordering-Footnote-1501697 +Node: I18N Portability501761 +Ref: I18N Portability-Footnote-1504189 +Node: I18N Example504252 +Ref: I18N Example-Footnote-1506866 +Node: Gawk I18N506938 +Node: Advanced Features507516 +Node: Nondecimal Data508915 +Node: Two-way I/O510474 +Ref: Two-way I/O-Footnote-1515955 +Node: TCP/IP Networking516032 +Node: Portal Files518825 +Node: Profiling519469 +Node: Invoking Gawk526925 +Node: Command Line528159 +Node: Options528944 +Ref: Options-Footnote-1542064 +Node: Other Arguments542089 +Node: AWKPATH Variable544770 +Ref: AWKPATH Variable-Footnote-1547545 +Node: Exit Status547805 +Node: Obsolete548472 +Node: Undocumented549271 +Node: Known Bugs549533 +Node: Library Functions550135 +Ref: Library Functions-Footnote-1553116 +Node: Library Names553287 +Ref: Library Names-Footnote-1556760 +Ref: Library Names-Footnote-2556979 +Node: General Functions557065 +Node: Nextfile Function558124 +Node: Strtonum Function562488 +Node: Assert Function565423 +Node: Round Function568727 +Node: Cliff Random Function570260 +Node: Ordinal Functions571273 +Ref: Ordinal Functions-Footnote-1574333 +Node: Join Function574549 +Ref: Join Function-Footnote-1576309 +Node: Gettimeofday Function576509 +Node: Data File Management580212 +Node: Filetrans Function580844 +Node: Rewind Function584270 +Node: File Checking585716 +Node: Empty Files586746 +Node: Ignoring Assigns588971 
+Node: Getopt Function590519 +Ref: Getopt Function-Footnote-1601797 +Node: Passwd Functions601998 +Ref: Passwd Functions-Footnote-1610978 +Node: Group Functions611066 +Node: Sample Programs619163 +Node: Running Examples619840 +Node: Clones620568 +Node: Cut Program621700 +Node: Egrep Program631457 +Ref: Egrep Program-Footnote-1639207 +Node: Id Program639317 +Node: Split Program642924 +Node: Tee Program646388 +Node: Uniq Program649131 +Node: Wc Program656499 +Ref: Wc Program-Footnote-1660743 +Node: Miscellaneous Programs660939 +Node: Dupword Program662059 +Node: Alarm Program664090 +Node: Translate Program668630 +Ref: Translate Program-Footnote-1672998 +Ref: Translate Program-Footnote-2673235 +Node: Labels Program673369 +Ref: Labels Program-Footnote-1676660 +Node: Word Sorting676744 +Node: History Sorting681087 +Node: Extract Program682925 +Node: Simple Sed690277 +Node: Igawk Program693332 +Ref: Igawk Program-Footnote-1708063 +Ref: Igawk Program-Footnote-2708264 +Node: Signature Program708402 +Node: Language History709482 +Node: V7/SVR3.1710866 +Node: SVR4713139 +Node: POSIX714578 +Node: BTL716186 +Node: POSIX/GNU717865 +Node: Contributors727081 +Node: Installation730680 +Node: Gawk Distribution731651 +Node: Getting732135 +Node: Extracting732961 +Node: Distribution contents734349 +Node: Unix Installation739430 +Node: Quick Installation740021 +Node: Additional Configuration Options741723 +Node: Configuration Philosophy743641 +Node: Non-Unix Installation746005 +Node: PC Installation746470 +Node: PC Binary Installation747776 +Node: PC Compiling749619 +Node: PC Dynamic754124 +Node: PC Using756485 +Node: Cygwin761035 +Node: MSYS762031 +Node: VMS Installation762537 +Node: VMS Compilation763141 +Node: VMS Installation Details764718 +Node: VMS Running766348 +Node: VMS POSIX767945 +Node: VMS Old Gawk769243 +Node: Unsupported769712 +Node: Atari Installation770174 +Node: Atari Compiling771461 +Node: Atari Using773346 +Node: BeOS Installation776191 +Node: Tandem 
Installation777338 +Node: Bugs779017 +Node: Other Versions782849 +Node: Notes788067 +Node: Compatibility Mode788759 +Node: Additions789553 +Node: Adding Code790303 +Node: New Ports796353 +Node: Dynamic Extensions800485 +Node: Internals801810 +Node: Sample Library812813 +Node: Internal File Description813472 +Node: Internal File Ops817165 +Ref: Internal File Ops-Footnote-1822491 +Node: Using Internal File Ops822639 +Node: Future Extensions824662 +Node: Basic Concepts828699 +Node: Basic High Level829456 +Ref: Basic High Level-Footnote-1833572 +Node: Basic Data Typing833766 +Node: Floating Point Issues838203 +Node: String Conversion Precision839286 +Ref: String Conversion Precision-Footnote-1840980 +Node: Unexpected Results841089 +Node: POSIX Floating Point Problems842915 +Ref: POSIX Floating Point Problems-Footnote-1846389 +Node: Glossary846427 +Node: Copying870183 +Node: GNU Free Documentation License907740 +Node: next-edition932884 +Node: unresolved933236 +Node: revision933736 +Node: consistency934159 +Node: Index937512 End Tag Table diff --git a/doc/gawk.texi b/doc/gawk.texi index 5da5fe08..329718e7 100644 --- a/doc/gawk.texi +++ b/doc/gawk.texi @@ -291,6 +291,7 @@ particular records in a file and perform operations upon them. * Copying:: Your right to copy and distribute @command{gawk}. * GNU Free Documentation License:: The license for this @value{DOCUMENT}. +* next-edition:: next-edition. * Index:: Concept and Variable Index. @detailmenu @@ -349,6 +350,7 @@ particular records in a file and perform operations upon them. * Command Line Field Separator:: Setting @code{FS} from the command-line. * Field Splitting Summary:: Some final points and a summary table. * Constant Size:: Reading constant width data. +* Splitting By Content:: Defining Fields By Content * Multiple Line:: Reading multi-line records. * Getline:: Reading files under explicit program control using the @code{getline} function. 
@@ -366,6 +368,9 @@ particular records in a file and perform operations upon them. * Getline Notes:: Important things to know about @code{getline}. * Getline Summary:: Summary of @code{getline} Variants. +* BEGINFILE/ENDFILE:: Two special patterns for advanced control. +* Command line directories:: What happens if you put a directory on the + command line. * Print:: The @code{print} statement. * Print Examples:: Simple examples of @code{print} statements. * Output Separators:: The output separators and how to change @@ -383,10 +388,11 @@ particular records in a file and perform operations upon them. @command{gawk} allows access to inherited file descriptors. * Special FD:: Special files for I/O. -* Special Process:: Special files for process information. * Special Network:: Special files for network communications. * Special Caveats:: Things to watch out for. * Close Files And Pipes:: Closing Input and Output Files and Pipes. +* Values:: Constants, Variables, and Regular + Expressions. * Constants:: String, numeric and regexp constants. * Scalar Constants:: Numeric and string constants. * Nondecimal-numbers:: What are octal and hex numbers. @@ -400,6 +406,7 @@ particular records in a file and perform operations upon them. advanced method of input. * Conversion:: The conversion of strings to numbers and vice versa. +* All Operators:: @command{gawk}'s operators. * Arithmetic Ops:: Arithmetic operations (@samp{+}, @samp{-}, etc.) * Concatenation:: Concatenating strings. @@ -407,6 +414,7 @@ particular records in a file and perform operations upon them. field. * Increment Ops:: Incrementing the numeric value of a variable. +* Truth Values and Conditions:: Testing for true and false. * Truth Values:: What is ``true'' and what is ``false''. * Typing and Comparison:: How variables acquire types and how this affects comparison of numbers and strings @@ -458,6 +466,7 @@ particular records in a file and perform operations upon them. 
* Auto-set:: Built-in variables where @command{awk} gives you information. * ARGC and ARGV:: Ways to use @code{ARGC} and @code{ARGV}. +* Array Basics:: The basics of arrays. * Array Intro:: Introduction to Arrays * Reference to Elements:: How to examine one element of an array. * Assigning Elements:: How to change an element of an array. @@ -497,6 +506,7 @@ particular records in a file and perform operations upon them. * Function Caveats:: Things to watch out for. * Return Statement:: Specifying the value a function returns. * Dynamic Typing:: How variable types can change at runtime. +* Indirect Calls:: Choosing the function to call at runtime. * I18N and L10N:: Internationalization and Localization. * Explaining gettext:: How GNU @code{gettext} works. * Programmer i18n:: Features for the programmer. @@ -518,8 +528,8 @@ particular records in a file and perform operations upon them. * Other Arguments:: Input file names and variable assignments. * AWKPATH Variable:: Searching directories for @command{awk} programs. -* Obsolete:: Obsolete Options and/or features. * Exit Status:: @command{gawk}'s exit status. +* Obsolete:: Obsolete Options and/or features. * Undocumented:: Undocumented Options and Features. * Known Bugs:: Known Bugs in @command{gawk}. * Library Names:: How to best name private global variables @@ -527,6 +537,8 @@ particular records in a file and perform operations upon them. * General Functions:: Functions that are of general use. * Nextfile Function:: Two implementations of a @code{nextfile} function. +* Strtonum Function:: A replacement for the built-in + @code{strtonum} function. * Assert Function:: A function for assertions in @command{awk} programs. * Round Function:: A function for rounding if @code{sprintf} @@ -596,14 +608,16 @@ particular records in a file and perform operations upon them. * PC Installation:: Installing and Compiling @command{gawk} on MS-DOS and OS/2. * PC Binary Installation:: Installing a prepared distribution. 
-* PC Compiling:: Compiling @command{gawk} for MS-DOS, Windows32, - and OS/2. -* PC Using:: Running @command{gawk} on MS-DOS, Windows32 and - OS/2. +* PC Compiling:: Compiling @command{gawk} for MS-DOS, + Windows32, and OS/2. * PC Dynamic:: Compiling @command{gawk} for dynamic libraries. +* PC Using:: Running @command{gawk} on MS-DOS, Windows32 + and OS/2. * Cygwin:: Building and running @command{gawk} for Cygwin. +* MSYS:: Using @command{gawk} In The MSYS + Environment. * VMS Installation:: Installing @command{gawk} on VMS. * VMS Compilation:: How to compile @command{gawk} under VMS. * VMS Installation Details:: How to install @command{gawk} under VMS. @@ -641,9 +655,12 @@ particular records in a file and perform operations upon them. * Basic Data Typing:: A very quick intro to data types. * Floating Point Issues:: Stuff to know about floating-point numbers. * String Conversion Precision:: The String Value Can Lie. -* Unexpected Results:: Floating Point Numbers Are Not - Abstract Numbers. +* Unexpected Results:: Floating Point Numbers Are Not Abstract + Numbers. * POSIX Floating Point Problems:: Standards Versus Existing Practice. +* unresolved:: unresolved. +* revision:: revision. +* consistency:: consistency. @end detailmenu @end menu @@ -1461,15 +1478,14 @@ Drepper, provided invaluable help and feedback for the design of the internationalization features. @c @cindex Brown, Martin -@c @cindex Buening, Andreas @c @cindex Hasegawa, Isamu @c @cindex Rommel, Kai Uwe @c Martin Brown, -@c Andreas Buening, @c Isamu Hasegawa, @c Kai Uwe Rommel, @cindex Beebe, Nelson +@cindex Buening, Andreas @cindex Colombo, Antonio @cindex Deifik, Scott @cindex DuBois, John @@ -1484,7 +1500,8 @@ internationalization features. @cindex Wallin, Anders @cindex Zaretskii, Eli Nelson Beebe, -Antonio Colombo +Andreas Buening, +Antonio Colombo, Scott Deifik, John H. DuBois III, Darrel Hankerson, @@ -1766,7 +1783,12 @@ For example, on OS/2 and MS-DOS, it is @kbd{@value{CTL}-z}.) 
@cindex @command{awk} programs, running, without input files As an example, the following program prints a friendly piece of advice (from Douglas Adams's @cite{The Hitchhiker's Guide to the Galaxy}), -to keep you from worrying about the complexities of computer programming +to keep you from worrying about the complexities of computer +programming@footnote{If you use @command{bash} as your shell, you should execute +the command @samp{set +H} before running this program interactively, +to disable the @command{csh}-style command history, which treats +@samp{!} as a special character. We recommend putting this command into +your personal startup file.} (@code{BEGIN} is a feature we haven't discussed yet): @example @@ -2008,7 +2030,7 @@ The next @value{SUBSECTION} describes the shell's quoting rules. @cindex quoting, rules for @menu -* DOS Quoting:: Quoting in MS-DOS Batch Files. +* DOS Quoting:: Quoting in MS-DOS Batch Files. @end menu For short to medium length @command{awk} programs, it is most convenient @@ -3335,13 +3357,19 @@ They were added as part of the POSIX standard to make @command{awk} and @command{egrep} consistent with each other. @cindex @command{gawk}, interval expressions and -However, because old programs may use @samp{@{} and @samp{@}} in regexp -constants, by default @command{gawk} does @emph{not} match interval expressions -in regexps. If either @option{--posix} or @option{--re-interval} are specified -(@pxref{Options}), then interval expressions -are allowed in regexps. +Initially, because old programs may use @samp{@{} and @samp{@}} in regexp +constants, +@command{gawk} did @emph{not} match interval expressions +in regexps. + +However, +beginning with version 3.2 @strong{(FIXME: version)} +@command{gawk} does match interval expressions by default. +This is because compatibility with POSIX has become more +important to most @command{gawk} users than compatibility with +old programs. 
-For new programs that use @samp{@{} and @samp{@}} in regexp constants, +For programs that use @samp{@{} and @samp{@}} in regexp constants, it is good practice to always escape them with a backslash. Then the regexp constants are valid and work the way you want them to, using any version of @command{awk}.@footnote{Use two backslashes if you're @@ -3523,6 +3551,22 @@ For our purposes, a @dfn{word} is a sequence of one or more letters, digits, or underscores (@samp{_}): @table @code +@c @cindex operators, @code{\s} (@command{gawk}) +@cindex backslash (@code{\}), @code{\s} operator (@command{gawk}) +@cindex @code{\} (backslash), @code{\s} operator (@command{gawk}) +@item \s +Matches any whitespace character. +Think of it as shorthand for +@w{@code{[[:space:]]}}. + +@c @cindex operators, @code{\S} (@command{gawk}) +@cindex backslash (@code{\}), @code{\S} operator (@command{gawk}) +@cindex @code{\} (backslash), @code{\S} operator (@command{gawk}) +@item \S +Matches any character that is not whitespace. +Think of it as shorthand for +@w{@code{[^[:space:]]}}. + @c @cindex operators, @code{\w} (@command{gawk}) @cindex backslash (@code{\}), @code{\w} operator (@command{gawk}) @cindex @code{\} (backslash), @code{\w} operator (@command{gawk}) @@ -3639,7 +3683,6 @@ GNU regexp operators. GNU regexp operators described in @ref{Regexp Operators}. @end ifnottex -However, interval expressions are not supported. @item @code{--posix} Only POSIX regexps are supported; the GNU operators are not special @@ -3655,10 +3698,9 @@ treated literally, even if they represent regexp metacharacters. Also, @command{gawk} silently skips directories named on the command line. @item @code{--re-interval} -Allow interval expressions in regexps, even if @option{--traditional} -has been provided. (@option{--posix} automatically enables -interval expressions, so @option{--re-interval} is redundant -when @option{--posix} is is used.) 
+Allow interval expressions in regexps, if @option{--traditional} +has been provided. +Otherwise, interval expressions are available by default. @end table @c ENDOFRANGE gregexp @c ENDOFRANGE regexpg @@ -4014,9 +4056,13 @@ used with it do not have to be named on the @command{awk} command line * Changing Fields:: Changing the Contents of a Field. * Field Separators:: The field separator and how to change it. * Constant Size:: Reading constant width data. +* Splitting By Content:: Defining Fields By Content * Multiple Line:: Reading multi-line records. * Getline:: Reading files under explicit program control using the @code{getline} function. +* BEGINFILE/ENDFILE:: Two special patterns for advanced control. +* Command line directories:: What happens if you put a directory on the + command line. @end menu @node Records @@ -4571,7 +4617,7 @@ The intervening field, @code{$5}, is created with an empty value (indicated by the second pair of adjacent colons), and @code{NF} is updated with the value six. -@c FIXME: Verify that this is in POSIX +@strong{FIXME:} Verify that this is in POSIX. @cindex dark corner, @code{NF} variable, decrementing @cindex @code{NF} variable, decrementing Decrementing @code{NF} throws away the values of the fields @@ -5236,6 +5282,117 @@ read some records, and then restore the original settings (@pxref{Passwd Functions}, for an example of such a function). +@node Splitting By Content +@section Defining Fields By Content + +@ifnotinfo +@quotation NOTE +This @value{SECTION} discusses an advanced +feature of @command{gawk}. If you are a novice @command{awk} user, +you might want to skip it on the first reading. +@end quotation +@end ifnotinfo + +@ifinfo +(This @value{SECTION} discusses an advanced feature of @command{gawk}. +If you are a novice @command{awk} user, you might want to skip it on +the first reading.)
+@end ifinfo + +@cindex advanced features, specifying field content +Normally, when using @code{FS}, @command{gawk} defines the fields as the +parts of the record that occur in between each field separator. In other +words, @code{FS} defines what a field @emph{is not}, and not what a field +@emph{is}. +However, there are times when you really want to define the fields by +what they are, and not by what they are not. + +The most notorious such case +is so-called Comma-Separated-Value (CSV) data. Many spreadsheet programs, +for example, can export their data into text files, where each record is +terminated with a newline, and fields are separated by commas. If only +commas separated the data, there wouldn't be an issue. The problem comes when +one of the fields contains an @emph{embedded} comma. While there is no +formal standard specification for CSV data@footnote{At least, we don't know of one.}, +in such cases, most programs embed the field in double quotes. So we might +have data like this: + +@example +@c file eg/misc/addresses.csv +Robbins,Arnold,"1234 A Pretty Street, NE",MyTown,MyState,12345-6789,USA +@c endfile +@end example + +The @code{FPAT} variable offers a solution for cases like this. +The value of @code{FPAT} should be a string that provides a regular expression. +This regular expression describes the contents of each field. + +In the case of CSV data as presented above, each field is either ``anything that +is not a comma,'' or ``a double quote, anything that is not a double quote, and a +closing double quote.'' If written as a regular expression constant +(@pxref{Regexp}), +we would have @code{/([^,]+)|("[^"]+")/}. 
+Writing this as a string requires us to escape the double quotes, leading to: + +@example +FPAT = "([^,]+)|(\"[^\"]+\")" +@end example + +Putting this to use, here is a simple program to parse the data: + +@example +@c file eg/misc/simple-csv.awk +BEGIN @{ + FPAT = "([^,]+)|(\"[^\"]+\")" +@} + +@{ + print "NF = ", NF + for (i = 1; i <= NF; i++) @{ + printf("$%d = <%s>\n", i, $i) + @} +@} +@c endfile +@end example + +When run, we get the following: + +@example +$ @kbd{gawk -f simple-csv.awk addresses.csv} +NF = 7 +$1 = <Robbins> +$2 = <Arnold> +$3 = <"1234 A Pretty Street, NE"> +$4 = <MyTown> +$5 = <MyState> +$6 = <12345-6789> +$7 = <USA> +@end example + +Note the embedded comma in the value of @code{$3}. + +A straightforward improvement when processing CSV data of this sort +would be to remove the quotes when they occur, with something like this: + +@example +if (substr($i, 1, 1) == "\"") @{ + len = length($i) + $i = substr($i, 2, len - 2) # Get text within the two quotes +@} +@end example + +As with @code{FS}, the @code{IGNORECASE} variable (@pxref{User-modified}) +affects field splitting with @code{FPAT}. + +@quotation NOTE +Some programs export CSV data that contains embedded newlines between +the double quotes. @command{gawk} provides no way to deal with this. +Since there is no formal specification for CSV data, there isn't much +more to be done; +the @code{FPAT} mechanism provides an elegant solution for the majority +of cases, and the @command{gawk} maintainer is satisfied with that. 
+@end quotation + @node Multiple Line @section Multiple-Line Records @@ -5436,6 +5593,8 @@ rest of this @value{DOCUMENT} and have a good knowledge of how @command{awk} wor @cindex @code{ERRNO} variable @cindex differences in @command{awk} and @command{gawk}, @code{getline} command @cindex @code{getline} command, return values +@cindex @code{--sandbox} option, input redirection with @command{getline} + The @code{getline} command returns one if it finds a record and zero if it encounters the end of the file. If there is some error in getting a record, such as a file that cannot be opened, then @code{getline} @@ -5445,6 +5604,10 @@ returns @minus{}1. In this case, @command{gawk} sets the variable In the following examples, @var{command} stands for a string value that represents a shell command. +@quotation NOTE +When @option{--sandbox} is specified, reading lines from files, pipes and coprocesses is disabled. +@end quotation + @menu * Plain Getline:: Using @code{getline} with no arguments. * Getline/Variable:: Using @code{getline} into a variable. @@ -5920,6 +6083,90 @@ listing which built-in variables are set by each one. @c ENDOFRANGE inex @c ENDOFRANGE infir +@node BEGINFILE/ENDFILE +@section The @code{BEGINFILE} and @code{ENDFILE} Special Patterns +@cindex @code{BEGINFILE} special pattern +@cindex @code{ENDFILE} special pattern + +@strong{FIXME:} Get the version right. +@quotation NOTE +This @value{SECTION} describes a @command{gawk}-specific feature +added in @command{gawk} 3.X. +@end quotation + +Two special kinds of rule, @code{BEGINFILE} and @code{ENDFILE}, give you ``hooks'' +into @command{gawk}'s command-line file processing loop. As with the @code{BEGIN} +and @code{END} rules (@pxref{BEGIN/END}), +all @code{BEGINFILE} rules in a program are merged, +in the order they are read by @command{gawk}, and all @code{ENDFILE} rules are +merged as well. 
+ +The body of the @code{BEGINFILE} rules is executed just before @command{gawk} +reads the first record from a file. @code{FILENAME} is set to the name of the current file, +and @code{FNR} is set to zero. + +The @code{BEGINFILE} rule provides you the opportunity for two +tasks that would otherwise be difficult or impossible to perform: + +@enumerate 1 +@item +You can test if the file is readable. +Normally, it is a fatal error if a file named on the command line cannot be +opened for reading. However, you can +bypass the fatal error and move on to the next file on the command line. + +You do this by checking if +the @code{ERRNO} variable is not +the empty string; if so, then @command{gawk} was not able to open the file. In +this case, your program can execute the @code{nextfile} statement (@pxref{Nextfile Statement}). +This causes @command{gawk} to skip the file entirely. +Otherwise, @command{gawk} will exit with the usual fatal error. + +@item +If you have written extensions that modify the record handling (by inserting +an ``open hook''), you can invoke them at this point, before @command{gawk} +has started processing the file. (This is a @emph{very} advanced feature, +currently used only by the @uref{http://xgawk.sourceforge.net, XMLgawk project}.) +@end enumerate + +The @code{ENDFILE} rule is called when @command{gawk} has finished processing +the last record in an input file. It will be called before any @code{END} rules. + +Normally, when an error occurs when reading input in the normal input processing +loop, the error is fatal. However, if an @code{ENDFILE} rule is present, the +error becomes non-fatal, and instead @code{ERRNO} is set. This makes it possible +to catch and process I/O errors at the level of the @command{awk} program. + +The @code{next} statement is not allowed inside either a @code{BEGINFILE} or +an @code{ENDFILE} rule. The @code{nextfile} statement is allowed only inside +a @code{BEGINFILE} rule, but not inside an @code{ENDFILE} rule.
+ +The @code{getline} statement (@pxref{Getline}) is restricted inside both @code{BEGINFILE} +and @code{ENDFILE}. Only the @samp{getline @var{variable} < @var{file}} form is +allowed. + +@code{BEGINFILE} and @code{ENDFILE} are @command{gawk} extensions. +In most other @command{awk} implementations, +or if @command{gawk} is in compatibility mode +(@pxref{Options}), +they are not special. + + +@node Command line directories +@section Directories On The Command Line +@cindex directories, command line +@cindex command line, directories on + +According to POSIX, files named on the @command{awk} command line must be +text files. The behavior is ``undefined'' if they are not. Most versions +of @command{awk} treat a directory on the command line as a fatal error. + +@strong{FIXME:} Get the version right. +Starting with version 3.x of @command{gawk}, a directory on the command line +produces a warning, but is otherwise skipped. If either of the @option{--posix} +or @option{--traditional} options is given, then @command{gawk} reverts to +treating directories on the command line as a fatal error. + @node Printing @chapter Printing Output @@ -6699,12 +6946,17 @@ on the @code{print} statement @cindex output redirection @cindex redirection of output +@cindex @code{--sandbox} option, output redirection with @command{print}, @command{printf} So far, the output from @code{print} and @code{printf} has gone to the standard output, usually the terminal. Both @code{print} and @code{printf} can also send their output to other places. This is called @dfn{redirection}. +@quotation NOTE +When @option{--sandbox} is specified, redirecting output to files and pipes is disabled. +@end quotation + A redirection appears after the @code{print} or @code{printf} statement. Redirections in @command{awk} are written just like redirections in shell commands, except that they are written inside the @command{awk} program. @@ -6923,7 +7175,6 @@ process-related information, and TCP/IP networking. 
@menu * Special FD:: Special files for I/O. -* Special Process:: Special files for process information. * Special Network:: Special files for network communications. * Special Caveats:: Things to watch out for. @end menu @@ -7024,93 +7275,25 @@ It is a common error to omit the quotes, which leads to confusing results. @c Exercise: What does it do? :-) -@node Special Process -@subsection Special Files for Process-Related Information - -@cindex files, for process information -@cindex process information, files for -@command{gawk} also provides special @value{FN}s that give access to information -about the running @command{gawk} process. Each of these ``files'' provides -a single record of information. To read them more than once, they must -first be closed with the @code{close} function -(@pxref{Close Files And Pipes}). -The @value{FN}s are: - -@c @cindex @code{/dev/pid} special file -@c @cindex @code{/dev/pgrpid} special file -@c @cindex @code{/dev/ppid} special file -@c @cindex @code{/dev/user} special file -@table @file -@item /dev/pid -Reading this file returns the process ID of the current process, -in decimal form, terminated with a newline. - -@item /dev/ppid -Reading this file returns the parent process ID of the current process, -in decimal form, terminated with a newline. - -@item /dev/pgrpid -Reading this file returns the process group ID of the current process, -in decimal form, terminated with a newline. - -@item /dev/user -Reading this file returns a single record terminated with a newline. -The fields are separated with spaces. The fields represent the -following information: - -@table @code -@item $1 -The return value of the @code{getuid} system call -(the real user ID number). - -@item $2 -The return value of the @code{geteuid} system call -(the effective user ID number). - -@item $3 -The return value of the @code{getgid} system call -(the real group ID number). 
- -@item $4 -The return value of the @code{getegid} system call -(the effective group ID number). -@end table - -If there are any additional fields, they are the group IDs returned by -the @code{getgroups} system call. -(Multiple groups may not be supported on all systems.) -@end table - -These special @value{FN}s may be used on the command line as @value{DF}s, -as well as for I/O redirections within an @command{awk} program. -They may not be used as source files with the @option{-f} option. - -@c @cindex automatic warnings -@c @cindex warnings, automatic -@quotation NOTE -The special files that provide process-related information are now considered -obsolete and will disappear entirely -in the next release of @command{gawk}. -@command{gawk} prints a warning message every time you use one of -these files. -To obtain process-related information, use the @code{PROCINFO} array. -@xref{Auto-set}. -@end quotation +Finally, using the @code{close} function on a @value{FN} of the +form @code{"/dev/fd/@var{N}"}, for file descriptor numbers +above two, will actually close the given file descriptor. @node Special Network @subsection Special Files for Network Communications @cindex networks, support for @cindex TCP/IP, support for -Starting with @value{PVERSION} 3.1 of @command{gawk}, @command{awk} programs +@command{awk} programs can open a two-way TCP/IP connection, acting as either a client or a server. This is done using a special @value{FN} of the form: @example -@file{/inet/@var{protocol}/@var{local-port}/@var{remote-host}/@var{remote-port}} +@file{/@var{net-type}/@var{protocol}/@var{local-port}/@var{remote-host}/@var{remote-port}} @end example +The @var{net-type} is one of @samp{inet}, @samp{inet4} or @samp{inet6}. The @var{protocol} is one of @samp{tcp}, @samp{udp}, or @samp{raw}, and the other fields represent the other essential pieces of information for making a networking connection.
@@ -7388,35 +7571,6 @@ different implementations vary in what they report when closing pipes; thus the return value cannot be used portably. @value{DARKCORNER} -@ignore -@c 4/27/2003: Commenting this out for now, given the above -@c return of 16-bit value -The return value for closing a pipeline is particularly useful. -It allows you to get the output from a command as well as its -exit status. -@c 8/21/2002, FIXME: Maybe the code and this doc should be adjusted to -@c create values indicating death-by-signal? Sigh. - -@cindex pipes, closing -@cindex POSIX @command{awk}, pipes@comma{} closing -For POSIX-compliant systems, -if the exit status is a number above 128, then the program -was terminated by a signal. Subtract 128 to get the signal number: - -@example -exit_val = close(command) -if (exit_val > 128) - print command, "died with signal", exit_val - 128 -else - print command, "exited with code", exit_val -@end example - -Currently, in @command{gawk}, this only works for commands -piping into @code{getline}. For commands piped into -from @code{print} or @code{printf}, the -return value from @code{close} is that of the library's -@code{pclose} function. -@end ignore @c ENDOFRANGE ifc @c ENDOFRANGE ofc @c ENDOFRANGE pc @@ -7441,32 +7595,30 @@ variables, array references, constants, and function calls, as well as combinations of these with various operators. @menu +* Values:: Constants, Variables, and Regular Expressions. +* All Operators:: @command{gawk}'s operators. +* Truth Values and Conditions:: Testing for true and false. +* Function Calls:: A function call is an expression. +* Precedence:: How various operators nest. +@end menu + +@node Values +@section Constants, Variables and Conversions + +Expressions are built up from values and the operations performed +upon them. This @value{SECTION} describes the elementary objects +which provide values used in expressions. + +@menu * Constants:: String, numeric and regexp constants. 
* Using Constant Regexps:: When and how to use a regexp constant. * Variables:: Variables give names to values for later use. * Conversion:: The conversion of strings to numbers and vice versa. -* Arithmetic Ops:: Arithmetic operations (@samp{+}, @samp{-}, - etc.) -* Concatenation:: Concatenating strings. -* Assignment Ops:: Changing the value of a variable or a field. -* Increment Ops:: Incrementing the numeric value of a variable. -* Truth Values:: What is ``true'' and what is ``false''. -* Typing and Comparison:: How variables acquire types and how this - affects comparison of numbers and strings with - @samp{<}, etc. -* Boolean Ops:: Combining comparison expressions using boolean - operators @samp{||} (``or''), @samp{&&} - (``and'') and @samp{!} (``not''). -* Conditional Exp:: Conditional expressions select between two - subexpressions under control of a third - subexpression. -* Function Calls:: A function call is an expression. -* Precedence:: How various operators nest. @end menu @node Constants -@section Constant Expressions +@subsection Constant Expressions @cindex constants, types of The simplest type of expression is the @dfn{constant}, which always has @@ -7484,7 +7636,7 @@ have different forms, but are stored identically internally. @end menu @node Scalar Constants -@subsection Numeric and String Constants +@subsubsection Numeric and String Constants @cindex numeric, constants A @dfn{numeric constant} stands for a number. This number can be an @@ -7520,7 +7672,7 @@ Other @command{awk} implementations may have difficulty with some character codes. 
@node Nondecimal-numbers -@subsection Octal and Hexadecimal Numbers +@subsubsection Octal and Hexadecimal Numbers @cindex octal numbers @cindex hexadecimal numbers @cindex numbers, octal @@ -7620,7 +7772,7 @@ $ gawk 'BEGIN @{ printf "0x11 is <%s>\n", 0x11 @}' @end example @node Regexp Constants -@subsection Regular Expression Constants +@subsubsection Regular Expression Constants @c STARTOFRANGE rec @cindex regexp constants @@ -7631,12 +7783,12 @@ $ gawk 'BEGIN @{ printf "0x11 is <%s>\n", 0x11 @}' A regexp constant is a regular expression description enclosed in slashes, such as @code{@w{/^beginning and end$/}}. Most regexps used in @command{awk} programs are constant, but the @samp{~} and @samp{!~} -matching operators can also match computed or ``dynamic'' regexps +matching operators can also match computed or dynamic regexps (which are just ordinary strings or variables that contain a regexp). @c ENDOFRANGE cnst @node Using Constant Regexps -@section Using Regular Expression Constants +@subsection Using Regular Expression Constants @cindex dark corner, regexp constants When used on the righthand side of the @samp{~} or @samp{!~} @@ -7749,7 +7901,7 @@ this way is probably not what was intended. @c ENDOFRANGE rec @node Variables -@section Variables +@subsection Variables @cindex variables, user-defined @cindex user-defined, variables @@ -7766,7 +7918,7 @@ on the @command{awk} command line. @end menu @node Using Variables -@subsection Using Variables in a Program +@subsubsection Using Variables in a Program Variables let you give names to values and refer to them later. Variables have already been used in many of the examples. The name of a variable @@ -7779,7 +7931,7 @@ variable's current value. Variables are given new values with @dfn{assignment operators}, @dfn{increment operators}, and @dfn{decrement operators}. @xref{Assignment Ops}. -@c NEXT ED: Can also be changed by sub, gsub, split +@strong{FIXME: NEXT ED:} Can also be changed by sub, gsub, split. 
@cindex variables, built-in @cindex variables, initializing @@ -7798,7 +7950,7 @@ is zero if converted to a number. There is no need to which is what you would do in C and in most other traditional languages. @node Assignment Options -@subsection Assigning Variables on the Command Line +@subsubsection Assigning Variables on the Command Line @cindex variables, assigning on command line @cindex command line, variables@comma{} assigning on @@ -7864,7 +8016,7 @@ sequences @value{DARKCORNER} @node Conversion -@section Conversion of Strings and Numbers +@subsection Conversion of Strings and Numbers @cindex converting, strings to numbers @cindex strings, converting @@ -8019,8 +8171,22 @@ representation can have an unusual but important effect on the way @command{gawk} converts some special string values to numbers. The details are presented in @ref{POSIX Floating Point Problems}. +@node All Operators +@section Operators: Doing Something With Values + +This @value{SECTION} introduces the @dfn{operators} which make use +of the values provided by constants and variables. + +@menu +* Arithmetic Ops:: Arithmetic operations (@samp{+}, @samp{-}, + etc.) +* Concatenation:: Concatenating strings. +* Assignment Ops:: Changing the value of a variable or a field. +* Increment Ops:: Incrementing the numeric value of a variable. +@end menu + @node Arithmetic Ops -@section Arithmetic Operators +@subsection Arithmetic Operators @cindex arithmetic operators @cindex operators, arithmetic @c @cindex addition @@ -8135,7 +8301,7 @@ For maximum portability, do not use the @samp{**} operator. @end quotation @node Concatenation -@section String Concatenation +@subsection String Concatenation @cindex Kernighan, Brian @quotation @i{It seemed like a good idea at the time.}@* @@ -8268,7 +8434,7 @@ when doing concatenation, @emph{parenthesize}. Otherwise, you're never quite sure what you'll get. 
@node Assignment Ops -@section Assignment Expressions +@subsection Assignment Expressions @c STARTOFRANGE asop @cindex assignment operators @c STARTOFRANGE opas @@ -8525,7 +8691,7 @@ freely available versions described in @c ENDOFRANGE asop @node Increment Ops -@section Increment and Decrement Operators +@subsection Increment and Decrement Operators @c STARTOFRANGE inop @cindex increment operators @@ -8646,8 +8812,29 @@ You should avoid such things in your own programs. @c ENDOFRANGE opde @c ENDOFRANGE deop +@node Truth Values and Conditions +@section Truth Values and Conditions + +In certain contexts, expression values also serve as ``truth values;'' i.e., +they determine what should happen next as the program runs. This +@value{SECTION} describes how @command{awk} defines ``true'' and ``false'' +and how values are compared. + +@menu +* Truth Values:: What is ``true'' and what is ``false''. +* Typing and Comparison:: How variables acquire types and how this + affects comparison of numbers and strings with + @samp{<}, etc. +* Boolean Ops:: Combining comparison expressions using boolean + operators @samp{||} (``or''), @samp{&&} + (``and'') and @samp{!} (``not''). +* Conditional Exp:: Conditional expressions select between two + subexpressions under control of a third + subexpression. +@end menu + @node Truth Values -@section True and False in @command{awk} +@subsection True and False in @command{awk} @cindex truth values @cindex logical false/true @cindex false, logical @@ -8682,7 +8869,7 @@ the string constant @code{"0"} is actually true, because it is non-null. @value{DARKCORNER} @node Typing and Comparison -@section Variable Typing and Comparison Expressions +@subsection Variable Typing and Comparison Expressions @quotation @i{The Guide is definitive. Reality is frequently inaccurate.}@* The Hitchhiker's Guide to the Galaxy @@ -8712,7 +8899,7 @@ compares variables. 
@end menu @node Variable Typing -@subsection String Type Versus Numeric Type +@subsubsection String Type Versus Numeric Type @cindex numeric, strings @cindex strings, numeric @@ -8869,7 +9056,7 @@ $ echo ' +3.14' | gawk '@{ print $1 == 3.14 @}' @i{True} @end example @node Comparison Operators -@subsection Comparison Operators +@subsubsection Comparison Operators @dfn{Comparison expressions} compare strings or numbers for relationships such as equality. They are written using @dfn{relational @@ -9031,7 +9218,7 @@ where this is discussed in more detail. @c ENDOFRANGE varting @node Boolean Ops -@section Boolean Expressions +@subsection Boolean Expressions @cindex and Boolean-logic operator @cindex or Boolean-logic operator @cindex not Boolean-logic operator @@ -9174,7 +9361,7 @@ The reason it's there is to avoid printing the bracketing @c ENDOFRANGE boex @node Conditional Exp -@section Conditional Expressions +@subsection Conditional Expressions @cindex conditional expressions @cindex expressions, conditional @cindex expressions, selecting @@ -9290,6 +9477,11 @@ are omitted in calls to user-defined functions, then those arguments are treated as local variables and initialized to the empty string (@pxref{User-defined}). +As an advanced feature, @command{gawk} provides indirect function calls, +which is a way to choose the function to call at runtime, instead of +when you write the source code to your program. We defer discussion of +this feature until later; see @ref{Indirect Calls}. + @cindex side effects, function calls Like every other expression, the function call has a value, which is computed by the function based on the arguments you give it. In this @@ -10420,14 +10612,6 @@ for more information on this version of the @code{for} loop. @cindex @code{case} keyword @cindex @code{default} keyword -@quotation NOTE -This @value{SUBSECTION} describes an experimental feature -added in @command{gawk} 3.1.3. It is @emph{not} enabled by default.
To -enable it, use the @option{--enable-switch} option to @command{configure} -when @command{gawk} is being configured and built. -@xref{Additional Configuration Options}, for more information. -@end quotation - The @code{switch} statement allows the evaluation of an expression and the execution of statements based on a @code{case} match. Case statements are checked for a match in the order they are defined. If no suitable @@ -10483,6 +10667,9 @@ the @code{print} statement is executed and then falls through into the the @minus{}1 case will also be executed since the @code{default} does not halt execution. +This feature is a @command{gawk} extension, and is not available in +POSIX @command{awk}. + @node Break Statement @subsection The @code{break} Statement @cindex @code{break} statement @@ -10755,6 +10942,9 @@ inconsistent. When it appeared after @code{next}, @samp{file} was a keyword; otherwise, it was a regular identifier. The old usage is no longer accepted; @samp{next file} generates a syntax error. +The @code{nextfile} statement has a special purpose when used inside a +@code{BEGINFILE} rule; see @ref{BEGINFILE/ENDFILE}. + @node Exit Statement @subsection The @code{exit} Statement @@ -10915,7 +11105,7 @@ Its default value is @code{"%.6g"}. This is a space-separated list of columns that tells @command{gawk} how to split input with fixed columnar boundaries. Assigning a value to @code{FIELDWIDTHS} -overrides the use of @code{FS} for field splitting. +overrides the use of @code{FS} and @code{FPAT} for field splitting. @xref{Constant Size}, for more information. @cindex @command{gawk}, @code{FIELDWIDTHS} variable in @@ -10924,6 +11114,23 @@ If @command{gawk} is in compatibility mode has no special meaning, and field-splitting operations occur based exclusively on the value of @code{FS}. 
+@cindex @code{FPAT} variable +@cindex differences in @command{awk} and @command{gawk}, @code{FPAT} variable +@cindex field separators, @code{FPAT} variable and +@cindex separators, field, @code{FPAT} variable and +@item FPAT # +This is a regular expression (as a string) that tells @command{gawk} +to create the fields based on text that matches the regular expression. +Assigning a value to @code{FPAT} +overrides the use of @code{FS} and @code{FIELDWIDTHS} for field splitting. +@xref{Splitting By Content}, for more information. + +@cindex @command{gawk}, @code{FPAT} variable in +If @command{gawk} is in compatibility mode +(@pxref{Options}), then @code{FPAT} +has no special meaning, and field-splitting operations occur based +exclusively on the value of @code{FS}. + @cindex @code{FS} variable @cindex separators, field @cindex field separators @@ -10936,7 +11143,7 @@ record. If the value is the null string (@code{""}), then each character in the record becomes a separate field. (This behavior is a @command{gawk} extension. POSIX @command{awk} does not specify the behavior when @code{FS} is the null string.) -@c NEXT ED: Mark as common extension +@strong{FIXME: NEXT ED:} Mark as common extension. @cindex POSIX @command{awk}, @code{FS} variable and The default value is @w{@code{" "}}, a string consisting of a single @@ -11186,8 +11393,15 @@ If a system error occurs during a redirection for @code{getline}, during a read for @code{getline}, or during a @code{close} operation, then @code{ERRNO} contains a string describing the error. +@strong{FIXME:} Get the version right. +Starting with @value{PVERSION} 3.X, @command{gawk} clears @code{ERRNO} +before opening each command line input file. This enables checking if +the file is readable inside a @code{BEGINFILE} pattern (@pxref{BEGINFILE/ENDFILE}). + +Otherwise, @code{ERRNO} works similarly to the C variable @code{errno}. 
-In particular @command{gawk} @emph{never} clears it (sets it +Except for the case just mentioned, +@command{gawk} @emph{never} clears it (sets it to zero or @code{""}). Thus, you should only expect its value to be meaningful when an I/O operation returns a failure value, such as @code{getline} returning @minus{}1. @@ -11269,8 +11483,9 @@ The value of the @code{geteuid} system call. @item PROCINFO["FS"] This is -@code{"FS"} if field splitting with @code{FS} is in effect, or it is -@code{"FIELDWIDTHS"} if field splitting with @code{FIELDWIDTHS} is in effect. +@code{"FS"} if field splitting with @code{FS} is in effect, +@code{"FIELDWIDTHS"} if field splitting with @code{FIELDWIDTHS} is in effect, +or it is @code{"FPAT"} if field matching with @code{FPAT} is in effect. @item PROCINFO["gid"] The value of the @code{getgid} system call. @@ -11444,7 +11659,7 @@ before actual processing of the input begins. of each way of removing elements from @code{ARGV}. The following fragment processes @code{ARGV} in order to examine, and then remove, command-line options: -@c NEXT ED: Add xref to rewind() function +@strong{FIXME: NEXT ED:} Add xref to rewind() function. @example BEGIN @{ @@ -11518,13 +11733,7 @@ Thus, you cannot have a variable and an array with the same name in the same @command{awk} program. @menu -* Array Intro:: Introduction to Arrays -* Reference to Elements:: How to examine one element of an array. -* Assigning Elements:: How to change an element of an array. -* Array Example:: Basic Example of an Array -* Scanning an Array:: A variation of the @code{for} statement. It - loops through the indices of an array's - existing elements. +* Array Basics:: The basics of arrays. * Delete:: The @code{delete} statement removes an element from an array. * Numeric Array Subscripts:: How to use numbers as subscripts in @@ -11532,12 +11741,28 @@ same @command{awk} program. * Uninitialized Subscripts:: Using Uninitialized variables as subscripts. 
* Multi-dimensional:: Emulating multidimensional arrays in @command{awk}. -* Multi-scanning:: Scanning multidimensional arrays. * Array Sorting:: Sorting array values and indices. @end menu +@node Array Basics +@section The Basics of Arrays + +This @value{SECTION} presents the basics: working with elements +in arrays one at a time, and traversing all of the elements in +an array. + +@menu +* Array Intro:: Introduction to Arrays +* Reference to Elements:: How to examine one element of an array. +* Assigning Elements:: How to change an element of an array. +* Array Example:: Basic Example of an Array +* Scanning an Array:: A variation of the @code{for} statement. It + loops through the indices of an array's + existing elements. +@end menu + @node Array Intro -@section Introduction to Arrays +@subsection Introduction to Arrays @cindex Wall, Larry @quotation @@ -11578,7 +11803,7 @@ A contiguous array of four elements might look like the following example, conceptually, if the element values are 8, @code{"foo"}, @code{""}, and 30: -@c NEXT ED: Use real images here +@strong{FIXME: NEXT ED:} Use real images here @iftex @c from Karl Berry, much thanks for the help. @tex @@ -11696,7 +11921,7 @@ is independent of the number of elements in the array. @c ENDOFRANGE inarr @node Reference to Elements -@section Referring to an Array Element +@subsection Referring to an Array Element @cindex arrays, elements, referencing @cindex elements in arrays @@ -11758,7 +11983,7 @@ if (frequencies[2] != "") @end example @node Assigning Elements -@section Assigning Array Elements +@subsection Assigning Array Elements @cindex arrays, elements, assigning @cindex elements in arrays, assigning @@ -11776,7 +12001,7 @@ assigned a value. The expression @var{value} is the value to assign to that element of the array. 
@node Array Example -@section Basic Array Example +@subsection Basic Array Example The following program takes a list of lines, each beginning with a line number, and prints them out in order of line number. The line numbers @@ -11844,7 +12069,7 @@ END @{ @end example @node Scanning an Array -@section Scanning All Elements of an Array +@subsection Scanning All Elements of an Array @cindex elements in arrays, scanning @cindex arrays, scanning @@ -12136,6 +12361,10 @@ on the command line (@pxref{Options}). @node Multi-dimensional @section Multidimensional Arrays +@menu +* Multi-scanning:: Scanning multidimensional arrays. +@end menu + @cindex subscripts in arrays, multidimensional @cindex arrays, multidimensional A multidimensional array is an array in which an element is identified @@ -12232,7 +12461,7 @@ the program produces the following output: @end example @node Multi-scanning -@section Scanning Multidimensional Arrays +@subsection Scanning Multidimensional Arrays There is no special @code{for} statement for scanning a ``multidimensional'' array. There cannot be one, because, in truth, there @@ -12390,6 +12619,8 @@ We said previously that comparisons are done using @command{gawk}'s ``usual comparison rules.'' Because @code{IGNORECASE} affects string comparisons, the value of @code{IGNORECASE} also affects sorting for both @code{asort} and @code{asorti}. +Note also that the locale's sorting order does @emph{not} +come into play; comparisons are based on character values only. Caveat Emptor. @c ENDOFRANGE arrs @@ -12414,6 +12645,7 @@ The second half of this @value{CHAPTER} describes these @menu * Built-in:: Summarizes the built-in functions. * User-defined:: Describes User-defined functions in detail. +* Indirect Calls:: Choosing the function to call at runtime. @end menu @node Built-in @@ -12777,7 +13009,7 @@ at which that substring begins (one, if it starts at the beginning of @var{string}). If no match is found, it returns zero. 
The @var{regexp} argument may be either a regexp constant -(@samp{/@dots{}/}) or a string constant (@var{"@dots{}"}). +(@code{/@dots{}/}) or a string constant (@code{"@dots{}"}). In the latter case, the string is treated as a regexp to be matched. @ref{Computed Regexps}, for a discussion of the difference between the two forms, and the @@ -12884,22 +13116,51 @@ The @var{array} argument to @code{match} is a (@pxref{Options}), using a third argument is a fatal error. -@item split(@var{string}, @var{array} @r{[}, @var{fieldsep}@r{]}) +@item patsplit(@var{string}, @var{array} @r{[}, @var{fieldpat} @r{[}, @var{seps} @r{]} @r{]}) +@cindex @code{patsplit} function +This function divides @var{string} into pieces defined by @var{fieldpat} +and stores the pieces in @var{array} and the separator strings in the +@var{seps} array. The first piece is stored in +@code{@var{array}[1]}, the second piece in @code{@var{array}[2]}, and so +forth. The string value of the third argument, @var{fieldpat}, is +a regexp describing the fields in @var{string} (just as @code{FPAT} is +a regexp describing the fields in input records). If +@var{fieldpat} is omitted, the value of @code{FPAT} is used. +@code{patsplit} returns the number of elements created. +@code{@var{seps}[@var{i}]} is +the separator string +between @code{@var{array}[@var{i}]} and @code{@var{array}[@var{i}+1]}. +Any leading separator will be in @code{@var{seps}[0]}. + +The @code{patsplit} function splits strings into pieces in a +manner similar to the way input lines are split into fields using @code{FPAT}. + +@item split(@var{string}, @var{array} @r{[}, @var{fieldsep} @r{[}, @var{seps} @r{]} @r{]}) @cindex @code{split} function This function divides @var{string} into pieces separated by @var{fieldsep} -and stores the pieces in @var{array}. The first piece is stored in +and stores the pieces in @var{array} and the separator strings in the +@var{seps} array. 
The first piece is stored in @code{@var{array}[1]}, the second piece in @code{@var{array}[2]}, and so forth. The string value of the third argument, @var{fieldsep}, is a regexp describing where to split @var{string} (much as @code{FS} can be a regexp describing where to split input records). If @var{fieldsep} is omitted, the value of @code{FS} is used. @code{split} returns the number of elements created. +@var{seps} is a @command{gawk} extension with @code{@var{seps}[@var{i}]} +being the separator string +between @code{@var{array}[@var{i}]} and @code{@var{array}[@var{i}+1]}. +If @var{fieldsep} is a single +space then any leading whitespace goes into @code{@var{seps}[0]} and +any trailing +whitespace goes into @code{@var{seps}[@var{n}]} where @var{n} is the +return value of +@code{split()} (that is, the number of elements in @var{array}). The @code{split} function splits strings into pieces in a manner similar to the way input lines are split into fields. For example: @example -split("cul-de-sac", a, "-") +split("cul-de-sac", a, "-", seps) @end example @noindent @@ -12913,12 +13174,20 @@ a[2] = "de" a[3] = "sac" @end example +and sets the contents of the array @code{seps} as follows: + +@example +seps[1] = "-" +seps[2] = "-" +@end example + @noindent The value returned by this call to @code{split} is three. @cindex differences in @command{awk} and @command{gawk}, @code{split} function As with input field-splitting, when the value of @var{fieldsep} is -@w{@code{" "}}, leading and trailing whitespace is ignored, and the elements +@w{@code{" "}}, leading and trailing whitespace is ignored in +@var{array} but not in @var{seps}, and the elements are separated by runs of whitespace. Also as with input field-splitting, if @var{fieldsep} is the null string, each individual character in the string is split into its own array element. 
@@ -12939,7 +13208,7 @@ discussion of the difference between using a string constant or a regexp constan and the implications for writing your program correctly. Before splitting the string, @code{split} deletes any previously existing -elements in the array @var{array}. +elements in the arrays @var{array} and @var{seps}. If @var{string} is null, the array has no elements. (So this is a portable way to delete an entire array with one statement. @@ -13001,7 +13270,7 @@ changed by replacing the matched text with @var{replacement}. The modified string becomes the new value of @var{target}. The @var{regexp} argument may be either a regexp constant -(@samp{/@dots{}/}) or a string constant (@var{"@dots{}"}). +(@code{/@dots{}/}) or a string constant (@code{"@dots{}"}). In the latter case, the string is treated as a regexp to be matched. @ref{Computed Regexps}, for a discussion of the difference between the two forms, and the @@ -13535,15 +13804,12 @@ These rules are presented in @ref{table-posix-2001-sub}. The only case where the difference is noticeable is the last one: @samp{\\\\} is seen as @samp{\\} and produces @samp{\} instead of @samp{\\}. -Starting with version 3.1.4, @command{gawk} follows the POSIX rules +Starting with version 3.1.4, @command{gawk} followed the POSIX rules when @option{--posix} is specified (@pxref{Options}). Otherwise, -it continues to follow the 1996 proposed rules, since, as of this -writing, that has been its behavior for over seven years. +it continued to follow the 1996 proposed rules, since +that had been its behavior for many years. -@quotation NOTE -At the next major release, @command{gawk} will switch to using -the POSIX 2001 rules by default. -@end quotation +As of version 3.2, @command{gawk} uses the POSIX 2001 rules. The rules for @code{gensub} are considerably simpler.
At the runtime level, whenever @command{gawk} sees a @samp{\}, if the following character @@ -13733,11 +13999,17 @@ close("/bin/sh") @noindent @cindex troubleshooting, @code{system} function +@cindex @code{--sandbox} option, disabling @command{system} function However, if your @command{awk} program is interactive, @code{system} is useful for cranking up large self-contained programs, such as a shell or an editor. Some operating systems cannot implement the @code{system} function. @code{system} causes a fatal error if it is not supported. + +@quotation NOTE +When @option{--sandbox} is specified, the @code{system} function is disabled. +@end quotation + @end table @c fakenode --- for prepinfo @@ -14189,7 +14461,7 @@ is set to UTC: @example #! /bin/sh # -# date --- approximate the P1003.2 'date' command +# date --- approximate the POSIX 'date' command case $1 in -u) TZ=UTC0 # use UTC @@ -14197,9 +14469,8 @@ case $1 in shift ;; esac -@c FIXME: One day, change %d to %e, when C 99 is common. gawk 'BEGIN @{ - format = "%a %b %d %H:%M:%S %Z %Y" + format = "%a %b %e %H:%M:%S %Z %Y" exitval = 0 if (ARGC > 2) @@ -14631,7 +14902,7 @@ before all uses of the function. This is because @command{awk} reads the entire program before starting to execute any of it. The definition of a function named @var{name} looks like this: -@c NEXT ED: put [ ] around parameter list +@strong{FIXME: NEXT ED:} put [ ] around parameter list. @example function @var{name}(@var{parameter-list}) @@ -14728,7 +14999,7 @@ If the resulting string is non-null, the action is executed. This is probably not what is desired. (@command{awk} accepts this input as syntactically valid, because functions may be used before they are defined in @command{awk} programs.) -@c NEXT ED: This won't actually run, since foo() is undefined ... +@strong{FIXME: NEXT ED:} This won't actually run, since foo() is undefined ... 
@cindex portability, functions@comma{} defining To ensure that your @command{awk} programs are portable, always use the @@ -14825,7 +15096,6 @@ The following example uses the built-in @code{strftime} function to create an @command{awk} version of @code{ctime}: @cindex @code{ctime} user-defined function -@c FIXME: One day, change %d to %e, when C 99 is common. @example @c file eg/lib/ctime.awk # ctime.awk @@ -14834,7 +15104,7 @@ to create an @command{awk} version of @code{ctime}: function ctime(ts, format) @{ - format = "%a %b %d %H:%M:%S %Z %Y" + format = "%a %b %e %H:%M:%S %Z %Y" if (ts == 0) ts = systime() # use current time as default return strftime(format, ts) @@ -15091,6 +15361,362 @@ BEGIN @{ Usually, such things aren't a big issue, but it's worth being aware of them. @c ENDOFRANGE udfunc + +@node Indirect Calls +@section Indirect Function Calls + +@cindex indirect function calls +@cindex function calls, indirect +@cindex function pointers +@cindex pointers to functions +@cindex differences in @command{awk} and @command{gawk}, indirect function calls + +This section describes a @command{gawk}-specific extension. + +Often, you may wish to defer the choice of function to call until runtime. +For example, you may have different kinds of records, each of which +should be processed differently. + +Normally, you would have to use a series of @code{if}-@code{else} +statements to decide which function to call. By using @dfn{indirect} +function calls, you can specify the name of the function to call as a +string variable, and then call the function. Let's look at an example. + +Suppose you have a file with your test scores for the classes you +are taking. The first field is the class name. The following fields +are the functions to call to process the data, up to a ``marker'' +field @samp{data:}. Following the marker, to the end of the record, +are the various numeric test scores. 
+ +Here is the initial file; you wish to get the sum and the average of +your test scores: + +@example +@c file eg/data/class_data1 +Biology_101 sum average data: 87.0 92.4 78.5 94.9 +Chemistry_305 sum average data: 75.2 98.3 94.7 88.2 +English_401 sum average data: 100.0 95.6 87.1 93.4 +@c endfile +@end example + +To process the data, you might write initially: + +@example +@{ + class = $1 + for (i = 2; $i != "data:"; i++) @{ + if ($i == "sum") + sum() # processes the whole record + else if ($i == "average") + average() + @dots{} # and so on + @} +@} +@end example + +@noindent +This style of programming works, but can be awkward. With @dfn{indirect} +function calls, you tell @command{gawk} to use the @emph{value} of a +variable as the name of the function to call. + +The syntax is similar to that of a regular function call: an identifier +immediately followed by a left parenthesis, any arguments, and then +a closing right parenthesis, with the addition of a leading @code{@@} +character: + +@example +the_func = "sum" +result = @@the_func() # calls the `sum' function +@end example + +Here is a full program that processes the previously shown data, +using indirect function calls. 
+ +@example +@c file eg/prog/indirectcall.awk +# indirectcall.awk --- Demonstrate indirect function calls +@c endfile +@ignore +@c file eg/prog/indirectcall.awk +# +# Arnold Robbins, arnold@skeeve.com, Public Domain +# January 2009 +@c endfile +@end ignore + +@c file eg/prog/indirectcall.awk +# average --- return the average of the values in fields $first - $last + +function average(first, last, sum, i) +@{ + sum = 0; + for (i = first; i <= last; i++) + sum += $i + + return sum / (last - first + 1) +@} + +# sum --- return the sum of the values in fields $first - $last + +function sum(first, last, ret, i) +@{ + ret = 0; + for (i = first; i <= last; i++) + ret += $i + + return ret +@} +@c endfile +@end example + +These two functions expect to work on fields; thus the parameters +@code{first} and @code{last} indicate where in the fields to start. +Otherwise they perform the expected computations and are not unusual. + +@example +@c file eg/prog/indirectcall.awk +# For each record, print the class name and the requested statistics + +@{ + class_name = $1 + gsub(/_/, " ", class_name) # Replace _ with spaces + + # find start + for (i = 1; i <= NF; i++) @{ + if ($i == "data:") @{ + start = i + 1 + break + @} + @} + + printf("%s:\n", class_name) + for (i = 2; $i != "data:"; i++) @{ + the_function = $i + printf("\t%s: <%s>\n", $i, @@the_function(start, NF) "") + @} + print "" +@} +@c endfile +@end example + +This is the main processing for each record. It prints the class name (with +underscores replaced with spaces). It then finds the start of the actual data, +saving it in @code{start}. +The last part of the code loops through each function name (from @code{$2} up to +the marker, @samp{data:}), calling the function named by the field. The indirect +function call itself occurs as a parameter in the call to @code{printf}. +(The @code{printf} format string uses @samp{%s} as the format specifier so that we +can use functions that return strings, as well as numbers.
Note that the result +from the indirect call is concatenated with the empty string, in order to force +it to be a string value.) + +Here is the result of running the program: + +@example +$ @kbd{gawk -f indirectcall.awk class_data1} +@result{} Biology 101: +@result{} sum: <352.8> +@result{} average: <88.2> +@result{} +@result{} Chemistry 305: +@result{} sum: <356.4> +@result{} average: <89.1> +@result{} +@result{} English 401: +@result{} sum: <376.1> +@result{} average: <94.025> +@end example + +The ability to use indirect function calls is more powerful than you may +think at first. The C and C++ languages provide ``function pointers,'' which +are a mechanism for calling a function chosen at runtime. One of the most +well-known uses of this ability is the C @code{qsort} function, which sorts +an array using the well-known ``quick sort'' algorithm +(see @uref{http://en.wikipedia.org/wiki/Quick_sort, the Wikipedia article} +for more information). To use this function, you supply a pointer to a comparison +function. This mechanism allows you to sort arbitrary data in an arbitrary +fashion. + +We can do something similar using @command{gawk}, like this: + +@example +@c file eg/lib/quicksort.awk +# quicksort.awk --- Quicksort algorithm, with user-supplied +# comparison function +@c endfile +@ignore +@c file eg/lib/quicksort.awk +# +# Arnold Robbins, arnold@skeeve.com, Public Domain +# January 2009 +@c endfile + +@end ignore +@c file eg/lib/quicksort.awk +# quicksort --- C.A.R. Hoare's quick sort algorithm.
See Wikipedia +# or almost any algorithms or computer science text +@c endfile +@ignore +@c file eg/lib/quicksort.awk +# +# Adapted from K&R-II, page 110 +@end ignore +@c file eg/lib/quicksort.awk + +function quicksort(data, left, right, less_than, i, last) +@{ + if (left >= right) # do nothing if array contains fewer + return # than two elements + + quicksort_swap(data, left, int((left + right) / 2)) + last = left + for (i = left + 1; i <= right; i++) + if (@@less_than(data[i], data[left])) + quicksort_swap(data, ++last, i) + quicksort_swap(data, left, last) + quicksort(data, left, last - 1, less_than) + quicksort(data, last + 1, right, less_than) +@} + +# quicksort_swap --- helper function for quicksort, should really be inline + +function quicksort_swap(data, i, j, temp) +@{ + temp = data[i] + data[i] = data[j] + data[j] = temp +@} +@c endfile +@end example + +The @code{quicksort} function receives the @code{data} array, the starting and ending +indices to sort (@code{left} and @code{right}), and the name of a function that +performs a ``less than'' comparison. It then implements the quick sort algorithm. + +To make use of the sorting function, we return to our previous example. The +first thing to do is write some comparison functions: + +@example +@c file eg/prog/indirectcall.awk +# num_lt --- do a numeric less than comparison + +function num_lt(left, right) +@{ + return ((left + 0) < (right + 0)) +@} + +# num_ge --- do a numeric greater than or equal to comparison + +function num_ge(left, right) +@{ + return ((left + 0) >= (right + 0)) +@} +@c endfile +@end example + +The @code{num_ge} function is needed to perform a descending sort; when used +to perform a ``less than'' test, it actually does the opposite (greater than +or equal to), which yields data sorted in descending order. + +Next comes a sorting function. It is parameterized with the starting and +ending field numbers and the comparison function. 
It builds an array with +the data and calls @code{quicksort} appropriately, and then formats the +results as a single string: + +@example +@c file eg/prog/indirectcall.awk +# do_sort --- sort the data according to `compare' and return it as a string + +function do_sort(first, last, compare, data, i, retval) +@{ + delete data + for (i = 1; first <= last; first++) @{ + data[i] = $first + i++ + @} + + quicksort(data, 1, i-1, compare) + + retval = data[1] + for (i = 2; i in data; i++) + retval = retval " " data[i] + + return retval +@} +@c endfile +@end example + +Finally, the two sorting functions call @code{do_sort}, passing in the +names of the two comparison functions: + +@example +@c file eg/prog/indirectcall.awk +# sort --- sort the data in ascending order and return it as a string + +function sort(first, last) +@{ + return do_sort(first, last, "num_lt") +@} + +# rsort --- sort the data in descending order and return it as a string + +function rsort(first, last) +@{ + return do_sort(first, last, "num_ge") +@} +@c endfile +@end example + +Here is an extended version of the data file: + +@example +@c file eg/data/class_data2 +Biology_101 sum average sort rsort data: 87.0 92.4 78.5 94.9 +Chemistry_305 sum average sort rsort data: 75.2 98.3 94.7 88.2 +English_401 sum average sort rsort data: 100.0 95.6 87.1 93.4 +@c endfile +@end example + +Finally, here are the results when the enhanced program is run: + +@example +$ @kbd{gawk -f quicksort.awk -f indirectcall.awk class_data2} +@result{} Biology 101: +@result{} sum: <352.8> +@result{} average: <88.2> +@result{} sort: <78.5 87.0 92.4 94.9> +@result{} rsort: <94.9 92.4 87.0 78.5> +@result{} +@result{} Chemistry 305: +@result{} sum: <356.4> +@result{} average: <89.1> +@result{} sort: <75.2 88.2 94.7 98.3> +@result{} rsort: <98.3 94.7 88.2 75.2> +@result{} +@result{} English 401: +@result{} sum: <376.1> +@result{} average: <94.025> +@result{} sort: <87.1 93.4 95.6 100.0> +@result{} rsort: <100.0 95.6 93.4 87.1> +@end 
example + +Remember that you must supply a leading @samp{@@} in front of an indirect function call. + +Unfortunately, indirect function calls cannot be used with the built-in functions. However, +you can generally write ``wrapper'' functions which call the built-in ones, and those can +be called indirectly. (Other than, perhaps, the mathematical functions, there is not a lot +of reason to try to call the built-in functions indirectly.) + +@command{gawk} does its best to make indirect function calls efficient. For example: + +@example +for (i = 1; i <= n; i++) + @@the_func() +@end example + +@noindent +@command{gawk} will look up the actual function to call only once. + @c ENDOFRANGE funcud @node Internationalization @@ -15496,7 +16122,7 @@ be extracted to create the initial @file{.po} file. As part of translation, it is often helpful to rearrange the order in which arguments to @code{printf} are output. -@command{gawk}'s @option{--gen-po} command-line option extracts +@command{gawk}'s @option{--gen-pot} command-line option extracts the messages and is discussed next. After that, @code{printf}'s ability to rearrange the order for @code{printf} arguments at runtime @@ -15512,25 +16138,25 @@ is covered. @subsection Extracting Marked Strings @cindex strings, extracting @cindex marked strings@comma{} extracting -@cindex @code{--gen-po} option +@cindex @code{--gen-pot} option @cindex command-line options, string extraction @cindex string extraction (internationalization) @cindex marked string extraction (internationalization) @cindex extraction, of marked strings (internationalization) -@cindex @code{--gen-po} option +@cindex @code{--gen-pot} option Once your @command{awk} program is working, and all the strings have been marked and you've set (and perhaps bound) the text domain, it is time to produce translations.
-First, use the @option{--gen-po} command-line option to create +First, use the @option{--gen-pot} command-line option to create the initial @file{.po} file: @example -$ gawk --gen-po -f guide.awk > guide.po +$ gawk --gen-pot -f guide.awk > guide.po @end example @cindex @code{xgettext} utility -When run with @option{--gen-po}, @command{gawk} does not execute your +When run with @option{--gen-pot}, @command{gawk} does not execute your program. Instead, it parses it as usual and prints all marked strings to standard output in the format of a GNU @code{gettext} Portable Object file. Also included in the output are any constant strings that @@ -15739,10 +16365,10 @@ BEGIN @{ @end example @noindent -Run @samp{gawk --gen-po} to create the @file{.po} file: +Run @samp{gawk --gen-pot} to create the @file{.po} file: @example -$ gawk --gen-po -f guide.awk > guide.po +$ gawk --gen-pot -f guide.awk > guide.po @end example @noindent @@ -16162,6 +16788,10 @@ using regular pipes. @cindex TCP/IP @cindex @code{/inet/} files (@command{gawk}) @cindex files, @code{/inet/} (@command{gawk}) +@cindex @code{/inet4/} files (@command{gawk}) +@cindex files, @code{/inet4/} (@command{gawk}) +@cindex @code{/inet6/} files (@command{gawk}) +@cindex files, @code{/inet6/} (@command{gawk}) @cindex @code{EMISTERED} @quotation @code{EMISTERED}: @i{A host is a host from coast to coast,@* @@ -16179,13 +16809,21 @@ another process on another system across an IP networking connection. You can think of this as just a @emph{very long} two-way pipeline to a coprocess. The way @command{gawk} decides that you want to use TCP/IP networking is -by recognizing special @value{FN}s that begin with @samp{/inet/}. +by recognizing special @value{FN}s that begin with one of @samp{/inet/}, +@samp{/inet4/} or @samp{/inet6/}. The full syntax of the special @value{FN} is -@file{/inet/@var{protocol}/@var{local-port}/@var{remote-host}/@var{remote-port}}.
+@file{/@var{net-type}/@var{protocol}/@var{local-port}/@var{remote-host}/@var{remote-port}}. The components are: @table @var +@item net-type +Specifies the kind of Internet connection to make. +Use @samp{/inet4/} to force IPv4, and +@samp{/inet6/} to force IPv6. +Plain @samp{/inet/} (which used to be the only option) uses +the system default, most likely IPv4. + @item protocol The protocol to use over IP. This must be either @samp{tcp}, @samp{udp}, or @samp{raw}, for a TCP, UDP, or raw IP connection, @@ -16193,8 +16831,7 @@ respectively. The use of TCP is recommended for most applications. @cindex raw sockets @cindex sockets -@strong{Caution:} The use of raw sockets is not currently supported -in @value{PVERSION} 3.1 of @command{gawk}. +@strong{Caution:} The use of raw sockets is not currently supported. @item local-port @cindex @code{getservbyname} function (C library) @@ -16601,8 +17238,8 @@ full details. * Other Arguments:: Input file names and variable assignments. * AWKPATH Variable:: Searching directories for @command{awk} programs. -* Obsolete:: Obsolete Options and/or features. * Exit Status:: @command{gawk}'s exit status. +* Obsolete:: Obsolete Options and/or features. * Undocumented:: Undocumented Options and Features. * Known Bugs:: Known Bugs in @command{gawk}. @end menu @@ -16712,6 +17349,7 @@ variables may lead to surprising results. @command{awk} will reset the values of those variables as it needs to, possibly ignoring any predefined value you may have given. +@ignore @item -mf @var{N} @itemx -mr @var{N} @cindex @code{-mf}/@code{-mr} options @@ -16724,6 +17362,7 @@ for compatibility but otherwise ignored by @command{gawk}, since @command{gawk} has no predefined limits. (The Bell Laboratories @command{awk} no longer needs these options; it continues to accept them to avoid breaking old programs.) +@end ignore @item -W @var{gawk-opt} @cindex @code{-W} option @@ -16751,23 +17390,26 @@ by the user that could start with @samp{-}. 
@c ENDOFRANGE gnulo @c ENDOFRANGE longo -The previous list described options mandated by the POSIX standard, -as well as options available in the Bell Laboratories version of @command{awk}. +The previous list described options mandated by the POSIX standard. The following list describes @command{gawk}-specific options: @table @code -@item -O -@itemx --optimize -@cindex @code{--optimize} option -@cindex @code{-O} option -Enables some optimizations on the internal representation of the program. -At the moment this includes just simple constant folding. The @command{gawk} -maintainer hopes to add more optimizations over time. +@item -b +@itemx --characters-as-bytes +@cindex @code{-b} option +@cindex @code{--characters-as-bytes} option +Causes @command{gawk} to treat all input data as single-byte characters. +Normally, @command{gawk} follows the POSIX standard and attempts to process +its input data according to the current locale. This can often involve +converting multi-byte characters into wide characters (internally), and +can lead to problems or confusion if the input data does not contain valid +multi-byte characters. This option is an easy way to tell @command{gawk}: +``hands off my data!''. -@item -W compat -@itemx -W traditional +@item -c @itemx --compat @itemx --traditional +@cindex @code{-c} option @cindex @code{--compat} option @cindex @code{--traditional} option @cindex compatibility mode (@command{gawk}), specifying @@ -16779,24 +17421,22 @@ like the Bell Laboratories research version of Unix @command{awk}. which summarizes the extensions. Also see @ref{Compatibility Mode}. -@item -W copyright +@item -C @itemx --copyright +@itemx --copyleft +@cindex @code{-C} option @cindex @code{--copyright} option +@cindex @code{--copyleft} option @cindex GPL (General Public License), printing Print the short version of the General Public License and then exit. -@item -W copyleft -@itemx --copyleft -@cindex @code{--copyleft} option -Just like @option{--copyright}. 
-This option may disappear in a future version of @command{gawk}. - +@item -d @r{[}@var{file}@r{]} +@itemx --dump-variables@r{[}=@var{file}@r{]} +@cindex @code{-d} option @cindex @code{--dump-variables} option @cindex @code{awkvars.out} file @cindex files, @code{awkvars.out} @cindex variables, global, printing list of -@item -W dump-variables@r{[}=@var{file}@r{]} -@itemx --dump-variables@r{[}=@var{file}@r{]} Prints a sorted list of global variables, their types, and final values to @var{file}. If no @var{file} is provided, @command{gawk} prints this list to the file named @file{awkvars.out} in the current directory. @@ -16810,8 +17450,21 @@ inadvertently use global variables that you meant to be local. (This is a particularly easy mistake to make with simple variable names like @code{i}, @code{j}, etc.) -@item -W exec @var{file} +@item -e @var{program-text} +@itemx --source @var{program-text} +@cindex @code{-e} option +@cindex @code{--source} option +@cindex source code, mixing +Allows you to mix source code in files with source +code that you enter on the command line. +Program source code is taken from the @var{program-text}. +This is particularly useful +when you have library functions that you want to use from your command-line +programs (@pxref{AWKPATH Variable}). + +@item -E @var{file} @itemx --exec @var{file} +@cindex @code{-E} option @cindex @code{--exec} option @cindex @command{awk} programs, location of @cindex CGI, @command{awk} scripts for @@ -16828,14 +17481,15 @@ that pass arguments through the URL; using this option prevents a malicious with @samp{#!} scripts (@pxref{Executable Scripts}), like so: @example -#! /usr/local/bin/gawk --exec +#! 
/usr/local/bin/gawk -E @var{awk program here @dots{}} @end example -@item -W gen-po -@itemx --gen-po -@cindex @code{--gen-po} option +@item -g +@itemx --gen-pot +@cindex @code{-g} option +@cindex @code{--gen-pot} option @cindex portable object files, generating @cindex files, portable object, generating Analyzes the source program and @@ -16844,10 +17498,10 @@ output for all string constants that have been marked for translation. @xref{Internationalization}, for information about this option. -@item -W help -@itemx -W usage +@item -h @itemx --help @itemx --usage +@cindex @code{-h} option @cindex @code{--help} option @cindex @code{--usage} option @cindex GNU long options, printing list of @@ -16856,8 +17510,9 @@ for information about this option. Prints a ``usage'' message summarizing the short and long style options that @command{gawk} accepts and then exit. -@item -W lint@r{[}=fatal@r{]} -@itemx --lint@r{[}=fatal@r{]} +@item -l @r{[}value@r{]} +@itemx --lint@r{[}=value@r{]} +@cindex @code{-l} option @cindex @code{--lint} option @cindex lint checking, issuing warnings @cindex warnings, issuing @@ -16878,15 +17533,17 @@ problems pointed out by @option{--lint}, you should take care to search for all occurrences of each inappropriate construct. As @command{awk} programs are usually short, doing so is not burdensome. -@item -W lint-old +@item -L @itemx --lint-old +@cindex @code{-L} option @cindex @code{--lint-old} option Warns about constructs that are not available in the original version of @command{awk} from Version 7 Unix (@pxref{V7/SVR3.1}). -@item -W non-decimal-data +@item -n @itemx --non-decimal-data +@cindex @code{-n} option @cindex @code{--non-decimal-data} option @cindex hexadecimal values@comma{} enabling interpretation of @cindex octal values@comma{} enabling interpretation of @@ -16898,8 +17555,40 @@ values in input data @strong{Caution:} This option can severely break old programs. Use with care. 
-@item -W posix +@item -N +@itemx --use-lc-numeric +@cindex @code{-N} option +@cindex @code{--use-lc-numeric} option +This option forces the use of the locale's decimal point character +when parsing numeric input data (@pxref{Locales}). + +@item -O +@itemx --optimize +@cindex @code{--optimize} option +@cindex @code{-O} option +Enables some optimizations on the internal representation of the program. +At the moment this includes just simple constant folding. The @command{gawk} +maintainer hopes to add more optimizations over time. + +@item -p @r{[}@var{file}@r{]} +@itemx --profile@r{[}=@var{file}@r{]} +@cindex @code{-p} option +@cindex @code{--profile} option +@cindex @command{awk} programs, profiling, enabling +Enable profiling of @command{awk} programs +(@pxref{Profiling}). +By default, profiles are created in a file named @file{awkprof.out}. +The optional @var{file} argument allows you to specify a different +@value{FN} for the profile file. + +When run with @command{gawk}, the profile is just a ``pretty printed'' version +of the program. When run with @command{pgawk}, the profile contains execution +counts for each statement in the program in the left margin, and function +call counts for each function. + +@item -P @itemx --posix +@cindex @code{-P} option @cindex @code{--posix} option @cindex POSIX mode @cindex @command{gawk}, extensions@comma{} disabling @@ -16969,51 +17658,34 @@ If you supply both @option{--traditional} and @option{--posix} on the command line, @option{--posix} takes precedence. @command{gawk} also issues a warning if both options are supplied. -@item -W profile@r{[}=@var{file}@r{]} -@itemx --profile@r{[}=@var{file}@r{]} -@cindex @code{--profile} option -@cindex @command{awk} programs, profiling, enabling -Enable profiling of @command{awk} programs -(@pxref{Profiling}). -By default, profiles are created in a file named @file{awkprof.out}. -The optional @var{file} argument allows you to specify a different -@value{FN} for the profile file. 
- -When run with @command{gawk}, the profile is just a ``pretty printed'' version -of the program. When run with @command{pgawk}, the profile contains execution -counts for each statement in the program in the left margin, and function -call counts for each function. - -@item -W re-interval +@item -r @itemx --re-interval +@cindex @code{-r} option @cindex @code{--re-interval} option @cindex regular expressions, interval expressions and Allows interval expressions (@pxref{Regexp Operators}) in regexps. -Because interval expressions were traditionally not available in @command{awk}, -@command{gawk} does not provide them by default. This prevents old @command{awk} -programs from breaking. - -@item -W source @var{program-text} -@itemx --source @var{program-text} -@cindex @code{--source} option -@cindex source code, mixing -Allows you to mix source code in files with source -code that you enter on the command line. -Program source code is taken from the @var{program-text}. -This is particularly useful -when you have library functions that you want to use from your command-line -programs (@pxref{AWKPATH Variable}). - -@item -W use-lc-numeric -@itemx --use-lc-numeric -@cindex @code{--use-lc-numeric} option -This option forces the use of the locale's decimal point character -when parsing numeric input data (@pxref{Locales}). - -@item -W version +This is now the default behavior for @command{gawk}. +Nevertheless, this option remains for both backward compatibility, +and for use in combination with the @option{--traditional} option. + +@item -S +@itemx --sandbox +@cindex @code{-S} option +@cindex @code{--sandbox} option +@cindex sandbox mode +In sandbox mode, the @command{system} function, +input redirections with @command{getline}, +output redirections with @command{print} and @command{printf} +and dynamic extensions are disabled. 
+This is particularly useful when you want to run @command{awk} scripts +from questionable sources and need to make sure the scripts +can't access your system (other than the specified input data file). + +@item -V @itemx --version +@cindex @code{-V} option @cindex @code{--version} option @cindex @command{gawk}, versions of, information about@comma{} printing Prints version information for this particular copy of @command{gawk}. @@ -17271,24 +17943,27 @@ they will @emph{not} be in the next release). @c update this section for each release! +@ignore @cindex @code{next file} statement, deprecated @cindex @code{nextfile} statement, @code{next file} statement and +@end ignore For @value{PVERSION} @value{VERSION} of @command{gawk}, there are no deprecated command-line options @c or other deprecated features from the previous version of @command{gawk}. +@ignore The use of @samp{next file} (two words) for @code{nextfile} was deprecated in @command{gawk} 3.0 but still worked. Starting with @value{PVERSION} 3.1, the two-word usage is no longer accepted. +@end ignore -The process-related special files described in -@ref{Special Process}, -work as described, but -are now considered deprecated. -@command{gawk} prints a warning message every time they are used. +The process-related special files +@file{/dev/pid}, @file{/dev/ppid}, @file{/dev/pgrpid}, and +@file{/dev/user} were deprecated in @command{gawk} 3.1, but still +worked. As of @value{PVERSION} 3.2, they are no longer interpreted specially +by @command{gawk}. (Use @code{PROCINFO} instead; see @ref{Auto-set}.) -They will be removed from the next release of @command{gawk}. 
@ignore This @value{SECTION} @@ -19373,6 +20048,7 @@ function _pw_init( oldfs, oldrs, olddol0, pwcat, using_fw) oldrs = RS olddol0 = $0 using_fw = (PROCINFO["FS"] == "FIELDWIDTHS") + using_fpat = (PROCINFO["FS"] == "FPAT") FS = ":" RS = "\n" @@ -19388,6 +20064,8 @@ function _pw_init( oldfs, oldrs, olddol0, pwcat, using_fw) FS = oldfs if (using_fw) FIELDWIDTHS = FIELDWIDTHS + else if (using_fpat) + FPAT = FPAT RS = oldrs $0 = olddol0 @} @@ -19424,15 +20102,18 @@ field-splitting mechanism later. The test can only be true for @command{gawk}. It is false if using @code{FS} or on some other @command{awk} implementation. +The code that checks for using @code{FPAT} is similar. + The main part of the function uses a loop to read database lines, split the line into fields, and then store the line into each array as necessary. When the loop is done, @code{@w{_pw_init}} cleans up by closing the pipeline, -setting @code{@w{_pw_inited}} to one, and restoring @code{FS} (and @code{FIELDWIDTHS} +setting @code{@w{_pw_inited}} to one, and restoring @code{FS} +(and @code{FIELDWIDTHS} or @code{FPAT} if necessary), @code{RS}, and @code{$0}. The use of @code{@w{_pw_count}} is explained shortly. -@c NEXT ED: All of these functions don't need the ... in ... test. Just -@c return the array element, which will be "" if not already there. Duh. +@strong{FIXME: NEXT ED:} All of these functions don't need the ... in ... test. Just +return the array element, which will be "" if not already there. Duh. @cindex @code{getpwnam} function (C library) The @code{getpwnam} function takes a username as a string argument. If that user is in the database, it returns the appropriate line. 
Otherwise, it @@ -19738,6 +20419,7 @@ function _gr_init( oldfs, oldrs, olddol0, grcat, oldrs = RS olddol0 = $0 using_fw = (PROCINFO["FS"] == "FIELDWIDTHS") + using_fpat = (PROCINFO["FS"] == "FPAT") FS = ":" RS = "\n" @@ -19768,6 +20450,8 @@ function _gr_init( oldfs, oldrs, olddol0, grcat, FS = oldfs if (using_fw) FIELDWIDTHS = FIELDWIDTHS + else if (using_fpat) + FPAT = FPAT RS = oldrs $0 = olddol0 @} @@ -19783,7 +20467,8 @@ These routines follow the same general outline as the user database routines (@pxref{Passwd Functions}). The @code{@w{_gr_inited}} variable is used to ensure that the database is scanned no more than once. -The @code{@w{_gr_init}} function first saves @code{FS}, @code{FIELDWIDTHS}, @code{RS}, and +The @code{@w{_gr_init}} function first saves @code{FS}, +@code{RS}, and @code{$0}, and then sets @code{FS} and @code{RS} to the correct values for scanning the group information. @@ -19810,7 +20495,7 @@ the first time there were no names. This code adds the names with a leading comma. It also doesn't check that there is a @code{$4}.) Finally, @code{_gr_init} closes the pipeline to @command{grcat}, restores -@code{FS} (and @code{FIELDWIDTHS} if necessary), @code{RS}, and @code{$0}, +@code{FS} (and @code{FIELDWIDTHS} or @code{FPAT} if necessary), @code{RS}, and @code{$0}, initializes @code{_gr_count} to zero (it is used later), and makes @code{_gr_inited} nonzero. 
@@ -20953,7 +21638,7 @@ If the first argument is @option{-a}, then the flag variable Finally, @command{awk} is forced to read the standard input by setting @code{ARGV[1]} to @code{"-"} and @code{ARGC} to two: -@c NEXT ED: Add more leading commentary in this program +@strong{FIXME: NEXT ED:} Add more leading commentary in this program @cindex @code{tee.awk} program @example @c file eg/prog/tee.awk @@ -21407,12 +22092,11 @@ The @code{beginfile} function is simple; it just resets the counts of lines, words, and characters to zero, and saves the current @value{FN} in @code{fname}: -@c NEXT ED: make it lines = words = chars = 0 @example @c file eg/prog/wc.awk function beginfile(file) @{ - chars = lines = words = 0 + lines = words = chars = 0 fname = FILENAME @} @c endfile @@ -21430,14 +22114,13 @@ for the file that was just read. It relies on @code{beginfile} to reset the numbers for the following @value{DF}: @c ONE DAY: make the above footnote an exercise, instead of giving away the answer. -@c NEXT ED: make order for += be lines, words, chars @example @c file eg/prog/wc.awk function endfile(file) @{ - tchars += chars tlines += lines twords += words + tchars += chars if (do_lines) printf "\t%d", lines @group @@ -21513,8 +22196,8 @@ We hope you find them both interesting and enjoyable. * Simple Sed:: A Simple Stream Editor. * Igawk Program:: A wrapper for @command{awk} that includes files. -* Signature Program:: People do amazing things with too much time - on their hands. +* Signature Program:: People do amazing things with too much time on + their hands. @end menu @node Dupword Program @@ -22024,7 +22707,8 @@ END \ @c STARTOFRANGE worus @cindex words, usage counts@comma{} generating -@c NEXT ED: Rewrite this whole section and example +@strong{FIXME: NEXT ED:} Rewrite this whole section and example. + The following @command{awk} program prints the number of occurrences of each word in its input. 
It illustrates the associative nature of @command{awk} arrays by using strings as subscripts. It @@ -23583,8 +24267,8 @@ The @code{ERRNO} variable, which contains the system error message when @item The @file{/dev/pid}, @file{/dev/ppid}, @file{/dev/pgrpid}, and -@file{/dev/user} @value{FN} interpretation -(@pxref{Special Files}). +@file{/dev/user} @value{FN} interpretation. +(As of @value{PVERSION} 3.2, these names are no longer supported.) @item The ability to delete all of an array at once with @samp{delete @var{array}} @@ -23789,11 +24473,6 @@ pathnames that begin with @file{/p} as BSD portals (@pxref{Portal Files}). @item -The @option{--disable-directories-fatal} configuration option which -causes @command{gawk} to silently skip directories named on the -command line (@pxref{Additional Configuration Options}). - -@item The use of GNU Automake to help in standardizing the configuration process (@pxref{Quick Installation}). @@ -23848,6 +24527,67 @@ enable printing times as UTC (@pxref{Time Functions}). @end itemize +Version 3.2 of @command{gawk} introduced the following features: + +@itemize @bullet +@item +The special files @file{/dev/pid}, @file{/dev/ppid}, @file{/dev/pgrpid}, and +@file{/dev/user} were removed entirely +(@pxref{Obsolete}). + +@item +The @code{\s} and @code{\S} escape sequences in regular expressions +(@pxref{GNU Regexp Operators}). + +@item +Interval expressions became part of the default matching done if not +in POSIX mode or in compatibility mode. +(@pxref{Regexp Operators}). + +@item +The @code{split()} function was given the additional optional fourth +argument which is an array to hold the text of the field separators. +(@pxref{String Functions}). + +@item +The @code{BEGINFILE} and @code{ENDFILE} special patterns. +(@pxref{BEGINFILE/ENDFILE}). + +@item +The @code{switch} statement was enabled by default. +(@pxref{Switch Statement}). + +@item +The @option{--sandbox} and @option{--characters-as-bytes} options +(@pxref{Options}). 
+ +@item +Indirect function calls +(@pxref{Indirect Calls}). + +@item +The @option{--gen-po} command-line option was renamed @option{--gen-pot} +(@pxref{String Extraction}). + +@item +Directories on the command line produce a warning and are skipped +(@pxref{Command line directories}). + +@item +The @code{FPAT} variable and its effects +(@pxref{Splitting By Content}). + +@item +The @code{patsplit} function +(@pxref{String Functions}). + +@item +The @file{/inet4} and @samp{/inet6} special files for TCP/IP networking +using @samp{|&} to specify which version of the IP protocol to use. +(@pxref{TCP/IP Networking}). + +@end itemize + @c XXX ADD MORE STUFF HERE @c ENDOFRANGE fripls @@ -23990,11 +24730,9 @@ provided the initial port to Tandem systems and its documentation. @item @cindex Woehlke, Matthew -@cindex Wildenhues, Ralf Matthew Woehlke provided improvements for Tandem's POSIX-compliant systems. -Ralf Wildenhues now maintains this port. @item @cindex Brown, Martin @@ -24404,6 +25142,7 @@ There are several additional options you may use on the @command{configure} command line when compiling @command{gawk} from scratch, including: @table @code + @cindex @code{--enable-portals} configuration option @cindex configuration option, @code{--enable-portals} @item --enable-portals @@ -24412,13 +25151,6 @@ with @file{/p} as BSD portal files when doing two-way I/O with the @samp{|&} operator (@pxref{Portal Files}). -@cindex @code{--enable-switch} configuration option -@cindex configuration option, @code{--enable-switch} -@item --enable-switch -Enable the recognition and execution of C-style @code{switch} statements -in @command{awk} programs -(@pxref{Switch Statement}.) - @cindex @code{--with-whiny-user-strftime} configuration option @cindex configuration option, @code{--with-whiny-user-strftime} @item --with-whiny-user-strftime @@ -24451,11 +25183,6 @@ to fail. This option may be removed at a later date. Disable all message-translation facilities. 
This is usually not desirable, but it may bring you some slight performance improvement. - -@cindex @code{--disable-directories-fatal} configuration option -@cindex configuration option, @code{--disable-directories-fatal} -@item --disable-directories-fatal -Causes @command{gawk} to silently skip directories named on the command line. @end table As of version 3.1.5, the @option{--with-included-gettext} configuration @@ -24548,11 +25275,12 @@ distribution. @menu * PC Binary Installation:: Installing a prepared distribution. -* PC Compiling:: Compiling @command{gawk} for MS-DOS, Windows32, +* PC Compiling:: Compiling @command{gawk} for MS-DOS, + Windows32, and OS/2. +* PC Dynamic:: Compiling @command{gawk} for dynamic + libraries. +* PC Using:: Running @command{gawk} on MS-DOS, Windows32 and OS/2. -* PC Dynamic:: Compiling @command{gawk} for dynamic libraries. -* PC Using:: Running @command{gawk} on MS-DOS, Windows32 and - OS/2. * Cygwin:: Building and running @command{gawk} for Cygwin. * MSYS:: Using @command{gawk} In The MSYS Environment. @@ -24604,7 +25332,7 @@ development tools from DJ Delorie (DJGPP; MS-DOS only) or Eberhard Mattes (EMX; MS-DOS, Windows32 and OS/2). Microsoft Visual C/C++ can be used to build a Windows32 version, and Microsoft C/C++ can be used to build 16-bit versions for MS-DOS and OS/2. -@c FIXME: +@strong{FIXME:} (As of @command{gawk} 3.1.2, the MSC version doesn't work. However, the maintainer is working on fixing it.) The file @@ -25445,30 +26173,33 @@ as follows: @c not supported @cindex Brown, Martin @item BeOS @tab Martin Brown, @email{mc@@whoever.com}. -@end ignore -@cindex Deifik, Scott @c @cindex Hankerson, Darrel @item MS-DOS @tab Scott Deifik, @email{scottd.mail@@sbcglobal.net}. @c and Darrel Hankerson, @email{hankedr@@auburn.edu}. +@end ignore @cindex Zaretskii, Eli +@cindex Deifik, Scott @item MS-Windows using MINGW @tab Eli Zaretskii, @email{eliz@@gnu.org}. +@item @tab Scott Deifik, @email{scottd.mail@@sbcglobal.net}. 
@c not supported @ignore @cindex Grigera, Juan @item MS-Windows @tab Juan Grigera, @email{juan@@grigera.com.ar}. +@end ignore @cindex Buening, Andreas @item OS/2 @tab Andreas Buening, @email{andreas.buening@@nexgo.de} -@end ignore +@ignore @cindex Davies, Stephen @item Tandem @tab Stephen Davies, @email{scldad@@sdc.com.au}. -@cindex Wildenhues, Ralf -@item Tandem (POSIX-compliant) @tab Ralf Wildenhues @email{Ralf.Wildenhues@@gmx.de} +@cindex Woehlke, Matthew +@item Tandem (POSIX-compliant) @tab Matthew Woehlke @tab @email{mw_triad@@users.sourceforge.net} +@end ignore @cindex Rankin, Pat @item VMS @tab Pat Rankin, @email{rankin@@pactechdata.com}. @@ -26057,6 +26788,10 @@ be sure to recompile them for each new @command{gawk} release. There is no guarantee of binary compatibility between different releases, nor will there ever be such a guarantee. +@quotation NOTE +When @option{--sandbox} is specified, extensions are disabled. +@end quotation + @menu * Internals:: A brief look at some @command{gawk} internals. * Sample Library:: A example of new functions. @@ -26940,7 +27675,7 @@ Following is a list of probable improvements that will make @command{gawk} perform better: @table @asis -@c NEXT ED: remove this item. awka and mawk do these respectively +@strong{FIXME: NEXT ED:} remove this item. awka and mawk do these respectively. @item Compilation of @command{awk} programs @command{gawk} uses a Bison (YACC-like) parser to convert the script given it into a syntax tree; the syntax @@ -26997,7 +27732,7 @@ other introductory texts that you should refer to instead.) At the most basic level, the job of a program is to process some input data and produce results. -@c NEXT ED: Use real images here +@strong{FIXME: NEXT ED:} Use real images here @iftex @tex \expandafter\ifx\csname graph\endcsname\relax \csname newbox\endcsname\graph\fi @@ -27079,7 +27814,7 @@ instructions in your program to process the data. 
When you write a program, it usually consists of the following, very basic set of steps: -@c NEXT ED: Use real images here +@strong{FIXME: NEXT ED:} Use real images here @iftex @tex \expandafter\ifx\csname graph\endcsname\relax \csname newbox\endcsname\graph\fi @@ -27375,10 +28110,10 @@ This is worth reading if you are interested in the details, but it does require a background in computer science. @menu -* String Conversion Precision:: The String Value Can Lie. -* Unexpected Results:: Floating Point Numbers Are Not - Abstract Numbers. -* POSIX Floating Point Problems:: Standards Versus Existing Practice. +* String Conversion Precision:: The String Value Can Lie. +* Unexpected Results:: Floating Point Numbers Are Not Abstract + Numbers. +* POSIX Floating Point Problems:: Standards Versus Existing Practice. @end menu @node String Conversion Precision @@ -27734,6 +28469,7 @@ In addition, @code{BINMODE}, @code{ERRNO}, @code{FIELDWIDTHS}, +@code{FPAT}, @code{IGNORECASE}, @code{LINT}, @code{PROCINFO}, @@ -27894,9 +28630,12 @@ separated by whitespace (or by a separator regexp that you can change by setting the built-in variable @code{FS}). Such pieces are called fields. If the pieces are of fixed length, you can use the built-in variable @code{FIELDWIDTHS} to describe their lengths. +If you wish to specify the contents of fields instead of the field +separator, you can use the built-in variable @code{FPAT} to do so. (@xref{Field Separators}, +@ref{Constant Size}, and -@ref{Constant Size}.) +@ref{Splitting By Content}.) @item Flag A variable whose truth value indicates the existence or nonexistence @@ -28025,28 +28764,24 @@ meaning. Keywords are reserved and may not be used as variable names. 
@command{gawk}'s keywords are: @code{BEGIN}, @code{END}, -@code{if}, -@code{else}, -@code{while}, -@code{do@dots{}while}, -@code{for}, -@code{for@dots{}in}, @code{break}, +@code{case}, @code{continue}, +@code{default}, @code{delete}, -@code{next}, -@code{nextfile}, +@code{do@dots{}while}, +@code{else}, +@code{exit}, +@code{for@dots{}in}, +@code{for}, @code{function}, @code{func}, -and -@code{exit}. -If @command{gawk} was configured with the @option{--enable-switch} -option (@pxref{Switch Statement}), then +@code{if}, +@code{nextfile}, +@code{next}, @code{switch}, -@code{case}, and -@code{default} -are also keywords. +@code{while}. @cindex LGPL (Lesser General Public License) @cindex Lesser General Public License (LGPL) @@ -29522,6 +30257,171 @@ to permit their use in free software. @c ispell-local-pdict: "ispell-dict" @c End: +@node next-edition +@appendix To Do In The Next Edition + +Stuff for working on the manual + +@menu +* unresolved:: unresolved. +* revision:: revision. +* consistency:: consistency. +@end menu + +@node unresolved +@appendixsec Unresolved Issues + +@enumerate +@item +Robert J. Chassell points out that awk programs should have some indication +of how to use them. It would be useful to perhaps have a ``programming +style'' section of the manual that would include this and other tips. + +@item +The default AWKPATH search path should be configurable via @command{configure}. +The default and how this changes needs to be documented. +@end enumerate + +@node revision +@appendixsec Revisions To Make + +@enumerate 1 +@item +Talk about common extensions, those in nawk, gawk, mawk. +@item +Use @code{foo} for variables and @code{foo()} for functions. +@item +Standardize the error messages from the functions and programs +in Chapters 12 and 13. +@item +Nuke the BBS stuff and use something that won't be obsolete. 
+@end enumerate + + +@node consistency +@appendixsec Consistency Issues + +@itemize @bullet +@item +/.../ regexps are in @@code, not @@samp +@item +".." strings are in @@code, not @@samp +@item +no @@print before @@dots +@item +values of expressions in the text (@code{x} has the value 15), +should be in roman, not @@code +@item +Use TAB and not tab +@item +Use ESC and not ESCAPE +@item +Use space and not blank to describe the space bar's character +The term "blank" is thus basically reserved for "blank lines" etc. +@item +To make dark corners work, the @@value@{DARKCORNER@} has to be outside +closing `.' of a sentence and after (@@pxref@{@dots{}@}). This is +a change from earlier versions. +@item +" " should have an @w{} around it +@item +Use "non-" only with language names or acronyms, or the words bug and option +@item +Use @command{ftp} when talking about anonymous ftp +@item +Use uppercase and lowercase, not "upper-case" and "lower-case" +or "upper case" and "lower case" +@item +Use "single precision" and "double precision", +not "single-precision" or "double-precision" +@item +Use alphanumeric, not alpha-numeric +@item +Use POSIX-compliant, not POSIX compliant +@item +Use --foo, not -Wfoo when describing long options +@item +Use "Bell Laboratories", but not "Bell Labs". +@item +Use "behavior" instead of "behaviour". +@item +Use "zeros" instead of "zeroes". +@item +Use "nonzero" not "non-zero". +@item +Use "runtime" not "run time" or "run-time". +@item +Use "command-line" not "command line". +@item +Use "online" not "on-line". +@item +Use "whitespace" not "white space". +@item +Use "Input/Output", not "input/output". Also "I/O", not "i/o". +@item +Use "lefthand"/"righthand", not "left-hand"/"right-hand". +@item +Use "workaround", not "work-around". +@item +Use "startup"/"cleanup", not "start-up"/"clean-up" +@item +Use @code{do}, and not @code{do}-@code{while}, except where +actually discussing the do-while. +@item +Use "versus" in text and "vs." 
in index entries +@item +The words "a", "and", "as", "between", "for", "from", "in", "of", +"on", "that", "the", "to", "with", and "without", +should not be capitalized in @@chapter, @@section etc. +"Into" and "How" should. +@item +Search for @@dfn; make sure important items are also indexed. +@item +"e.g." should always be followed by a comma. +@item +"i.e." should always be followed by a comma. +@item +The numbers zero through ten should be spelled out, except when +talking about file descriptor numbers. > 10 and < 0, it's +ok to use numbers. +@item +In tables, put command-line options in @@code, while in the text, +put them in @@option. +@item +When using @@strong, use "Note:" or "Caution:" with colons and +not exclamation points. Do not surround the paragraphs +with @@quotation ... @@end quotation. +@item +For most cases, do NOT put a comma before "and", "or" or "but". +But exercise taste with this rule. +@item +Don't show the awk command with a program in quotes when it's +just the program. I.e. + +@example +@{ + @dots{} +@} +@end example + +@noindent +and not +@example +awk '@{ + @dots{} +@}' +@end example + +@item +Do show it when showing command-line arguments, data files, etc, even +if there is no output shown. + +@item +Use numbered lists only to show a sequential series of steps. + +@item +Use @@code@{xxx@} for the xxx operator in indexing statements, not @@samp. +@end itemize @node Index @unnumbered Index @@ -29645,35 +30545,3 @@ Make FIELDWIDTHS be an array? % 3. Standardize the error messages from the functions and programs % in Chapters 12 and 13. % 4. Nuke the BBS stuff and use something that won't be obsolete -% 5. 
Reorg chapters 5 & 7 like so: -%Chapter 5: -% - Constants, Variables, and Conversions -% + Constant Expressions -% + Using Regular Expression Constants -% + Variables -% + Conversion of Strings and Numbers -% - Operators -% + Arithmetic Operators -% + String Concatenation -% + Assignment Expressions -% + Increment and Decrement Operators -% - Truth Values and Conditions -% + True and False in Awk -% + Boolean Expressions -% + Conditional Expressions -% - Function Calls -% - Operator Precedence -% -%Chapter 7: -% - Array Basics -% + Introduction to Arrays -% + Referring to an Array Element -% + Assigning Array Elements -% + Basic Array Example -% + Scanning All Elements of an Array -% - The delete Statement -% - Using Numbers to Subscript Arrays -% - Using Uninitialized Variables as Subscripts -% - Multidimensional Arrays -% + Scanning Multidimensional Arrays -% - Sorting Array Values and Indices with gawk diff --git a/doc/gawkinet.info b/doc/gawkinet.info index 9124faa4..325dc5a9 100644 --- a/doc/gawkinet.info +++ b/doc/gawkinet.info @@ -6,12 +6,12 @@ START-INFO-DIR-ENTRY * Gawkinet: (gawkinet). TCP/IP Internetworking With `gawk'. END-INFO-DIR-ENTRY - This is Edition 1.1 of `TCP/IP Internetworking With `gawk'', for the -3.1.7 (or later) version of the GNU implementation of AWK. + This is Edition 1.2 of `TCP/IP Internetworking With `gawk'', for the +4.0.0 (or later) version of the GNU implementation of AWK. - Copyright (C) 2000, 2001, 2002, 2004, 2009 Free Software Foundation, -Inc. + Copyright (C) 2000, 2001, 2002, 2004, 2009, 2010 Free Software +Foundation, Inc. Permission is granted to copy, distribute and/or modify this document @@ -30,12 +30,12 @@ texts being (a) (see below), and with the Back-Cover Texts being (b) This file documents the networking features in GNU `awk'. - This is Edition 1.1 of `TCP/IP Internetworking With `gawk'', for the -3.1.7 (or later) version of the GNU implementation of AWK. 
+ This is Edition 1.2 of `TCP/IP Internetworking With `gawk'', for the +4.0.0 (or later) version of the GNU implementation of AWK. - Copyright (C) 2000, 2001, 2002, 2004, 2009 Free Software Foundation, -Inc. + Copyright (C) 2000, 2001, 2002, 2004, 2009, 2010 Free Software +Foundation, Inc. Permission is granted to copy, distribute and/or modify this document @@ -59,14 +59,14 @@ General Introduction ******************** This file documents the networking features in GNU Awk (`gawk') version -3.1 and later. +4.0 and later. - This is Edition 1.1 of `TCP/IP Internetworking With `gawk'', for the -3.1.7 (or later) version of the GNU implementation of AWK. + This is Edition 1.2 of `TCP/IP Internetworking With `gawk'', for the +4.0.0 (or later) version of the GNU implementation of AWK. - Copyright (C) 2000, 2001, 2002, 2004, 2009 Free Software Foundation, -Inc. + Copyright (C) 2000, 2001, 2002, 2004, 2009, 2010 Free Software +Foundation, Inc. Permission is granted to copy, distribute and/or modify this document @@ -299,7 +299,23 @@ IP and routing infrastructure of the Internet. Much like the phone company's switching centers or the Post Office's trucks, it is not of much day-to-day interest to the regular user (or programmer). - It happens to be a best effort datagram protocol. + It happens to be a best effort datagram protocol. In the early + twenty-first century, there are two versions of this protocol in + use: + + IPv4 + The original version of the Internet Protocol, with 32-bit + addresses, on which most of the current Internet is based. + + IPv6 + The "next generation" of the Internet Protocol, with 128-bit + addresses. This protocol is in wide use in certain parts of + the world, but has not yet replaced IPv4.(1) + + Versions of the other protocols that sit "atop" IP exist for both + IPv4 and IPv6. However, as the IPv6 versions are fundamentally the + same as the original IPv4 versions, we will not distinguish + further between them. UDP The User Datagram Protocol. 
This is a best effort datagram @@ -318,6 +334,10 @@ basic communications. Examples are SMTP (Simple Mail Transfer Protocol), FTP (File Transfer Protocol), and HTTP (HyperText Transfer Protocol). + ---------- Footnotes ---------- + + (1) There isn't an IPv5. + File: gawkinet.info, Node: Ports, Prev: Basic Protocols, Up: The TCP/IP Protocols @@ -326,7 +346,7 @@ File: gawkinet.info, Node: Ports, Prev: Basic Protocols, Up: The TCP/IP Proto In the postal system, the address on an envelope indicates a physical location, such as a residence or office building. But there may be -more than one person at a location; thus you have to further quantify +more than one person at the location; thus you have to further quantify the recipient by putting a person or company name on the envelope. In the phone system, one phone number may represent an entire @@ -462,12 +482,12 @@ File: gawkinet.info, Node: Gawk Special Files, Next: TCP Connecting, Prev: Us 2.1 `gawk''s Networking Mechanisms ================================== -The `|&' operator introduced in `gawk' 3.1 for use in communicating -with a "coprocess" is described in *note Two-way Communications With -Another Process: (gawk)Two-way I/O. It shows how to do two-way I/O to a -separate process, sending it data with `print' or `printf' and reading -data with `getline'. If you haven't read it already, you should detour -there to do so. +The `|&' operator for use in communicating with a "coprocess" is +described in *note Two-way Communications With Another Process: +(gawk)Two-way I/O. It shows how to do two-way I/O to a separate +process, sending it data with `print' or `printf' and reading data with +`getline'. If you haven't read it already, you should detour there to +do so. `gawk' transparently extends the two-way I/O mechanism to simple networking through the use of special file names. When a "coprocess" @@ -488,12 +508,13 @@ and easier to use. 
The special file name for network access is made up of several fields, all of which are mandatory: - /inet/PROTOCOL/LOCALPORT/HOSTNAME/REMOTEPORT + /NET-TYPE/PROTOCOL/LOCALPORT/HOSTNAME/REMOTEPORT - The `/inet/' field is, of course, constant when accessing the -network. The LOCALPORT and REMOTEPORT fields do not have a meaning -when used with `/inet/raw' because "ports" only apply to TCP and UDP. -So, when using `/inet/raw', the port fields always have to be `0'. + The NET-TYPE field lets you specify IPv4 versus IPv6, or lets you +allow the system to choose. The LOCALPORT and REMOTEPORT fields do not +have a meaning when used with `/inet/raw' because "ports" only apply to +TCP and UDP. So, when using `/inet/raw', the port fields always have to +be `0'. * Menu: @@ -511,6 +532,12 @@ range of values and the defaults. All of the fields are mandatory. To let the system pick a value, or if the field doesn't apply to the protocol, specify it as `0': +NET-TYPE + This is one of `inet4' for IPv4, `inet6' for IPv6, or `inet' to + use the system default (which is likely to be IPv4). For the rest + of this document, we will use the generic `/inet' in our + descriptions of how `gawk''s networking works. + PROTOCOL Determines which member of the TCP/IP family of protocols is selected to transport the data across the network. There are three @@ -888,8 +915,8 @@ browser to Microsoft Windows 95 at the last minute. They even back-ported their TCP/IP implementation to Microsoft Windows for Workgroups 3.11, but it was a rather rudimentary and half-hearted implementation. Nevertheless, the equivalent of `/etc/services' resides -under `C:\WINNT\system32\drivers\etc\services' on Microsoft Windows -2000. +under `C:\WINNT\system32\drivers\etc\services' on Microsoft Windows 2000 +and Microsoft Windows XP. File: gawkinet.info, Node: Interacting, Next: Setting Up, Prev: Troubleshooting, Up: Using Networking @@ -1870,9 +1897,9 @@ is up to you to accomplish this? 
Some other ideas for useful networked applications: * Read the file `doc/awkforai.txt' in the `gawk' distribution. It - was written by Ronald P. Loui (Associate Professor of Computer - Science, at Washington University in St. Louis, - <loui@ai.wustl.edu>) and summarizes why he teaches `gawk' to + was written by Ronald P. Loui (at the time, Associate Professor of + Computer Science, at Washington University in St. Louis, + <loui@ai.wustl.edu>) and summarizes why he taught `gawk' to students of Artificial Intelligence. Here are some passages from the text: @@ -4262,10 +4289,10 @@ Index * finger utility: Setting Up. (line 22) * Free Documentation License (FDL): GNU Free Documentation License. (line 6) -* FTP (File Transfer Protocol): Basic Protocols. (line 29) +* FTP (File Transfer Protocol): Basic Protocols. (line 45) * gawk, networking: Using Networking. (line 6) * gawk, networking, connections <1>: TCP Connecting. (line 6) -* gawk, networking, connections: Special File Fields. (line 49) +* gawk, networking, connections: Special File Fields. (line 55) * gawk, networking, filenames: Gawk Special Files. (line 29) * gawk, networking, See Also email: Email. (line 6) * gawk, networking, service, establishing: Setting Up. (line 6) @@ -4284,10 +4311,10 @@ Index * GNUPlot utility: Interacting Service. (line 189) * Hoare, C.A.R. <1>: PROTBASE. (line 6) * Hoare, C.A.R.: MOBAGWHO. (line 6) -* hostname field: Special File Fields. (line 29) +* hostname field: Special File Fields. (line 35) * HTML (Hypertext Markup Language): Web page. (line 30) * HTTP (Hypertext Transfer Protocol) <1>: Web page. (line 6) -* HTTP (Hypertext Transfer Protocol): Basic Protocols. (line 29) +* HTTP (Hypertext Transfer Protocol): Basic Protocols. (line 45) * HTTP (Hypertext Transfer Protocol), record separators and: Web page. (line 30) * HTTP server, core logic: Interacting Service. (line 6) @@ -4316,15 +4343,16 @@ Index * MOBAGWHO program: MOBAGWHO. 
(line 6) * NCBI, National Center for Biotechnology Information: PROTBASE. (line 6) +* network type field: Special File Fields. (line 11) * networks, gawk and: Using Networking. (line 6) * networks, gawk and, connections <1>: TCP Connecting. (line 6) -* networks, gawk and, connections: Special File Fields. (line 49) +* networks, gawk and, connections: Special File Fields. (line 55) * networks, gawk and, filenames: Gawk Special Files. (line 29) * networks, gawk and, See Also email: Email. (line 6) * networks, gawk and, service, establishing: Setting Up. (line 6) * networks, gawk and, troubleshooting: Caveats. (line 6) * networks, ports, reserved: Setting Up. (line 37) -* networks, ports, specifying: Special File Fields. (line 18) +* networks, ports, specifying: Special File Fields. (line 24) * networks, See Also web pages: PANIC. (line 6) * Numerical Recipes: STATIST. (line 24) * ORS variable, HTTP and: Web page. (line 30) @@ -4341,7 +4369,7 @@ Index * PostScript: STATIST. (line 138) * PROLOG: Challenges. (line 76) * PROTBASE: PROTBASE. (line 6) -* protocol field: Special File Fields. (line 11) +* protocol field: Special File Fields. (line 17) * PS image format: STATIST. (line 6) * Python: Using Networking. (line 14) * Python, gawk networking and: Using Networking. (line 24) @@ -4356,12 +4384,12 @@ Index * RS variable, POP and: Email. (line 36) * servers <1>: Setting Up. (line 22) * servers: Making Connections. (line 14) -* servers, as hosts: Special File Fields. (line 29) +* servers, as hosts: Special File Fields. (line 35) * servers, HTTP: Interacting Service. (line 6) * servers, web: Simple Server. (line 6) * Simple Mail Transfer Protocol (SMTP): Email. (line 6) * SMTP (Simple Mail Transfer Protocol) <1>: Email. (line 6) -* SMTP (Simple Mail Transfer Protocol): Basic Protocols. (line 29) +* SMTP (Simple Mail Transfer Protocol): Basic Protocols. (line 45) * SPAK utility: File /inet/raw. (line 21) * STATIST program: STATIST. (line 6) * STOXPRED program: STOXPRED. 
(line 6) @@ -4375,7 +4403,8 @@ Index * TCP (Transmission Control Protocol), connection, establishing: TCP Connecting. (line 6) * TCP (Transmission Control Protocol), UDP and: Interacting. (line 48) -* TCP/IP, protocols, selecting: Special File Fields. (line 11) +* TCP/IP, network type, selecting: Special File Fields. (line 11) +* TCP/IP, protocols, selecting: Special File Fields. (line 17) * TCP/IP, sockets and: Gawk Special Files. (line 19) * Transmission Control Protocol, See TCP: Using Networking. (line 29) * troubleshooting, gawk, networks: Caveats. (line 6) @@ -4405,58 +4434,59 @@ Index Tag Table: -Node: Top2003 -Node: Preface5691 -Node: Introduction7066 -Node: Stream Communications8092 -Node: Datagram Communications9265 -Node: The TCP/IP Protocols10896 -Ref: The TCP/IP Protocols-Footnote-111580 -Node: Basic Protocols11737 -Node: Ports13059 -Node: Making Connections14464 -Ref: Making Connections-Footnote-117045 -Ref: Making Connections-Footnote-217092 -Node: Using Networking17273 -Node: Gawk Special Files19627 -Node: Special File Fields21631 -Ref: table-inet-components25381 -Node: Comparing Protocols27293 -Node: File /inet/tcp27882 -Node: File /inet/udp28908 -Node: File /inet/raw30029 -Ref: File /inet/raw-Footnote-133062 -Node: TCP Connecting33142 -Node: Troubleshooting35480 -Ref: Troubleshooting-Footnote-138531 -Node: Interacting39075 -Node: Setting Up41805 -Node: Email45299 -Node: Web page47625 -Ref: Web page-Footnote-150430 -Node: Primitive Service50627 -Node: Interacting Service53361 -Ref: Interacting Service-Footnote-162490 -Node: CGI Lib62522 -Node: Simple Server69483 -Ref: Simple Server-Footnote-177206 -Node: Caveats77307 -Node: Challenges78450 -Node: Some Applications and Techniques87117 -Node: PANIC89574 -Node: GETURL91292 -Node: REMCONF93915 -Node: URLCHK99391 -Node: WEBGRAB103226 -Node: STATIST107676 -Ref: STATIST-Footnote-1119384 -Node: MAZE119829 -Node: MOBAGWHO126017 -Ref: MOBAGWHO-Footnote-1139961 -Node: STOXPRED140016 -Node: PROTBASE154271 
-Node: Links167353 -Node: GNU Free Documentation License170787 -Node: Index195926 +Node: Top2015 +Node: Preface5709 +Node: Introduction7084 +Node: Stream Communications8110 +Node: Datagram Communications9283 +Node: The TCP/IP Protocols10914 +Ref: The TCP/IP Protocols-Footnote-111598 +Node: Basic Protocols11755 +Ref: Basic Protocols-Footnote-113798 +Node: Ports13827 +Node: Making Connections15234 +Ref: Making Connections-Footnote-117815 +Ref: Making Connections-Footnote-217862 +Node: Using Networking18043 +Node: Gawk Special Files20397 +Node: Special File Fields22403 +Ref: table-inet-components26418 +Node: Comparing Protocols28330 +Node: File /inet/tcp28919 +Node: File /inet/udp29945 +Node: File /inet/raw31066 +Ref: File /inet/raw-Footnote-134099 +Node: TCP Connecting34179 +Node: Troubleshooting36517 +Ref: Troubleshooting-Footnote-139568 +Node: Interacting40137 +Node: Setting Up42867 +Node: Email46361 +Node: Web page48687 +Ref: Web page-Footnote-151492 +Node: Primitive Service51689 +Node: Interacting Service54423 +Ref: Interacting Service-Footnote-163552 +Node: CGI Lib63584 +Node: Simple Server70545 +Ref: Simple Server-Footnote-178268 +Node: Caveats78369 +Node: Challenges79512 +Node: Some Applications and Techniques88191 +Node: PANIC90648 +Node: GETURL92366 +Node: REMCONF94989 +Node: URLCHK100465 +Node: WEBGRAB104300 +Node: STATIST108750 +Ref: STATIST-Footnote-1120458 +Node: MAZE120903 +Node: MOBAGWHO127091 +Ref: MOBAGWHO-Footnote-1141035 +Node: STOXPRED141090 +Node: PROTBASE155345 +Node: Links168427 +Node: GNU Free Documentation License171861 +Node: Index197000 End Tag Table diff --git a/doc/gawkinet.texi b/doc/gawkinet.texi index 6abd7ea7..a7fe43b7 100644 --- a/doc/gawkinet.texi +++ b/doc/gawkinet.texi @@ -61,18 +61,18 @@ @c pages, I think this is the right decision. ADR. 
@set TITLE TCP/IP Internetworking With @command{gawk} -@set EDITION 1.1 -@set UPDATE-MONTH July, 2009 +@set EDITION 1.2 +@set UPDATE-MONTH August, 2010 @c gawk versions: -@set VERSION 3.1 -@set PATCHLEVEL 7 +@set VERSION 4.0 +@set PATCHLEVEL 0 @copying This is Edition @value{EDITION} of @cite{@value{TITLE}}, for the @value{VERSION}.@value{PATCHLEVEL} (or later) version of the GNU implementation of AWK. @sp 2 -Copyright (C) 2000, 2001, 2002, 2004, 2009 Free Software Foundation, Inc. +Copyright (C) 2000, 2001, 2002, 2004, 2009, 2010 Free Software Foundation, Inc. @sp 2 Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or @@ -145,7 +145,7 @@ ISBN 1-882114-93-0 @* @comment node-name, next, previous, up This file documents the networking features in GNU Awk (@command{gawk}) -version 3.1 and later. +version 4.0 and later. @insertcopying @end ifnottex @@ -377,6 +377,23 @@ of the Internet. Much like the phone company's switching centers or the Post Office's trucks, it is not of much day-to-day interest to the regular user (or programmer). It happens to be a best effort datagram protocol. +In the early twenty-first century, there are two versions of this protocol +in use: + +@table @asis +@item IPv4 +The original version of the Internet Protocol, with 32-bit addresses, on which +most of the current Internet is based. + +@item IPv6 +The ``next generation'' of the Internet Protocol, with 128-bit addresses. +This protocol is in wide use in certain parts of the world, but has not +yet replaced IPv4.@footnote{There isn't an IPv5.} +@end table + +Versions of the other protocols that sit ``atop'' IP exist for both +IPv4 and IPv6. However, as the IPv6 versions are fundamentally the same +as the original IPv4 versions, we will not distinguish further between them. @item UDP The User Datagram Protocol. This is a best effort datagram protocol. 
@@ -402,7 +419,7 @@ FTP (File Transfer Protocol), and HTTP (HyperText Transfer Protocol). In the postal system, the address on an envelope indicates a physical location, such as a residence or office building. But there may be -more than one person at a location; thus you have to further quantify +more than one person at the location; thus you have to further quantify the recipient by putting a person or company name on the envelope. In the phone system, one phone number may represent an entire company, @@ -569,7 +586,7 @@ untractable (RAW). @comment node-name, next, previous, up @section @command{gawk}'s Networking Mechanisms -The @samp{|&} operator introduced in @command{gawk} 3.1 for use in +The @samp{|&} operator for use in communicating with a @dfn{coprocess} is described in @ref{Two-way I/O, ,Two-way Communications With Another Process, gawk, GAWK: Effective AWK Programming}. It shows how to do two-way I/O to a @@ -607,14 +624,15 @@ The special @value{FN} for network access is made up of several fields, all of which are mandatory: @example -/inet/@var{protocol}/@var{localport}/@var{hostname}/@var{remoteport} +/@var{net-type}/@var{protocol}/@var{localport}/@var{hostname}/@var{remoteport} @end example @cindex @code{/inet/} files (@command{gawk}) @cindex files, @code{/inet/} (@command{gawk}) @cindex localport field @cindex remoteport field -The @file{/inet/} field is, of course, constant when accessing the network. +The @var{net-type} field lets you specify IPv4 versus IPv6, or lets +you allow the system to choose. The @var{localport} and @var{remoteport} fields do not have a meaning when used with @file{/inet/raw} because ``ports'' only apply to TCP and UDP. So, when using @file{/inet/raw}, the port fields always have @@ -633,6 +651,15 @@ All of the fields are mandatory. 
To let the system pick a value, or if the field doesn't apply to the protocol, specify it as @samp{0}: @table @var +@cindex network type field +@c last comma is part of secondary +@cindex TCP/IP, network type, selecting +@item net-type +This is one of @samp{inet4} for IPv4, @samp{inet6} for IPv6, +or @samp{inet} to use the system default (which is likely to be IPv4). +For the rest of this document, we will use the generic @samp{/inet} +in our descriptions of how @command{gawk}'s networking works. + @cindex protocol field @c last comma is part of secondary @cindex TCP/IP, protocols, selecting @@ -807,7 +834,6 @@ only the @var{protocol} has changed. As before, it does matter which side starts first. The receiving side blocks and waits for the sender. In this case, the receiver/client has to be started first: -@page @example # Server BEGIN @{ @@ -1078,7 +1104,8 @@ their own browser to Microsoft Windows 95 at the last minute. They even back-por their TCP/IP implementation to Microsoft Windows for Workgroups 3.11, but it was a rather rudimentary and half-hearted implementation. Nevertheless, the equivalent of @file{/etc/services} resides under -@file{C:\WINNT\system32\drivers\etc\services} on Microsoft Windows 2000.} +@file{C:\WINNT\system32\drivers\etc\services} on Microsoft Windows 2000 +and Microsoft Windows XP.} The first column of the file gives the name of the service, and the second column gives a unique number and the protocol that one can use to connect to this service. @@ -2487,10 +2514,10 @@ Some other ideas for useful networked applications: @itemize @bullet @item Read the file @file{doc/awkforai.txt} in the @command{gawk} distribution. -It was written by Ronald P.@: Loui (Associate Professor of +It was written by Ronald P.@: Loui (at the time, Associate Professor of Computer Science, at Washington University in St. Louis, @email{loui@@ai.wustl.edu}) and summarizes why -he teaches @command{gawk} to students of Artificial Intelligence. 
Here are +he taught @command{gawk} to students of Artificial Intelligence. Here are some passages from the text: @cindex AI diff --git a/doc/lflashlight.eps b/doc/lflashlight.eps index fdb8cf31..68865f0f 100644 --- a/doc/lflashlight.eps +++ b/doc/lflashlight.eps @@ -131,5 +131,5 @@ x %%Trailer showpage % Trailer for xpic to PostScript converter -% $Header: x2ps.tra,v 1.2 89/07/02 15:59:53 moraes Exp $ +% $Header: /d/mongo/cvsrep/gawk-devel/doc/lflashlight.eps,v 1.1.1.1 2008/11/16 19:21:15 arnold Exp $ xpic end restore diff --git a/doc/rflashlight.eps b/doc/rflashlight.eps index 28cb7e25..1e3ab760 100644 --- a/doc/rflashlight.eps +++ b/doc/rflashlight.eps @@ -137,5 +137,5 @@ x %%Trailer showpage % Trailer for xpic to PostScript converter -% $Header: x2ps.tra,v 1.2 89/07/02 15:59:53 moraes Exp $ +% $Header: /d/mongo/cvsrep/gawk-devel/doc/rflashlight.eps,v 1.1.1.1 2008/11/16 19:21:15 arnold Exp $ xpic end restore |