diff options
Diffstat (limited to 'doc')
-rw-r--r-- | doc/ChangeLog | 49 | ||||
-rw-r--r-- | doc/Makefile.in | 12 | ||||
-rw-r--r-- | doc/awkcard.in | 9 | ||||
-rw-r--r-- | doc/gawk.1 | 359 | ||||
-rw-r--r-- | doc/gawk.info | 1003 | ||||
-rw-r--r-- | doc/gawk.texi | 285 | ||||
-rw-r--r-- | doc/igawk.1 | 6 | ||||
-rw-r--r-- | doc/texinfo.tex | 3377 |
8 files changed, 3196 insertions, 1904 deletions
diff --git a/doc/ChangeLog b/doc/ChangeLog index 17f2844b..dc8aff84 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,52 @@ +Sun Jun 25 15:08:19 2000 Arnold D. Robbins <arnold@skeeve.com> + + * Release 3.0.5: Release tar file made. + +Wed May 17 19:04:54 2000 Arnold D. Robbins <arnold@skeeve.com> + + * gawk.texi, gawk.1, awkcard.in: Documented %u. Ooops. + +Tue May 2 11:44:13 2000 Arnold D. Robbins <arnold@skeeve.com> + + * texinfo.tex: Updated to version 1999-10-01.07. + * gawk.texi: Redid page breaking for new texinfo.tex. + +Thu Apr 6 12:32:49 2000 Arnold D. Robbins <arnold@skeeve.com> + + * gawk.texi: Change info dir file entry to `(gawk)' from + `(gawk.info)'. + * Makefile.in [$(infodir)/gawk.info]: Fix grep test in + accordance with above. + +Sun Feb 13 15:36:32 2000 Paul Eggert <eggert@twinsun.com> + + * gawk.texi: Mention that arithmetic is done in double + precision floating point, and point to Goldberg's paper for + people who want to know more. Fix some other minor floating + point discussion issues. + +Wed Nov 3 17:04:35 1999 Arnold D. Robbins <arnold@skeeve.com> + + * gawk.1: Lots of troff ``lint'' from Paul Eggert. Not all + of his changes, just the ones I thought worth doing. + +Mon Oct 11 16:53:54 1999 Arnold D. Robbins <arnold@skeeve.com> + + * Makefile.in (gawk.dvi): Put $(srcdir) first in TEXINPUTS, + and also just use texi2dvi, don't run texindex and tex + manually. Doing so is no longer necessary. + +Mon Aug 9 13:06:01 1999 Arnold D. Robbins <arnold@skeeve.com> + + * gawk.texi: New node `Array Efficiency' on the best use + of subscripting to avoid memory bloat. + +Thu Jul 29 23:15:34 1999 Arnold D. Robbins <arnold@skeeve.com> + + * Makefile.in ($(infodir)/gawk.info): Removed loop around + $(INSTALL_DATA), since there's only one Info file to install, + install it directly. + Wed Jun 30 16:14:36 1999 Arnold D. Robbins <arnold@gnu.org> * Release 3.0.4: Release tar file made. This time for sure. 
diff --git a/doc/Makefile.in b/doc/Makefile.in index 989903cb..edcf4046 100644 --- a/doc/Makefile.in +++ b/doc/Makefile.in @@ -1,6 +1,6 @@ # Makefile for GNU Awk documentation. # -# Copyright (C) 1993-1999 the Free Software Foundation, Inc. +# Copyright (C) 1993-2000 the Free Software Foundation, Inc. # # This file is part of GAWK, the GNU implementation of the # AWK Programming Language. @@ -76,12 +76,10 @@ $(infodir)/gawk.info:: else d=$(srcdir); fi; \ if [ -f $(infodir)/dir -a -f $(infodir)/gawk.info ] \ && cmp $$d/gawk.info $(infodir)/gawk.info > /dev/null \ - && grep '(gawk\.info)' $(infodir)/dir > /dev/null; then \ + && grep '(gawk)' $(infodir)/dir > /dev/null; then \ exit 0; \ fi; \ - for i in $$d/gawk.info*; do \ - $(INSTALL_DATA) $$i $(infodir)/$$i ; \ - done; \ + $(INSTALL_DATA) $$d/gawk.info $(infodir)/gawk.info ; \ if $(SHELL) -c 'install-info --version' > /dev/null 2>&1 ; \ then install-info --info-dir=$(infodir) gawk.info ; \ else true ; fi; exit 0 @@ -98,9 +96,7 @@ uninstall: dvi: gawk.dvi gawk.dvi: gawk.texi - -TEXINPUTS=$$TEXINPUTS:$(srcdir) $(TEXI2DVI) $(srcdir)/gawk.texi - texindex gawk.?? - TEXINPUTS=$$TEXINPUTS:$(srcdir) $(TEX) $(srcdir)/gawk.texi + -TEXINPUTS=$(srcdir):$$TEXINPUTS $(TEXI2DVI) $(srcdir)/gawk.texi info: gawk.info diff --git a/doc/awkcard.in b/doc/awkcard.in index c3e262df..d1ceb0d3 100644 --- a/doc/awkcard.in +++ b/doc/awkcard.in @@ -1,6 +1,6 @@ .\" AWK Reference Card --- Arnold Robbins, arnold@gnu.org .\" -.\" Copyright (C) 1996, 97, 98, 99 Free Software Foundation, Inc. +.\" Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc. .\" .\" Permission is granted to make and distribute verbatim copies of .\" this reference card provided the copyright notice and this permission @@ -95,7 +95,7 @@ for their help. 
\*(CD .SL .nf -\*(FR\(co Copyright 1996-1999, Free Software Foundation +\*(FR\(co Copyright 1996-2000, Free Software Foundation 59 Temple Place \(em Suite 330 Boston, MA 02111-1307 USA .nf @@ -1078,6 +1078,7 @@ accept the following conversion specification formats: nonsignificant zeros suppressed \*(FC%G\fP like \*(FC%g\fP, but use \*(FC%E\fP instead of \*(FC%e\*(FR \*(FC%o\fP an unsigned octal integer +\*(FC%u\fP an unsigned decimal integer \*(FC%s\fP a character string \*(FC%x\fP an unsigned hexadecimal integer \*(FC%X\fP like \*(FC%x\fP, but use \*(FCABCDEF\fP for 10\(en15 @@ -1503,7 +1504,7 @@ has been specified.\*(CB .ES .nf \*(CDHost: \*(FCgnudist.gnu.org\*(FR -File: \*(FC/gnu/gawk/gawk-3.0.4.tar.gz\fP +File: \*(FC/gnu/gawk/gawk-3.0.5.tar.gz\fP .in +.2i .fi GNU \*(AK (\*(GK). There may be a later version. @@ -1530,7 +1531,7 @@ Michael Brennan's \*(MK. There may be a newer version.\*(CX .\" --- Copying Permissions .ES .fi -\*(CDCopyright \(co 1996, 1997 Free Software Foundation, Inc. +\*(CDCopyright \(co 1996-2000 Free Software Foundation, Inc. .sp .5 Permission is granted to make and distribute verbatim copies of this reference card provided the copyright notice and this permission notice @@ -1,28 +1,40 @@ .ds PX \s-1POSIX\s+1 .ds UX \s-1UNIX\s+1 .ds AN \s-1ANSI\s+1 -.TH GAWK 1 "Apr 28 1999" "Free Software Foundation" "Utility Commands" +.ds GN \s-1GNU\s+1 +.ds AK \s-1AWK\s+1 +.if !\n(.g \{\ +. if !\w|\*(lq| \{\ +. ds lq `` +. if \w'\(lq' .ds lq "\(lq +. \} +. if !\w|\*(rq| \{\ +. ds rq '' +. if \w'\(rq' .ds rq "\(rq +. \} +.\} +.TH GAWK 1 "May 17 2000" "Free Software Foundation" "Utility Commands" .SH NAME gawk \- pattern scanning and processing language .SH SYNOPSIS .B gawk -[ POSIX or GNU style options ] +[ \*(PX or \*(GN style options ] .B \-f .I program-file [ .B \-\^\- -] file .\^.\^. +] file .\|.\|. .br .B gawk -[ POSIX or GNU style options ] +[ \*(PX or \*(GN style options ] [ .B \-\^\- ] .I program-text -file .\^.\^. +file .\|.\|. 
.SH DESCRIPTION .I Gawk -is the GNU Project's implementation of the AWK programming language. +is the \*(GN Project's implementation of the \*(AK programming language. It conforms to the definition of the language in the \*(PX 1003.2 Command Language And Utilities Standard. This version in turn is based on the description in @@ -34,11 +46,11 @@ of \*(UX .I Gawk also provides more recent Bell Labs .I awk -extensions, and some GNU-specific extensions. +extensions, and some \*(GN-specific extensions. .PP The command line consists of options to .I gawk -itself, the AWK program text (if not supplied via the +itself, the \*(AK program text (if not supplied via the .B \-f or .B \-\^\-file @@ -47,14 +59,14 @@ available in the .B ARGC and .B ARGV -pre-defined AWK variables. +pre-defined \*(AK variables. .SH OPTION FORMAT .PP .I Gawk options may be either the traditional \*(PX one letter options, -or the GNU style long options. \*(PX options start with a single ``\-'', -while long options start with ``\-\^\-''. -Long options are provided for both GNU-specific features and +or the \*(GN style long options. \*(PX options start with a single \*(lq\-\*(rq, +while long options start with \*(lq\-\^\-\*(rq. +Long options are provided for both \*(GN-specific features and for \*(PX mandated features. .PP Following the \*(PX standard, @@ -103,14 +115,14 @@ to the variable before execution of the program begins. Such variable values are available to the .B BEGIN -block of an AWK program. +block of an \*(AK program. .TP .PD 0 .BI \-f " program-file" .TP .PD .BI \-\^\-file " program-file" -Read the AWK program source from the file +Read the \*(AK program source from the file .IR program-file , instead of from the first command line argument. Multiple @@ -157,7 +169,7 @@ mode. In compatibility mode, .I gawk behaves identically to \*(UX .IR awk ; -none of the GNU-specific extensions are recognized. +none of the \*(GN-specific extensions are recognized. 
The use of .B \-\^\-traditional is preferred over the other forms of this option. @@ -176,7 +188,7 @@ below, for more information. .TP .PD .B \-\^\-copyright -Print the short version of the GNU copyright information message on +Print the short version of the \*(GN copyright information message on the standard output, and exits successfully. .TP .PD 0 @@ -202,7 +214,7 @@ these options cause an immediate, successful exit.) .PD .B \-\^\-lint Provide warnings about constructs that are -dubious or non-portable to other AWK implementations. +dubious or non-portable to other \*(AK implementations. .TP .PD 0 .B "\-W lint\-old" @@ -231,7 +243,7 @@ users. .PD .B \-\^\-posix This turns on -.I compatibility +.I compatibility mode, with the following additional restrictions: .RS .TP \w'\(bu'u+1n @@ -279,13 +291,13 @@ in regular expression matching .BR "Regular Expressions" , below). Interval expressions were not traditionally available in the -AWK language. The POSIX standard added them, to make +\*(AK language. The \*(PX standard added them, to make .I awk and .I egrep consistent with each other. However, their use is likely -to break old AWK programs, so +to break old \*(AK programs, so .I gawk only provides them if they are requested with this option, or when .B \-\^\-posix @@ -298,13 +310,13 @@ is specified. .BI \-\^\-source " program-text" Use .I program-text -as AWK program source code. -This option allows the easy intermixing of library functions (used via the +as \*(AK program source code. +This option allows the easy intermixing of library functions (used via the .B \-f and .B \-\^\-file options) with source code entered on the command line. -It is intended primarily for medium to large AWK programs used +It is intended primarily for medium to large \*(AK programs used in shell scripts. .TP .PD 0 @@ -326,21 +338,21 @@ This is also useful when reporting bugs. these options cause an immediate, successful exit.) .TP .B \-\^\- -Signal the end of options. 
This is useful to allow further arguments to the -AWK program itself to start with a ``\-''. +Signal the end of options. This is useful to allow further arguments to the +\*(AK program itself to start with a \*(lq\-\*(rq. This is mainly for consistency with the argument parsing convention used by most other \*(PX programs. .PP In compatibility mode, any other options are flagged as illegal, but are otherwise ignored. In normal operation, as long as program text has been supplied, unknown -options are passed on to the AWK program in the +options are passed on to the \*(AK program in the .B ARGV -array for processing. This is particularly useful for running AWK -programs via the ``#!'' executable interpreter mechanism. +array for processing. This is particularly useful for running \*(AK +programs via the \*(lq#!\*(rq executable interpreter mechanism. .SH AWK PROGRAM EXECUTION .PP -An AWK program consists of a sequence of pattern-action statements +An \*(AK program consists of a sequence of pattern-action statements and optional function definitions. .RS .PP @@ -366,14 +378,14 @@ will read the program text as if all the .IR program-file s and command line source texts had been concatenated together. This is useful for building libraries -of AWK functions, without having to include them in each new AWK +of \*(AK functions, without having to include them in each new \*(AK program that uses them. It also provides the ability to mix library functions with command line programs. .PP The environment variable .B AWKPATH specifies a search path to use when finding source files named with -the +the .B \-f option. If this variable does not exist, the default path is \fB".:/usr/local/share/awk"\fR. @@ -382,10 +394,10 @@ option. If this variable does not exist, the default path is was built and installed.) If a file name given to the .B \-f -option contains a ``/'' character, no path search is performed. +option contains a \*(lq/\*(rq character, no path search is performed. 
.PP .I Gawk -executes AWK programs in the following order. +executes \*(AK programs in the following order. First, all variable assignments specified via the .B \-v @@ -408,7 +420,7 @@ reads the standard input. .PP If a filename on the command line has the form .IB var = val -it is treated as a variable assignment. The variable +it is treated as a variable assignment. The variable .I var will be assigned the value .IR val . @@ -417,8 +429,8 @@ will be assigned the value block(s) have been run.) Command line variable assignment is most useful for dynamically assigning values to the variables -AWK uses to control how input is broken into fields and records. It -is also useful for controlling state if multiple passes are needed over +\*(AK uses to control how input is broken into fields and records. +It is also useful for controlling state if multiple passes are needed over a single data file. .PP If the value of a particular element of @@ -431,7 +443,7 @@ For each record in the input, .I gawk tests to see if it matches any .I pattern -in the AWK program. +in the \*(AK program. For each pattern that the record matches, the associated .I action is executed. @@ -443,18 +455,18 @@ executes the code in the .B END block(s) (if any). .SH VARIABLES, RECORDS AND FIELDS -AWK variables are dynamic; they come into existence when they are -first used. Their values are either floating-point numbers or strings, +\*(AK variables are dynamic; they come into existence when they are +first used. Their values are either floating-point numbers or strings, or both, -depending upon how they are used. AWK also has one dimensional +depending upon how they are used. \*(AK also has one dimensional arrays; arrays with multiple dimensions may be simulated. Several pre-defined variables are set as a program runs; these will be described as needed and summarized below. .SS Records -Normally, records are separated by newline characters. 
You can control how +Normally, records are separated by newline characters. You can control how records are separated by assigning values to the built-in variable .BR RS . -If +If .B RS is any single character, that character separates records. Otherwise, @@ -498,7 +510,7 @@ In the special case that is a single space, fields are separated by runs of spaces and/or tabs and/or newlines. (But see the discussion of -.BR \-\-posix , +.BR \-\^\-posix , below). Note that the value of .B IGNORECASE @@ -527,7 +539,7 @@ Each field in the input record may be referenced by its position, .BR $2 , and so on. .B $0 -is the whole record. The value of a field may be assigned to as well. +is the whole record. The value of a field may be assigned to as well. Fields need not be referenced by constants: .RS .PP @@ -545,8 +557,8 @@ is set to the total number of fields in the input record. .PP References to non-existent fields (i.e. fields after .BR $NF ) -produce the null-string. However, assigning to a non-existent field -(e.g., +produce the null-string. However, assigning to a non-existent field +(e.g., .BR "$(NF+2) = 5" ) will increase the value of .BR NF , @@ -579,7 +591,7 @@ The index in of the current file being processed. .TP .B ARGV -Array of command line arguments. The array is indexed from +Array of command line arguments. The array is indexed from 0 to .B ARGC \- 1. @@ -632,7 +644,7 @@ evolves over time. The name of the current input file. If no files are specified on the command line, the value of .B FILENAME -is ``\-''. +is \*(lq\-\*(rq. However, .B FILENAME is undefined inside the @@ -648,8 +660,8 @@ The input field separator, a space by default. See above. .TP .B IGNORECASE -Controls the case-sensitivity of all regular expression -and string operations. If +Controls the case-sensitivity of all regular expression +and string operations. 
If .B IGNORECASE has a non-zero value, then string comparisons and pattern matching in rules, @@ -677,7 +689,7 @@ is not equal to zero, .B /aB/ matches all of the strings \fB"ab"\fP, \fB"aB"\fP, \fB"Ab"\fP, and \fB"AB"\fP. -As with all AWK variables, the initial value of +As with all \*(AK variables, the initial value of .B IGNORECASE is zero, so all regular expression and string operations are normally case-sensitive. @@ -688,7 +700,7 @@ In versions of .I gawk prior to 3.0, .B IGNORECASE -only affected regular expression operations. It now affects string +only affected regular expression operations. It now affects string comparisons as well. .TP .B NF @@ -736,14 +748,14 @@ elements, by default \fB"\e034"\fR. Arrays are subscripted with an expression between square brackets .RB ( [ " and " ] ). If the expression is an expression list -.RI ( expr ", " expr " ...)" +.RI ( expr ", " expr " .\|.\|.)" then the array subscript is a string consisting of the concatenation of the (string) value of each expression, separated by the value of the .B SUBSEP variable. This facility is used to simulate multiply dimensioned -arrays. For example: +arrays. For example: .PP .RS .ft B @@ -755,7 +767,7 @@ x[i, j, k] = "hello, world\en" .PP assigns the string \fB"hello, world\en"\fR to the element of the array .B x -which is indexed by the string \fB"A\e034B\e034C"\fR. All arrays in AWK +which is indexed by the string \fB"A\e034B\e034C"\fR. All arrays in \*(AK are associative, i.e. indexed by string values. .PP The special operator @@ -795,8 +807,8 @@ just by specifying the array name without a subscript. .SS Variable Typing And Conversion .PP Variables and fields -may be (floating point) numbers, or strings, or both. How the -value of a variable is interpreted depends upon its context. If used in +may be (floating point) numbers, or strings, or both. How the +value of a variable is interpreted depends upon its context. 
If used in a numeric expression, it will be treated as a number, if used as a string it will be treated as a string. .PP @@ -811,7 +823,7 @@ A number is converted to a string by using the value of as a format string for .IR sprintf (3), with the numeric value of the variable as the argument. -However, even though all numbers in AWK are floating-point, +However, even though all numbers in \*(AK are floating-point, integral values are .I always converted as integers. Thus, given @@ -834,7 +846,7 @@ has a string value of \fB"12"\fR and not \fB"12.00"\fR. performs comparisons as follows: If two variables are numeric, they are compared numerically. If one value is numeric and the other has a string value that is a -``numeric string,'' then comparisons are also done numerically. +\*(lqnumeric string,\*(rq then comparisons are also done numerically. Otherwise, the numeric value is converted to a string and a string comparison is performed. Two strings are compared, of course, as strings. @@ -846,7 +858,8 @@ does not do this. .PP Note that string constants, such as \fB"57"\fP, are .I not -numeric strings, they are string constants. The idea of ``numeric string'' +numeric strings, they are string constants. +The idea of \*(lqnumeric string\*(rq only applies to fields, .B getline input, @@ -865,13 +878,13 @@ should be treated that way. Uninitialized variables have the numeric value 0 and the string value "" (the null, or empty, string). .SH PATTERNS AND ACTIONS -AWK is a line oriented language. The pattern comes first, and then the -action. Action statements are enclosed in +\*(AK is a line-oriented language. The pattern comes first, and then the +action. Action statements are enclosed in .B { and .BR } . Either the pattern may be missing, or the action may be missing, but, -of course, not both. If the pattern is missing, the action will be +of course, not both. If the pattern is missing, the action will be executed for every single record of input. 
A missing action is equivalent to .RS @@ -881,12 +894,12 @@ A missing action is equivalent to .PP which prints the entire record. .PP -Comments begin with the ``#'' character, and continue until the +Comments begin with the \*(lq#\*(rq character, and continue until the end of the line. Blank lines may be used to separate statements. Normally, a statement ends with a newline, however, this is not the case for lines ending in -a ``,'', +a \*(lq,\*(rq, .BR { , .BR ? , .BR : , @@ -898,16 +911,16 @@ Lines ending in or .B else also have their statements automatically continued on the following line. -In other cases, a line can be continued by ending it with a ``\e'', +In other cases, a line can be continued by ending it with a \*(lq\e\*(rq, in which case the newline will be ignored. .PP Multiple statements may -be put on one line by separating them with a ``;''. +be put on one line by separating them with a \*(lq;\*(rq. This applies to both the statements within the action part of a pattern-action pair (the usual case), and to the pattern-action statements themselves. .SS Patterns -AWK patterns may be one of the following: +\*(AK patterns may be one of the following: .PP .RS .nf @@ -934,8 +947,8 @@ The action parts of all patterns are merged as if all the statements had been written in a single .B BEGIN -block. They are executed before any -of the input is read. Similarly, all the +block. They are executed before any +of the input is read. Similarly, all the .B END blocks are merged, and executed when all the input is exhausted (or when an @@ -970,16 +983,16 @@ and .B ! operators are logical AND, logical OR, and logical NOT, respectively, as in C. They do short-circuit evaluation, also as in C, and are used for combining -more primitive pattern expressions. As in most languages, parentheses +more primitive pattern expressions. As in most languages, parentheses may be used to change the order of evaluation. .PP The .B ?\^: -operator is like the same operator in C. 
If the first pattern is true +operator is like the same operator in C. If the first pattern is true then the pattern used for testing is the second pattern, otherwise it is -the third. Only one of the second and third patterns is evaluated. +the third. Only one of the second and third patterns is evaluated. .PP -The +The .IB pattern1 ", " pattern2 form of an expression is called a .IR "range pattern" . @@ -987,12 +1000,12 @@ It matches all input records starting with a record that matches .IR pattern1 , and continuing until a record that matches .IR pattern2 , -inclusive. It does not combine with any other sort of pattern expression. +inclusive. It does not combine with any other sort of pattern expression. .SS Regular Expressions Regular expressions are the extended kind found in .IR egrep . They are composed of characters as follows: -.TP \w'\fB[^\fIabc...\fB]\fR'u+2n +.TP \w'\fB[^\fIabc.\|.\|.\fB]\fR'u+2n .I c matches the non-metacharacter .IR c . @@ -1012,13 +1025,13 @@ matches the beginning of a string. .B $ matches the end of a string. .TP -.BI [ abc... ] +.BI [ abc.\|.\|. ] character list, matches any of the characters -.IR abc... . +.IR abc.\|.\|. . .TP -.BI [^ abc... ] +.BI [^ abc.\|.\|. ] negated character list, matches any character except -.IR abc... . +.IR abc.\|.\|. . .TP .IB r1 | r2 alternation: matches either @@ -1034,15 +1047,15 @@ and then .TP .IB r + matches one or more -.IR r 's. +.IR r 's. .TP .IB r * matches zero or more -.IR r 's. +.IR r 's. .TP .IB r ? matches zero or one -.IR r 's. +.IR r 's. .TP .BI ( r ) grouping: matches @@ -1110,9 +1123,9 @@ The escape sequences that are valid in string constants (see below) are also legal in regular expressions. .PP .I "Character classes" -are a new feature introduced in the POSIX standard. +are a new feature introduced in the \*(PX standard. 
A character class is a special notation for describing -lists of characters that have a specific attribute, but where the +lists of characters that have a specific attribute, but where the actual characters themselves can vary from country to country and/or from character set to character set. For example, the notion of what is an alphabetic character differs in the USA and in France. @@ -1124,7 +1137,7 @@ the brackets of a character list. Character classes consist of a keyword denoting the class, and .BR :] . Here are the character -classes defined by the POSIX standard. +classes defined by the \*(PX standard. .TP .B [:alnum:] Alphanumeric characters. @@ -1166,11 +1179,11 @@ Upper-case alphabetic characters. .B [:xdigit:] Characters that are hexadecimal digits. .PP -For example, before the POSIX standard, to match alphanumeric +For example, before the \*(PX standard, to match alphanumeric characters, you would have had to write .BR /[A\-Za\-z0\-9]/ . If your character set had other alphabetic characters in it, this would not -match them. With the POSIX character classes, you can write +match them. With the \*(PX character classes, you can write .BR /[[:alnum:]]/ , and this will match .I all @@ -1178,12 +1191,12 @@ the alphabetic and numeric characters in your character set. .PP Two additional special sequences can appear in character lists. These apply to non-ASCII character sets, which can have single symbols -(called +(called .IR "collating elements" ) that are represented with more than one character, as well as several characters that are equivalent for .IR collating , -or sorting, purposes. (E.g., in French, a plain ``e'' +or sorting, purposes. (E.g., in French, a plain \*(lqe\*(rq and a grave-accented e\` are equivalent.) .TP Collating Symbols @@ -1204,14 +1217,14 @@ or .TP Equivalence Classes An equivalence class is a locale-specific name for a list of -characters that are equivalent. The name is enclosed in +characters that are equivalent. 
The name is enclosed in .B [= and .BR =] . For example, the name .B e might be used to represent all of -``e,'' ``e\`,'' and ``e\`.'' +\*(lqe,\*(rq \*(lqe\`,\*(rq and \*(lqe\`.\*(rq In this case, .B [[=e]] is a regexp @@ -1225,7 +1238,7 @@ These features are very valuable in non-English speaking locales. The library functions that .I gawk uses for regular expression matching -currently only recognize POSIX character classes; they do not recognize +currently only recognize \*(PX character classes; they do not recognize collating symbols or equivalence classes. .PP The @@ -1240,7 +1253,7 @@ and .B \e' operators are specific to .IR gawk ; -they are extensions based on facilities in the GNU regexp libraries. +they are extensions based on facilities in the \*(GN regexp libraries. .PP The various command line options control how @@ -1251,11 +1264,11 @@ No options In the default case, .I gawk provide all the facilities of -POSIX regexps and the GNU regexp operators described above. +\*(PX regexps and the \*(GN regexp operators described above. However, interval expressions are not supported. .TP .B \-\^\-posix -Only POSIX regexps are supported, the GNU operators are not special. +Only \*(PX regexps are supported, the \*(GN operators are not special. (E.g., .B \ew matches a literal @@ -1265,9 +1278,9 @@ Interval expressions are allowed. .B \-\^\-traditional Traditional Unix .I awk -regexps are matched. The GNU operators +regexps are matched. The \*(GN operators are not special, interval expressions are not available, and neither -are the POSIX character classes +are the \*(PX character classes .RB ( [[:alnum:]] and so on). Characters described by octal and hexadecimal escape sequences are @@ -1283,15 +1296,15 @@ Action statements are enclosed in braces, and .BR } . Action statements consist of the usual assignment, conditional, and looping -statements found in most languages. The operators, control statements, +statements found in most languages. 
The operators, control statements, and input/output statements available are patterned after those in C. .SS Operators .PP -The operators in AWK, in order of decreasing precedence, are +The operators in \*(AK, in order of decreasing precedence, are .PP .TP "\w'\fB*= /= %= ^=\fR'u+1n" -.BR ( \&... ) +.BR ( \&.\|.\|. ) Grouping .TP .B $ @@ -1352,9 +1365,10 @@ Logical AND. Logical OR. .TP .B ?: -The C conditional expression. This has the form +The C conditional expression. This has the form .IB expr1 " ? " expr2 " : " expr3\c -\&. If +\&. +If .I expr1 is true, the value of the expression is .IR expr2 , @@ -1371,7 +1385,7 @@ is evaluated. .TP .PD .B "*= /= %= ^=" -Assignment. Both absolute assignment +Assignment. Both absolute assignment .BI ( var " = " value ) and operator-assignment (the other forms) are supported. .SS Control Statements @@ -1432,9 +1446,9 @@ from next record of .IR file . .TP .B next -Stop processing the current input record. The next input record +Stop processing the current input record. The next input record is read and processing starts over with the first pattern in the -AWK program. If the end of the input data is reached, the +\*(AK program. If the end of the input data is reached, the .B END block(s), if any, are executed. .TP @@ -1447,13 +1461,13 @@ and are updated, .B FNR is reset to 1, and processing starts over with the first pattern in the -AWK program. If the end of the input data is reached, the +\*(AK program. If the end of the input data is reached, the .B END block(s), if any, are executed. .B NOTE: Earlier versions of gawk used .BR "next file" , -as two words. While this usage is still recognized, it generates a +as two words. While this usage is still recognized, it generates a warning message and will eventually be removed. .TP .B print @@ -1476,7 +1490,7 @@ Prints expressions on .IR file . Each expression is separated by the value of the .B OFS -variable. The output record is terminated with the value of the +variable. 
The output record is terminated with the value of the .B ORS variable. .TP @@ -1505,11 +1519,11 @@ is the null string, then all open output files and pipes have their buffers flushed. .PP -Other input/output redirections are also allowed. For +Other input/output redirections are also allowed. For .B print and .BR printf , -.BI >> file +.BI >> " file" appends output to the .IR file , while @@ -1522,9 +1536,23 @@ pipes into The .BR getline command will return 0 on end of file, and \-1 on an error. +.PP +NOTE: If using a pipe to +.BR getline , +or from +.B print +or +.BR printf +within a loop, you +.I must +use +.B close() +to create new instances of the command. +AWK does not automatically close pipes when +they return EOF. .SS The \fIprintf\fP\^ Statement .PP -The AWK versions of the +The \*(AK versions of the .B printf statement and .B sprintf() @@ -1583,7 +1611,11 @@ instead of .BR %e . .TP .B %o -An unsigned octal number (again, an integer). +An unsigned octal number (also an integer). +.TP +.PD +.B %u +An unsigned decimal number (again, an integer). .TP .B %s A character string. @@ -1594,7 +1626,7 @@ A character string. .PD .B %X An unsigned hexadecimal number (an integer). -.The +The .B %X format uses .B ABCDEF @@ -1620,12 +1652,12 @@ negative values with a minus sign. .B + The plus sign, used before the width modifier (see below), says to always supply a sign for numeric conversions, even if the data -to be formatted is positive. The +to be formatted is positive. The .B + overrides the space modifier. .TP .B # -Use an ``alternate form'' for certain control letters. +Use an \*(lqalternate form\*(rq for certain control letters. For .BR %o , supply a leading zero. @@ -1634,9 +1666,9 @@ For and .BR %X , supply a leading -.BR 0x +.BR 0x or -.BR 0X +.BR 0X for a nonzero result. For @@ -1662,7 +1694,7 @@ This flag only has an effect when the field width is wider than the value to be printed. .TP .I width -The field should be padded to this width. 
The field is normally padded +The field should be padded to this width. The field is normally padded with spaces. If the .B 0 flag has been used, it is padded with zeroes. @@ -1673,7 +1705,7 @@ For the .BR %e , .BR %E , and -.BR %f +.BR %f formats, this specifies the number of digits you want printed to the right of the decimal point. For the @@ -1779,7 +1811,7 @@ The standard error output. The file associated with the open file descriptor .IR n . .PP -These are particularly useful for error messages. For example: +These are particularly useful for error messages. For example: .PP .RS .ft B @@ -1798,7 +1830,7 @@ print "You blew it!" | "cat 1>&2" These file names may also be used on the command line to name data files. .SS Numeric Functions .PP -AWK has the following pre-defined arithmetic functions: +\*(AK has the following pre-defined arithmetic functions: .PP .TP \w'\fBsrand(\fR[\fIexpr\^\fR]\fB)\fR'u+1n .BI atan2( y , " x" ) @@ -1834,7 +1866,7 @@ the square root function. \&\fBsrand(\fR[\fIexpr\^\fR]\fB)\fR uses .I expr -as a new seed for the random number generator. If no +as a new seed for the random number generator. If no .I expr is provided, the time of day will be used. The return value is the previous seed for the random @@ -1879,7 +1911,7 @@ where is a digit from 1 to 9, may be used to indicate just the text that matched the .IR n 'th -parenthesized subexpression. The sequence +parenthesized subexpression. The sequence .B \e0 represents the entire matched text, as does the character .BR & . @@ -1912,7 +1944,7 @@ Use to get a literal .BR & . See -.I "AWK Language Programming" +.I "Effective AWK Programming" for a fuller discussion of the rules for .BR &'s and backslashes in the replacement text of @@ -1958,7 +1990,7 @@ into the array .I a on the regular expression .IR r , -and returns the number of fields. If +and returns the number of fields. If .I r is omitted, .B FS @@ -2010,7 +2042,7 @@ translated to their corresponding upper-case counterparts. 
Non-alphabetic characters are left unchanged. .SS Time Functions .PP -Since one of the primary uses of AWK programs is processing log files +Since one of the primary uses of \*(AK programs is processing log files that contain time stamp information, .I gawk provides the following two functions for obtaining time stamps and @@ -2052,17 +2084,17 @@ then all of the conversions described in that man page are available to .IR gawk. .SS String Constants .PP -String constants in AWK are sequences of characters enclosed -between double quotes (\fB"\fR). Within strings, certain +String constants in \*(AK are sequences of characters enclosed +between double quotes (\fB"\fR). Within strings, certain .I "escape sequences" -are recognized, as in C. These are: +are recognized, as in C. These are: .PP .TP \w'\fB\e\^\fIddd\fR'u+1n .B \e\e A literal backslash. .TP .B \ea -The ``alert'' character; usually the \s-1ASCII\s+1 \s-1BEL\s+1 character. +The \*(lqalert\*(rq character; usually the \s-1ASCII\s+1 \s-1BEL\s+1 character. .TP .B \eb backspace. @@ -2093,7 +2125,8 @@ E.g., \fB"\ex1B"\fR is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character. .TP .BI \e ddd The character represented by the 1-, 2-, or 3-digit sequence of octal -digits. E.g. \fB"\e033"\fR is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character. +digits. +E.g., \fB"\e033"\fR is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character. .TP .BI \e c The literal character @@ -2106,12 +2139,12 @@ matches whitespace characters). .PP In compatibility mode, the characters represented by octal and hexadecimal escape sequences are treated literally when used in -regexp constants. Thus, +regexp constants. Thus, .B /a\e52b/ is equivalent to .BR /a\e*b/ . .SH FUNCTIONS -Functions in AWK are defined as follows: +Functions in \*(AK are defined as follows: .PP .RS \fBfunction \fIname\fB(\fIparameter list\fB) { \fIstatements \fB}\fR @@ -2122,20 +2155,20 @@ in either patterns or actions. 
Actual parameters supplied in the function call are used to instantiate the formal parameters declared in the function. Arrays are passed by reference, other variables are passed by value. .PP -Since functions were not originally part of the AWK language, the provision +Since functions were not originally part of the \*(AK language, the provision for local variables is rather clumsy: They are declared as extra parameters -in the parameter list. The convention is to separate local variables from -real parameters by extra spaces in the parameter list. For example: +in the parameter list. The convention is to separate local variables from +real parameters by extra spaces in the parameter list. For example: .PP .RS .ft B .nf function f(p, q, a, b) # a & b are local { - \&..... + \&.\|.\|. } -/abc/ { ... ; f(1, 2) ; ... } +/abc/ { .\|.\|. ; f(1, 2) ; .\|.\|. } .fi .ft R .RE @@ -2152,8 +2185,8 @@ to the null string and the number zero upon function invocation. .PP Use .BI return " expr" -to return a value from a function. The return value is undefined if no -value is provided, or if the function returns by ``falling off'' the +to return a value from a function. The return value is undefined if no +value is provided, or if the function returns by \*(lqfalling off\*(rq the end. .PP If @@ -2209,9 +2242,9 @@ Concatenate and line number (a variation on a theme): .PP .IR "The AWK Programming Language" , Alfred V. Aho, Brian W. Kernighan, Peter J. Weinberger, -Addison-Wesley, 1988. ISBN 0-201-07981-X. +Addison-Wesley, 1988. ISBN 0-201-07981-X. .PP -.IR "AWK Language Programming" , +.IR "Effective AWK Programming" , Edition 1.0, published by the Free Software Foundation, 1995. 
.SH POSIX COMPATIBILITY A primary goal for @@ -2222,7 +2255,7 @@ latest version of \*(UX To this end, .I gawk incorporates the following user visible -features which are not described in the AWK book, +features which are not described in the \*(AK book, but are part of the Bell Labs version of .IR awk , and are in the \*(PX standard. @@ -2239,12 +2272,12 @@ assignment appeared before any file names, the assignment would happen .I before the .B BEGIN -block was run. Applications came to depend on this ``feature.'' +block was run. Applications came to depend on this \*(lqfeature.\*(rq When .I awk was changed to match its documentation, this option was added to accommodate applications that depended upon the old behavior. -(This feature was agreed upon by both the AT&T and GNU developers.) +(This feature was agreed upon by both the AT&T and \*(GN developers.) .PP The .B \-W @@ -2252,18 +2285,18 @@ option for implementation specific features is from the \*(PX standard. .PP When processing arguments, .I gawk -uses the special option ``\fB\-\^\-\fP'' to signal the end of +uses the special option \*(lq\-\^\-\*(rq to signal the end of arguments. In compatibility mode, it will warn about, but otherwise ignore, undefined options. -In normal operation, such arguments are passed on to the AWK program for +In normal operation, such arguments are passed on to the \*(AK program for it to process. .PP -The AWK book does not define the return value of +The \*(AK book does not define the return value of .BR srand() . The \*(PX standard has it return the seed it was using, to allow keeping track -of random number sequences. Therefore +of random number sequences. Therefore .B srand() in .I gawk @@ -2401,13 +2434,13 @@ if the .I fs argument to the .B \-F -option is ``t'', then +option is \*(lqt\*(rq, then .B FS will be set to the tab character. Note that typing -.B "gawk \-F\et \&..." 
-simply causes the shell to quote the ``t,'', and does not pass -``\et'' to the +.B "gawk \-F\et \&.\|.\|." +simply causes the shell to quote the \*(lqt,\*(rq, and does not pass +\*(lq\et\*(rq to the .B \-F option. Since this is a rather ugly special case, it is not the default behavior. @@ -2416,7 +2449,7 @@ This behavior also does not occur if has been specified. To really get a tab character as the field separator, it is best to use quotes: -.BR "gawk \-F'\et' \&..." . +.BR "gawk \-F'\et' \&.\|.\|." . .ig .PP If @@ -2440,7 +2473,7 @@ maintainers, and may not even be compiled into .IR gawk . .. .SH HISTORICAL FEATURES -There are two features of historical AWK implementations that +There are two features of historical \*(AK implementations that .I gawk supports. First, it is possible to call the @@ -2464,7 +2497,7 @@ a = length($0) .ft R .RE .PP -This feature is marked as ``deprecated'' in the \*(PX standard, and +This feature is marked as \*(lqdeprecated\*(rq in the \*(PX standard, and .I gawk will issue a warning about its use if .B \-\^\-lint @@ -2479,7 +2512,7 @@ statements outside the body of a .BR for , or .B do -loop. Traditional AWK implementations have treated such usage as +loop. Traditional \*(AK implementations have treated such usage as equivalent to the .B next statement. @@ -2541,12 +2574,12 @@ and the effort to do so really is not worth it. .SH VERSION INFORMATION This man page documents .IR gawk , -version 3.0.4. +version 3.0.5. .SH AUTHORS The original version of \*(UX .I awk was designed and implemented by Alfred Aho, -Peter Weinberger, and Brian Kernighan of AT&T Bell Labs. Brian Kernighan +Peter Weinberger, and Brian Kernighan of AT&T Bell Labs. Brian Kernighan continues to maintain and enhance it. 
.PP Paul Rubin and Jay Fenlason, @@ -2581,7 +2614,7 @@ Please include your operating system and its revision, the version of what C compiler you used to compile it, and a test program and data that are as small as possible for reproducing the problem. .PP -Before sending a bug report, please do two things. First, verify that +Before sending a bug report, please do two things. First, verify that you have the latest version of .IR gawk . Many bugs (usually subtle ones) are fixed at each release, and if @@ -2597,14 +2630,14 @@ post a bug report in While the .I gawk developers occasionally read this newsgroup, posting bug reports there -is an unreliable way to report bugs. Instead, please use the electronic mail +is an unreliable way to report bugs. Instead, please use the electronic mail addresses given above. .SH ACKNOWLEDGEMENTS Brian Kernighan of Bell Labs provided valuable assistance during testing and debugging. We thank him. .SH COPYING PERMISSIONS -Copyright \(co) 1996,97,98,99 Free Software Foundation, Inc. +Copyright \(co 1996\-2000 Free Software Foundation, Inc. .PP Permission is granted to make and distribute verbatim copies of this manual page provided the copyright notice and this permission diff --git a/doc/gawk.info b/doc/gawk.info index e39a483b..0ac376a0 100644 --- a/doc/gawk.info +++ b/doc/gawk.info @@ -1,18 +1,17 @@ -This is Info file gawk.info, produced by Makeinfo version 1.68 from the -input file ./gawk.texi. +This is gawk.info, produced by makeinfo version 4.0 from ./gawk.texi. INFO-DIR-SECTION Programming Languages START-INFO-DIR-ENTRY -* Gawk: (gawk.info). A Text Scanning and Processing Language. +* Gawk: (gawk). A Text Scanning and Processing Language. END-INFO-DIR-ENTRY This file documents `awk', a program that you can use to select particular records in a file and perform operations upon them. - This is Edition 1.0.4 of `Effective AWK Programming', for the -3.0.4 version of the GNU implementation of AWK. 
+ This is Edition 1.0.5 of `Effective AWK Programming', for the +3.0.5 version of the GNU implementation of AWK. - Copyright (C) 1989, 1991, 92, 93, 96, 97, 98, 99 Free Software + Copyright (C) 1989, 1991, 1992, 1993, 1996-2000 Free Software Foundation, Inc. Permission is granted to make and distribute verbatim copies of this @@ -38,8 +37,8 @@ General Introduction This file documents `awk', a program that you can use to select particular records in a file and perform operations upon them. - This is Edition 1.0.4 of `Effective AWK Programming', -for the 3.0.4 version of the GNU implementation + This is Edition 1.0.5 of `Effective AWK Programming', +for the 3.0.5 version of the GNU implementation of AWK. * Menu: @@ -173,13 +172,13 @@ of AWK. * Concatenation:: Concatenating strings. * Assignment Ops:: Changing the value of a variable or a field. * Increment Ops:: Incrementing the numeric value of a variable. -* Truth Values:: What is "true" and what is "false". +* Truth Values:: What is ``true'' and what is ``false''. * Typing and Comparison:: How variables acquire types, and how this affects comparison of numbers and strings with `<', etc. * Boolean Ops:: Combining comparison expressions using boolean - operators `||' ("or"), `&&' - ("and") and `!' ("not"). + operators `||' (``or''), `&&' + (``and'') and `!' (``not''). * Conditional Exp:: Conditional expressions select between two subexpressions under control of a third subexpression. @@ -316,7 +315,7 @@ of AWK. * Time Functions Summary:: Built-in time functions. * String Constants Summary:: Escape sequences in strings. * Functions Summary:: Defining and calling functions. -* Historical Features:: Some undocumented but supported "features". +* Historical Features:: Some undocumented but supported ``features''. * Gawk Distribution:: What is in the `gawk' distribution. * Getting:: How to get the distribution. * Extracting:: How to extract the distribution. @@ -348,12 +347,18 @@ of AWK. 
To Miriam, for making me complete. + To Chana, for the joy you bring us. + To Rivka, for the exponential increase. + To Nachum, for the added dimension. + + To Malka, for the new beginning. + File: gawk.info, Node: Preface, Next: What Is Awk, Prev: Top, Up: Top @@ -512,7 +517,7 @@ Pat Rankin, Michal Jaegermann, Darrel Hankerson and Scott Deifik updated their respective sections for Edition 1.0. Robert J. Chassell provided much valuable advice on the use of -Texinfo. He also deserves special thanks for convincing me *not* to +Texinfo. He also deserves special thanks for convincing me _not_ to title this Info file `How To Gawk Politely'. Karl Berry helped significantly with the TeX part of Texinfo. @@ -552,6 +557,8 @@ I also must acknowledge my gratitude to G-d, for the many opportunities He has sent my way, as well as for the gifts He has given me with which to take advantage of those opportunities. + + Arnold Robbins Atlanta, Georgia February, 1997 @@ -639,7 +646,7 @@ Library of `awk' Functions: Library Functions.; also *note Practical your memory about a particular feature. If you find terms that you aren't familiar with, try looking them up -in the glossary (*note Glossary::.). +in the glossary (*note Glossary::). Most of the time complete `awk' programs are used as examples, but in some of the more advanced sections, only the part of the `awk' program @@ -658,6 +665,8 @@ Dark Corners Who opened that window shade?!? Count Dracula + + Until the POSIX standard (and `The Gawk Manual'), many features of `awk' were either poorly documented, or not documented at all. Descriptions of such features (often called "dark corners") are noted @@ -1043,7 +1052,7 @@ like this: : The colon ensures execution by the standard shell. awk 'PROGRAM' "$@" - Using this technique, it is *vital* to enclose the PROGRAM in single + Using this technique, it is _vital_ to enclose the PROGRAM in single quotes to protect it from interpretation by the shell. 
If you omit the quotes, only a shell wizard can predict the results. @@ -1097,7 +1106,7 @@ later time. *Caution:* As mentioned in *Note One-shot Throw-away `awk' Programs: One-shot, you can enclose small to medium programs in single quotes, in order to keep your shell scripts self-contained. When doing so, -*don't* put an apostrophe (i.e., a single quote) into a comment (or +_don't_ put an apostrophe (i.e., a single quote) into a comment (or anywhere else in your program). The shell will interpret the quote as the closing quote for the entire program. As a result, usually the shell will print a message about mismatched quotes, and if `awk' @@ -1142,7 +1151,7 @@ special shell characters. In an `awk' rule, either the pattern or the action can be omitted, but not both. If the pattern is omitted, then the action is performed -for *every* input line. If the action is omitted, the default action +for _every_ input line. If the action is omitted, the default action is to print all lines that match the pattern. Thus, we could leave out the action (the `print' statement and the @@ -1179,7 +1188,7 @@ the pattern and also has `print $0' as the action. Each rule's action is enclosed in its own pair of braces. This `awk' program prints every line that contains the string `12' -*or* the string `21'. If a line contains both strings, it is printed +_or_ the string `21'. If a line contains both strings, it is printed twice, once by each rule. This is what happens if we run this program on our two sample data @@ -1315,11 +1324,11 @@ expression or a string. *Caution: backslash continuation does not work as described above with the C shell.* Continuation with backslash works for `awk' -programs in files, and also for one-shot programs *provided* you are +programs in files, and also for one-shot programs _provided_ you are using a POSIX-compliant shell, such as the Bourne shell or Bash, the GNU Bourne-Again shell. But the C shell (`csh') behaves differently! 
There, you must use two backslashes in a row, followed by a newline. -Note also that when using the C shell, *every* newline in your awk +Note also that when using the C shell, _every_ newline in your awk program must be escaped with a backslash. To illustrate: % awk 'BEGIN { \ @@ -1333,11 +1342,11 @@ analogous to the standard shell's `$' and `>'. `awk' is a line-oriented language. Each rule's action has to begin on the same line as the pattern. To have the pattern and action on -separate lines, you *must* use backslash continuation--there is no +separate lines, you _must_ use backslash continuation--there is no other way. Note that backslash continuation and comments do not mix. As soon as -`awk' sees the `#' that starts a comment, it ignores *everything* on +`awk' sees the `#' that starts a comment, it ignores _everything_ on the rest of the line. For example: $ gawk 'BEGIN { print "dont panic" # a friendly \ @@ -1405,10 +1414,10 @@ can avoid the (usually lengthy) compilation part of the typical edit-compile-test-debug cycle of software development. Complex programs have been written in `awk', including a complete -retargetable assembler for eight-bit microprocessors (*note -Glossary::., for more information) and a microcode assembler for a -special purpose Prolog computer. However, `awk''s capabilities are -strained by tasks of such complexity. +retargetable assembler for eight-bit microprocessors (*note Glossary::, +for more information) and a microcode assembler for a special purpose +Prolog computer. However, `awk''s capabilities are strained by tasks of +such complexity. If you find yourself writing `awk' scripts of more than, say, a few hundred lines, you might consider using a different programming @@ -1506,7 +1515,7 @@ that matches every input record whose text belongs to that set. both. Such a regexp matches any string that contains that sequence. Thus, the regexp `foo' matches any string containing `foo'. 
Therefore, the pattern `/foo/' matches any input record containing the three -characters `foo', *anywhere* in the record. Other kinds of regexps let +characters `foo', _anywhere_ in the record. Other kinds of regexps let you specify more complicated classes of strings. * Menu: @@ -1562,8 +1571,8 @@ statements. (*Note Control Statements in Actions: Statements.) `EXP !~ /REGEXP/' This is true if the expression EXP (taken as a character string) - is *not* matched by REGEXP. The following example matches, or - selects, all input records whose first field *does not* contain + is _not_ matched by REGEXP. The following example matches, or + selects, all input records whose first field _does not_ contain the upper-case letter `J': $ awk '$1 !~ /J/' inventory-shipped @@ -1768,7 +1777,7 @@ themselves. if ("line1\nLINE 2" ~ /1$/) ... `.' - The period, or dot, matches any single character, *including* the + The period, or dot, matches any single character, _including_ the newline character. For example: .P @@ -1784,7 +1793,7 @@ themselves. Other versions of `awk' may not be able to match the NUL character. `[...]' - This is called a "character list". It matches any *one* of the + This is called a "character list". It matches any _one_ of the characters that are enclosed in the square brackets. For example: [MVX] @@ -1822,7 +1831,7 @@ themselves. notion of what is an alphabetic character differs in the USA and in France. - A character class is only valid in a regexp *inside* the brackets + A character class is only valid in a regexp _inside_ the brackets of a character list. Character classes consist of `[:', a keyword denoting the class, and `:]'. Here are the character classes defined by the POSIX standard. @@ -1871,7 +1880,7 @@ themselves. characters, you had to write `/[A-Za-z0-9]/'. If your character set had other alphabetic characters in it, this would not match them. 
With the POSIX character classes, you can write - `/[[:alnum:]]/', and this will match *all* the alphabetic and + `/[[:alnum:]]/', and this will match _all_ the alphabetic and numeric characters in your character set. Two additional special sequences can appear in character lists. @@ -1879,7 +1888,7 @@ themselves. symbols (called "collating elements") that are represented with more than one character, as well as several characters that are equivalent for "collating", or sorting, purposes. (E.g., in - French, a plain "e" and a grave-accented "`e" are equivalent.) + French, a plain "e" and a grave-accented "e`" are equivalent.) Collating Symbols A "collating symbol" is a multi-character collating element @@ -1892,8 +1901,8 @@ themselves. An "equivalence class" is a locale-specific name for a list of characters that are equivalent. The name is enclosed in `[=' and `=]'. For example, the name `e' might be used to - represent all of "e," "`e," and "'e." In this case, `[[=e]]' - is a regexp that matches any of `e', `'e', or ``e'. + represent all of "e," "e`," and "e'." In this case, `[[=e]]' + is a regexp that matches any of `e', `e'', or `e`'. These features are very valuable in non-English speaking locales. @@ -1904,7 +1913,7 @@ themselves. `[^ ...]' This is a "complemented character list". The first character after - the `[' *must* be a `^'. It matches any characters *except* those + the `[' _must_ be a `^'. It matches any characters _except_ those in the square brackets. For example: [^0-9] @@ -1942,7 +1951,7 @@ themselves. of one `p' followed by any number of `h's. This will also match just `p' if no `h's are present. - The `*' repeats the *smallest* possible preceding expression. + The `*' repeats the _smallest_ possible preceding expression. (Use parentheses if you wish to repeat a larger expression.) It finds as many repetitions as possible. For example: @@ -1996,7 +2005,7 @@ themselves. `egrep' consistent with each other. 
However, since old programs may use `{' and `}' in regexp - constants, by default `gawk' does *not* match interval expressions + constants, by default `gawk' does _not_ match interval expressions in regexps. If either `--posix' or `--re-interval' are specified (*note Command Line Options: Options.), then interval expressions are allowed in regexps. @@ -2087,7 +2096,7 @@ of two evils. Options.) control how `gawk' interprets characters in regexps. No options - In the default case, `gawk' provide all the facilities of POSIX + In the default case, `gawk' provides all the facilities of POSIX regexps and the GNU regexp operators described in *Note Regular Expression Operators: Regexp Operators. However, interval expressions are not supported. @@ -2136,8 +2145,8 @@ converts the first field to lower-case before matching against it. This will work in any POSIX-compliant implementation of `awk'. Another method, specific to `gawk', is to set the variable -`IGNORECASE' to a non-zero value (*note Built-in Variables::.). When -`IGNORECASE' is not zero, *all* regexp and string operations ignore +`IGNORECASE' to a non-zero value (*note Built-in Variables::). When +`IGNORECASE' is not zero, _all_ regexp and string operations ignore case. Changing the value of `IGNORECASE' dynamically controls the case sensitivity of your program as it runs. Case is significant by default because `IGNORECASE' (like most variables) is initialized to zero. @@ -2196,20 +2205,20 @@ In other words, how many is "one or more"--will `awk' match two, three, or all four `a' characters? The answer is, `awk' (and POSIX) regular expressions always match -the leftmost, *longest* sequence of input characters that can match. +the leftmost, _longest_ sequence of input characters that can match. Thus, in this example, all four `a' characters are replaced with `<A>'. $ echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }' -| <A>bcd For simple match/no-match tests, this is not so important. 
But when -doing regexp-based field and record splitting, and text matching and -substitutions with the `match', `sub', `gsub', and `gensub' functions, -it is very important. *Note Built-in Functions for String -Manipulation: String Functions, for more information on these functions. -Understanding this principle is also important for regexp-based record -and field splitting (*note How Input is Split into Records: Records., -and also *note Specifying How Fields are Separated: Field Separators.). +doing text matching and substitutions with the `match', `sub', `gsub', +and `gensub' functions, it is very important. *Note Built-in Functions +for String Manipulation: String Functions, for more information on +these functions. Understanding this principle is also important for +regexp-based record and field splitting (*note How Input is Split into +Records: Records., and also *note Specifying How Fields are Separated: +Field Separators.). File: gawk.info, Node: Computed Regexps, Prev: Leftmost Longest, Up: Regexp @@ -2223,7 +2232,7 @@ expression. The expression is evaluated, and converted if necessary to a string; the contents of the string are used as the regexp. A regexp that is computed in this way is called a "dynamic regexp". For example: - BEGIN { identifier_regexp = "[A-Za-z_][A-Za-z_0-9]+" } + BEGIN { identifier_regexp = "[A-Za-z_][A-Za-z_0-9]*" } $0 ~ identifier_regexp { print } sets `identifier_regexp' to a regexp that describes `awk' variable @@ -2233,7 +2242,7 @@ names, and tests if the input record matches this regexp. difference between a regexp constant enclosed in slashes, and a string constant enclosed in double quotes. 
If you are going to use a string constant, you have to understand that the string is in essence scanned -*twice*; the first time when `awk' reads your program, and the second +_twice_; the first time when `awk' reads your program, and the second time when it goes to match the string on the left-hand side of the operator with the pattern on the right. This is true of any string valued expression (such as `identifier_regexp' above), not just string @@ -2279,7 +2288,7 @@ command) or from files whose names you specify on the `awk' command line. If you specify input files, `awk' reads them in order, reading all the data from one before going on to the next. The name of the current input file can be found in the built-in variable `FILENAME' -(*note Built-in Variables::.). +(*note Built-in Variables::). The input is read in units called "records", and processed by the rules of your program one record at a time. By default, each record is @@ -2469,7 +2478,7 @@ separated or "parsed" by the interpreter into chunks called "fields". By default, fields are separated by whitespace, like words in a line. Whitespace in `awk' means any string of one or more spaces, tabs or newlines;(1) other characters such as formfeed, and so on, that are -considered whitespace by other languages are *not* considered +considered whitespace by other languages are _not_ considered whitespace by `awk'. The purpose of fields is to make it more convenient for you to refer @@ -2518,8 +2527,8 @@ field contains the string `foo'. The operator `~' is called a Usage.); it tests whether a string (here, the field `$1') matches a given regular expression. - By contrast, the following example looks for `foo' in *the entire -record* and prints the first field and the last field for each input + By contrast, the following example looks for `foo' in _the entire +record_ and prints the first field and the last field for each input record containing a match. 
$ awk '/foo/ { print $1, $NF }' BBS-list @@ -2577,7 +2586,7 @@ behave differently.) As mentioned in *Note Examining Fields: Fields, the number of fields in the current record is stored in the built-in variable `NF' (also -*note Built-in Variables::.). The expression `$NF' is not a special +*note Built-in Variables::). The expression `$NF' is not a special feature: it is the direct consequence of evaluating `NF' and using its value as a field number. @@ -2589,7 +2598,7 @@ Changing the Contents of a Field You can change the contents of a field as seen by `awk' within an `awk' program; this changes what `awk' perceives as the current input -record. (The actual input is untouched; `awk' *never* modifies the +record. (The actual input is untouched; `awk' _never_ modifies the input file.) Consider this example and its output: @@ -2648,11 +2657,11 @@ existing fields. This recomputation affects and is affected by `NF' (the number of fields; *note Examining Fields: Fields.), and by a feature that has not been discussed yet, the "output field separator", `OFS', which is used -to separate the fields (*note Output Separators::.). For example, the +to separate the fields (*note Output Separators::). For example, the value of `NF' is set to the number of the highest field you create. - Note, however, that merely *referencing* an out-of-range field does -*not* change the value of either `$0' or `NF'. Referencing an + Note, however, that merely _referencing_ an out-of-range field does +_not_ change the value of either `$0' or `NF'. Referencing an out-of-range field only produces an empty string. For example: if ($(NF+1) != "") @@ -2738,7 +2747,7 @@ would be split into three fields: `m', `*g' and `*gai*pan'. Note the leading spaces in the values of the second and third fields. The field separator is represented by the built-in variable `FS'. -Shell programmers take note! `awk' does *not* use the name `IFS' which +Shell programmers take note! 
`awk' does _not_ use the name `IFS' which is used by the POSIX compatible shells (such as the Bourne shell, `sh', or the GNU Bourne-Again Shell, Bash). @@ -2805,7 +2814,7 @@ example, the assignment: makes every area of an input line that consists of a comma followed by a space and a tab, into a field separator. (`\t' is an "escape sequence" -that stands for a tab; *note Escape Sequences::., for the complete list +that stands for a tab; *note Escape Sequences::, for the complete list of similar escape sequences.) For a less trivial example of a regular expression, suppose you want @@ -2893,7 +2902,7 @@ capital `F'. Contrast this with `-f', which specifies a file containing an `awk' program. Case is significant in command line options: the `-F' and `-f' options have nothing to do with each other. You can use both options at the same time to set the `FS' variable -*and* get an `awk' program from a file. +_and_ get an `awk' program from a file. The value used for the argument to `-F' is processed in exactly the same way as assignments to the built-in variable `FS'. This means that @@ -2906,7 +2915,7 @@ would have to type: Since `\' is used for quoting in the shell, `awk' will see `-F\\'. Then `awk' processes the `\\' for escape characters (*note Escape -Sequences::.), finally yielding a single `\' to be used for the field +Sequences::), finally yielding a single `\' to be used for the field separator. As a special case, in compatibility mode (*note Command Line @@ -2971,7 +2980,7 @@ should reflect the old value of `FS', not the new one. However, many implementations of `awk' do not work this way. Instead, they defer splitting the fields until a field is actually -referenced. The fields will be split using the *current* value of +referenced. The fields will be split using the _current_ value of `FS'! (d.c.) This behavior can be difficult to diagnose. The following example illustrates the difference between the two methods. 
(The `sed'(1) command prints just the first line of `/etc/passwd'.) @@ -3029,8 +3038,8 @@ numbers are run together; or in the output of programs that did not anticipate the use of their output as input for other programs. An example of the latter is a table where all the columns are lined -up by the use of a variable number of spaces and *empty fields are just -spaces*. Clearly, `awk''s normal field splitting based on `FS' will +up by the use of a variable number of spaces and _empty fields are just +spaces_. Clearly, `awk''s normal field splitting based on `FS' will not work well in this case. Although a portable `awk' program can use a series of `substr' calls on `$0' (*note Built-in Functions for String Manipulation: String Functions.), this is awkward and inefficient for a @@ -3039,7 +3048,7 @@ large number of fields. The splitting of an input record into fixed-width fields is specified by assigning a string containing space-separated numbers to the built-in variable `FIELDWIDTHS'. Each number specifies the width -of the field *including* columns between fields. If you want to ignore +of the field _including_ columns between fields. If you want to ignore the columns between fields, you can specify the width as a separate field that is subsequently ignored. @@ -3155,7 +3164,7 @@ second case, this special processing is not done (d.c.). separate the fields in the record. One way to do this is to divide each of the lines into fields in the normal manner. This happens by default as the result of a special feature: when `RS' is set to the empty -string, the newline character *always* acts as a field separator. This +string, the newline character _always_ acts as a field separator. This is in addition to whatever field separations result from `FS'. The original motivation for this special exception was probably to @@ -3181,7 +3190,6 @@ a mailing list in a file named `addresses', that looks like this: John Smith 456 Tree-lined Avenue Smallville, MW 98765-4321 - ... 
A simple program to process this file would look like this: @@ -3272,11 +3280,11 @@ File: gawk.info, Node: Getline Intro, Next: Plain Getline, Prev: Getline, Up Introduction to `getline' ------------------------- - This command is used in several different ways, and should *not* be + This command is used in several different ways, and should _not_ be used by beginners. It is covered here because this is the chapter on input. The examples that follow the explanation of the `getline' command include material that has not been covered yet. Therefore, -come back and study the `getline' command *after* you have reviewed the +come back and study the `getline' command _after_ you have reviewed the rest of this Info file and have a good knowledge of how `awk' works. `getline' returns one if it finds a record, and zero if the end of @@ -3298,7 +3306,7 @@ Using `getline' with No Arguments from the current input file. All it does in this case is read the next input record and split it up into fields. This is useful if you've finished processing the current record, but you want to do some special -processing *right now* on the next record. Here's an example: +processing _right now_ on the next record. Here's an example: awk '{ if ((t = index($0, "/*")) != 0) { @@ -3658,10 +3666,10 @@ relational operator; otherwise it could be confused with a redirection of the current record (such as `$1'), variables, or any `awk' expressions. Numeric values are converted to strings, and then printed. - The `print' statement is completely general for computing *what* -values to print. However, with two exceptions, you cannot specify *how* + The `print' statement is completely general for computing _what_ +values to print. However, with two exceptions, you cannot specify _how_ to print them--how many columns, whether to use exponential notation or -not, and so on. (For the exceptions, *note Output Separators::., and +not, and so on. 
(For the exceptions, *note Output Separators::, and *Note Controlling Numeric Output with `print': OFMT.) For that, you need the `printf' statement (*note Using `printf' Statements for Fancier Printing: Printf.). @@ -3689,7 +3697,7 @@ Examples of `print' Statements Here is an example of printing a string that contains embedded newlines (the `\n' is an escape sequence, used to represent the newline -character; *note Escape Sequences::.): +character; *note Escape Sequences::): $ awk 'BEGIN { print "line one\nline two\nline three" }' -| line one @@ -3954,6 +3962,11 @@ parameters to use, such as the field width. `s' This prints a string. +`u' + This prints an unsigned decimal number. (This format is of + marginal use, since all numbers in `awk' are floating point. It + is provided primarily for compatibility with C.) + `x' `X' This prints an unsigned hexadecimal integer. (In hexadecimal, or @@ -4180,7 +4193,7 @@ for `printf' also. This type of redirection prints the items into the output file OUTPUT-FILE. The file name OUTPUT-FILE can be any expression. Its value is changed to a string and then used as a file name - (*note Expressions::.). + (*note Expressions::). When this type of redirection is used, the OUTPUT-FILE is erased before the first output is written to it. Subsequent writes to @@ -4385,10 +4398,10 @@ these file names is done by `gawk' itself. For example, using `/dev/fd/4' for output will actually write on file descriptor 4, and not on a new file descriptor that was `dup''ed from file descriptor 4. Most of the time this does not matter; however, it is important to -*not* close any of the files related to file descriptors 0, 1, and 2. +_not_ close any of the files related to file descriptors 0, 1, and 2. If you do close one of these files, unpredictable behavior will result. - The special files that provide process-related information may + The special files that provide process-related information will disappear in a future version of `gawk'. 
*Note Probable Future Extensions: Future Extensions. @@ -4423,7 +4436,7 @@ or close(COMMAND) The argument FILENAME or COMMAND can be any expression. Its value -must *exactly* match the string that was used to open the file or start +must _exactly_ match the string that was used to open the file or start the command (spaces and other "irrelevant" characters included). For example, if you open a pipe with this: @@ -4518,13 +4531,13 @@ calls, as well as combinations of these with various operators. * Concatenation:: Concatenating strings. * Assignment Ops:: Changing the value of a variable or a field. * Increment Ops:: Incrementing the numeric value of a variable. -* Truth Values:: What is "true" and what is "false". +* Truth Values:: What is ``true'' and what is ``false''. * Typing and Comparison:: How variables acquire types, and how this affects comparison of numbers and strings with `<', etc. * Boolean Ops:: Combining comparison expressions using boolean - operators `||' ("or"), `&&' - ("and") and `!' ("not"). + operators `||' (``or''), `&&' + (``and'') and `!' (``not''). * Conditional Exp:: Conditional expressions select between two subexpressions under control of a third subexpression. @@ -4777,7 +4790,7 @@ the `awk' program in an array named `ARGV' (*note Using `ARGC' and `ARGV': ARGC and ARGV.). `awk' processes the values of command line assignments for escape -sequences (d.c.) (*note Escape Sequences::.). +sequences (d.c.) (*note Escape Sequences::). File: gawk.info, Node: Conversion, Next: Arithmetic Ops, Prev: Variables, Up: Expressions @@ -4811,14 +4824,14 @@ interpreted as valid numbers are converted to zero. The exact manner in which numbers are converted into strings is controlled by the `awk' built-in variable `CONVFMT' (*note Built-in -Variables::.). Numbers are converted using the `sprintf' function +Variables::). Numbers are converted using the `sprintf' function (*note Built-in Functions for String Manipulation: String Functions.) 
with `CONVFMT' as the format specifier. `CONVFMT''s default value is `"%.6g"', which prints a value with at least six significant digits. For some applications you will want to -change it to specify more precision. Double precision on most modern -machines gives you 16 or 17 decimal digits of precision. +change it to specify more precision. On most modern machines, you must +print 17 digits to capture a floating point number's value exactly. Strange results can happen if you set `CONVFMT' to a string that doesn't tell `sprintf' how to format floating point numbers in a useful @@ -4826,7 +4839,7 @@ way. For example, if you forget the `%' in the format, all numbers will be converted to the same constant string. As a special case, if a number is an integer, then the result of -converting it to a string is *always* an integer, no matter what the +converting it to a string is _always_ an integer, no matter what the value of `CONVFMT' may be. Given the following code fragment: CONVFMT = "%2.2f" @@ -4855,7 +4868,9 @@ Arithmetic Operators The `awk' language uses the common arithmetic operators when evaluating expressions. All of these arithmetic operators follow normal -precedence rules, and work as you would expect them to. +precedence rules, and work as you would expect them to. Arithmetic +operations are evaluated using double precision floating point, which +has the usual problems of inexactness and exceptions.(1) Here is a file `grades' containing a list of student names and three test scores per student (it's a small class): @@ -4892,8 +4907,9 @@ highest precedence to lowest: Multiplication. `X / Y' - Division. Since all numbers in `awk' are real numbers, the result - is not rounded to an integer: `3 / 4' has the value 0.75. + Division. Since all numbers in `awk' are floating point numbers, + the result is not rounded to an integer: `3 / 4' has the value + 0.75. `X % Y' Remainder. 
The quotient is rounded toward zero to an integer, @@ -4923,6 +4939,12 @@ highest precedence to lowest: operators all have the same precedence, and addition and subtraction have the same precedence. + ---------- Footnotes ---------- + + (1) David Goldberg, `What Every Computer Scientist Should Know About +Floating-point Arithmetic' (http://www.validgh.com/goldberg/paper.ps), +`ACM Computing Surveys' *23*, 1 (1991-03), 5-48. + File: gawk.info, Node: Concatenation, Next: Assignment Ops, Prev: Arithmetic Ops, Up: Expressions @@ -4932,6 +4954,8 @@ String Concatenation It seemed like a good idea at the time. Brian Kernighan + + There is only one string operation: concatenation. It does not have a specific operator to represent it. Instead, concatenation is performed by writing expressions next to one another, with no operator. @@ -5002,7 +5026,7 @@ makes itself felt through the alteration of the variable. We call this a "side effect". The left-hand operand of an assignment need not be a variable (*note -Variables::.); it can also be a field (*note Changing the Contents of a +Variables::); it can also be a field (*note Changing the Contents of a Field: Changing Fields.) or an array element (*note Arrays in `awk': Arrays.). These are all called "lvalues", which means they can appear on the left-hand side of an assignment operator. The right-hand @@ -5010,7 +5034,7 @@ operand may be any expression; it produces the new value which the assignment stores in the specified variable, field or array element. (Such values are called "rvalues"). - It is important to note that variables do *not* have permanent types. + It is important to note that variables do _not_ have permanent types. The type of a variable is simply the type of whatever value it happens to hold at the moment. In the following program fragment, the variable `foo' has a numeric value at first, and a string value later on: @@ -5031,7 +5055,7 @@ zero. 
After executing this code, the value of `foo' is five: (Note that using a variable as a number and then later as a string can be confusing and is poor programming style. The above examples -illustrate how `awk' works, *not* how you should write your own +illustrate how `awk' works, _not_ how you should write your own programs!) An assignment is an expression, so it has a value: the same value @@ -5066,7 +5090,7 @@ This is equivalent to the following: Use whichever one makes the meaning of your program clearer. There are situations where using `+=' (or any assignment operator) -is *not* the same as simply repeating the left-hand operand in the +is _not_ the same as simply repeating the left-hand operand in the right-hand expression. For example: # Thanks to Pat Rankin for this example @@ -5086,7 +5110,7 @@ will return different values each time it is called. (Arrays and the Arrays, and see *Note Numeric Built-in Functions: Numeric Functions, for more information). This example illustrates an important fact about the assignment operators: the left-hand expression is only -evaluated *once*. +evaluated _once_. It is also up to the implementation as to which expression is evaluated first, the left-hand one or the right-hand one. Consider @@ -5145,7 +5169,7 @@ The assignment expression `V += 1' is completely equivalent. Writing the `++' after the variable specifies post-increment. This increments the variable value just the same; the difference is that the -value of the increment expression itself is the variable's *old* value. +value of the increment expression itself is the variable's _old_ value. Thus, if `foo' has the value four, then the expression `foo++' has the value four, but it changes the value of `foo' to five. @@ -5173,7 +5197,7 @@ lvalue to pre-decrement or after it to post-decrement. `LVALUE++' This expression increments LVALUE, but the value of the expression - is the *old* value of LVALUE. + is the _old_ value of LVALUE. 
`--LVALUE' Like `++LVALUE', but instead of adding, it subtracts. It @@ -5181,7 +5205,7 @@ lvalue to pre-decrement or after it to post-decrement. `LVALUE--' Like `LVALUE++', but instead of adding, it subtracts. It - decrements LVALUE. The value of the expression is the *old* value + decrements LVALUE. The value of the expression is the _old_ value of LVALUE. @@ -5195,7 +5219,7 @@ concepts of "true" and "false." Such languages usually use the special constants `true' and `false', or perhaps their upper-case equivalents. `awk' is different. It borrows a very simple concept of true and -false from C. In `awk', any non-zero numeric value, *or* any non-empty +false from C. In `awk', any non-zero numeric value, _or_ any non-empty string value is true. Any other value (zero or the null string, `""') is false. The following program will print `A strange truth value' three times: @@ -5221,6 +5245,8 @@ Variable Typing and Comparison Expressions The Guide is definitive. Reality is frequently inaccurate. The Hitchhiker's Guide to the Galaxy + + Unlike other programming languages, `awk' variables do not have a fixed type. Instead, they can be either a number or a string, depending upon the value that is assigned to them. @@ -5275,7 +5301,7 @@ according to the following, symmetric, matrix: STRNUM | string numeric numeric --------+---------------------------------------------- - The basic idea is that user input that looks numeric, and *only* + The basic idea is that user input that looks numeric, and _only_ user input, should be treated as numeric, even though it is actually made of characters, and is therefore also a string. @@ -5397,7 +5423,7 @@ abbreviation for this comparison expression: $0 ~ /REGEXP/ - One special place where `/foo/' is *not* an abbreviation for `$0 ~ + One special place where `/foo/' is _not_ an abbreviation for `$0 ~ /foo/' is when it is the right-hand operand of `~' or `!~'! 
*Note Using Regular Expression Constants: Using Constant Regexps, where this is discussed in more detail. @@ -5443,7 +5469,7 @@ you can use one as a pattern to control the execution of rules. `BOOLEAN1 || BOOLEAN2' True if at least one of BOOLEAN1 or BOOLEAN2 is true. For example, the following statement prints all records in the input - that contain *either* `2400' or `foo', or both. + that contain _either_ `2400' or `foo', or both. if ($0 ~ /2400/ || $0 ~ /foo/) print @@ -5453,7 +5479,7 @@ you can use one as a pattern to control the execution of rules. `! BOOLEAN' True if BOOLEAN is false. For example, the following program - prints all records in the input file `BBS-list' that do *not* + prints all records in the input file `BBS-list' that do _not_ contain the string `foo'. awk '{ if (! ($0 ~ /foo/)) print }' BBS-list @@ -5512,9 +5538,9 @@ value becomes the value of the whole expression. x > 0 ? x : -x Each time the conditional expression is computed, exactly one of -IF-TRUE-EXP and IF-FALSE-EXP is computed; the other is ignored. This -is important when the expressions contain side effects. For example, -this conditional expression examines element `i' of either array `a' or +IF-TRUE-EXP and IF-FALSE-EXP is used; the other is ignored. This is +important when the expressions have side effects. For example, this +conditional expression examines element `i' of either array `a' or array `b', and increments `i'. x == y ? a[i++] : b[i++] @@ -5832,7 +5858,7 @@ that contain both `2400' and `foo'. -| fooey 555-1234 2400/1200/300 B The following command prints all records in `BBS-list' that contain -*either* `2400' or `foo', or both. +_either_ `2400' or `foo', or both. $ awk '/2400/ || /foo/' BBS-list -| alpo-net 555-3412 2400/1200/300 A @@ -5843,7 +5869,7 @@ that contain both `2400' and `foo'. 
-| sdace 555-3430 2400/1200/300 A -| sabafoo 555-2127 1200/300 C - The following command prints all records in `BBS-list' that do *not* + The following command prints all records in `BBS-list' that do _not_ contain the string `foo'. $ awk '! /foo/' BBS-list @@ -5964,7 +5990,7 @@ been read. For example: that contain the string `foo'. The `BEGIN' rule prints a title for the report. There is no need to use the `BEGIN' rule to initialize the counter `n' to zero, as `awk' does this automatically (*note -Variables::.). +Variables::). The second rule increments the variable `n' every time a record containing the pattern `foo' is read. The `END' rule prints the value @@ -6026,7 +6052,7 @@ it. The second point is similar to the first, but from the other direction. Inside an `END' rule, what is the value of `$0' and `NF'? Traditionally, due largely to implementation issues, `$0' and `NF' were -*undefined* inside an `END' rule. The POSIX standard specified that +_undefined_ inside an `END' rule. The POSIX standard specified that `NF' was available in an `END' rule, containing the number of fields from the last input record. Due most probably to an oversight, the standard does not say that `$0' is also preserved, although logically @@ -6050,7 +6076,7 @@ File: gawk.info, Node: Empty, Prev: BEGIN/END, Up: Pattern Overview The Empty Pattern ----------------- - An empty (i.e. non-existent) pattern is considered to match *every* + An empty (i.e. non-existent) pattern is considered to match _every_ input record. For example, the program: awk '{ print $1 }' BBS-list @@ -6093,7 +6119,7 @@ well. An omitted action is equivalent to `{ print $0 }'. Here are the kinds of statements supported in `awk': * Expressions, which can call functions or assign values to variables - (*note Expressions::.). Executing this kind of statement simply + (*note Expressions::). Executing this kind of statement simply computes the value of the expression. 
This is useful when the expression has side effects (*note Assignment Expressions: Assignment Ops.). @@ -6720,7 +6746,7 @@ specific to `gawk' are marked with an asterisk, `*'. the `gensub', `gsub', `index', `match', `split' and `sub' functions, record termination with `RS', and field splitting with `FS' all ignore case when doing their particular regexp operations. - The value of `IGNORECASE' does *not* affect array subscripting. + The value of `IGNORECASE' does _not_ affect array subscripting. *Note Case-sensitivity in Matching: Case-sensitivity. If `gawk' is in compatibility mode (*note Command Line Options: @@ -6738,7 +6764,7 @@ specific to `gawk' are marked with an asterisk, `*'. general expressions; this is now done by `CONVFMT'. `OFS' - This is the output field separator (*note Output Separators::.). + This is the output field separator (*note Output Separators::). It is output between the fields output by a `print' statement. Its default value is `" "', a string consisting of a single space. @@ -6947,7 +6973,7 @@ In this example, `ARGV[0]' contains `"awk"', `ARGV[1]' contains special command line options, with their arguments, are also not entered. This includes variable assignments done with the `-v' option (*note Command Line Options: Options.). Normal variable assignments on -the command line *are* treated as arguments, and do show up in the +the command line _are_ treated as arguments, and do show up in the `ARGV' array. $ cat showargs.awk @@ -7056,6 +7082,7 @@ array with the same name in the same `awk' program. * Multi-dimensional:: Emulating multi-dimensional arrays in `awk'. * Multi-scanning:: Scanning multi-dimensional arrays. +* Array Efficiency:: Implementation-specific tips. File: gawk.info, Node: Array Intro, Next: Reference to Elements, Prev: Arrays, Up: Arrays @@ -7193,9 +7220,9 @@ index `2', you could write this statement: if (2 in frequencies) print "Subscript 2 is present." 
- Note that this is *not* a test of whether or not the array -`frequencies' contains an element whose *value* is two. (There is no -way to do that except to scan all the elements.) Also, this *does not* + Note that this is _not_ a test of whether or not the array +`frequencies' contains an element whose _value_ is two. (There is no +way to do that except to scan all the elements.) Also, this _does not_ create `frequencies[2]', while the following (incorrect) alternative would do so: @@ -7365,7 +7392,7 @@ the presence of that element will return zero (i.e. false): if (4 in foo) print "This will never be printed" - It is important to note that deleting an element is *not* the same + It is important to note that deleting an element is _not_ the same as assigning it a null value (the empty string, `""'). foo[4] = "" @@ -7410,8 +7437,8 @@ File: gawk.info, Node: Numeric Array Subscripts, Next: Uninitialized Subscript Using Numbers to Subscript Arrays ================================= - An important aspect of arrays to remember is that *array subscripts -are always strings*. If you use a numeric value as a subscript, it + An important aspect of arrays to remember is that _array subscripts +are always strings_. If you use a numeric value as a subscript, it will be converted to a string value before it is used for subscripting (*note Conversion of Strings and Numbers: Conversion.). @@ -7476,7 +7503,7 @@ the output! At first glance, this program should have worked. The variable `lines' is uninitialized, and uninitialized variables have the numeric -value zero. So, the value of `l[0]' should have been printed. +value zero. So, `awk' should have printed the value of `l[0]'. The issue here is that subscripts for `awk' arrays are *always* strings. 
And uninitialized variables, when used as strings, have the @@ -7585,7 +7612,7 @@ it produces: 3 2 1 6 -File: gawk.info, Node: Multi-scanning, Prev: Multi-dimensional, Up: Arrays +File: gawk.info, Node: Multi-scanning, Next: Array Efficiency, Prev: Multi-dimensional, Up: Arrays Scanning Multi-dimensional Arrays ================================= @@ -7593,7 +7620,7 @@ Scanning Multi-dimensional Arrays There is no special `for' statement for scanning a "multi-dimensional" array; there cannot be one, because in truth there are no multi-dimensional arrays or elements; there is only a -multi-dimensional *way of accessing* an array. +multi-dimensional _way of accessing_ an array. However, if your program has an array that is always accessed as multi-dimensional, you can get the effect of scanning it by combining @@ -7626,6 +7653,34 @@ to `"foo"'. Presto, the original sequence of separate indices has been recovered. +File: gawk.info, Node: Array Efficiency, Prev: Multi-scanning, Up: Arrays + +Using Array Memory Efficiently +============================== + + This section applies just to `gawk'. + + It is often useful to use the same bit of data as an index into +multiple arrays. Due to the way `gawk' implements associative arrays, +when you need to use input data as an index for multiple arrays, it is +much more efficient to assign the input field to a separate variable, +and then use that variable as the index. + + { + name = $1 + ssn = $2 + nkids = $3 + ... + seniority[name]++ # better than seniority[$1]++ + kids[name] = nkids # better than kids[$1] = nkids + } + + Using separate variables with mnemonic names for the input fields +makes programs more readable, in any case. It is an eventual goal to +make `gawk''s array indexing as efficient as possible, no matter what +the source of the index value.
+ + File: gawk.info, Node: Built-in, Next: User-defined, Prev: Arrays, Up: Top Built-in Functions @@ -7745,9 +7800,9 @@ Optional parameters are enclosed in square brackets ("[" and "]"). return int(n * rand()) } - The multiplication produces a random real number greater than zero - and less than `n'. We then make it an integer (using `int') - between zero and `n' - 1, inclusive. + The multiplication produces a random number greater than zero and + less than `n'. We then make it an integer (using `int') between + zero and `n' - 1, inclusive. Here is an example where a similar function is used to produce random integers between one and N. This program prints a new @@ -7968,10 +8023,10 @@ and "]"). awk 'BEGIN { str = "daabaaa" - sub(/a*/, "c&c", str) + sub(/a+/, "C&C", str) print str }' - -| dcaacbaaa + -| dCaaCbaaa This shows how `&' can represent a non-constant string, and also illustrates the "leftmost, longest" rule in regexp matching (*note @@ -8007,7 +8062,7 @@ and "]"). `gsub(REGEXP, REPLACEMENT [, TARGET])' This is similar to the `sub' function, except `gsub' replaces - *all* of the longest, leftmost, *non-overlapping* matching + _all_ of the longest, leftmost, _non-overlapping_ matching substrings it can find. The `g' in `gsub' stands for "global," which means replace everywhere. For example: @@ -8028,7 +8083,7 @@ and "]"). `gsub', it searches the target string TARGET for matches of the regular expression REGEXP. Unlike `sub' and `gsub', the modified string is returned as the result of the function, and the original - target string is *not* changed. If HOW is a string beginning with + target string is _not_ changed. If HOW is a string beginning with `g' or `G', then it replaces all matches of REGEXP with REPLACEMENT. Otherwise, HOW is a number indicating which match of REGEXP to replace. If no TARGET is supplied, `$0' is used instead. @@ -8087,7 +8142,7 @@ and "]"). 
also returned if LENGTH is greater than the number of characters remaining in the string, counting from character number START. - *Note:* The string returned by `substr' *cannot* be assigned to. + *Note:* The string returned by `substr' _cannot_ be assigned to. Thus, it is a mistake to attempt to change a portion of a string, like this: @@ -8183,7 +8238,7 @@ leads to two problems. 1. Backslashes must now be doubled in the REPLACEMENT string, breaking historical `awk' programs. - 2. To make sure that an `awk' program is portable, *every* character + 2. To make sure that an `awk' program is portable, _every_ character in the REPLACEMENT string must be preceded with a backslash.(1) The POSIX standard is under revision.(2) Because of the above @@ -8235,8 +8290,8 @@ the use of `gawk' and `gensub' for when you have to do substitutions. (1) This consequence was certainly unintended. - (2) As of April, 1999, with final approval and publication hopefully -sometime in 1997. + (2) As of June, 2000, with final approval and publication as part of +the Austin Group Standards hopefully sometime in 2001. File: gawk.info, Node: I/O Functions, Next: Time Functions, Prev: String Functions, Up: Built-in @@ -8278,7 +8333,7 @@ parameters are enclosed in square brackets ("[" and "]"). `gawk' extends the `fflush' function in two ways. The first is to allow no argument at all. In this case, the buffer for the standard output is flushed. The second way is to allow the null - string (`""') as the argument. In this case, the buffers for *all* + string (`""') as the argument. In this case, the buffers for _all_ open output files and pipes are flushed. `fflush' returns zero if the buffer was successfully flushed, and @@ -8671,7 +8726,7 @@ User-defined Functions Complicated `awk' programs can often be simplified by defining your own functions. 
User-defined functions can be called just like built-in -ones (*note Function Calls::.), but it is up to you to define them--to +ones (*note Function Calls::), but it is up to you to define them--to tell `awk' what they should do. * Menu: @@ -8690,7 +8745,7 @@ Function Definition Syntax Definitions of functions can appear anywhere between the rules of an `awk' program. Thus, the general form of an `awk' program is extended -to include sequences of rules *and* user-defined function definitions. +to include sequences of rules _and_ user-defined function definitions. There is no need in `awk' to put the definition of a function before all uses of the function. This is because `awk' reads the entire program before starting to execute any of it. @@ -8715,7 +8770,7 @@ cannot have two parameters with the same name. The BODY-OF-FUNCTION consists of `awk' statements. It is the most important part of the definition, because it says what the function -should actually *do*. The argument names exist to give the body a way +should actually _do_. The argument names exist to give the body a way to talk about the arguments; local variables, to give the body places to keep temporary values. @@ -8756,7 +8811,7 @@ function. When this happens, we say the function is "recursive". `function' may be abbreviated `func'. However, POSIX only specifies the use of the keyword `function'. This actually has some practical implications. If `gawk' is in POSIX-compatibility mode (*note Command -Line Options: Options.), then the following statement will *not* define +Line Options: Options.), then the following statement will _not_ define a function: func foo() { a = sqrt($1) ; print a } @@ -8813,7 +8868,9 @@ this program, using our function to format the results, prints: elements in an array and start over with a new list of elements (*note The `delete' Statement: Delete.). 
Instead of having to repeat this loop everywhere in your program that you need to clear out an array, -your program can just call `delarray'. +your program can just call `delarray'. (This guarantees portability. +The usage `delete ARRAY' to delete the contents of an entire array is a +non-standard extension.) Here is an example of a recursive function. It takes a string as an input parameter, and returns the string in backwards order. @@ -8877,7 +8934,7 @@ concatenate a variable with an expression in parentheses. However, it notices that you used a function name and not a variable name, and reports an error. - When a function is called, it is given a *copy* of the values of its + When a function is called, it is given a _copy_ of the values of its arguments. This is known as "call by value". The caller may use a variable as the expression for the argument, but the called function does not know this: it only knows what value the argument had. For @@ -8900,18 +8957,18 @@ this has no effect on any other variables. Thus, if `myfunc' does this: print str } -to change its first argument variable `str', this *does not* change the +to change its first argument variable `str', this _does not_ change the value of `foo' in the caller. The role of `foo' in calling `myfunc' ended when its value, `"bar"', was computed. If `str' also exists outside of `myfunc', the function body cannot alter this outer value, because it is shadowed during the execution of `myfunc' and cannot be seen or changed from there. - However, when arrays are the parameters to functions, they are *not* + However, when arrays are the parameters to functions, they are _not_ copied. Instead, the array itself is made available for direct manipulation by the function. This is usually called "call by reference". Changes made to an array parameter inside the body of a -function *are* visible outside that function. This can be *very* +function _are_ visible outside that function. 
This can be *very* dangerous if you do not watch what you are doing. For example: function changeit(array, ind, nvalue) @@ -8972,7 +9029,7 @@ value is undefined and, therefore, unpredictable. A `return' statement with no value expression is assumed at the end of every function definition. So if control reaches the end of the function body, then the function returns an unpredictable value. `awk' -will *not* warn you if you use the return value of such a function. +will _not_ warn you if you use the return value of such a function. Sometimes, you want to write a function for what it does, not for what it returns. Such a function corresponds to a `void' function in C @@ -9108,6 +9165,11 @@ The options and their meanings are as follows: than once, setting another variable each time, like this: `awk -v foo=1 -v bar=2 ...'. + *Caution:* Using `-v' to set the values of the builtin variables + may lead to surprising results. `awk' will reset the values of + those variables as it needs to, possibly ignoring any predefined + value you may have given. + `-mf NNN' `-mr NNN' Set various memory limits to the value NNN. The `f' flag sets the @@ -9179,7 +9241,7 @@ The options and their meanings are as follows: restrictions: * `\x' escape sequences are not recognized (*note Escape - Sequences::.). + Sequences::). * Newlines do not act as whitespace to separate fields when `FS' is equal to a single space. @@ -9297,7 +9359,7 @@ argument that has the form `VAR=VALUE', assigns the value VALUE to the variable VAR--it does not specify a file at all. All these arguments are made available to your `awk' program in the -`ARGV' array (*note Built-in Variables::.). Command line options and +`ARGV' array (*note Built-in Variables::). Command line options and the program text (if present) are omitted from `ARGV'. All other arguments, including variable assignments, are included. As each element of `ARGV' is processed, `gawk' sets the variable `ARGIND' to @@ -9311,15 +9373,15 @@ reading a file.
Therefore, the variables actually receive the given values after all previously specified files have been read. In particular, the values of -variables assigned in this fashion are *not* available inside a `BEGIN' +variables assigned in this fashion are _not_ available inside a `BEGIN' rule (*note The `BEGIN' and `END' Special Patterns: BEGIN/END.), since such rules are run before `awk' begins scanning the argument list. The variable values given on the command line are processed for -escape sequences (d.c.) (*note Escape Sequences::.). +escape sequences (d.c.) (*note Escape Sequences::). In some earlier implementations of `awk', when a variable assignment -occurred before any file names, the assignment would happen *before* +occurred before any file names, the assignment would happen _before_ the `BEGIN' rule was executed. `awk''s behavior was thus inconsistent; some command line assignments were available inside the `BEGIN' rule, while others were not. However, some applications came to depend upon @@ -9392,11 +9454,10 @@ path `gawk' will use. ---------- Footnotes ---------- - (1) Your version of `gawk' may use a directory that is different -than `/usr/local/share/awk'; it will depend upon how `gawk' was built -and installed. The actual directory will be the value of `$(datadir)' -generated when `gawk' was configured. You probably don't need to worry -about this though. + (1) Your version of `gawk' may use a different directory; it will +depend upon how `gawk' was built and installed. The actual directory +will be the value of `$(datadir)' generated when `gawk' was configured. +You probably don't need to worry about this though. 
File: gawk.info, Node: Obsolete, Next: Undocumented, Prev: AWKPATH Variable, Up: Invoking Gawk @@ -9407,9 +9468,9 @@ Obsolete Options and/or Features This section describes features and/or command line options from previous releases of `gawk' that are either not available in the current version, or that are still supported but deprecated (meaning -that they will *not* be in the next release). +that they will _not_ be in the next release). - For version 3.0.4 of `gawk', there are no command line options or + For version 3.0.5 of `gawk', there are no command line options or other deprecated features from the previous version of `gawk'. This node is thus essentially a place holder, in case some option becomes obsolete in a future version of `gawk'. @@ -9423,6 +9484,8 @@ Undocumented Options and Features Use the Source, Luke! Obi-Wan + + This section intentionally left blank. @@ -9570,7 +9633,7 @@ file is reached, and a new data file is opened, changing the value of and then executes a `next' statement to start the loop going.(1) This initial version has a subtle problem. What happens if the same -data file is listed *twice* on the command line, one right after the +data file is listed _twice_ on the command line, one right after the other, or even with just a variable assignment between the two occurrences of the file name? @@ -9945,7 +10008,8 @@ canonical representation of a date into a timestamp. It would appear at first glance that `gawk' would have to supply a `mktime' built-in function that was simply a "hook" to the C language -version. In fact though, `mktime' can be implemented entirely in `awk'. +version. In fact though, `mktime' can be implemented entirely in +`awk'.(1) Here is a version of `mktime' for `awk'. It takes a simple representation of the date and time, and converts it into a timestamp. @@ -10055,7 +10119,7 @@ efficient. 
} The function starts with a first approximation of all the seconds -between Midnight, January 1, 1970,(1) and the beginning of the current +between Midnight, January 1, 1970,(2) and the beginning of the current year. It then goes through all those years, and for every leap year, adds an additional day's worth of seconds. @@ -10130,7 +10194,7 @@ set-up and error checking. Recall that `_tm_addup' generated a value in seconds since Midnight, January 1, 1970. This value is not directly usable as the result we -want, *since the calculation does not account for the local timezone*. +want, _since the calculation does not account for the local timezone_. In other words, the value represents the count in seconds since the Epoch, but only for UTC (Universal Coordinated Time). If the local timezone is east or west of UTC, then some number of hours should be @@ -10146,8 +10210,8 @@ the result. How can `mktime' determine how far away it is from UTC? This is surprisingly easy. The returned timestamp represents the time passed to -`mktime' *as UTC*. This timestamp can be fed back to `strftime', which -will format it as a *local* time; i.e. as if it already had the UTC +`mktime' _as UTC_. This timestamp can be fed back to `strftime', which +will format it as a _local_ time; i.e. as if it already had the UTC difference added in to it. This is done by giving `"%Y %m %d %H %M %S"' to `strftime' as the format argument. It returns the computed timestamp in the original string format. The result @@ -10163,7 +10227,6 @@ An example demonstrating this is presented below. if (_tm_test) { printf "Enter date as yyyy mm dd hh mm ss: " getline _tm_test_date - t = mktime(_tm_test_date) r = strftime("%Y %m %d %H %M %S", t) printf "Got back (%s)\n", r @@ -10199,7 +10262,11 @@ months, and AM/PM times into 24-hour clocks, to generate the ---------- Footnotes ---------- - (1) This is the Epoch on POSIX systems. It may be different on + (1) June, 2000: Actually, I was mistaken when I wrote this. 
The +version presented here doesn't always work correctly, and the next +major version of `gawk' will provide `mktime' as a built-in function. + + (2) This is the Epoch on POSIX systems. It may be different on other systems. @@ -10310,7 +10377,7 @@ even supplied us the code to do so. library program. It arranges to call two user-supplied functions, `beginfile' and `endfile', at the beginning and end of each data file. Besides solving the problem in only nine(!) lines of code, it does so -*portably*; this will work with any implementation of `awk'. +_portably_; this will work with any implementation of `awk'. # transfile.awk # @@ -11205,7 +11272,7 @@ that it is global, while the fact that the variable name is not all capital letters indicates that the variable is not one of `awk''s built-in variables, like `FS'. - It is also important that *all* variables in library functions that + It is also important that _all_ variables in library functions that do not need to save state are in fact declared local. If this is not done, the variable could accidentally be used in the user's program, leading to bugs that are very difficult to track down. @@ -11568,7 +11635,7 @@ is preceded by the name of the file and a colon. `-v' Invert the sense of the test. `egrep' prints the lines that do - *not* match the pattern, and exits successfully if the pattern was + _not_ match the pattern, and exits successfully if the pattern was not matched. `-i' @@ -11590,7 +11657,7 @@ Function.). The program begins with a descriptive comment, and then a `BEGIN' rule that processes the command line arguments with `getopt'. The `-i' (ignore case) option is particularly easy with `gawk'; we just use the -`IGNORECASE' built in variable (*note Built-in Variables::.). +`IGNORECASE' built in variable (*note Built-in Variables::). # egrep.awk --- simulate egrep in awk # Arnold Robbins, arnold@gnu.org, Public Domain @@ -12139,7 +12206,7 @@ standard output, `/dev/stdout'. 
# -n skip n fields # +n skip n characters, skip fields first - BEGIN \ + BEGIN \ { count = 1 outputfile = "/dev/stdout" @@ -12367,8 +12434,7 @@ totals of lines, words, and characters. It then prints out those numbers for the file that was just read. It relies on `beginfile' to reset the numbers for the following data file. - function beginfile(file) - { + function beginfile(file) { chars = lines = words = 0 fname = FILENAME } @@ -12735,7 +12801,7 @@ the lists be enclosed in square brackets and quoted. This is a feature. (2) This program was written before `gawk' acquired the ability to split each character in a string into separate array elements. How -might this ability simplify the program? +might you use this new feature to simplify the program? File: gawk.info, Node: Labels Program, Next: Word Sorting, Prev: Translate Program, Up: Miscellaneous Programs @@ -12873,7 +12939,7 @@ program listing. rules. The first rule, because it has an empty pattern, is executed on every line of the input. It uses `awk''s field-accessing mechanism (*note Examining Fields: Fields.) to pick out the individual words from -the line, and the built-in variable `NF' (*note Built-in Variables::.) +the line, and the built-in variable `NF' (*note Built-in Variables::) to know how many fields are available. For each input word, an element of the array `freq' is incremented to @@ -12961,7 +13027,7 @@ Removing Duplicates from Unsorted Text -------------------------------------- The `uniq' program (*note Printing Non-duplicated Lines of Text: -Uniq Program.), removes duplicate lines from *sorted* data. +Uniq Program.), removes duplicate lines from _sorted_ data. Suppose, however, you need to remove duplicate lines from a data file, but that you wish to preserve the order the lines are in? A good @@ -13085,8 +13151,7 @@ exited with a zero exit status, signifying OK. 
# extract.awk --- extract files and run programs # from texinfo files - # Arnold Robbins, arnold@gnu.org, Public Domain - # May 1993 + # Arnold Robbins, arnold@gnu.org, Public Domain, May 1993 BEGIN { IGNORECASE = 1 } @@ -13398,7 +13463,6 @@ are several cases of interest. The source text is echoed into `/tmp/ig.s.$$'. `--version' -`--version' `-Wversion' `igawk' prints its version number, and runs `gawk --version' to get the `gawk' version information, and then exits. @@ -13713,7 +13777,7 @@ changes, with cross-references to further details. Functions for Input/Output: I/O Functions.). * The `ARGC', `ARGV', `FNR', `RLENGTH', `RSTART', and `SUBSEP' - built-in variables (*note Built-in Variables::.). + built-in variables (*note Built-in Variables::). * The conditional expression using the ternary operator `?:' (*note Conditional Expressions: Conditional Exp.). @@ -13735,7 +13799,7 @@ changes, with cross-references to further details. How to Use Regular Expressions: Regexp Usage.). * The escape sequences `\b', `\f', and `\r' (*note Escape - Sequences::.). (Some vendors have updated their old versions of + Sequences::). (Some vendors have updated their old versions of `awk' to recognize `\r', `\b', and `\f', but this is not something you can rely on.) @@ -13757,7 +13821,7 @@ Changes between SVR3.1 and SVR4 The System V Release 4 version of Unix `awk' added these features (some of which originated in `gawk'): - * The `ENVIRON' variable (*note Built-in Variables::.). + * The `ENVIRON' variable (*note Built-in Variables::). * Multiple `-f' options on the command line (*note Command Line Options: Options.). @@ -13768,7 +13832,7 @@ Changes between SVR3.1 and SVR4 * The `--' option for terminating command line options. * The `\a', `\v', and `\x' escape sequences (*note Escape - Sequences::.). + Sequences::). * A defined return value for the `srand' built-in function (*note Numeric Built-in Functions: Numeric Functions.). 
@@ -13814,7 +13878,7 @@ introduced the following changes into the language: standard: * `\x' escape sequences are not recognized (*note Escape - Sequences::.). + Sequences::). * Newlines do not act as whitespace to separate fields when `FS' is equal to a single space. @@ -13852,6 +13916,7 @@ describes extensions in his version of `awk' that are not in POSIX * The `fflush' built-in function for flushing buffered output (*note Built-in Functions for Input/Output: I/O Functions.). + File: gawk.info, Node: POSIX/GNU, Prev: BTL, Up: Language History @@ -13902,11 +13967,11 @@ all be disabled with either the `--traditional' or `--posix' options Version 2.15 of `gawk' introduced these features: * The `ARGIND' variable, that tracks the movement of `FILENAME' - through `ARGV' (*note Built-in Variables::.). + through `ARGV' (*note Built-in Variables::). * The `ERRNO' variable, that contains the system error message when `getline' returns -1, or when `close' fails (*note Built-in - Variables::.). + Variables::). * The ability to use GNU-style long named options that start with `--' (*note Command Line Options: Options.). @@ -13967,6 +14032,7 @@ all be disabled with either the `--traditional' or `--posix' options * Amiga support (*note Installing `gawk' on an Amiga: Amiga Installation.). + File: gawk.info, Node: Gawk Summary, Next: Installation, Prev: Language History, Up: Top @@ -13986,7 +14052,7 @@ It is therefore terse, but complete. parts. * Actions Summary:: Quick overview of actions. * Functions Summary:: Defining and calling functions. -* Historical Features:: Some undocumented but supported "features". +* Historical Features:: Some undocumented but supported ``features''. 
File: gawk.info, Node: Command Line Summary, Next: Language Summary, Prev: Gawk Summary, Up: Gawk Summary @@ -14264,7 +14330,7 @@ Built-in Variables `!~', and the `gensub', `gsub', `index', `match', `split' and `sub' built-in functions all ignore case when doing regular expression operations, and all string comparisons are done - ignoring case. The value of `IGNORECASE' does *not* affect array + ignoring case. The value of `IGNORECASE' does _not_ affect array subscripting. `NF' @@ -14315,7 +14381,7 @@ Arrays ------ Arrays are subscripted with an expression between square brackets -(`[' and `]'). Array subscripts are *always* strings; numbers are +(`[' and `]'). Array subscripts are _always_ strings; numbers are converted to strings as necessary, following the standard conversion rules (*note Conversion of Strings and Numbers: Conversion.). @@ -14485,7 +14551,7 @@ Regular Expressions Regular expressions are based on POSIX EREs (extended regular expressions). The escape sequences allowed in string constants are -also valid in regular expressions (*note Escape Sequences::.). Regexps +also valid in regular expressions (*note Escape Sequences::). Regexps are composed of characters as follows: `C' @@ -14496,7 +14562,7 @@ are composed of characters as follows: matches the literal character C. `.' - matches any character, *including* newline. In strict POSIX mode, + matches any character, _including_ newline. In strict POSIX mode, `.' does not match the NUL character, which is a character with all bits equal to zero. @@ -14794,7 +14860,7 @@ I/O Statements `printf FMT, EXPR-LIST' Format and print. -`printf FMT, EXPR-LIST > file' +`printf FMT, EXPR-LIST > FILE' Format and print to FILE. If FILE does not exist, it is created. If it does exist, its contents are deleted the first time the `printf' is executed. @@ -14849,7 +14915,10 @@ following conversion specification formats: instead of `%e'. `%o' - An unsigned octal number (again, an integer). 
+ An unsigned octal number (also an integer). + +`%u' + An unsigned decimal number (again, an integer). `%s' A character string. @@ -15291,7 +15360,8 @@ Getting the `gawk' Distribution Boston, MA 02111-1307 USA Phone: +1-617-542-5942 Fax (including Japan): +1-617-542-2652 - E-mail: `gnu@gnu.org' + Email: `gnu@gnu.org' + URL: `http://www.gnu.org/' Ordering from the FSF directly contributes to the support of the foundation and to the production of more free software. @@ -15382,21 +15452,21 @@ Extracting the Distribution `gawk' is distributed as a `tar' file compressed with the GNU Zip program, `gzip'. - Once you have the distribution (for example, `gawk-3.0.4.tar.gz'), + Once you have the distribution (for example, `gawk-3.0.5.tar.gz'), first use `gzip' to expand the file, and then use `tar' to extract it. You can use the following pipeline to produce the `gawk' distribution: # Under System V, add 'o' to the tar flags - gzip -d -c gawk-3.0.4.tar.gz | tar -xvpf - + gzip -d -c gawk-3.0.5.tar.gz | tar -xvpf - -This will create a directory named `gawk-3.0.4' in the current +This will create a directory named `gawk-3.0.5' in the current directory. The distribution file name is of the form `gawk-V.R.N.tar.gz'. The V represents the major version of `gawk', the R represents the current release of version V, and the N represents a "patch level", meaning that minor bugs have been fixed in the release. The current patch -level is 4, but when retrieving distributions, you should get the +level is 5, but when retrieving distributions, you should get the version with the highest version, release, and patch level. (Note that release levels greater than or equal to 90 denote "beta," or non-production software; you may not wish to retrieve such a version @@ -15569,7 +15639,7 @@ Compiling `gawk' for Unix ------------------------- After you have extracted the `gawk' distribution, `cd' to -`gawk-3.0.4'. Like most GNU software, `gawk' is configured +`gawk-3.0.5'. 
Like most GNU software, `gawk' is configured automatically for your Unix system by running the `configure' program. This program is a Bourne shell script that was generated automatically using GNU `autoconf'. (The `autoconf' software is described fully @@ -15755,7 +15825,7 @@ Running `gawk' on VMS Command line parsing and quoting conventions are significantly different on VMS, so examples in this Info file or from other sources -often need minor changes. They *are* minor though, and all `awk' +often need minor changes. They _are_ minor though, and all `awk' programs should run correctly. Here are a couple of trivial tests: @@ -15855,7 +15925,7 @@ MS-DOS and OS/2. The file `README_d/README.pc' in the `gawk' distribution contains additional notes, and `pc/Makefile' contains important notes on compilation options. - To build `gawk', copy the files in the `pc' directory (*except* for + To build `gawk', copy the files in the `pc' directory (_except_ for `ChangeLog') to the directory with the rest of the `gawk' sources. The `Makefile' contains a configuration section with comments, and may need to be edited in order to work with your `make' utility. @@ -15991,7 +16061,7 @@ function, which may not support this convention. Whenever it is possible that a file created by `gawk' will be used by some other program, use only backslashes. Also remember that in `awk', backslashes in strings have to be doubled in order to get literal -backslashes (*note Escape Sequences::.). +backslashes (*note Escape Sequences::). File: gawk.info, Node: Amiga Installation, Next: Bugs, Prev: Atari Installation, Up: Installation @@ -16036,6 +16106,8 @@ Reporting Problems and Bugs There is nothing more dangerous than a bored archeologist. The Hitchhiker's Guide to the Galaxy + + If you have problems with `gawk' or think that you have found a bug, please report it to the developers; we cannot promise to do anything but we might well want to fix it. 
@@ -16053,7 +16125,7 @@ idea of what kind of Unix system you're using, and the exact results `gawk' gave you. Also say what you expected to occur; this will help us decide whether the problem was really in the documentation. - Once you have a precise problem, there are two e-mail addresses you + Once you have a precise problem, there are two email addresses you can send mail to. Internet: @@ -16067,7 +16139,7 @@ get this information with the command `gawk --version'. You should send a carbon copy of your mail to Arnold Robbins, who can be reached at `arnold@gnu.org'. - *Important!* Do *not* try to report bugs in `gawk' by posting to the + *Important!* Do _not_ try to report bugs in `gawk' by posting to the Usenet/Internet newsgroup `comp.lang.awk'. While the `gawk' developers do occasionally read this newsgroup, there is no guarantee that we will see your posting. The steps described above are the official, @@ -16117,6 +16189,8 @@ Other Freely Available `awk' Implementations `// Do C++ comments work? answer: yes! of course' Michael Brennan + + There are two other freely available `awk' implementations. This section briefly describes where to get them. @@ -16129,8 +16203,8 @@ Unix `awk' This is a shell archive that has been compressed with the GNU `gzip' utility. It can be uncompressed with the `gunzip' utility. - You can also retrieve this version via the World Wide Web from - Brian Kernighan's home page (http://cm.bell-labs.com/who/bwk). + You can also retrieve this version via the World Wide Web from his + home page (http://cm.bell-labs.com/who/bwk). This version requires an ANSI C compiler; GCC (the GNU C compiler) works quite nicely. @@ -16228,7 +16302,7 @@ make it possible for me to include your changes. 2. See *note (Version)Top:: standards, GNU Coding Standards. This document describes how GNU software should be written. 
If you - haven't read it, please do so, preferably *before* starting to + haven't read it, please do so, preferably _before_ starting to modify `gawk'. (The `GNU Coding Standards' are available as part of the Autoconf distribution, from the FSF.) @@ -16293,7 +16367,7 @@ make it possible for me to include your changes. FSF to distribute your changes, you must either place those changes in the public domain, and submit a signed statement to that effect, or assign the copyright in your changes to the FSF. Both - of these actions are easy to do, and *many* people have done so + of these actions are easy to do, and _many_ people have done so already. If you have questions, please contact me (*note Reporting Problems and Bugs: Bugs.), or `gnu@gnu.org'. @@ -16322,6 +16396,10 @@ make it possible for me to include your changes. have to apply the changes manually, using a text editor, I may not do so, particularly if there are lots of changes. + 7. Include an entry for the `ChangeLog' file with your submission. + This further helps minimize the amount of work I have to do, + making it easier for me to accept patches. + Although this sounds like a lot of work, please remember that while you may write the new code, I have to maintain it and support it, and if it isn't possible for me to do that with a minimum of extra work, @@ -16398,7 +16476,7 @@ several steps to follow. FSF to distribute your code, you must either place your code in the public domain, and submit a signed statement to that effect, or assign the copyright in your code to the FSF. Both of these - actions are easy to do, and *many* people have done so already. If + actions are easy to do, and _many_ people have done so already. If you have questions, please contact me, or `gnu@gnu.org'. Following these steps will make it much easier to integrate your @@ -16420,6 +16498,8 @@ Probable Future Extensions Hey! 
Larry Wall + + This section briefly lists extensions and possible improvements that indicate the directions we are currently considering for `gawk'. The file `FUTURES' in the `gawk' distributions lists these extensions as @@ -16441,7 +16521,7 @@ Databases A `PROCINFO' Array The special files that provide process-related information (*note - Special File Names in `gawk': Special Files.) may be superseded + Special File Names in `gawk': Special Files.) will be superseded by a `PROCINFO' array that would provide the same information, in an easier to access fashion. @@ -17233,7 +17313,7 @@ convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. ONE LINE TO GIVE THE PROGRAM'S NAME AND AN IDEA OF WHAT IT DOES. - Copyright (C) 19YY NAME OF AUTHOR + Copyright (C) YEAR NAME OF AUTHOR This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License @@ -17255,7 +17335,7 @@ mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: - Gnomovision version 69, Copyright (C) 19YY NAME OF AUTHOR + Gnomovision version 69, Copyright (C) YEAR NAME OF AUTHOR Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 
This is free software, and you are welcome to redistribute it under certain conditions; type `show c' @@ -18086,236 +18166,267 @@ Index Tag Table: -Node: Top1238 -Node: Preface20727 -Node: History22076 -Node: Manual History23434 -Node: Acknowledgements26876 -Node: What Is Awk30503 -Node: This Manual32157 -Node: Conventions34856 -Node: Sample Data Files36148 -Node: Getting Started39231 -Node: Names41539 -Node: Running gawk43108 -Node: One-shot44269 -Node: Read Terminal45656 -Node: Long47268 -Node: Executable Scripts48661 -Node: Comments51227 -Node: Very Simple53009 -Node: Two Rules55056 -Node: More Complex57235 -Node: Statements/Lines60351 -Node: Other Features64624 -Node: When65350 -Node: One-liners67285 -Node: Regexp70172 -Node: Regexp Usage71498 -Node: Escape Sequences73648 -Node: Regexp Operators79102 -Node: GNU Regexp Operators90135 -Node: Case-sensitivity93840 -Node: Leftmost Longest96956 -Node: Computed Regexps98491 -Node: Reading Files101148 -Node: Records102916 -Node: Fields109411 -Node: Non-Constant Fields112479 -Node: Changing Fields114766 -Node: Field Separators119174 -Node: Basic Field Splitting119876 -Node: Regexp Field Splitting123105 -Node: Single Character Fields125672 -Node: Command Line Field Separator126741 -Node: Field Splitting Summary129982 -Node: Constant Size132002 -Node: Multiple Line136039 -Node: Getline141447 -Node: Getline Intro142521 -Node: Plain Getline143484 -Node: Getline/Variable145748 -Node: Getline/File146890 -Node: Getline/Variable/File148200 -Node: Getline/Pipe150174 -Node: Getline/Variable/Pipe152382 -Node: Getline Summary153618 -Node: Printing155212 -Node: Print156280 -Node: Print Examples158381 -Node: Output Separators160989 -Node: OFMT162887 -Node: Printf164289 -Node: Basic Printf165193 -Node: Control Letters166727 -Node: Format Modifiers169415 -Node: Printf Examples173564 -Node: Redirection176343 -Node: Special Files180982 -Node: Close Files And Pipes186219 -Node: Expressions190280 -Node: Constants192476 -Node: Scalar 
Constants192955 -Node: Regexp Constants193959 -Node: Using Constant Regexps194421 -Node: Variables197622 -Node: Using Variables198276 -Node: Assignment Options199711 -Node: Conversion201656 -Node: Arithmetic Ops204838 -Node: Concatenation206972 -Node: Assignment Ops208394 -Node: Increment Ops213990 -Node: Truth Values216518 -Node: Typing and Comparison217566 -Node: Boolean Ops223573 -Node: Conditional Exp227266 -Node: Function Calls229043 -Node: Precedence231923 -Node: Patterns and Actions235311 -Node: Pattern Overview235737 -Node: Kinds of Patterns236512 -Node: Regexp Patterns237649 -Node: Expression Patterns238203 -Node: Ranges241855 -Node: BEGIN/END244579 -Node: Using BEGIN/END245048 -Node: I/O And BEGIN/END248011 -Node: Empty250027 -Node: Action Overview250326 -Node: Statements252898 -Node: If Statement254604 -Node: While Statement256107 -Node: Do Statement258138 -Node: For Statement259240 -Node: Break Statement262497 -Node: Continue Statement264768 -Node: Next Statement266764 -Node: Nextfile Statement269261 -Node: Exit Statement271175 -Node: Built-in Variables273186 -Node: User-modified274282 -Node: Auto-set279203 -Node: ARGC and ARGV285732 -Node: Arrays289571 -Node: Array Intro291034 -Node: Reference to Elements295074 -Node: Assigning Elements297024 -Node: Array Example297526 -Node: Scanning an Array299245 -Node: Delete301575 -Node: Numeric Array Subscripts303829 -Node: Uninitialized Subscripts305735 -Node: Multi-dimensional307379 -Node: Multi-scanning310474 -Node: Built-in312117 -Node: Calling Built-in313106 -Node: Numeric Functions315077 -Node: String Functions318895 -Node: I/O Functions338236 -Node: Time Functions344046 -Node: User-defined352896 -Node: Definition Syntax353609 -Node: Function Example357858 -Node: Function Caveats360188 -Node: Return Statement364059 -Node: Invoking Gawk366714 -Node: Options367949 -Node: Other Arguments376778 -Node: AWKPATH Variable379426 -Node: Obsolete382174 -Node: Undocumented382840 -Node: Known Bugs383089 -Node: Library 
Functions384227 -Node: Portability Notes386646 -Node: Nextfile Function387930 -Node: Assert Function392798 -Node: Round Function396150 -Node: Ordinal Functions397788 -Node: Join Function401232 -Node: Mktime Function403277 -Node: Gettimeofday Function414844 -Node: Filetrans Function418849 -Node: Getopt Function422512 -Node: Passwd Functions433861 -Node: Group Functions442182 -Node: Library Names450066 -Node: Sample Programs453991 -Node: Clones454482 -Node: Cut Program455576 -Node: Egrep Program465598 -Node: Id Program473254 -Node: Split Program476518 -Node: Tee Program479879 -Node: Uniq Program482668 -Node: Wc Program490206 -Node: Miscellaneous Programs494616 -Node: Dupword Program495526 -Node: Alarm Program497190 -Node: Translate Program501728 -Node: Labels Program506531 -Node: Word Sorting510067 -Node: History Sorting514412 -Node: Extract Program516374 -Node: Simple Sed523969 -Node: Igawk Program527306 -Node: Language History540619 -Node: V7/SVR3.1541852 -Node: SVR4544507 -Node: POSIX546029 -Node: BTL547649 -Node: POSIX/GNU548412 -Node: Gawk Summary552844 -Node: Command Line Summary553666 -Node: Language Summary556642 -Node: Variables/Fields559022 -Node: Fields Summary559756 -Node: Built-in Summary561542 -Node: Arrays Summary565257 -Node: Data Type Summary566550 -Node: Rules Summary568376 -Node: Pattern Summary569904 -Node: Regexp Summary572089 -Node: Actions Summary575472 -Node: Operator Summary577304 -Node: Control Flow Summary578531 -Node: I/O Summary579088 -Node: Printf Summary582077 -Node: Special File Summary585415 -Node: Built-in Functions Summary587093 -Node: Time Functions Summary591093 -Node: String Constants Summary591984 -Node: Functions Summary593304 -Node: Historical Features594365 -Node: Installation595863 -Node: Gawk Distribution597078 -Node: Getting597581 -Node: Extracting600532 -Node: Distribution contents601919 -Node: Unix Installation606695 -Node: Quick Installation607204 -Node: Configuration Philosophy608722 -Node: VMS Installation611124 
-Node: VMS Compilation611663 -Node: VMS Installation Details613267 -Node: VMS Running614909 -Node: VMS POSIX616499 -Node: PC Installation617779 -Node: Atari Installation621182 -Node: Atari Compiling622366 -Node: Atari Using624275 -Node: Amiga Installation627122 -Node: Bugs628233 -Node: Other Versions631286 -Node: Notes632972 -Node: Compatibility Mode633579 -Node: Additions634422 -Node: Adding Code635120 -Node: New Ports640450 -Node: Future Extensions644610 -Node: Improvements646553 -Node: Glossary648421 -Node: Copying665492 -Node: Index684684 +Node: Top1206 +Node: Preface20764 +Ref: Preface-Footnote-121881 +Node: History22113 +Node: Manual History23471 +Node: Acknowledgements26913 +Node: What Is Awk30542 +Node: This Manual32196 +Node: Conventions34896 +Node: Sample Data Files36188 +Node: Getting Started39271 +Node: Names41579 +Ref: Names-Footnote-143076 +Node: Running gawk43148 +Node: One-shot44309 +Node: Read Terminal45696 +Node: Long47308 +Node: Executable Scripts48701 +Ref: Executable Scripts-Footnote-150664 +Ref: Executable Scripts-Footnote-250813 +Node: Comments51267 +Node: Very Simple53049 +Node: Two Rules55096 +Node: More Complex57275 +Node: Statements/Lines60391 +Node: Other Features64664 +Node: When65390 +Node: One-liners67324 +Node: Regexp70211 +Node: Regexp Usage71537 +Node: Escape Sequences73687 +Node: Regexp Operators79141 +Node: GNU Regexp Operators90174 +Node: Case-sensitivity93880 +Node: Leftmost Longest96995 +Node: Computed Regexps98486 +Node: Reading Files101143 +Node: Records102910 +Node: Fields109405 +Ref: Fields-Footnote-1112387 +Node: Non-Constant Fields112473 +Node: Changing Fields114759 +Node: Field Separators119166 +Node: Basic Field Splitting119868 +Node: Regexp Field Splitting123097 +Node: Single Character Fields125663 +Node: Command Line Field Separator126732 +Node: Field Splitting Summary129972 +Ref: Field Splitting Summary-Footnote-1131891 +Node: Constant Size131992 +Node: Multiple Line136029 +Node: Getline141431 +Node: Getline 
Intro142505 +Node: Plain Getline143468 +Node: Getline/Variable145732 +Node: Getline/File146874 +Node: Getline/Variable/File148184 +Node: Getline/Pipe150158 +Node: Getline/Variable/Pipe152366 +Node: Getline Summary153602 +Node: Printing155196 +Node: Print156264 +Node: Print Examples158364 +Node: Output Separators160971 +Node: OFMT162869 +Node: Printf164271 +Node: Basic Printf165175 +Node: Control Letters166709 +Node: Format Modifiers169591 +Node: Printf Examples173740 +Node: Redirection176519 +Node: Special Files181157 +Node: Close Files And Pipes186395 +Node: Expressions190456 +Node: Constants192662 +Node: Scalar Constants193141 +Ref: Scalar Constants-Footnote-1194001 +Node: Regexp Constants194145 +Node: Using Constant Regexps194607 +Node: Variables197808 +Node: Using Variables198462 +Node: Assignment Options199897 +Node: Conversion201841 +Node: Arithmetic Ops205035 +Ref: Arithmetic Ops-Footnote-1207359 +Node: Concatenation207552 +Node: Assignment Ops208976 +Node: Increment Ops214571 +Node: Truth Values217099 +Node: Typing and Comparison218147 +Node: Boolean Ops224156 +Node: Conditional Exp227849 +Node: Function Calls229619 +Node: Precedence232499 +Node: Patterns and Actions235887 +Node: Pattern Overview236313 +Node: Kinds of Patterns237088 +Node: Regexp Patterns238225 +Node: Expression Patterns238779 +Node: Ranges242431 +Node: BEGIN/END245155 +Node: Using BEGIN/END245624 +Node: I/O And BEGIN/END248586 +Node: Empty250602 +Node: Action Overview250901 +Node: Statements253472 +Node: If Statement255178 +Node: While Statement256681 +Node: Do Statement258712 +Node: For Statement259814 +Node: Break Statement263071 +Node: Continue Statement265342 +Node: Next Statement267338 +Node: Nextfile Statement269835 +Node: Exit Statement271749 +Node: Built-in Variables273760 +Node: User-modified274856 +Ref: User-modified-Footnote-1279714 +Node: Auto-set279776 +Ref: Auto-set-Footnote-1286099 +Node: ARGC and ARGV286305 +Node: Arrays290144 +Node: Array Intro291669 +Node: Reference to 
Elements295709 +Node: Assigning Elements297659 +Node: Array Example298161 +Node: Scanning an Array299880 +Node: Delete302210 +Node: Numeric Array Subscripts304464 +Node: Uninitialized Subscripts306370 +Node: Multi-dimensional308015 +Node: Multi-scanning311110 +Node: Array Efficiency312778 +Node: Built-in313742 +Node: Calling Built-in314731 +Node: Numeric Functions316702 +Ref: Numeric Functions-Footnote-1320245 +Node: String Functions320515 +Ref: String Functions-Footnote-1339714 +Ref: String Functions-Footnote-2339765 +Node: I/O Functions339893 +Ref: I/O Functions-Footnote-1345612 +Node: Time Functions345703 +Ref: Time Functions-Footnote-1354022 +Ref: Time Functions-Footnote-2354133 +Ref: Time Functions-Footnote-3354409 +Node: User-defined354553 +Node: Definition Syntax355265 +Node: Function Example359514 +Node: Function Caveats361972 +Node: Return Statement365843 +Node: Invoking Gawk368498 +Node: Options369733 +Ref: Options-Footnote-1378777 +Node: Other Arguments378802 +Node: AWKPATH Variable381448 +Ref: AWKPATH Variable-Footnote-1383896 +Node: Obsolete384159 +Node: Undocumented384825 +Node: Known Bugs385076 +Node: Library Functions386214 +Node: Portability Notes388633 +Node: Nextfile Function389917 +Ref: Nextfile Function-Footnote-1394615 +Node: Assert Function394785 +Node: Round Function398137 +Node: Ordinal Functions399775 +Ref: Ordinal Functions-Footnote-1403000 +Node: Join Function403219 +Node: Mktime Function405264 +Ref: Mktime Function-Footnote-1416745 +Ref: Mktime Function-Footnote-2416951 +Node: Gettimeofday Function417034 +Node: Filetrans Function421039 +Node: Getopt Function424702 +Node: Passwd Functions436051 +Node: Group Functions444372 +Node: Library Names452256 +Node: Sample Programs456181 +Node: Clones456672 +Node: Cut Program457766 +Node: Egrep Program467788 +Node: Id Program475443 +Node: Split Program478707 +Node: Tee Program482068 +Node: Uniq Program484857 +Node: Wc Program492394 +Ref: Wc Program-Footnote-1496618 +Node: Miscellaneous 
Programs496799 +Node: Dupword Program497709 +Node: Alarm Program499373 +Node: Translate Program503911 +Ref: Translate Program-Footnote-1508391 +Ref: Translate Program-Footnote-2508534 +Node: Labels Program508729 +Ref: Labels Program-Footnote-1512181 +Node: Word Sorting512265 +Node: History Sorting516609 +Node: Extract Program518571 +Node: Simple Sed526160 +Node: Igawk Program529497 +Ref: Igawk Program-Footnote-1542660 +Node: Language History542798 +Node: V7/SVR3.1544031 +Node: SVR4546684 +Node: POSIX548204 +Node: BTL549823 +Node: POSIX/GNU550587 +Node: Gawk Summary555018 +Node: Command Line Summary555842 +Node: Language Summary558818 +Ref: Language Summary-Footnote-1561075 +Node: Variables/Fields561198 +Node: Fields Summary561932 +Ref: Fields Summary-Footnote-1563660 +Node: Built-in Summary563718 +Node: Arrays Summary567433 +Node: Data Type Summary568726 +Node: Rules Summary570552 +Node: Pattern Summary572080 +Node: Regexp Summary574265 +Node: Actions Summary577647 +Node: Operator Summary579479 +Node: Control Flow Summary580706 +Node: I/O Summary581263 +Node: Printf Summary584252 +Node: Special File Summary587647 +Node: Built-in Functions Summary589325 +Node: Time Functions Summary593325 +Node: String Constants Summary594216 +Node: Functions Summary595536 +Node: Historical Features596597 +Node: Installation598095 +Node: Gawk Distribution599310 +Node: Getting599813 +Node: Extracting602800 +Node: Distribution contents604187 +Node: Unix Installation608963 +Node: Quick Installation609472 +Node: Configuration Philosophy610990 +Node: VMS Installation613392 +Node: VMS Compilation613931 +Node: VMS Installation Details615535 +Node: VMS Running617177 +Node: VMS POSIX618767 +Node: PC Installation620047 +Node: Atari Installation623450 +Node: Atari Compiling624634 +Node: Atari Using626543 +Node: Amiga Installation629389 +Node: Bugs630500 +Node: Other Versions633554 +Node: Notes635228 +Node: Compatibility Mode635835 +Node: Additions636678 +Node: Adding Code637376 +Node: New 
Ports642890 +Node: Future Extensions647050 +Node: Improvements648996 +Node: Glossary650864 +Node: Copying667935 +Node: Index687127 End Tag Table diff --git a/doc/gawk.texi b/doc/gawk.texi index 3e8e102f..6cc1a87a 100644 --- a/doc/gawk.texi +++ b/doc/gawk.texi @@ -9,7 +9,7 @@ @c I hope this is the right category @dircategory Programming Languages @direntry -* Gawk: (gawk.info). A Text Scanning and Processing Language. +* Gawk: (gawk). A Text Scanning and Processing Language. @end direntry @end ifinfo @@ -21,10 +21,10 @@ @c applies to, and when the document was updated. @set TITLE Effective AWK Programming @set SUBTITLE A User's Guide for GNU Awk -@set PATCHLEVEL 4 +@set PATCHLEVEL 5 @set EDITION 1.0.@value{PATCHLEVEL} @set VERSION 3.0 -@set UPDATE-MONTH April, 1999 +@set UPDATE-MONTH June, 2000 @iftex @set DOCUMENT book @end iftex @@ -74,7 +74,7 @@ particular records in a file and perform operations upon them. This is Edition @value{EDITION} of @cite{@value{TITLE}}, for the @value{VERSION}.@value{PATCHLEVEL} version of the GNU implementation of AWK. -Copyright (C) 1989, 1991, 92, 93, 96, 97, 98, 99 Free Software Foundation, Inc. +Copyright (C) 1989, 1991, 1992, 1993, 1996-2000 Free Software Foundation, Inc. Permission is granted to make and distribute verbatim copies of this manual provided the copyright notice and this permission notice @@ -138,31 +138,27 @@ Corporation. @* Registered Trademark of Paramount Pictures Corporation. @* @c sorry, i couldn't resist @sp 3 -Copyright @copyright{} 1989, 1991, 92, 93, 96, 97, 98, 99 Free Software Foundation, Inc. +Copyright @copyright{} 1989, 1991, 1992, 1993, 1996-2000 Free Software Foundation, Inc. @sp 2 This is Edition @value{EDITION} of @cite{@value{TITLE}}, @* for the @value{VERSION}.@value{PATCHLEVEL} (or later) version of the GNU implementation of AWK. @sp 2 -@center Published jointly by: - -@multitable {Specialized Systems Consultants, Inc. 
(SSC)} {Boston, MA 02111-1307 USA} -@item Specialized Systems Consultants, Inc. (SSC) @tab Free Software Foundation -@item PO Box 55549 @tab 59 Temple Place --- Suite 330 -@item Seattle, WA 98155 USA @tab Boston, MA 02111-1307 USA -@item Phone: +1-206-782-7733 @tab Phone: +1-617-542-5942 -@item Fax: +1-206-782-7191 @tab Fax: +1-617-542-2652 -@item E-mail: @code{sales@@ssc.com} @tab E-mail: @code{gnu@@gnu.org} -@item URL: @code{http://www.ssc.com/} @tab URL: @code{http://www.fsf.org/} -@end multitable +Published by: + +Free Software Foundation @* +59 Temple Place --- Suite 330 @* +Boston, MA 02111-1307 USA @* +Phone: +1-617-542-5942 @* +Fax: +1-617-542-2652 @* +Email: @code{gnu@@gnu.org} @* +URL: @code{http://www.gnu.org/} @* @sp 1 -@c this ISBN can change! Check with SSC +@c this ISBN can change! @c This one is correct for gawk 3.0 and edition 1.0 from the FSF ISBN 1-882114-26-4 @* -@c This one is correct for gawk 3.0.3 and edition 1.0.3 from SSC -@c ISBN 1-57831-000-8 @* Permission is granted to make and distribute verbatim copies of this manual provided the copyright notice and this permission notice @@ -178,8 +174,7 @@ into another language, under the above conditions for modified versions, except that this permission notice may be stated in a translation approved by the Foundation. @sp 2 -@c Cover art by Etienne Suvasa. -Cover art by Amy Wells Wood. +Cover art by Etienne Suvasa. @end titlepage @c Thanks to Bob Chassell for directions on doing dedications. @@ -195,6 +190,8 @@ Cover art by Amy Wells Wood. @center @i{To Rivka, for the exponential increase.} @sp 1 @center @i{To Nachum, for the added dimension.} +@sp 1 +@center @i{To Malka, for the new beginning.} @page @w{ } @page @@ -540,6 +537,8 @@ of AWK. @center To Rivka, for the exponential increase. @sp 1 @center To Nachum, for the added dimension. +@sp 1 +@center To Malka, for the new beginning. 
@end ifinfo @node Preface, What Is Awk, Top, Top @@ -2686,7 +2685,7 @@ control how @code{gawk} interprets characters in regexps. @table @asis @item No options -In the default case, @code{gawk} provide all the facilities of +In the default case, @code{gawk} provides all the facilities of POSIX regexps and the GNU regexp operators described @iftex above. @@ -2843,7 +2842,6 @@ $ echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}' @end example For simple match/no-match tests, this is not so important. But when doing -regexp-based field and record splitting, and text matching and substitutions with the @code{match}, @code{sub}, @code{gsub}, and @code{gensub} functions, it is very important. @ifinfo @@ -2871,7 +2869,7 @@ regexp. A regexp that is computed in this way is called a @dfn{dynamic regexp}. For example: @example -BEGIN @{ identifier_regexp = "[A-Za-z_][A-Za-z_0-9]+" @} +BEGIN @{ identifier_regexp = "[A-Za-z_][A-Za-z_0-9]*" @} $0 ~ identifier_regexp @{ print @} @end example @@ -2879,6 +2877,12 @@ $0 ~ identifier_regexp @{ print @} sets @code{identifier_regexp} to a regexp that describes @code{awk} variable names, and tests if the input record matches this regexp. +@ignore +Do we want to use "^[A-Za-z_][A-Za-z_0-9]*$" to restrict the entire +record to just identifiers? Doing that also would disrupt the flow of +the text. +@end ignore + @strong{Caution:} When using the @samp{~} and @samp{!~} operators, there is a difference between a regexp constant enclosed in slashes, and a string constant enclosed in double quotes. @@ -3070,8 +3074,10 @@ is one field, consisting of a newline. The value of the built-in variable @code{NF} is the number of fields in the current record. @example +@group $ echo | awk 'BEGIN @{ RS = "a" @} ; @{ print NF @}' @print{} 1 +@end group @end example @cindex dark corner @@ -3219,6 +3225,8 @@ when the record has only seven fields, you get the empty string. a special case: it represents the whole input record. 
@code{$0} is used when you are not interested in fields. +@c NEEDED +@page Here are some more examples: @example @@ -3613,8 +3621,10 @@ the record, and then decide where the fields are. For example, the following pipeline prints @samp{b}: @example +@group $ echo ' a b c d ' | awk '@{ print $2 @}' @print{} b +@end group @end example @noindent @@ -3914,17 +3924,19 @@ idle time. (This program uses a number of @code{awk} features that haven't been introduced yet.) @example -@group BEGIN @{ FIELDWIDTHS = "9 6 10 6 7 7 35" @} NR > 2 @{ idle = $4 sub(/^ */, "", idle) # strip leading spaces if (idle == "") idle = 0 +@group if (idle ~ /:/) @{ split(idle, t, ":") idle = t[1] * 60 + t[2] @} +@end group +@group if (idle ~ /days/) idle *= 24 * 60 * 60 @@ -4042,6 +4054,8 @@ A practical example of a data file organized this way might be a mailing list, where each entry is separated by blank lines. If we have a mailing list in a file named @file{addresses}, that looks like this: +@c NEEDED +@page @example Jane Doe 123 Main Street @@ -4050,7 +4064,6 @@ Anywhere, SE 12345-6789 John Smith 456 Tree-lined Avenue Smallville, MW 98765-4321 - @dots{} @end example @@ -4426,8 +4439,6 @@ each one. @c Exercise!! @c This example is unrealistic, since you could just use system -@c NEEDED -@page Given the input: @example @@ -4974,6 +4985,12 @@ the decimal number eight is represented as @samp{10} in octal.) @item s This prints a string. +@item u +This prints an unsigned decimal number. +(This format is of marginal use, since all numbers in @code{awk} +are floating point. It is provided primarily for compatibility +with C.) + @item x @itemx X This prints an unsigned hexadecimal integer. @@ -5525,7 +5542,7 @@ is important to @emph{not} close any of the files related to file descriptors 0, 1, and 2. If you do close one of these files, unpredictable behavior will result. 
-The special files that provide process-related information may disappear +The special files that provide process-related information will disappear in a future version of @code{gawk}. @xref{Future Extensions, ,Probable Future Extensions}. @@ -5624,6 +5641,8 @@ really do its work until the pipe is closed. For example, if you redirect output to the @code{mail} program, the message is not actually sent until the pipe is closed. +@c NEEDED +@page @item To run the same program a second time, with the same arguments. This is not the same thing as giving more input to the first run! @@ -6017,8 +6036,8 @@ specifier. @code{CONVFMT}'s default value is @code{"%.6g"}, which prints a value with at least six significant digits. For some applications you will want to -change it to specify more precision. Double precision on most modern -machines gives you 16 or 17 decimal digits of precision. +change it to specify more precision. On most modern machines, you must +print 17 digits to capture a floating point number's value exactly. Strange results can happen if you set @code{CONVFMT} to a string that doesn't tell @code{sprintf} how to format floating point numbers in a useful way. @@ -6069,7 +6088,12 @@ for more information on the @code{print} statement. The @code{awk} language uses the common arithmetic operators when evaluating expressions. All of these arithmetic operators follow normal -precedence rules, and work as you would expect them to. +precedence rules, and work as you would expect them to. 
Arithmetic +operations are evaluated using double precision floating point, which +has the usual problems of inexactness and exceptions.@footnote{David +Goldberg, @uref{http://www.validgh.com/goldberg/paper.ps, @cite{What Every +Computer Scientist Should Know About Floating-point Arithmetic}}, +@cite{ACM Computing Surveys} @strong{23}, 1 (1991-03), 5-48.} Here is a file @file{grades} containing a list of student names and three test scores per student (it's a small class): @@ -6117,7 +6141,7 @@ Multiplication. @item @var{x} / @var{y} Division. Since all numbers in @code{awk} are -real numbers, the result is not rounded to an integer: @samp{3 / 4} +floating point numbers, the result is not rounded to an integer: @samp{3 / 4} has the value 0.75. @item @var{x} % @var{y} @@ -6976,8 +7000,8 @@ x > 0 ? x : -x @end example Each time the conditional expression is computed, exactly one of -@var{if-true-exp} and @var{if-false-exp} is computed; the other is ignored. -This is important when the expressions contain side effects. For example, +@var{if-true-exp} and @var{if-false-exp} is used; the other is ignored. +This is important when the expressions have side effects. For example, this conditional expression examines element @code{i} of either array @code{a} or array @code{b}, and increments @code{i}. @@ -7975,9 +7999,11 @@ identifies prime numbers: @example awk '# find smallest divisor of num @{ num = $1 +@group for (div = 2; div*div <= num; div++) if (num % div == 0) break +@end group if (num % div == 0) printf "Smallest divisor of %d is %d\n", num, div else @@ -8049,8 +8075,8 @@ of the loop altogether. @ignore In Texinfo source files, text that the author wishes to ignore can be enclosed between lines that start with @samp{@@ignore} and end with -@samp{@@end ignore}. Here is a program that strips out lines between -@samp{@@ignore} and @samp{@@end ignore} pairs. +@samp{@atend ignore}. 
Here is a program that strips out lines between +@samp{@@ignore} and @samp{@atend ignore} pairs. @example BEGIN @{ @@ -8069,7 +8095,7 @@ BEGIN @{ @end example When an @samp{@@ignore} is seen, the @code{ignoring} flag is set to one (true). -When @samp{@@end ignore} is seen, the flag is reset to zero (false). As long +When @samp{@atend ignore} is seen, the flag is reset to zero (false). As long as the flag is true, the input record is not printed, because the @code{continue} restarts the @code{while} loop, skipping over the @code{print} statement. @@ -8778,6 +8804,7 @@ same @code{awk} program. * Multi-dimensional:: Emulating multi-dimensional arrays in @code{awk}. * Multi-scanning:: Scanning multi-dimensional arrays. +* Array Efficiency:: Implementation-specific tips. @end menu @node Array Intro, Reference to Elements, Arrays, Arrays @@ -9008,12 +9035,14 @@ It is a very simple program, and gets confused if it encounters repeated numbers, gaps, or lines that don't begin with a number. @example +@group @c file eg/misc/arraymax.awk @{ if ($1 > max) max = $1 arr[$1] = $0 @} +@end group END @{ for (x = 1; x <= max; x++) @@ -9308,7 +9337,7 @@ output! At first glance, this program should have worked. The variable @code{lines} is uninitialized, and uninitialized variables have the numeric value zero. -So, the value of @code{l[0]} should have been printed. +So, @code{awk} should have printed the value of @code{l[0]}. The issue here is that subscripts for @code{awk} arrays are @strong{always} strings. And uninitialized variables, when used as strings, have the @@ -9445,7 +9474,7 @@ it produces: @end group @end example -@node Multi-scanning, , Multi-dimensional, Arrays +@node Multi-scanning, Array Efficiency, Multi-dimensional, Arrays @section Scanning Multi-dimensional Arrays There is no special @code{for} statement for scanning a @@ -9492,6 +9521,34 @@ The result of this is to set @code{separate[1]} to @code{"1"} and @code{separate[2]} to @code{"foo"}. 
Presto, the original sequence of separate indices has been recovered. +@node Array Efficiency, , Multi-scanning, Arrays +@section Using Array Memory Efficiently + +This section applies just to @code{gawk}. + +It is often useful to use the same bit of data as an index +into multiple arrays. +Due to the way @code{gawk} implements associative arrays, +when you need to use input data as an index for multiple +arrays, it is much more efficient to assign the input field +to a separate variable, and then use that variable as the index. + +@example +@{ + name = $1 + ssn = $2 + nkids = $3 + @dots{} + seniority[name]++ # better than seniority[$1]++ + kids[name] = nkids # better than kids[$1] = nkids +@} +@end example + +Using separate variables with mnemonic names for the input fields +makes programs more readable, in any case. +It is an eventual goal to make @code{gawk}'s array indexing as efficient +as possible, no matter what the source of the index value. + @node Built-in, User-defined, Arrays, Top @chapter Built-in Functions @@ -9625,7 +9682,7 @@ function randint(n) @{ @end example @noindent -The multiplication produces a random real number greater than zero and less +The multiplication produces a random number greater than zero and less than @code{n}. We then make it an integer (using @code{int}) between zero and @code{n} @minus{} 1, inclusive. 
@@ -9915,10 +9972,10 @@ Here is another example: @example awk 'BEGIN @{ str = "daabaaa" - sub(/a*/, "c&c", str) + sub(/a+/, "C&C", str) print str @}' -@print{} dcaacbaaa +@print{} dCaaCbaaa @end example @noindent @@ -10229,7 +10286,8 @@ backslash.@footnote{This consequence was certainly unintended.} @end enumerate The POSIX standard is under revision.@footnote{As of @value{UPDATE-MONTH}, -with final approval and publication hopefully sometime in 1997.} +with final approval and publication as part of the Austin Group +Standards hopefully sometime in 2001.} Because of the above problems, proposed text for the revised standard reverts to rules that correspond more closely to the original existing practice. The proposed rules have special cases that make it possible @@ -10981,6 +11039,8 @@ in an array and start over with a new list of elements Instead of having to repeat this loop everywhere in your program that you need to clear out an array, your program can just call @code{delarray}. +(This guarantees portability. The usage @samp{delete @var{array}} to delete +the contents of an entire array is a non-standard extension.) Here is an example of a recursive function. It takes a string as an input parameter, and returns the string in backwards order. @@ -11012,11 +11072,11 @@ formatted in a well known fashion. Here is an @code{awk} version: @example @c file eg/lib/ctime.awk -@group # ctime.awk # # awk version of C ctime(3) function +@group function ctime(ts, format) @{ format = "%a %b %d %H:%M:%S %Z %Y" @@ -11113,10 +11173,12 @@ doing.} For example: @end iftex @example +@group function changeit(array, ind, nvalue) @{ array[ind] = nvalue @} +@end group BEGIN @{ a[1] = 1; a[2] = 2; a[3] = 3 @@ -11355,6 +11417,11 @@ The @samp{-v} option can only set one variable, but you can use it more than once, setting another variable each time, like this: @samp{awk @w{-v foo=1} @w{-v bar=2} @dots{}}. 
+@strong{Caution:} Using @samp{-v} to set the values of the built-in +variables may lead to surprising results. @code{awk} will reset the +values of those variables as it needs to, possibly ignoring any +predefined value you may have given. + @item -mf @var{NNN} @itemx -mr @var{NNN} Set various memory limits to the value @var{NNN}. The @samp{f} flag sets @@ -11656,7 +11723,7 @@ separated by colons. @code{gawk} gets its search path from the @code{AWKPATH} environment variable. If that variable does not exist, @code{gawk} uses a default path, which is @samp{.:/usr/local/share/awk}.@footnote{Your version of @code{gawk} -may use a directory that is different than @file{/usr/local/share/awk}; it +may use a different directory; it will depend upon how @code{gawk} was built and installed. The actual directory will be the value of @samp{$(datadir)} generated when @code{gawk} was configured. You probably don't need to worry about this @@ -11958,7 +12025,6 @@ it should stop when it gets to the end of the first occurrence. Here is a second version of @code{nextfile} that remedies this problem. @example -@group @c file eg/lib/nextfile.awk # nextfile --- skip remaining records in current file # correctly handle successive occurrences of the same file @@ -11969,14 +12035,15 @@ Here is a second version of @code{nextfile} that remedies this problem. function nextfile() @{ _abandon_ = FILENAME; next @} +@group _abandon_ == FILENAME @{ if (FNR == 1) _abandon_ = "" else next @} -@c endfile @end group +@c endfile @end example The @code{nextfile} function has not changed. It sets @code{_abandon_} @@ -12029,6 +12096,8 @@ print a diagnostic message describing the condition that should have been true but was not, and then it kills the program. In C, using @code{assert} looks like this: +@c NEEDED +@page @example #include <assert.h> @@ -12093,6 +12162,8 @@ program's @code{END} rules will execute. 
For all of this to work correctly, @file{assert.awk} must be the first source file read by @code{awk}. +@c NEEDED +@page You would use this function in your programs this way: @example @@ -12158,10 +12229,12 @@ function round(x, ival, aval, fraction) aval = -x # absolute value ival = int(aval) fraction = aval - ival +@group if (fraction >= .5) return int(x) - 1 # -2.5 --> -3 else return int(x) # -2.3 --> -2 +@end group @} else @{ fraction = x - ival if (fraction >= .5) @@ -12283,7 +12356,7 @@ function chr(c) @c endfile @end group -@c @group +@group @c file eg/lib/ord.awk #### test code #### # BEGIN \ @@ -12296,7 +12369,7 @@ function chr(c) # @} # @} @c endfile -@c @end group +@end group @end example An obvious improvement to these functions would be to move the code for the @@ -12381,7 +12454,11 @@ date into a timestamp. It would appear at first glance that @code{gawk} would have to supply a @code{mktime} built-in function that was simply a ``hook'' to the C language version. In fact though, @code{mktime} can be implemented entirely in -@code{awk}. +@code{awk}.@footnote{@value{UPDATE-MONTH}: Actually, I was mistaken when +I wrote this. The version presented here doesn't always work correctly, +and the next major version of @code{gawk} will provide @code{mktime} +as a built-in function.} +@c sigh. Here is a version of @code{mktime} for @code{awk}. It takes a simple representation of the date and time, and converts it into a timestamp. @@ -12630,13 +12707,14 @@ to the original result. An example demonstrating this is presented below. Finally, there is a ``main'' program for testing the function. 
@example +@c there used to be a blank line after the getline, +@c squished out for page formatting reasons @c @group @c file eg/lib/mktime.awk BEGIN @{ if (_tm_test) @{ printf "Enter date as yyyy mm dd hh mm ss: " getline _tm_test_date - t = mktime(_tm_test_date) r = strftime("%Y %m %d %H %M %S", t) printf "Got back (%s)\n", r @@ -12722,7 +12800,6 @@ time formatted in the same way as the @code{date} utility. # time["timezone"] -- abbreviation of timezone name # time["ampm"] -- AM or PM designation -@group function gettimeofday(time, ret, now, i) @{ # get time once, avoids unnecessary system calls @@ -12734,9 +12811,7 @@ function gettimeofday(time, ret, now, i) # clear out target array for (i in time) delete time[i] -@end group -@group # fill in values, force numeric values to be # numeric by adding 0 time["second"] = strftime("%S", now) + 0 @@ -12761,7 +12836,6 @@ function gettimeofday(time, ret, now, i) return ret @} -@end group @c endfile @end example @@ -13569,9 +13643,11 @@ char **argv; int i; @end group +@group while ((g = getgrent()) != NULL) @{ printf("%s:%s:%d:", g->gr_name, g->gr_passwd, g->gr_gid); +@end group for (i = 0; g->gr_mem[i] != NULL; i++) @{ printf("%s", g->gr_mem[i]); if (g->gr_mem[i+1] != NULL) @@ -14074,11 +14150,11 @@ BEGIN \ if (c == "f") @{ by_fields = 1 fieldlist = Optarg -@group @} else if (c == "c") @{ by_chars = 1 fieldlist = Optarg OFS = "" +@group @} else if (c == "d") @{ if (length(Optarg) > 1) @{ printf("Using first character of %s" \ @@ -14304,8 +14380,6 @@ Normally, @code{egrep} prints the lines that matched. If multiple file names are provided on the command line, each output line is preceded by the name of the file and a colon. -@c NEEDED -@page The options are: @table @code @@ -14457,7 +14531,7 @@ processed. Finally, @code{fcount} is added to @code{total}, so that we know how many lines altogether matched the pattern. 
@example -@c @group +@group @c file eg/prog/egrep.awk function endfile(file) @{ @@ -14470,7 +14544,7 @@ function endfile(file) total += fcount @} @c endfile -@c @end group +@end group @end example This rule does most of the work of matching lines. The variable @@ -14520,10 +14594,8 @@ necessary. fcount += matches # 1 or 0 -@group if (! matches) next -@end group if (no_print && ! count_only) nextfile @@ -14535,8 +14607,10 @@ necessary. if (do_filenames && ! count_only) print FILENAME ":" $0 +@group else if (! count_only) print +@end group @} @c endfile @c @end group @@ -15032,7 +15106,6 @@ standard output, @file{/dev/stdout}. @findex uniq.awk @example -@c @group @c file eg/prog/uniq.awk # uniq.awk --- do uniq in awk # Arnold Robbins, arnold@@gnu.org, Public Domain @@ -15047,15 +15120,13 @@ function usage( e) @} @end group -@group # -c count lines. overrides -d and -u # -d only repeated lines # -u only non-repeated lines # -n skip n fields # +n skip n characters, skip fields first -@end group -BEGIN \ +BEGIN \ @{ count = 1 outputfile = "/dev/stdout" @@ -15072,10 +15143,12 @@ BEGIN \ # this messes us up for things like -5 if (Optarg ~ /^[0-9]+$/) fcount = (c Optarg) + 0 +@group else @{ fcount = c + 0 Optind-- @} +@end group @} else usage() @} @@ -15091,14 +15164,12 @@ BEGIN \ if (repeated_only == 0 && non_repeated_only == 0) repeated_only = non_repeated_only = 1 -@group if (ARGC - Optind == 2) @{ outputfile = ARGV[ARGC - 1] ARGV[ARGC - 1] = "" @} @} @c endfile -@end group @end example The following function, @code{are_equal}, compares the current line, @@ -15315,23 +15386,22 @@ for the file that was just read. It relies on @code{beginfile} to reset the numbers for the following data file. 
@example -@c @group +@c left brace on line with `function' because of page breaking @c file eg/prog/wc.awk -function beginfile(file) -@{ +@group +function beginfile(file) @{ chars = lines = words = 0 fname = FILENAME @} +@end group function endfile(file) @{ tchars += chars tlines += lines twords += words -@group if (do_lines) printf "\t%d", lines -@end group if (do_words) printf "\t%d", words if (do_chars) @@ -15339,7 +15409,6 @@ function endfile(file) printf "\t%s\n", fname @} @c endfile -@c @end group @end example There is one rule that is executed for each line. It adds the length of the @@ -15565,11 +15634,12 @@ message in a loop, again using @code{sleep} to delay for however many seconds are necessary. @example -@c @group @c file eg/prog/alarm.awk +@group # zzzzzz..... go away if interrupted if (system(sprintf("sleep %d", naptime)) != 0) exit 1 +@end group # time to notify! command = sprintf("sleep %d", delay) @@ -15583,7 +15653,6 @@ seconds are necessary. exit 0 @} @c endfile -@c @end group @end example @node Translate Program, Labels Program, Alarm Program, Miscellaneous Programs @@ -15625,7 +15694,7 @@ functions (@pxref{String Functions, ,Built-in Functions for String Manipulation}).@footnote{This program was written before @code{gawk} acquired the ability to split each character in a string into separate array elements. -How might this ability simplify the program?} +How might you use this new feature to simplify the program?} There are two functions. The first, @code{stranslate}, takes three arguments. 
@@ -15683,19 +15752,19 @@ function stranslate(from, to, target, lf, lt, t_ar, i, c) return target @} -@group function translate(from, to) @{ return $0 = stranslate(from, to, $0) @} -@end group +@group # main program BEGIN @{ if (ARGC < 3) @{ print "usage: translate from to" > "/dev/stderr" exit @} +@end group FROM = ARGV[1] TO = ARGV[2] ARGC = 2 @@ -15852,10 +15921,12 @@ awk ' freq[$i]++ @} +@group END @{ for (word in freq) printf "%s\t%d\n", word, freq[word] @}' +@end group @end example The first thing to notice about this program is that it has two rules. The @@ -15914,10 +15985,12 @@ the program: @} @c endfile +@group END @{ for (word in freq) printf "%s\t%d\n", word, freq[word] @} +@end group @end example Assuming we have saved this program in a file named @file{wordfreq.awk}, @@ -16126,8 +16199,7 @@ exited with a zero exit status, signifying OK. @c file eg/prog/extract.awk # extract.awk --- extract files and run programs # from texinfo files -# Arnold Robbins, arnold@@gnu.org, Public Domain -# May 1993 +# Arnold Robbins, arnold@@gnu.org, Public Domain, May 1993 BEGIN @{ IGNORECASE = 1 @} @@ -16315,18 +16387,18 @@ are provided, the standard input is used. # Arnold Robbins, arnold@@gnu.org, Public Domain # August 1995 -@group function usage() @{ print "usage: awksed pat repl [files...]" > "/dev/stderr" exit 1 @} -@end group +@group BEGIN @{ # validate arguments if (ARGC < 3) usage() +@end group RS = ARGV[1] ORS = ARGV[2] @@ -16515,7 +16587,6 @@ argument (e.g., @samp{--file=}). The source text is echoed into @file{/tmp/ig.s.$$}. @item --version -@itemx --version @itemx -Wversion @code{igawk} prints its version number, and runs @samp{gawk --version} to get the @code{gawk} version information, and then exits. @@ -16660,11 +16731,13 @@ slower. 
@end ignore @example -@c @group @c file eg/prog/igawk.sh gawk -- ' # process @@include directives +@c endfile +@group +@c file eg/prog/igawk.sh function pathto(file, i, t, junk) @{ if (index(file, "/") != 0) @@ -16681,7 +16754,7 @@ function pathto(file, i, t, junk) return "" @} @c endfile -@c @end group +@end group @end example The main program is contained inside one @code{BEGIN} rule. The first thing it @@ -18068,19 +18141,19 @@ Prints expressions, sending the output down a pipe to @var{command}. The pipeline to the command stays open until the @code{close} function is called. -@item printf @var{fmt, expr-list} +@item printf @var{fmt}, @var{expr-list} Format and print. -@item printf @var{fmt, expr-list} > file +@item printf @var{fmt}, @var{expr-list} > @var{file} Format and print to @var{file}. If @var{file} does not exist, it is created. If it does exist, its contents are deleted the first time the @code{printf} is executed. -@item printf @var{fmt, expr-list} >> @var{file} +@item printf @var{fmt}, @var{expr-list} >> @var{file} Format and print to @var{file}. The previous contents of @var{file} are retained, and the output of @code{printf} is appended to the file. -@item printf @var{fmt, expr-list} | @var{command} +@item printf @var{fmt}, @var{expr-list} | @var{command} Format and print, sending the output down a pipe to @var{command}. The pipeline to the command stays open until the @code{close} function is called. @@ -18128,7 +18201,10 @@ string, with non-significant zeros suppressed. @samp{%G} will use @samp{%E} instead of @samp{%e}. @item %o -An unsigned octal number (again, an integer). +An unsigned octal number (also an integer). + +@item %u +An unsigned decimal number (again, an integer). @item %s A character string. @@ -18256,6 +18332,8 @@ provides the motivation for this feature. @code{awk} provides a number of built-in functions for performing numeric operations, string related operations, and I/O related operations. 
+@c NEEDED +@page The built-in arithmetic functions are: @table @code @@ -18592,7 +18670,8 @@ Free Software Foundation @* Boston, MA 02111-1307 USA @* Phone: +1-617-542-5942 @* Fax (including Japan): +1-617-542-2652 @* -E-mail: @code{gnu@@gnu.org} @* +Email: @code{gnu@@gnu.org} @* +URL: @code{http://www.gnu.org/} @* @end quotation @noindent @@ -18617,6 +18696,8 @@ You should use a site that is geographically close to you. @itemx utsun.s.u-tokyo.ac.jp:/ftpsync/prep @end table +@c NEEDED +@page @item Australia: @table @code @item archie.au:/gnu @@ -19412,7 +19493,7 @@ some idea of what kind of Unix system you're using, and the exact results @code{gawk} gave you. Also say what you expected to occur; this will help us decide whether the problem was really in the documentation. -Once you have a precise problem, there are two e-mail addresses you +Once you have a precise problem, there are two email addresses you can send mail to. @table @asis @@ -19514,8 +19595,8 @@ retrieve @file{awk.bundle.gz}. This is a shell archive that has been compressed with the GNU @code{gzip} utility. It can be uncompressed with the @code{gunzip} utility. -You can also retrieve this version via the World Wide Web from -@uref{http://cm.bell-labs.com/who/bwk, Brian Kernighan's home page}. +You can also retrieve this version via the World Wide Web from his +@uref{http://cm.bell-labs.com/who/bwk, home page}. This version requires an ANSI C compiler; GCC (the GNU C compiler) works quite nicely. @@ -19729,6 +19810,11 @@ Using this format makes it easy for me to apply your changes to the master version of the @code{gawk} source code (using @code{patch}). If I have to apply the changes manually, using a text editor, I may not do so, particularly if there are lots of changes. + +@item +Include an entry for the @file{ChangeLog} file with your submission. +This further helps minimize the amount of work I have to do, +making it easier for me to accept patches. 
@end enumerate Although this sounds like a lot of work, please remember that while you @@ -19736,6 +19822,7 @@ may write the new code, I have to maintain it and support it, and if it isn't possible for me to do that with a minimum of extra work, then I probably will not. + @node New Ports, , Adding Code, Additions @appendixsubsec Porting @code{gawk} to a New Operating System @@ -19900,7 +19987,7 @@ It may be possible to map a GDBM/NDBM/SDBM file into an @code{awk} array. @item A @code{PROCINFO} Array The special files that provide process-related information (@pxref{Special Files, ,Special File Names in @code{gawk}}) -may be superseded by a @code{PROCINFO} array that would provide the same +will be superseded by a @code{PROCINFO} array that would provide the same information, in an easier to access fashion. @item More @code{lint} warnings @@ -20771,7 +20858,7 @@ the ``copyright'' line and a pointer to where the full notice is found. @smallexample @var{one line to give the program's name and an idea of what it does.} -Copyright (C) 19@var{yy} @var{name of author} +Copyright (C) @var{year} @var{name of author} This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License @@ -20794,7 +20881,7 @@ If the program is interactive, make it output a short notice like this when it starts in an interactive mode: @smallexample -Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author} +Gnomovision version 69, Copyright (C) @var{year} @var{name of author} Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 
This is free software, and you are welcome to redistribute it under certain conditions; type `show c' diff --git a/doc/igawk.1 b/doc/igawk.1 index 30ca853c..08173ec1 100644 --- a/doc/igawk.1 +++ b/doc/igawk.1 @@ -1,4 +1,4 @@ -.TH IGAWK 1 "Oct 13 1995" "Free Software Foundation" "Utility Commands" +.TH IGAWK 1 "Nov 3 1999" "Free Software Foundation" "Utility Commands" .SH NAME igawk \- gawk with include files .SH SYNOPSIS @@ -66,8 +66,8 @@ igawk \-f test.awk .SH SEE ALSO .IR gawk (1) .PP -.IR "AWK Language Programming" , +.IR "Effective AWK Programming" , Edition 1.0, published by the Free Software Foundation, 1995. .SH AUTHOR Arnold Robbins -.RB ( arnold@gnu.org ). +.RB ( arnold@skeeve.com ). diff --git a/doc/texinfo.tex b/doc/texinfo.tex index 3ce47154..ebf58d8e 100644 --- a/doc/texinfo.tex +++ b/doc/texinfo.tex @@ -1,7 +1,11 @@ % texinfo.tex -- TeX macros to handle Texinfo files. -% $Id: texinfo.tex,v 2.227 1998/02/25 22:54:34 karl Exp $ % -% Copyright (C) 1985, 86, 88, 90, 91, 92, 93, 94, 95, 96, 97, 98 +% Load plain if necessary, i.e., if running under initex. +\expandafter\ifx\csname fmtname\endcsname\relax\input plain\fi +% +\def\texinfoversion{1999-10-01.07} +% +% Copyright (C) 1985, 86, 88, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99 % Free Software Foundation, Inc. % % This texinfo.tex file is free software; you can redistribute it and/or @@ -25,36 +29,44 @@ % % Please try the latest version of texinfo.tex before submitting bug % reports; you can get the latest version from: -% ftp://ftp.cs.umb.edu/pub/tex/texinfo.tex -% /home/gd/gnu/doc/texinfo.tex on the GNU machines. -% -% Send bug reports to bug-texinfo@gnu.org. -% Please include a precise test case in each bug report, -% including a complete document with which we can reproduce the problem. -% -% Texinfo macros (with @macro) are *not* supported by texinfo.tex. You -% have to run makeinfo -E to expand macros first; the texi2dvi script -% does this. 
- - -% Make it possible to create a .fmt file just by loading this file: -% if the underlying format is not loaded, start by loading it now. -% Added by gildea November 1993. -\expandafter\ifx\csname fmtname\endcsname\relax\input plain\fi +% ftp://ftp.gnu.org/gnu/texinfo.tex +% (and all GNU mirrors, see http://www.gnu.org/order/ftp.html) +% ftp://texinfo.org/tex/texinfo.tex +% ftp://us.ctan.org/macros/texinfo/texinfo.tex +% (and all CTAN mirrors, finger ctan@us.ctan.org for a list). +% /home/gd/gnu/doc/texinfo.tex on the GNU machines. +% The texinfo.tex in any given Texinfo distribution could well be out +% of date, so if that's what you're using, please check. +% Texinfo has a small home page at http://texinfo.org/. +% +% Send bug reports to bug-texinfo@gnu.org. Please include a +% complete document in each bug report with which we can reproduce the +% problem. Patches are, of course, greatly appreciated. +% +% To process a Texinfo manual with TeX, it's most reliable to use the +% texi2dvi shell script that comes with the distribution. For a simple +% manual foo.texi, however, you can get away with this: +% tex foo.texi +% texindex foo.?? +% tex foo.texi +% tex foo.texi +% dvips foo.dvi -o # or whatever, to process the dvi file; this makes foo.ps. +% The extra runs of TeX get the cross-reference information correct. +% Sometimes one run after texindex suffices, and sometimes you need more +% than two; texi2dvi does it as many times as necessary. +% +% It is possible to adapt texinfo.tex for other languages. You can get +% the existing language-specific files from ftp://ftp.gnu.org/gnu/texinfo/.
-\def\deftexinfoversion$#1: #2 ${\def\texinfoversion{#2}} -\deftexinfoversion$Revision: 2.227 $ -\message{Loading texinfo package [Version \texinfoversion]:} +\message{Loading texinfo [version \texinfoversion]:} % If in a .fmt file, print the version number % and turn on active characters that we couldn't do earlier because % they might have appeared in the input file name. -\everyjob{\message{[Texinfo version \texinfoversion]}\message{} +\everyjob{\message{[Texinfo version \texinfoversion]}% \catcode`+=\active \catcode`\_=\active} % Save some parts of plain tex whose names we will redefine. - \let\ptexb=\b \let\ptexbullet=\bullet \let\ptexc=\c @@ -70,18 +82,9 @@ \let\ptexstar=\* \let\ptext=\t -% Be sure we're in horizontal mode when doing a tie, since we make space -% equivalent to this in @example-like environments. Otherwise, a space -% at the beginning of a line will start with \penalty -- and -% since \penalty is valid in vertical mode, we'd end up putting the -% penalty on the vertical list instead of in the new paragraph. -{\catcode`@ = 11 - % Avoid using \@M directly, because that causes trouble - % if the definition is written into an index file. - \global\let\tiepenalty = \@M - \gdef\tie{\leavevmode\penalty\tiepenalty\ } -} - +% We never want plain's outer \+ definition in Texinfo. +% For @tex, we can use \tabalign. +\let\+ = \relax \message{Basics,} \chardef\other=12 @@ -90,18 +93,47 @@ % starts a new line in the output. \newlinechar = `^^J -% Set up fixed words for English. 
-\ifx\putwordChapter\undefined{\gdef\putwordChapter{Chapter}}\fi% -\def\putwordInfo{Info}% -\ifx\putwordSee\undefined{\gdef\putwordSee{See}}\fi% -\ifx\putwordsee\undefined{\gdef\putwordsee{see}}\fi% -\ifx\putwordfile\undefined{\gdef\putwordfile{file}}\fi% -\ifx\putwordpage\undefined{\gdef\putwordpage{page}}\fi% -\ifx\putwordsection\undefined{\gdef\putwordsection{section}}\fi% -\ifx\putwordSection\undefined{\gdef\putwordSection{Section}}\fi% -\ifx\putwordTableofContents\undefined{\gdef\putwordTableofContents{Table of Contents}}\fi% -\ifx\putwordShortContents\undefined{\gdef\putwordShortContents{Short Contents}}\fi% -\ifx\putwordAppendix\undefined{\gdef\putwordAppendix{Appendix}}\fi% +% Set up fixed words for English if not already set. +\ifx\putwordAppendix\undefined \gdef\putwordAppendix{Appendix}\fi +\ifx\putwordChapter\undefined \gdef\putwordChapter{Chapter}\fi +\ifx\putwordfile\undefined \gdef\putwordfile{file}\fi +\ifx\putwordin\undefined \gdef\putwordin{in}\fi +\ifx\putwordIndexIsEmpty\undefined \gdef\putwordIndexIsEmpty{(Index is empty)}\fi +\ifx\putwordIndexNonexistent\undefined \gdef\putwordIndexNonexistent{(Index is nonexistent)}\fi +\ifx\putwordInfo\undefined \gdef\putwordInfo{Info}\fi +\ifx\putwordInstanceVariableof\undefined \gdef\putwordInstanceVariableof{Instance Variable of}\fi +\ifx\putwordMethodon\undefined \gdef\putwordMethodon{Method on}\fi +\ifx\putwordNoTitle\undefined \gdef\putwordNoTitle{No Title}\fi +\ifx\putwordof\undefined \gdef\putwordof{of}\fi +\ifx\putwordon\undefined \gdef\putwordon{on}\fi +\ifx\putwordpage\undefined \gdef\putwordpage{page}\fi +\ifx\putwordsection\undefined \gdef\putwordsection{section}\fi +\ifx\putwordSection\undefined \gdef\putwordSection{Section}\fi +\ifx\putwordsee\undefined \gdef\putwordsee{see}\fi +\ifx\putwordSee\undefined \gdef\putwordSee{See}\fi +\ifx\putwordShortTOC\undefined \gdef\putwordShortTOC{Short Contents}\fi +\ifx\putwordTOC\undefined \gdef\putwordTOC{Table of Contents}\fi +% 
+\ifx\putwordMJan\undefined \gdef\putwordMJan{January}\fi +\ifx\putwordMFeb\undefined \gdef\putwordMFeb{February}\fi +\ifx\putwordMMar\undefined \gdef\putwordMMar{March}\fi +\ifx\putwordMApr\undefined \gdef\putwordMApr{April}\fi +\ifx\putwordMMay\undefined \gdef\putwordMMay{May}\fi +\ifx\putwordMJun\undefined \gdef\putwordMJun{June}\fi +\ifx\putwordMJul\undefined \gdef\putwordMJul{July}\fi +\ifx\putwordMAug\undefined \gdef\putwordMAug{August}\fi +\ifx\putwordMSep\undefined \gdef\putwordMSep{September}\fi +\ifx\putwordMOct\undefined \gdef\putwordMOct{October}\fi +\ifx\putwordMNov\undefined \gdef\putwordMNov{November}\fi +\ifx\putwordMDec\undefined \gdef\putwordMDec{December}\fi +% +\ifx\putwordDefmac\undefined \gdef\putwordDefmac{Macro}\fi +\ifx\putwordDefspec\undefined \gdef\putwordDefspec{Special Form}\fi +\ifx\putwordDefvar\undefined \gdef\putwordDefvar{Variable}\fi +\ifx\putwordDefopt\undefined \gdef\putwordDefopt{User Option}\fi +\ifx\putwordDeftypevar\undefined\gdef\putwordDeftypevar{Variable}\fi +\ifx\putwordDeffunc\undefined \gdef\putwordDeffunc{Function}\fi +\ifx\putwordDeftypefun\undefined\gdef\putwordDeftypefun{Function}\fi % Ignore a token. % @@ -122,30 +154,35 @@ % since that produces some useless output on the terminal. % \def\gloggingall{\begingroup \globaldefs = 1 \loggingall \endgroup}% +\ifx\eTeXversion\undefined \def\loggingall{\tracingcommands2 \tracingstats2 \tracingpages1 \tracingoutput1 \tracinglostchars1 \tracingmacros2 \tracingparagraphs1 \tracingrestores1 \showboxbreadth\maxdimen\showboxdepth\maxdimen }% +\else +\def\loggingall{\tracingcommands3 \tracingstats2 + \tracingpages1 \tracingoutput1 \tracinglostchars1 + \tracingmacros2 \tracingparagraphs1 \tracingrestores1 + \tracingscantokens1 \tracingassigns1 \tracingifs1 + \tracinggroups1 \tracingnesting2 + \showboxbreadth\maxdimen\showboxdepth\maxdimen +}% +\fi % For @cropmarks command. % Do @cropmarks to get crop marks. 
-% +% \newif\ifcropmarks \let\cropmarks = \cropmarkstrue % % Dimensions to add cropmarks at corners. % Added by P. A. MacKay, 12 Nov. 1986 % -\newdimen\cornerlong \newdimen\cornerthick -\newdimen\topandbottommargin -\newdimen\outerhsize \newdimen\outervsize -\cornerlong=1pc\cornerthick=.3pt % These set size of cropmarks -\outerhsize=7in -%\outervsize=9.5in -% Alternative @smallbook page size is 9.25in -\outervsize=9.25in -\topandbottommargin=.75in +\newdimen\outerhsize \newdimen\outervsize % set by the paper size routines +\newdimen\cornerlong \cornerlong=1pc +\newdimen\cornerthick \cornerthick=.3pt +\newdimen\topandbottommargin \topandbottommargin=.75in % Main output routine. \chardef\PAGE = 255 @@ -179,13 +216,16 @@ \shipout\vbox{% \ifcropmarks \vbox to \outervsize\bgroup \hsize = \outerhsize - \line{\ewtop\hfil\ewtop}% - \nointerlineskip - \line{% - \vbox{\moveleft\cornerthick\nstop}% - \hfill - \vbox{\moveright\cornerthick\nstop}% - }% + \vskip-\topandbottommargin + \vtop to0pt{% + \line{\ewtop\hfil\ewtop}% + \nointerlineskip + \line{% + \vbox{\moveleft\cornerthick\nstop}% + \hfill + \vbox{\moveright\cornerthick\nstop}% + }% + \vss}% \vskip\topandbottommargin \line\bgroup \hfil % center the page within the outer (page) hsize. 
@@ -203,18 +243,22 @@ \unvbox\footlinebox \fi % + \ifpdfmakepagedest \pdfmkdest{\the\pageno} \fi + % \ifcropmarks \egroup % end of \vbox\bgroup \hfil\egroup % end of (centering) \line\bgroup \vskip\topandbottommargin plus1fill minus1fill \boxmaxdepth = \cornerthick - \line{% - \vbox{\moveleft\cornerthick\nsbot}% - \hfill - \vbox{\moveright\cornerthick\nsbot}% + \vbox to0pt{\vss + \line{% + \vbox{\moveleft\cornerthick\nsbot}% + \hfill + \vbox{\moveright\cornerthick\nsbot}% + }% + \nointerlineskip + \line{\ewbot\hfil\ewbot}% }% - \nointerlineskip - \line{\ewbot\hfil\ewbot}% \egroup % \vbox from first cropmarks clause \fi }% end of \shipout\vbox @@ -330,11 +374,11 @@ %% Call \inENV within environments (after a \begingroup) \newif\ifENV \ENVfalse \def\inENV{\ifENV\relax\else\ENVtrue\fi} \def\ENVcheck{% -\ifENV\errmessage{Still within an environment. Type Return to continue.} +\ifENV\errmessage{Still within an environment; press RETURN to continue} \endgroup\fi} % This is not perfect, but it should reduce lossage % @begin foo is the same as @foo, for now. -\newhelp\EMsimple{Type <Return> to continue.} +\newhelp\EMsimple{Press RETURN to continue.} \outer\def\begin{\parsearg\beginxxx} @@ -393,7 +437,7 @@ % @@ prints an @ % Kludge this until the fonts are right (grr). -\def\@{{\tt \char '100}} +\def\@{{\tt\char64}} % This is turned off because it was never documented % and you can use @w{...} around a quote to suppress ligatures. @@ -403,8 +447,8 @@ %\def\'{{'}} % Used to generate quoted braces. -\def\mylbrace {{\tt \char '173}} -\def\myrbrace {{\tt \char '175}} +\def\mylbrace {{\tt\char123}} +\def\myrbrace {{\tt\char125}} \let\{=\mylbrace \let\}=\myrbrace \begingroup @@ -441,6 +485,18 @@ \fi\fi } +% Be sure we're in horizontal mode when doing a tie, since we make space +% equivalent to this in @example-like environments. 
Otherwise, a space +% at the beginning of a line will start with \penalty -- and +% since \penalty is valid in vertical mode, we'd end up putting the +% penalty on the vertical list instead of in the new paragraph. +{\catcode`@ = 11 + % Avoid using \@M directly, because that causes trouble + % if the definition is written into an index file. + \global\let\tiepenalty = \@M + \gdef\tie{\leavevmode\penalty\tiepenalty\ } +} + % @: forces normal size whitespace following. \def\:{\spacefactor=1000 } @@ -538,41 +594,47 @@ where each line of input produces a line of output.} %% This method tries to make TeX break the page naturally %% if the depth of the box does not fit. %{\baselineskip=0pt% -%\vtop to #1\mil{\vfil}\kern -#1\mil\penalty 10000 +%\vtop to #1\mil{\vfil}\kern -#1\mil\nobreak %\prevdepth=-1000pt %}} \def\needx#1{% - % Go into vertical mode, so we don't make a big box in the middle of a + % Ensure vertical mode, so we don't make a big box in the middle of a % paragraph. \par % - % Don't add any leading before our big empty box, but allow a page - % break, since the best break might be right here. - \allowbreak - \nointerlineskip - \vtop to #1\mil{\vfil}% - % - % TeX does not even consider page breaks if a penalty added to the - % main vertical list is 10000 or more. But in order to see if the - % empty box we just added fits on the page, we must make it consider - % page breaks. On the other hand, we don't want to actually break the - % page after the empty box. So we use a penalty of 9999. - % - % There is an extremely small chance that TeX will actually break the - % page at this \penalty, if there are no other feasible breakpoints in - % sight. (If the user is using lots of big @group commands, which - % almost-but-not-quite fill up a page, TeX will have a hard time doing - % good page breaking, for example.) 
However, I could not construct an - % example where a page broke at this \penalty; if it happens in a real - % document, then we can reconsider our strategy. - \penalty9999 - % - % Back up by the size of the box, whether we did a page break or not. - \kern -#1\mil - % - % Do not allow a page break right after this kern. - \nobreak + % If the @need value is less than one line space, it's useless. + \dimen0 = #1\mil + \dimen2 = \ht\strutbox + \advance\dimen2 by \dp\strutbox + \ifdim\dimen0 > \dimen2 + % + % Do a \strut just to make the height of this box be normal, so the + % normal leading is inserted relative to the preceding line. + % And a page break here is fine. + \vtop to #1\mil{\strut\vfil}% + % + % TeX does not even consider page breaks if a penalty added to the + % main vertical list is 10000 or more. But in order to see if the + % empty box we just added fits on the page, we must make it consider + % page breaks. On the other hand, we don't want to actually break the + % page after the empty box. So we use a penalty of 9999. + % + % There is an extremely small chance that TeX will actually break the + % page at this \penalty, if there are no other feasible breakpoints in + % sight. (If the user is using lots of big @group commands, which + % almost-but-not-quite fill up a page, TeX will have a hard time doing + % good page breaking, for example.) However, I could not construct an + % example where a page broke at this \penalty; if it happens in a real + % document, then we can reconsider our strategy. + \penalty9999 + % + % Back up by the size of the box, whether we did a page break or not. + \kern -#1\mil + % + % Do not allow a page break right after this kern. + \nobreak + \fi } % @br forces paragraph break @@ -583,15 +645,19 @@ where each line of input produces a line of output.} % We do .5em per period so that it has the same spacing in a typewriter % font as three actual period characters. 
% -\def\dots{\hbox to 1.5em{% - \hskip 0pt plus 0.25fil minus 0.25fil - .\hss.\hss.% - \hskip 0pt plus 0.5fil minus 0.5fil -}} +\def\dots{% + \leavevmode + \hbox to 1.5em{% + \hskip 0pt plus 0.25fil minus 0.25fil + .\hss.\hss.% + \hskip 0pt plus 0.5fil minus 0.5fil + }% +} % @enddots{} is an end-of-sentence ellipsis. -% +% \def\enddots{% + \leavevmode \hbox to 2em{% \hskip 0pt plus 0.25fil minus 0.25fil .\hss.\hss.\hss.% @@ -602,7 +668,7 @@ where each line of input produces a line of output.} % @page forces the start of a new page - +% \def\page{\par\vfill\supereject} % @exdent text.... @@ -669,332 +735,50 @@ where each line of input produces a line of output.} % @c is the same as @comment % @ignore ... @end ignore is another way to write a comment -\def\comment{\catcode 64=\other \catcode 123=\other \catcode 125=\other% -\parsearg \commentxxx} - -\def\commentxxx #1{\catcode 64=0 \catcode 123=1 \catcode 125=2 } +\def\comment{\begingroup \catcode`\^^M=\other% +\catcode`\@=\other \catcode`\{=\other \catcode`\}=\other% +\commentxxx} +{\catcode`\^^M=\other \gdef\commentxxx#1^^M{\endgroup}} \let\c=\comment -% @paragraphindent is defined for the Info formatting commands only. -\let\paragraphindent=\comment - -% Prevent errors for section commands. -% Used in @ignore and in failing conditionals. 
-\def\ignoresections{% -\let\chapter=\relax -\let\unnumbered=\relax -\let\top=\relax -\let\unnumberedsec=\relax -\let\unnumberedsection=\relax -\let\unnumberedsubsec=\relax -\let\unnumberedsubsection=\relax -\let\unnumberedsubsubsec=\relax -\let\unnumberedsubsubsection=\relax -\let\section=\relax -\let\subsec=\relax -\let\subsubsec=\relax -\let\subsection=\relax -\let\subsubsection=\relax -\let\appendix=\relax -\let\appendixsec=\relax -\let\appendixsection=\relax -\let\appendixsubsec=\relax -\let\appendixsubsection=\relax -\let\appendixsubsubsec=\relax -\let\appendixsubsubsection=\relax -\let\contents=\relax -\let\smallbook=\relax -\let\titlepage=\relax -} - -% Used in nested conditionals, where we have to parse the Texinfo source -% and so want to turn off most commands, in case they are used -% incorrectly. -% -\def\ignoremorecommands{% - \let\defcodeindex = \relax - \let\defcv = \relax - \let\deffn = \relax - \let\deffnx = \relax - \let\defindex = \relax - \let\defivar = \relax - \let\defmac = \relax - \let\defmethod = \relax - \let\defop = \relax - \let\defopt = \relax - \let\defspec = \relax - \let\deftp = \relax - \let\deftypefn = \relax - \let\deftypefun = \relax - \let\deftypevar = \relax - \let\deftypevr = \relax - \let\defun = \relax - \let\defvar = \relax - \let\defvr = \relax - \let\ref = \relax - \let\xref = \relax - \let\printindex = \relax - \let\pxref = \relax - \let\settitle = \relax - \let\setchapternewpage = \relax - \let\setchapterstyle = \relax - \let\everyheading = \relax - \let\evenheading = \relax - \let\oddheading = \relax - \let\everyfooting = \relax - \let\evenfooting = \relax - \let\oddfooting = \relax - \let\headings = \relax - \let\include = \relax - \let\lowersections = \relax - \let\down = \relax - \let\raisesections = \relax - \let\up = \relax - \let\set = \relax - \let\clear = \relax - \let\item = \relax -} - -% Ignore @ignore ... @end ignore. 
-% -\def\ignore{\doignore{ignore}} - -% Ignore @ifinfo, @ifhtml, @ifnottex, @html, @menu, and @direntry text. -% -\def\ifinfo{\doignore{ifinfo}} -\def\ifhtml{\doignore{ifhtml}} -\def\ifnottex{\doignore{ifnottex}} -\def\html{\doignore{html}} -\def\menu{\doignore{menu}} -\def\direntry{\doignore{direntry}} - -% Also ignore @macro ... @end macro. The user must run texi2dvi, -% which runs makeinfo to do macro expansion. Ignore @unmacro, too. -\def\macro{\doignore{macro}} -\let\unmacro = \comment - - -% @dircategory CATEGORY -- specify a category of the dir file -% which this file should belong to. Ignore this in TeX. -\let\dircategory = \comment - -% Ignore text until a line `@end #1'. -% -\def\doignore#1{\begingroup - % Don't complain about control sequences we have declared \outer. - \ignoresections - % - % Define a command to swallow text until we reach `@end #1'. - \long\def\doignoretext##1\end #1{\enddoignore}% - % - % Make sure that spaces turn into tokens that match what \doignoretext wants. - \catcode32 = 10 - % - % Ignore braces, too, so mismatched braces don't cause trouble. - \catcode`\{ = 9 - \catcode`\} = 9 - % - % And now expand that command. - \doignoretext -} - -% What we do to finish off ignored text. -% -\def\enddoignore{\endgroup\ignorespaces}% - -\newif\ifwarnedobs\warnedobsfalse -\def\obstexwarn{% - \ifwarnedobs\relax\else - % We need to warn folks that they may have trouble with TeX 3.0. - % This uses \immediate\write16 rather than \message to get newlines. 
- \immediate\write16{} - \immediate\write16{***WARNING*** for users of Unix TeX 3.0!} - \immediate\write16{This manual trips a bug in TeX version 3.0 (tex hangs).} - \immediate\write16{If you are running another version of TeX, relax.} - \immediate\write16{If you are running Unix TeX 3.0, kill this TeX process.} - \immediate\write16{ Then upgrade your TeX installation if you can.} - \immediate\write16{ (See ftp://ftp.gnu.ai.mit.edu/pub/gnu/TeX.README.)} - \immediate\write16{If you are stuck with version 3.0, run the} - \immediate\write16{ script ``tex3patch'' from the Texinfo distribution} - \immediate\write16{ to use a workaround.} - \immediate\write16{} - \global\warnedobstrue - \fi -} - -% **In TeX 3.0, setting text in \nullfont hangs tex. For a -% workaround (which requires the file ``dummy.tfm'' to be installed), -% uncomment the following line: -%%%%%\font\nullfont=dummy\let\obstexwarn=\relax - -% Ignore text, except that we keep track of conditional commands for -% purposes of nesting, up to an `@end #1' command. -% -\def\nestedignore#1{% - \obstexwarn - % We must actually expand the ignored text to look for the @end - % command, so that nested ignore constructs work. Thus, we put the - % text into a \vbox and then do nothing with the result. To minimize - % the change of memory overflow, we follow the approach outlined on - % page 401 of the TeXbook: make the current font be a dummy font. - % - \setbox0 = \vbox\bgroup - % Don't complain about control sequences we have declared \outer. - \ignoresections - % - % Define `@end #1' to end the box, which will in turn undefine the - % @end command again. - \expandafter\def\csname E#1\endcsname{\egroup\ignorespaces}% - % - % We are going to be parsing Texinfo commands. Most cause no - % trouble when they are used incorrectly, but some commands do - % complicated argument parsing or otherwise get confused, so we - % undefine them. 
- % - % We can't do anything about stray @-signs, unfortunately; - % they'll produce `undefined control sequence' errors. - \ignoremorecommands - % - % Set the current font to be \nullfont, a TeX primitive, and define - % all the font commands to also use \nullfont. We don't use - % dummy.tfm, as suggested in the TeXbook, because not all sites - % might have that installed. Therefore, math mode will still - % produce output, but that should be an extremely small amount of - % stuff compared to the main input. - % - \nullfont - \let\tenrm = \nullfont \let\tenit = \nullfont \let\tensl = \nullfont - \let\tenbf = \nullfont \let\tentt = \nullfont \let\smallcaps = \nullfont - \let\tensf = \nullfont - % Similarly for index fonts (mostly for their use in - % smallexample) - \let\indrm = \nullfont \let\indit = \nullfont \let\indsl = \nullfont - \let\indbf = \nullfont \let\indtt = \nullfont \let\indsc = \nullfont - \let\indsf = \nullfont - % - % Don't complain when characters are missing from the fonts. - \tracinglostchars = 0 - % - % Don't bother to do space factor calculations. - \frenchspacing - % - % Don't report underfull hboxes. - \hbadness = 10000 - % - % Do minimal line-breaking. - \pretolerance = 10000 - % - % Do not execute instructions in @tex - \def\tex{\doignore{tex}}% -} - -% @set VAR sets the variable VAR to an empty value. -% @set VAR REST-OF-LINE sets VAR to the value REST-OF-LINE. -% -% Since we want to separate VAR from REST-OF-LINE (which might be -% empty), we can't just use \parsearg; we have to insert a space of our -% own to delimit the rest of the line, and then take it out again if we -% didn't need it. Make sure the catcode of space is correct to avoid -% losing inside @example, for instance. -% -\def\set{\begingroup\catcode` =10 - \catcode`\-=12 \catcode`\_=12 % Allow - and _ in VAR. 
- \parsearg\setxxx} -\def\setxxx#1{\setyyy#1 \endsetyyy} -\def\setyyy#1 #2\endsetyyy{% - \def\temp{#2}% - \ifx\temp\empty \global\expandafter\let\csname SET#1\endcsname = \empty - \else \setzzz{#1}#2\endsetzzz % Remove the trailing space \setxxx inserted. - \fi - \endgroup -} -% Can't use \xdef to pre-expand #2 and save some time, since \temp or -% \next or other control sequences that we've defined might get us into -% an infinite loop. Consider `@set foo @cite{bar}'. -\def\setzzz#1#2 \endsetzzz{\expandafter\gdef\csname SET#1\endcsname{#2}} - -% @clear VAR clears (i.e., unsets) the variable VAR. -% -\def\clear{\parsearg\clearxxx} -\def\clearxxx#1{\global\expandafter\let\csname SET#1\endcsname=\relax} - -% @value{foo} gets the text saved in variable foo. -% -\def\value{\begingroup - \catcode`\-=12 \catcode`\_=12 % Allow - and _ in VAR. - \valuexxx} -\def\valuexxx#1{% - \expandafter\ifx\csname SET#1\endcsname\relax - {\{No value for ``#1''\}}% - \else - \csname SET#1\endcsname - \fi -\endgroup} - -% @ifset VAR ... @end ifset reads the `...' iff VAR has been defined -% with @set. +% @paragraphindent NCHARS +% We'll use ems for NCHARS, close enough. +% We cannot implement @paragraphindent asis, though. +% +\def\asisword{asis} % no translation, these are keywords +\def\noneword{none} % -\def\ifset{\parsearg\ifsetxxx} -\def\ifsetxxx #1{% - \expandafter\ifx\csname SET#1\endcsname\relax - \expandafter\ifsetfail +\def\paragraphindent{\parsearg\doparagraphindent} +\def\doparagraphindent#1{% + \def\temp{#1}% + \ifx\temp\asisword \else - \expandafter\ifsetsucceed + \ifx\temp\noneword + \defaultparindent = 0pt + \else + \defaultparindent = #1em + \fi \fi + \parindent = \defaultparindent } -\def\ifsetsucceed{\conditionalsucceed{ifset}} -\def\ifsetfail{\nestedignore{ifset}} -\defineunmatchedend{ifset} -% @ifclear VAR ... @end ifclear reads the `...' iff VAR has never been -% defined with @set, or has been undefined with @clear. 
-% -\def\ifclear{\parsearg\ifclearxxx} -\def\ifclearxxx #1{% - \expandafter\ifx\csname SET#1\endcsname\relax - \expandafter\ifclearsucceed +% @exampleindent NCHARS +% We'll use ems for NCHARS like @paragraphindent. +% It seems @exampleindent asis isn't necessary, but +% I preserve it to make it similar to @paragraphindent. +\def\exampleindent{\parsearg\doexampleindent} +\def\doexampleindent#1{% + \def\temp{#1}% + \ifx\temp\asisword \else - \expandafter\ifclearfail + \ifx\temp\noneword + \lispnarrowing = 0pt + \else + \lispnarrowing = #1em + \fi \fi } -\def\ifclearsucceed{\conditionalsucceed{ifclear}} -\def\ifclearfail{\nestedignore{ifclear}} -\defineunmatchedend{ifclear} - -% @iftex, @ifnothtml, @ifnotinfo always succeed; we read the text -% following, through the first @end iftex (etc.). Make `@end iftex' -% (etc.) valid only after an @iftex. -% -\def\iftex{\conditionalsucceed{iftex}} -\def\ifnothtml{\conditionalsucceed{ifnothtml}} -\def\ifnotinfo{\conditionalsucceed{ifnotinfo}} -\defineunmatchedend{iftex} -\defineunmatchedend{ifnothtml} -\defineunmatchedend{ifnotinfo} - -% We can't just want to start a group at @iftex (for example) and end it -% at @end iftex, since then @set commands inside the conditional have no -% effect (they'd get reverted at the end of the group). So we must -% define \Eiftex to redefine itself to be its previous value. (We can't -% just define it to fail again with an ``unmatched end'' error, since -% the @ifset might be nested.) -% -\def\conditionalsucceed#1{% - \edef\temp{% - % Remember the current value of \E#1. - \let\nece{prevE#1} = \nece{E#1}% - % - % At the `@end #1', redefine \E#1 to be its previous value. - \def\nece{E#1}{\let\nece{E#1} = \nece{prevE#1}}% - }% - \temp -} - -% We need to expand lots of \csname's, but we don't want to expand the -% control sequences after we've constructed them. -% -\def\nece#1{\expandafter\noexpand\csname#1\endcsname} % @asis just yields its argument. Used with @table, for example. 
% @@ -1017,33 +801,23 @@ where each line of input produces a line of output.} \def\bullet{\implicitmath\ptexbullet\implicitmath} \def\minus{\implicitmath-\implicitmath} -\def\node{\ENVcheck\parsearg\nodezzz} -\def\nodezzz#1{\nodexxx [#1,]} -\def\nodexxx[#1,#2]{\gdef\lastnode{#1}} -\let\nwnode=\node -\let\lastnode=\relax - -\def\donoderef{\ifx\lastnode\relax\else -\expandafter\expandafter\expandafter\setref{\lastnode}\fi -\global\let\lastnode=\relax} - -\def\unnumbnoderef{\ifx\lastnode\relax\else -\expandafter\expandafter\expandafter\unnumbsetref{\lastnode}\fi -\global\let\lastnode=\relax} - -\def\appendixnoderef{\ifx\lastnode\relax\else -\expandafter\expandafter\expandafter\appendixsetref{\lastnode}\fi -\global\let\lastnode=\relax} - % @refill is a no-op. \let\refill=\relax +% If working on a large document in chapters, it is convenient to +% be able to disable indexing, cross-referencing, and contents, for test runs. +% This is done with @novalidate (before @setfilename). +% +\newif\iflinks \linkstrue % by default we want the aux files. +\let\novalidate = \linksfalse + % @setfilename is done at the beginning of every texinfo file. % So open here the files we need to have open while reading the input. % This makes it possible to make a .fmt file for texinfo. \def\setfilename{% - \readauxfile - \opencontents + \iflinks + \readauxfile + \fi % \openindices needs to do some work in any case. \openindices \fixbackslash % Turn off hack to swallow `\input texinfo'. \global\let\setfilename=\comment % Ignore extra @setfilename cmds. @@ -1059,30 +833,197 @@ where each line of input produces a line of output.} \comment % Ignore the actual filename. } +% Called from \setfilename. +% +\def\openindices{% + \newindex{cp}% + \newcodeindex{fn}% + \newcodeindex{vr}% + \newcodeindex{tp}% + \newcodeindex{ky}% + \newcodeindex{pg}% +} + % @bye. 
\outer\def\bye{\pagealignmacro\tracingstats=1\ptexend} -% \def\macro#1{\begingroup\ignoresections\catcode`\#=6\def\macrotemp{#1}\parsearg\macroxxx} -% \def\macroxxx#1#2 \end macro{% -% \expandafter\gdef\macrotemp#1{#2}% -% \endgroup} - -%\def\linemacro#1{\begingroup\ignoresections\catcode`\#=6\def\macrotemp{#1}\parsearg\linemacroxxx} -%\def\linemacroxxx#1#2 \end linemacro{% -%\let\parsearg=\relax -%\edef\macrotempx{\csname M\butfirst\expandafter\string\macrotemp\endcsname}% -%\expandafter\xdef\macrotemp{\parsearg\macrotempx}% -%\expandafter\gdef\macrotempx#1{#2}% -%\endgroup} -%\def\butfirst#1{} +\message{pdf,} +% adobe `portable' document format +\newcount\tempnum +\newcount\lnkcount +\newtoks\filename +\newcount\filenamelength +\newcount\pgn +\newtoks\toksA +\newtoks\toksB +\newtoks\toksC +\newtoks\toksD +\newbox\boxA +\newcount\countA +\newif\ifpdf +\newif\ifpdfmakepagedest + +\ifx\pdfoutput\undefined + \pdffalse + \let\pdfmkdest = \gobble + \let\pdfurl = \gobble + \let\endlink = \relax + \let\linkcolor = \relax + \let\pdfmakeoutlines = \relax +\else + \pdftrue + \pdfoutput = 1 + \input pdfcolor + \def\dopdfimage#1#2#3{% + \def\imagewidth{#2}% + \def\imageheight{#3}% + \ifnum\pdftexversion < 14 + \pdfimage + \else + \pdfximage + \fi + \ifx\empty\imagewidth\else width \imagewidth \fi + \ifx\empty\imageheight\else height \imageheight \fi + {#1.pdf}% + \ifnum\pdftexversion < 14 \else + \pdfrefximage \pdflastximage + \fi} + \def\pdfmkdest#1{\pdfdest name{#1@} xyz} + \def\pdfmkpgn#1{#1@} + \let\linkcolor = \Cyan + \def\endlink{\Black\pdfendlink} + % Adding outlines to PDF; macros for calculating structure of outlines + % come from Petr Olsak + \def\expnumber#1{\expandafter\ifx\csname#1\endcsname\relax 0% + \else \csname#1\endcsname \fi} + \def\advancenumber#1{\tempnum=\expnumber{#1}\relax + \advance\tempnum by1 + \expandafter\xdef\csname#1\endcsname{\the\tempnum}} + \def\pdfmakeoutlines{{% + \openin 1 \jobname.toc + \ifeof 1\else\bgroup + \closein 1 + \indexnofonts + 
\def\tt{} + % thanh's hack / proper braces in bookmarks + \edef\mylbrace{\iftrue \string{\else}\fi}\let\{=\mylbrace + \edef\myrbrace{\iffalse{\else\string}\fi}\let\}=\myrbrace + % + \def\chapentry ##1##2##3{} + \def\unnumbchapentry ##1##2{} + \def\secentry ##1##2##3##4{\advancenumber{chap##2}} + \def\unnumbsecentry ##1##2{} + \def\subsecentry ##1##2##3##4##5{\advancenumber{sec##2.##3}} + \def\unnumbsubsecentry ##1##2{} + \def\subsubsecentry ##1##2##3##4##5##6{\advancenumber{subsec##2.##3.##4}} + \def\unnumbsubsubsecentry ##1##2{} + \input \jobname.toc + \def\chapentry ##1##2##3{% + \pdfoutline goto name{\pdfmkpgn{##3}}count-\expnumber{chap##2}{##1}} + \def\unnumbchapentry ##1##2{% + \pdfoutline goto name{\pdfmkpgn{##2}}{##1}} + \def\secentry ##1##2##3##4{% + \pdfoutline goto name{\pdfmkpgn{##4}}count-\expnumber{sec##2.##3}{##1}} + \def\unnumbsecentry ##1##2{% + \pdfoutline goto name{\pdfmkpgn{##2}}{##1}} + \def\subsecentry ##1##2##3##4##5{% + \pdfoutline goto name{\pdfmkpgn{##5}}count-\expnumber{subsec##2.##3.##4}{##1}} + \def\unnumbsubsecentry ##1##2{% + \pdfoutline goto name{\pdfmkpgn{##2}}{##1}} + \def\subsubsecentry ##1##2##3##4##5##6{% + \pdfoutline goto name{\pdfmkpgn{##6}}{##1}} + \def\unnumbsubsubsecentry ##1##2{% + \pdfoutline goto name{\pdfmkpgn{##2}}{##1}} + \input \jobname.toc + \egroup\fi + }} + \def\makelinks #1,{% + \def\params{#1}\def\E{END}% + \ifx\params\E + \let\nextmakelinks=\relax + \else + \let\nextmakelinks=\makelinks + \ifnum\lnkcount>0,\fi + \picknum{#1}% + \startlink attr{/Border [0 0 0]} + goto name{\pdfmkpgn{\the\pgn}}% + \linkcolor #1% + \advance\lnkcount by 1% + \endlink + \fi + \nextmakelinks + } + \def\picknum#1{\expandafter\pn#1} + \def\pn#1{% + \def\p{#1}% + \ifx\p\lbrace + \let\nextpn=\ppn + \else + \let\nextpn=\ppnn + \def\first{#1} + \fi + \nextpn + } + \def\ppn#1{\pgn=#1\gobble} + \def\ppnn{\pgn=\first} + \def\pdfmklnk#1{\lnkcount=0\makelinks #1,END,} + \def\addtokens#1#2{\edef\addtoks{\noexpand#1={\the#1#2}}\addtoks} + 
\def\skipspaces#1{\def\PP{#1}\def\D{|}% + \ifx\PP\D\let\nextsp\relax + \else\let\nextsp\skipspaces + \ifx\p\space\else\addtokens{\filename}{\PP}% + \advance\filenamelength by 1 + \fi + \fi + \nextsp} + \def\getfilename#1{\filenamelength=0\expandafter\skipspaces#1|\relax} + \ifnum\pdftexversion < 14 + \let \startlink \pdfannotlink + \else + \let \startlink \pdfstartlink + \fi + \def\pdfurl#1{% + \begingroup + \normalturnoffactive\def\@{@}% + \leavevmode\Red + \startlink attr{/Border [0 0 0]}% + user{/Subtype /Link /A << /S /URI /URI (#1) >>}% + % #1 + \endgroup} + \def\pdfgettoks#1.{\setbox\boxA=\hbox{\toksA={#1.}\toksB={}\maketoks}} + \def\addtokens#1#2{\edef\addtoks{\noexpand#1={\the#1#2}}\addtoks} + \def\adn#1{\addtokens{\toksC}{#1}\global\countA=1\let\next=\maketoks} + \def\poptoks#1#2|ENDTOKS|{\let\first=#1\toksD={#1}\toksA={#2}} + \def\maketoks{% + \expandafter\poptoks\the\toksA|ENDTOKS| + \ifx\first0\adn0 + \else\ifx\first1\adn1 \else\ifx\first2\adn2 \else\ifx\first3\adn3 + \else\ifx\first4\adn4 \else\ifx\first5\adn5 \else\ifx\first6\adn6 + \else\ifx\first7\adn7 \else\ifx\first8\adn8 \else\ifx\first9\adn9 + \else + \ifnum0=\countA\else\makelink\fi + \ifx\first.\let\next=\done\else + \let\next=\maketoks + \addtokens{\toksB}{\the\toksD} + \ifx\first,\addtokens{\toksB}{\space}\fi + \fi + \fi\fi\fi\fi\fi\fi\fi\fi\fi\fi + \next} + \def\makelink{\addtokens{\toksB}% + {\noexpand\pdflink{\the\toksC}}\toksC={}\global\countA=0} + \def\pdflink#1{% + \startlink attr{/Border [0 0 0]} goto name{\mkpgn{#1}} + \linkcolor #1\endlink} + \def\mkpgn#1{#1@} + \def\done{\edef\st{\global\noexpand\toksA={\the\toksB}}\st} +\fi % \ifx\pdfoutput \message{fonts,} - % Font-change commands. -% Texinfo supports the sans serif font style, which plain TeX does not. +% Texinfo sort of supports the sans serif font style, which plain TeX does not. % So we set up a \sf analogous to plain's \rm, etc. 
\newfam\sffam \def\sf{\fam=\sffam \tensf} @@ -1148,22 +1089,17 @@ where each line of input produces a line of output.} \setfont\deftt\ttshape{10}{\magstep1} \def\df{\let\tentt=\deftt \let\tenbf = \defbf \bf} -% Fonts for indices and small examples (9pt). -% We actually use the slanted font rather than the italic, -% because texinfo normally uses the slanted fonts for that. -% Do not make many font distinctions in general in the index, since they -% aren't very useful. -\setfont\ninett\ttshape{9}{1000} -\setfont\indrm\rmshape{9}{1000} -\setfont\indit\slshape{9}{1000} -\let\indsl=\indit -\let\indtt=\ninett -\let\indttsl=\ninett -\let\indsf=\indrm -\let\indbf=\indrm -\setfont\indsc\scshape{10}{900} -\font\indi=cmmi9 -\font\indsy=cmsy9 +% Fonts for indices, footnotes, small examples (9pt). +\setfont\smallrm\rmshape{9}{1000} +\setfont\smalltt\ttshape{9}{1000} +\setfont\smallbf\bfshape{10}{900} +\setfont\smallit\itshape{9}{1000} +\setfont\smallsl\slshape{9}{1000} +\setfont\smallsf\sfshape{9}{1000} +\setfont\smallsc\scshape{10}{900} +\setfont\smallttsl\ttslshape{10}{900} +\font\smalli=cmmi9 +\font\smallsy=cmsy9 % Fonts for title page: \setfont\titlerm\rmbshape{12}{\magstep3} @@ -1277,11 +1213,12 @@ where each line of input produces a line of output.} \let\tensf=\ssecsf \let\teni=\sseci \let\tensy=\ssecsy \let\tenttsl=\ssecttsl \resetmathfonts \setleading{15pt}} \let\subsubsecfonts = \subsecfonts % Maybe make sssec fonts scaled magstephalf? 
-\def\indexfonts{% - \let\tenrm=\indrm \let\tenit=\indit \let\tensl=\indsl - \let\tenbf=\indbf \let\tentt=\indtt \let\smallcaps=\indsc - \let\tensf=\indsf \let\teni=\indi \let\tensy=\indsy \let\tenttsl=\indttsl - \resetmathfonts \setleading{12pt}} +\def\smallfonts{% + \let\tenrm=\smallrm \let\tenit=\smallit \let\tensl=\smallsl + \let\tenbf=\smallbf \let\tentt=\smalltt \let\smallcaps=\smallsc + \let\tensf=\smallsf \let\teni=\smalli \let\tensy=\smallsy + \let\tenttsl=\smallttsl + \resetmathfonts \setleading{11pt}} % Set up the default fonts, so we can use them for creating boxes. % @@ -1305,13 +1242,14 @@ where each line of input produces a line of output.} % \smartitalic{ARG} outputs arg in italics, followed by an italic correction % unless the following character is such as not to need one. \def\smartitalicx{\ifx\next,\else\ifx\next-\else\ifx\next.\else\/\fi\fi\fi} -\def\smartitalic#1{{\sl #1}\futurelet\next\smartitalicx} +\def\smartslanted#1{{\sl #1}\futurelet\next\smartitalicx} +\def\smartitalic#1{{\it #1}\futurelet\next\smartitalicx} \let\i=\smartitalic -\let\var=\smartitalic -\let\dfn=\smartitalic +\let\var=\smartslanted +\let\dfn=\smartslanted \let\emph=\smartitalic -\let\cite=\smartitalic +\let\cite=\smartslanted \def\b#1{{\bf #1}} \let\strong=\b @@ -1329,9 +1267,9 @@ where each line of input produces a line of output.} } \let\ttfont=\t \def\samp#1{`\tclose{#1}'\null} -\setfont\smallrm\rmshape{8}{1000} -\font\smallsy=cmsy9 -\def\key#1{{\smallrm\textfont2=\smallsy \leavevmode\hbox{% +\setfont\keyrm\rmshape{8}{1000} +\font\keysy=cmsy9 +\def\key#1{{\keyrm\textfont2=\keysy \leavevmode\hbox{% \raise0.4pt\hbox{\angleleft}\kern-.08em\vtop{% \vbox{\hrule\kern-0.4pt \hbox{\raise0.4pt\hbox{\vphantom{\angleleft}}#1}}% @@ -1341,7 +1279,9 @@ where each line of input produces a line of output.} %\def\key #1{{\ttsl \nohyphenation \uppercase{#1}}\null} \def\ctrl #1{{\tt \rawbackslash \hat}#1} +% @file, @option are the same as @samp. 
\let\file=\samp +\let\option=\samp % @code is a modification of @t, % which makes spaces the same size as normal in the surrounding text. @@ -1376,20 +1316,18 @@ where each line of input produces a line of output.} % and arrange explicitly to hyphenate at a dash. % -- rms. { -\catcode`\-=\active -\catcode`\_=\active -\catcode`\|=\active -\global\def\code{\begingroup \catcode`\-=\active \let-\codedash \catcode`\_=\active \let_\codeunder \codex} -% The following is used by \doprintindex to insure that long function names -% wrap around. It is necessary for - and _ to be active before the index is -% read from the file, as \entry parses the arguments long before \code is -% ever called. -- mycroft -% _ is always active; and it shouldn't be \let = to an _ that is a -% subscript character anyway. Then, @cindex @samp{_} (for example) -% fails. --karl -\global\def\indexbreaks{% - \catcode`\-=\active \let-\realdash -} + \catcode`\-=\active + \catcode`\_=\active + % + \global\def\code{\begingroup + \catcode`\-=\active \let-\codedash + \catcode`\_=\active \let_\codeunder + \codex + } + % + % If we end up with any active - characters when handling the index, + % just treat them as a normal -. + \global\def\indexbreaks{\catcode`\-=\active \let-\realdash} } \def\realdash{-} @@ -1430,27 +1368,55 @@ where each line of input produces a line of output.} \else{\tclose{\kbdfont\look}}\fi \else{\tclose{\kbdfont\look}}\fi} -% @url. Quotes do not seem necessary, so use \code. +% For @url, @env, @command quotes seem unnecessary, so use \code. \let\url=\code - -% @uref (abbreviation for `urlref') takes an optional second argument -% specifying the text to display. First (mandatory) arg is the url. -% Perhaps eventually put in a hypertex \special here. 
-% -\def\uref#1{\urefxxx #1,,\finish} -\def\urefxxx#1,#2,#3\finish{% - \setbox0 = \hbox{\ignorespaces #2}% +\let\env=\code +\let\command=\code + +% @uref (abbreviation for `urlref') takes an optional (comma-separated) +% second argument specifying the text to display and an optional third +% arg as text to display instead of (rather than in addition to) the url +% itself. First (mandatory) arg is the url. Perhaps eventually put in +% a hypertex \special here. +% +\def\uref#1{\douref #1,,,\finish} +\def\douref#1,#2,#3,#4\finish{\begingroup + \unsepspaces + \pdfurl{#1}% + \setbox0 = \hbox{\ignorespaces #3}% \ifdim\wd0 > 0pt - \unhbox0\ (\code{#1})% + \unhbox0 % third arg given, show only that \else - \code{#1}% + \setbox0 = \hbox{\ignorespaces #2}% + \ifdim\wd0 > 0pt + \ifpdf + \unhbox0 % PDF: 2nd arg given, show only it + \else + \unhbox0\ (\code{#1})% DVI: 2nd arg given, show both it and url + \fi + \else + \code{#1}% only url given, so show it + \fi \fi -} + \endlink +\endgroup} -% rms does not like the angle brackets --karl, 17may97. -% So now @email is just like @uref. +% rms does not like angle brackets --karl, 17may97. +% So now @email is just like @uref, unless we are pdf. +% %\def\email#1{\angleleft{\tt #1}\angleright} -\let\email=\uref +\ifpdf + \def\email#1{\doemail#1,,\finish} + \def\doemail#1,#2,#3\finish{\begingroup + \unsepspaces + \pdfurl{mailto:#1}% + \setbox0 = \hbox{\ignorespaces #2}% + \ifdim\wd0>0pt\unhbox0\else\code{#1}\fi + \endlink + \endgroup} +\else + \let\email=\uref +\fi % Check if we are currently using a typewriter font. Since all the % Computer Modern typewriter fonts have zero interword stretch (and @@ -1460,8 +1426,7 @@ where each line of input produces a line of output.} \def\ifmonospace{\ifdim\fontdimen3\font=0pt } % Typeset a dimension, e.g., `in' or `pt'. The only reason for the -% argument is to make the input look right: @dmn{pt} instead of -% @dmn{}pt. +% argument is to make the input look right: @dmn{pt} instead of @dmn{}pt. 
% \def\dmn#1{\thinspace #1} @@ -1472,11 +1437,14 @@ where each line of input produces a line of output.} % Polish suppressed-l. --karl, 22sep96. %\def\l#1{{\li #1}\null} +% Explicit font changes: @r, @sc, undocumented @ii. \def\r#1{{\rm #1}} % roman font -% Use of \lowercase was suggested. \def\sc#1{{\smallcaps#1}} % smallcaps font \def\ii#1{{\it #1}} % italic font +% @acronym downcases the argument and prints in smallcaps. +\def\acronym#1{{\smallcaps \lowercase{#1}}} + % @pounds{} is a sterling sign. \def\pounds{{\it\$}} @@ -1490,15 +1458,20 @@ where each line of input produces a line of output.} \newif\ifseenauthor \newif\iffinishedtitlepage +% Do an implicit @contents or @shortcontents after @end titlepage if the +% user says @setcontentsaftertitlepage or @setshortcontentsaftertitlepage. +% +\newif\ifsetcontentsaftertitlepage + \let\setcontentsaftertitlepage = \setcontentsaftertitlepagetrue +\newif\ifsetshortcontentsaftertitlepage + \let\setshortcontentsaftertitlepage = \setshortcontentsaftertitlepagetrue + \def\shorttitlepage{\parsearg\shorttitlepagezzz} \def\shorttitlepagezzz #1{\begingroup\hbox{}\vskip 1.5in \chaprm \centerline{#1}% \endgroup\page\hbox{}\page} \def\titlepage{\begingroup \parindent=0pt \textfonts \let\subtitlerm=\tenrm -% I deinstalled the following change because \cmr12 is undefined. -% This change was not in the ChangeLog anyway. --rms. -% \let\subtitlerm=\cmr12 \def\subtitlefont{\subtitlerm \normalbaselineskip = 13pt \normalbaselines}% % \def\authorfont{\authorrm \normalbaselineskip = 16pt \normalbaselines}% @@ -1547,6 +1520,23 @@ where each line of input produces a line of output.} % after the title page, which we certainly don't want. \oldpage \endgroup + % + % If they want short, they certainly want long too. 
+ \ifsetshortcontentsaftertitlepage + \shortcontents + \contents + \global\let\shortcontents = \relax + \global\let\contents = \relax + \fi + % + \ifsetcontentsaftertitlepage + \contents + \global\let\contents = \relax + \global\let\shortcontents = \relax + \fi + % + \ifpdf \pdfmakepagedesttrue \fi + % \HEADINGSon } @@ -1560,10 +1550,10 @@ where each line of input produces a line of output.} \let\thispage=\folio -\newtoks \evenheadline % Token sequence for heading line of even pages -\newtoks \oddheadline % Token sequence for heading line of odd pages -\newtoks \evenfootline % Token sequence for footing line of even pages -\newtoks \oddfootline % Token sequence for footing line of odd pages +\newtoks\evenheadline % headline on even pages +\newtoks\oddheadline % headline on odd pages +\newtoks\evenfootline % footline on even pages +\newtoks\oddfootline % footline on odd pages % Now make Tex use those variables \headline={{\textfonts\rm \ifodd\pageno \the\oddheadline @@ -1681,39 +1671,23 @@ where each line of input produces a line of output.} % Subroutines used in generating headings % Produces Day Month Year style of output. -\def\today{\number\day\space -\ifcase\month\or -January\or February\or March\or April\or May\or June\or -July\or August\or September\or October\or November\or December\fi -\space\number\year} - -% Use this if you want the Month Day, Year style of output. -%\def\today{\ifcase\month\or -%January\or February\or March\or April\or May\or June\or -%July\or August\or September\or October\or November\or December\fi -%\space\number\day, \number\year} - -% @settitle line... 
specifies the title of the document, for headings -% It generates no output of its own - -\def\thistitle{No Title} +\def\today{% + \number\day\space + \ifcase\month + \or\putwordMJan\or\putwordMFeb\or\putwordMMar\or\putwordMApr + \or\putwordMMay\or\putwordMJun\or\putwordMJul\or\putwordMAug + \or\putwordMSep\or\putwordMOct\or\putwordMNov\or\putwordMDec + \fi + \space\number\year} + +% @settitle line... specifies the title of the document, for headings. +% It generates no output of its own. +\def\thistitle{\putwordNoTitle} \def\settitle{\parsearg\settitlezzz} \def\settitlezzz #1{\gdef\thistitle{#1}} \message{tables,} - -% @tabs -- simple alignment - -% These don't work. For one thing, \+ is defined as outer. -% So these macros cannot even be defined. - -%\def\tabs{\parsearg\tabszzz} -%\def\tabszzz #1{\settabs\+#1\cr} -%\def\tabline{\parsearg\tablinezzz} -%\def\tablinezzz #1{\+#1\cr} -%\def\&{&} - % Tables -- @table, @ftable, @vtable, @item(x), @kitem(x), @xitem(x). % default indentation of table text @@ -1757,11 +1731,6 @@ July\or August\or September\or October\or November\or December\fi \itemindex{#1}% \nobreak % This prevents a break before @itemx. % - % Be sure we are not still in the middle of a paragraph. - %{\parskip = 0in - %\par - %}% - % % If the item text does not fit in the space we have, put it on a line % by itself, and do not allow a page break either before or after that % line. We do not start a paragraph here because then if the next @@ -1790,13 +1759,17 @@ July\or August\or September\or October\or November\or December\fi \itemxneedsnegativevskipfalse \else % The item text fits into the space. Start a paragraph, so that the - % following text (if any) will end up on the same line. Since that - % text will be indented by \tableindent, we make the item text be in - % a zero-width box. + % following text (if any) will end up on the same line. 
\noindent - \rlap{\hskip -\tableindent\box0}\ignorespaces% - \endgroup% - \itemxneedsnegativevskiptrue% + % Do this with kerns and \unhbox so that if there is a footnote in + % the item text, it can migrate to the main vertical list and + % eventually be printed. + \nobreak\kern-\tableindent + \dimen0 = \itemmax \advance\dimen0 by \itemmargin \advance\dimen0 by -\wd0 + \unhbox0 + \nobreak\kern\dimen0 + \endgroup + \itemxneedsnegativevskiptrue \fi } @@ -1807,9 +1780,10 @@ July\or August\or September\or October\or November\or December\fi \def\xitem{\errmessage{@xitem while not in a table}} \def\xitemx{\errmessage{@xitemx while not in a table}} -%% Contains a kludge to get @end[description] to work +% Contains a kludge to get @end[description] to work. \def\description{\tablez{\dontindex}{1}{}{}{}{}} +% @table, @ftable, @vtable. \def\table{\begingroup\inENV\obeylines\obeyspaces\tablex} {\obeylines\obeyspaces% \gdef\tablex #1^^M{% @@ -1869,7 +1843,7 @@ July\or August\or September\or October\or November\or December\fi \def\itemize{\parsearg\itemizezzz} \def\itemizezzz #1{% - \begingroup % ended by the @end itemsize + \begingroup % ended by the @end itemize \itemizey {#1}{\Eitemize} } @@ -2082,46 +2056,60 @@ July\or August\or September\or October\or November\or December\fi \multitablelinespace=0pt % Macros used to set up halign preamble: -% +% \let\endsetuptable\relax \def\xendsetuptable{\endsetuptable} \let\columnfractions\relax \def\xcolumnfractions{\columnfractions} \newif\ifsetpercent -% 2/1/96, to allow fractions to be given with more than one digit. -\def\pickupwholefraction#1 {\global\advance\colcount by1 % -\expandafter\xdef\csname col\the\colcount\endcsname{.#1\hsize}% -\setuptable} +% #1 is the part of the @columnfraction before the decimal point, which +% is presumably either 0 or the empty string (but we don't check, we +% just throw it away). #2 is the decimal part, which we use as the +% percent of \hsize for this column. 
+\def\pickupwholefraction#1.#2 {% + \global\advance\colcount by 1 + \expandafter\xdef\csname col\the\colcount\endcsname{.#2\hsize}% + \setuptable +} \newcount\colcount -\def\setuptable#1{\def\firstarg{#1}% -\ifx\firstarg\xendsetuptable\let\go\relax% -\else - \ifx\firstarg\xcolumnfractions\global\setpercenttrue% +\def\setuptable#1{% + \def\firstarg{#1}% + \ifx\firstarg\xendsetuptable + \let\go = \relax \else - \ifsetpercent - \let\go\pickupwholefraction % In this case arg of setuptable - % is the decimal point before the - % number given in percent of hsize. - % We don't need this so we don't use it. + \ifx\firstarg\xcolumnfractions + \global\setpercenttrue \else - \global\advance\colcount by1 - \setbox0=\hbox{#1 }% Add a normal word space as a separator; - % typically that is always in the input, anyway. - \expandafter\xdef\csname col\the\colcount\endcsname{\the\wd0}% + \ifsetpercent + \let\go\pickupwholefraction + \else + \global\advance\colcount by 1 + \setbox0=\hbox{#1\unskip }% Add a normal word space as a separator; + % typically that is always in the input, anyway. + \expandafter\xdef\csname col\the\colcount\endcsname{\the\wd0}% + \fi + \fi + \ifx\go\pickupwholefraction + % Put the argument back for the \pickupwholefraction call, so + % we'll always have a period there to be parsed. + \def\go{\pickupwholefraction#1}% + \else + \let\go = \setuptable \fi% - \fi% -\ifx\go\pickupwholefraction\else\let\go\setuptable\fi% -\fi\go} + \fi + \go +} -% multitable syntax -\def\tab{&\hskip1sp\relax} % 2/2/96 - % tiny skip here makes sure this column space is - % maintained, even if it is never used. +% This used to have \hskip1sp. But then the space in a template line is +% not enough. That is bad. So let's go back to just & until we +% encounter the problem it was intended to solve again. +% --karl, nathan@acm.org, 20apr99. +\def\tab{&} % @multitable ... 
@end multitable definitions: - +% \def\multitable{\parsearg\dotable} \def\dotable#1{\bgroup \vskip\parskip @@ -2160,15 +2148,15 @@ July\or August\or September\or October\or November\or December\fi % In order to keep entries from bumping into each other % we will add a \leftskip of \multitablecolspace to all columns after % the first one. - % + % % If a template has been used, we will add \multitablecolspace % to the width of each template entry. - % + % % If the user has set preamble in terms of percent of \hsize we will % use that dimension as the width of the column, and the \leftskip % will keep entries from bumping into each other. Table will start at % left margin and final column will justify at right margin. - % + % % Make sure we don't inherit \rightskip from the outer environment. \rightskip=0pt \ifnum\colcount=1 @@ -2199,15 +2187,18 @@ July\or August\or September\or October\or November\or December\fi % If so, do nothing. If not, give it an appropriate dimension based on % current baselineskip. \ifdim\multitablelinespace=0pt +\setbox0=\vbox{X}\global\multitablelinespace=\the\baselineskip +\global\advance\multitablelinespace by-\ht0 %% strut to put in table in case some entry doesn't have descenders, %% to keep lines equally spaced \let\multistrut = \strut -%% Test to see if parskip is larger than space between lines of -%% table. If not, do nothing. -%% If so, set to same dimension as multitablelinespace. \else +%% FIXME: what is \box0 supposed to be? \gdef\multistrut{\vrule height\multitablelinespace depth\dp0 width0pt\relax} \fi +%% Test to see if parskip is larger than space between lines of +%% table. If not, do nothing. +%% If so, set to same dimension as multitablelinespace. \ifdim\multitableparskip>\multitablelinespace \global\multitableparskip=\multitablelinespace \global\advance\multitableparskip-7pt %% to keep parskip somewhat smaller @@ -2220,6 +2211,356 @@ width0pt\relax} \fi \fi} +\message{conditionals,} +% Prevent errors for section commands. 
+% Used in @ignore and in failing conditionals. +\def\ignoresections{% + \let\chapter=\relax + \let\unnumbered=\relax + \let\top=\relax + \let\unnumberedsec=\relax + \let\unnumberedsection=\relax + \let\unnumberedsubsec=\relax + \let\unnumberedsubsection=\relax + \let\unnumberedsubsubsec=\relax + \let\unnumberedsubsubsection=\relax + \let\section=\relax + \let\subsec=\relax + \let\subsubsec=\relax + \let\subsection=\relax + \let\subsubsection=\relax + \let\appendix=\relax + \let\appendixsec=\relax + \let\appendixsection=\relax + \let\appendixsubsec=\relax + \let\appendixsubsection=\relax + \let\appendixsubsubsec=\relax + \let\appendixsubsubsection=\relax + \let\contents=\relax + \let\smallbook=\relax + \let\titlepage=\relax +} + +% Used in nested conditionals, where we have to parse the Texinfo source +% and so want to turn off most commands, in case they are used +% incorrectly. +% +\def\ignoremorecommands{% + \let\defcodeindex = \relax + \let\defcv = \relax + \let\deffn = \relax + \let\deffnx = \relax + \let\defindex = \relax + \let\defivar = \relax + \let\defmac = \relax + \let\defmethod = \relax + \let\defop = \relax + \let\defopt = \relax + \let\defspec = \relax + \let\deftp = \relax + \let\deftypefn = \relax + \let\deftypefun = \relax + \let\deftypeivar = \relax + \let\deftypeop = \relax + \let\deftypevar = \relax + \let\deftypevr = \relax + \let\defun = \relax + \let\defvar = \relax + \let\defvr = \relax + \let\ref = \relax + \let\xref = \relax + \let\printindex = \relax + \let\pxref = \relax + \let\settitle = \relax + \let\setchapternewpage = \relax + \let\setchapterstyle = \relax + \let\everyheading = \relax + \let\evenheading = \relax + \let\oddheading = \relax + \let\everyfooting = \relax + \let\evenfooting = \relax + \let\oddfooting = \relax + \let\headings = \relax + \let\include = \relax + \let\lowersections = \relax + \let\down = \relax + \let\raisesections = \relax + \let\up = \relax + \let\set = \relax + \let\clear = \relax + \let\item = \relax +} 
+ +% Ignore @ignore ... @end ignore. +% +\def\ignore{\doignore{ignore}} + +% Ignore @ifinfo, @ifhtml, @ifnottex, @html, @menu, and @direntry text. +% +\def\ifinfo{\doignore{ifinfo}} +\def\ifhtml{\doignore{ifhtml}} +\def\ifnottex{\doignore{ifnottex}} +\def\html{\doignore{html}} +\def\menu{\doignore{menu}} +\def\direntry{\doignore{direntry}} + +% @dircategory CATEGORY -- specify a category of the dir file +% which this file should belong to. Ignore this in TeX. +\let\dircategory = \comment + +% Ignore text until a line `@end #1'. +% +\def\doignore#1{\begingroup + % Don't complain about control sequences we have declared \outer. + \ignoresections + % + % Define a command to swallow text until we reach `@end #1'. + % This @ is a catcode 12 token (that is the normal catcode of @ in + % this texinfo.tex file). We change the catcode of @ below to match. + \long\def\doignoretext##1@end #1{\enddoignore}% + % + % Make sure that spaces turn into tokens that match what \doignoretext wants. + \catcode32 = 10 + % + % Ignore braces, too, so mismatched braces don't cause trouble. + \catcode`\{ = 9 + \catcode`\} = 9 + % + % We must not have @c interpreted as a control sequence. + \catcode`\@ = 12 + % + % Make the letter c a comment character so that the rest of the line + % will be ignored. This way, the document can have (for example) + % @c @end ifinfo + % and the @end ifinfo will be properly ignored. + % (We've just changed @ to catcode 12.) + \catcode`\c = 14 + % + % And now expand that command. + \doignoretext +} + +% What we do to finish off ignored text. +% +\def\enddoignore{\endgroup\ignorespaces}% + +\newif\ifwarnedobs\warnedobsfalse +\def\obstexwarn{% + \ifwarnedobs\relax\else + % We need to warn folks that they may have trouble with TeX 3.0. + % This uses \immediate\write16 rather than \message to get newlines. 
+ \immediate\write16{} + \immediate\write16{WARNING: for users of Unix TeX 3.0!} + \immediate\write16{This manual trips a bug in TeX version 3.0 (tex hangs).} + \immediate\write16{If you are running another version of TeX, relax.} + \immediate\write16{If you are running Unix TeX 3.0, kill this TeX process.} + \immediate\write16{ Then upgrade your TeX installation if you can.} + \immediate\write16{ (See ftp://ftp.gnu.org/pub/gnu/TeX.README.)} + \immediate\write16{If you are stuck with version 3.0, run the} + \immediate\write16{ script ``tex3patch'' from the Texinfo distribution} + \immediate\write16{ to use a workaround.} + \immediate\write16{} + \global\warnedobstrue + \fi +} + +% **In TeX 3.0, setting text in \nullfont hangs tex. For a +% workaround (which requires the file ``dummy.tfm'' to be installed), +% uncomment the following line: +%%%%%\font\nullfont=dummy\let\obstexwarn=\relax + +% Ignore text, except that we keep track of conditional commands for +% purposes of nesting, up to an `@end #1' command. +% +\def\nestedignore#1{% + \obstexwarn + % We must actually expand the ignored text to look for the @end + % command, so that nested ignore constructs work. Thus, we put the + % text into a \vbox and then do nothing with the result. To minimize + % the chance of memory overflow, we follow the approach outlined on + % page 401 of the TeXbook: make the current font be a dummy font. + % + \setbox0 = \vbox\bgroup + % Don't complain about control sequences we have declared \outer. + \ignoresections + % + % Define `@end #1' to end the box, which will in turn undefine the + % @end command again. + \expandafter\def\csname E#1\endcsname{\egroup\ignorespaces}% + % + % We are going to be parsing Texinfo commands. Most cause no + % trouble when they are used incorrectly, but some commands do + % complicated argument parsing or otherwise get confused, so we + % undefine them.
+ % + % We can't do anything about stray @-signs, unfortunately; + % they'll produce `undefined control sequence' errors. + \ignoremorecommands + % + % Set the current font to be \nullfont, a TeX primitive, and define + % all the font commands to also use \nullfont. We don't use + % dummy.tfm, as suggested in the TeXbook, because not all sites + % might have that installed. Therefore, math mode will still + % produce output, but that should be an extremely small amount of + % stuff compared to the main input. + % + \nullfont + \let\tenrm=\nullfont \let\tenit=\nullfont \let\tensl=\nullfont + \let\tenbf=\nullfont \let\tentt=\nullfont \let\smallcaps=\nullfont + \let\tensf=\nullfont + % Similarly for index fonts (mostly for their use in smallexample). + \let\smallrm=\nullfont \let\smallit=\nullfont \let\smallsl=\nullfont + \let\smallbf=\nullfont \let\smalltt=\nullfont \let\smallsc=\nullfont + \let\smallsf=\nullfont + % + % Don't complain when characters are missing from the fonts. + \tracinglostchars = 0 + % + % Don't bother to do space factor calculations. + \frenchspacing + % + % Don't report underfull hboxes. + \hbadness = 10000 + % + % Do minimal line-breaking. + \pretolerance = 10000 + % + % Do not execute instructions in @tex + \def\tex{\doignore{tex}}% + % Do not execute macro definitions. + % `c' is a comment character, so the word `macro' will get cut off. + \def\macro{\doignore{ma}}% +} + +% @set VAR sets the variable VAR to an empty value. +% @set VAR REST-OF-LINE sets VAR to the value REST-OF-LINE. +% +% Since we want to separate VAR from REST-OF-LINE (which might be +% empty), we can't just use \parsearg; we have to insert a space of our +% own to delimit the rest of the line, and then take it out again if we +% didn't need it. Make sure the catcode of space is correct to avoid +% losing inside @example, for instance. +% +\def\set{\begingroup\catcode` =10 + \catcode`\-=12 \catcode`\_=12 % Allow - and _ in VAR. 
+ \parsearg\setxxx} +\def\setxxx#1{\setyyy#1 \endsetyyy} +\def\setyyy#1 #2\endsetyyy{% + \def\temp{#2}% + \ifx\temp\empty \global\expandafter\let\csname SET#1\endcsname = \empty + \else \setzzz{#1}#2\endsetzzz % Remove the trailing space \setxxx inserted. + \fi + \endgroup +} +% Can't use \xdef to pre-expand #2 and save some time, since \temp or +% \next or other control sequences that we've defined might get us into +% an infinite loop. Consider `@set foo @cite{bar}'. +\def\setzzz#1#2 \endsetzzz{\expandafter\gdef\csname SET#1\endcsname{#2}} + +% @clear VAR clears (i.e., unsets) the variable VAR. +% +\def\clear{\parsearg\clearxxx} +\def\clearxxx#1{\global\expandafter\let\csname SET#1\endcsname=\relax} + +% @value{foo} gets the text saved in variable foo. +{ + \catcode`\_ = \active + % + % We might end up with active _ or - characters in the argument if + % we're called from @code, as @code{@value{foo-bar_}}. So \let any + % such active characters to their normal equivalents. + \gdef\value{\begingroup + \catcode`\-=12 \catcode`\_=12 + \indexbreaks \let_\normalunderscore + \valuexxx} +} +\def\valuexxx#1{\expandablevalue{#1}\endgroup} + +% We have this subroutine so that we can handle at least some @value's +% properly in indexes (we \let\value to this in \indexdummies). Ones +% whose names contain - or _ still won't work, but we can't do anything +% about that. The command has to be fully expandable, since the result +% winds up in the index file. This means that if the variable's value +% contains other Texinfo commands, it's almost certain it will fail +% (although perhaps we could fix that with sufficient work to do a +% one-level expansion on the result, instead of complete). +% +\def\expandablevalue#1{% + \expandafter\ifx\csname SET#1\endcsname\relax + {[No value for ``#1'']}% + \else + \csname SET#1\endcsname + \fi +} + +% @ifset VAR ... @end ifset reads the `...' iff VAR has been defined +% with @set. 
+% +\def\ifset{\parsearg\ifsetxxx} +\def\ifsetxxx #1{% + \expandafter\ifx\csname SET#1\endcsname\relax + \expandafter\ifsetfail + \else + \expandafter\ifsetsucceed + \fi +} +\def\ifsetsucceed{\conditionalsucceed{ifset}} +\def\ifsetfail{\nestedignore{ifset}} +\defineunmatchedend{ifset} + +% @ifclear VAR ... @end ifclear reads the `...' iff VAR has never been +% defined with @set, or has been undefined with @clear. +% +\def\ifclear{\parsearg\ifclearxxx} +\def\ifclearxxx #1{% + \expandafter\ifx\csname SET#1\endcsname\relax + \expandafter\ifclearsucceed + \else + \expandafter\ifclearfail + \fi +} +\def\ifclearsucceed{\conditionalsucceed{ifclear}} +\def\ifclearfail{\nestedignore{ifclear}} +\defineunmatchedend{ifclear} + +% @iftex, @ifnothtml, @ifnotinfo always succeed; we read the text +% following, through the first @end iftex (etc.). Make `@end iftex' +% (etc.) valid only after an @iftex. +% +\def\iftex{\conditionalsucceed{iftex}} +\def\ifnothtml{\conditionalsucceed{ifnothtml}} +\def\ifnotinfo{\conditionalsucceed{ifnotinfo}} +\defineunmatchedend{iftex} +\defineunmatchedend{ifnothtml} +\defineunmatchedend{ifnotinfo} + +% We can't just want to start a group at @iftex (for example) and end it +% at @end iftex, since then @set commands inside the conditional have no +% effect (they'd get reverted at the end of the group). So we must +% define \Eiftex to redefine itself to be its previous value. (We can't +% just define it to fail again with an ``unmatched end'' error, since +% the @ifset might be nested.) +% +\def\conditionalsucceed#1{% + \edef\temp{% + % Remember the current value of \E#1. + \let\nece{prevE#1} = \nece{E#1}% + % + % At the `@end #1', redefine \E#1 to be its previous value. + \def\nece{E#1}{\let\nece{E#1} = \nece{prevE#1}}% + }% + \temp +} + +% We need to expand lots of \csname's, but we don't want to expand the +% control sequences after we've constructed them. +% +\def\nece#1{\expandafter\noexpand\csname#1\endcsname} + +% @defininfoenclose. 
+\let\definfoenclose=\comment + + \message{indexing,} % Index generation facilities @@ -2235,12 +2576,14 @@ width0pt\relax} \fi % the file that accumulates this index. The file's extension is foo. % The name of an index should be no more than 2 characters long % for the sake of vms. - -\def\newindex #1{ -\expandafter\newwrite \csname#1indfile\endcsname% Define number for output file -\openout \csname#1indfile\endcsname \jobname.#1 % Open the file -\expandafter\xdef\csname#1index\endcsname{% % Define \xxxindex -\noexpand\doindex {#1}} +% +\def\newindex#1{% + \iflinks + \expandafter\newwrite \csname#1indfile\endcsname + \openout \csname#1indfile\endcsname \jobname.#1 % Open the file + \fi + \expandafter\xdef\csname#1index\endcsname{% % Define @#1index + \noexpand\doindex{#1}} } % @defindex foo == \newindex{foo} @@ -2249,31 +2592,37 @@ width0pt\relax} \fi % Define @defcodeindex, like @defindex except put all entries in @code. -\def\newcodeindex #1{ -\expandafter\newwrite \csname#1indfile\endcsname% Define number for output file -\openout \csname#1indfile\endcsname \jobname.#1 % Open the file -\expandafter\xdef\csname#1index\endcsname{% % Define \xxxindex -\noexpand\docodeindex {#1}} +\def\newcodeindex#1{% + \iflinks + \expandafter\newwrite \csname#1indfile\endcsname + \openout \csname#1indfile\endcsname \jobname.#1 + \fi + \expandafter\xdef\csname#1index\endcsname{% + \noexpand\docodeindex{#1}} } \def\defcodeindex{\parsearg\newcodeindex} % @synindex foo bar makes index foo feed into index bar. % Do this instead of @defindex foo if you don't want it as a separate index. -\def\synindex #1 #2 {% -\expandafter\let\expandafter\synindexfoo\expandafter=\csname#2indfile\endcsname -\expandafter\let\csname#1indfile\endcsname=\synindexfoo -\expandafter\xdef\csname#1index\endcsname{% % Define \xxxindex -\noexpand\doindex {#2}}% +% The \closeout helps reduce unnecessary open files; the limit on the +% Acorn RISC OS is a mere 16 files. 
+\def\synindex#1 #2 {% + \expandafter\let\expandafter\synindexfoo\expandafter=\csname#2indfile\endcsname + \expandafter\closeout\csname#1indfile\endcsname + \expandafter\let\csname#1indfile\endcsname=\synindexfoo + \expandafter\xdef\csname#1index\endcsname{% define \xxxindex + \noexpand\doindex{#2}}% } % @syncodeindex foo bar similar, but put all entries made for index foo % inside @code. -\def\syncodeindex #1 #2 {% -\expandafter\let\expandafter\synindexfoo\expandafter=\csname#2indfile\endcsname -\expandafter\let\csname#1indfile\endcsname=\synindexfoo -\expandafter\xdef\csname#1index\endcsname{% % Define \xxxindex -\noexpand\docodeindex {#2}}% +\def\syncodeindex#1 #2 {% + \expandafter\let\expandafter\synindexfoo\expandafter=\csname#2indfile\endcsname + \expandafter\closeout\csname#1indfile\endcsname + \expandafter\let\csname#1indfile\endcsname=\synindexfoo + \expandafter\xdef\csname#1index\endcsname{% define \xxxindex + \noexpand\docodeindex{#2}}% } % Define \doindex, the driver for all \fooindex macros. @@ -2294,6 +2643,7 @@ width0pt\relax} \fi \def\singlecodeindexer #1{\doind{\indexname}{\code{#1}}} \def\indexdummies{% +\def\ { }% % Take care of the plain tex accent commands. \def\"{\realbackslash "}% \def\`{\realbackslash `}% @@ -2323,8 +2673,11 @@ width0pt\relax} \fi % (Must be a way to avoid doing expansion at all, and thus not have to % laboriously list every single command here.) \def\@{@}% will be @@ when we switch to @ as escape char. -%\let\{ = \lbracecmd -%\let\} = \rbracecmd +% Need these in case \tex is in effect and \{ is a \delimiter again. +% But can't use \lbracecmd and \rbracecmd because texindex assumes +% braces and backslashes are used only as delimiters. 
+\let\{ = \mylbrace +\let\} = \myrbrace \def\_{{\realbackslash _}}% \def\w{\realbackslash w }% \def\bf{\realbackslash bf }% @@ -2335,7 +2688,6 @@ width0pt\relax} \fi \def\gtr{\realbackslash gtr}% \def\less{\realbackslash less}% \def\hat{\realbackslash hat}% -%\def\char{\realbackslash char}% \def\TeX{\realbackslash TeX}% \def\dots{\realbackslash dots }% \def\result{\realbackslash result}% @@ -2347,6 +2699,11 @@ width0pt\relax} \fi \def\copyright{\realbackslash copyright}% \def\tclose##1{\realbackslash tclose {##1}}% \def\code##1{\realbackslash code {##1}}% +\def\uref##1{\realbackslash uref {##1}}% +\def\url##1{\realbackslash url {##1}}% +\def\env##1{\realbackslash env {##1}}% +\def\command##1{\realbackslash command {##1}}% +\def\option##1{\realbackslash option {##1}}% \def\dotless##1{\realbackslash dotless {##1}}% \def\samp##1{\realbackslash samp {##1}}% \def\,##1{\realbackslash ,{##1}}% @@ -2362,8 +2719,16 @@ width0pt\relax} \fi \def\kbd##1{\realbackslash kbd {##1}}% \def\dfn##1{\realbackslash dfn {##1}}% \def\emph##1{\realbackslash emph {##1}}% -\def\value##1{\realbackslash value {##1}}% +\def\acronym##1{\realbackslash acronym {##1}}% +% +% Handle some cases of @value -- where the variable name does not +% contain - or _, and the value does not contain any +% (non-fully-expandable) commands. +\let\value = \expandablevalue +% \unsepspaces +% Turn off macro expansion +\turnoffmacros } % If an index command is used in an @example environment, any spaces @@ -2420,6 +2785,12 @@ width0pt\relax} \fi %\let\tt=\indexdummyfont \let\tclose=\indexdummyfont \let\code=\indexdummyfont +\let\url=\indexdummyfont +\let\uref=\indexdummyfont +\let\env=\indexdummyfont +\let\acronym=\indexdummyfont +\let\command=\indexdummyfont +\let\option=\indexdummyfont \let\file=\indexdummyfont \let\samp=\indexdummyfont \let\kbd=\indexdummyfont @@ -2435,14 +2806,24 @@ width0pt\relax} \fi % so we do not become unable to do a definition. 
{\catcode`\@=0 \catcode`\\=\other -@gdef@realbackslash{\}} + @gdef@realbackslash{\}} \let\indexbackslash=0 %overridden during \printindex. +\let\SETmarginindex=\relax % put index entries in margin (undocumented)? + +% For \ifx comparisons. +\def\emptymacro{\empty} -\let\SETmarginindex=\relax %initialize! -% workhorse for all \fooindexes -% #1 is name of index, #2 is stuff to put there -\def\doind #1#2{% +% Most index entries go through here, but \dosubind is the general case. +% +\def\doind#1#2{\dosubind{#1}{#2}\empty} + +% Workhorse for all \fooindexes. +% #1 is name of index, #2 is stuff to put there, #3 is subentry -- +% \empty if called from \doind, as we usually are. The main exception +% is with defuns, which call us directly. +% +\def\dosubind#1#2#3{% % Put the index entry in the margin if desired. \ifx\SETmarginindex\relax\else \insert\margin{\hbox{\vrule height8pt depth3pt width0pt #2}}% @@ -2453,48 +2834,75 @@ width0pt\relax} \fi \indexdummies % Must do this here, since \bf, etc expand at this stage \escapechar=`\\ {% - \let\folio=0% We will expand all macros now EXCEPT \folio. + \let\folio = 0% We will expand all macros now EXCEPT \folio. \def\rawbackslashxx{\indexbackslash}% \indexbackslash isn't defined now % so it will be output as is; and it will print as backslash. % - % First process the index-string with all font commands turned off - % to get the string to sort by. - {\indexnofonts \xdef\indexsorttmp{#2}}% + \def\thirdarg{#3}% + % + % If third arg is present, precede it with space in sort key. + \ifx\thirdarg\emptymacro + \let\subentry = \empty + \else + \def\subentry{ #3}% + \fi + % + % First process the index entry with all font commands turned + % off to get the string to sort by. + {\indexnofonts \xdef\indexsorttmp{#2\subentry}}% % - % Now produce the complete index entry, with both the sort key and the - % original text, including any font commands. + % Now the real index entry with the fonts. 
\toks0 = {#2}% + % + % If third (subentry) arg is present, add it to the index + % string. And include a space. + \ifx\thirdarg\emptymacro \else + \toks0 = \expandafter{\the\toks0 \space #3}% + \fi + % + % Set up the complete index entry, with both the sort key + % and the original text, including any font commands. We write + % three arguments to \entry to the .?? file, texindex reduces to + % two when writing the .??s sorted result. \edef\temp{% \write\csname#1indfile\endcsname{% \realbackslash entry{\indexsorttmp}{\folio}{\the\toks0}}% }% - \temp + % + % If a skip is the last thing on the list now, preserve it + % by backing up by \lastskip, doing the \write, then inserting + % the skip again. Otherwise, the whatsit generated by the + % \write will make \lastskip zero. The result is that sequences + % like this: + % @end defun + % @tindex whatever + % @defun ... + % will have extra space inserted, because the \medbreak in the + % start of the @defun won't see the skip inserted by the @end of + % the previous defun. + % + % But don't do any of this if we're not in vertical mode. We + % don't want to do a \vskip and prematurely end a paragraph. + % + % Avoid page breaks due to these extra skips, too. + % + \iflinks + \ifvmode + \skip0 = \lastskip + \ifdim\lastskip = 0pt \else \nobreak\vskip-\lastskip \fi + \fi + % + \temp % do the write + % + % + \ifvmode \ifdim\skip0 = 0pt \else \nobreak\vskip\skip0 \fi \fi + \fi }% }% \penalty\count255 }% } -\def\dosubind #1#2#3{% -{\count10=\lastpenalty % -{\indexdummies % Must do this here, since \bf, etc expand at this stage -\escapechar=`\\% -{\let\folio=0% -\def\rawbackslashxx{\indexbackslash}% -% -% Now process the index-string once, with all font commands turned off, -% to get the string to sort the index by. -{\indexnofonts -\xdef\temp1{#2 #3}% -}% -% Now produce the complete index entry. We process the index-string again, -% this time with font commands expanded, to get what to print in the index. 
-\edef\temp{% -\write \csname#1indfile\endcsname{% -\realbackslash entry {\temp1}{\folio}{#2}{#3}}}% -\temp }% -}\penalty\count10}} - % The index entry written in the file actually looks like % \entry {sortstring}{page}{topic} % or @@ -2534,7 +2942,7 @@ width0pt\relax} \fi \def\doprintindex#1{\begingroup \dobreak \chapheadingskip{10000}% % - \indexfonts \rm + \smallfonts \rm \tolerance = 9500 \indexbreaks % @@ -2550,7 +2958,7 @@ width0pt\relax} \fi % and it loses the chapter title and the aux file entries for the % index. The easiest way to prevent this problem is to make sure % there is some text. - (Index is nonexistent) + \putwordIndexNonexistent \else % % If the index file exists but is empty, then \openin leaves \ifeof @@ -2558,7 +2966,7 @@ width0pt\relax} \fi % it can discover if there is anything in it. \read 1 to \temp \ifeof 1 - (Index is empty) + \putwordIndexIsEmpty \else % Index files are almost Texinfo source, but we use \ as the escape % character. It would be better to use @, but that's too big a change @@ -2577,21 +2985,35 @@ width0pt\relax} \fi % These macros are used by the sorted index file itself. % Change them to control the appearance of the index. -% Same as \bigskipamount except no shrink. -% \balancecolumns gets confused if there is any shrink. -\newskip\initialskipamount \initialskipamount 12pt plus4pt - -\def\initial #1{% -{\let\tentt=\sectt \let\tt=\sectt \let\sf=\sectt -\ifdim\lastskip<\initialskipamount -\removelastskip \penalty-200 \vskip \initialskipamount\fi -\line{\secbf#1\hfill}\kern 2pt\penalty10000}} +\def\initial#1{{% + % Some minor font changes for the special characters. + \let\tentt=\sectt \let\tt=\sectt \let\sf=\sectt + % + % Remove any glue we may have, we'll be inserting our own. + \removelastskip + % + % We like breaks before the index initials, so insert a bonus. + \penalty -300 + % + % Typeset the initial. 
Making this add up to a whole number of + % baselineskips increases the chance of the dots lining up from column + % to column. It still won't often be perfect, because of the stretch + % we need before each entry, but it's better. + % + % No shrink because it confuses \balancecolumns. + \vskip 1.67\baselineskip plus .5\baselineskip + \leftline{\secbf #1}% + \vskip .33\baselineskip plus .1\baselineskip + % + % Do our best not to break after the initial. + \nobreak +}} % This typesets a paragraph consisting of #1, dot leaders, and then #2 % flush to the right margin. It is used for index and table of contents % entries. The paragraph is indented by \leftskip. % -\def\entry #1#2{\begingroup +\def\entry#1#2{\begingroup % % Start a new paragraph if necessary, so our assignments below can't % affect previous text. @@ -2614,12 +3036,15 @@ width0pt\relax} \fi % % \hangafter is reset to 1 (which is the value we want) at the start % of each paragraph, so we need not do anything with that. - \hangindent=2em + \hangindent = 2em % % When the entry text needs to be broken, just fill out the first line % with blank space. \rightskip = 0pt plus1fil % + % A bit of stretch before each entry for the benefit of balancing columns. + \vskip 0pt plus1pt + % % Start a ``paragraph'' for the index entry so the line breaking % parameters we've set above will have an effect. \noindent @@ -2644,7 +3069,11 @@ width0pt\relax} \fi % The `\ ' here is removed by the implicit \unskip that TeX does as % part of (the primitive) \par. Without it, a spurious underfull % \hbox ensues. - \ #2% The page number ends the paragraph. + \ifpdf + \pdfgettoks#2.\ \the\toksA % The page number ends the paragraph. + \else + \ #2% The page number ends the paragraph. + \fi \fi% \par \endgroup} @@ -2673,24 +3102,26 @@ width0pt\relax} \fi \def\begindoublecolumns{\begingroup % ended by \enddoublecolumns % Grab any single-column material above us. 
- \output = {\global\setbox\partialpage = \vbox{% - % + \output = {% + % % Here is a possibility not foreseen in manmac: if we accumulate a % whole lot of material, we might end up calling this \output % routine twice in a row (see the doublecol-lose test, which is % essentially a couple of indexes with @setchapternewpage off). In - % that case, we must prevent the second \partialpage from - % simply overwriting the first, causing us to lose the page. - % This will preserve it until a real output routine can ship it - % out. Generally, \partialpage will be empty when this runs and - % this will be a no-op. - \unvbox\partialpage + % that case we just ship out what is in \partialpage with the normal + % output routine. Generally, \partialpage will be empty when this + % runs and this will be a no-op. See the indexspread.tex test case. + \ifvoid\partialpage \else + \onepageout{\pagecontents\partialpage}% + \fi % - % Unvbox the main output page. - \unvbox255 - \kern-\topskip \kern\baselineskip - }}% - \eject + \global\setbox\partialpage = \vbox{% + % Unvbox the main output page. + \unvbox\PAGE + \kern-\topskip \kern\baselineskip + }% + }% + \eject % run that output routine to set \partialpage % % Use the double-column output routine for subsequent pages. \output = {\doublecolumnout}% @@ -2718,14 +3149,21 @@ width0pt\relax} \fi % % Double the \vsize as well. (We don't need a separate register here, % since nobody clobbers \vsize.) + \advance\vsize by -\ht\partialpage \vsize = 2\vsize } + +% The double-column output routine for all double-column pages except +% the last. +% \def\doublecolumnout{% \splittopskip=\topskip \splitmaxdepth=\maxdepth % Get the available space for the double columns -- the normal % (undoubled) page height minus any material left over from the % previous page. - \dimen@=\pageheight \advance\dimen@ by-\ht\partialpage + \dimen@ = \vsize + \divide\dimen@ by 2 + % % box0 will be the left-hand column, box2 the right. 
\setbox0=\vsplit255 to\dimen@ \setbox2=\vsplit255 to\dimen@ \onepageout\pagesofar @@ -2734,42 +3172,67 @@ width0pt\relax} \fi } \def\pagesofar{% % Re-output the contents of the output page -- any previous material, - % followed by the two boxes we just split. + % followed by the two boxes we just split, in box0 and box2. \unvbox\partialpage + % \hsize = \doublecolumnhsize - \wd0=\hsize \wd2=\hsize \hbox to\pagewidth{\box0\hfil\box2}% + \wd0=\hsize \wd2=\hsize + \hbox to\pagewidth{\box0\hfil\box2}% } \def\enddoublecolumns{% - \output = {\balancecolumns}\eject % split what we have + \output = {% + % Split the last of the double-column material. Leave it on the + % current page, no automatic page break. + \balancecolumns + % + % If we end up splitting too much material for the current page, + % though, there will be another page break right after this \output + % invocation ends. Having called \balancecolumns once, we do not + % want to call it again. Therefore, reset \output to its normal + % definition right away. (We hope \balancecolumns will never be + % called on to balance too much material, but if it is, this makes + % the output somewhat more palatable.) + \global\output = {\onepageout{\pagecontents\PAGE}}% + }% + \eject \endgroup % started in \begindoublecolumns % - % Back to normal single-column typesetting, but take account of the - % fact that we just accumulated some stuff on the output page. + % \pagegoal was set to the doubled \vsize above, since we restarted + % the current page. We're now back to normal single-column + % typesetting, so reset \pagegoal to the normal \vsize (after the + % \endgroup where \vsize got restored). \pagegoal = \vsize } \def\balancecolumns{% % Called at the end of the double column material. - \setbox0 = \vbox{\unvbox255}% + \setbox0 = \vbox{\unvbox255}% like \box255 but more efficient, see p.120. 
\dimen@ = \ht0 \advance\dimen@ by \topskip \advance\dimen@ by-\baselineskip - \divide\dimen@ by 2 + \divide\dimen@ by 2 % target to split to + %debug\message{final 2-column material height=\the\ht0, target=\the\dimen@.}% \splittopskip = \topskip % Loop until we get a decent breakpoint. - {\vbadness=10000 \loop - \global\setbox3=\copy0 - \global\setbox1=\vsplit3 to\dimen@ - \ifdim\ht3>\dimen@ \global\advance\dimen@ by1pt - \repeat}% + {% + \vbadness = 10000 + \loop + \global\setbox3 = \copy0 + \global\setbox1 = \vsplit3 to \dimen@ + \ifdim\ht3>\dimen@ + \global\advance\dimen@ by 1pt + \repeat + }% + %debug\message{split to \the\dimen@, column heights: \the\ht1, \the\ht3.}% \setbox0=\vbox to\dimen@{\unvbox1}% \setbox2=\vbox to\dimen@{\unvbox3}% + % \pagesofar } \catcode`\@ = \other \message{sectioning,} -% Define chapters, sections, etc. +% Chapters, sections, etc. \newcount\chapno \newcount\secno \secno=0 @@ -2778,58 +3241,48 @@ width0pt\relax} \fi % This counter is funny since it counts through charcodes of letters A, B, ... \newcount\appendixno \appendixno = `\@ -\def\appendixletter{\char\the\appendixno} - -\newwrite\contentsfile -% This is called from \setfilename. -\def\opencontents{\openout\contentsfile = \jobname.toc } +% \def\appendixletter{\char\the\appendixno} +% We do the following for the sake of pdftex, which needs the actual +% letter in the expansion, not just typeset. 
+\def\appendixletter{% + \ifnum\appendixno=`A A% + \else\ifnum\appendixno=`B B% + \else\ifnum\appendixno=`C C% + \else\ifnum\appendixno=`D D% + \else\ifnum\appendixno=`E E% + \else\ifnum\appendixno=`F F% + \else\ifnum\appendixno=`G G% + \else\ifnum\appendixno=`H H% + \else\ifnum\appendixno=`I I% + \else\ifnum\appendixno=`J J% + \else\ifnum\appendixno=`K K% + \else\ifnum\appendixno=`L L% + \else\ifnum\appendixno=`M M% + \else\ifnum\appendixno=`N N% + \else\ifnum\appendixno=`O O% + \else\ifnum\appendixno=`P P% + \else\ifnum\appendixno=`Q Q% + \else\ifnum\appendixno=`R R% + \else\ifnum\appendixno=`S S% + \else\ifnum\appendixno=`T T% + \else\ifnum\appendixno=`U U% + \else\ifnum\appendixno=`V V% + \else\ifnum\appendixno=`W W% + \else\ifnum\appendixno=`X X% + \else\ifnum\appendixno=`Y Y% + \else\ifnum\appendixno=`Z Z% + % The \the is necessary, despite appearances, because \appendixletter is + % expanded while writing the .toc file. \char\appendixno is not + % expandable, thus it is written literally, thus all appendixes come out + % with the same letter (or @) in the toc without it. + \else\char\the\appendixno + \fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi + \fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi} % Each @chapter defines this as the name of the chapter. -% page headings and footings can use it. 
@section does likewise - -\def\thischapter{} \def\thissection{} -\def\seccheck#1{\ifnum \pageno<0 - \errmessage{@#1 not allowed after generating table of contents}% -\fi} - -\def\chapternofonts{% - \let\rawbackslash=\relax - \let\frenchspacing=\relax - \def\result{\realbackslash result}% - \def\equiv{\realbackslash equiv}% - \def\expansion{\realbackslash expansion}% - \def\print{\realbackslash print}% - \def\TeX{\realbackslash TeX}% - \def\dots{\realbackslash dots}% - \def\result{\realbackslash result}% - \def\equiv{\realbackslash equiv}% - \def\expansion{\realbackslash expansion}% - \def\print{\realbackslash print}% - \def\error{\realbackslash error}% - \def\point{\realbackslash point}% - \def\copyright{\realbackslash copyright}% - \def\tt{\realbackslash tt}% - \def\bf{\realbackslash bf}% - \def\w{\realbackslash w}% - \def\less{\realbackslash less}% - \def\gtr{\realbackslash gtr}% - \def\hat{\realbackslash hat}% - \def\char{\realbackslash char}% - \def\tclose##1{\realbackslash tclose{##1}}% - \def\code##1{\realbackslash code{##1}}% - \def\samp##1{\realbackslash samp{##1}}% - \def\r##1{\realbackslash r{##1}}% - \def\b##1{\realbackslash b{##1}}% - \def\key##1{\realbackslash key{##1}}% - \def\file##1{\realbackslash file{##1}}% - \def\kbd##1{\realbackslash kbd{##1}}% - % These are redefined because @smartitalic wouldn't work inside xdef. - \def\i##1{\realbackslash i{##1}}% - \def\cite##1{\realbackslash cite{##1}}% - \def\var##1{\realbackslash var{##1}}% - \def\emph##1{\realbackslash emph{##1}}% - \def\dfn##1{\realbackslash dfn{##1}}% -} +% page headings and footings can use it. @section does likewise. +\def\thischapter{} +\def\thissection{} \newcount\absseclevel % used to calculate proper heading level \newcount\secbase\secbase=0 % @raise/lowersections modify this count @@ -2901,59 +3354,59 @@ width0pt\relax} \fi \fi } - +% @chapter, @appendix, @unnumbered. 
\def\thischaptername{No Chapter Title} \outer\def\chapter{\parsearg\chapteryyy} \def\chapteryyy #1{\numhead0{#1}} % normally numhead0 calls chapterzzz -\def\chapterzzz #1{\seccheck{chapter}% +\def\chapterzzz #1{% \secno=0 \subsecno=0 \subsubsecno=0 -\global\advance \chapno by 1 \message{\putwordChapter \the\chapno}% +\global\advance \chapno by 1 \message{\putwordChapter\space \the\chapno}% \chapmacro {#1}{\the\chapno}% \gdef\thissection{#1}% \gdef\thischaptername{#1}% % We don't substitute the actual chapter name into \thischapter % because we don't want its macros evaluated now. \xdef\thischapter{\putwordChapter{} \the\chapno: \noexpand\thischaptername}% -{\chapternofonts% \toks0 = {#1}% -\edef\temp{{\realbackslash chapentry{\the\toks0}{\the\chapno}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\donoderef % +\edef\temp{\noexpand\writetocentry{\realbackslash chapentry{\the\toks0}% + {\the\chapno}}}% +\temp +\donoderef \global\let\section = \numberedsec \global\let\subsection = \numberedsubsec \global\let\subsubsection = \numberedsubsubsec -}} +} \outer\def\appendix{\parsearg\appendixyyy} \def\appendixyyy #1{\apphead0{#1}} % normally apphead0 calls appendixzzz -\def\appendixzzz #1{\seccheck{appendix}% +\def\appendixzzz #1{% \secno=0 \subsecno=0 \subsubsecno=0 -\global\advance \appendixno by 1 \message{Appendix \appendixletter}% +\global\advance \appendixno by 1 +\message{\putwordAppendix\space \appendixletter}% \chapmacro {#1}{\putwordAppendix{} \appendixletter}% \gdef\thissection{#1}% \gdef\thischaptername{#1}% \xdef\thischapter{\putwordAppendix{} \appendixletter: \noexpand\thischaptername}% -{\chapternofonts% \toks0 = {#1}% -\edef\temp{{\realbackslash chapentry{\the\toks0}% - {\putwordAppendix{} \appendixletter}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\appendixnoderef % +\edef\temp{\noexpand\writetocentry{\realbackslash chapentry{\the\toks0}% + {\putwordAppendix{} \appendixletter}}}% +\temp +\appendixnoderef 
\global\let\section = \appendixsec \global\let\subsection = \appendixsubsec \global\let\subsubsection = \appendixsubsubsec -}} +} % @centerchap is like @unnumbered, but the heading is centered. \outer\def\centerchap{\parsearg\centerchapyyy} \def\centerchapyyy #1{{\let\unnumbchapmacro=\centerchapmacro \unnumberedyyy{#1}}} +% @top is like @unnumbered. \outer\def\top{\parsearg\unnumberedyyy} + \outer\def\unnumbered{\parsearg\unnumberedyyy} \def\unnumberedyyy #1{\unnmhead0{#1}} % normally unnmhead0 calls unnumberedzzz -\def\unnumberedzzz #1{\seccheck{unnumbered}% +\def\unnumberedzzz #1{% \secno=0 \subsecno=0 \subsubsecno=0 % % This used to be simply \message{#1}, but TeX fully expands the @@ -2965,155 +3418,139 @@ width0pt\relax} \fi % Anyway, we don't want the fully-expanded definition of @cite to appear % as a result of the \message, we just want `@cite' itself. We use % \the<toks register> to achieve this: TeX expands \the<toks> only once, -% simply yielding the contents of the <toks register>. +% simply yielding the contents of <toks register>. (We also do this for +% the toc entries.) \toks0 = {#1}\message{(\the\toks0)}% % \unnumbchapmacro {#1}% \gdef\thischapter{#1}\gdef\thissection{#1}% -{\chapternofonts% \toks0 = {#1}% -\edef\temp{{\realbackslash unnumbchapentry{\the\toks0}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\unnumbnoderef % +\edef\temp{\noexpand\writetocentry{\realbackslash unnumbchapentry{\the\toks0}}}% +\temp +\unnumbnoderef \global\let\section = \unnumberedsec \global\let\subsection = \unnumberedsubsec \global\let\subsubsection = \unnumberedsubsubsec -}} +} +% Sections. 
\outer\def\numberedsec{\parsearg\secyyy} \def\secyyy #1{\numhead1{#1}} % normally calls seczzz -\def\seczzz #1{\seccheck{section}% +\def\seczzz #1{% \subsecno=0 \subsubsecno=0 \global\advance \secno by 1 % \gdef\thissection{#1}\secheading {#1}{\the\chapno}{\the\secno}% -{\chapternofonts% \toks0 = {#1}% -\edef\temp{{\realbackslash secentry % -{\the\toks0}{\the\chapno}{\the\secno}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\donoderef % -\penalty 10000 % -}} +\edef\temp{\noexpand\writetocentry{\realbackslash secentry{\the\toks0}% + {\the\chapno}{\the\secno}}}% +\temp +\donoderef +\nobreak +} \outer\def\appendixsection{\parsearg\appendixsecyyy} \outer\def\appendixsec{\parsearg\appendixsecyyy} \def\appendixsecyyy #1{\apphead1{#1}} % normally calls appendixsectionzzz -\def\appendixsectionzzz #1{\seccheck{appendixsection}% +\def\appendixsectionzzz #1{% \subsecno=0 \subsubsecno=0 \global\advance \secno by 1 % \gdef\thissection{#1}\secheading {#1}{\appendixletter}{\the\secno}% -{\chapternofonts% \toks0 = {#1}% -\edef\temp{{\realbackslash secentry % -{\the\toks0}{\appendixletter}{\the\secno}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\appendixnoderef % -\penalty 10000 % -}} +\edef\temp{\noexpand\writetocentry{\realbackslash secentry{\the\toks0}% + {\appendixletter}{\the\secno}}}% +\temp +\appendixnoderef +\nobreak +} \outer\def\unnumberedsec{\parsearg\unnumberedsecyyy} \def\unnumberedsecyyy #1{\unnmhead1{#1}} % normally calls unnumberedseczzz -\def\unnumberedseczzz #1{\seccheck{unnumberedsec}% +\def\unnumberedseczzz #1{% \plainsecheading {#1}\gdef\thissection{#1}% -{\chapternofonts% \toks0 = {#1}% -\edef\temp{{\realbackslash unnumbsecentry{\the\toks0}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\unnumbnoderef % -\penalty 10000 % -}} +\edef\temp{\noexpand\writetocentry{\realbackslash unnumbsecentry{\the\toks0}}}% +\temp +\unnumbnoderef +\nobreak +} +% Subsections. 
\outer\def\numberedsubsec{\parsearg\numberedsubsecyyy} \def\numberedsubsecyyy #1{\numhead2{#1}} % normally calls numberedsubseczzz -\def\numberedsubseczzz #1{\seccheck{subsection}% +\def\numberedsubseczzz #1{% \gdef\thissection{#1}\subsubsecno=0 \global\advance \subsecno by 1 % \subsecheading {#1}{\the\chapno}{\the\secno}{\the\subsecno}% -{\chapternofonts% \toks0 = {#1}% -\edef\temp{{\realbackslash subsecentry % -{\the\toks0}{\the\chapno}{\the\secno}{\the\subsecno}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\donoderef % -\penalty 10000 % -}} +\edef\temp{\noexpand\writetocentry{\realbackslash subsecentry{\the\toks0}% + {\the\chapno}{\the\secno}{\the\subsecno}}}% +\temp +\donoderef +\nobreak +} \outer\def\appendixsubsec{\parsearg\appendixsubsecyyy} \def\appendixsubsecyyy #1{\apphead2{#1}} % normally calls appendixsubseczzz -\def\appendixsubseczzz #1{\seccheck{appendixsubsec}% +\def\appendixsubseczzz #1{% \gdef\thissection{#1}\subsubsecno=0 \global\advance \subsecno by 1 % \subsecheading {#1}{\appendixletter}{\the\secno}{\the\subsecno}% -{\chapternofonts% \toks0 = {#1}% -\edef\temp{{\realbackslash subsecentry % -{\the\toks0}{\appendixletter}{\the\secno}{\the\subsecno}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\appendixnoderef % -\penalty 10000 % -}} +\edef\temp{\noexpand\writetocentry{\realbackslash subsecentry{\the\toks0}% + {\appendixletter}{\the\secno}{\the\subsecno}}}% +\temp +\appendixnoderef +\nobreak +} \outer\def\unnumberedsubsec{\parsearg\unnumberedsubsecyyy} \def\unnumberedsubsecyyy #1{\unnmhead2{#1}} %normally calls unnumberedsubseczzz -\def\unnumberedsubseczzz #1{\seccheck{unnumberedsubsec}% +\def\unnumberedsubseczzz #1{% \plainsubsecheading {#1}\gdef\thissection{#1}% -{\chapternofonts% \toks0 = {#1}% -\edef\temp{{\realbackslash unnumbsubsecentry{\the\toks0}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\unnumbnoderef % -\penalty 10000 % -}} 
+\edef\temp{\noexpand\writetocentry{\realbackslash unnumbsubsecentry% + {\the\toks0}}}% +\temp +\unnumbnoderef +\nobreak +} +% Subsubsections. \outer\def\numberedsubsubsec{\parsearg\numberedsubsubsecyyy} \def\numberedsubsubsecyyy #1{\numhead3{#1}} % normally numberedsubsubseczzz -\def\numberedsubsubseczzz #1{\seccheck{subsubsection}% +\def\numberedsubsubseczzz #1{% \gdef\thissection{#1}\global\advance \subsubsecno by 1 % \subsubsecheading {#1} {\the\chapno}{\the\secno}{\the\subsecno}{\the\subsubsecno}% -{\chapternofonts% \toks0 = {#1}% -\edef\temp{{\realbackslash subsubsecentry{\the\toks0} - {\the\chapno}{\the\secno}{\the\subsecno}{\the\subsubsecno} - {\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\donoderef % -\penalty 10000 % -}} +\edef\temp{\noexpand\writetocentry{\realbackslash subsubsecentry{\the\toks0}% + {\the\chapno}{\the\secno}{\the\subsecno}{\the\subsubsecno}}}% +\temp +\donoderef +\nobreak +} \outer\def\appendixsubsubsec{\parsearg\appendixsubsubsecyyy} \def\appendixsubsubsecyyy #1{\apphead3{#1}} % normally appendixsubsubseczzz -\def\appendixsubsubseczzz #1{\seccheck{appendixsubsubsec}% +\def\appendixsubsubseczzz #1{% \gdef\thissection{#1}\global\advance \subsubsecno by 1 % \subsubsecheading {#1} {\appendixletter}{\the\secno}{\the\subsecno}{\the\subsubsecno}% -{\chapternofonts% \toks0 = {#1}% -\edef\temp{{\realbackslash subsubsecentry{\the\toks0}% - {\appendixletter} - {\the\secno}{\the\subsecno}{\the\subsubsecno}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\appendixnoderef % -\penalty 10000 % -}} +\edef\temp{\noexpand\writetocentry{\realbackslash subsubsecentry{\the\toks0}% + {\appendixletter}{\the\secno}{\the\subsecno}{\the\subsubsecno}}}% +\temp +\appendixnoderef +\nobreak +} \outer\def\unnumberedsubsubsec{\parsearg\unnumberedsubsubsecyyy} \def\unnumberedsubsubsecyyy #1{\unnmhead3{#1}} %normally unnumberedsubsubseczzz -\def\unnumberedsubsubseczzz #1{\seccheck{unnumberedsubsubsec}% 
+\def\unnumberedsubsubseczzz #1{% \plainsubsubsecheading {#1}\gdef\thissection{#1}% -{\chapternofonts% \toks0 = {#1}% -\edef\temp{{\realbackslash unnumbsubsubsecentry{\the\toks0}{\noexpand\folio}}}% -\escapechar=`\\% -\write \contentsfile \temp % -\unnumbnoderef % -\penalty 10000 % -}} +\edef\temp{\noexpand\writetocentry{\realbackslash unnumbsubsubsecentry% + {\the\toks0}}}% +\temp +\unnumbnoderef +\nobreak +} % These are variants which are not "outer", so they can appear in @ifinfo. % Actually, they should now be obsolete; ordinary section commands should work. @@ -3142,8 +3579,7 @@ width0pt\relax} \fi % Define @majorheading, @heading and @subheading -% NOTE on use of \vbox for chapter headings, section headings, and -% such: +% NOTE on use of \vbox for chapter headings, section headings, and such: % 1) We use \vbox rather than the earlier \line to permit % overlong headings to fold. % 2) \hyphenpenalty is set to 10000 because hyphenation in a @@ -3190,12 +3626,12 @@ width0pt\relax} \fi \def\setchapternewpage #1 {\csname CHAPPAG#1\endcsname} -\def\CHAPPAGoff{ +\def\CHAPPAGoff{% \global\let\contentsalignmacro = \chappager \global\let\pchapsepmacro=\chapbreak \global\let\pagealignmacro=\chappager} -\def\CHAPPAGon{ +\def\CHAPPAGon{% \global\let\contentsalignmacro = \chappager \global\let\pchapsepmacro=\chappager \global\let\pagealignmacro=\chappager @@ -3249,7 +3685,7 @@ width0pt\relax} \fi \def\unnchfopen #1{% \chapoddpage {\chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000 \parindent=0pt\raggedright - \rm #1\hfill}}\bigskip \par\penalty 10000 % + \rm #1\hfill}}\bigskip \par\nobreak } \def\chfopen #1#2{\chapoddpage {\chapfonts @@ -3260,7 +3696,7 @@ width0pt\relax} \fi \def\centerchfopen #1{% \chapoddpage {\chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000 \parindent=0pt - \hfill {\rm #1}\hfill}}\bigskip \par\penalty 10000 % + \hfill {\rm #1}\hfill}}\bigskip \par\nobreak } \def\CHAPFopen{ @@ -3313,24 +3749,45 @@ width0pt\relax} \fi } -\message{toc printing,} -% 
Finish up the main text and prepare to read what we've written -% to \contentsfile. +\message{toc,} +% Table of contents. +\newwrite\tocfile + +% Write an entry to the toc file, opening it if necessary. +% Called from @chapter, etc. We supply {\folio} at the end of the +% argument, which will end up as the last argument to the \...entry macro. +% +% We open the .toc file here instead of at @setfilename or any other +% given time so that @contents can be put in the document anywhere. +% +\newif\iftocfileopened +\def\writetocentry#1{% + \iftocfileopened\else + \immediate\openout\tocfile = \jobname.toc + \global\tocfileopenedtrue + \fi + \iflinks \write\tocfile{#1{\folio}}\fi +} \newskip\contentsrightmargin \contentsrightmargin=1in +\newcount\savepageno +\newcount\lastnegativepageno \lastnegativepageno = -1 + +% Finish up the main text and prepare to read what we've written +% to \tocfile. +% \def\startcontents#1{% % If @setchapternewpage on, and @headings double, the contents should % start on an odd page, unlike chapters. Thus, we maintain % \contentsalignmacro in parallel with \pagealignmacro. % From: Torbjorn Granlund <tege@matematik.su.se> \contentsalignmacro - \immediate\closeout \contentsfile - \ifnum \pageno>0 - \pageno = -1 % Request roman numbered pages. - \fi + \immediate\closeout\tocfile + % % Don't need to put `Contents' or `Short Contents' in the headline. % It is abundantly clear what they are. \unnumbchapmacro{#1}\def\thischapter{}% + \savepageno = \pageno \begingroup % Set up to handle contents files properly. \catcode`\\=0 \catcode`\{=1 \catcode`\}=2 \catcode`\@=11 % We can't do this, because then an actual ^ in a section @@ -3338,20 +3795,31 @@ width0pt\relax} \fi %\catcode`\^=7 % to see ^^e4 as \"a etc. juha@piuha.ydi.vtt.fi \raggedbottom % Worry more about breakpoints than the bottom. \advance\hsize by -\contentsrightmargin % Don't use the full line length. + % + % Roman numerals for page numbers. 
+ \ifnum \pageno>0 \pageno = \lastnegativepageno \fi } % Normal (long) toc. -\outer\def\contents{% - \startcontents{\putwordTableofContents}% - \input \jobname.toc +\def\contents{% + \startcontents{\putwordTOC}% + \openin 1 \jobname.toc + \ifeof 1 \else + \closein 1 + \input \jobname.toc + \fi + \vfill \eject + \contentsalignmacro % in case @setchapternewpage odd is in effect + \pdfmakeoutlines \endgroup - \vfill \eject + \lastnegativepageno = \pageno + \pageno = \savepageno } % And just the chapters. -\outer\def\summarycontents{% - \startcontents{\putwordShortContents}% +\def\summarycontents{% + \startcontents{\putwordShortTOC}% % \let\chapentry = \shortchapentry \let\unnumbchapentry = \shortunnumberedentry @@ -3367,12 +3835,23 @@ width0pt\relax} \fi \def\unnumbsubsecentry ##1##2{} \def\subsubsecentry ##1##2##3##4##5##6{} \def\unnumbsubsubsecentry ##1##2{} - \input \jobname.toc + \openin 1 \jobname.toc + \ifeof 1 \else + \closein 1 + \input \jobname.toc + \fi + \vfill \eject + \contentsalignmacro % in case @setchapternewpage odd is in effect \endgroup - \vfill \eject + \lastnegativepageno = \pageno + \pageno = \savepageno } \let\shortcontents = \summarycontents +\ifpdf + \pdfcatalog{/PageMode /UseOutlines}% +\fi + % These macros generate individual entries in the table of contents. % The first argument is the chapter or section name. % The last argument is the page number. @@ -3383,7 +3862,7 @@ width0pt\relax} \fi % See comments in \dochapentry re vbox and related settings \def\shortchapentry#1#2#3{% - \tocentry{\shortchaplabel{#2}\labelspace #1}{\doshortpageno{#3}}% + \tocentry{\shortchaplabel{#2}\labelspace #1}{\doshortpageno\bgroup#3\egroup}% } % Typeset the label for a chapter or appendix for the short contents. @@ -3391,10 +3870,14 @@ width0pt\relax} \fi % We could simplify the code here by writing out an \appendixentry % command in the toc file for appendices, instead of using \chapentry % for both, but it doesn't seem worth it. 
-\setbox0 = \hbox{\shortcontrm \putwordAppendix } -\newdimen\shortappendixwidth \shortappendixwidth = \wd0 - +% +\newdimen\shortappendixwidth +% \def\shortchaplabel#1{% + % Compute width of word "Appendix", may change with language. + \setbox0 = \hbox{\shortcontrm \putwordAppendix}% + \shortappendixwidth = \wd0 + % % We typeset #1 in a box of constant width, regardless of the text of % #1, so the chapter titles will come out aligned. \setbox0 = \hbox{#1}% @@ -3409,7 +3892,7 @@ width0pt\relax} \fi } \def\unnumbchapentry#1#2{\dochapentry{#1}{#2}} -\def\shortunnumberedentry#1#2{\tocentry{#1}{\doshortpageno{#2}}} +\def\shortunnumberedentry#1#2{\tocentry{#1}{\doshortpageno\bgroup#2\egroup}} % Sections. \def\secentry#1#2#3#4{\dosecentry{#2.#3\labelspace#1}{#4}} @@ -3436,24 +3919,24 @@ width0pt\relax} \fi \penalty-300 \vskip1\baselineskip plus.33\baselineskip minus.25\baselineskip \begingroup \chapentryfonts - \tocentry{#1}{\dopageno{#2}}% + \tocentry{#1}{\dopageno\bgroup#2\egroup}% \endgroup \nobreak\vskip .25\baselineskip plus.1\baselineskip } \def\dosecentry#1#2{\begingroup \secentryfonts \leftskip=\tocindent - \tocentry{#1}{\dopageno{#2}}% + \tocentry{#1}{\dopageno\bgroup#2\egroup}% \endgroup} \def\dosubsecentry#1#2{\begingroup \subsecentryfonts \leftskip=2\tocindent - \tocentry{#1}{\dopageno{#2}}% + \tocentry{#1}{\dopageno\bgroup#2\egroup}% \endgroup} \def\dosubsubsecentry#1#2{\begingroup \subsubsecentryfonts \leftskip=3\tocindent - \tocentry{#1}{\dopageno{#2}}% + \tocentry{#1}{\dopageno\bgroup#2\egroup}% \endgroup} % Final typesetting of a toc entry; we use the same \entry macro as for @@ -3481,6 +3964,7 @@ width0pt\relax} \fi \message{environments,} +% @foo ... @end foo. % Since these characters are used in examples, it should be an even number of % \tt widths. Each \tt character is 1en, so two makes it 1em. 
@@ -3553,6 +4037,7 @@ width0pt\relax} \fi \let\!=\ptexexclam \let\i=\ptexi \let\{=\ptexlbrace + \let\+=\tabalign \let\}=\ptexrbrace \let\*=\ptexstar \let\t=\ptext @@ -3604,8 +4089,8 @@ width0pt\relax} \fi % \nonarrowing is a flag. If "set", @lisp etc don't narrow margins. \let\nonarrowing=\relax -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% \cartouche: draw rectangle w/rounded corners around argument +% @cartouche ... @end cartouche: draw rectangle w/rounded corners around +% environment contents. \font\circle=lcircle10 \newdimen\circthick \newdimen\cartouter\newdimen\cartinner @@ -3632,9 +4117,9 @@ width0pt\relax} \fi \cartinner=\hsize \advance\cartinner by-\lskip \advance\cartinner by-\rskip \cartouter=\hsize - \advance\cartouter by 18pt % allow for 3pt kerns on either + \advance\cartouter by 18.4pt % allow for 3pt kerns on either % side, and for 6pt waste from -% each corner char +% each corner char, and rule thickness \normbskip=\baselineskip \normpskip=\parskip \normlskip=\lineskip % Flag to tell @lisp, etc., not to narrow margin. \let\nonarrowing=\comment @@ -3688,49 +4173,52 @@ width0pt\relax} \fi \fi } -% To ending an @example-like environment, we first end the paragraph -% (via \afterenvbreak's vertical glue), and then the group. That way we -% keep the zero \parskip that the environments set -- \parskip glue -% will be inserted at the beginning of the next paragraph in the -% document, after the environment. +% Define the \E... control sequence only if we are inside the particular +% environment, so the error checking in \end will work. +% +% To end an @example-like environment, we first end the paragraph (via +% \afterenvbreak's vertical glue), and then the group. That way we keep +% the zero \parskip that the environments set -- \parskip glue will be +% inserted at the beginning of the next paragraph in the document, after +% the environment. 
% -\def\nonfillfinish{\afterenvbreak\endgroup}% +\def\nonfillfinish{\afterenvbreak\endgroup} +% @lisp: indented, narrowed, typewriter font. \def\lisp{\begingroup \nonfillstart \let\Elisp = \nonfillfinish \tt - % Make @kbd do something special, if requested. - \let\kbdfont\kbdexamplefont - \rawbackslash % have \ input char produce \ char from current font - \gobble + \let\kbdfont = \kbdexamplefont % Allow @kbd to do something special. + \gobble % eat return } -% Define the \E... control sequence only if we are inside the -% environment, so the error checking in \end will work. -% -% We must call \lisp last in the definition, since it reads the -% return following the @example (or whatever) command. -% +% @example: Same as @lisp. \def\example{\begingroup \def\Eexample{\nonfillfinish\endgroup}\lisp} -\def\smallexample{\begingroup \def\Esmallexample{\nonfillfinish\endgroup}\lisp} -\def\smalllisp{\begingroup \def\Esmalllisp{\nonfillfinish\endgroup}\lisp} -% @smallexample and @smalllisp. This is not used unless the @smallbook -% command is given. Originally contributed by Pavel@xerox. +% @small... is usually equivalent to the non-small (@smallbook +% redefines). We must call \example (or whatever) last in the +% definition, since it reads the return following the @example (or +% whatever) command. % +% This actually allows (for example) @end display inside an +% @smalldisplay. Too bad, but makeinfo will catch the error anyway. +% +\def\smalldisplay{\begingroup\def\Esmalldisplay{\nonfillfinish\endgroup}\display} +\def\smallexample{\begingroup\def\Esmallexample{\nonfillfinish\endgroup}\lisp} +\def\smallformat{\begingroup\def\Esmallformat{\nonfillfinish\endgroup}\format} +\def\smalllisp{\begingroup\def\Esmalllisp{\nonfillfinish\endgroup}\lisp} + +% Real @smallexample and @smalllisp (when @smallbook): use smaller fonts. +% Originally contributed by Pavel@xerox. 
\def\smalllispx{\begingroup - \nonfillstart - \let\Esmalllisp = \nonfillfinish - \let\Esmallexample = \nonfillfinish - % - % Smaller fonts for small examples. - \indexfonts \tt - \rawbackslash % make \ output the \ character from the current font (tt) - \gobble + \def\Esmalllisp{\nonfillfinish\endgroup}% + \def\Esmallexample{\nonfillfinish\endgroup}% + \smallfonts + \lisp } -% This is @display; same as @lisp except use roman font. +% @display: same as @lisp except keep current font. % \def\display{\begingroup \nonfillstart @@ -3738,7 +4226,15 @@ width0pt\relax} \fi \gobble } -% This is @format; same as @display except don't narrow margins. +% @smalldisplay (when @smallbook): @display plus smaller fonts. +% +\def\smalldisplayx{\begingroup + \def\Esmalldisplay{\nonfillfinish\endgroup}% + \smallfonts \rm + \display +} + +% @format: same as @display except don't narrow margins. % \def\format{\begingroup \let\nonarrowing = t @@ -3747,20 +4243,27 @@ width0pt\relax} \fi \gobble } -% @flushleft (same as @format) and @flushright. +% @smallformat (when @smallbook): @format plus smaller fonts. % -\def\flushleft{\begingroup - \let\nonarrowing = t - \nonfillstart - \let\Eflushleft = \nonfillfinish - \gobble +\def\smallformatx{\begingroup + \def\Esmallformat{\nonfillfinish\endgroup}% + \smallfonts \rm + \format } + +% @flushleft (same as @format). +% +\def\flushleft{\begingroup \def\Eflushleft{\nonfillfinish\endgroup}\format} + +% @flushright. +% \def\flushright{\begingroup \let\nonarrowing = t \nonfillstart \let\Eflushright = \nonfillfinish \advance\leftskip by 0pt plus 1fill - \gobble} + \gobble +} % @quotation does normal linebreaking (hence we can't use \nonfillstart) % and narrows the margins. @@ -3783,9 +4286,11 @@ width0pt\relax} \fi \fi } + \message{defuns,} -% Define formatter for defuns -% First, allow user to change definition object font (\df) internally +% @defun etc. 
+ +% Allow user to change definition object font (\df) internally \def\setdeffont #1 {\csname DEF#1\endcsname} \newskip\defbodyindent \defbodyindent=.4in @@ -3839,10 +4344,16 @@ width0pt\relax} \fi %% contained text. This is especially needed for [ and ] \def\opnr{{\sf\char`\(}\global\advance\parencount by 1 } \def\clnr{{\sf\char`\)}\global\advance\parencount by -1 } -\def\ampnr{\&} +\let\ampnr = \& \def\lbrb{{\bf\char`\[}} \def\rbrb{{\bf\char`\]}} +% Active &'s sneak into the index arguments, so make sure it's defined. +{ + \catcode`& = 13 + \global\let& = \ampnr +} + % First, defname, which formats the header line itself. % #1 should be the function name. % #2 should be the type of definition, such as "Function". @@ -3852,20 +4363,18 @@ width0pt\relax} \fi % outside the @def... \dimen2=\leftskip \advance\dimen2 by -\defbodyindent -\dimen3=\rightskip -\advance\dimen3 by -\defbodyindent -\noindent % +\noindent \setbox0=\hbox{\hskip \deflastargmargin{\rm #2}\hskip \deftypemargin}% \dimen0=\hsize \advance \dimen0 by -\wd0 % compute size for first line \dimen1=\hsize \advance \dimen1 by -\defargsindent %size for continuations -\parshape 2 0in \dimen0 \defargsindent \dimen1 % +\parshape 2 0in \dimen0 \defargsindent \dimen1 % Now output arg 2 ("Function" or some such) % ending at \deftypemargin from the right margin, % but stuck inside a box of width 0 so it does not interfere with linebreaking {% Adjust \hsize to exclude the ambient margins, % so that \rightline will obey them. 
-\advance \hsize by -\dimen2 \advance \hsize by -\dimen3 -\rlap{\rightline{{\rm #2}\hskip \deftypemargin}}}% +\advance \hsize by -\dimen2 +\rlap{\rightline{{\rm #2}\hskip -1.25pc }}}% % Make all lines underfull and no complaints: \tolerance=10000 \hbadness=10000 \advance\leftskip by -\defbodyindent @@ -3886,23 +4395,62 @@ width0pt\relax} \fi \def#1{\endgraf\endgroup\medbreak}% \def#2{\begingroup\obeylines\activeparens\spacesplit#3}% \parindent=0in -\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent +\advance\leftskip by \defbodyindent \exdentamount=\defbodyindent \begingroup % \catcode 61=\active % 61 is `=' \obeylines\activeparens\spacesplit#3} -\def\defmethparsebody #1#2#3#4 {\begingroup\inENV % +% #1 is the \E... control sequence to end the definition (which we define). +% #2 is the \...x control sequence for consecutive fns (which we define). +% #3 is the control sequence to call to resume processing. +% #4, delimited by the space, is the class name. +% +\def\defmethparsebody#1#2#3#4 {\begingroup\inENV % \medbreak % % Define the end token that this defining construct specifies % so that it will exit this group. \def#1{\endgraf\endgroup\medbreak}% \def#2##1 {\begingroup\obeylines\activeparens\spacesplit{#3{##1}}}% \parindent=0in -\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent +\advance\leftskip by \defbodyindent \exdentamount=\defbodyindent \begingroup\obeylines\activeparens\spacesplit{#3{#4}}} +% Used for @deftypemethod and @deftypeivar. +% #1 is the \E... control sequence to end the definition (which we define). +% #2 is the \...x control sequence for consecutive fns (which we define). +% #3 is the control sequence to call to resume processing. +% #4, delimited by a space, is the class name. +% #5 is the method's return type. 
+% +\def\deftypemethparsebody#1#2#3#4 #5 {\begingroup\inENV + \medbreak + \def#1{\endgraf\endgroup\medbreak}% + \def#2##1 ##2 {\begingroup\obeylines\activeparens\spacesplit{#3{##1}{##2}}}% + \parindent=0in + \advance\leftskip by \defbodyindent + \exdentamount=\defbodyindent + \begingroup\obeylines\activeparens\spacesplit{#3{#4}{#5}}} + +% Used for @deftypeop. The change from \deftypemethparsebody is an +% extra argument at the beginning which is the `category', instead of it +% being the hardwired string `Method' or `Instance Variable'. We have +% to account for this both in the \...x definition and in parsing the +% input at hand. Thus also need a control sequence (passed as #5) for +% the \E... definition to assign the category name to. +% +\def\deftypeopparsebody#1#2#3#4#5 #6 {\begingroup\inENV + \medbreak + \def#1{\endgraf\endgroup\medbreak}% + \def#2##1 ##2 ##3 {% + \def#4{##1}% + \begingroup\obeylines\activeparens\spacesplit{#3{##2}{##3}}}% + \parindent=0in + \advance\leftskip by \defbodyindent + \exdentamount=\defbodyindent + \begingroup\obeylines\activeparens\spacesplit{#3{#5}{#6}}} + \def\defopparsebody #1#2#3#4#5 {\begingroup\inENV % \medbreak % % Define the end token that this defining construct specifies @@ -3911,7 +4459,7 @@ width0pt\relax} \fi \def#2##1 ##2 {\def#4{##1}% \begingroup\obeylines\activeparens\spacesplit{#3{##2}}}% \parindent=0in -\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent +\advance\leftskip by \defbodyindent \exdentamount=\defbodyindent \begingroup\obeylines\activeparens\spacesplit{#3{#5}}} @@ -3926,7 +4474,7 @@ width0pt\relax} \fi \def#1{\endgraf\endgroup\medbreak}% \def#2{\begingroup\obeylines\spacesplit#3}% \parindent=0in -\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent +\advance\leftskip by \defbodyindent \exdentamount=\defbodyindent \begingroup % \catcode 61=\active % @@ -3943,7 +4491,7 @@ width0pt\relax} \fi \def#1{\endgraf\endgroup\medbreak}% \def#2##1 
{\begingroup\obeylines\spacesplit{#3{##1}}}% \parindent=0in - \advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent + \advance\leftskip by \defbodyindent \exdentamount=\defbodyindent \begingroup\obeylines } @@ -3988,7 +4536,7 @@ width0pt\relax} \fi \def#2##1 ##2 {\def#4{##1}% \begingroup\obeylines\spacesplit{#3{##2}}}% \parindent=0in -\advance\leftskip by \defbodyindent \advance \rightskip by \defbodyindent +\advance\leftskip by \defbodyindent \exdentamount=\defbodyindent \begingroup\obeylines\spacesplit{#3{#5}}} @@ -4012,16 +4560,17 @@ width0pt\relax} \fi % First, define the processing that is wanted for arguments of \defun % Use this to expand the args and terminate the paragraph they make up -\def\defunargs #1{\functionparens \sl +\def\defunargs#1{\functionparens \sl % Expand, preventing hyphenation at `-' chars. % Note that groups don't affect changes in \hyphenchar. -\hyphenchar\tensl=0 +% Set the font temporarily and use \font in case \setfont made \tensl a macro. +{\tensl\hyphenchar\font=0}% #1% -\hyphenchar\tensl=45 +{\tensl\hyphenchar\font=45}% \ifnum\parencount=0 \else \errmessage{Unbalanced parentheses in @def}\fi% \interlinepenalty=10000 \advance\rightskip by 0pt plus 1fil -\endgraf\penalty 10000\vskip -\parskip\penalty 10000% +\endgraf\nobreak\vskip -\parskip\nobreak } \def\deftypefunargs #1{% @@ -4032,7 +4581,7 @@ width0pt\relax} \fi \tclose{#1}% avoid \code because of side effects on active chars \interlinepenalty=10000 \advance\rightskip by 0pt plus 1fil -\endgraf\penalty 10000\vskip -\parskip\penalty 10000% +\endgraf\nobreak\vskip -\parskip\nobreak } % Do complete processing of one @defun or @defunx line already parsed. 
@@ -4051,7 +4600,7 @@ width0pt\relax} \fi \def\defun{\defparsebody\Edefun\defunx\defunheader} \def\defunheader #1#2{\doind {fn}{\code{#1}}% Make entry in function index -\begingroup\defname {#1}{Function}% +\begingroup\defname {#1}{\putwordDeffunc}% \defunargs {#2}\endgroup % \catcode 61=\other % Turn off change made in \defparsebody } @@ -4065,7 +4614,7 @@ width0pt\relax} \fi % #1 is the data type, #2 the name, #3 the args. \def\deftypefunheaderx #1#2 #3\relax{% \doind {fn}{\code{#2}}% Make entry in function index -\begingroup\defname {\defheaderxcond#1\relax$$$#2}{Function}% +\begingroup\defname {\defheaderxcond#1\relax$$$#2}{\putwordDeftypefun}% \deftypefunargs {#3}\endgroup % \catcode 61=\other % Turn off change made in \defparsebody } @@ -4096,7 +4645,7 @@ width0pt\relax} \fi \def\defmac{\defparsebody\Edefmac\defmacx\defmacheader} \def\defmacheader #1#2{\doind {fn}{\code{#1}}% Make entry in function index -\begingroup\defname {#1}{Macro}% +\begingroup\defname {#1}{\putwordDefmac}% \defunargs {#2}\endgroup % \catcode 61=\other % Turn off change made in \defparsebody } @@ -4106,53 +4655,77 @@ width0pt\relax} \fi \def\defspec{\defparsebody\Edefspec\defspecx\defspecheader} \def\defspecheader #1#2{\doind {fn}{\code{#1}}% Make entry in function index -\begingroup\defname {#1}{Special Form}% +\begingroup\defname {#1}{\putwordDefspec}% \defunargs {#2}\endgroup % \catcode 61=\other % Turn off change made in \defparsebody } -% This definition is run if you use @defunx -% anywhere other than immediately after a @defun or @defunx. 
- -\def\deffnx #1 {\errmessage{@deffnx in invalid context}} -\def\defunx #1 {\errmessage{@defunx in invalid context}} -\def\defmacx #1 {\errmessage{@defmacx in invalid context}} -\def\defspecx #1 {\errmessage{@defspecx in invalid context}} -\def\deftypefnx #1 {\errmessage{@deftypefnx in invalid context}} -\def\deftypemethodx #1 {\errmessage{@deftypemethodx in invalid context}} -\def\deftypeunx #1 {\errmessage{@deftypeunx in invalid context}} - -% @defmethod, and so on - -% @defop {Funny Method} foo-class frobnicate argument - +% @defop CATEGORY CLASS OPERATION ARG... +% \def\defop #1 {\def\defoptype{#1}% \defopparsebody\Edefop\defopx\defopheader\defoptype} - -\def\defopheader #1#2#3{% -\dosubind {fn}{\code{#2}}{on #1}% Make entry in function index -\begingroup\defname {#2}{\defoptype{} on #1}% +% +\def\defopheader#1#2#3{% +\dosubind {fn}{\code{#2}}{\putwordon\ #1}% Make entry in function index +\begingroup\defname {#2}{\defoptype\ \putwordon\ #1}% \defunargs {#3}\endgroup % } -% @deftypemethod foo-class return-type foo-method args +% @deftypeop CATEGORY CLASS TYPE OPERATION ARG... +% +\def\deftypeop #1 {\def\deftypeopcategory{#1}% + \deftypeopparsebody\Edeftypeop\deftypeopx\deftypeopheader + \deftypeopcategory} +% +% #1 is the class name, #2 the data type, #3 the operation name, #4 the args. +\def\deftypeopheader#1#2#3#4{% + \dosubind{fn}{\code{#3}}{\putwordon\ \code{#1}}% entry in function index + \begingroup + \defname{\defheaderxcond#2\relax$$$#3} + {\deftypeopcategory\ \putwordon\ \code{#1}}% + \deftypefunargs{#4}% + \endgroup +} + +% @deftypemethod CLASS TYPE METHOD ARG... % \def\deftypemethod{% - \defmethparsebody\Edeftypemethod\deftypemethodx\deftypemethodheader} + \deftypemethparsebody\Edeftypemethod\deftypemethodx\deftypemethodheader} % % #1 is the class name, #2 the data type, #3 the method name, #4 the args. 
\def\deftypemethodheader#1#2#3#4{% - \deftypefnheaderx{Method on #1}{#2}#3 #4\relax + \dosubind{fn}{\code{#3}}{\putwordon\ \code{#1}}% entry in function index + \begingroup + \defname{\defheaderxcond#2\relax$$$#3}{\putwordMethodon\ \code{#1}}% + \deftypefunargs{#4}% + \endgroup } -% @defmethod == @defop Method +% @deftypeivar CLASS TYPE VARNAME +% +\def\deftypeivar{% + \deftypemethparsebody\Edeftypeivar\deftypeivarx\deftypeivarheader} +% +% #1 is the class name, #2 the data type, #3 the variable name. +\def\deftypeivarheader#1#2#3{% + \dosubind{vr}{\code{#3}}{\putwordof\ \code{#1}}% entry in variable index + \begingroup + \defname{#3}{\putwordInstanceVariableof\ \code{#1}}% + \defvarargs{#3}% + \endgroup +} +% @defmethod == @defop Method +% \def\defmethod{\defmethparsebody\Edefmethod\defmethodx\defmethodheader} - -\def\defmethodheader #1#2#3{% -\dosubind {fn}{\code{#2}}{on #1}% entry in function index -\begingroup\defname {#2}{Method on #1}% -\defunargs {#3}\endgroup % +% +% #1 is the class name, #2 the method name, #3 the args. 
+\def\defmethodheader#1#2#3{% + \dosubind{fn}{\code{#2}}{\putwordon\ \code{#1}}% entry in function index + \begingroup + \defname{#2}{\putwordMethodon\ \code{#1}}% + \defunargs{#3}% + \endgroup } % @defcv {Class Option} foo-class foo-flag @@ -4161,37 +4734,30 @@ width0pt\relax} \fi \defopvarparsebody\Edefcv\defcvx\defcvarheader\defcvtype} \def\defcvarheader #1#2#3{% -\dosubind {vr}{\code{#2}}{of #1}% Make entry in var index -\begingroup\defname {#2}{\defcvtype{} of #1}% +\dosubind {vr}{\code{#2}}{\putwordof\ #1}% Make entry in var index +\begingroup\defname {#2}{\defcvtype\ \putwordof\ #1}% \defvarargs {#3}\endgroup % } -% @defivar == @defcv {Instance Variable} - +% @defivar CLASS VARNAME == @defcv {Instance Variable} CLASS VARNAME +% \def\defivar{\defvrparsebody\Edefivar\defivarx\defivarheader} - -\def\defivarheader #1#2#3{% -\dosubind {vr}{\code{#2}}{of #1}% Make entry in var index -\begingroup\defname {#2}{Instance Variable of #1}% -\defvarargs {#3}\endgroup % +% +\def\defivarheader#1#2#3{% + \dosubind {vr}{\code{#2}}{\putwordof\ #1}% entry in var index + \begingroup + \defname{#2}{\putwordInstanceVariableof\ #1}% + \defvarargs{#3}% + \endgroup } -% These definitions are run if you use @defmethodx, etc., -% anywhere other than immediately after a @defmethod, etc. - -\def\defopx #1 {\errmessage{@defopx in invalid context}} -\def\defmethodx #1 {\errmessage{@defmethodx in invalid context}} -\def\defcvx #1 {\errmessage{@defcvx in invalid context}} -\def\defivarx #1 {\errmessage{@defivarx in invalid context}} - -% Now @defvar - +% @defvar % First, define the processing that is wanted for arguments of @defvar. % This is actually simple: just print them in roman. 
% This must expand the args and terminate the paragraph they make up \def\defvarargs #1{\normalparens #1% \interlinepenalty=10000 -\endgraf\penalty 10000\vskip -\parskip\penalty 10000} +\endgraf\nobreak\vskip -\parskip\nobreak} % @defvr Counter foo-count @@ -4205,7 +4771,7 @@ width0pt\relax} \fi \def\defvar{\defvarparsebody\Edefvar\defvarx\defvarheader} \def\defvarheader #1#2{\doind {vr}{\code{#1}}% Make entry in var index -\begingroup\defname {#1}{Variable}% +\begingroup\defname {#1}{\putwordDefvar}% \defvarargs {#2}\endgroup % } @@ -4214,7 +4780,7 @@ width0pt\relax} \fi \def\defopt{\defvarparsebody\Edefopt\defoptx\defoptheader} \def\defoptheader #1#2{\doind {vr}{\code{#1}}% Make entry in var index -\begingroup\defname {#1}{User Option}% +\begingroup\defname {#1}{\putwordDefopt}% \defvarargs {#2}\endgroup % } @@ -4226,9 +4792,9 @@ width0pt\relax} \fi % is actually part of the data type, which should not be put into the index. \def\deftypevarheader #1#2{% \dovarind#2 \relax% Make entry in variables index -\begingroup\defname {\defheaderxcond#1\relax$$$#2}{Variable}% +\begingroup\defname {\defheaderxcond#1\relax$$$#2}{\putwordDeftypevar}% \interlinepenalty=10000 -\endgraf\penalty 10000\vskip -\parskip\penalty 10000 +\endgraf\nobreak\vskip -\parskip\nobreak \endgroup} \def\dovarind#1 #2\relax{\doind{vr}{\code{#1}}} @@ -4239,18 +4805,9 @@ width0pt\relax} \fi \def\deftypevrheader #1#2#3{\dovarind#3 \relax% \begingroup\defname {\defheaderxcond#2\relax$$$#3}{#1} \interlinepenalty=10000 -\endgraf\penalty 10000\vskip -\parskip\penalty 10000 +\endgraf\nobreak\vskip -\parskip\nobreak \endgroup} -% This definition is run if you use @defvarx -% anywhere other than immediately after a @defvar or @defvarx. 
- -\def\defvrx #1 {\errmessage{@defvrx in invalid context}} -\def\defvarx #1 {\errmessage{@defvarx in invalid context}} -\def\defoptx #1 {\errmessage{@defoptx in invalid context}} -\def\deftypevarx #1 {\errmessage{@deftypevarx in invalid context}} -\def\deftypevrx #1 {\errmessage{@deftypevrx in invalid context}} - % Now define @deftp % Args are printed in bold, a slight difference from @defvar. @@ -4263,51 +4820,394 @@ width0pt\relax} \fi \def\deftpheader #1#2#3{\doind {tp}{\code{#2}}% \begingroup\defname {#2}{#1}\deftpargs{#3}\endgroup} -% This definition is run if you use @deftpx, etc -% anywhere other than immediately after a @deftp, etc. +% These definitions are used if you use @defunx (etc.) +% anywhere other than immediately after a @defun or @defunx. +% +\def\defcvx#1 {\errmessage{@defcvx in invalid context}} +\def\deffnx#1 {\errmessage{@deffnx in invalid context}} +\def\defivarx#1 {\errmessage{@defivarx in invalid context}} +\def\defmacx#1 {\errmessage{@defmacx in invalid context}} +\def\defmethodx#1 {\errmessage{@defmethodx in invalid context}} +\def\defoptx #1 {\errmessage{@defoptx in invalid context}} +\def\defopx#1 {\errmessage{@defopx in invalid context}} +\def\defspecx#1 {\errmessage{@defspecx in invalid context}} +\def\deftpx#1 {\errmessage{@deftpx in invalid context}} +\def\deftypefnx#1 {\errmessage{@deftypefnx in invalid context}} +\def\deftypefunx#1 {\errmessage{@deftypefunx in invalid context}} +\def\deftypeivarx#1 {\errmessage{@deftypeivarx in invalid context}} +\def\deftypemethodx#1 {\errmessage{@deftypemethodx in invalid context}} +\def\deftypeopx#1 {\errmessage{@deftypeopx in invalid context}} +\def\deftypevarx#1 {\errmessage{@deftypevarx in invalid context}} +\def\deftypevrx#1 {\errmessage{@deftypevrx in invalid context}} +\def\defunx#1 {\errmessage{@defunx in invalid context}} +\def\defvarx#1 {\errmessage{@defvarx in invalid context}} +\def\defvrx#1 {\errmessage{@defvrx in invalid context}} + + +\message{macros,} +% @macro. 
+ +% To do this right we need a feature of e-TeX, \scantokens, +% which we arrange to emulate with a temporary file in ordinary TeX. +\ifx\eTeXversion\undefined + \newwrite\macscribble + \def\scanmacro#1{% + \begingroup \newlinechar`\^^M + % Undo catcode changes of \startcontents and \doprintindex + \catcode`\@=0 \catcode`\\=12 \escapechar=`\@ + % Append \endinput to make sure that TeX does not see the ending newline. + \toks0={#1\endinput}% + \immediate\openout\macscribble=\jobname.tmp + \immediate\write\macscribble{\the\toks0}% + \immediate\closeout\macscribble + \let\xeatspaces\eatspaces + \input \jobname.tmp + \endgroup +} +\else +\def\scanmacro#1{% +\begingroup \newlinechar`\^^M +% Undo catcode changes of \startcontents and \doprintindex +\catcode`\@=0 \catcode`\\=12 \escapechar=`\@ +\let\xeatspaces\eatspaces\scantokens{#1\endinput}\endgroup} +\fi + +\newcount\paramno % Count of parameters +\newtoks\macname % Macro name +\newif\ifrecursive % Is it recursive? +\def\macrolist{} % List of all defined macros in the form + % \do\macro1\do\macro2... + +% Utility routines. +% Thisdoes \let #1 = #2, except with \csnames. +\def\cslet#1#2{% +\expandafter\expandafter +\expandafter\let +\expandafter\expandafter +\csname#1\endcsname +\csname#2\endcsname} + +% Trim leading and trailing spaces off a string. +% Concepts from aro-bend problem 15 (see CTAN). +{\catcode`\@=11 +\gdef\eatspaces #1{\expandafter\trim@\expandafter{#1 }} +\gdef\trim@ #1{\trim@@ @#1 @ #1 @ @@} +\gdef\trim@@ #1@ #2@ #3@@{\trim@@@\empty #2 @} +\def\unbrace#1{#1} +\unbrace{\gdef\trim@@@ #1 } #2@{#1} +} + +% Trim a single trailing ^^M off a string. +{\catcode`\^^M=12\catcode`\Q=3% +\gdef\eatcr #1{\eatcra #1Q^^MQ}% +\gdef\eatcra#1^^MQ{\eatcrb#1Q}% +\gdef\eatcrb#1Q#2Q{#1}% +} + +% Macro bodies are absorbed as an argument in a context where +% all characters are catcode 10, 11 or 12, except \ which is active +% (as in normal texinfo). It is necessary to change the definition of \. 
-\def\deftpx #1 {\errmessage{@deftpx in invalid context}} +% It's necessary to have hard CRs when the macro is executed. This is +% done by making ^^M (\endlinechar) catcode 12 when reading the macro +% body, and then making it the \newlinechar in \scanmacro. + +\def\macrobodyctxt{% + \catcode`\~=12 + \catcode`\^=12 + \catcode`\_=12 + \catcode`\|=12 + \catcode`\<=12 + \catcode`\>=12 + \catcode`\+=12 + \catcode`\{=12 + \catcode`\}=12 + \catcode`\@=12 + \catcode`\^^M=12 + \usembodybackslash} + +\def\macroargctxt{% + \catcode`\~=12 + \catcode`\^=12 + \catcode`\_=12 + \catcode`\|=12 + \catcode`\<=12 + \catcode`\>=12 + \catcode`\+=12 + \catcode`\@=12 + \catcode`\\=12} + +% \mbodybackslash is the definition of \ in @macro bodies. +% It maps \foo\ => \csname macarg.foo\endcsname => #N +% where N is the macro parameter number. +% We define \csname macarg.\endcsname to be \realbackslash, so +% \\ in macro replacement text gets you a backslash. + +{\catcode`@=0 @catcode`@\=@active + @gdef@usembodybackslash{@let\=@mbodybackslash} + @gdef@mbodybackslash#1\{@csname macarg.#1@endcsname} +} +\expandafter\def\csname macarg.\endcsname{\realbackslash} +\def\macro{\recursivefalse\parsearg\macroxxx} +\def\rmacro{\recursivetrue\parsearg\macroxxx} -\message{cross reference,} -% Define cross-reference macros -\newwrite \auxfile +\def\macroxxx#1{% + \getargs{#1}% now \macname is the macname and \argl the arglist + \ifx\argl\empty % no arguments + \paramno=0% + \else + \expandafter\parsemargdef \argl;% + \fi + \if1\csname ismacro.\the\macname\endcsname + \message{Warning: redefining \the\macname}% + \else + \expandafter\ifx\csname \the\macname\endcsname \relax + \else \errmessage{The name \the\macname\space is reserved}\fi + \global\cslet{macsave.\the\macname}{\the\macname}% + \global\expandafter\let\csname ismacro.\the\macname\endcsname=1% + % Add the macroname to \macrolist + \toks0 = \expandafter{\macrolist\do}% + \xdef\macrolist{\the\toks0 + 
\expandafter\noexpand\csname\the\macname\endcsname}% + \fi + \begingroup \macrobodyctxt + \ifrecursive \expandafter\parsermacbody + \else \expandafter\parsemacbody + \fi} + +\def\unmacro{\parsearg\unmacroxxx} +\def\unmacroxxx#1{% + \if1\csname ismacro.#1\endcsname + \global\cslet{#1}{macsave.#1}% + \global\expandafter\let \csname ismacro.#1\endcsname=0% + % Remove the macro name from \macrolist + \begingroup + \edef\tempa{\expandafter\noexpand\csname#1\endcsname}% + \def\do##1{% + \def\tempb{##1}% + \ifx\tempa\tempb + % remove this + \else + \toks0 = \expandafter{\newmacrolist\do}% + \edef\newmacrolist{\the\toks0\expandafter\noexpand\tempa}% + \fi}% + \def\newmacrolist{}% + % Execute macro list to define \newmacrolist + \macrolist + \global\let\macrolist\newmacrolist + \endgroup + \else + \errmessage{Macro #1 not defined}% + \fi +} + +% This makes use of the obscure feature that if the last token of a +% <parameter list> is #, then the preceding argument is delimited by +% an opening brace, and that opening brace is not consumed. +\def\getargs#1{\getargsxxx#1{}} +\def\getargsxxx#1#{\getmacname #1 \relax\getmacargs} +\def\getmacname #1 #2\relax{\macname={#1}} +\def\getmacargs#1{\def\argl{#1}} + +% Parse the optional {params} list. Set up \paramno and \paramlist +% so \defmacro knows what to do. Define \macarg.blah for each blah +% in the params list, to be ##N where N is the position in that list. +% That gets used by \mbodybackslash (above). + +% We need to get `macro parameter char #' into several definitions. +% The technique used is stolen from LaTeX: let \hash be something +% unexpandable, insert that wherever you need a #, and then redefine +% it to # just before using the token list produced. +% +% The same technique is used to protect \eatspaces till just before +% the macro is used. 
+ +\def\parsemargdef#1;{\paramno=0\def\paramlist{}% + \let\hash\relax\let\xeatspaces\relax\parsemargdefxxx#1,;,} +\def\parsemargdefxxx#1,{% + \if#1;\let\next=\relax + \else \let\next=\parsemargdefxxx + \advance\paramno by 1% + \expandafter\edef\csname macarg.\eatspaces{#1}\endcsname + {\xeatspaces{\hash\the\paramno}}% + \edef\paramlist{\paramlist\hash\the\paramno,}% + \fi\next} + +% These two commands read recursive and nonrecursive macro bodies. +% (They're different since rec and nonrec macros end differently.) + +\long\def\parsemacbody#1@end macro% +{\xdef\temp{\eatcr{#1}}\endgroup\defmacro}% +\long\def\parsermacbody#1@end rmacro% +{\xdef\temp{\eatcr{#1}}\endgroup\defmacro}% + +% This defines the macro itself. There are six cases: recursive and +% nonrecursive macros of zero, one, and many arguments. +% Much magic with \expandafter here. +% \xdef is used so that macro definitions will survive the file +% they're defined in; @include reads the file inside a group. +\def\defmacro{% + \let\hash=##% convert placeholders to macro parameter chars + \ifrecursive + \ifcase\paramno + % 0 + \expandafter\xdef\csname\the\macname\endcsname{% + \noexpand\scanmacro{\temp}}% + \or % 1 + \expandafter\xdef\csname\the\macname\endcsname{% + \bgroup\noexpand\macroargctxt + \noexpand\braceorline + \expandafter\noexpand\csname\the\macname xxx\endcsname}% + \expandafter\xdef\csname\the\macname xxx\endcsname##1{% + \egroup\noexpand\scanmacro{\temp}}% + \else % many + \expandafter\xdef\csname\the\macname\endcsname{% + \bgroup\noexpand\macroargctxt + \noexpand\csname\the\macname xx\endcsname}% + \expandafter\xdef\csname\the\macname xx\endcsname##1{% + \expandafter\noexpand\csname\the\macname xxx\endcsname ##1,}% + \expandafter\expandafter + \expandafter\xdef + \expandafter\expandafter + \csname\the\macname xxx\endcsname + \paramlist{\egroup\noexpand\scanmacro{\temp}}% + \fi + \else + \ifcase\paramno + % 0 + \expandafter\xdef\csname\the\macname\endcsname{% + 
\noexpand\norecurse{\the\macname}% + \noexpand\scanmacro{\temp}\egroup}% + \or % 1 + \expandafter\xdef\csname\the\macname\endcsname{% + \bgroup\noexpand\macroargctxt + \noexpand\braceorline + \expandafter\noexpand\csname\the\macname xxx\endcsname}% + \expandafter\xdef\csname\the\macname xxx\endcsname##1{% + \egroup + \noexpand\norecurse{\the\macname}% + \noexpand\scanmacro{\temp}\egroup}% + \else % many + \expandafter\xdef\csname\the\macname\endcsname{% + \bgroup\noexpand\macroargctxt + \expandafter\noexpand\csname\the\macname xx\endcsname}% + \expandafter\xdef\csname\the\macname xx\endcsname##1{% + \expandafter\noexpand\csname\the\macname xxx\endcsname ##1,}% + \expandafter\expandafter + \expandafter\xdef + \expandafter\expandafter + \csname\the\macname xxx\endcsname + \paramlist{% + \egroup + \noexpand\norecurse{\the\macname}% + \noexpand\scanmacro{\temp}\egroup}% + \fi + \fi} -\newif\ifhavexrefs % True if xref values are known. +\def\norecurse#1{\bgroup\cslet{#1}{macsave.#1}} + +% \braceorline decides whether the next nonwhitespace character is a +% {. If so it reads up to the closing }, if not, it reads the whole +% line. Whatever was read is then fed to the next control sequence +% as an argument (by \parsebrace or \parsearg) +\def\braceorline#1{\let\next=#1\futurelet\nchar\braceorlinexxx} +\def\braceorlinexxx{% + \ifx\nchar\bgroup\else + \expandafter\parsearg + \fi \next} + +% We mant to disable all macros during \shipout so that they are not +% expanded by \write. +\def\turnoffmacros{\begingroup \def\do##1{\let\noexpand##1=\relax}% + \edef\next{\macrolist}\expandafter\endgroup\next} + + +% @alias. +% We need some trickery to remove the optional spaces around the equal +% sign. Just make them active and then expand them all to nothing. 
+\def\alias{\begingroup\obeyspaces\parsearg\aliasxxx} +\def\aliasxxx #1{\aliasyyy#1\relax} +\def\aliasyyy #1=#2\relax{\ignoreactivespaces +\edef\next{\global\let\expandafter\noexpand\csname#1\endcsname=% + \expandafter\noexpand\csname#2\endcsname}% +\expandafter\endgroup\next} + + +\message{cross references,} +% @xref etc. + +\newwrite\auxfile + +\newif\ifhavexrefs % True if xref values are known. \newif\ifwarnedxrefs % True if we warned once that they aren't known. -% @inforef is simple. +% @inforef is relatively simple. \def\inforef #1{\inforefzzz #1,,,,**} \def\inforefzzz #1,#2,#3,#4**{\putwordSee{} \putwordInfo{} \putwordfile{} \file{\ignorespaces #3{}}, node \samp{\ignorespaces#1{}}} -% \setref{foo} defines a cross-reference point named foo. +% @node's job is to define \lastnode. +\def\node{\ENVcheck\parsearg\nodezzz} +\def\nodezzz#1{\nodexxx [#1,]} +\def\nodexxx[#1,#2]{\gdef\lastnode{#1}} +\let\nwnode=\node +\let\lastnode=\relax -\def\setref#1{% -\dosetq{#1-title}{Ytitle}% -\dosetq{#1-pg}{Ypagenumber}% -\dosetq{#1-snt}{Ysectionnumberandtype}} +% The sectioning commands (@chapter, etc.) call these. +\def\donoderef{% + \ifx\lastnode\relax\else + \expandafter\expandafter\expandafter\setref{\lastnode}% + {Ysectionnumberandtype}% + \global\let\lastnode=\relax + \fi +} +\def\unnumbnoderef{% + \ifx\lastnode\relax\else + \expandafter\expandafter\expandafter\setref{\lastnode}{Ynothing}% + \global\let\lastnode=\relax + \fi +} +\def\appendixnoderef{% + \ifx\lastnode\relax\else + \expandafter\expandafter\expandafter\setref{\lastnode}% + {Yappendixletterandtype}% + \global\let\lastnode=\relax + \fi +} -\def\unnumbsetref#1{% -\dosetq{#1-title}{Ytitle}% -\dosetq{#1-pg}{Ypagenumber}% -\dosetq{#1-snt}{Ynothing}} -\def\appendixsetref#1{% -\dosetq{#1-title}{Ytitle}% -\dosetq{#1-pg}{Ypagenumber}% -\dosetq{#1-snt}{Yappendixletterandtype}} +% @anchor{NAME} -- define xref target at arbitrary point. 
+% +\newcount\savesfregister +\gdef\savesf{\relax \ifhmode \savesfregister=\spacefactor \fi} +\gdef\restoresf{\relax \ifhmode \spacefactor=\savesfregister \fi} +\gdef\anchor#1{\savesf \setref{#1}{Ynothing}\restoresf \ignorespaces} + +% \setref{NAME}{SNT} defines a cross-reference point NAME, namely +% NAME-title, NAME-pg, and NAME-SNT. Called from \foonoderef. We have +% to set \indexdummies so commands such as @code in a section title +% aren't expanded. It would be nicer not to expand the titles in the +% first place, but there's so many layers that that is hard to do. +% +\def\setref#1#2{{% + \indexdummies + \pdfmkdest{#1}% + \dosetq{#1-title}{Ytitle}% + \dosetq{#1-pg}{Ypagenumber}% + \dosetq{#1-snt}{#2}% +}} -% \xref, \pxref, and \ref generate cross-references to specified points. -% For \xrefX, #1 is the node name, #2 the name of the Info -% cross-reference, #3 the printed node name, #4 the name of the Info -% file, #5 the name of the printed manual. All but the node name can be -% omitted. +% @xref, @pxref, and @ref generate cross-references. For \xrefX, #1 is +% the node name, #2 the name of the Info cross-reference, #3 the printed +% node name, #4 the name of the Info file, #5 the name of the printed +% manual. All but the node name can be omitted. % \def\pxref#1{\putwordsee{} \xrefX[#1,,,,,,,]} \def\xref#1{\putwordSee{} \xrefX[#1,,,,,,,]} \def\ref#1{\xrefX[#1,,,,,,,]} \def\xrefX[#1,#2,#3,#4,#5,#6]{\begingroup + \unsepspaces \def\printedmanual{\ignorespaces #5}% \def\printednodename{\ignorespaces #3}% \setbox1=\hbox{\printedmanual}% @@ -4320,7 +5220,7 @@ width0pt\relax} \fi \else % Use the actual chapter/section title appear inside % the square brackets. Use the real section title if we have it. - \ifdim \wd1>0pt% + \ifdim \wd1 > 0pt % It is in another manual, so we don't have it. \def\printednodename{\ignorespaces #1}% \else @@ -4341,27 +5241,54 @@ width0pt\relax} \fi % are best written with fairly long node names, containing hyphens, this % is a loss. 
Therefore, we give the text of the node name again, so it % is as if TeX is seeing it for the first time. + \ifpdf + \leavevmode + \getfilename{#4}% + \ifnum\filenamelength>0 + \startlink attr{/Border [0 0 0]}% + goto file{\the\filename.pdf} name{#1@}% + \else + \startlink attr{/Border [0 0 0]}% + goto name{#1@}% + \fi + \linkcolor + \fi + % \ifdim \wd1 > 0pt - \putwordsection{} ``\printednodename'' in \cite{\printedmanual}% + \putwordsection{} ``\printednodename'' \putwordin{} \cite{\printedmanual}% \else % _ (for example) has to be the character _ for the purposes of the % control sequence corresponding to the node, but it has to expand % into the usual \leavevmode...\vrule stuff for purposes of % printing. So we \turnoffactive for the \refx-snt, back on for the % printing, back off for the \refx-pg. - {\turnoffactive \refx{#1-snt}{}}% - \space [\printednodename],\space + {\normalturnoffactive + % Only output a following space if the -snt ref is nonempty; for + % @unnumbered and @anchor, it won't be. + \setbox2 = \hbox{\ignorespaces \refx{#1-snt}{}}% + \ifdim \wd2 > 0pt \refx{#1-snt}\space\fi + }% + % [mynode], + [\printednodename],\space + % page 3 \turnoffactive \putwordpage\tie\refx{#1-pg}{}% \fi + \endlink \endgroup} % \dosetq is the interface for calls from other macros -% Use \turnoffactive so that punctuation chars such as underscore -% work in node names. -\def\dosetq #1#2{{\let\folio=0 \turnoffactive -\edef\next{\write\auxfile{\internalsetq {#1}{#2}}}% -\next}} +% Use \normalturnoffactive so that punctuation chars such as underscore +% and backslash work in node names. (\turnoffactive doesn't do \.) 
+\def\dosetq#1#2{% + {\let\folio=0% + \normalturnoffactive + \edef\next{\write\auxfile{\internalsetq{#1}{#2}}}% + \iflinks + \next + \fi + }% +} % \internalsetq {foo}{page} expands into % CHARACTERS 'xrdef {foo}{...expansion of \Ypage...} @@ -4413,12 +5340,14 @@ width0pt\relax} \fi \expandafter\ifx\csname X#1\endcsname\relax % If not defined, say something at least. \angleleft un\-de\-fined\angleright - \ifhavexrefs - \message{\linenumber Undefined cross reference `#1'.}% - \else - \ifwarnedxrefs\else - \global\warnedxrefstrue - \message{Cross reference values unknown; you must run TeX again.}% + \iflinks + \ifhavexrefs + \message{\linenumber Undefined cross reference `#1'.}% + \else + \ifwarnedxrefs\else + \global\warnedxrefstrue + \message{Cross reference values unknown; you must run TeX again.}% + \fi \fi \fi \else @@ -4429,7 +5358,7 @@ width0pt\relax} \fi } % This is the macro invoked by entries in the aux file. -% +% \def\xrdef#1{\begingroup % Reenable \ as an escape while reading the second argument. \catcode`\\ = 0 @@ -4492,8 +5421,7 @@ width0pt\relax} \fi \catcode`\$=\other \catcode`\#=\other \catcode`\&=\other - % `\+ does not work, so use 43. - \catcode43=\other + \catcode`+=\other % avoid \+ for paranoia even though we've turned it off % Make the characters 128-255 be printing characters {% \count 1=128 @@ -4582,6 +5510,8 @@ width0pt\relax} \fi \xspaceskip\z@skip \parindent\defaultparindent % + \smallfonts \rm + % % Hang the footnote text off the number. \hang \textindent{\thisfootno}% @@ -4596,7 +5526,7 @@ width0pt\relax} \fi \else\let\next\f@t\fi \next} \def\f@@t{\bgroup\aftergroup\@foot\let\next} \def\f@t#1{#1\@foot} -\def\@foot{\strut\egroup} +\def\@foot{\strut\par\egroup} }%end \catcode `\@=11 @@ -4655,23 +5585,25 @@ width0pt\relax} \fi % @image. We use the macros from epsf.tex to support this. % If epsf.tex is not installed and @image is used, we complain. -% +% % Check for and read epsf.tex up front. 
If we read it only at @image % time, we might be inside a group, and then its definitions would get % undone and the next image would fail. \openin 1 = epsf.tex \ifeof 1 \else \closein 1 - \def\epsfannounce{\toks0 = }% do not bother showing banner + % Do not bother showing banner with post-v2.7 epsf.tex (available in + % doc/epsf.tex until it shows up on ctan). + \def\epsfannounce{\toks0 = }% \input epsf.tex \fi % +% We will only complain once about lack of epsf.tex. \newif\ifwarnednoepsf \newhelp\noepsfhelp{epsf.tex must be installed for images to work. It is also included in the Texinfo distribution, or you can get - it from ftp://ftp.tug.org/tex/epsf.tex.} + it from ftp://tug.org/tex/epsf.tex.} % -% Only complain once about lack of epsf.tex. \def\image#1{% \ifx\epsfbox\undefined \ifwarnednoepsf \else @@ -4689,42 +5621,79 @@ width0pt\relax} \fi % #2 is (optional) width, #3 is (optional) height. % #4 is just the usual extra ignored arg for parsing this stuff. \def\imagexxx#1,#2,#3,#4\finish{% - % \epsfbox itself resets \epsf?size at each figure. - \setbox0 = \hbox{\ignorespaces #2}\ifdim\wd0 > 0pt \epsfxsize=#2\relax \fi - \setbox0 = \hbox{\ignorespaces #3}\ifdim\wd0 > 0pt \epsfysize=#3\relax \fi - \epsfbox{#1.eps}% + \ifpdf + \centerline{\dopdfimage{#1}{#2}{#3}}% + \else + % \epsfbox itself resets \epsf?size at each figure. + \setbox0 = \hbox{\ignorespaces #2}\ifdim\wd0 > 0pt \epsfxsize=#2\relax \fi + \setbox0 = \hbox{\ignorespaces #3}\ifdim\wd0 > 0pt \epsfysize=#3\relax \fi + \begingroup + \catcode`\^^M = 5 % in case we're inside an example + % If the image is by itself, center it. + \ifvmode + \nobreak\bigskip + % Usually we'll have text after the image which will insert + % \parskip glue, so insert it here too to equalize the space + % above and below. + \nobreak\vskip\parskip + \nobreak + \centerline{\epsfbox{#1.eps}}% + \bigbreak + \else + % In the middle of a paragraph, no extra space. 
+ \epsfbox{#1.eps}% + \fi + \endgroup + \fi } -% End of control word definitions. - -\message{and turning on texinfo input format.} +\message{localization,} +% and i18n. -\def\openindices{% - \newindex{cp}% - \newcodeindex{fn}% - \newcodeindex{vr}% - \newcodeindex{tp}% - \newcodeindex{ky}% - \newcodeindex{pg}% +% @documentlanguage is usually given very early, just after +% @setfilename. If done too late, it may not override everything +% properly. Single argument is the language abbreviation. +% It would be nice if we could set up a hyphenation file here. +% +\def\documentlanguage{\parsearg\dodocumentlanguage} +\def\dodocumentlanguage#1{% + \tex % read txi-??.tex file in plain TeX. + % Read the file if it exists. + \openin 1 txi-#1.tex + \ifeof1 + \errhelp = \nolanghelp + \errmessage{Cannot read language file txi-#1.tex}% + \let\temp = \relax + \else + \def\temp{\input txi-#1.tex }% + \fi + \temp + \endgroup } +\newhelp\nolanghelp{The given language definition file cannot be found or +is empty. Maybe you need to install it? In the current directory +should work if nowhere else does.} + -% Set some numeric style parameters, for 8.5 x 11 format. +% @documentencoding should change something in TeX eventually, most +% likely, but for now just recognize it. +\let\documentencoding = \comment -\hsize = 6in -\hoffset = .25in + +% Page size parameters. +% \newdimen\defaultparindent \defaultparindent = 15pt -\parindent = \defaultparindent -\parskip 3pt plus 2pt minus 1pt -\setleading{13.2pt} -\advance\topskip by 1.2cm \chapheadingskip = 15pt plus 4pt minus 2pt \secheadingskip = 12pt plus 3pt minus 2pt \subsecheadingskip = 9pt plus 2pt minus 2pt % Prevent underfull vbox error messages. -\vbadness=10000 +\vbadness = 10000 + +% Don't be so finicky about underfull hboxes, either. +\hbadness = 2000 % Following George Bush, just get rid of widows and orphans. 
\widowpenalty=10000 @@ -4733,101 +5702,125 @@ width0pt\relax} \fi % Use TeX 3.0's \emergencystretch to help line breaking, but if we're % using an old version of TeX, don't do anything. We want the amount of % stretch added to depend on the line length, hence the dependence on -% \hsize. This makes it come to about 9pt for the 8.5x11 format. +% \hsize. We call this whenever the paper size is set. % -\ifx\emergencystretch\thisisundefined - % Allow us to assign to \emergencystretch anyway. - \def\emergencystretch{\dimen0}% -\else - \emergencystretch = \hsize - \divide\emergencystretch by 45 -\fi +\def\setemergencystretch{% + \ifx\emergencystretch\thisisundefined + % Allow us to assign to \emergencystretch anyway. + \def\emergencystretch{\dimen0}% + \else + \emergencystretch = .15\hsize + \fi +} -% Use @smallbook to reset parameters for 7x9.5 format (or else 7x9.25) -\def\smallbook{ - \global\chapheadingskip = 15pt plus 4pt minus 2pt - \global\secheadingskip = 12pt plus 3pt minus 2pt - \global\subsecheadingskip = 9pt plus 2pt minus 2pt +% Parameters in order: 1) textheight; 2) textwidth; 3) voffset; +% 4) hoffset; 5) binding offset; 6) topskip. Then whoever calls us can +% set \parskip and call \setleading for \baselineskip. 
+% +\def\internalpagesizes#1#2#3#4#5#6{% + \voffset = #3\relax + \topskip = #6\relax + \splittopskip = \topskip % - \global\lispnarrowing = 0.3in - \setleading{12pt} - \advance\topskip by -1cm - \global\parskip 2pt plus 1pt - \global\hsize = 5in - \global\vsize=7.5in - \global\tolerance=700 - \global\hfuzz=1pt - \global\contentsrightmargin=0pt - \global\deftypemargin=0pt - \global\defbodyindent=.5cm + \vsize = #1\relax + \advance\vsize by \topskip + \outervsize = \vsize + \advance\outervsize by 2\topandbottommargin + \pageheight = \vsize % - \global\pagewidth=\hsize - \global\pageheight=\vsize + \hsize = #2\relax + \outerhsize = \hsize + \advance\outerhsize by 0.5in + \pagewidth = \hsize % - \global\let\smalllisp=\smalllispx - \global\let\smallexample=\smalllispx - \global\def\Esmallexample{\Esmalllisp} + \normaloffset = #4\relax + \bindingoffset = #5\relax + % + \parindent = \defaultparindent + \setemergencystretch } +% @letterpaper (the default). +\def\letterpaper{{\globaldefs = 1 + \parskip = 3pt plus 2pt minus 1pt + \setleading{13.2pt}% + % + % If page is nothing but text, make it come out even. + \internalpagesizes{46\baselineskip}{6in}{\voffset}{.25in}{\bindingoffset}{36pt}% +}} + +% Use @smallbook to reset parameters for 7x9.5 (or so) format. +\def\smallbook{{\globaldefs = 1 + \parskip = 2pt plus 1pt + \setleading{12pt}% + % + \internalpagesizes{7.5in}{5.in}{\voffset}{.25in}{\bindingoffset}{16pt}% + % + \lispnarrowing = 0.3in + \tolerance = 700 + \hfuzz = 1pt + \contentsrightmargin = 0pt + \deftypemargin = 0pt + \defbodyindent = .5cm + % + \let\smalldisplay = \smalldisplayx + \let\smallexample = \smalllispx + \let\smallformat = \smallformatx + \let\smalllisp = \smalllispx +}} + % Use @afourpaper to print on European A4 paper. 
-\def\afourpaper{ -\global\tolerance=700 -\global\hfuzz=1pt -\setleading{12pt} -\global\parskip 15pt plus 1pt - -\global\vsize= 53\baselineskip -\advance\vsize by \topskip -%\global\hsize= 5.85in % A4 wide 10pt -\global\hsize= 6.5in -\global\outerhsize=\hsize -\global\advance\outerhsize by 0.5in -\global\outervsize=\vsize -\global\advance\outervsize by 0.6in - -\global\pagewidth=\hsize -\global\pageheight=\vsize -} - -\bindingoffset=0pt -\normaloffset=\hoffset -\pagewidth=\hsize -\pageheight=\vsize - -% Allow control of the text dimensions. Parameters in order: textheight; -% textwidth; voffset; hoffset; binding offset; topskip. -% All require a dimension; -% header is additional; added length extends the bottom of the page. - -\def\changepagesizes#1#2#3#4#5#6{ - \global\vsize= #1 - \global\topskip= #6 - \advance\vsize by \topskip - \global\voffset= #3 - \global\hsize= #2 - \global\outerhsize=\hsize - \global\advance\outerhsize by 0.5in - \global\outervsize=\vsize - \global\advance\outervsize by 0.6in - \global\pagewidth=\hsize - \global\pageheight=\vsize - \global\normaloffset= #4 - \global\bindingoffset= #5} +\def\afourpaper{{\globaldefs = 1 + \setleading{12pt}% + \parskip = 3pt plus 2pt minus 1pt + % + \internalpagesizes{53\baselineskip}{160mm}{\voffset}{4mm}{\bindingoffset}{44pt}% + % + \tolerance = 700 + \hfuzz = 1pt +}} % A specific text layout, 24x15cm overall, intended for A4 paper. Top margin % 29mm, hence bottom margin 28mm, nominal side margin 3cm. -\def\afourlatex - {\global\tolerance=700 - \global\hfuzz=1pt - \setleading{12pt} - \global\parskip 15pt plus 1pt - \advance\baselineskip by 1.6pt - \changepagesizes{237mm}{150mm}{3.6mm}{3.6mm}{3mm}{7mm} - } +\def\afourlatex{{\globaldefs = 1 + \setleading{13.6pt}% + % + \afourpaper + \internalpagesizes{237mm}{150mm}{3.6mm}{3.6mm}{3mm}{7mm}% + % + \globaldefs = 0 +}} % Use @afourwide to print on European A4 paper in wide format. 
-\def\afourwide{\afourpaper -\changepagesizes{9.5in}{6.5in}{\hoffset}{\normaloffset}{\bindingoffset}{7mm}} +\def\afourwide{% + \afourpaper + \internalpagesizes{6.5in}{9.5in}{\hoffset}{\normaloffset}{\bindingoffset}{7mm}% + % + \globaldefs = 0 +} + +% @pagesizes TEXTHEIGHT[,TEXTWIDTH] +% Perhaps we should allow setting the margins, \topskip, \parskip, +% and/or leading, also. Or perhaps we should compute them somehow. +% +\def\pagesizes{\parsearg\pagesizesxxx} +\def\pagesizesxxx#1{\pagesizesyyy #1,,\finish} +\def\pagesizesyyy#1,#2,#3\finish{{% + \setbox0 = \hbox{\ignorespaces #2}\ifdim\wd0 > 0pt \hsize=#2\relax \fi + \globaldefs = 1 + % + \parskip = 3pt plus 2pt minus 1pt + \setleading{13.2pt}% + % + \internalpagesizes{#1}{\hsize}{\voffset}{\normaloffset}{\bindingoffset}{44pt}% +}} + +% Set default to letter. +% +\letterpaper + + +\message{and turning on texinfo input format.} % Define macros to output various characters with catcode for normal text. \catcode`\"=\other @@ -4838,6 +5831,7 @@ width0pt\relax} \fi \catcode`\<=\other \catcode`\>=\other \catcode`\+=\other +\catcode`\$=\other \def\normaldoublequote{"} \def\normaltilde{~} \def\normalcaret{^} @@ -4846,6 +5840,7 @@ width0pt\relax} \fi \def\normalless{<} \def\normalgreater{>} \def\normalplus{+} +\def\normaldollar{$} % This macro is used to make a character print one way in ttfont % where it can probably just be output, and another way in other fonts, @@ -4856,7 +5851,13 @@ width0pt\relax} \fi % interword stretch (and shrink), and it is reasonable to expect all % typewriter fonts to have this, we can check that font parameter. % -\def\ifusingtt#1#2{\ifdim \fontdimen3\the\font=0pt #1\else #2\fi} +\def\ifusingtt#1#2{\ifdim \fontdimen3\font=0pt #1\else #2\fi} + +% Same as above, but check for italic font. Actually this also catches +% non-italic slanted fonts since it is impossible to distinguish them from +% italic fonts. But since this is only used by $ and it uses \sl anyway +% this is not a problem. 
+\def\ifusingit#1#2{\ifdim \fontdimen1\font>0pt #1\else #2\fi} % Turn off all special characters except @ % (and those which the user can use as if they were ordinary). @@ -4864,10 +5865,10 @@ width0pt\relax} \fi % use math or other variants that look better in normal text. \catcode`\"=\active -\def\activedoublequote{{\tt \char '042}} +\def\activedoublequote{{\tt\char34}} \let"=\activedoublequote \catcode`\~=\active -\def~{{\tt \char '176}} +\def~{{\tt\char126}} \chardef\hat=`\^ \catcode`\^=\active \def^{{\tt \hat}} @@ -4878,7 +5879,7 @@ width0pt\relax} \fi \def\_{\leavevmode \kern.06em \vbox{\hrule width.3em height.1ex}} \catcode`\|=\active -\def|{{\tt \char '174}} +\def|{{\tt\char124}} \chardef \less=`\< \catcode`\<=\active \def<{{\tt \less}} @@ -4887,6 +5888,8 @@ width0pt\relax} \fi \def>{{\tt \gtr}} \catcode`\+=\active \def+{{\tt \char 43}} +\catcode`\$=\active +\def${\ifusingit{{\sl\$}}\normaldollar} %\catcode 27=\active %\def^^[{$\diamondsuit$} @@ -4917,9 +5920,6 @@ width0pt\relax} \fi % \normalbackslash outputs one backslash in fixed width font. \def\normalbackslash{{\tt\rawbackslashxx}} -% Say @foo, not \foo, in error messages. -\escapechar=`\@ - % \catcode 17=0 % Define control-q \catcode`\\=\active @@ -4933,7 +5933,8 @@ width0pt\relax} \fi @let|=@normalverticalbar @let<=@normalless @let>=@normalgreater -@let+=@normalplus} +@let+=@normalplus +@let$=@normaldollar} @def@normalturnoffactive{@let"=@normaldoublequote @let\=@normalbackslash @@ -4943,7 +5944,8 @@ width0pt\relax} \fi @let|=@normalverticalbar @let<=@normalless @let>=@normalgreater -@let+=@normalplus} +@let+=@normalplus +@let$=@normaldollar} % Make _ and + \other characters, temporarily. % This is canceled by @fixbackslash. @@ -4962,16 +5964,29 @@ width0pt\relax} \fi % Also back turn on active characters that might appear in the input % file name, in case not using a pre-dumped format. 
% -@gdef@fixbackslash{@ifx\@eatinput @let\ = @normalbackslash @fi - @catcode`+=@active @catcode`@_=@active} +@gdef@fixbackslash{% + @ifx\@eatinput @let\ = @normalbackslash @fi + @catcode`+=@active + @catcode`@_=@active +} + +% Say @foo, not \foo, in error messages. +@escapechar = `@@ -%% These look ok in all fonts, so just make them not special. The @rm below -%% makes sure that the current font starts out as the newly loaded cmr10 -@catcode`@$=@other @catcode`@%=@other @catcode`@&=@other @catcode`@#=@other +% These look ok in all fonts, so just make them not special. +@catcode`@& = @other +@catcode`@# = @other +@catcode`@% = @other +@c Set initial fonts. @textfonts @rm + @c Local variables: +@c eval: (add-hook 'write-file-hooks 'time-stamp) @c page-delimiter: "^\\\\message" +@c time-stamp-start: "def\\\\texinfoversion{" +@c time-stamp-format: "%:y-%02m-%02d.%02H" +@c time-stamp-end: "}" @c End: |