13 files changed, 674 insertions, 30 deletions
diff --git a/ChangeLog b/ChangeLog
index 178af18a..92fe3b47 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Tue Dec  7 11:59:00 2010  Arnold D. Robbins  <arnold@skeeve.com>
+
+	* configure.ac: Remove test for return type of sprintf. Another
+	renegade from the late 1980's bites the dust!
+	* protos.h (sprintf): Remove declaration.
+
 Sun Dec  5 15:01:35 2010  Arnold D. Robbins  <arnold@skeeve.com>
 
 	* eval.c (grow_stack): Change env var to GAWK_STACKSIZE.
diff --git a/TODO b/TODO
index 97f585c6..fee873b5 100644
--- a/TODO
+++ b/TODO
@@ -24,7 +24,7 @@ xgawk features (@load, -l, others)
 #Review POSIX standard
 #Fix issues related to POSIX
 #	- use of STRCOLL for comparison
-Add tests for pgawk
+#Add tests for pgawk
 Add tests for patches in emails
 #Add doc fix in email
 #Update debugger chapter with new features
diff --git a/configh.in b/configh.in
index b3168c0a..6e1076bd 100644
--- a/configh.in
+++ b/configh.in
@@ -359,9 +359,6 @@
 /* The size of `unsigned long', as computed by sizeof. */
 #undef SIZEOF_UNSIGNED_LONG
 
-/* return type of sprintf */
-#undef SPRINTF_RET
-
 /* Define to 1 if you have the ANSI C header files. */
 #undef STDC_HEADERS
 
diff --git a/configure b/configure
index 71d9e1fd..5f73abdc 100755
--- a/configure
+++ b/configure
@@ -8568,22 +8568,6 @@ _ACEOF
 
 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
-#include <stdio.h>
-
-_ACEOF
-if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
-  $EGREP "int.*sprintf" >/dev/null 2>&1; then :
-
-$as_echo "#define SPRINTF_RET int" >>confdefs.h
-
-else
-  $as_echo "#define SPRINTF_RET char *" >>confdefs.h
-
-fi
-rm -f conftest*
-
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
 #include <sys/types.h>
 int
 main ()
diff --git a/configure.ac b/configure.ac
index 3e1b231a..a902f644 100644
--- a/configure.ac
+++ b/configure.ac
@@ -160,9 +160,6 @@ gl_AC_TYPE_UINTMAX_T
 AC_CHECK_TYPE(ssize_t, int)
 AC_CHECK_SIZEOF(unsigned int)
 AC_CHECK_SIZEOF(unsigned long)
-AC_EGREP_HEADER([int.*sprintf], stdio.h,
-	AC_DEFINE(SPRINTF_RET, int, [return type of sprintf]),
-	AC_DEFINE(SPRINTF_RET, char *))
 dnl see if time_t is defined in <sys/types.h>
 AC_TRY_COMPILE([#include <sys/types.h>],[
 	time_t foo;
diff --git a/protos.h b/protos.h
index 0a9674d9..f95f80a0 100644
--- a/protos.h
+++ b/protos.h
@@ -103,8 +103,6 @@ extern int wait(int *);
 #endif
 extern void _exit(int);
 
-extern SPRINTF_RET sprintf(char *, const char *, ...);
-
 #undef aptr_t
 
 #if !defined(HAVE_STRCOLL)
diff --git a/test/ChangeLog b/test/ChangeLog
index 14a45b23..c539fc24 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,9 @@
+Tue Dec  7 22:31:51 2010  Arnold D. Robbins  <arnold@skeeve.com>
+
+	* Makefile.am (profile1, profile2): New tests.
+	* dtdgport.awk, xref.original, xref.awk, profile2.ok: New files.
+	* Gentests: Use POSIX character classes instead of ranges.
+
 Mon Dec  6 19:47:09 2010  Arnold D. Robbins  <arnold@skeeve.com>
 
 	* Makefile.am (whiny): Removed test.
diff --git a/test/Gentests b/test/Gentests
index b4d62f41..fc779f00 100755
--- a/test/Gentests
+++ b/test/Gentests
@@ -17,8 +17,8 @@ BEGIN {
 
 # process the file Makefile.am:
 
-/^[A-Z_]*_TESTS *=/,/[^\\]$/ {
-	gsub(/(^[A-Z_]*_TESTS *=|\\$)/,"")
+/^[[:upper:]_]*_TESTS *=/,/[^\\]$/ {
+	gsub(/(^[[:upper:]_]*_TESTS *=|\\$)/,"")
 	for (i = 1; i <= NF; i++)
 		tests[++ntests] = $i
 	next
@@ -45,7 +45,7 @@ BEGIN {
 	next
 }
 
-/^[a-zA-Z_][a-zA-Z0-9_]*:/ {
+/^[[:alpha:]_][[:alnum:]_]*:/ {
 	# remember all targets from Makefile.am
 	sub(/:.*/,"")
 	targets[$0]
diff --git a/test/Makefile.am b/test/Makefile.am
index 13ba2155..fad37cbd 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -165,6 +165,7 @@ EXTRA_DIST = \
 	double1.ok \
 	double2.awk \
 	double2.ok \
+	dtdgport.awk \
 	dumpvars.in \
 	dumpvars.ok \
 	dynlj.awk \
@@ -704,6 +705,8 @@ EXTRA_DIST = \
 	wjposer1.awk \
 	wjposer1.in \
 	wjposer1.ok \
+	xref.awk \
+	xref.original \
 	zero2.awk \
 	zero2.ok \
 	zeroe0.awk \
@@ -754,7 +757,7 @@ GAWK_EXT_TESTS = \
 	gnuops2 gnuops3 gnureops \
 	icasefs icasers igncdym igncfs ignrcas2 ignrcase indirectcall lint \
 	lintold manyfiles match1 match2 match3 mbstr1 nondec nondec2 patsplit \
-	posix printfbad1 printfbad2 \
+	posix profile1 profile2 printfbad1 printfbad2 \
 	procinfs rebuf regx8bit reint reint2 rsstart1 rsstart2 rsstart3 \
 	rstest6 shadow splitarg4 strftime strtonum switch2
 
@@ -781,6 +784,7 @@ GENTESTS_UNUSED = Makefile.in gtlnbufv.awk printfloat.awk
 
 CMP = cmp
 AWKPROG = ../gawk$(EXEEXT)
+PGAWKPROG = ../pgawk$(EXEEXT)
 
 # This business forces the locale to be C for running the tests,
 # unless we override it to something else for testing.
@@ -788,6 +792,7 @@ AWKPROG = ../gawk$(EXEEXT)
 # This can also be done in individual tests where we wish to
 # check things specifically not in the C locale.
 AWK = LC_ALL=$${GAWKLOCALE:-C} LANG=$${GAWKLOCALE:-C} $(AWKPROG)
+PGAWK = LC_ALL=$${GAWKLOCALE:-C} LANG=$${GAWKLOCALE:-C} $(PGAWKPROG)
 
 # Message stuff is to make it a little easier to follow.
 # Make the pass-fail last and dependent on others to avoid
@@ -1299,6 +1304,26 @@ dumpvars::
 	@mv awkvars.out _$@
 	@-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
 
+# Pretty-print xref.awk with --profile, then check that the pretty-printed
+# program produces the same output as the original one.  The profile is
+# written to a per-test file instead of the default awkprof.out so that
+# profile1 and profile2 cannot clobber each other under `make -j', and it
+# is removed once it has been used so nothing is left in the build directory.
+profile1:
+	@echo $@
+	@$(AWK) --profile=ap-$@.out -f $(srcdir)/xref.awk $(srcdir)/dtdgport.awk > _$@.out1
+	@$(AWK) -f ap-$@.out $(srcdir)/dtdgport.awk > _$@.out2 ; rm -f ap-$@.out
+	@$(CMP) _$@.out1 _$@.out2 && rm _$@.out[12] || echo EXIT CODE: $$? >>_$@
+
+# Run xref.awk under pgawk and compare the resulting execution profile with
+# the expected one.  The first two lines of the profile are dropped before
+# comparing (presumably a header that varies from run to run -- confirm).
+profile2:
+	@echo $@
+	@$(PGAWK) --profile=ap-$@.out -f $(srcdir)/xref.awk $(srcdir)/dtdgport.awk > /dev/null
+	@sed 1,2d < ap-$@.out > _$@; rm -f ap-$@.out
+	@-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
+
 # Targets generated for other tests:
 include Maketests
 
diff --git a/test/Makefile.in b/test/Makefile.in
index 096d8817..39f317e6 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -350,6 +350,7 @@ EXTRA_DIST = \
 	double1.ok \
 	double2.awk \
 	double2.ok \
+	dtdgport.awk \
 	dumpvars.in \
 	dumpvars.ok \
 	dynlj.awk \
@@ -889,6 +890,8 @@ EXTRA_DIST = \
 	wjposer1.awk \
 	wjposer1.in \
 	wjposer1.ok \
+	xref.awk \
+	xref.original \
 	zero2.awk \
 	zero2.ok \
 	zeroe0.awk \
@@ -938,7 +941,7 @@ GAWK_EXT_TESTS = \
 	gnuops2 gnuops3 gnureops \
 	icasefs icasers igncdym igncfs ignrcas2 ignrcase indirectcall lint \
 	lintold manyfiles match1 match2 match3 mbstr1 nondec nondec2 patsplit \
-	posix printfbad1 printfbad2 \
+	posix profile1 profile2 printfbad1 printfbad2 \
 	procinfs rebuf regx8bit reint reint2 rsstart1 rsstart2 rsstart3 \
 	rstest6 shadow splitarg4 strftime strtonum switch2
 
@@ -962,6 +965,8 @@ FAIL_CODE1 = fnarray2 fnmisc gsubasgn mixed1 noparms paramdup synerr1 synerr2 un
 GENTESTS_UNUSED = Makefile.in gtlnbufv.awk printfloat.awk
 CMP = cmp
 AWKPROG = ../gawk$(EXEEXT)
+PGAWKPROG = ../pgawk$(EXEEXT)
+PGAWK = LC_ALL=$${GAWKLOCALE:-C} LANG=$${GAWKLOCALE:-C} $(PGAWKPROG)
 all: all-am
 
 .SUFFIXES:
@@ -1646,6 +1651,26 @@ dumpvars::
 	@AWKPATH=$(srcdir) $(AWK) --dump-variables 1 < $(srcdir)/$@.in >/dev/null 2>&1 || echo EXIT CODE: $$? >>_$@
 	@mv awkvars.out _$@
 	@-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
+
+# Pretty-print xref.awk with --profile, then check that the pretty-printed
+# program produces the same output as the original one.  The profile is
+# written to a per-test file instead of the default awkprof.out so that
+# profile1 and profile2 cannot clobber each other under `make -j', and it
+# is removed once it has been used so nothing is left in the build directory.
+profile1:
+	@echo $@
+	@$(AWK) --profile=ap-$@.out -f $(srcdir)/xref.awk $(srcdir)/dtdgport.awk > _$@.out1
+	@$(AWK) -f ap-$@.out $(srcdir)/dtdgport.awk > _$@.out2 ; rm -f ap-$@.out
+	@$(CMP) _$@.out1 _$@.out2 && rm _$@.out[12] || echo EXIT CODE: $$? >>_$@
+
+# Run xref.awk under pgawk and compare the resulting execution profile with
+# the expected one.  The first two lines of the profile are dropped before
+# comparing (presumably a header that varies from run to run -- confirm).
+profile2:
+	@echo $@
+	@$(PGAWK) --profile=ap-$@.out -f $(srcdir)/xref.awk $(srcdir)/dtdgport.awk > /dev/null
+	@sed 1,2d < ap-$@.out > _$@; rm -f ap-$@.out
+	@-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
 Gt-dummy:
 # file Maketests, generated from Makefile.am by the Gentests program
 addcomma:
diff --git a/test/profile2.ok b/test/profile2.ok
new file mode 100644
index 00000000..11dccdcd
--- /dev/null
+++ b/test/profile2.ok
@@ -0,0 +1,170 @@
+	# BEGIN block(s)
+
+	BEGIN {
+     1  	asplit("BEGIN:END:atan2:break:close:continue:cos:delete:" "do:else:exit:exp:for:getline:gsub:if:in:index:int:" "length:log:match:next:print:printf:rand:return:sin:" "split:sprintf:sqrt:srand:sub:substr:system:while", keywords, ":")
+     1  	split("00:00:00:00:00:00:00:00:00:00:" "20:10:10:12:12:11:07:00:00:00:" "08:08:08:08:08:33:08:00:00:00:" "08:44:08:36:08:08:08:00:00:00:" "08:44:45:42:42:41:08", machine, ":")
+     1  	state = 1
+   571  	for (; ; ) {
+   571  		symb = lex()
+   571  		nextstate = substr(machine[state symb], 1, 1)
+   571  		act = substr(machine[state symb], 2, 1)
+   571  		if (act == "0") { # 12
+   559  		} else {
+   559  			if (act == "1") { # 8
+     8  				if (! inarray(tok, names)) { # 3
+     3  					names[++nnames] = tok
+					}
+     8  				lines[tok, ++xnames[tok]] = NR
+   551  			} else {
+   551  				if (act == "2") { # 426
+   426  					if (tok in local) { # 309
+   309  						tok = tok "(" funcname ")"
+   309  						if (! inarray(tok, names)) { # 22
+    22  							names[++nnames] = tok
+							}
+   309  						lines[tok, ++xnames[tok]] = NR
+   117  					} else {
+   117  						tok = tok "()"
+   117  						if (! inarray(tok, names)) { # 22
+    22  							names[++nnames] = tok
+							}
+   117  						lines[tok, ++xnames[tok]] = NR
+						}
+   125  				} else {
+   125  					if (act == "3") { # 4
+     4  						funcname = tok
+     4  						flines[tok] = NR
+   121  					} else {
+   121  						if (act == "4") { # 49
+    49  							braces++
+    72  						} else {
+    72  							if (act == "5") { # 49
+    49  								braces--
+    49  								if (braces == 0) { # 4
+    22  									for (temp in local) {
+    22  										delete local[temp]
+										}
+     4  									funcname = ""
+     4  									nextstate = 1
+									}
+    23  							} else {
+    23  								if (act == "6") { # 22
+    22  									local[tok] = 1
+     1  								} else {
+     1  									if (act == "7") { # 1
+     1  										break
+										} else {
+											if (act == "8") {
+												print("error: xref.awk: line " NR ": aborting") > "/dev/con"
+												exit 1
+											}
+										}
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+   570  		state = nextstate
+		}
+     1  	sortcmd = "sort -k1"
+    47  	for (i = 1; i <= nnames; i++) {
+    47  		printf("%d ", xnames[names[i]]) | sortcmd
+    47  		if (index(names[i], "(") == 0) { # 3
+     3  			printf("%s(%d)", names[i], flines[names[i]]) | sortcmd
+    44  		} else {
+    44  			printf("%s", names[i]) | sortcmd
+			}
+   434  		for (j = 1; j <= xnames[names[i]]; j++) {
+   434  			if (lines[names[i], j] != lines[names[i], j - 1]) { # 390
+   390  				printf(" %d", lines[names[i], j]) | sortcmd
+				}
+			}
+    47  		printf("\n") | sortcmd
+		}
+     1  	close(sortcmd)
+	}
+
+
+	# Functions, listed alphabetically
+
+     1  function asplit(str, arr, fs, n)
+	{
+     1  	n = split(str, temp_asplit, fs)
+    36  	for (i = 1; i <= n; i++) {
+    36  		arr[temp_asplit[i]]++
+		}
+	}
+
+   434  function inarray(val, arr, j)
+	{
+ 11003  	for (j in arr) {
+ 11003  		if (arr[j] == val) { # 387
+   387  			return j
+			}
+		}
+    47  	return ""
+	}
+
+   571  function lex()
+	{
+  1702  	for (; ; ) {
+  1702  		if (tok == "(eof)") {
+				return 7
+			}
+   326  		while (length(line) == 0) {
+   326  			if ((getline line) == 0) { # 1
+     1  				tok = "(eof)"
+     1  				return 7
+				}
+			}
+  1701  		sub(/^[ \t]+/, "", line)
+  1701  		sub(/^"([^"]|\\")*"/, "", line)
+  1701  		sub(/^\/([^\/]|\\\/)+\//, "", line)
+  1701  		sub(/^#.*/, "", line)
+  1701  		if (line ~ /^function/) { # 4
+     4  			tok = "function"
+     4  			line = substr(line, 9)
+     4  			return 1
+  1697  		} else {
+  1697  			if (line ~ /^{/) { # 53
+    53  				tok = "{"
+    53  				line = substr(line, 2)
+    53  				return 2
+  1644  			} else {
+  1644  				if (line ~ /^}/) { # 53
+    53  					tok = "}"
+    53  					line = substr(line, 2)
+    53  					return 3
+  1591  				} else {
+  1591  					if (match(line, /^[[:alpha:]_][[:alnum:]]*\[/)) { # 43
+    43  						tok = substr(line, 1, RLENGTH - 1)
+    43  						line = substr(line, RLENGTH + 1)
+    43  						return 5
+  1548  					} else {
+  1548  						if (match(line, /^[[:alpha:]_][[:alnum:]]*\(/)) { # 87
+    87  							tok = substr(line, 1, RLENGTH - 1)
+    87  							line = substr(line, RLENGTH + 1)
+    87  							if (! (tok in keywords)) { # 12
+    12  								return 6
+								}
+  1461  						} else {
+  1461  							if (match(line, /^[[:alpha:]_][[:alnum:]]*/)) { # 525
+   525  								tok = substr(line, 1, RLENGTH)
+   525  								line = substr(line, RLENGTH + 1)
+   525  								if (! (tok in keywords)) { # 405
+   405  									return 4
+									}
+   936  							} else {
+   936  								match(line, /^[^[:alpha:]_{}]/)
+   936  								tok = substr(line, 1, RLENGTH)
+   936  								line = substr(line, RLENGTH + 1)
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+	}
diff --git a/test/xref.awk b/test/xref.awk
new file mode 100644
index 00000000..5c3f192b
--- /dev/null
+++ b/test/xref.awk
@@ -0,0 +1,139 @@
+        # xref.awk - cross reference an awk program
+
+	# 12/2010: Modified for gawk test suite to use a variable
+	# for the sort command and to use `sort -k1' instead of `sort +1'
+
+        BEGIN {        # the whole program runs here: input is read by lex() via getline
+
+                # create array of keywords to be ignored by lexer (each keyword becomes an index of keywords[])
+                asplit("BEGIN:END:atan2:break:close:continue:cos:delete:" \
+                        "do:else:exit:exp:for:getline:gsub:if:in:index:int:"  \
+                        "length:log:match:next:print:printf:rand:return:sin:" \
+                        "split:sprintf:sqrt:srand:sub:substr:system:while",
+                        keywords,":")
+
+                # build the symbol-state table: machine[state symb] is two digits, the next state followed by the action code
+                split("00:00:00:00:00:00:00:00:00:00:" \
+                          "20:10:10:12:12:11:07:00:00:00:" \
+                          "08:08:08:08:08:33:08:00:00:00:" \
+                          "08:44:08:36:08:08:08:00:00:00:" \
+                          "08:44:45:42:42:41:08",machine,":")
+
+                # parse the input and store an intermediate representation
+                # of the cross-reference information (in names[], xnames[], lines[] and flines[])
+
+                # set up the machine
+                state = 1
+
+                # run the machine
+                for (;;) {
+
+                        # get next symbol; lex() also leaves the token text in the global tok
+                        symb = lex()
+                        nextstate = substr(machine[state symb],1,1)
+                        act = substr(machine[state symb],2,1)
+
+                        # perform required action
+                        if ( act == "0" )
+                                ; # do nothing
+                        else if ( act == "1" ) {        # 1: found a function call
+                                if ( ! inarray(tok,names) )
+                                        names[++nnames] = tok
+                                lines[tok,++xnames[tok]] = NR }
+                        else if ( act == "2" ) {        # 2: found a variable or array
+                                if ( tok in local ) {
+                                        tok = tok "(" funcname ")"
+                                        if ( ! inarray(tok,names) )
+                                                names[++nnames] = tok
+                                        lines[tok,++xnames[tok]] = NR }
+                                else {
+                                        tok = tok "()"
+                                        if ( ! inarray(tok,names) )
+                                                names[++nnames] = tok
+                                        lines[tok,++xnames[tok]] = NR } }
+                        else if ( act == "3" ) {        # 3: found a function definition
+                                funcname = tok
+                                flines[tok] = NR }
+                        else if ( act == "4" )          # 4: found a left brace
+                                braces++
+                        else if ( act == "5" ) {        # 5: found a right brace
+                                braces--
+                                if ( braces == 0 ) {    # end of a function body: forget its locals
+                                        for ( temp in local )
+                                                delete local[temp]
+                                        funcname = ""
+                                        nextstate = 1 } }
+                        else if ( act == "6" ) {        # 6: found a local variable declaration
+                                local[tok] = 1 }
+                        else if ( act == "7" )          # 7: found end of file
+                                break
+                        else if ( act == "8" ) {        # 8: found an error
+                                print "error: xref.awk: line " NR ": aborting" \
+                                        > "/dev/con"    # NOTE(review): "/dev/con" looks like a DOS console name; xref.original says errors go to standard error -- confirm
+                                exit 1 }
+
+                        # finished with current token
+                        state = nextstate }
+
+                # finished parsing, now ready to print output; every output line is piped through sortcmd
+		sortcmd = "sort -k1"
+                for ( i = 1; i <= nnames; i++ ) {
+                        printf "%d ", xnames[names[i]] | sortcmd
+                        if ( index(names[i],"(") == 0 )        # no parenthesis in the name: it is a function
+                                printf "%s(%d)", names[i], flines[names[i]] | sortcmd
+                        else
+                                printf "%s", names[i] | sortcmd
+                        for ( j = 1; j <= xnames[names[i]]; j++ )
+                                if ( lines[names[i],j] != lines[names[i],j-1] )        # print each line number once
+                                        printf " %d", lines[names[i],j] | sortcmd
+                        printf "\n" | sortcmd }
+
+			close(sortcmd)        # close the pipe to the sort command
+                } # END OF PROGRAM
+
+        function asplit(str,arr,fs,  n) { n = split(str,temp_asplit,fs)    # make every fs-separated piece of str an index of arr; n is a local
+                for ( i = 1; i <= n; i++ ) arr[temp_asplit[i]]++ }         # note: i and temp_asplit are not parameters, so they are globals
+
+        function inarray(val,arr,  j) {        # look for val among the values of arr; j is a local
+            for ( j in arr )
+                if ( arr[j] == val ) return j   # index of a matching element
+            return "" }                         # empty string when val is not present
+
+        function lex() {        # return the code (1-7) of the next symbol and leave its text in the global tok
+
+                for (;;) {
+
+                        if ( tok == "(eof)" ) return 7        # end of input was already reported
+
+                        while ( length(line) == 0 )
+                                if ( getline line == 0 ) {    # 0 means end of input; NOTE(review): an error return (-1) is not handled and would loop here -- confirm
+                                        tok = "(eof)"; return 7 }
+
+                        sub(/^[ \t]+/,"",line)                                # remove white space,
+                        sub(/^"([^"]|\\")*"/,"",line)             # quoted strings,
+                        sub(/^\/([^\/]|\\\/)+\//,"",line)     # regular expressions,
+                        sub(/^#.*/,"",line)                                   # and comments
+
+                        if ( line ~ /^function/ ) {
+                                tok = "function"; line = substr(line,9); return 1 }
+                        else if ( line ~ /^{/ ) {
+                                tok = "{"; line = substr(line,2); return 2 }
+                        else if ( line ~ /^}/ ) {
+                                tok = "}"; line = substr(line,2); return 3 }
+			# regexes changed to use POSIX character classes; NOTE(review): [[:alnum:]]* does not match "_" inside a name, while xref.original has [A-Za-z_0-9]* -- confirm this is intended
+                        else if ( match(line,/^[[:alpha:]_][[:alnum:]]*\[/) ) {        # name followed by "[": array
+                                tok = substr(line,1,RLENGTH-1)
+                                line = substr(line,RLENGTH+1)
+                                return 5 }
+                        else if ( match(line,/^[[:alpha:]_][[:alnum:]]*\(/) ) {        # name followed by "(": function
+                                tok = substr(line,1,RLENGTH-1)
+                                line = substr(line,RLENGTH+1)
+                                if ( ! ( tok in keywords ) ) return 6 }                # keywords are skipped: loop for the next token
+                        else if ( match(line,/^[[:alpha:]_][[:alnum:]]*/) ) {          # plain name: variable
+                                tok = substr(line,1,RLENGTH)
+                                line = substr(line,RLENGTH+1)
+                                if ( ! ( tok in keywords ) ) return 4 }                # keywords are skipped: loop for the next token
+                        else {                                                         # anything else: drop one character and keep scanning
+                                match(line,/^[^[:alpha:]_{}]/)
+                                tok = substr(line,1,RLENGTH)
+                                line = substr(line,RLENGTH+1) } } }
diff --git a/test/xref.original b/test/xref.original
new file mode 100644
index 00000000..a94de211
--- /dev/null
+++ b/test/xref.original
@@ -0,0 +1,313 @@
+XREF(AWK)                   Philip L. Bewig                   XREF(AWK)
+
+NAME
+
+        xref(awk) - produce a cross reference listing of an awk program
+
+SYNOPSIS
+
+        awk -f xref.awk [ file ... ]
+
+DESCRIPTION
+
+        XREF(AWK) takes as input a valid awk program and produces as out-
+        put a cross-reference listing of all variables and function calls
+        which appear in the program.
+
+        For ordinary variables and array variables, a line of the form
+
+                count var(func) lines ...
+
+        is produced, where "count" is the number of times the variable is
+        used, "var" is the name of the variable, "func" is the function
+        name to which the variable is local (a null "func" indicates that
+        the variable is global), and "lines" is the number of each line
+        where the variable appears.  Appearances of the variable in a
+        function's parameter list are ignored.  The number of lines shown
+        may differ from "count" if the variable appears more than once on
+        the same line.
+
+        For functions, a line of the form
+
+                count func(define) lines ...
+
+        is produced, where "count" is the number of times the function is
+        called, "func" is the name of the function, "define" is the line
+        number where the function is defined, and "lines" is the number of
+        each line where the function is called.  As for variables, the
+        number of lines shown may differ from "count."
+
+        Output lines for variables and functions are intermixed and are
+        sorted by name.  Though terse, the output is informative, easy to
+        read, and amenable to further processing.
+
+EXAMPLE
+
+        The cross-reference listing produced by running xref.awk against
+        itself is shown below:
+
+                5 NR() 39 45 50 53 68
+                8 RLENGTH() 119 120 123 124 127 128 132 133
+                10 act() 31 34 36 40 51 54 56 63 65 67
+                1 arr(asplit) 90
+                2 arr(inarray) 93 94
+                1 asplit(89) 6
+                3 braces() 55 57 58
+                2 flines() 53 79
+                1 fs(asplit) 89
+                3 funcname() 42 52 61
+                16 i() 76 77 78 79 81 82 83 84 90
+                3 inarray(92) 37 43 48
+                6 j() 82 83 84
+                3 j(inarray) 93 94
+                3 keywords() 10 125 129
+                1 lex(97) 29
+                31 line() 103 104 107 108 109 110 112 113 114 115 116 117 118
+                        119 120 122 123 124 126 127 128 131 132 133
+                6 lines() 39 45 50 83 84
+                4 local() 41 59 60 64
+                3 machine() 17 30 31
+                2 n(asplit) 89 90
+                15 names() 37 38 43 44 48 49 77 78 79 81 82 83 84
+                3 nextstate() 30 62 73
+                4 nnames() 38 44 49 76
+                4 state() 23 30 31 73
+                1 str(asplit) 89
+                3 symb() 29 30 31
+                2 temp() 59 60
+                2 temp_asplit() 89 90
+                31 tok() 37 38 39 41 42 43 44 45 47 48 49 50 52 53 64 101 105
+                        113 115 117 119 123 125 127 129 132
+                1 val(inarray) 94
+                5 xnames() 39 45 50 77 82
+
+        For readability, some lines have been folded.
+
+SOURCE CODE
+
+        # xref.awk - cross reference an awk program
+
+        BEGIN {
+
+                # create array of keywords to be ignored by lexer
+                asplit("BEGIN:END:atan2:break:close:continue:cos:delete:" \
+                        "do:else:exit:exp:for:getline:gsub:if:in:index:int:"  \
+                        "length:log:match:next:print:printf:rand:return:sin:" \
+                        "split:sprintf:sqrt:srand:sub:substr:system:while",
+                        keywords,":")
+
+                # build the symbol-state table
+                split("00:00:00:00:00:00:00:00:00:00:" \
+                          "20:10:10:12:12:11:07:00:00:00:" \
+                          "08:08:08:08:08:33:08:00:00:00:" \
+                          "08:44:08:36:08:08:08:00:00:00:" \
+                          "08:44:45:42:42:41:08",machine,":")
+
+                # parse the input and store an intermediate representation
+                # of the cross-reference information
+
+                # set up the machine
+                state = 1
+
+                # run the machine
+                for (;;) {
+
+                        # get next symbol
+                        symb = lex()
+                        nextstate = substr(machine[state symb],1,1)
+                        act = substr(machine[state symb],2,1)
+
+                        # perform required action
+                        if ( act == "0" )
+                                ; # do nothing
+                        else if ( act == "1" ) {
+                                if ( ! inarray(tok,names) )
+                                        names[++nnames] = tok
+                                lines[tok,++xnames[tok]] = NR }
+                        else if ( act == "2" ) {
+                                if ( tok in local ) {
+                                        tok = tok "(" funcname ")"
+                                        if ( ! inarray(tok,names) )
+                                                names[++nnames] = tok
+                                        lines[tok,++xnames[tok]] = NR }
+                                else {
+                                        tok = tok "()"
+                                        if ( ! inarray(tok,names) )
+                                                names[++nnames] = tok
+                                        lines[tok,++xnames[tok]] = NR } }
+                        else if ( act == "3" ) {
+                                funcname = tok
+                                flines[tok] = NR }
+                        else if ( act == "4" )
+                                braces++
+                        else if ( act == "5" ) {
+                                braces--
+                                if ( braces == 0 ) {
+                                        for ( temp in local )
+                                                delete local[temp]
+                                        funcname = ""
+                                        nextstate = 1 } }
+                        else if ( act == "6" ) {
+                                local[tok] = 1 }
+                        else if ( act == "7" )
+                                break
+                        else if ( act == "8" ) {
+                                print "error: xref.awk: line " NR ": aborting" \
+                                        > "/dev/con"
+                                exit 1 }
+
+                        # finished with current token
+                        state = nextstate }
+
+                # finished parsing, now ready to print output
+                for ( i = 1; i <= nnames; i++ ) {
+                        printf "%d ", xnames[names[i]] |"sort +1"
+                        if ( index(names[i],"(") == 0 )
+                                printf "%s(%d)", names[i], flines[names[i]] |"sort +1"
+                        else
+                                printf "%s", names[i] |"sort +1"
+                        for ( j = 1; j <= xnames[names[i]]; j++ )
+                                if ( lines[names[i],j] != lines[names[i],j-1] )
+                                        printf " %d", lines[names[i],j] |"sort +1"
+                        printf "\n" |"sort +1" }
+
+                } # END OF PROGRAM
+
+        function asplit(str,arr,fs,  n) { n = split(str,temp_asplit,fs)
+                for ( i = 1; i <= n; i++ ) arr[temp_asplit[i]]++ }
+
+        function inarray(val,arr,  j) {
+            for ( j in arr )
+                if ( arr[j] == val ) return j
+            return "" }
+
+        function lex() {
+
+                for (;;) {
+
+                        if ( tok == "(eof)" ) return 7
+
+                        while ( length(line) == 0 )
+                                if ( getline line == 0 ) {
+                                        tok = "(eof)"; return 7 }
+
+                        sub(/^[ \t]+/,"",line)                                # remove white space,
+                        sub(/^"([^"]|\\")*"/,"",line)             # quoted strings,
+                        sub(/^\/([^\/]|\\\/)+\//,"",line)     # regular expressions,
+                        sub(/^#.*/,"",line)                                   # and comments
+
+                        if ( line ~ /^function/ ) {
+                                tok = "function"; line = substr(line,9); return 1 }
+                        else if ( line ~ /^{/ ) {
+                                tok = "{"; line = substr(line,2); return 2 }
+                        else if ( line ~ /^}/ ) {
+                                tok = "}"; line = substr(line,2); return 3 }
+                        else if ( match(line,/^[A-Za-z_][A-Za-z_0-9]*\[/) ) {
+                                tok = substr(line,1,RLENGTH-1)
+                                line = substr(line,RLENGTH+1)
+                                return 5 }
+                        else if ( match(line,/^[A-Za-z_][A-Za-z_0-9]*\(/) ) {
+                                tok = substr(line,1,RLENGTH-1)
+                                line = substr(line,RLENGTH+1)
+                                if ( ! ( tok in keywords ) ) return 6 }
+                        else if ( match(line,/^[A-Za-z_][A-Za-z_0-9]*/) ) {
+                                tok = substr(line,1,RLENGTH)
+                                line = substr(line,RLENGTH+1)
+                                if ( ! ( tok in keywords ) ) return 4 }
+                        else {
+                                match(line,/^[^A-Za-z_{}]/)
+                                tok = substr(line,1,RLENGTH)
+                                line = substr(line,RLENGTH+1) } } }
+
+TECHNICAL DISCUSSION
+
+        Broadly, XREF(AWK) parses an awk program using a symbol-state
+        table, in much the same way as a yacc-generated parser.  The
+        lexical analyzer recognizes seven distinct symbols:  the word
+        "function", the left brace, the right brace, identifiers used
+        as variables, identifiers used as arrays, identifiers used as
+        functions, and end of file.  The type of symbol is returned to
+        the parser as the value of the "lex" function, and the global
+        variable "tok" is set to the text of the current token.
+
+        The symbol-state table is stored in the "machine" array.  The
+        table can be represented as follows:
+
+                       symbol |     1       2  3   4     5     6     7
+                              |
+        state                 | "function"  {  }  var  array  func  eof
+        -- -- -- -- -- -- -- -+- -- -- -- -- -- -- -- -- -- -- -- -- --
+        1 any                 |     20     10  10  12    12    11   07
+        2 "function"          |     08     08  08  08    08    33   08
+        3 "function" name     |     08     44  08  36    08    08   08
+        4 "function" name "{" |     08     44  45  42    42    41   08
+
+        where the first digit is the state to be entered after process-
+        ing the current token and the second digit is an action to be
+        performed.  The actions are listed below:
+
+                1       found a function call
+                2       found a variable or array
+                3       found a function definition
+                4       found a left brace
+                5       found a right brace
+                6       found a local variable declaration
+                7       found end of file
+                8       found an error
+
+        Each of the first six actions causes some information about the
+        target program to be stored for later processing; the structures
+        used will be discussed below.  The seventh action causes the
+        parser to exit.  The eighth action causes errors to be reported
+        to standard error and the program to abort.
+
+        Before describing the intermediate data structures, we will
+        discuss some of the more interesting points in the action calls.
+        The "braces" variable keeps track of whether we are currently
+        within a function; it is positive within a function and zero
+        without.  When the right brace which causes the value of "braces"
+        to go from one to zero is found, the value of "nextstate" is
+        changed from four (scanning a function) to one (any) and the
+        names of local variables are forgotten.  The "local" array is
+        accumulated from the variables found after the function name but
+        before the opening left brace of the function; action two care-
+        fully checks whether a variable is global or local before writing
+        to the intermediate data structure.  The variable "funcname" is
+        the name of the current function when within a function and null
+        without.
+
+        The following arrays store an intermediate representation of the
+        variable and function identifiers of the target program:
+
+                names[1..nnames] = list of all identifiers, both variable and
+                        function names; for variables, the name has the form
+                        var(func), but for functions, there are no parentheses
+
+                xnames[names[i]] = number of times names[i] is used
+
+                lines[names[i],1..xnames[names[i]]] = list of line numbers
+                        where names[i] is used
+
+                flines[names[i]] = line number where function names[i] is
+                        defined
+
+        These arrays are created as the parser reads the input; when the
+        parser is finished, the arrays are output in user-readable form.
+
+PORTABILITY
+
+        XREF(AWK) will work with any implementation of nawk.  The MKS
+        ToolKit implementation requires the large-model version of awk.
+
+HISTORY
+
+        Written by Phil Bewig on February 10, 1990.  Inspired by
+        Exercise 3-16 of the book "The Awk Programming Language" by
+        Alfred V. Aho, Brian W. Kernighan and Peter J. Weinberger
+        (Addison-Wesley:  1988).
+
+COPYRIGHT
+
+        This program is placed in the public domain.  However, the
+        author requests credit when distributed. 
+