aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog6
-rw-r--r--TODO2
-rw-r--r--configh.in3
-rwxr-xr-xconfigure16
-rw-r--r--configure.ac3
-rw-r--r--protos.h2
-rw-r--r--test/ChangeLog6
-rwxr-xr-xtest/Gentests6
-rw-r--r--test/Makefile.am19
-rw-r--r--test/Makefile.in19
-rw-r--r--test/profile2.ok170
-rw-r--r--test/xref.awk139
-rw-r--r--test/xref.original313
13 files changed, 674 insertions, 30 deletions
diff --git a/ChangeLog b/ChangeLog
index 178af18a..92fe3b47 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Tue Dec 7 11:59:00 2010 Arnold D. Robbins <arnold@skeeve.com>
+
+ * configure.ac: Remove test for return type of sprintf. Another
+ renegade from the late 1980's bites the dust!
+ * protos.h (sprintf): Remove declaration.
+
Sun Dec 5 15:01:35 2010 Arnold D. Robbins <arnold@skeeve.com>
* eval.c (grow_stack): Change env var to GAWK_STACKSIZE.
diff --git a/TODO b/TODO
index 97f585c6..fee873b5 100644
--- a/TODO
+++ b/TODO
@@ -24,7 +24,7 @@ xgawk features (@load, -l, others)
#Review POSIX standard
#Fix issues related to POSIX
# - use of STRCOLL for comparison
-Add tests for pgawk
+#Add tests for pgawk
Add tests for patches in emails
#Add doc fix in email
#Update debugger chapter with new features
diff --git a/configh.in b/configh.in
index b3168c0a..6e1076bd 100644
--- a/configh.in
+++ b/configh.in
@@ -359,9 +359,6 @@
/* The size of `unsigned long', as computed by sizeof. */
#undef SIZEOF_UNSIGNED_LONG
-/* return type of sprintf */
-#undef SPRINTF_RET
-
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
diff --git a/configure b/configure
index 71d9e1fd..5f73abdc 100755
--- a/configure
+++ b/configure
@@ -8568,22 +8568,6 @@ _ACEOF
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-#include <stdio.h>
-
-_ACEOF
-if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
- $EGREP "int.*sprintf" >/dev/null 2>&1; then :
-
-$as_echo "#define SPRINTF_RET int" >>confdefs.h
-
-else
- $as_echo "#define SPRINTF_RET char *" >>confdefs.h
-
-fi
-rm -f conftest*
-
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
#include <sys/types.h>
int
main ()
diff --git a/configure.ac b/configure.ac
index 3e1b231a..a902f644 100644
--- a/configure.ac
+++ b/configure.ac
@@ -160,9 +160,6 @@ gl_AC_TYPE_UINTMAX_T
AC_CHECK_TYPE(ssize_t, int)
AC_CHECK_SIZEOF(unsigned int)
AC_CHECK_SIZEOF(unsigned long)
-AC_EGREP_HEADER([int.*sprintf], stdio.h,
- AC_DEFINE(SPRINTF_RET, int, [return type of sprintf]),
- AC_DEFINE(SPRINTF_RET, char *))
dnl see if time_t is defined in <sys/types.h>
AC_TRY_COMPILE([#include <sys/types.h>],[
time_t foo;
diff --git a/protos.h b/protos.h
index 0a9674d9..f95f80a0 100644
--- a/protos.h
+++ b/protos.h
@@ -103,8 +103,6 @@ extern int wait(int *);
#endif
extern void _exit(int);
-extern SPRINTF_RET sprintf(char *, const char *, ...);
-
#undef aptr_t
#if !defined(HAVE_STRCOLL)
diff --git a/test/ChangeLog b/test/ChangeLog
index 14a45b23..c539fc24 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,9 @@
+Tue Dec 7 22:31:51 2010 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.am (profile1, profile2): New tests.
+ * dtdgport.awk, xref.original, xref.awk, profile2.ok: New files.
+ * Gentests: Use POSIX character classes instead of ranges.
+
Mon Dec 6 19:47:09 2010 Arnold D. Robbins <arnold@skeeve.com>
* Makefile.am (whiny): Removed test.
diff --git a/test/Gentests b/test/Gentests
index b4d62f41..fc779f00 100755
--- a/test/Gentests
+++ b/test/Gentests
@@ -17,8 +17,8 @@ BEGIN {
# process the file Makefile.am:
-/^[A-Z_]*_TESTS *=/,/[^\\]$/ {
- gsub(/(^[A-Z_]*_TESTS *=|\\$)/,"")
+/^[[:upper:]_]*_TESTS *=/,/[^\\]$/ {
+ gsub(/(^[[:upper:]_]*_TESTS *=|\\$)/,"")
for (i = 1; i <= NF; i++)
tests[++ntests] = $i
next
@@ -45,7 +45,7 @@ BEGIN {
next
}
-/^[a-zA-Z_][a-zA-Z0-9_]*:/ {
+/^[[:alpha:]_][[:alnum:]_]*:/ {
# remember all targets from Makefile.am
sub(/:.*/,"")
targets[$0]
diff --git a/test/Makefile.am b/test/Makefile.am
index 13ba2155..fad37cbd 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -165,6 +165,7 @@ EXTRA_DIST = \
double1.ok \
double2.awk \
double2.ok \
+ dtdgport.awk \
dumpvars.in \
dumpvars.ok \
dynlj.awk \
@@ -704,6 +705,8 @@ EXTRA_DIST = \
wjposer1.awk \
wjposer1.in \
wjposer1.ok \
+ xref.awk \
+ xref.original \
zero2.awk \
zero2.ok \
zeroe0.awk \
@@ -754,7 +757,7 @@ GAWK_EXT_TESTS = \
gnuops2 gnuops3 gnureops \
icasefs icasers igncdym igncfs ignrcas2 ignrcase indirectcall lint \
lintold manyfiles match1 match2 match3 mbstr1 nondec nondec2 patsplit \
- posix printfbad1 printfbad2 \
+ posix profile1 profile2 printfbad1 printfbad2 \
procinfs rebuf regx8bit reint reint2 rsstart1 rsstart2 rsstart3 \
rstest6 shadow splitarg4 strftime strtonum switch2
@@ -781,6 +784,7 @@ GENTESTS_UNUSED = Makefile.in gtlnbufv.awk printfloat.awk
CMP = cmp
AWKPROG = ../gawk$(EXEEXT)
+PGAWKPROG = ../pgawk$(EXEEXT)
# This business forces the locale to be C for running the tests,
# unless we override it to something else for testing.
@@ -788,6 +792,7 @@ AWKPROG = ../gawk$(EXEEXT)
# This can also be done in individual tests where we wish to
# check things specifically not in the C locale.
AWK = LC_ALL=$${GAWKLOCALE:-C} LANG=$${GAWKLOCALE:-C} $(AWKPROG)
+PGAWK = LC_ALL=$${GAWKLOCALE:-C} LANG=$${GAWKLOCALE:-C} $(PGAWKPROG)
# Message stuff is to make it a little easier to follow.
# Make the pass-fail last and dependent on others to avoid
@@ -1299,6 +1304,18 @@ dumpvars::
@mv awkvars.out _$@
@-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
+profile1:
+ @echo $@
+ @$(AWK) --profile -f $(srcdir)/xref.awk $(srcdir)/dtdgport.awk > _$@.out1
+ @$(AWK) -f awkprof.out $(srcdir)/dtdgport.awk > _$@.out2
+ @cmp _$@.out1 _$@.out2 && rm _$@.out[12] || echo EXIT CODE: $$? >>_$@
+
+profile2:
+ @echo $@
+ @$(PGAWK) -f $(srcdir)/xref.awk $(srcdir)/dtdgport.awk > /dev/null
+ @sed 1,2d < awkprof.out > _$@; rm awkprof.out
+ @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
+
# Targets generated for other tests:
include Maketests
diff --git a/test/Makefile.in b/test/Makefile.in
index 096d8817..39f317e6 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -350,6 +350,7 @@ EXTRA_DIST = \
double1.ok \
double2.awk \
double2.ok \
+ dtdgport.awk \
dumpvars.in \
dumpvars.ok \
dynlj.awk \
@@ -889,6 +890,8 @@ EXTRA_DIST = \
wjposer1.awk \
wjposer1.in \
wjposer1.ok \
+ xref.awk \
+ xref.original \
zero2.awk \
zero2.ok \
zeroe0.awk \
@@ -938,7 +941,7 @@ GAWK_EXT_TESTS = \
gnuops2 gnuops3 gnureops \
icasefs icasers igncdym igncfs ignrcas2 ignrcase indirectcall lint \
lintold manyfiles match1 match2 match3 mbstr1 nondec nondec2 patsplit \
- posix printfbad1 printfbad2 \
+ posix profile1 profile2 printfbad1 printfbad2 \
procinfs rebuf regx8bit reint reint2 rsstart1 rsstart2 rsstart3 \
rstest6 shadow splitarg4 strftime strtonum switch2
@@ -962,6 +965,8 @@ FAIL_CODE1 = fnarray2 fnmisc gsubasgn mixed1 noparms paramdup synerr1 synerr2 un
GENTESTS_UNUSED = Makefile.in gtlnbufv.awk printfloat.awk
CMP = cmp
AWKPROG = ../gawk$(EXEEXT)
+PGAWKPROG = ../pgawk$(EXEEXT)
+PGAWK = LC_ALL=$${GAWKLOCALE:-C} LANG=$${GAWKLOCALE:-C} $(PGAWKPROG)
all: all-am
.SUFFIXES:
@@ -1646,6 +1651,18 @@ dumpvars::
@AWKPATH=$(srcdir) $(AWK) --dump-variables 1 < $(srcdir)/$@.in >/dev/null 2>&1 || echo EXIT CODE: $$? >>_$@
@mv awkvars.out _$@
@-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
+
+profile1:
+ @echo $@
+ @$(AWK) --profile -f $(srcdir)/xref.awk $(srcdir)/dtdgport.awk > _$@.out1
+ @$(AWK) -f awkprof.out $(srcdir)/dtdgport.awk > _$@.out2
+ @cmp _$@.out1 _$@.out2 && rm _$@.out[12] || echo EXIT CODE: $$? >>_$@
+
+profile2:
+ @echo $@
+ @$(PGAWK) -f $(srcdir)/xref.awk $(srcdir)/dtdgport.awk > /dev/null
+ @sed 1,2d < awkprof.out > _$@; rm awkprof.out
+ @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
Gt-dummy:
# file Maketests, generated from Makefile.am by the Gentests program
addcomma:
diff --git a/test/profile2.ok b/test/profile2.ok
new file mode 100644
index 00000000..11dccdcd
--- /dev/null
+++ b/test/profile2.ok
@@ -0,0 +1,170 @@
+ # BEGIN block(s)
+
+ BEGIN {
+ 1 asplit("BEGIN:END:atan2:break:close:continue:cos:delete:" "do:else:exit:exp:for:getline:gsub:if:in:index:int:" "length:log:match:next:print:printf:rand:return:sin:" "split:sprintf:sqrt:srand:sub:substr:system:while", keywords, ":")
+ 1 split("00:00:00:00:00:00:00:00:00:00:" "20:10:10:12:12:11:07:00:00:00:" "08:08:08:08:08:33:08:00:00:00:" "08:44:08:36:08:08:08:00:00:00:" "08:44:45:42:42:41:08", machine, ":")
+ 1 state = 1
+ 571 for (; ; ) {
+ 571 symb = lex()
+ 571 nextstate = substr(machine[state symb], 1, 1)
+ 571 act = substr(machine[state symb], 2, 1)
+ 571 if (act == "0") { # 12
+ 559 } else {
+ 559 if (act == "1") { # 8
+ 8 if (! inarray(tok, names)) { # 3
+ 3 names[++nnames] = tok
+ }
+ 8 lines[tok, ++xnames[tok]] = NR
+ 551 } else {
+ 551 if (act == "2") { # 426
+ 426 if (tok in local) { # 309
+ 309 tok = tok "(" funcname ")"
+ 309 if (! inarray(tok, names)) { # 22
+ 22 names[++nnames] = tok
+ }
+ 309 lines[tok, ++xnames[tok]] = NR
+ 117 } else {
+ 117 tok = tok "()"
+ 117 if (! inarray(tok, names)) { # 22
+ 22 names[++nnames] = tok
+ }
+ 117 lines[tok, ++xnames[tok]] = NR
+ }
+ 125 } else {
+ 125 if (act == "3") { # 4
+ 4 funcname = tok
+ 4 flines[tok] = NR
+ 121 } else {
+ 121 if (act == "4") { # 49
+ 49 braces++
+ 72 } else {
+ 72 if (act == "5") { # 49
+ 49 braces--
+ 49 if (braces == 0) { # 4
+ 22 for (temp in local) {
+ 22 delete local[temp]
+ }
+ 4 funcname = ""
+ 4 nextstate = 1
+ }
+ 23 } else {
+ 23 if (act == "6") { # 22
+ 22 local[tok] = 1
+ 1 } else {
+ 1 if (act == "7") { # 1
+ 1 break
+ } else {
+ if (act == "8") {
+ print("error: xref.awk: line " NR ": aborting") > "/dev/con"
+ exit 1
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ 570 state = nextstate
+ }
+ 1 sortcmd = "sort -k1"
+ 47 for (i = 1; i <= nnames; i++) {
+ 47 printf("%d ", xnames[names[i]]) | sortcmd
+ 47 if (index(names[i], "(") == 0) { # 3
+ 3 printf("%s(%d)", names[i], flines[names[i]]) | sortcmd
+ 44 } else {
+ 44 printf("%s", names[i]) | sortcmd
+ }
+ 434 for (j = 1; j <= xnames[names[i]]; j++) {
+ 434 if (lines[names[i], j] != lines[names[i], j - 1]) { # 390
+ 390 printf(" %d", lines[names[i], j]) | sortcmd
+ }
+ }
+ 47 printf("\n") | sortcmd
+ }
+ 1 close(sortcmd)
+ }
+
+
+ # Functions, listed alphabetically
+
+ 1 function asplit(str, arr, fs, n)
+ {
+ 1 n = split(str, temp_asplit, fs)
+ 36 for (i = 1; i <= n; i++) {
+ 36 arr[temp_asplit[i]]++
+ }
+ }
+
+ 434 function inarray(val, arr, j)
+ {
+ 11003 for (j in arr) {
+ 11003 if (arr[j] == val) { # 387
+ 387 return j
+ }
+ }
+ 47 return ""
+ }
+
+ 571 function lex()
+ {
+ 1702 for (; ; ) {
+ 1702 if (tok == "(eof)") {
+ return 7
+ }
+ 326 while (length(line) == 0) {
+ 326 if ((getline line) == 0) { # 1
+ 1 tok = "(eof)"
+ 1 return 7
+ }
+ }
+ 1701 sub(/^[ \t]+/, "", line)
+ 1701 sub(/^"([^"]|\\")*"/, "", line)
+ 1701 sub(/^\/([^\/]|\\\/)+\//, "", line)
+ 1701 sub(/^#.*/, "", line)
+ 1701 if (line ~ /^function/) { # 4
+ 4 tok = "function"
+ 4 line = substr(line, 9)
+ 4 return 1
+ 1697 } else {
+ 1697 if (line ~ /^{/) { # 53
+ 53 tok = "{"
+ 53 line = substr(line, 2)
+ 53 return 2
+ 1644 } else {
+ 1644 if (line ~ /^}/) { # 53
+ 53 tok = "}"
+ 53 line = substr(line, 2)
+ 53 return 3
+ 1591 } else {
+ 1591 if (match(line, /^[[:alpha:]_][[:alnum:]]*\[/)) { # 43
+ 43 tok = substr(line, 1, RLENGTH - 1)
+ 43 line = substr(line, RLENGTH + 1)
+ 43 return 5
+ 1548 } else {
+ 1548 if (match(line, /^[[:alpha:]_][[:alnum:]]*\(/)) { # 87
+ 87 tok = substr(line, 1, RLENGTH - 1)
+ 87 line = substr(line, RLENGTH + 1)
+ 87 if (! (tok in keywords)) { # 12
+ 12 return 6
+ }
+ 1461 } else {
+ 1461 if (match(line, /^[[:alpha:]_][[:alnum:]]*/)) { # 525
+ 525 tok = substr(line, 1, RLENGTH)
+ 525 line = substr(line, RLENGTH + 1)
+ 525 if (! (tok in keywords)) { # 405
+ 405 return 4
+ }
+ 936 } else {
+ 936 match(line, /^[^[:alpha:]_{}]/)
+ 936 tok = substr(line, 1, RLENGTH)
+ 936 line = substr(line, RLENGTH + 1)
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
diff --git a/test/xref.awk b/test/xref.awk
new file mode 100644
index 00000000..5c3f192b
--- /dev/null
+++ b/test/xref.awk
@@ -0,0 +1,139 @@
+ # xref.awk - cross reference an awk program
+
+ # 12/2010: Modified for gawk test suite to use a variable
+ # for the sort command and to use `sort -k1' instead of `sort +1'
+
+ BEGIN {
+
+ # create array of keywords to be ignored by lexer
+ asplit("BEGIN:END:atan2:break:close:continue:cos:delete:" \
+ "do:else:exit:exp:for:getline:gsub:if:in:index:int:" \
+ "length:log:match:next:print:printf:rand:return:sin:" \
+ "split:sprintf:sqrt:srand:sub:substr:system:while",
+ keywords,":")
+
+ # build the symbol-state table
+ split("00:00:00:00:00:00:00:00:00:00:" \
+ "20:10:10:12:12:11:07:00:00:00:" \
+ "08:08:08:08:08:33:08:00:00:00:" \
+ "08:44:08:36:08:08:08:00:00:00:" \
+ "08:44:45:42:42:41:08",machine,":")
+
+ # parse the input and store an intermediate representation
+ # of the cross-reference information
+
+ # set up the machine
+ state = 1
+
+ # run the machine
+ for (;;) {
+
+ # get next symbol
+ symb = lex()
+ nextstate = substr(machine[state symb],1,1)
+ act = substr(machine[state symb],2,1)
+
+ # perform required action
+ if ( act == "0" )
+ ; # do nothing
+ else if ( act == "1" ) {
+ if ( ! inarray(tok,names) )
+ names[++nnames] = tok
+ lines[tok,++xnames[tok]] = NR }
+ else if ( act == "2" ) {
+ if ( tok in local ) {
+ tok = tok "(" funcname ")"
+ if ( ! inarray(tok,names) )
+ names[++nnames] = tok
+ lines[tok,++xnames[tok]] = NR }
+ else {
+ tok = tok "()"
+ if ( ! inarray(tok,names) )
+ names[++nnames] = tok
+ lines[tok,++xnames[tok]] = NR } }
+ else if ( act == "3" ) {
+ funcname = tok
+ flines[tok] = NR }
+ else if ( act == "4" )
+ braces++
+ else if ( act == "5" ) {
+ braces--
+ if ( braces == 0 ) {
+ for ( temp in local )
+ delete local[temp]
+ funcname = ""
+ nextstate = 1 } }
+ else if ( act == "6" ) {
+ local[tok] = 1 }
+ else if ( act == "7" )
+ break
+ else if ( act == "8" ) {
+ print "error: xref.awk: line " NR ": aborting" \
+ > "/dev/con"
+ exit 1 }
+
+ # finished with current token
+ state = nextstate }
+
+ # finished parsing, now ready to print output
+ sortcmd = "sort -k1"
+ for ( i = 1; i <= nnames; i++ ) {
+ printf "%d ", xnames[names[i]] | sortcmd
+ if ( index(names[i],"(") == 0 )
+ printf "%s(%d)", names[i], flines[names[i]] | sortcmd
+ else
+ printf "%s", names[i] | sortcmd
+ for ( j = 1; j <= xnames[names[i]]; j++ )
+ if ( lines[names[i],j] != lines[names[i],j-1] )
+ printf " %d", lines[names[i],j] | sortcmd
+ printf "\n" | sortcmd }
+
+ close(sortcmd)
+ } # END OF PROGRAM
+
+ function asplit(str,arr,fs, n) { n = split(str,temp_asplit,fs)
+ for ( i = 1; i <= n; i++ ) arr[temp_asplit[i]]++ }
+
+ function inarray(val,arr, j) {
+ for ( j in arr )
+ if ( arr[j] == val ) return j
+ return "" }
+
+ function lex() {
+
+ for (;;) {
+
+ if ( tok == "(eof)" ) return 7
+
+ while ( length(line) == 0 )
+ if ( getline line == 0 ) {
+ tok = "(eof)"; return 7 }
+
+ sub(/^[ \t]+/,"",line) # remove white space,
+ sub(/^"([^"]|\\")*"/,"",line) # quoted strings,
+ sub(/^\/([^\/]|\\\/)+\//,"",line) # regular expressions,
+ sub(/^#.*/,"",line) # and comments
+
+ if ( line ~ /^function/ ) {
+ tok = "function"; line = substr(line,9); return 1 }
+ else if ( line ~ /^{/ ) {
+ tok = "{"; line = substr(line,2); return 2 }
+ else if ( line ~ /^}/ ) {
+ tok = "}"; line = substr(line,2); return 3 }
+ # change regexes to use posix character classes
+ else if ( match(line,/^[[:alpha:]_][[:alnum:]]*\[/) ) {
+ tok = substr(line,1,RLENGTH-1)
+ line = substr(line,RLENGTH+1)
+ return 5 }
+ else if ( match(line,/^[[:alpha:]_][[:alnum:]]*\(/) ) {
+ tok = substr(line,1,RLENGTH-1)
+ line = substr(line,RLENGTH+1)
+ if ( ! ( tok in keywords ) ) return 6 }
+ else if ( match(line,/^[[:alpha:]_][[:alnum:]]*/) ) {
+ tok = substr(line,1,RLENGTH)
+ line = substr(line,RLENGTH+1)
+ if ( ! ( tok in keywords ) ) return 4 }
+ else {
+ match(line,/^[^[:alpha:]_{}]/)
+ tok = substr(line,1,RLENGTH)
+ line = substr(line,RLENGTH+1) } } }
diff --git a/test/xref.original b/test/xref.original
new file mode 100644
index 00000000..a94de211
--- /dev/null
+++ b/test/xref.original
@@ -0,0 +1,313 @@
+XREF(AWK) Philip L. Bewig XREF(AWK)
+
+NAME
+
+ xref(awk) - produce a cross reference listing of an awk program
+
+SYNOPSIS
+
+ awk -f xref.awk [ file ... ]
+
+DESCRIPTION
+
+ XREF(AWK) takes as input a valid awk program and produces as out-
+ put a cross-reference listing of all variables and function calls
+ which appear in the program.
+
+ For ordinary variables and array variables, a line of the form
+
+ count var(func) lines ...
+
+ is produced, where "count" is the number of times the variable is
+ used, "var" is the name of the variable, "func" is the function
+ name to which the variable is local (a null "func" indicates that
+ the variable is global), and "lines" is the number of each line
+ where the variable appears. Appearances of the variable in a
+ function's parameter list are ignored. The number of lines shown
+ may differ from "count" if the variable appears more than once on
+ the same line.
+
+ For functions, a line of the form
+
+ count func(define) lines ...
+
+ is produced, where "count" is the number of times the function is
+ called, "func" is the name of the function, "define" is the lime
+ number where the function is defined, and "lines" is the number of
+ each line where the function is called. As for variables, the
+ number of lines shown may differ from "count."
+
+ Output lines for variables and functions are intermixed and are
+ sorted by name. Though terse, the output is informative, easy to
+ read, and amenable to further processing.
+
+EXAMPLE
+
+ The cross-reference listing produced by running xref.awk against
+ itself is shown below:
+
+ 5 NR() 39 45 50 53 68
+ 8 RLENGTH() 119 120 123 124 127 128 132 133
+ 10 act() 31 34 36 40 51 54 56 63 65 67
+ 1 arr(asplit) 90
+ 2 arr(inarray) 93 94
+ 1 asplit(89) 6
+ 3 braces() 55 57 58
+ 2 flines() 53 79
+ 1 fs(asplit) 89
+ 3 funcname() 42 52 61
+ 16 i() 76 77 78 79 81 82 83 84 90
+ 3 inarray(92) 37 43 48
+ 6 j() 82 83 84
+ 3 j(inarray) 93 94
+ 3 keywords() 10 125 129
+ 1 lex(97) 29
+ 31 line() 103 104 107 108 109 110 112 113 114 115 116 117 118
+ 119 120 122 123 124 126 127 128 131 132 133
+ 6 lines() 39 45 50 83 84
+ 4 local() 41 59 60 64
+ 3 machine() 17 30 31
+ 2 n(asplit) 89 90
+ 15 names() 37 38 43 44 48 49 77 78 79 81 82 83 84
+ 3 nextstate() 30 62 73
+ 4 nnames() 38 44 49 76
+ 4 state() 23 30 31 73
+ 1 str(asplit) 89
+ 3 symb() 29 30 31
+ 2 temp() 59 60
+ 2 temp_asplit() 89 90
+ 31 tok() 37 38 39 41 42 43 44 45 47 48 49 50 52 53 64 101 105
+ 113 115 117 119 123 125 127 129 132
+ 1 val(inarray) 94
+ 5 xnames() 39 45 50 77 82
+
+ For readability, some lines have been folded.
+
+SOURCE CODE
+
+ # xref.awk - cross reference an awk program
+
+ BEGIN {
+
+ # create array of keywords to be ignored by lexer
+ asplit("BEGIN:END:atan2:break:close:continue:cos:delete:" \
+ "do:else:exit:exp:for:getline:gsub:if:in:index:int:" \
+ "length:log:match:next:print:printf:rand:return:sin:" \
+ "split:sprintf:sqrt:srand:sub:substr:system:while",
+ keywords,":")
+
+ # build the symbol-state table
+ split("00:00:00:00:00:00:00:00:00:00:" \
+ "20:10:10:12:12:11:07:00:00:00:" \
+ "08:08:08:08:08:33:08:00:00:00:" \
+ "08:44:08:36:08:08:08:00:00:00:" \
+ "08:44:45:42:42:41:08",machine,":")
+
+ # parse the input and store an intermediate representation
+ # of the cross-reference information
+
+ # set up the machine
+ state = 1
+
+ # run the machine
+ for (;;) {
+
+ # get next symbol
+ symb = lex()
+ nextstate = substr(machine[state symb],1,1)
+ act = substr(machine[state symb],2,1)
+
+ # perform required action
+ if ( act == "0" )
+ ; # do nothing
+ else if ( act == "1" ) {
+ if ( ! inarray(tok,names) )
+ names[++nnames] = tok
+ lines[tok,++xnames[tok]] = NR }
+ else if ( act == "2" ) {
+ if ( tok in local ) {
+ tok = tok "(" funcname ")"
+ if ( ! inarray(tok,names) )
+ names[++nnames] = tok
+ lines[tok,++xnames[tok]] = NR }
+ else {
+ tok = tok "()"
+ if ( ! inarray(tok,names) )
+ names[++nnames] = tok
+ lines[tok,++xnames[tok]] = NR } }
+ else if ( act == "3" ) {
+ funcname = tok
+ flines[tok] = NR }
+ else if ( act == "4" )
+ braces++
+ else if ( act == "5" ) {
+ braces--
+ if ( braces == 0 ) {
+ for ( temp in local )
+ delete local[temp]
+ funcname = ""
+ nextstate = 1 } }
+ else if ( act == "6" ) {
+ local[tok] = 1 }
+ else if ( act == "7" )
+ break
+ else if ( act == "8" ) {
+ print "error: xref.awk: line " NR ": aborting" \
+ > "/dev/con"
+ exit 1 }
+
+ # finished with current token
+ state = nextstate }
+
+ # finished parsing, now ready to print output
+ for ( i = 1; i <= nnames; i++ ) {
+ printf "%d ", xnames[names[i]] |"sort +1"
+ if ( index(names[i],"(") == 0 )
+ printf "%s(%d)", names[i], flines[names[i]] |"sort +1"
+ else
+ printf "%s", names[i] |"sort +1"
+ for ( j = 1; j <= xnames[names[i]]; j++ )
+ if ( lines[names[i],j] != lines[names[i],j-1] )
+ printf " %d", lines[names[i],j] |"sort +1"
+ printf "\n" |"sort +1" }
+
+ } # END OF PROGRAM
+
+ function asplit(str,arr,fs, n) { n = split(str,temp_asplit,fs)
+ for ( i = 1; i <= n; i++ ) arr[temp_asplit[i]]++ }
+
+ function inarray(val,arr, j) {
+ for ( j in arr )
+ if ( arr[j] == val ) return j
+ return "" }
+
+ function lex() {
+
+ for (;;) {
+
+ if ( tok == "(eof)" ) return 7
+
+ while ( length(line) == 0 )
+ if ( getline line == 0 ) {
+ tok = "(eof)"; return 7 }
+
+ sub(/^[ \t]+/,"",line) # remove white space,
+ sub(/^"([^"]|\\")*"/,"",line) # quoted strings,
+ sub(/^\/([^\/]|\\\/)+\//,"",line) # regular expressions,
+ sub(/^#.*/,"",line) # and comments
+
+ if ( line ~ /^function/ ) {
+ tok = "function"; line = substr(line,9); return 1 }
+ else if ( line ~ /^{/ ) {
+ tok = "{"; line = substr(line,2); return 2 }
+ else if ( line ~ /^}/ ) {
+ tok = "}"; line = substr(line,2); return 3 }
+ else if ( match(line,/^[A-Za-z_][A-Za-z_0-9]*\[/) ) {
+ tok = substr(line,1,RLENGTH-1)
+ line = substr(line,RLENGTH+1)
+ return 5 }
+ else if ( match(line,/^[A-Za-z_][A-Za-z_0-9]*\(/) ) {
+ tok = substr(line,1,RLENGTH-1)
+ line = substr(line,RLENGTH+1)
+ if ( ! ( tok in keywords ) ) return 6 }
+ else if ( match(line,/^[A-Za-z_][A-Za-z_0-9]*/) ) {
+ tok = substr(line,1,RLENGTH)
+ line = substr(line,RLENGTH+1)
+ if ( ! ( tok in keywords ) ) return 4 }
+ else {
+ match(line,/^[^A-Za-z_{}]/)
+ tok = substr(line,1,RLENGTH)
+ line = substr(line,RLENGTH+1) } } }
+
+TECHNICAL DISCUSSION
+
+ Broadly, XREF(AWK) parses an awk program using a symbol-state
+ table, in much the same way as a yacc-generated parser. The
+ lexical analyzer recognizes seven distinct symbols: the word
+ "function", the left brace, the right brace, identifiers used
+ as variables, identifiers used as arrays, identifiers used as
+ functions, and end of file. The type of symbol is returned to
+ the parser as the value of the "lex" function, and the global
+ variable "tok" is set to the text of the current token.
+
+ The symbol-state table is stored in the "machine" array. The
+ table can be represented as follows:
+
+ symbol | 1 2 3 4 5 6 7
+ |
+ state | "function" { } var array func eof
+ -- -- -- -- -- -- -- -+- -- -- -- -- -- -- -- -- -- -- -- -- --
+ 1 any | 20 10 10 12 12 11 07
+ 2 "function" | 08 08 08 08 08 33 08
+ 3 "function" name | 08 44 08 36 08 08 08
+ 4 "function" name "{" | 08 44 45 42 42 41 08
+
+ where the first digit is the state to be entered after process-
+ ing the current token and the second digit is an action to be
+ performed. The actions are listed below:
+
+ 1 found a function call
+ 2 found a variable or array
+ 3 found a function definition
+ 4 found a left brace
+ 5 found a right brace
+ 6 found a local variable declaration
+ 7 found end of file
+ 8 found an error
+
+ Each of the first six actions causes some information about the
+ target program to be stored for later processing; the structures
+ used will be discussed below. The seventh action causes the
+ parser to exit. The eighth action causes errors to be reported
+ to standard error and the program to abort.
+
+ Before describing the intermediate data structures, we will
+ discuss some of the more interesting points in the action calls.
+ The "braces" variable keeps track of whether we are currently
+ within a functions; it is positive within a function and zero
+ without. When the right brace which causes the value of "braces"
+ to go from one to zero is found, the value of "nextstate" is
+ changed from four (scanning a function) to one (any) and the
+ names of local variables are forgotten. The "local" array is
+ accumulated from the variables found after the function name but
+ before the opening left brace of the function; action two care-
+ fully checks whether a variable is global or local before writing
+ to the intermediate data structure. The variable "funcname" is
+ the name of the current function when within a function and null
+ without.
+
+ The following arrays store an intermediate representation of the
+ variable and function identifiers of the target program:
+
+ names[1..nnames] = list of all identifiers, both variable and
+ function names; for variables, the name has the form
+ var(func), but for functions, there are no parentheses
+
+ xnames[names[i]] = number of times names[i] is used
+
+ lines[names[i],1..xnames[names[i]]] = list of line numbers
+ where names[i] is used
+
+ flines[names[i]] = line number where function names[i] is
+ defined
+
+ These arrays are created as the parser reads the input; when the
+ parser is finished, the arrays are output in user-readable form.
+
+PORTABILITY
+
+ XREF(AWK) will work with any implementation of nawk. The MKS
+ ToolKit implementation requires the large-model version of awk.
+
+HISTORY
+
+ Written by Phil Bewig on February 10, 1990. Inspired by
+ Exercise 3-16 of the book "The Awk Programming Language" by
+ Alfred V. Aho, Brian W. Kernighan and Peter J. Weinberger
+ (Addison-Wesley: 1988).
+
+COPYRIGHT
+
+ This program is placed in the public domain. However, the
+ author requests credit when distributed.
+