diff options
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | TODO | 2 | ||||
-rw-r--r-- | configh.in | 3 | ||||
-rwxr-xr-x | configure | 16 | ||||
-rw-r--r-- | configure.ac | 3 | ||||
-rw-r--r-- | protos.h | 2 | ||||
-rw-r--r-- | test/ChangeLog | 6 | ||||
-rwxr-xr-x | test/Gentests | 6 | ||||
-rw-r--r-- | test/Makefile.am | 19 | ||||
-rw-r--r-- | test/Makefile.in | 19 | ||||
-rw-r--r-- | test/profile2.ok | 170 | ||||
-rw-r--r-- | test/xref.awk | 139 | ||||
-rw-r--r-- | test/xref.original | 313 |
13 files changed, 674 insertions, 30 deletions
@@ -1,3 +1,9 @@ +Tue Dec 7 11:59:00 2010 Arnold D. Robbins <arnold@skeeve.com> + + * configure.ac: Remove test for return type of sprintf. Another + renegade from the late 1980's bites the dust! + * protos.h (sprintf): Remove declaration. + Sun Dec 5 15:01:35 2010 Arnold D. Robbins <arnold@skeeve.com> * eval.c (grow_stack): Change env var to GAWK_STACKSIZE. @@ -24,7 +24,7 @@ xgawk features (@load, -l, others) #Review POSIX standard #Fix issues related to POSIX # - use of STRCOLL for comparison -Add tests for pgawk +#Add tests for pgawk Add tests for patches in emails #Add doc fix in email #Update debugger chapter with new features @@ -359,9 +359,6 @@ /* The size of `unsigned long', as computed by sizeof. */ #undef SIZEOF_UNSIGNED_LONG -/* return type of sprintf */ -#undef SPRINTF_RET - /* Define to 1 if you have the ANSI C header files. */ #undef STDC_HEADERS @@ -8568,22 +8568,6 @@ _ACEOF cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -#include <stdio.h> - -_ACEOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - $EGREP "int.*sprintf" >/dev/null 2>&1; then : - -$as_echo "#define SPRINTF_RET int" >>confdefs.h - -else - $as_echo "#define SPRINTF_RET char *" >>confdefs.h - -fi -rm -f conftest* - -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ #include <sys/types.h> int main () diff --git a/configure.ac b/configure.ac index 3e1b231a..a902f644 100644 --- a/configure.ac +++ b/configure.ac @@ -160,9 +160,6 @@ gl_AC_TYPE_UINTMAX_T AC_CHECK_TYPE(ssize_t, int) AC_CHECK_SIZEOF(unsigned int) AC_CHECK_SIZEOF(unsigned long) -AC_EGREP_HEADER([int.*sprintf], stdio.h, - AC_DEFINE(SPRINTF_RET, int, [return type of sprintf]), - AC_DEFINE(SPRINTF_RET, char *)) dnl see if time_t is defined in <sys/types.h> AC_TRY_COMPILE([#include <sys/types.h>],[ time_t foo; @@ -103,8 +103,6 @@ extern int wait(int *); #endif extern void _exit(int); -extern SPRINTF_RET sprintf(char *, const char *, ...); - #undef aptr_t #if !defined(HAVE_STRCOLL) diff --git a/test/ChangeLog b/test/ChangeLog index 14a45b23..c539fc24 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,9 @@ +Tue Dec 7 22:31:51 2010 Arnold D. Robbins <arnold@skeeve.com> + + * Makefile.am (profile1, profile2): New tests. + * dtdgport.awk, xref.original, xref.awk, profile2.ok: New files. + * Gentests: Use POSIX character classes instead of ranges. + Mon Dec 6 19:47:09 2010 Arnold D. Robbins <arnold@skeeve.com> * Makefile.am (whiny): Removed test. 
diff --git a/test/Gentests b/test/Gentests index b4d62f41..fc779f00 100755 --- a/test/Gentests +++ b/test/Gentests @@ -17,8 +17,8 @@ BEGIN { # process the file Makefile.am: -/^[A-Z_]*_TESTS *=/,/[^\\]$/ { - gsub(/(^[A-Z_]*_TESTS *=|\\$)/,"") +/^[[:upper:]_]*_TESTS *=/,/[^\\]$/ { + gsub(/(^[[:upper:]_]*_TESTS *=|\\$)/,"") for (i = 1; i <= NF; i++) tests[++ntests] = $i next @@ -45,7 +45,7 @@ BEGIN { next } -/^[a-zA-Z_][a-zA-Z0-9_]*:/ { +/^[[:alpha:]_][[:alnum:]_]*:/ { # remember all targets from Makefile.am sub(/:.*/,"") targets[$0] diff --git a/test/Makefile.am b/test/Makefile.am index 13ba2155..fad37cbd 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -165,6 +165,7 @@ EXTRA_DIST = \ double1.ok \ double2.awk \ double2.ok \ + dtdgport.awk \ dumpvars.in \ dumpvars.ok \ dynlj.awk \ @@ -704,6 +705,8 @@ EXTRA_DIST = \ wjposer1.awk \ wjposer1.in \ wjposer1.ok \ + xref.awk \ + xref.original \ zero2.awk \ zero2.ok \ zeroe0.awk \ @@ -754,7 +757,7 @@ GAWK_EXT_TESTS = \ gnuops2 gnuops3 gnureops \ icasefs icasers igncdym igncfs ignrcas2 ignrcase indirectcall lint \ lintold manyfiles match1 match2 match3 mbstr1 nondec nondec2 patsplit \ - posix printfbad1 printfbad2 \ + posix profile1 profile2 printfbad1 printfbad2 \ procinfs rebuf regx8bit reint reint2 rsstart1 rsstart2 rsstart3 \ rstest6 shadow splitarg4 strftime strtonum switch2 @@ -781,6 +784,7 @@ GENTESTS_UNUSED = Makefile.in gtlnbufv.awk printfloat.awk CMP = cmp AWKPROG = ../gawk$(EXEEXT) +PGAWKPROG = ../pgawk$(EXEEXT) # This business forces the locale to be C for running the tests, # unless we override it to something else for testing. @@ -788,6 +792,7 @@ AWKPROG = ../gawk$(EXEEXT) # This can also be done in individual tests where we wish to # check things specifically not in the C locale. AWK = LC_ALL=$${GAWKLOCALE:-C} LANG=$${GAWKLOCALE:-C} $(AWKPROG) +PGAWK = LC_ALL=$${GAWKLOCALE:-C} LANG=$${GAWKLOCALE:-C} $(PGAWKPROG) # Message stuff is to make it a little easier to follow. 
# Make the pass-fail last and dependent on others to avoid @@ -1299,6 +1304,18 @@ dumpvars:: @mv awkvars.out _$@ @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@ +profile1: + @echo $@ + @$(AWK) --profile -f $(srcdir)/xref.awk $(srcdir)/dtdgport.awk > _$@.out1 + @$(AWK) -f awkprof.out $(srcdir)/dtdgport.awk > _$@.out2 + @cmp _$@.out1 _$@.out2 && rm _$@.out[12] || echo EXIT CODE: $$? >>_$@ + +profile2: + @echo $@ + @$(PGAWK) -f $(srcdir)/xref.awk $(srcdir)/dtdgport.awk > /dev/null + @sed 1,2d < awkprof.out > _$@; rm awkprof.out + @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@ + # Targets generated for other tests: include Maketests diff --git a/test/Makefile.in b/test/Makefile.in index 096d8817..39f317e6 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -350,6 +350,7 @@ EXTRA_DIST = \ double1.ok \ double2.awk \ double2.ok \ + dtdgport.awk \ dumpvars.in \ dumpvars.ok \ dynlj.awk \ @@ -889,6 +890,8 @@ EXTRA_DIST = \ wjposer1.awk \ wjposer1.in \ wjposer1.ok \ + xref.awk \ + xref.original \ zero2.awk \ zero2.ok \ zeroe0.awk \ @@ -938,7 +941,7 @@ GAWK_EXT_TESTS = \ gnuops2 gnuops3 gnureops \ icasefs icasers igncdym igncfs ignrcas2 ignrcase indirectcall lint \ lintold manyfiles match1 match2 match3 mbstr1 nondec nondec2 patsplit \ - posix printfbad1 printfbad2 \ + posix profile1 profile2 printfbad1 printfbad2 \ procinfs rebuf regx8bit reint reint2 rsstart1 rsstart2 rsstart3 \ rstest6 shadow splitarg4 strftime strtonum switch2 @@ -962,6 +965,8 @@ FAIL_CODE1 = fnarray2 fnmisc gsubasgn mixed1 noparms paramdup synerr1 synerr2 un GENTESTS_UNUSED = Makefile.in gtlnbufv.awk printfloat.awk CMP = cmp AWKPROG = ../gawk$(EXEEXT) +PGAWKPROG = ../pgawk$(EXEEXT) +PGAWK = LC_ALL=$${GAWKLOCALE:-C} LANG=$${GAWKLOCALE:-C} $(PGAWKPROG) all: all-am .SUFFIXES: @@ -1646,6 +1651,18 @@ dumpvars:: @AWKPATH=$(srcdir) $(AWK) --dump-variables 1 < $(srcdir)/$@.in >/dev/null 2>&1 || echo EXIT CODE: $$? 
>>_$@ @mv awkvars.out _$@ @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@ + +profile1: + @echo $@ + @$(AWK) --profile -f $(srcdir)/xref.awk $(srcdir)/dtdgport.awk > _$@.out1 + @$(AWK) -f awkprof.out $(srcdir)/dtdgport.awk > _$@.out2 + @cmp _$@.out1 _$@.out2 && rm _$@.out[12] || echo EXIT CODE: $$? >>_$@ + +profile2: + @echo $@ + @$(PGAWK) -f $(srcdir)/xref.awk $(srcdir)/dtdgport.awk > /dev/null + @sed 1,2d < awkprof.out > _$@; rm awkprof.out + @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@ Gt-dummy: # file Maketests, generated from Makefile.am by the Gentests program addcomma: diff --git a/test/profile2.ok b/test/profile2.ok new file mode 100644 index 00000000..11dccdcd --- /dev/null +++ b/test/profile2.ok @@ -0,0 +1,170 @@ + # BEGIN block(s) + + BEGIN { + 1 asplit("BEGIN:END:atan2:break:close:continue:cos:delete:" "do:else:exit:exp:for:getline:gsub:if:in:index:int:" "length:log:match:next:print:printf:rand:return:sin:" "split:sprintf:sqrt:srand:sub:substr:system:while", keywords, ":") + 1 split("00:00:00:00:00:00:00:00:00:00:" "20:10:10:12:12:11:07:00:00:00:" "08:08:08:08:08:33:08:00:00:00:" "08:44:08:36:08:08:08:00:00:00:" "08:44:45:42:42:41:08", machine, ":") + 1 state = 1 + 571 for (; ; ) { + 571 symb = lex() + 571 nextstate = substr(machine[state symb], 1, 1) + 571 act = substr(machine[state symb], 2, 1) + 571 if (act == "0") { # 12 + 559 } else { + 559 if (act == "1") { # 8 + 8 if (! inarray(tok, names)) { # 3 + 3 names[++nnames] = tok + } + 8 lines[tok, ++xnames[tok]] = NR + 551 } else { + 551 if (act == "2") { # 426 + 426 if (tok in local) { # 309 + 309 tok = tok "(" funcname ")" + 309 if (! inarray(tok, names)) { # 22 + 22 names[++nnames] = tok + } + 309 lines[tok, ++xnames[tok]] = NR + 117 } else { + 117 tok = tok "()" + 117 if (! 
inarray(tok, names)) { # 22 + 22 names[++nnames] = tok + } + 117 lines[tok, ++xnames[tok]] = NR + } + 125 } else { + 125 if (act == "3") { # 4 + 4 funcname = tok + 4 flines[tok] = NR + 121 } else { + 121 if (act == "4") { # 49 + 49 braces++ + 72 } else { + 72 if (act == "5") { # 49 + 49 braces-- + 49 if (braces == 0) { # 4 + 22 for (temp in local) { + 22 delete local[temp] + } + 4 funcname = "" + 4 nextstate = 1 + } + 23 } else { + 23 if (act == "6") { # 22 + 22 local[tok] = 1 + 1 } else { + 1 if (act == "7") { # 1 + 1 break + } else { + if (act == "8") { + print("error: xref.awk: line " NR ": aborting") > "/dev/con" + exit 1 + } + } + } + } + } + } + } + } + } + 570 state = nextstate + } + 1 sortcmd = "sort -k1" + 47 for (i = 1; i <= nnames; i++) { + 47 printf("%d ", xnames[names[i]]) | sortcmd + 47 if (index(names[i], "(") == 0) { # 3 + 3 printf("%s(%d)", names[i], flines[names[i]]) | sortcmd + 44 } else { + 44 printf("%s", names[i]) | sortcmd + } + 434 for (j = 1; j <= xnames[names[i]]; j++) { + 434 if (lines[names[i], j] != lines[names[i], j - 1]) { # 390 + 390 printf(" %d", lines[names[i], j]) | sortcmd + } + } + 47 printf("\n") | sortcmd + } + 1 close(sortcmd) + } + + + # Functions, listed alphabetically + + 1 function asplit(str, arr, fs, n) + { + 1 n = split(str, temp_asplit, fs) + 36 for (i = 1; i <= n; i++) { + 36 arr[temp_asplit[i]]++ + } + } + + 434 function inarray(val, arr, j) + { + 11003 for (j in arr) { + 11003 if (arr[j] == val) { # 387 + 387 return j + } + } + 47 return "" + } + + 571 function lex() + { + 1702 for (; ; ) { + 1702 if (tok == "(eof)") { + return 7 + } + 326 while (length(line) == 0) { + 326 if ((getline line) == 0) { # 1 + 1 tok = "(eof)" + 1 return 7 + } + } + 1701 sub(/^[ \t]+/, "", line) + 1701 sub(/^"([^"]|\\")*"/, "", line) + 1701 sub(/^\/([^\/]|\\\/)+\//, "", line) + 1701 sub(/^#.*/, "", line) + 1701 if (line ~ /^function/) { # 4 + 4 tok = "function" + 4 line = substr(line, 9) + 4 return 1 + 1697 } else { + 1697 if (line ~ 
/^{/) { # 53 + 53 tok = "{" + 53 line = substr(line, 2) + 53 return 2 + 1644 } else { + 1644 if (line ~ /^}/) { # 53 + 53 tok = "}" + 53 line = substr(line, 2) + 53 return 3 + 1591 } else { + 1591 if (match(line, /^[[:alpha:]_][[:alnum:]]*\[/)) { # 43 + 43 tok = substr(line, 1, RLENGTH - 1) + 43 line = substr(line, RLENGTH + 1) + 43 return 5 + 1548 } else { + 1548 if (match(line, /^[[:alpha:]_][[:alnum:]]*\(/)) { # 87 + 87 tok = substr(line, 1, RLENGTH - 1) + 87 line = substr(line, RLENGTH + 1) + 87 if (! (tok in keywords)) { # 12 + 12 return 6 + } + 1461 } else { + 1461 if (match(line, /^[[:alpha:]_][[:alnum:]]*/)) { # 525 + 525 tok = substr(line, 1, RLENGTH) + 525 line = substr(line, RLENGTH + 1) + 525 if (! (tok in keywords)) { # 405 + 405 return 4 + } + 936 } else { + 936 match(line, /^[^[:alpha:]_{}]/) + 936 tok = substr(line, 1, RLENGTH) + 936 line = substr(line, RLENGTH + 1) + } + } + } + } + } + } + } + } diff --git a/test/xref.awk b/test/xref.awk new file mode 100644 index 00000000..5c3f192b --- /dev/null +++ b/test/xref.awk @@ -0,0 +1,139 @@ + # xref.awk - cross reference an awk program + + # 12/2010: Modified for gawk test suite to use a variable + # for the sort command and to use `sort -k1' instead of `sort +1' + + BEGIN { + + # create array of keywords to be ignored by lexer + asplit("BEGIN:END:atan2:break:close:continue:cos:delete:" \ + "do:else:exit:exp:for:getline:gsub:if:in:index:int:" \ + "length:log:match:next:print:printf:rand:return:sin:" \ + "split:sprintf:sqrt:srand:sub:substr:system:while", + keywords,":") + + # build the symbol-state table + split("00:00:00:00:00:00:00:00:00:00:" \ + "20:10:10:12:12:11:07:00:00:00:" \ + "08:08:08:08:08:33:08:00:00:00:" \ + "08:44:08:36:08:08:08:00:00:00:" \ + "08:44:45:42:42:41:08",machine,":") + + # parse the input and store an intermediate representation + # of the cross-reference information + + # set up the machine + state = 1 + + # run the machine + for (;;) { + + # get next symbol + symb = lex() + 
nextstate = substr(machine[state symb],1,1) + act = substr(machine[state symb],2,1) + + # perform required action + if ( act == "0" ) + ; # do nothing + else if ( act == "1" ) { + if ( ! inarray(tok,names) ) + names[++nnames] = tok + lines[tok,++xnames[tok]] = NR } + else if ( act == "2" ) { + if ( tok in local ) { + tok = tok "(" funcname ")" + if ( ! inarray(tok,names) ) + names[++nnames] = tok + lines[tok,++xnames[tok]] = NR } + else { + tok = tok "()" + if ( ! inarray(tok,names) ) + names[++nnames] = tok + lines[tok,++xnames[tok]] = NR } } + else if ( act == "3" ) { + funcname = tok + flines[tok] = NR } + else if ( act == "4" ) + braces++ + else if ( act == "5" ) { + braces-- + if ( braces == 0 ) { + for ( temp in local ) + delete local[temp] + funcname = "" + nextstate = 1 } } + else if ( act == "6" ) { + local[tok] = 1 } + else if ( act == "7" ) + break + else if ( act == "8" ) { + print "error: xref.awk: line " NR ": aborting" \ + > "/dev/con" + exit 1 } + + # finished with current token + state = nextstate } + + # finished parsing, now ready to print output + sortcmd = "sort -k1" + for ( i = 1; i <= nnames; i++ ) { + printf "%d ", xnames[names[i]] | sortcmd + if ( index(names[i],"(") == 0 ) + printf "%s(%d)", names[i], flines[names[i]] | sortcmd + else + printf "%s", names[i] | sortcmd + for ( j = 1; j <= xnames[names[i]]; j++ ) + if ( lines[names[i],j] != lines[names[i],j-1] ) + printf " %d", lines[names[i],j] | sortcmd + printf "\n" | sortcmd } + + close(sortcmd) + } # END OF PROGRAM + + function asplit(str,arr,fs, n) { n = split(str,temp_asplit,fs) + for ( i = 1; i <= n; i++ ) arr[temp_asplit[i]]++ } + + function inarray(val,arr, j) { + for ( j in arr ) + if ( arr[j] == val ) return j + return "" } + + function lex() { + + for (;;) { + + if ( tok == "(eof)" ) return 7 + + while ( length(line) == 0 ) + if ( getline line == 0 ) { + tok = "(eof)"; return 7 } + + sub(/^[ \t]+/,"",line) # remove white space, + sub(/^"([^"]|\\")*"/,"",line) # quoted strings, + 
sub(/^\/([^\/]|\\\/)+\//,"",line) # regular expressions, + sub(/^#.*/,"",line) # and comments + + if ( line ~ /^function/ ) { + tok = "function"; line = substr(line,9); return 1 } + else if ( line ~ /^{/ ) { + tok = "{"; line = substr(line,2); return 2 } + else if ( line ~ /^}/ ) { + tok = "}"; line = substr(line,2); return 3 } + # change regexes to use posix character classes + else if ( match(line,/^[[:alpha:]_][[:alnum:]]*\[/) ) { + tok = substr(line,1,RLENGTH-1) + line = substr(line,RLENGTH+1) + return 5 } + else if ( match(line,/^[[:alpha:]_][[:alnum:]]*\(/) ) { + tok = substr(line,1,RLENGTH-1) + line = substr(line,RLENGTH+1) + if ( ! ( tok in keywords ) ) return 6 } + else if ( match(line,/^[[:alpha:]_][[:alnum:]]*/) ) { + tok = substr(line,1,RLENGTH) + line = substr(line,RLENGTH+1) + if ( ! ( tok in keywords ) ) return 4 } + else { + match(line,/^[^[:alpha:]_{}]/) + tok = substr(line,1,RLENGTH) + line = substr(line,RLENGTH+1) } } } diff --git a/test/xref.original b/test/xref.original new file mode 100644 index 00000000..a94de211 --- /dev/null +++ b/test/xref.original @@ -0,0 +1,313 @@ +XREF(AWK) Philip L. Bewig XREF(AWK) + +NAME + + xref(awk) - produce a cross reference listing of an awk program + +SYNOPSIS + + awk -f xref.awk [ file ... ] + +DESCRIPTION + + XREF(AWK) takes as input a valid awk program and produces as out- + put a cross-reference listing of all variables and function calls + which appear in the program. + + For ordinary variables and array variables, a line of the form + + count var(func) lines ... + + is produced, where "count" is the number of times the variable is + used, "var" is the name of the variable, "func" is the function + name to which the variable is local (a null "func" indicates that + the variable is global), and "lines" is the number of each line + where the variable appears. Appearances of the variable in a + function's parameter list are ignored. 
The number of lines shown + may differ from "count" if the variable appears more than once on + the same line. + + For functions, a line of the form + + count func(define) lines ... + + is produced, where "count" is the number of times the function is + called, "func" is the name of the function, "define" is the line + number where the function is defined, and "lines" is the number of + each line where the function is called. As for variables, the + number of lines shown may differ from "count." + + Output lines for variables and functions are intermixed and are + sorted by name. Though terse, the output is informative, easy to + read, and amenable to further processing. + +EXAMPLE + + The cross-reference listing produced by running xref.awk against + itself is shown below: + + 5 NR() 39 45 50 53 68 + 8 RLENGTH() 119 120 123 124 127 128 132 133 + 10 act() 31 34 36 40 51 54 56 63 65 67 + 1 arr(asplit) 90 + 2 arr(inarray) 93 94 + 1 asplit(89) 6 + 3 braces() 55 57 58 + 2 flines() 53 79 + 1 fs(asplit) 89 + 3 funcname() 42 52 61 + 16 i() 76 77 78 79 81 82 83 84 90 + 3 inarray(92) 37 43 48 + 6 j() 82 83 84 + 3 j(inarray) 93 94 + 3 keywords() 10 125 129 + 1 lex(97) 29 + 31 line() 103 104 107 108 109 110 112 113 114 115 116 117 118 + 119 120 122 123 124 126 127 128 131 132 133 + 6 lines() 39 45 50 83 84 + 4 local() 41 59 60 64 + 3 machine() 17 30 31 + 2 n(asplit) 89 90 + 15 names() 37 38 43 44 48 49 77 78 79 81 82 83 84 + 3 nextstate() 30 62 73 + 4 nnames() 38 44 49 76 + 4 state() 23 30 31 73 + 1 str(asplit) 89 + 3 symb() 29 30 31 + 2 temp() 59 60 + 2 temp_asplit() 89 90 + 31 tok() 37 38 39 41 42 43 44 45 47 48 49 50 52 53 64 101 105 + 113 115 117 119 123 125 127 129 132 + 1 val(inarray) 94 + 5 xnames() 39 45 50 77 82 + + For readability, some lines have been folded. 
+ +SOURCE CODE + + # xref.awk - cross reference an awk program + + BEGIN { + + # create array of keywords to be ignored by lexer + asplit("BEGIN:END:atan2:break:close:continue:cos:delete:" \ + "do:else:exit:exp:for:getline:gsub:if:in:index:int:" \ + "length:log:match:next:print:printf:rand:return:sin:" \ + "split:sprintf:sqrt:srand:sub:substr:system:while", + keywords,":") + + # build the symbol-state table + split("00:00:00:00:00:00:00:00:00:00:" \ + "20:10:10:12:12:11:07:00:00:00:" \ + "08:08:08:08:08:33:08:00:00:00:" \ + "08:44:08:36:08:08:08:00:00:00:" \ + "08:44:45:42:42:41:08",machine,":") + + # parse the input and store an intermediate representation + # of the cross-reference information + + # set up the machine + state = 1 + + # run the machine + for (;;) { + + # get next symbol + symb = lex() + nextstate = substr(machine[state symb],1,1) + act = substr(machine[state symb],2,1) + + # perform required action + if ( act == "0" ) + ; # do nothing + else if ( act == "1" ) { + if ( ! inarray(tok,names) ) + names[++nnames] = tok + lines[tok,++xnames[tok]] = NR } + else if ( act == "2" ) { + if ( tok in local ) { + tok = tok "(" funcname ")" + if ( ! inarray(tok,names) ) + names[++nnames] = tok + lines[tok,++xnames[tok]] = NR } + else { + tok = tok "()" + if ( ! 
inarray(tok,names) ) + names[++nnames] = tok + lines[tok,++xnames[tok]] = NR } } + else if ( act == "3" ) { + funcname = tok + flines[tok] = NR } + else if ( act == "4" ) + braces++ + else if ( act == "5" ) { + braces-- + if ( braces == 0 ) { + for ( temp in local ) + delete local[temp] + funcname = "" + nextstate = 1 } } + else if ( act == "6" ) { + local[tok] = 1 } + else if ( act == "7" ) + break + else if ( act == "8" ) { + print "error: xref.awk: line " NR ": aborting" \ + > "/dev/con" + exit 1 } + + # finished with current token + state = nextstate } + + # finished parsing, now ready to print output + for ( i = 1; i <= nnames; i++ ) { + printf "%d ", xnames[names[i]] |"sort +1" + if ( index(names[i],"(") == 0 ) + printf "%s(%d)", names[i], flines[names[i]] |"sort +1" + else + printf "%s", names[i] |"sort +1" + for ( j = 1; j <= xnames[names[i]]; j++ ) + if ( lines[names[i],j] != lines[names[i],j-1] ) + printf " %d", lines[names[i],j] |"sort +1" + printf "\n" |"sort +1" } + + } # END OF PROGRAM + + function asplit(str,arr,fs, n) { n = split(str,temp_asplit,fs) + for ( i = 1; i <= n; i++ ) arr[temp_asplit[i]]++ } + + function inarray(val,arr, j) { + for ( j in arr ) + if ( arr[j] == val ) return j + return "" } + + function lex() { + + for (;;) { + + if ( tok == "(eof)" ) return 7 + + while ( length(line) == 0 ) + if ( getline line == 0 ) { + tok = "(eof)"; return 7 } + + sub(/^[ \t]+/,"",line) # remove white space, + sub(/^"([^"]|\\")*"/,"",line) # quoted strings, + sub(/^\/([^\/]|\\\/)+\//,"",line) # regular expressions, + sub(/^#.*/,"",line) # and comments + + if ( line ~ /^function/ ) { + tok = "function"; line = substr(line,9); return 1 } + else if ( line ~ /^{/ ) { + tok = "{"; line = substr(line,2); return 2 } + else if ( line ~ /^}/ ) { + tok = "}"; line = substr(line,2); return 3 } + else if ( match(line,/^[A-Za-z_][A-Za-z_0-9]*\[/) ) { + tok = substr(line,1,RLENGTH-1) + line = substr(line,RLENGTH+1) + return 5 } + else if ( 
match(line,/^[A-Za-z_][A-Za-z_0-9]*\(/) ) { + tok = substr(line,1,RLENGTH-1) + line = substr(line,RLENGTH+1) + if ( ! ( tok in keywords ) ) return 6 } + else if ( match(line,/^[A-Za-z_][A-Za-z_0-9]*/) ) { + tok = substr(line,1,RLENGTH) + line = substr(line,RLENGTH+1) + if ( ! ( tok in keywords ) ) return 4 } + else { + match(line,/^[^A-Za-z_{}]/) + tok = substr(line,1,RLENGTH) + line = substr(line,RLENGTH+1) } } } + +TECHNICAL DISCUSSION + + Broadly, XREF(AWK) parses an awk program using a symbol-state + table, in much the same way as a yacc-generated parser. The + lexical analyzer recognizes seven distinct symbols: the word + "function", the left brace, the right brace, identifiers used + as variables, identifiers used as arrays, identifiers used as + functions, and end of file. The type of symbol is returned to + the parser as the value of the "lex" function, and the global + variable "tok" is set to the text of the current token. + + The symbol-state table is stored in the "machine" array. The + table can be represented as follows: + + symbol | 1 2 3 4 5 6 7 + | + state | "function" { } var array func eof + -- -- -- -- -- -- -- -+- -- -- -- -- -- -- -- -- -- -- -- -- -- + 1 any | 20 10 10 12 12 11 07 + 2 "function" | 08 08 08 08 08 33 08 + 3 "function" name | 08 44 08 36 08 08 08 + 4 "function" name "{" | 08 44 45 42 42 41 08 + + where the first digit is the state to be entered after process- + ing the current token and the second digit is an action to be + performed. The actions are listed below: + + 1 found a function call + 2 found a variable or array + 3 found a function definition + 4 found a left brace + 5 found a right brace + 6 found a local variable declaration + 7 found end of file + 8 found an error + + Each of the first six actions causes some information about the + target program to be stored for later processing; the structures + used will be discussed below. The seventh action causes the + parser to exit. 
The eighth action causes errors to be reported + to standard error and the program to abort. + + Before describing the intermediate data structures, we will + discuss some of the more interesting points in the action calls. + The "braces" variable keeps track of whether we are currently + within a function; it is positive within a function and zero + without. When the right brace which causes the value of "braces" + to go from one to zero is found, the value of "nextstate" is + changed from four (scanning a function) to one (any) and the + names of local variables are forgotten. The "local" array is + accumulated from the variables found after the function name but + before the opening left brace of the function; action two care- + fully checks whether a variable is global or local before writing + to the intermediate data structure. The variable "funcname" is + the name of the current function when within a function and null + without. + + The following arrays store an intermediate representation of the + variable and function identifiers of the target program: + + names[1..nnames] = list of all identifiers, both variable and + function names; for variables, the name has the form + var(func), but for functions, there are no parentheses + + xnames[names[i]] = number of times names[i] is used + + lines[names[i],1..xnames[names[i]]] = list of line numbers + where names[i] is used + + flines[names[i]] = line number where function names[i] is + defined + + These arrays are created as the parser reads the input; when the + parser is finished, the arrays are output in user-readable form. + +PORTABILITY + + XREF(AWK) will work with any implementation of nawk. The MKS + ToolKit implementation requires the large-model version of awk. + +HISTORY + + Written by Phil Bewig on February 10, 1990. Inspired by + Exercise 3-16 of the book "The Awk Programming Language" by + Alfred V. Aho, Brian W. Kernighan and Peter J. Weinberger + (Addison-Wesley: 1988). 
+ +COPYRIGHT + + This program is placed in the public domain. However, the + author requests credit when distributed. + |