diff options
-rw-r--r-- | test/ChangeLog | 5 | ||||
-rw-r--r-- | test/Makefile.am | 4 | ||||
-rw-r--r-- | test/Makefile.in | 9 | ||||
-rw-r--r-- | test/Maketests | 5 | ||||
-rw-r--r-- | test/fpat4.awk | 105 | ||||
-rw-r--r-- | test/fpat4.ok | 65 |
6 files changed, 191 insertions, 2 deletions
diff --git a/test/ChangeLog b/test/ChangeLog index 9f97f734..8a264e3f 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,8 @@ +2015-03-10 Arnold D. Robbins <arnold@skeeve.com> + + * Makefile.am (fpat4): New test. + * fpat4.awk, fpat4.ok: New files. + 2015-03-06 Arnold D. Robbins <arnold@skeeve.com> * charasbytes.awk, ofs1.awk, range1.awk, sortglos.awk, diff --git a/test/Makefile.am b/test/Makefile.am index 71cfa513..34899943 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -285,6 +285,8 @@ EXTRA_DIST = \ fpat3.awk \ fpat3.in \ fpat3.ok \ + fpat4.awk \ + fpat4.ok \ fpatnull.awk \ fpatnull.in \ fpatnull.ok \ @@ -1034,7 +1036,7 @@ GAWK_EXT_TESTS = \ aadelete1 aadelete2 aarray1 aasort aasorti argtest arraysort \ backw badargs beginfile1 beginfile2 binmode1 charasbytes \ colonwarn clos1way crlf dbugeval delsub devfd devfd1 devfd2 dumpvars exit \ - fieldwdth fpat1 fpat2 fpat3 fpatnull fsfwfs funlen \ + fieldwdth fpat1 fpat2 fpat3 fpat4 fpatnull fsfwfs funlen \ functab1 functab2 functab3 fwtest fwtest2 fwtest3 \ genpot gensub gensub2 getlndir gnuops2 gnuops3 gnureops \ icasefs icasers id igncdym igncfs ignrcas2 ignrcase \ diff --git a/test/Makefile.in b/test/Makefile.in index 7ac2ad7d..489b0d14 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -542,6 +542,8 @@ EXTRA_DIST = \ fpat3.awk \ fpat3.in \ fpat3.ok \ + fpat4.awk \ + fpat4.ok \ fpatnull.awk \ fpatnull.in \ fpatnull.ok \ @@ -1290,7 +1292,7 @@ GAWK_EXT_TESTS = \ aadelete1 aadelete2 aarray1 aasort aasorti argtest arraysort \ backw badargs beginfile1 beginfile2 binmode1 charasbytes \ colonwarn clos1way crlf dbugeval delsub devfd devfd1 devfd2 dumpvars exit \ - fieldwdth fpat1 fpat2 fpat3 fpatnull fsfwfs funlen \ + fieldwdth fpat1 fpat2 fpat3 fpat4 fpatnull fsfwfs funlen \ functab1 functab2 functab3 fwtest fwtest2 fwtest3 \ genpot gensub gensub2 getlndir gnuops2 gnuops3 gnureops \ icasefs icasers id igncdym igncfs ignrcas2 ignrcase \ @@ -3462,6 +3464,11 @@ fpat3: @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +fpat4: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + fpatnull: @echo $@ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/Maketests b/test/Maketests index adf95cc5..8c270869 100644 --- a/test/Maketests +++ b/test/Maketests @@ -992,6 +992,11 @@ fpat3: @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +fpat4: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + fpatnull: @echo $@ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/fpat4.awk b/test/fpat4.awk new file mode 100644 index 00000000..79cd6a7f --- /dev/null +++ b/test/fpat4.awk @@ -0,0 +1,105 @@ +BEGIN { + false = 0 + true = 1 + + fpat[1] = "([^,]*)|(\"[^\"]+\")" + fpat[2] = fpat[1] + fpat[3] = fpat[1] + fpat[4] = "aa+" + fpat[5] = fpat[4] + fpat[6] = "[a-z]" + + data[1] = "Robbins,,Arnold," + data[2] = "Smith,,\"1234 A Pretty Place, NE\",Sometown,NY,12345-6789,USA" + data[3] = "Robbins,Arnold,\"1234 A Pretty Place, NE\",Sometown,NY,12345-6789,USA" + data[4] = "bbbaaacccdddaaaaaqqqq" + data[5] = "bbbaaacccdddaaaaaqqqqa" # should get trailing qqqa + data[6] = "aAbBcC" + + for (i = 1; i in data; i++) { + printf("Splitting: <%s>\n", data[i]) + n = mypatsplit(data[i], fields, fpat[i], seps) + m = patsplit(data[i], fields2, fpat[i], seps2) + print "n =", n, "m =", m + if (n != m) { + printf("ERROR: counts wrong!\n") > "/dev/stderr" + exit 1 + } + for (j = 1; j <= n; j++) { + printf("fields[%d] = <%s>\tfields2[%d] = <%s>\n", j, fields[j], j, fields2[j]) + if (fields[j] != fields2[j]) { + printf("ERROR: data %d, field %d mismatch!\n", i, j) > "/dev/stderr" + exit 1 + } + } + for (j = 0; j in seps; j++) { + printf("seps[%d] = <%s>\tseps2[%d] = <%s>\n", j, seps[j], j, seps2[j]) + if (seps[j] != seps2[j]) { + printf("ERROR: data %d, separator %d mismatch!\n", i, j) > "/dev/stderr" + exit 1 + } + } + } +} + +function mypatsplit(string, array, pattern, seps, + eosflag, non_empty, nf) # locals +{ + delete array + delete seps + if (length(string) == 0) + return 0 + + eosflag = non_empty = false + nf = 0 + while (match(string, pattern)) { + if (RLENGTH > 0) { # easy case + non_empty = true + if (! (nf in seps)) { + if (RSTART == 1) # match at front of string + seps[nf] = "" + else + seps[nf] = substr(string, 1, RSTART - 1) + } + array[++nf] = substr(string, RSTART, RLENGTH) + string = substr(string, RSTART+RLENGTH) + if (length(string) == 0) + break + } else if (non_empty) { + # last match was non-empty, and at the + # current character we get a zero length match, + # which we don't want, so skip over it + non_empty = false + seps[nf] = substr(string, 1, 1) + string = substr(string, 2) + } else { + # 0 length match + if (! (nf in seps)) { + if (RSTART == 1) + seps[nf] = "" + else + seps[nf] = substr(string, 1, RSTART - 1) + } + array[++nf] = "" + if (! non_empty && ! eosflag) { # prev was empty + seps[nf] = substr(string, 1, 1) + } + if (RSTART == 1) { + string = substr(string, 2) + } else { + string = substr(string, RSTART + 1) + } + non_empty = false + } + if (length(string) == 0) { + if (eosflag) + break + else + eosflag = true + } + } + if (length(string) > 0) + seps[nf] = string + + return length(array) +} diff --git a/test/fpat4.ok b/test/fpat4.ok new file mode 100644 index 00000000..b4430aba --- /dev/null +++ b/test/fpat4.ok @@ -0,0 +1,65 @@ +Splitting: <Robbins,,Arnold,> +n = 4 m = 4 +fields[1] = <Robbins> fields2[1] = <Robbins> +fields[2] = <> fields2[2] = <> +fields[3] = <Arnold> fields2[3] = <Arnold> +fields[4] = <> fields2[4] = <> +seps[0] = <> seps2[0] = <> +seps[1] = <,> seps2[1] = <,> +seps[2] = <,> seps2[2] = <,> +seps[3] = <,> seps2[3] = <,> +Splitting: <Smith,,"1234 A Pretty Place, NE",Sometown,NY,12345-6789,USA> +n = 7 m = 7 +fields[1] = <Smith> fields2[1] = <Smith> +fields[2] = <> fields2[2] = <> +fields[3] = <"1234 A Pretty Place, NE"> fields2[3] = <"1234 A Pretty Place, NE"> +fields[4] = <Sometown> fields2[4] = <Sometown> +fields[5] = <NY> fields2[5] = <NY> +fields[6] = <12345-6789> fields2[6] = <12345-6789> +fields[7] = <USA> fields2[7] = <USA> +seps[0] = <> seps2[0] = <> +seps[1] = <,> seps2[1] = <,> +seps[2] = <,> seps2[2] = <,> +seps[3] = <,> seps2[3] = <,> +seps[4] = <,> seps2[4] = <,> +seps[5] = <,> seps2[5] = <,> +seps[6] = <,> seps2[6] = <,> +Splitting: <Robbins,Arnold,"1234 A Pretty Place, NE",Sometown,NY,12345-6789,USA> +n = 7 m = 7 +fields[1] = <Robbins> fields2[1] = <Robbins> +fields[2] = <Arnold> fields2[2] = <Arnold> +fields[3] = <"1234 A Pretty Place, NE"> fields2[3] = <"1234 A Pretty Place, NE"> +fields[4] = <Sometown> fields2[4] = <Sometown> +fields[5] = <NY> fields2[5] = <NY> +fields[6] = <12345-6789> fields2[6] = <12345-6789> +fields[7] = <USA> fields2[7] = <USA> +seps[0] = <> seps2[0] = <> +seps[1] = <,> seps2[1] = <,> +seps[2] = <,> seps2[2] = <,> +seps[3] = <,> seps2[3] = <,> +seps[4] = <,> seps2[4] = <,> +seps[5] = <,> seps2[5] = <,> +seps[6] = <,> seps2[6] = <,> +Splitting: <bbbaaacccdddaaaaaqqqq> +n = 2 m = 2 +fields[1] = <aaa> fields2[1] = <aaa> +fields[2] = <aaaaa> fields2[2] = <aaaaa> +seps[0] = <bbb> seps2[0] = <bbb> +seps[1] = <cccddd> seps2[1] = <cccddd> +seps[2] = <qqqq> seps2[2] = <qqqq> +Splitting: <bbbaaacccdddaaaaaqqqqa> +n = 2 m = 2 +fields[1] = <aaa> fields2[1] = <aaa> +fields[2] = <aaaaa> fields2[2] = <aaaaa> +seps[0] = <bbb> seps2[0] = <bbb> +seps[1] = <cccddd> seps2[1] = <cccddd> +seps[2] = <qqqqa> seps2[2] = <qqqqa> +Splitting: <aAbBcC> +n = 3 m = 3 +fields[1] = <a> fields2[1] = <a> +fields[2] = <b> fields2[2] = <b> +fields[3] = <c> fields2[3] = <c> +seps[0] = <> seps2[0] = <> +seps[1] = <A> seps2[1] = <A> +seps[2] = <B> seps2[2] = <B> +seps[3] = <C> seps2[3] = <C> |