diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2012-08-12 21:02:48 +0300 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2012-08-12 21:02:48 +0300 |
commit | 4f196c2431034aa4865fcd4e3bcc45a6e764266f (patch) | |
tree | 454540708626c22efebf92cb5280ed0c3bd508a7 | |
parent | 84d6acb5c16aa9aed908fde7cb0bc53c2ecbeede (diff) | |
download | egawk-4f196c2431034aa4865fcd4e3bcc45a6e764266f.tar.gz egawk-4f196c2431034aa4865fcd4e3bcc45a6e764266f.tar.bz2 egawk-4f196c2431034aa4865fcd4e3bcc45a6e764266f.zip |
Fix doc on ranges and locales again. Add test.
-rw-r--r-- | doc/ChangeLog | 5 | ||||
-rw-r--r-- | doc/gawk.info | 119 | ||||
-rw-r--r-- | doc/gawk.texi | 4 | ||||
-rw-r--r-- | test/ChangeLog | 5 | ||||
-rw-r--r-- | test/Makefile.am | 5 | ||||
-rw-r--r-- | test/Makefile.in | 10 | ||||
-rw-r--r-- | test/Maketests | 5 | ||||
-rw-r--r-- | test/regexprange.awk | 14 | ||||
-rw-r--r-- | test/regexprange.ok | 52 |
9 files changed, 154 insertions, 65 deletions
diff --git a/doc/ChangeLog b/doc/ChangeLog index 88743adc..f279986c 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,8 @@ +2012-08-12 Arnold D. Robbins <arnold@skeeve.com> + + * gawk.texi (Ranges and Locales): Clarified ranges and + locales. Again. + 2012-08-05 Arnold D. Robbins <arnold@skeeve.com> * gawk.texi (PC Binary Installation): Document Eli Zaretskii's diff --git a/doc/gawk.info b/doc/gawk.info index 91c9d516..275188ef 100644 --- a/doc/gawk.info +++ b/doc/gawk.info @@ -20186,8 +20186,7 @@ outside those locales, the ordering was defined to be based on In many locales, `A' and `a' are both less than `B'. In other words, these locales sort characters in dictionary order, and `[a-dx-z]' is typically not equivalent to `[abcdxyz]'; instead it might -be equivalent to `[aBbCcDdXxYyZz]', for example. (And to make things -worse, on other systems, it might be equivalent to `[aAbBcCdDxXyYz]'.) +be equivalent to `[ABCXYabcdxyz]', for example. This point needs to be emphasized: Much literature teaches that you should use `[a-z]' to match a lowercase character. But on systems with @@ -27834,63 +27833,63 @@ Node: BTL798431 Node: POSIX/GNU799165 Node: Common Extensions804316 Node: Ranges and Locales805423 -Ref: Ranges and Locales-Footnote-1810134 -Ref: Ranges and Locales-Footnote-2810161 -Ref: Ranges and Locales-Footnote-3810421 -Node: Contributors810642 -Node: Installation814904 -Node: Gawk Distribution815798 -Node: Getting816282 -Node: Extracting817108 -Node: Distribution contents818800 -Node: Unix Installation824022 -Node: Quick Installation824639 -Node: Additional Configuration Options826601 -Node: Configuration Philosophy828078 -Node: Non-Unix Installation830420 -Node: PC Installation830878 -Node: PC Binary Installation832177 -Node: PC Compiling834192 -Node: PC Testing837136 -Node: PC Using838312 -Node: Cygwin842497 -Node: MSYS843497 -Node: VMS Installation844011 -Node: VMS Compilation844614 -Ref: VMS Compilation-Footnote-1845621 -Node: VMS Installation Details845679 -Node: VMS Running847314 -Node: VMS Old Gawk848921 -Node: Bugs849395 -Node: Other Versions853247 -Node: Notes858528 -Node: Compatibility Mode859220 -Node: Additions860003 -Node: Accessing The Source860815 -Node: Adding Code862240 -Node: New Ports868207 -Node: Dynamic Extensions872320 -Node: Internals873696 -Node: Plugin License882799 -Node: Sample Library883433 -Node: Internal File Description884119 -Node: Internal File Ops887834 -Ref: Internal File Ops-Footnote-1892615 -Node: Using Internal File Ops892755 -Node: Future Extensions895132 -Node: Basic Concepts897636 -Node: Basic High Level898393 -Ref: Basic High Level-Footnote-1902428 -Node: Basic Data Typing902613 -Node: Floating Point Issues907138 -Node: String Conversion Precision908221 -Ref: String Conversion Precision-Footnote-1909921 -Node: Unexpected Results910030 -Node: POSIX Floating Point Problems911856 -Ref: POSIX Floating Point Problems-Footnote-1915561 -Node: Glossary915599 -Node: Copying940575 -Node: GNU Free Documentation License978132 -Node: Index1003269 +Ref: Ranges and Locales-Footnote-1810041 +Ref: Ranges and Locales-Footnote-2810068 +Ref: Ranges and Locales-Footnote-3810328 +Node: Contributors810549 +Node: Installation814811 +Node: Gawk Distribution815705 +Node: Getting816189 +Node: Extracting817015 +Node: Distribution contents818707 +Node: Unix Installation823929 +Node: Quick Installation824546 +Node: Additional Configuration Options826508 +Node: Configuration Philosophy827985 +Node: Non-Unix Installation830327 +Node: PC Installation830785 +Node: PC Binary Installation832084 +Node: PC Compiling834099 +Node: PC Testing837043 +Node: PC Using838219 +Node: Cygwin842404 +Node: MSYS843404 +Node: VMS Installation843918 +Node: VMS Compilation844521 +Ref: VMS Compilation-Footnote-1845528 +Node: VMS Installation Details845586 +Node: VMS Running847221 +Node: VMS Old Gawk848828 +Node: Bugs849302 +Node: Other Versions853154 +Node: Notes858435 +Node: Compatibility Mode859127 +Node: Additions859910 +Node: Accessing The Source860722 +Node: Adding Code862147 +Node: New Ports868114 +Node: Dynamic Extensions872227 +Node: Internals873603 +Node: Plugin License882706 +Node: Sample Library883340 +Node: Internal File Description884026 +Node: Internal File Ops887741 +Ref: Internal File Ops-Footnote-1892522 +Node: Using Internal File Ops892662 +Node: Future Extensions895039 +Node: Basic Concepts897543 +Node: Basic High Level898300 +Ref: Basic High Level-Footnote-1902335 +Node: Basic Data Typing902520 +Node: Floating Point Issues907045 +Node: String Conversion Precision908128 +Ref: String Conversion Precision-Footnote-1909828 +Node: Unexpected Results909937 +Node: POSIX Floating Point Problems911763 +Ref: POSIX Floating Point Problems-Footnote-1915468 +Node: Glossary915506 +Node: Copying940482 +Node: GNU Free Documentation License978039 +Node: Index1003176 End Tag Table diff --git a/doc/gawk.texi b/doc/gawk.texi index b2044649..1ff75230 100644 --- a/doc/gawk.texi +++ b/doc/gawk.texi @@ -27095,9 +27095,7 @@ But outside those locales, the ordering was defined to be based on In many locales, @samp{A} and @samp{a} are both less than @samp{B}. In other words, these locales sort characters in dictionary order, and @samp{[a-dx-z]} is typically not equivalent to @samp{[abcdxyz]}; -instead it might be equivalent to @samp{[aBbCcDdXxYyZz]}, for example. -(And to make things worse, on other systems, it might be equivalent to -@samp{[aAbBcCdDxXyYz]}.) +instead it might be equivalent to @samp{[ABCXYabcdxyz]}, for example. This point needs to be emphasized: Much literature teaches that you should use @samp{[a-z]} to match a lowercase character. But on systems with diff --git a/test/ChangeLog b/test/ChangeLog index 219396cf..75eb5ec9 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,8 @@ +2012-08-12 Arnold D. Robbins <arnold@skeeve.com> + + * Makefile.am (regexprange): New test. + * regexprange.awk, regexprange.ok: New files. + 2012-08-05 Arnold D. Robbins <arnold@skeeve.com> New test from Nelson Beebe. diff --git a/test/Makefile.am b/test/Makefile.am index 9c96c386..592cf144 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -615,6 +615,8 @@ EXTRA_DIST = \ regeq.awk \ regeq.in \ regeq.ok \ + regexprange.awk \ + regexprange.ok \ regrange.awk \ regrange.ok \ regtest.sh \ @@ -823,7 +825,8 @@ BASIC_TESTS = \ paramdup paramres paramtyp parse1 parsefld parseme pcntplus \ posix2008sub prdupval prec printf0 printf1 prmarscl prmreuse \ prt1eval prtoeval \ - rand range1 rebt8b1 redfilnm regeq regrange reindops reparse \ + rand range1 rebt8b1 redfilnm regeq regexprange regrange \ + reindops reparse \ resplit rri1 rs rsnul1nl rsnulbig rsnulbig2 rstest1 rstest2 \ rstest3 rstest4 rstest5 rswhite \ scalar sclforin sclifin sortempty splitargv splitarr splitdef \ diff --git a/test/Makefile.in b/test/Makefile.in index 56c07305..d76d7387 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -820,6 +820,8 @@ EXTRA_DIST = \ regeq.awk \ regeq.in \ regeq.ok \ + regexprange.awk \ + regexprange.ok \ regrange.awk \ regrange.ok \ regtest.sh \ @@ -1028,7 +1030,8 @@ BASIC_TESTS = \ paramdup paramres paramtyp parse1 parsefld parseme pcntplus \ posix2008sub prdupval prec printf0 printf1 prmarscl prmreuse \ prt1eval prtoeval \ - rand range1 rebt8b1 redfilnm regeq regrange reindops reparse \ + rand range1 rebt8b1 redfilnm regeq regexprange regrange \ + reindops reparse \ resplit rri1 rs rsnul1nl rsnulbig rsnulbig2 rstest1 rstest2 \ rstest3 rstest4 rstest5 rswhite \ scalar sclforin sclifin sortempty splitargv splitarr splitdef \ @@ -2523,6 +2526,11 @@ regeq: @AWKPATH=$(srcdir) $(AWK) -f $@.awk < $(srcdir)/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@ +regexprange: + @echo regexprange + @AWKPATH=$(srcdir) $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@ + regrange: @echo regrange @AWKPATH=$(srcdir) $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/Maketests b/test/Maketests index 12d7e35c..df52108d 100644 --- a/test/Maketests +++ b/test/Maketests @@ -670,6 +670,11 @@ regeq: @AWKPATH=$(srcdir) $(AWK) -f $@.awk < $(srcdir)/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@ +regexprange: + @echo regexprange + @AWKPATH=$(srcdir) $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@ + regrange: @echo regrange @AWKPATH=$(srcdir) $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/regexprange.awk b/test/regexprange.awk new file mode 100644 index 00000000..861e5ee5 --- /dev/null +++ b/test/regexprange.awk @@ -0,0 +1,14 @@ +BEGIN { + range = "[a-dx-z]" + + split("ABCDEFGHIJKLMNOPQRSTUVWXYZ", upper, "") + split("abcdefghijklmnopqrstuvwxyz", lower, "") + + for (i = 1; i in upper; i++) + printf("%s ~ %s ---> %s\n", + upper[i], range, (upper[i] ~ range) ? "true" : "false") + + for (i = 1; i in lower; i++) + printf("%s ~ %s ---> %s\n", + lower[i], range, (lower[i] ~ range) ? "true" : "false") +} diff --git a/test/regexprange.ok b/test/regexprange.ok new file mode 100644 index 00000000..dbf5d35a --- /dev/null +++ b/test/regexprange.ok @@ -0,0 +1,52 @@ +A ~ [a-dx-z] ---> false +B ~ [a-dx-z] ---> false +C ~ [a-dx-z] ---> false +D ~ [a-dx-z] ---> false +E ~ [a-dx-z] ---> false +F ~ [a-dx-z] ---> false +G ~ [a-dx-z] ---> false +H ~ [a-dx-z] ---> false +I ~ [a-dx-z] ---> false +J ~ [a-dx-z] ---> false +K ~ [a-dx-z] ---> false +L ~ [a-dx-z] ---> false +M ~ [a-dx-z] ---> false +N ~ [a-dx-z] ---> false +O ~ [a-dx-z] ---> false +P ~ [a-dx-z] ---> false +Q ~ [a-dx-z] ---> false +R ~ [a-dx-z] ---> false +S ~ [a-dx-z] ---> false +T ~ [a-dx-z] ---> false +U ~ [a-dx-z] ---> false +V ~ [a-dx-z] ---> false +W ~ [a-dx-z] ---> false +X ~ [a-dx-z] ---> false +Y ~ [a-dx-z] ---> false +Z ~ [a-dx-z] ---> false +a ~ [a-dx-z] ---> true +b ~ [a-dx-z] ---> true +c ~ [a-dx-z] ---> true +d ~ [a-dx-z] ---> true +e ~ [a-dx-z] ---> false +f ~ [a-dx-z] ---> false +g ~ [a-dx-z] ---> false +h ~ [a-dx-z] ---> false +i ~ [a-dx-z] ---> false +j ~ [a-dx-z] ---> false +k ~ [a-dx-z] ---> false +l ~ [a-dx-z] ---> false +m ~ [a-dx-z] ---> false +n ~ [a-dx-z] ---> false +o ~ [a-dx-z] ---> false +p ~ [a-dx-z] ---> false +q ~ [a-dx-z] ---> false +r ~ [a-dx-z] ---> false +s ~ [a-dx-z] ---> false +t ~ [a-dx-z] ---> false +u ~ [a-dx-z] ---> false +v ~ [a-dx-z] ---> false +w ~ [a-dx-z] ---> false +x ~ [a-dx-z] ---> true +y ~ [a-dx-z] ---> true +z ~ [a-dx-z] ---> true |