diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2010-11-25 21:22:47 +0200 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2010-11-25 21:22:47 +0200 |
commit | 286748e1a8500f647c3bccfb467b02bf3a37f398 (patch) | |
tree | 6385bb2f1ee6c0837204edfd307babceeae7f89a | |
parent | 50d4a80f67e5bcbf3902138d85a25f6a90847d31 (diff) | |
download | egawk-286748e1a8500f647c3bccfb467b02bf3a37f398.tar.gz egawk-286748e1a8500f647c3bccfb467b02bf3a37f398.tar.bz2 egawk-286748e1a8500f647c3bccfb467b02bf3a37f398.zip |
Add POSIX string comparison with strcoll.
-rw-r--r-- | ChangeLog | 8 | ||||
-rw-r--r-- | NEWS | 6 | ||||
-rw-r--r-- | TODO | 6 | ||||
-rw-r--r-- | configh.in | 3 | ||||
-rwxr-xr-x | configure | 2 | ||||
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | doc/gawk.info | 823 | ||||
-rw-r--r-- | doc/gawk.texi | 31 | ||||
-rw-r--r-- | eval.c | 85 | ||||
-rw-r--r-- | missing_d/ChangeLog | 6 | ||||
-rw-r--r-- | missing_d/memcmp.c | 4 | ||||
-rw-r--r-- | missing_d/memcpy.c | 6 | ||||
-rw-r--r-- | missing_d/memmove.c | 2 | ||||
-rw-r--r-- | missing_d/memset.c | 8 | ||||
-rw-r--r-- | missing_d/strchr.c | 6 | ||||
-rw-r--r-- | missing_d/strcoll.c | 7 | ||||
-rw-r--r-- | missing_d/strtod.c | 4 | ||||
-rw-r--r-- | protos.h | 4 | ||||
-rw-r--r-- | replace.c | 4 |
19 files changed, 597 insertions, 420 deletions
@@ -1,3 +1,10 @@ +Thu Nov 25 08:32:31 2010 Arnold D. Robbins <arnold@skeeve.com> + + * eval.c (posix_compare): Do string comparison with strcoll() / + wcscoll(). + (cmp_nodes): Call it if do_posix. This may be a bad idea, + but what the heck. Standards compatibility uber alles! + Wed Nov 24 20:09:23 2010 Arnold D. Robbins <arnold@skeeve.com> * ext.c (do_ext): Require definition of `plugin_is_GPL_compatible' @@ -21,7 +28,6 @@ Sun Nov 21 14:23:58 2010 John Haque <j.eh@mchsi.com> (assign_common, assign, compare): Nuked macros. (cmp_scalar, op_assign): New functions as replacements for the macros. - Fri Nov 19 11:57:28 2010 Arnold D. Robbins <arnold@skeeve.com> * bootstrap.sh, Makefile.am: Remove treatment of CVS. @@ -66,8 +66,12 @@ Changes from 3.1.8 to 4.0.0 22. Per the GNU Coding Standards, dynamic extensions must now define a global symbol indicating that they are GPL-compatible. See the documentation and example extensions. + THIS CHANGES BEHAVIOR!!!! -23. Many code cleanups. Removed code for many old, unsupported systems. +23. In POSIX mode, string comparisons use strcoll/wcscoll. + THIS CHANGES BEHAVIOR!!!! + +24. Many code cleanups. Removed code for many old, unsupported systems. Changes from 3.1.7 to 3.1.8 --------------------------- @@ -22,9 +22,9 @@ xgawk features (@load, -l, others) #Xfer it to savannah #Tag last CVS revisions #Remove treatment of CVS directories from makefiles (awklib, check others) -# Review POSIX standard -Fix issues related to POSIX - - use of STRCOLL for comparison +#Review POSIX standard +#Fix issues related to POSIX +# - use of STRCOLL for comparison Add tests for pgawk Add tests for patches in emails Add doc fix in email @@ -197,6 +197,9 @@ /* Define to 1 if you have the `strchr' function. */ #undef HAVE_STRCHR +/* Define to 1 if you have the `strcoll' function. */ +#undef HAVE_STRCOLL + /* Define to 1 if you have the `strerror' function. 
*/ #undef HAVE_STRERROR @@ -9747,7 +9747,7 @@ for ac_func in atexit btowc fmod getgrent getgroups grantpt \ isascii iswctype iswlower iswupper mbrlen \ memcmp memcpy memcpy_ulong memmove memset \ memset_ulong mkstemp setenv setlocale snprintf strchr \ - strerror strftime strncasecmp strtod strtoul \ + strerror strftime strncasecmp strcoll strtod strtoul \ system tmpfile towlower towupper tzset usleep wcrtomb \ wcscoll wcscoll wctype do : diff --git a/configure.ac b/configure.ac index d1998f63..3e1b231a 100644 --- a/configure.ac +++ b/configure.ac @@ -268,7 +268,7 @@ AC_CHECK_FUNCS(atexit btowc fmod getgrent getgroups grantpt \ isascii iswctype iswlower iswupper mbrlen \ memcmp memcpy memcpy_ulong memmove memset \ memset_ulong mkstemp setenv setlocale snprintf strchr \ - strerror strftime strncasecmp strtod strtoul \ + strerror strftime strncasecmp strcoll strtod strtoul \ system tmpfile towlower towupper tzset usleep wcrtomb \ wcscoll wcscoll wctype) dnl this check is for both mbrtowc and the mbstate_t type, which is good diff --git a/doc/gawk.info b/doc/gawk.info index dd1f8a98..70c63046 100644 --- a/doc/gawk.info +++ b/doc/gawk.info @@ -237,6 +237,7 @@ texts being (a) (see below), and with the Back-Cover Texts being (b) with `<', etc. * Variable Typing:: String type versus numeric type. * Comparison Operators:: The comparison operators. +* POSIX String Comparison:: String comparison with POSIX rules. * Boolean Ops:: Combining comparison expressions using boolean operators `||' (``or''), `&&' (``and'') and `!' (``not''). @@ -6664,6 +6665,7 @@ are typed, and how `awk' compares variables. * Variable Typing:: String type versus numeric type. * Comparison Operators:: The comparison operators. +* POSIX String Comparison:: String comparison with POSIX rules. File: gawk.info, Node: Variable Typing, Next: Comparison Operators, Up: Typing and Comparison @@ -6763,7 +6765,7 @@ otherwise: gratifying that the POSIX standard is also now correct. 
-File: gawk.info, Node: Comparison Operators, Prev: Variable Typing, Up: Typing and Comparison +File: gawk.info, Node: Comparison Operators, Next: POSIX String Comparison, Prev: Variable Typing, Up: Typing and Comparison 5.3.2.2 Comparison Operators ............................ @@ -6876,6 +6878,33 @@ abbreviation for the following comparison expression: Constant Regexps::, where this is discussed in more detail. +File: gawk.info, Node: POSIX String Comparison, Prev: Comparison Operators, Up: Typing and Comparison + +5.3.2.3 String comparison with POSIX rules. +........................................... + +The POSIX standard says that string comparison is performed based on +the locale's collating order. This is usually very different from the +results obtained when doing straight character-by-character +comparison.(1) + + Because this behavior differs considerably from existing practice, +`gawk' only implements it when in POSIX mode (*note Options::). Here +is an example to illustrate the difference, in a `en_US.UTF-8' locale: + + $ gawk 'BEGIN { printf("ABC < abc = %s\n", + > ("ABC" < "abc" ? "TRUE" : "FALSE")) }' + -| ABC < abc = TRUE + $ gawk --posix 'BEGIN { printf("ABC < abc = %s\n", + > ("ABC" < "abc" ? "TRUE" : "FALSE")) }' + -| ABC < abc = FALSE + + ---------- Footnotes ---------- + + (1) Technically, string comparison is supposed to behave the same +way as if the strings are compared with the C `strcoll()' function. 
+ + File: gawk.info, Node: Boolean Ops, Next: Conditional Exp, Prev: Typing and Comparison, Up: Truth Values and Conditions 5.3.3 Boolean Expressions @@ -26826,400 +26855,402 @@ Index Tag Table: Node: Top1340 -Node: Foreword29871 -Node: Preface34187 -Ref: Preface-Footnote-137139 -Ref: Preface-Footnote-237245 -Node: History37477 -Node: Names39709 -Ref: Names-Footnote-141186 -Node: This Manual41258 -Ref: This Manual-Footnote-146156 -Node: Conventions46256 -Node: Manual History48315 -Ref: Manual History-Footnote-151493 -Ref: Manual History-Footnote-251534 -Node: How To Contribute51608 -Node: Acknowledgments52752 -Node: Getting Started57021 -Node: Running gawk59393 -Node: One-shot60579 -Node: Read Terminal61804 -Ref: Read Terminal-Footnote-163454 -Ref: Read Terminal-Footnote-263728 -Node: Long63899 -Node: Executable Scripts65275 -Ref: Executable Scripts-Footnote-167136 -Ref: Executable Scripts-Footnote-267238 -Node: Comments67689 -Node: Quoting70057 -Node: DOS Quoting74674 -Node: Sample Data Files75342 -Node: Very Simple78374 -Node: Two Rules82971 -Node: More Complex85118 -Ref: More Complex-Footnote-188048 -Node: Statements/Lines88128 -Ref: Statements/Lines-Footnote-192484 -Node: Other Features92749 -Node: When93618 -Node: Regexp95761 -Node: Regexp Usage97215 -Node: Escape Sequences99241 -Node: Regexp Operators104984 -Ref: Regexp Operators-Footnote-1112156 -Ref: Regexp Operators-Footnote-2112303 -Node: Character Lists112401 -Ref: table-char-classes114176 -Node: GNU Regexp Operators116801 -Node: Case-sensitivity120514 -Ref: Case-sensitivity-Footnote-1123469 -Ref: Case-sensitivity-Footnote-2123704 -Node: Leftmost Longest123812 -Node: Computed Regexps125013 -Node: Locales128430 -Node: Reading Files131520 -Node: Records133536 -Ref: Records-Footnote-1142102 -Node: Fields142139 -Ref: Fields-Footnote-1145171 -Node: Nonconstant Fields145257 -Node: Changing Fields147459 -Node: Field Separators152744 -Node: Default Field Splitting155373 -Node: Regexp Field Splitting156490 -Node: 
Single Character Fields159840 -Node: Command Line Field Separator160891 -Node: Field Splitting Summary164330 -Ref: Field Splitting Summary-Footnote-1167516 -Node: Constant Size167617 -Node: Splitting By Content172088 -Ref: Splitting By Content-Footnote-1175690 -Node: Multiple Line175730 -Ref: Multiple Line-Footnote-1181470 -Node: Getline181649 -Node: Plain Getline183870 -Node: Getline/Variable185959 -Node: Getline/File187100 -Node: Getline/Variable/File188422 -Ref: Getline/Variable/File-Footnote-1190021 -Node: Getline/Pipe190108 -Node: Getline/Variable/Pipe192656 -Node: Getline/Coprocess193763 -Node: Getline/Variable/Coprocess195006 -Node: Getline Notes195720 -Node: Getline Summary197662 -Ref: table-getline-variants197946 -Node: BEGINFILE/ENDFILE198851 -Node: Command line directories201706 -Node: Printing202341 -Node: Print203972 -Node: Print Examples205309 -Node: Output Separators208093 -Node: OFMT209852 -Node: Printf211210 -Node: Basic Printf212116 -Node: Control Letters213653 -Node: Format Modifiers217465 -Node: Printf Examples223476 -Node: Redirection226191 -Node: Special Files233169 -Node: Special FD233702 -Ref: Special FD-Footnote-1237277 -Node: Special Network237351 -Node: Special Caveats238206 -Node: Close Files And Pipes239000 -Ref: Close Files And Pipes-Footnote-1245944 -Ref: Close Files And Pipes-Footnote-2246092 -Node: Expressions246242 -Node: Values247311 -Node: Constants247987 -Node: Scalar Constants248667 -Ref: Scalar Constants-Footnote-1249526 -Node: Nondecimal-numbers249708 -Node: Regexp Constants252767 -Node: Using Constant Regexps253242 -Node: Variables256247 -Node: Using Variables256902 -Node: Assignment Options258629 -Node: Conversion260510 -Ref: table-locale-affects265884 -Ref: Conversion-Footnote-1266508 -Node: All Operators266617 -Node: Arithmetic Ops267247 -Node: Concatenation269746 -Ref: Concatenation-Footnote-1272539 -Node: Assignment Ops272658 -Ref: table-assign-ops277646 -Node: Increment Ops279047 -Node: Truth Values and 
Conditions282525 -Node: Truth Values283608 -Node: Typing and Comparison284656 -Node: Variable Typing285377 -Ref: Variable Typing-Footnote-1289274 -Node: Comparison Operators289396 -Ref: table-relational-ops289774 -Node: Boolean Ops293323 -Ref: Boolean Ops-Footnote-1297401 -Node: Conditional Exp297492 -Node: Function Calls299224 -Node: Precedence302783 -Node: Patterns and Actions306436 -Node: Pattern Overview307490 -Node: Regexp Patterns308927 -Node: Expression Patterns309470 -Node: Ranges313020 -Node: BEGIN/END316109 -Node: Using BEGIN/END316859 -Ref: Using BEGIN/END-Footnote-1319590 -Node: I/O And BEGIN/END319704 -Node: Empty321971 -Node: Using Shell Variables322279 -Node: Action Overview324560 -Node: Statements326918 -Node: If Statement328774 -Node: While Statement330273 -Node: Do Statement332305 -Node: For Statement333454 -Node: Switch Statement336594 -Node: Break Statement338642 -Node: Continue Statement340462 -Node: Next Statement342160 -Node: Nextfile Statement344440 -Node: Exit Statement347158 -Node: Built-in Variables349429 -Node: User-modified350524 -Ref: User-modified-Footnote-1358490 -Node: Auto-set358552 -Ref: Auto-set-Footnote-1367214 -Node: ARGC and ARGV367419 -Node: Arrays371180 -Node: Array Basics372689 -Node: Array Intro373400 -Node: Reference to Elements377787 -Node: Assigning Elements379686 -Node: Array Example380177 -Node: Scanning an Array381909 -Node: Delete384186 -Ref: Delete-Footnote-1386576 -Node: Numeric Array Subscripts386633 -Node: Uninitialized Subscripts388820 -Node: Multi-dimensional390426 -Node: Multi-scanning393517 -Node: Array Sorting395101 -Node: Arrays of Arrays398931 -Node: Functions403039 -Node: Built-in403848 -Node: Calling Built-in404862 -Node: Numeric Functions406838 -Ref: Numeric Functions-Footnote-1410592 -Ref: Numeric Functions-Footnote-2410926 -Node: String Functions411195 -Ref: String Functions-Footnote-1433029 -Ref: String Functions-Footnote-2433158 -Ref: String Functions-Footnote-3433406 -Node: Gory Details433493 
-Ref: table-sub-escapes435150 -Ref: table-sub-posix-92436496 -Ref: table-sub-proposed437839 -Ref: table-posix-2001-sub439199 -Ref: table-gensub-escapes440474 -Ref: Gory Details-Footnote-1441677 -Node: I/O Functions441728 -Ref: I/O Functions-Footnote-1448516 -Node: Time Functions448607 -Ref: Time Functions-Footnote-1459419 -Ref: Time Functions-Footnote-2459487 -Ref: Time Functions-Footnote-3459645 -Ref: Time Functions-Footnote-4459756 -Ref: Time Functions-Footnote-5459883 -Ref: Time Functions-Footnote-6460110 -Node: Bitwise Functions460376 -Ref: table-bitwise-ops460954 -Ref: Bitwise Functions-Footnote-1465194 -Node: I18N Functions465378 -Node: User-defined467101 -Node: Definition Syntax467905 -Node: Function Example472603 -Node: Function Caveats475185 -Node: Return Statement479110 -Node: Dynamic Typing481767 -Node: Indirect Calls482504 -Node: Internationalization492139 -Node: I18N and L10N493558 -Node: Explaining gettext494242 -Ref: Explaining gettext-Footnote-1499153 -Ref: Explaining gettext-Footnote-2499392 -Node: Programmer i18n499561 -Node: Translator i18n503796 -Node: String Extraction504587 -Ref: String Extraction-Footnote-1505544 -Node: Printf Ordering505670 -Ref: Printf Ordering-Footnote-1508450 -Node: I18N Portability508514 -Ref: I18N Portability-Footnote-1510959 -Node: I18N Example511022 -Ref: I18N Example-Footnote-1513642 -Node: Gawk I18N513714 -Node: Advanced Features514292 -Node: Nondecimal Data515607 -Node: Two-way I/O517168 -Ref: Two-way I/O-Footnote-1522651 -Node: TCP/IP Networking522728 -Node: Profiling525518 -Node: Invoking Gawk532979 -Node: Command Line534286 -Node: Options535071 -Ref: Options-Footnote-1548159 -Node: Other Arguments548184 -Node: AWKPATH Variable550865 -Ref: AWKPATH Variable-Footnote-1553640 -Node: Exit Status553900 -Node: Include Files554572 -Node: Obsolete558173 -Node: Undocumented558974 -Node: Known Bugs559236 -Node: Library Functions559838 -Ref: Library Functions-Footnote-1562819 -Node: Library Names562990 -Ref: Library 
Names-Footnote-1566463 -Ref: Library Names-Footnote-2566682 -Node: General Functions566768 -Node: Nextfile Function567831 -Node: Strtonum Function572195 -Node: Assert Function575136 -Node: Round Function578440 -Node: Cliff Random Function579980 -Node: Ordinal Functions580995 -Ref: Ordinal Functions-Footnote-1584055 -Node: Join Function584271 -Ref: Join Function-Footnote-1586033 -Node: Gettimeofday Function586233 -Node: Data File Management589944 -Node: Filetrans Function590576 -Node: Rewind Function594002 -Node: File Checking595448 -Node: Empty Files596478 -Node: Ignoring Assigns598703 -Node: Getopt Function600251 -Ref: Getopt Function-Footnote-1611533 -Node: Passwd Functions611736 -Ref: Passwd Functions-Footnote-1620714 -Node: Group Functions620802 -Node: Sample Programs628899 -Node: Running Examples629568 -Node: Clones630296 -Node: Cut Program631428 -Node: Egrep Program641187 -Ref: Egrep Program-Footnote-1648937 -Node: Id Program649047 -Node: Split Program652654 -Node: Tee Program656122 -Node: Uniq Program658865 -Node: Wc Program666232 -Ref: Wc Program-Footnote-1670476 -Node: Miscellaneous Programs670672 -Node: Dupword Program671792 -Node: Alarm Program673823 -Node: Translate Program678365 -Ref: Translate Program-Footnote-1682744 -Ref: Translate Program-Footnote-2682981 -Node: Labels Program683115 -Ref: Labels Program-Footnote-1686406 -Node: Word Sorting686490 -Node: History Sorting690837 -Node: Extract Program692675 -Node: Simple Sed700033 -Node: Igawk Program703090 -Ref: Igawk Program-Footnote-1717821 -Ref: Igawk Program-Footnote-2718022 -Node: Signature Program718160 -Node: Debugger719240 -Node: Debugging720116 -Node: Debugging Concepts720430 -Node: Debugging Terms722283 -Node: Awk Debugging724831 -Node: Sample dgawk session725723 -Node: dgawk invocation726215 -Node: Finding The Bug727399 -Node: List of Debugger Commands733914 -Node: Breakpoint Control735229 -Node: Dgawk Execution Control738439 -Node: Viewing And Changing Data741788 -Node: Dgawk Stack745084 
-Node: Dgawk Info746545 -Node: Miscellaneous Dgawk Commands750483 -Node: Readline Support756199 -Node: Dgawk Limitations757015 -Node: Language History759187 -Node: V7/SVR3.1760564 -Node: SVR4762859 -Node: POSIX764304 -Node: BTL766016 -Node: POSIX/GNU767706 -Node: Contributors777370 -Node: Installation780975 -Node: Gawk Distribution781946 -Node: Getting782430 -Node: Extracting783256 -Node: Distribution contents784644 -Node: Unix Installation789717 -Node: Quick Installation790308 -Node: Additional Configuration Options792010 -Node: Configuration Philosophy793773 -Node: Non-Unix Installation796137 -Node: PC Installation796602 -Node: PC Binary Installation797908 -Node: PC Compiling799751 -Node: PC Dynamic804256 -Node: PC Using806619 -Node: Cygwin811167 -Node: MSYS812151 -Node: VMS Installation812657 -Node: VMS Compilation813261 -Node: VMS Installation Details814838 -Node: VMS Running816468 -Node: VMS POSIX818065 -Node: VMS Old Gawk819363 -Node: Unsupported819832 -Node: Atari Installation820294 -Node: Atari Compiling821581 -Node: Atari Using823470 -Node: BeOS Installation826317 -Node: Tandem Installation827462 -Node: Bugs829141 -Node: Other Versions832973 -Node: Notes838195 -Node: Compatibility Mode838887 -Node: Additions839670 -Node: Adding Code840420 -Node: New Ports846472 -Node: Dynamic Extensions850604 -Node: Internals851985 -Node: Plugin License862390 -Node: Sample Library863024 -Node: Internal File Description863688 -Node: Internal File Ops867383 -Ref: Internal File Ops-Footnote-1872259 -Node: Using Internal File Ops872407 -Node: Future Extensions874432 -Node: Basic Concepts878469 -Node: Basic High Level879226 -Ref: Basic High Level-Footnote-1883342 -Node: Basic Data Typing883536 -Node: Floating Point Issues887973 -Node: String Conversion Precision889056 -Ref: String Conversion Precision-Footnote-1890750 -Node: Unexpected Results890859 -Node: POSIX Floating Point Problems892685 -Ref: POSIX Floating Point Problems-Footnote-1896384 -Node: Glossary896422 -Node: 
Copying920190 -Node: GNU Free Documentation License957747 -Node: next-edition982891 -Node: unresolved983243 -Node: revision983743 -Node: consistency984166 -Node: Index987519 +Node: Foreword29942 +Node: Preface34258 +Ref: Preface-Footnote-137210 +Ref: Preface-Footnote-237316 +Node: History37548 +Node: Names39780 +Ref: Names-Footnote-141257 +Node: This Manual41329 +Ref: This Manual-Footnote-146227 +Node: Conventions46327 +Node: Manual History48386 +Ref: Manual History-Footnote-151564 +Ref: Manual History-Footnote-251605 +Node: How To Contribute51679 +Node: Acknowledgments52823 +Node: Getting Started57092 +Node: Running gawk59464 +Node: One-shot60650 +Node: Read Terminal61875 +Ref: Read Terminal-Footnote-163525 +Ref: Read Terminal-Footnote-263799 +Node: Long63970 +Node: Executable Scripts65346 +Ref: Executable Scripts-Footnote-167207 +Ref: Executable Scripts-Footnote-267309 +Node: Comments67760 +Node: Quoting70128 +Node: DOS Quoting74745 +Node: Sample Data Files75413 +Node: Very Simple78445 +Node: Two Rules83042 +Node: More Complex85189 +Ref: More Complex-Footnote-188119 +Node: Statements/Lines88199 +Ref: Statements/Lines-Footnote-192555 +Node: Other Features92820 +Node: When93689 +Node: Regexp95832 +Node: Regexp Usage97286 +Node: Escape Sequences99312 +Node: Regexp Operators105055 +Ref: Regexp Operators-Footnote-1112227 +Ref: Regexp Operators-Footnote-2112374 +Node: Character Lists112472 +Ref: table-char-classes114247 +Node: GNU Regexp Operators116872 +Node: Case-sensitivity120585 +Ref: Case-sensitivity-Footnote-1123540 +Ref: Case-sensitivity-Footnote-2123775 +Node: Leftmost Longest123883 +Node: Computed Regexps125084 +Node: Locales128501 +Node: Reading Files131591 +Node: Records133607 +Ref: Records-Footnote-1142173 +Node: Fields142210 +Ref: Fields-Footnote-1145242 +Node: Nonconstant Fields145328 +Node: Changing Fields147530 +Node: Field Separators152815 +Node: Default Field Splitting155444 +Node: Regexp Field Splitting156561 +Node: Single Character Fields159911 
+Node: Command Line Field Separator160962 +Node: Field Splitting Summary164401 +Ref: Field Splitting Summary-Footnote-1167587 +Node: Constant Size167688 +Node: Splitting By Content172159 +Ref: Splitting By Content-Footnote-1175761 +Node: Multiple Line175801 +Ref: Multiple Line-Footnote-1181541 +Node: Getline181720 +Node: Plain Getline183941 +Node: Getline/Variable186030 +Node: Getline/File187171 +Node: Getline/Variable/File188493 +Ref: Getline/Variable/File-Footnote-1190092 +Node: Getline/Pipe190179 +Node: Getline/Variable/Pipe192727 +Node: Getline/Coprocess193834 +Node: Getline/Variable/Coprocess195077 +Node: Getline Notes195791 +Node: Getline Summary197733 +Ref: table-getline-variants198017 +Node: BEGINFILE/ENDFILE198922 +Node: Command line directories201777 +Node: Printing202412 +Node: Print204043 +Node: Print Examples205380 +Node: Output Separators208164 +Node: OFMT209923 +Node: Printf211281 +Node: Basic Printf212187 +Node: Control Letters213724 +Node: Format Modifiers217536 +Node: Printf Examples223547 +Node: Redirection226262 +Node: Special Files233240 +Node: Special FD233773 +Ref: Special FD-Footnote-1237348 +Node: Special Network237422 +Node: Special Caveats238277 +Node: Close Files And Pipes239071 +Ref: Close Files And Pipes-Footnote-1246015 +Ref: Close Files And Pipes-Footnote-2246163 +Node: Expressions246313 +Node: Values247382 +Node: Constants248058 +Node: Scalar Constants248738 +Ref: Scalar Constants-Footnote-1249597 +Node: Nondecimal-numbers249779 +Node: Regexp Constants252838 +Node: Using Constant Regexps253313 +Node: Variables256318 +Node: Using Variables256973 +Node: Assignment Options258700 +Node: Conversion260581 +Ref: table-locale-affects265955 +Ref: Conversion-Footnote-1266579 +Node: All Operators266688 +Node: Arithmetic Ops267318 +Node: Concatenation269817 +Ref: Concatenation-Footnote-1272610 +Node: Assignment Ops272729 +Ref: table-assign-ops277717 +Node: Increment Ops279118 +Node: Truth Values and Conditions282596 +Node: Truth Values283679 
+Node: Typing and Comparison284727 +Node: Variable Typing285516 +Ref: Variable Typing-Footnote-1289413 +Node: Comparison Operators289535 +Ref: table-relational-ops289945 +Node: POSIX String Comparison293494 +Ref: POSIX String Comparison-Footnote-1294451 +Node: Boolean Ops294589 +Ref: Boolean Ops-Footnote-1298667 +Node: Conditional Exp298758 +Node: Function Calls300490 +Node: Precedence304049 +Node: Patterns and Actions307702 +Node: Pattern Overview308756 +Node: Regexp Patterns310193 +Node: Expression Patterns310736 +Node: Ranges314286 +Node: BEGIN/END317375 +Node: Using BEGIN/END318125 +Ref: Using BEGIN/END-Footnote-1320856 +Node: I/O And BEGIN/END320970 +Node: Empty323237 +Node: Using Shell Variables323545 +Node: Action Overview325826 +Node: Statements328184 +Node: If Statement330040 +Node: While Statement331539 +Node: Do Statement333571 +Node: For Statement334720 +Node: Switch Statement337860 +Node: Break Statement339908 +Node: Continue Statement341728 +Node: Next Statement343426 +Node: Nextfile Statement345706 +Node: Exit Statement348424 +Node: Built-in Variables350695 +Node: User-modified351790 +Ref: User-modified-Footnote-1359756 +Node: Auto-set359818 +Ref: Auto-set-Footnote-1368480 +Node: ARGC and ARGV368685 +Node: Arrays372446 +Node: Array Basics373955 +Node: Array Intro374666 +Node: Reference to Elements379053 +Node: Assigning Elements380952 +Node: Array Example381443 +Node: Scanning an Array383175 +Node: Delete385452 +Ref: Delete-Footnote-1387842 +Node: Numeric Array Subscripts387899 +Node: Uninitialized Subscripts390086 +Node: Multi-dimensional391692 +Node: Multi-scanning394783 +Node: Array Sorting396367 +Node: Arrays of Arrays400197 +Node: Functions404305 +Node: Built-in405114 +Node: Calling Built-in406128 +Node: Numeric Functions408104 +Ref: Numeric Functions-Footnote-1411858 +Ref: Numeric Functions-Footnote-2412192 +Node: String Functions412461 +Ref: String Functions-Footnote-1434295 +Ref: String Functions-Footnote-2434424 +Ref: String 
Functions-Footnote-3434672 +Node: Gory Details434759 +Ref: table-sub-escapes436416 +Ref: table-sub-posix-92437762 +Ref: table-sub-proposed439105 +Ref: table-posix-2001-sub440465 +Ref: table-gensub-escapes441740 +Ref: Gory Details-Footnote-1442943 +Node: I/O Functions442994 +Ref: I/O Functions-Footnote-1449782 +Node: Time Functions449873 +Ref: Time Functions-Footnote-1460685 +Ref: Time Functions-Footnote-2460753 +Ref: Time Functions-Footnote-3460911 +Ref: Time Functions-Footnote-4461022 +Ref: Time Functions-Footnote-5461149 +Ref: Time Functions-Footnote-6461376 +Node: Bitwise Functions461642 +Ref: table-bitwise-ops462220 +Ref: Bitwise Functions-Footnote-1466460 +Node: I18N Functions466644 +Node: User-defined468367 +Node: Definition Syntax469171 +Node: Function Example473869 +Node: Function Caveats476451 +Node: Return Statement480376 +Node: Dynamic Typing483033 +Node: Indirect Calls483770 +Node: Internationalization493405 +Node: I18N and L10N494824 +Node: Explaining gettext495508 +Ref: Explaining gettext-Footnote-1500419 +Ref: Explaining gettext-Footnote-2500658 +Node: Programmer i18n500827 +Node: Translator i18n505062 +Node: String Extraction505853 +Ref: String Extraction-Footnote-1506810 +Node: Printf Ordering506936 +Ref: Printf Ordering-Footnote-1509716 +Node: I18N Portability509780 +Ref: I18N Portability-Footnote-1512225 +Node: I18N Example512288 +Ref: I18N Example-Footnote-1514908 +Node: Gawk I18N514980 +Node: Advanced Features515558 +Node: Nondecimal Data516873 +Node: Two-way I/O518434 +Ref: Two-way I/O-Footnote-1523917 +Node: TCP/IP Networking523994 +Node: Profiling526784 +Node: Invoking Gawk534245 +Node: Command Line535552 +Node: Options536337 +Ref: Options-Footnote-1549425 +Node: Other Arguments549450 +Node: AWKPATH Variable552131 +Ref: AWKPATH Variable-Footnote-1554906 +Node: Exit Status555166 +Node: Include Files555838 +Node: Obsolete559439 +Node: Undocumented560240 +Node: Known Bugs560502 +Node: Library Functions561104 +Ref: Library 
Functions-Footnote-1564085 +Node: Library Names564256 +Ref: Library Names-Footnote-1567729 +Ref: Library Names-Footnote-2567948 +Node: General Functions568034 +Node: Nextfile Function569097 +Node: Strtonum Function573461 +Node: Assert Function576402 +Node: Round Function579706 +Node: Cliff Random Function581246 +Node: Ordinal Functions582261 +Ref: Ordinal Functions-Footnote-1585321 +Node: Join Function585537 +Ref: Join Function-Footnote-1587299 +Node: Gettimeofday Function587499 +Node: Data File Management591210 +Node: Filetrans Function591842 +Node: Rewind Function595268 +Node: File Checking596714 +Node: Empty Files597744 +Node: Ignoring Assigns599969 +Node: Getopt Function601517 +Ref: Getopt Function-Footnote-1612799 +Node: Passwd Functions613002 +Ref: Passwd Functions-Footnote-1621980 +Node: Group Functions622068 +Node: Sample Programs630165 +Node: Running Examples630834 +Node: Clones631562 +Node: Cut Program632694 +Node: Egrep Program642453 +Ref: Egrep Program-Footnote-1650203 +Node: Id Program650313 +Node: Split Program653920 +Node: Tee Program657388 +Node: Uniq Program660131 +Node: Wc Program667498 +Ref: Wc Program-Footnote-1671742 +Node: Miscellaneous Programs671938 +Node: Dupword Program673058 +Node: Alarm Program675089 +Node: Translate Program679631 +Ref: Translate Program-Footnote-1684010 +Ref: Translate Program-Footnote-2684247 +Node: Labels Program684381 +Ref: Labels Program-Footnote-1687672 +Node: Word Sorting687756 +Node: History Sorting692103 +Node: Extract Program693941 +Node: Simple Sed701299 +Node: Igawk Program704356 +Ref: Igawk Program-Footnote-1719087 +Ref: Igawk Program-Footnote-2719288 +Node: Signature Program719426 +Node: Debugger720506 +Node: Debugging721382 +Node: Debugging Concepts721696 +Node: Debugging Terms723549 +Node: Awk Debugging726097 +Node: Sample dgawk session726989 +Node: dgawk invocation727481 +Node: Finding The Bug728665 +Node: List of Debugger Commands735180 +Node: Breakpoint Control736495 +Node: Dgawk Execution 
Control739705 +Node: Viewing And Changing Data743054 +Node: Dgawk Stack746350 +Node: Dgawk Info747811 +Node: Miscellaneous Dgawk Commands751749 +Node: Readline Support757465 +Node: Dgawk Limitations758281 +Node: Language History760453 +Node: V7/SVR3.1761830 +Node: SVR4764125 +Node: POSIX765570 +Node: BTL767282 +Node: POSIX/GNU768972 +Node: Contributors778636 +Node: Installation782241 +Node: Gawk Distribution783212 +Node: Getting783696 +Node: Extracting784522 +Node: Distribution contents785910 +Node: Unix Installation790983 +Node: Quick Installation791574 +Node: Additional Configuration Options793276 +Node: Configuration Philosophy795039 +Node: Non-Unix Installation797403 +Node: PC Installation797868 +Node: PC Binary Installation799174 +Node: PC Compiling801017 +Node: PC Dynamic805522 +Node: PC Using807885 +Node: Cygwin812433 +Node: MSYS813417 +Node: VMS Installation813923 +Node: VMS Compilation814527 +Node: VMS Installation Details816104 +Node: VMS Running817734 +Node: VMS POSIX819331 +Node: VMS Old Gawk820629 +Node: Unsupported821098 +Node: Atari Installation821560 +Node: Atari Compiling822847 +Node: Atari Using824736 +Node: BeOS Installation827583 +Node: Tandem Installation828728 +Node: Bugs830407 +Node: Other Versions834239 +Node: Notes839461 +Node: Compatibility Mode840153 +Node: Additions840936 +Node: Adding Code841686 +Node: New Ports847738 +Node: Dynamic Extensions851870 +Node: Internals853251 +Node: Plugin License863656 +Node: Sample Library864290 +Node: Internal File Description864954 +Node: Internal File Ops868649 +Ref: Internal File Ops-Footnote-1873525 +Node: Using Internal File Ops873673 +Node: Future Extensions875698 +Node: Basic Concepts879735 +Node: Basic High Level880492 +Ref: Basic High Level-Footnote-1884608 +Node: Basic Data Typing884802 +Node: Floating Point Issues889239 +Node: String Conversion Precision890322 +Ref: String Conversion Precision-Footnote-1892016 +Node: Unexpected Results892125 +Node: POSIX Floating Point Problems893951 +Ref: 
POSIX Floating Point Problems-Footnote-1897650 +Node: Glossary897688 +Node: Copying921456 +Node: GNU Free Documentation License959013 +Node: next-edition984157 +Node: unresolved984509 +Node: revision985009 +Node: consistency985432 +Node: Index988785 End Tag Table diff --git a/doc/gawk.texi b/doc/gawk.texi index 28692a39..59770d5f 100644 --- a/doc/gawk.texi +++ b/doc/gawk.texi @@ -417,6 +417,7 @@ particular records in a file and perform operations upon them. with @samp{<}, etc. * Variable Typing:: String type versus numeric type. * Comparison Operators:: The comparison operators. +* POSIX String Comparison:: String comparison with POSIX rules. * Boolean Ops:: Combining comparison expressions using boolean operators @samp{||} (``or''), @samp{&&} (``and'') and @samp{!} (``not''). @@ -8938,6 +8939,7 @@ compares variables. @menu * Variable Typing:: String type versus numeric type. * Comparison Operators:: The comparison operators. +* POSIX String Comparison:: String comparison with POSIX rules. @end menu @node Variable Typing @@ -9154,8 +9156,8 @@ the longer one. Thus, @code{"abc"} is less than @code{"abcd"}. @cindex troubleshooting, @code{==} operator It is very easy to accidentally mistype the @samp{==} operator and -leave off one of the @samp{=} characters. The result is still valid @command{awk} -code, but the program does not do what is intended: +leave off one of the @samp{=} characters. The result is still valid +@command{awk} code, but the program does not do what is intended: @example if (a = b) # oops! should be a == b @@ -9258,6 +9260,31 @@ One special place where @code{/foo/} is @emph{not} an abbreviation for @samp{!~}. @xref{Using Constant Regexps}, where this is discussed in more detail. + +@node POSIX String Comparison +@subsubsection String comparison with POSIX rules. + +The POSIX standard says that string comparison is performed based +on the locale's collating order. 
This is usually very different +from the results obtained when doing straight character-by-character +comparison.@footnote{Technically, string comparison is supposed +to behave the same way as if the strings are compared with the C +@code{strcoll()} function.} + +Because this behavior differs considerably from existing practice, +@command{gawk} only implements it when in POSIX mode (@pxref{Options}). +Here is an example to illustrate the difference, in a @code{en_US.UTF-8} +locale: + +@example +$ @kbd{gawk 'BEGIN @{ printf("ABC < abc = %s\n",} +> @kbd{("ABC" < "abc" ? "TRUE" : "FALSE")) @}'} +@print{} ABC < abc = TRUE +$ @kbd{gawk --posix 'BEGIN @{ printf("ABC < abc = %s\n",} +> @kbd{("ABC" < "abc" ? "TRUE" : "FALSE")) @}'} +@print{} ABC < abc = FALSE +@end example + @c ENDOFRANGE comex @c ENDOFRANGE excom @c ENDOFRANGE vartypc @@ -500,6 +500,91 @@ genflags2str(int flagval, const struct flagtab *tab) return buffer; } +/* posix_compare --- compare strings using strcoll, returning negative, 0, positive */ + +static int +posix_compare(NODE *s1, NODE *s2) +{ + int ret = 0; + char save1, save2; + size_t l = 0; + + save1 = s1->stptr[s1->stlen]; + s1->stptr[s1->stlen] = '\0'; + + save2 = s2->stptr[s2->stlen]; + s2->stptr[s2->stlen] = '\0'; + + if (gawk_mb_cur_max == 1) { + if (strlen(s1->stptr) == s1->stlen && strlen(s2->stptr) == s2->stlen) + ret = strcoll(s1->stptr, s2->stptr); + else { + char b1[2], b2[2]; + char *p1, *p2; + size_t i; + + if (s1->stlen < s2->stlen) + l = s1->stlen; + else + l = s2->stlen; + + b1[1] = b2[1] = '\0'; + for (i = ret = 0, p1 = s1->stptr, p2 = s2->stptr; + ret == 0 && i < l; + p1++, p2++, i++) { + b1[0] = *p1; + b2[0] = *p2; + ret = strcoll(b1, b2); + } + if (ret == 0 && s1->stlen != s2->stlen) + ret = (s1->stlen < s2->stlen) ? -1 : 1; + } + /* + * Either ret != 0 at the first differing character, or the common + * prefix matched and the shorter string (if any) sorts first.
+ */ + } +#ifdef MBS_SUPPORT + else { + /* Similar logic, using wide characters */ + (void) force_wstring(s1); + (void) force_wstring(s2); + + if (wcslen(s1->wstptr) == s1->wstlen && wcslen(s2->wstptr) == s2->wstlen) + ret = wcscoll(s1->wstptr, s2->wstptr); + else { + wchar_t b1[2], b2[2]; + wchar_t *p1, *p2; + size_t i; + + if (s1->wstlen < s2->wstlen) + l = s1->wstlen; + else + l = s2->wstlen; + + b1[1] = b2[1] = L'\0'; + for (i = ret = 0, p1 = s1->wstptr, p2 = s2->wstptr; + ret == 0 && i < l; + p1++, p2++, i++) { + b1[0] = *p1; + b2[0] = *p2; + ret = wcscoll(b1, b2); + } + if (ret == 0 && s1->wstlen != s2->wstlen) + ret = (s1->wstlen < s2->wstlen) ? -1 : 1; + } + /* + * Either ret != 0 at the first differing character, or the common + * prefix matched and the shorter string (if any) sorts first. + */ + } +#endif + + s1->stptr[s1->stlen] = save1; + s2->stptr[s2->stlen] = save2; + return ret; +} + /* cmp_nodes --- compare two nodes, returning negative, 0, positive */ @@ -535,6 +620,10 @@ cmp_nodes(NODE *t1, NODE *t2) ldiff = len1 - len2; if (len1 == 0 || len2 == 0) return ldiff; + + if (do_posix) + return posix_compare(t1, t2); + l = (ldiff <= 0 ? len1 : len2); if (IGNORECASE) { const unsigned char *cp1 = (const unsigned char *) t1->stptr; diff --git a/missing_d/ChangeLog b/missing_d/ChangeLog index 6430217f..40e4ede0 100644 --- a/missing_d/ChangeLog +++ b/missing_d/ChangeLog @@ -1,3 +1,9 @@ +Thu Nov 25 21:16:58 2010 Arnold D. Robbins <arnold@skeeve.com> + + * strcoll.c: New file. + * memcmp.c, memcpy.c, memset.c, strchr.c, strtod.c: Remove + `register' keyword everywhere, as in mainline code. + Sun Jun 6 21:44:19 2010 Arnold D.
Robbins <arnold@skeeve.com> * getaddrinfo.h: Add undef for addrinfo, freeaddrinfo, and diff --git a/missing_d/memcmp.c b/missing_d/memcmp.c index 63cb5f8f..12b3775d 100644 --- a/missing_d/memcmp.c +++ b/missing_d/memcmp.c @@ -7,8 +7,8 @@ int memcmp (s1, s2, l) -register char *s1, *s2; -register int l; +char *s1, *s2; +int l; { for (; l-- > 0; s1++, s2++) { if (*s1 != *s2) diff --git a/missing_d/memcpy.c b/missing_d/memcpy.c index 3c4accdf..568a006a 100644 --- a/missing_d/memcpy.c +++ b/missing_d/memcpy.c @@ -6,10 +6,10 @@ char * memcpy (dest, src, l) -register char *dest, *src; -register int l; +char *dest, *src; +int l; { - register char *ret = dest; + char *ret = dest; while (l--) *dest++ = *src++; diff --git a/missing_d/memmove.c b/missing_d/memmove.c index a28a32d1..4f577b38 100644 --- a/missing_d/memmove.c +++ b/missing_d/memmove.c @@ -21,7 +21,7 @@ /* * August 2006. For Gawk: Borrowed from GLIBC and hacked unmercifully. - * DON'T steal this for your own code, got straight to the GLIBC + * DON'T steal this for your own code, go straight to the GLIBC * source for the original versions. */ diff --git a/missing_d/memset.c b/missing_d/memset.c index 1ff4458b..0e509e85 100644 --- a/missing_d/memset.c +++ b/missing_d/memset.c @@ -7,11 +7,11 @@ void * memset(dest, val, l) void *dest; -register int val; -register size_t l; +int val; +size_t l; { - register char *ret = dest; - register char *d = dest; + char *ret = dest; + char *d = dest; while (l--) *d++ = val; diff --git a/missing_d/strchr.c b/missing_d/strchr.c index 7da479fc..e549099d 100644 --- a/missing_d/strchr.c +++ b/missing_d/strchr.c @@ -10,7 +10,7 @@ char * strchr(str, c) -register const char *str, c; +const char *str, c; { if (c == '\0') { /* thanks to Mike Brennan ... 
*/ @@ -35,9 +35,9 @@ register const char *str, c; char * strrchr(str, c) -register const char *str, c; +const char *str, c; { - register const char *save = NULL; + const char *save = NULL; for (; *str; str++) if (*str == c) diff --git a/missing_d/strcoll.c b/missing_d/strcoll.c new file mode 100644 index 00000000..ac65795e --- /dev/null +++ b/missing_d/strcoll.c @@ -0,0 +1,7 @@ +/* replacement strcoll.c --- fallback used only when HAVE_STRCOLL is not defined */ + +int +strcoll(const char *s1, const char *s2) +{ + return strcmp(s1, s2); /* locale is ignored: plain strcmp() ordering */ +} diff --git a/missing_d/strtod.c b/missing_d/strtod.c index c4f9d2bd..570f6407 100644 --- a/missing_d/strtod.c +++ b/missing_d/strtod.c @@ -40,8 +40,8 @@ extern double atof(); double gawk_strtod(s, ptr) -register const char *s; -register const char **ptr; +const char *s; +const char **ptr; { const char *start = s; /* save original start of string */ const char *begin = NULL; /* where the number really begins */ @@ -106,3 +106,7 @@ extern void _exit(int); extern SPRINTF_RET sprintf(char *, const char *, ...); #undef aptr_t + +#if !defined(HAVE_STRCOLL) +extern int strcoll(const char *, const char *); +#endif @@ -100,3 +100,7 @@ #ifndef HAVE_SETENV #include "missing_d/setenv.c" #endif + +#ifndef HAVE_STRCOLL +#include "missing_d/strcoll.c" +#endif |