about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Arnold D. Robbins <arnold@skeeve.com> 2016-08-25 22:10:58 +0300
committer Arnold D. Robbins <arnold@skeeve.com> 2016-08-25 22:10:58 +0300
commit 00682d87a1a1c0535c0fa5adb27867578dc76d49 (patch)
tree 604bff488e97735f1a1c14078f02a99610546911
parent 7453c813457583197fcf0fe1c7d2301d6013bfea (diff)
download egawk-00682d87a1a1c0535c0fa5adb27867578dc76d49.tar.gz
egawk-00682d87a1a1c0535c0fa5adb27867578dc76d49.tar.bz2
egawk-00682d87a1a1c0535c0fa5adb27867578dc76d49.zip
New POSIX rules for string comparison.
-rw-r--r-- ChangeLog 14
-rw-r--r-- awk.h 2
-rw-r--r-- debug.c 2
-rw-r--r-- doc/ChangeLog 6
-rw-r--r-- doc/gawk.info 830
-rw-r--r-- doc/gawk.texi 42
-rw-r--r-- doc/gawktexi.in 42
-rw-r--r-- eval.c 15
-rw-r--r-- interpret.h 14
-rw-r--r-- re.c 2
10 files changed, 526 insertions, 443 deletions
diff --git a/ChangeLog b/ChangeLog
index 3e87c20d..75d1bd37 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2016-08-25 Arnold D. Robbins <arnold@skeeve.com>
+
+ POSIX now says use strcmp for == and !=. Thanks to Chet Ramey
+ for pointing me at the change. Make it so:
+
+ * awk.h (cmp_nodes): New 3rd param indicating strcmp, not strcoll.
+ * debug.c (cmp_val): Update call to cmp_nodes.
+ * eval.c (cmp_nodes): New 3rd param indicating strcmp, not strcoll.
+ Adjust code and all callers.
+ (scalar_cmp_t): New enum type. Used in ...
+ (cmp_scalars): ... in order to call cmp_nodes correctly.
+ * interpret.h: Use the enum type in calls to cmp_scalars.
+ * re.c (re_update): Adjust call to cmp_nodes.
+
2016-08-25 Norihiro Tanaka <noritnk@kcn.ne.jp>
* awk.h (struct Regexp): Remove dfa. Now dfareg instead of it. All
diff --git a/awk.h b/awk.h
index d8b5b8d4..2234abf8 100644
--- a/awk.h
+++ b/awk.h
@@ -1442,7 +1442,7 @@ extern int sanitize_exit_status(int status);
extern void PUSH_CODE(INSTRUCTION *cp);
extern INSTRUCTION *POP_CODE(void);
extern void init_interpret(void);
-extern int cmp_nodes(NODE *t1, NODE *t2);
+extern int cmp_nodes(NODE *t1, NODE *t2, bool use_strcmp);
extern int cmp_awknums(const NODE *t1, const NODE *t2);
extern void set_IGNORECASE(void);
extern void set_OFS(void);
diff --git a/debug.c b/debug.c
index f4640adb..9f2d948b 100644
--- a/debug.c
+++ b/debug.c
@@ -1670,7 +1670,7 @@ cmp_val(struct list_item *w, NODE *old, NODE *new)
if (new->type == Node_var_array) /* 5 */
return true;
- return cmp_nodes(old, new); /* 4 */
+ return cmp_nodes(old, new, true); /* 4 */
}
/* watchpoint_triggered --- check if we should stop at this watchpoint;
diff --git a/doc/ChangeLog b/doc/ChangeLog
index e6d9bf05..efa2b561 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,5 +1,11 @@
2016-08-25 Arnold D. Robbins <arnold@skeeve.com>
+ * gawktexi.in (POSIX String Comparison): Update for new
+ spec where == and != use strcmp, rest use strcoll. Thanks to
+ Chet Ramey for pointing me at the new rules.
+
+2016-08-25 Arnold D. Robbins <arnold@skeeve.com>
+
* 4.1.4: Release tar ball made.
2016-08-24 Arnold D. Robbins <arnold@skeeve.com>
diff --git a/doc/gawk.info b/doc/gawk.info
index 1766ab94..973af87c 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -8615,18 +8615,18 @@ Constant Regexps::, where this is discussed in more detail.

File: gawk.info, Node: POSIX String Comparison, Prev: Comparison Operators, Up: Typing and Comparison
-6.3.2.3 String Comparison with POSIX Rules
-..........................................
+6.3.2.3 String Comparison Based on Locale Collating Order
+.........................................................
-The POSIX standard says that string comparison is performed based on the
-locale's "collating order". This is the order in which characters sort,
-as defined by the locale (for more discussion, *note Locales::). This
-order is usually very different from the results obtained when doing
-straight character-by-character comparison.(1)
+The POSIX standard used to say that all string comparisons are performed
+based on the locale's "collating order". This is the order in which
+characters sort, as defined by the locale (for more discussion, *note
+Locales::). This order is usually very different from the results
+obtained when doing straight byte-by-byte comparison.(1)
Because this behavior differs considerably from existing practice,
-'gawk' only implements it when in POSIX mode (*note Options::). Here is
-an example to illustrate the difference, in an 'en_US.UTF-8' locale:
+'gawk' only implemented it when in POSIX mode (*note Options::). Here
+is an example to illustrate the difference, in an 'en_US.UTF-8' locale:
$ gawk 'BEGIN { printf("ABC < abc = %s\n",
> ("ABC" < "abc" ? "TRUE" : "FALSE")) }'
@@ -8635,11 +8635,28 @@ an example to illustrate the difference, in an 'en_US.UTF-8' locale:
> ("ABC" < "abc" ? "TRUE" : "FALSE")) }'
-| ABC < abc = FALSE
+ Fortunately, as of August 2016, comparison based on locale collating
+order is no longer required for the '==' and '!=' operators.(2)
+However, comparison based on locales is still required for '<', '<=',
+'>', and '>='. POSIX thus recommends as follows:
+
+ Since the '==' operator checks whether strings are identical, not
+ whether they collate equally, applications needing to check whether
+ strings collate equally can use:
+
+ a <= b && a >= b
+
+ As of version 4.2, 'gawk' continues to use locale collating order for
+'<', '<=', '>', and '>=' only in POSIX mode.
+
---------- Footnotes ----------
(1) Technically, string comparison is supposed to behave the same way
as if the strings were compared with the C 'strcoll()' function.
+ (2) See the Austin Group website
+(http://austingroupbugs.net/view.php?id=1070).
+

File: gawk.info, Node: Boolean Ops, Next: Conditional Exp, Prev: Typing and Comparison, Up: Truth Values and Conditions
@@ -27603,7 +27620,7 @@ ranges, such that outside the '"C"' and '"POSIX"' locales, the meaning
of range expressions was _undefined_.(3)
By using this lovely technical term, the standard gives license to
-implementors to implement ranges in whatever way they choose. The
+implementers to implement ranges in whatever way they choose. The
'gawk' maintainer chose to apply the pre-POSIX meaning both with the
default regexp matching and when '--traditional' or '--posix' are used.
In all cases 'gawk' remains POSIX-compliant.
@@ -35427,401 +35444,402 @@ Node: Variable Typing364779
Node: Comparison Operators368403
Ref: table-relational-ops368822
Node: POSIX String Comparison372317
-Ref: POSIX String Comparison-Footnote-1373391
-Node: Boolean Ops373530
-Ref: Boolean Ops-Footnote-1378012
-Node: Conditional Exp378104
-Node: Function Calls379840
-Node: Precedence383717
-Node: Locales387376
-Node: Expressions Summary389008
-Node: Patterns and Actions391581
-Node: Pattern Overview392701
-Node: Regexp Patterns394378
-Node: Expression Patterns394920
-Node: Ranges398701
-Node: BEGIN/END401809
-Node: Using BEGIN/END402570
-Ref: Using BEGIN/END-Footnote-1405306
-Node: I/O And BEGIN/END405412
-Node: BEGINFILE/ENDFILE407726
-Node: Empty410633
-Node: Using Shell Variables410950
-Node: Action Overview413224
-Node: Statements415549
-Node: If Statement417397
-Node: While Statement418892
-Node: Do Statement420920
-Node: For Statement422068
-Node: Switch Statement425226
-Node: Break Statement427612
-Node: Continue Statement429704
-Node: Next Statement431531
-Node: Nextfile Statement433914
-Node: Exit Statement436566
-Node: Built-in Variables438969
-Node: User-modified440102
-Node: Auto-set447688
-Ref: Auto-set-Footnote-1462341
-Ref: Auto-set-Footnote-2462547
-Node: ARGC and ARGV462603
-Node: Pattern Action Summary466816
-Node: Arrays469246
-Node: Array Basics470575
-Node: Array Intro471419
-Ref: figure-array-elements473394
-Ref: Array Intro-Footnote-1476098
-Node: Reference to Elements476226
-Node: Assigning Elements478690
-Node: Array Example479181
-Node: Scanning an Array480940
-Node: Controlling Scanning483962
-Ref: Controlling Scanning-Footnote-1489361
-Node: Numeric Array Subscripts489677
-Node: Uninitialized Subscripts491861
-Node: Delete493480
-Ref: Delete-Footnote-1496232
-Node: Multidimensional496289
-Node: Multiscanning499384
-Node: Arrays of Arrays500975
-Node: Arrays Summary505742
-Node: Functions507835
-Node: Built-in508873
-Node: Calling Built-in509954
-Node: Numeric Functions511950
-Ref: Numeric Functions-Footnote-1516783
-Ref: Numeric Functions-Footnote-2517140
-Ref: Numeric Functions-Footnote-3517188
-Node: String Functions517460
-Ref: String Functions-Footnote-1540964
-Ref: String Functions-Footnote-2541092
-Ref: String Functions-Footnote-3541340
-Node: Gory Details541427
-Ref: table-sub-escapes543218
-Ref: table-sub-proposed544737
-Ref: table-posix-sub546100
-Ref: table-gensub-escapes547641
-Ref: Gory Details-Footnote-1548464
-Node: I/O Functions548618
-Ref: table-system-return-values555200
-Ref: I/O Functions-Footnote-1557180
-Ref: I/O Functions-Footnote-2557328
-Node: Time Functions557448
-Ref: Time Functions-Footnote-1567953
-Ref: Time Functions-Footnote-2568021
-Ref: Time Functions-Footnote-3568179
-Ref: Time Functions-Footnote-4568290
-Ref: Time Functions-Footnote-5568402
-Ref: Time Functions-Footnote-6568629
-Node: Bitwise Functions568895
-Ref: table-bitwise-ops569489
-Ref: Bitwise Functions-Footnote-1573827
-Node: Type Functions574000
-Node: I18N Functions576532
-Node: User-defined578183
-Node: Definition Syntax578988
-Ref: Definition Syntax-Footnote-1584675
-Node: Function Example584746
-Ref: Function Example-Footnote-1587668
-Node: Function Caveats587690
-Node: Calling A Function588208
-Node: Variable Scope589166
-Node: Pass By Value/Reference592160
-Node: Return Statement595659
-Node: Dynamic Typing598638
-Node: Indirect Calls599568
-Ref: Indirect Calls-Footnote-1609819
-Node: Functions Summary609947
-Node: Library Functions612652
-Ref: Library Functions-Footnote-1616259
-Ref: Library Functions-Footnote-2616402
-Node: Library Names616573
-Ref: Library Names-Footnote-1620033
-Ref: Library Names-Footnote-2620256
-Node: General Functions620342
-Node: Strtonum Function621445
-Node: Assert Function624467
-Node: Round Function627793
-Node: Cliff Random Function629334
-Node: Ordinal Functions630350
-Ref: Ordinal Functions-Footnote-1633413
-Ref: Ordinal Functions-Footnote-2633665
-Node: Join Function633875
-Ref: Join Function-Footnote-1635645
-Node: Getlocaltime Function635845
-Node: Readfile Function639587
-Node: Shell Quoting641559
-Node: Data File Management642960
-Node: Filetrans Function643592
-Node: Rewind Function647688
-Node: File Checking649594
-Ref: File Checking-Footnote-1650928
-Node: Empty Files651129
-Node: Ignoring Assigns653108
-Node: Getopt Function654658
-Ref: Getopt Function-Footnote-1666127
-Node: Passwd Functions666327
-Ref: Passwd Functions-Footnote-1675166
-Node: Group Functions675254
-Ref: Group Functions-Footnote-1683151
-Node: Walking Arrays683358
-Node: Library Functions Summary686366
-Node: Library Exercises687772
-Node: Sample Programs688237
-Node: Running Examples689007
-Node: Clones689735
-Node: Cut Program690959
-Node: Egrep Program700888
-Ref: Egrep Program-Footnote-1708400
-Node: Id Program708510
-Node: Split Program712190
-Ref: Split Program-Footnote-1715649
-Node: Tee Program715778
-Node: Uniq Program718568
-Node: Wc Program725994
-Ref: Wc Program-Footnote-1730249
-Node: Miscellaneous Programs730343
-Node: Dupword Program731556
-Node: Alarm Program733586
-Node: Translate Program738441
-Ref: Translate Program-Footnote-1743006
-Node: Labels Program743276
-Ref: Labels Program-Footnote-1746627
-Node: Word Sorting746711
-Node: History Sorting750783
-Node: Extract Program752618
-Node: Simple Sed760147
-Node: Igawk Program763221
-Ref: Igawk Program-Footnote-1777552
-Ref: Igawk Program-Footnote-2777754
-Ref: Igawk Program-Footnote-3777876
-Node: Anagram Program777991
-Node: Signature Program781053
-Node: Programs Summary782300
-Node: Programs Exercises783514
-Ref: Programs Exercises-Footnote-1787643
-Node: Advanced Features787734
-Node: Nondecimal Data789724
-Node: Array Sorting791315
-Node: Controlling Array Traversal792015
-Ref: Controlling Array Traversal-Footnote-1800382
-Node: Array Sorting Functions800500
-Ref: Array Sorting Functions-Footnote-1805591
-Node: Two-way I/O805787
-Ref: Two-way I/O-Footnote-1812337
-Ref: Two-way I/O-Footnote-2812524
-Node: TCP/IP Networking812606
-Node: Profiling815724
-Ref: Profiling-Footnote-1824217
-Node: Advanced Features Summary824540
-Node: Internationalization826384
-Node: I18N and L10N827864
-Node: Explaining gettext828551
-Ref: Explaining gettext-Footnote-1834443
-Ref: Explaining gettext-Footnote-2834628
-Node: Programmer i18n834793
-Ref: Programmer i18n-Footnote-1839648
-Node: Translator i18n839697
-Node: String Extraction840491
-Ref: String Extraction-Footnote-1841623
-Node: Printf Ordering841709
-Ref: Printf Ordering-Footnote-1844495
-Node: I18N Portability844559
-Ref: I18N Portability-Footnote-1847015
-Node: I18N Example847078
-Ref: I18N Example-Footnote-1849884
-Node: Gawk I18N849957
-Node: I18N Summary850602
-Node: Debugger851943
-Node: Debugging852965
-Node: Debugging Concepts853406
-Node: Debugging Terms855215
-Node: Awk Debugging857790
-Node: Sample Debugging Session858696
-Node: Debugger Invocation859230
-Node: Finding The Bug860616
-Node: List of Debugger Commands867094
-Node: Breakpoint Control868427
-Node: Debugger Execution Control872121
-Node: Viewing And Changing Data875483
-Node: Execution Stack878857
-Node: Debugger Info880494
-Node: Miscellaneous Debugger Commands884565
-Node: Readline Support889653
-Node: Limitations890549
-Ref: Limitations-Footnote-1894780
-Node: Debugging Summary894831
-Node: Arbitrary Precision Arithmetic896110
-Node: Computer Arithmetic897526
-Ref: table-numeric-ranges901117
-Ref: Computer Arithmetic-Footnote-1901839
-Node: Math Definitions901896
-Ref: table-ieee-formats905210
-Ref: Math Definitions-Footnote-1905813
-Node: MPFR features905918
-Node: FP Math Caution907635
-Ref: FP Math Caution-Footnote-1908707
-Node: Inexactness of computations909076
-Node: Inexact representation910036
-Node: Comparing FP Values911396
-Node: Errors accumulate912478
-Node: Getting Accuracy913911
-Node: Try To Round916621
-Node: Setting precision917520
-Ref: table-predefined-precision-strings918217
-Node: Setting the rounding mode920047
-Ref: table-gawk-rounding-modes920421
-Ref: Setting the rounding mode-Footnote-1923829
-Node: Arbitrary Precision Integers924008
-Ref: Arbitrary Precision Integers-Footnote-1928925
-Node: POSIX Floating Point Problems929074
-Ref: POSIX Floating Point Problems-Footnote-1932956
-Node: Floating point summary932994
-Node: Dynamic Extensions935184
-Node: Extension Intro936737
-Node: Plugin License938003
-Node: Extension Mechanism Outline938800
-Ref: figure-load-extension939239
-Ref: figure-register-new-function940804
-Ref: figure-call-new-function941896
-Node: Extension API Description943958
-Node: Extension API Functions Introduction945490
-Node: General Data Types950349
-Ref: General Data Types-Footnote-1956304
-Node: Memory Allocation Functions956603
-Ref: Memory Allocation Functions-Footnote-1959448
-Node: Constructor Functions959547
-Node: Registration Functions961292
-Node: Extension Functions961977
-Node: Exit Callback Functions964600
-Node: Extension Version String965850
-Node: Input Parsers966513
-Node: Output Wrappers976395
-Node: Two-way processors980907
-Node: Printing Messages983172
-Ref: Printing Messages-Footnote-1984343
-Node: Updating ERRNO984496
-Node: Requesting Values985235
-Ref: table-value-types-returned985972
-Node: Accessing Parameters986855
-Node: Symbol Table Access988090
-Node: Symbol table by name988602
-Node: Symbol table by cookie990623
-Ref: Symbol table by cookie-Footnote-1994775
-Node: Cached values994839
-Ref: Cached values-Footnote-1998346
-Node: Array Manipulation998437
-Ref: Array Manipulation-Footnote-1999528
-Node: Array Data Types999565
-Ref: Array Data Types-Footnote-11002223
-Node: Array Functions1002315
-Node: Flattening Arrays1006173
-Node: Creating Arrays1013081
-Node: Redirection API1017850
-Node: Extension API Variables1020681
-Node: Extension Versioning1021314
-Ref: gawk-api-version1021751
-Node: Extension API Informational Variables1023507
-Node: Extension API Boilerplate1024571
-Node: Finding Extensions1028385
-Node: Extension Example1028944
-Node: Internal File Description1029742
-Node: Internal File Ops1033822
-Ref: Internal File Ops-Footnote-11045584
-Node: Using Internal File Ops1045724
-Ref: Using Internal File Ops-Footnote-11048107
-Node: Extension Samples1048381
-Node: Extension Sample File Functions1049910
-Node: Extension Sample Fnmatch1057559
-Node: Extension Sample Fork1059046
-Node: Extension Sample Inplace1060264
-Node: Extension Sample Ord1063474
-Node: Extension Sample Readdir1064310
-Ref: table-readdir-file-types1065199
-Node: Extension Sample Revout1066004
-Node: Extension Sample Rev2way1066593
-Node: Extension Sample Read write array1067333
-Node: Extension Sample Readfile1069275
-Node: Extension Sample Time1070370
-Node: Extension Sample API Tests1071718
-Node: gawkextlib1072210
-Node: Extension summary1074657
-Node: Extension Exercises1078359
-Node: Language History1079857
-Node: V7/SVR3.11081513
-Node: SVR41083665
-Node: POSIX1085099
-Node: BTL1086478
-Node: POSIX/GNU1087207
-Node: Feature History1093069
-Node: Common Extensions1107439
-Node: Ranges and Locales1108722
-Ref: Ranges and Locales-Footnote-11113338
-Ref: Ranges and Locales-Footnote-21113365
-Ref: Ranges and Locales-Footnote-31113600
-Node: Contributors1113821
-Node: History summary1119381
-Node: Installation1120761
-Node: Gawk Distribution1121705
-Node: Getting1122189
-Node: Extracting1123150
-Node: Distribution contents1124788
-Node: Unix Installation1130882
-Node: Quick Installation1131564
-Node: Shell Startup Files1133978
-Node: Additional Configuration Options1135056
-Node: Configuration Philosophy1136861
-Node: Non-Unix Installation1139230
-Node: PC Installation1139688
-Node: PC Binary Installation1141008
-Node: PC Compiling1142860
-Ref: PC Compiling-Footnote-11145654
-Node: PC Testing1145763
-Node: PC Using1146943
-Ref: PC Using-Footnote-11151096
-Node: Cygwin1151169
-Node: MSYS1151939
-Node: VMS Installation1152440
-Node: VMS Compilation1153231
-Ref: VMS Compilation-Footnote-11154460
-Node: VMS Dynamic Extensions1154518
-Node: VMS Installation Details1156203
-Node: VMS Running1158456
-Node: VMS GNV1162735
-Node: VMS Old Gawk1163470
-Node: Bugs1163941
-Node: Other Versions1168256
-Node: Installation summary1174840
-Node: Notes1175891
-Node: Compatibility Mode1176756
-Node: Additions1177538
-Node: Accessing The Source1178463
-Node: Adding Code1179898
-Node: New Ports1186117
-Node: Derived Files1190605
-Ref: Derived Files-Footnote-11196090
-Ref: Derived Files-Footnote-21196125
-Ref: Derived Files-Footnote-31196723
-Node: Future Extensions1196837
-Node: Implementation Limitations1197495
-Node: Extension Design1198678
-Node: Old Extension Problems1199832
-Ref: Old Extension Problems-Footnote-11201350
-Node: Extension New Mechanism Goals1201407
-Ref: Extension New Mechanism Goals-Footnote-11204771
-Node: Extension Other Design Decisions1204960
-Node: Extension Future Growth1207073
-Node: Old Extension Mechanism1207909
-Node: Notes summary1209672
-Node: Basic Concepts1210854
-Node: Basic High Level1211535
-Ref: figure-general-flow1211817
-Ref: figure-process-flow1212502
-Ref: Basic High Level-Footnote-11215803
-Node: Basic Data Typing1215988
-Node: Glossary1219316
-Node: Copying1251263
-Node: GNU Free Documentation License1288802
-Node: Index1313920
+Ref: POSIX String Comparison-Footnote-1374012
+Ref: POSIX String Comparison-Footnote-2374151
+Node: Boolean Ops374235
+Ref: Boolean Ops-Footnote-1378717
+Node: Conditional Exp378809
+Node: Function Calls380545
+Node: Precedence384422
+Node: Locales388081
+Node: Expressions Summary389713
+Node: Patterns and Actions392286
+Node: Pattern Overview393406
+Node: Regexp Patterns395083
+Node: Expression Patterns395625
+Node: Ranges399406
+Node: BEGIN/END402514
+Node: Using BEGIN/END403275
+Ref: Using BEGIN/END-Footnote-1406011
+Node: I/O And BEGIN/END406117
+Node: BEGINFILE/ENDFILE408431
+Node: Empty411338
+Node: Using Shell Variables411655
+Node: Action Overview413929
+Node: Statements416254
+Node: If Statement418102
+Node: While Statement419597
+Node: Do Statement421625
+Node: For Statement422773
+Node: Switch Statement425931
+Node: Break Statement428317
+Node: Continue Statement430409
+Node: Next Statement432236
+Node: Nextfile Statement434619
+Node: Exit Statement437271
+Node: Built-in Variables439674
+Node: User-modified440807
+Node: Auto-set448393
+Ref: Auto-set-Footnote-1463046
+Ref: Auto-set-Footnote-2463252
+Node: ARGC and ARGV463308
+Node: Pattern Action Summary467521
+Node: Arrays469951
+Node: Array Basics471280
+Node: Array Intro472124
+Ref: figure-array-elements474099
+Ref: Array Intro-Footnote-1476803
+Node: Reference to Elements476931
+Node: Assigning Elements479395
+Node: Array Example479886
+Node: Scanning an Array481645
+Node: Controlling Scanning484667
+Ref: Controlling Scanning-Footnote-1490066
+Node: Numeric Array Subscripts490382
+Node: Uninitialized Subscripts492566
+Node: Delete494185
+Ref: Delete-Footnote-1496937
+Node: Multidimensional496994
+Node: Multiscanning500089
+Node: Arrays of Arrays501680
+Node: Arrays Summary506447
+Node: Functions508540
+Node: Built-in509578
+Node: Calling Built-in510659
+Node: Numeric Functions512655
+Ref: Numeric Functions-Footnote-1517488
+Ref: Numeric Functions-Footnote-2517845
+Ref: Numeric Functions-Footnote-3517893
+Node: String Functions518165
+Ref: String Functions-Footnote-1541669
+Ref: String Functions-Footnote-2541797
+Ref: String Functions-Footnote-3542045
+Node: Gory Details542132
+Ref: table-sub-escapes543923
+Ref: table-sub-proposed545442
+Ref: table-posix-sub546805
+Ref: table-gensub-escapes548346
+Ref: Gory Details-Footnote-1549169
+Node: I/O Functions549323
+Ref: table-system-return-values555905
+Ref: I/O Functions-Footnote-1557885
+Ref: I/O Functions-Footnote-2558033
+Node: Time Functions558153
+Ref: Time Functions-Footnote-1568658
+Ref: Time Functions-Footnote-2568726
+Ref: Time Functions-Footnote-3568884
+Ref: Time Functions-Footnote-4568995
+Ref: Time Functions-Footnote-5569107
+Ref: Time Functions-Footnote-6569334
+Node: Bitwise Functions569600
+Ref: table-bitwise-ops570194
+Ref: Bitwise Functions-Footnote-1574532
+Node: Type Functions574705
+Node: I18N Functions577237
+Node: User-defined578888
+Node: Definition Syntax579693
+Ref: Definition Syntax-Footnote-1585380
+Node: Function Example585451
+Ref: Function Example-Footnote-1588373
+Node: Function Caveats588395
+Node: Calling A Function588913
+Node: Variable Scope589871
+Node: Pass By Value/Reference592865
+Node: Return Statement596364
+Node: Dynamic Typing599343
+Node: Indirect Calls600273
+Ref: Indirect Calls-Footnote-1610524
+Node: Functions Summary610652
+Node: Library Functions613357
+Ref: Library Functions-Footnote-1616964
+Ref: Library Functions-Footnote-2617107
+Node: Library Names617278
+Ref: Library Names-Footnote-1620738
+Ref: Library Names-Footnote-2620961
+Node: General Functions621047
+Node: Strtonum Function622150
+Node: Assert Function625172
+Node: Round Function628498
+Node: Cliff Random Function630039
+Node: Ordinal Functions631055
+Ref: Ordinal Functions-Footnote-1634118
+Ref: Ordinal Functions-Footnote-2634370
+Node: Join Function634580
+Ref: Join Function-Footnote-1636350
+Node: Getlocaltime Function636550
+Node: Readfile Function640292
+Node: Shell Quoting642264
+Node: Data File Management643665
+Node: Filetrans Function644297
+Node: Rewind Function648393
+Node: File Checking650299
+Ref: File Checking-Footnote-1651633
+Node: Empty Files651834
+Node: Ignoring Assigns653813
+Node: Getopt Function655363
+Ref: Getopt Function-Footnote-1666832
+Node: Passwd Functions667032
+Ref: Passwd Functions-Footnote-1675871
+Node: Group Functions675959
+Ref: Group Functions-Footnote-1683856
+Node: Walking Arrays684063
+Node: Library Functions Summary687071
+Node: Library Exercises688477
+Node: Sample Programs688942
+Node: Running Examples689712
+Node: Clones690440
+Node: Cut Program691664
+Node: Egrep Program701593
+Ref: Egrep Program-Footnote-1709105
+Node: Id Program709215
+Node: Split Program712895
+Ref: Split Program-Footnote-1716354
+Node: Tee Program716483
+Node: Uniq Program719273
+Node: Wc Program726699
+Ref: Wc Program-Footnote-1730954
+Node: Miscellaneous Programs731048
+Node: Dupword Program732261
+Node: Alarm Program734291
+Node: Translate Program739146
+Ref: Translate Program-Footnote-1743711
+Node: Labels Program743981
+Ref: Labels Program-Footnote-1747332
+Node: Word Sorting747416
+Node: History Sorting751488
+Node: Extract Program753323
+Node: Simple Sed760852
+Node: Igawk Program763926
+Ref: Igawk Program-Footnote-1778257
+Ref: Igawk Program-Footnote-2778459
+Ref: Igawk Program-Footnote-3778581
+Node: Anagram Program778696
+Node: Signature Program781758
+Node: Programs Summary783005
+Node: Programs Exercises784219
+Ref: Programs Exercises-Footnote-1788348
+Node: Advanced Features788439
+Node: Nondecimal Data790429
+Node: Array Sorting792020
+Node: Controlling Array Traversal792720
+Ref: Controlling Array Traversal-Footnote-1801087
+Node: Array Sorting Functions801205
+Ref: Array Sorting Functions-Footnote-1806296
+Node: Two-way I/O806492
+Ref: Two-way I/O-Footnote-1813042
+Ref: Two-way I/O-Footnote-2813229
+Node: TCP/IP Networking813311
+Node: Profiling816429
+Ref: Profiling-Footnote-1824922
+Node: Advanced Features Summary825245
+Node: Internationalization827089
+Node: I18N and L10N828569
+Node: Explaining gettext829256
+Ref: Explaining gettext-Footnote-1835148
+Ref: Explaining gettext-Footnote-2835333
+Node: Programmer i18n835498
+Ref: Programmer i18n-Footnote-1840353
+Node: Translator i18n840402
+Node: String Extraction841196
+Ref: String Extraction-Footnote-1842328
+Node: Printf Ordering842414
+Ref: Printf Ordering-Footnote-1845200
+Node: I18N Portability845264
+Ref: I18N Portability-Footnote-1847720
+Node: I18N Example847783
+Ref: I18N Example-Footnote-1850589
+Node: Gawk I18N850662
+Node: I18N Summary851307
+Node: Debugger852648
+Node: Debugging853670
+Node: Debugging Concepts854111
+Node: Debugging Terms855920
+Node: Awk Debugging858495
+Node: Sample Debugging Session859401
+Node: Debugger Invocation859935
+Node: Finding The Bug861321
+Node: List of Debugger Commands867799
+Node: Breakpoint Control869132
+Node: Debugger Execution Control872826
+Node: Viewing And Changing Data876188
+Node: Execution Stack879562
+Node: Debugger Info881199
+Node: Miscellaneous Debugger Commands885270
+Node: Readline Support890358
+Node: Limitations891254
+Ref: Limitations-Footnote-1895485
+Node: Debugging Summary895536
+Node: Arbitrary Precision Arithmetic896815
+Node: Computer Arithmetic898231
+Ref: table-numeric-ranges901822
+Ref: Computer Arithmetic-Footnote-1902544
+Node: Math Definitions902601
+Ref: table-ieee-formats905915
+Ref: Math Definitions-Footnote-1906518
+Node: MPFR features906623
+Node: FP Math Caution908340
+Ref: FP Math Caution-Footnote-1909412
+Node: Inexactness of computations909781
+Node: Inexact representation910741
+Node: Comparing FP Values912101
+Node: Errors accumulate913183
+Node: Getting Accuracy914616
+Node: Try To Round917326
+Node: Setting precision918225
+Ref: table-predefined-precision-strings918922
+Node: Setting the rounding mode920752
+Ref: table-gawk-rounding-modes921126
+Ref: Setting the rounding mode-Footnote-1924534
+Node: Arbitrary Precision Integers924713
+Ref: Arbitrary Precision Integers-Footnote-1929630
+Node: POSIX Floating Point Problems929779
+Ref: POSIX Floating Point Problems-Footnote-1933661
+Node: Floating point summary933699
+Node: Dynamic Extensions935889
+Node: Extension Intro937442
+Node: Plugin License938708
+Node: Extension Mechanism Outline939505
+Ref: figure-load-extension939944
+Ref: figure-register-new-function941509
+Ref: figure-call-new-function942601
+Node: Extension API Description944663
+Node: Extension API Functions Introduction946195
+Node: General Data Types951054
+Ref: General Data Types-Footnote-1957009
+Node: Memory Allocation Functions957308
+Ref: Memory Allocation Functions-Footnote-1960153
+Node: Constructor Functions960252
+Node: Registration Functions961997
+Node: Extension Functions962682
+Node: Exit Callback Functions965305
+Node: Extension Version String966555
+Node: Input Parsers967218
+Node: Output Wrappers977100
+Node: Two-way processors981612
+Node: Printing Messages983877
+Ref: Printing Messages-Footnote-1985048
+Node: Updating ERRNO985201
+Node: Requesting Values985940
+Ref: table-value-types-returned986677
+Node: Accessing Parameters987560
+Node: Symbol Table Access988795
+Node: Symbol table by name989307
+Node: Symbol table by cookie991328
+Ref: Symbol table by cookie-Footnote-1995480
+Node: Cached values995544
+Ref: Cached values-Footnote-1999051
+Node: Array Manipulation999142
+Ref: Array Manipulation-Footnote-11000233
+Node: Array Data Types1000270
+Ref: Array Data Types-Footnote-11002928
+Node: Array Functions1003020
+Node: Flattening Arrays1006878
+Node: Creating Arrays1013786
+Node: Redirection API1018555
+Node: Extension API Variables1021386
+Node: Extension Versioning1022019
+Ref: gawk-api-version1022456
+Node: Extension API Informational Variables1024212
+Node: Extension API Boilerplate1025276
+Node: Finding Extensions1029090
+Node: Extension Example1029649
+Node: Internal File Description1030447
+Node: Internal File Ops1034527
+Ref: Internal File Ops-Footnote-11046289
+Node: Using Internal File Ops1046429
+Ref: Using Internal File Ops-Footnote-11048812
+Node: Extension Samples1049086
+Node: Extension Sample File Functions1050615
+Node: Extension Sample Fnmatch1058264
+Node: Extension Sample Fork1059751
+Node: Extension Sample Inplace1060969
+Node: Extension Sample Ord1064179
+Node: Extension Sample Readdir1065015
+Ref: table-readdir-file-types1065904
+Node: Extension Sample Revout1066709
+Node: Extension Sample Rev2way1067298
+Node: Extension Sample Read write array1068038
+Node: Extension Sample Readfile1069980
+Node: Extension Sample Time1071075
+Node: Extension Sample API Tests1072423
+Node: gawkextlib1072915
+Node: Extension summary1075362
+Node: Extension Exercises1079064
+Node: Language History1080562
+Node: V7/SVR3.11082218
+Node: SVR41084370
+Node: POSIX1085804
+Node: BTL1087183
+Node: POSIX/GNU1087912
+Node: Feature History1093774
+Node: Common Extensions1108144
+Node: Ranges and Locales1109427
+Ref: Ranges and Locales-Footnote-11114043
+Ref: Ranges and Locales-Footnote-21114070
+Ref: Ranges and Locales-Footnote-31114305
+Node: Contributors1114526
+Node: History summary1120086
+Node: Installation1121466
+Node: Gawk Distribution1122410
+Node: Getting1122894
+Node: Extracting1123855
+Node: Distribution contents1125493
+Node: Unix Installation1131587
+Node: Quick Installation1132269
+Node: Shell Startup Files1134683
+Node: Additional Configuration Options1135761
+Node: Configuration Philosophy1137566
+Node: Non-Unix Installation1139935
+Node: PC Installation1140393
+Node: PC Binary Installation1141713
+Node: PC Compiling1143565
+Ref: PC Compiling-Footnote-11146359
+Node: PC Testing1146468
+Node: PC Using1147648
+Ref: PC Using-Footnote-11151801
+Node: Cygwin1151874
+Node: MSYS1152644
+Node: VMS Installation1153145
+Node: VMS Compilation1153936
+Ref: VMS Compilation-Footnote-11155165
+Node: VMS Dynamic Extensions1155223
+Node: VMS Installation Details1156908
+Node: VMS Running1159161
+Node: VMS GNV1163440
+Node: VMS Old Gawk1164175
+Node: Bugs1164646
+Node: Other Versions1168961
+Node: Installation summary1175545
+Node: Notes1176596
+Node: Compatibility Mode1177461
+Node: Additions1178243
+Node: Accessing The Source1179168
+Node: Adding Code1180603
+Node: New Ports1186822
+Node: Derived Files1191310
+Ref: Derived Files-Footnote-11196795
+Ref: Derived Files-Footnote-21196830
+Ref: Derived Files-Footnote-31197428
+Node: Future Extensions1197542
+Node: Implementation Limitations1198200
+Node: Extension Design1199383
+Node: Old Extension Problems1200537
+Ref: Old Extension Problems-Footnote-11202055
+Node: Extension New Mechanism Goals1202112
+Ref: Extension New Mechanism Goals-Footnote-11205476
+Node: Extension Other Design Decisions1205665
+Node: Extension Future Growth1207778
+Node: Old Extension Mechanism1208614
+Node: Notes summary1210377
+Node: Basic Concepts1211559
+Node: Basic High Level1212240
+Ref: figure-general-flow1212522
+Ref: figure-process-flow1213207
+Ref: Basic High Level-Footnote-11216508
+Node: Basic Data Typing1216693
+Node: Glossary1220021
+Node: Copying1251968
+Node: GNU Free Documentation License1289507
+Node: Index1314625

End Tag Table
diff --git a/doc/gawk.texi b/doc/gawk.texi
index a4b61895..90f6dcfc 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -12577,19 +12577,19 @@ One special place where @code{/foo/} is @emph{not} an abbreviation for
where this is discussed in more detail.
@node POSIX String Comparison
-@subsubsection String Comparison with POSIX Rules
+@subsubsection String Comparison Based on Locale Collating Order
-The POSIX standard says that string comparison is performed based
-on the locale's @dfn{collating order}. This is the order in which
-characters sort, as defined by the locale (for more discussion,
-@pxref{Locales}). This order is usually very different
-from the results obtained when doing straight character-by-character
-comparison.@footnote{Technically, string comparison is supposed
-to behave the same way as if the strings were compared with the C
-@code{strcoll()} function.}
+The POSIX standard used to say that all string comparisons are
+performed based on the locale's @dfn{collating order}. This
+is the order in which characters sort, as defined by the locale
+(for more discussion, @pxref{Locales}). This order is usually very
+different from the results obtained when doing straight byte-by-byte
+comparison.@footnote{Technically, string comparison is supposed to behave
+the same way as if the strings were compared with the C @code{strcoll()}
+function.}
Because this behavior differs considerably from existing practice,
-@command{gawk} only implements it when in POSIX mode (@pxref{Options}).
+@command{gawk} only implemented it when in POSIX mode (@pxref{Options}).
Here is an example to illustrate the difference, in an @code{en_US.UTF-8}
locale:
@@ -12602,6 +12602,26 @@ $ @kbd{gawk --posix 'BEGIN @{ printf("ABC < abc = %s\n",}
@print{} ABC < abc = FALSE
@end example
+Fortunately, as of August 2016, comparison based on locale
+collating order is no longer required for the @code{==} and @code{!=}
+operators.@footnote{See @uref{http://austingroupbugs.net/view.php?id=1070,
+the Austin Group website}.} However, comparison based on locales is still
+required for @code{<}, @code{<=}, @code{>}, and @code{>=}. POSIX thus
+recommends as follows:
+
+@quotation
+Since the @code{==} operator checks whether strings are identical,
+not whether they collate equally, applications needing to check whether
+strings collate equally can use:
+
+@example
+a <= b && a >= b
+@end example
+@end quotation
+
+As of @value{PVERSION} 4.2, @command{gawk} continues to use locale
+collating order for @code{<}, @code{<=}, @code{>}, and @code{>=} only
+in POSIX mode.
@node Boolean Ops
@subsection Boolean Expressions
@@ -37385,7 +37405,7 @@ and
@uref{http://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xbd_chap09.html#tag_21_09_03_05, its rationale}.}
By using this lovely technical term, the standard gives license
-to implementors to implement ranges in whatever way they choose.
+to implementers to implement ranges in whatever way they choose.
The @command{gawk} maintainer chose to apply the pre-POSIX meaning
both with the default regexp matching and when @option{--traditional} or
@option{--posix} are used.
diff --git a/doc/gawktexi.in b/doc/gawktexi.in
index 9c2864cd..782884bb 100644
--- a/doc/gawktexi.in
+++ b/doc/gawktexi.in
@@ -11896,19 +11896,19 @@ One special place where @code{/foo/} is @emph{not} an abbreviation for
where this is discussed in more detail.
@node POSIX String Comparison
-@subsubsection String Comparison with POSIX Rules
+@subsubsection String Comparison Based on Locale Collating Order
-The POSIX standard says that string comparison is performed based
-on the locale's @dfn{collating order}. This is the order in which
-characters sort, as defined by the locale (for more discussion,
-@pxref{Locales}). This order is usually very different
-from the results obtained when doing straight character-by-character
-comparison.@footnote{Technically, string comparison is supposed
-to behave the same way as if the strings were compared with the C
-@code{strcoll()} function.}
+The POSIX standard used to say that all string comparisons are
+performed based on the locale's @dfn{collating order}. This
+is the order in which characters sort, as defined by the locale
+(for more discussion, @pxref{Locales}). This order is usually very
+different from the results obtained when doing straight byte-by-byte
+comparison.@footnote{Technically, string comparison is supposed to behave
+the same way as if the strings were compared with the C @code{strcoll()}
+function.}
Because this behavior differs considerably from existing practice,
-@command{gawk} only implements it when in POSIX mode (@pxref{Options}).
+@command{gawk} only implemented it when in POSIX mode (@pxref{Options}).
Here is an example to illustrate the difference, in an @code{en_US.UTF-8}
locale:
@@ -11921,6 +11921,26 @@ $ @kbd{gawk --posix 'BEGIN @{ printf("ABC < abc = %s\n",}
@print{} ABC < abc = FALSE
@end example
+Fortunately, as of August 2016, comparison based on locale
+collating order is no longer required for the @code{==} and @code{!=}
+operators.@footnote{See @uref{http://austingroupbugs.net/view.php?id=1070,
+the Austin Group website}.} However, comparison based on locales is still
+required for @code{<}, @code{<=}, @code{>}, and @code{>=}. POSIX thus
+recommends as follows:
+
+@quotation
+Since the @code{==} operator checks whether strings are identical,
+not whether they collate equally, applications needing to check whether
+strings collate equally can use:
+
+@example
+a <= b && a >= b
+@end example
+@end quotation
+
+As of @value{PVERSION} 4.2, @command{gawk} continues to use locale
+collating order for @code{<}, @code{<=}, @code{>}, and @code{>=} only
+in POSIX mode.
@node Boolean Ops
@subsection Boolean Expressions
@@ -36467,7 +36487,7 @@ and
@uref{http://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xbd_chap09.html#tag_21_09_03_05, its rationale}.}
By using this lovely technical term, the standard gives license
-to implementors to implement ranges in whatever way they choose.
+to implementers to implement ranges in whatever way they choose.
The @command{gawk} maintainer chose to apply the pre-POSIX meaning
both with the default regexp matching and when @option{--traditional} or
@option{--posix} are used.
diff --git a/eval.c b/eval.c
index 3e335300..18517568 100644
--- a/eval.c
+++ b/eval.c
@@ -574,7 +574,7 @@ posix_compare(NODE *s1, NODE *s2)
/* cmp_nodes --- compare two nodes, returning negative, 0, positive */
int
-cmp_nodes(NODE *t1, NODE *t2)
+cmp_nodes(NODE *t1, NODE *t2, bool use_strcmp)
{
int ret = 0;
size_t len1, len2;
@@ -597,7 +597,7 @@ cmp_nodes(NODE *t1, NODE *t2)
if (len1 == 0 || len2 == 0)
return ldiff;
- if (do_posix)
+ if (do_posix && ! use_strcmp)
return posix_compare(t1, t2);
l = (ldiff <= 0 ? len1 : len2);
@@ -884,7 +884,7 @@ fmt_index(NODE *n)
emalloc(fmt_list, NODE **, fmt_num*sizeof(*fmt_list), "fmt_index");
n = force_string(n);
while (ix < fmt_hiwater) {
- if (cmp_nodes(fmt_list[ix], n) == 0)
+ if (cmp_nodes(fmt_list[ix], n, true) == 0)
return ix;
ix++;
}
@@ -1508,10 +1508,15 @@ eval_condition(NODE *t)
return boolval(t);
}
+typedef enum {
+ SCALAR_EQ_NEQ,
+ SCALAR_RELATIONAL
+} scalar_cmp_t;
+
/* cmp_scalars -- compare two nodes on the stack */
static inline int
-cmp_scalars()
+cmp_scalars(scalar_cmp_t comparison_type)
{
NODE *t1, *t2;
int di;
@@ -1522,7 +1527,7 @@ cmp_scalars()
DEREF(t2);
fatal(_("attempt to use array `%s' in a scalar context"), array_vname(t1));
}
- di = cmp_nodes(t1, t2);
+ di = cmp_nodes(t1, t2, comparison_type == SCALAR_EQ_NEQ);
DEREF(t1);
DEREF(t2);
return di;
diff --git a/interpret.h b/interpret.h
index 4b8dc472..5aa62a8a 100644
--- a/interpret.h
+++ b/interpret.h
@@ -444,37 +444,37 @@ uninitialized_scalar:
break;
case Op_equal:
- r = node_Boolean[cmp_scalars() == 0];
+ r = node_Boolean[cmp_scalars(SCALAR_EQ_NEQ) == 0];
UPREF(r);
REPLACE(r);
break;
case Op_notequal:
- r = node_Boolean[cmp_scalars() != 0];
+ r = node_Boolean[cmp_scalars(SCALAR_EQ_NEQ) != 0];
UPREF(r);
REPLACE(r);
break;
case Op_less:
- r = node_Boolean[cmp_scalars() < 0];
+ r = node_Boolean[cmp_scalars(SCALAR_RELATIONAL) < 0];
UPREF(r);
REPLACE(r);
break;
case Op_greater:
- r = node_Boolean[cmp_scalars() > 0];
+ r = node_Boolean[cmp_scalars(SCALAR_RELATIONAL) > 0];
UPREF(r);
REPLACE(r);
break;
case Op_leq:
- r = node_Boolean[cmp_scalars() <= 0];
+ r = node_Boolean[cmp_scalars(SCALAR_RELATIONAL) <= 0];
UPREF(r);
REPLACE(r);
break;
case Op_geq:
- r = node_Boolean[cmp_scalars() >= 0];
+ r = node_Boolean[cmp_scalars(SCALAR_RELATIONAL) >= 0];
UPREF(r);
REPLACE(r);
break;
@@ -836,7 +836,7 @@ mod:
} else {
t1 = POP_SCALAR(); /* case value */
t2 = TOP_SCALAR(); /* switch expression */
- di = (cmp_nodes(t2, t1) == 0);
+ di = (cmp_nodes(t2, t1, true) == 0);
DEREF(t1);
}
diff --git a/re.c b/re.c
index b11a6984..c822c90f 100644
--- a/re.c
+++ b/re.c
@@ -356,7 +356,7 @@ re_update(NODE *t)
t1 = t->re_exp;
if (t->re_text != NULL) {
/* if contents haven't changed, just return it */
- if (cmp_nodes(t->re_text, t1) == 0)
+ if (cmp_nodes(t->re_text, t1, true) == 0)
return t->re_reg;
/* things changed, fall through to recompile */
unref(t->re_text);