aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArnold D. Robbins <arnold@skeeve.com>2018-07-31 09:21:43 +0300
committerArnold D. Robbins <arnold@skeeve.com>2018-07-31 09:21:43 +0300
commit86b063b99d78df97fcd761073f437ce00b018712 (patch)
tree6f834ea3d46ecb541bd6a722e565a83aac0e2268
parentf856979d85ace61bfeb2d31146485ec668202ad8 (diff)
downloadegawk-86b063b99d78df97fcd761073f437ce00b018712.tar.gz
egawk-86b063b99d78df97fcd761073f437ce00b018712.tar.bz2
egawk-86b063b99d78df97fcd761073f437ce00b018712.zip
Fix handling of physical newlines in -v arguments and related improvements.
-rw-r--r--ChangeLog12
-rw-r--r--awk.h2
-rw-r--r--awkgram.c2
-rw-r--r--awkgram.y2
-rw-r--r--doc/ChangeLog5
-rw-r--r--doc/gawk.info914
-rw-r--r--doc/gawk.texi63
-rw-r--r--doc/gawktexi.in63
-rw-r--r--main.c9
-rw-r--r--node.c2
10 files changed, 642 insertions, 432 deletions
diff --git a/ChangeLog b/ChangeLog
index ba471617..904e984c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,17 @@
2018-07-31 Arnold D. Robbins <arnold@skeeve.com>
+ Handle newlines in -v and fix \-<newline>. Thanks to
+ Samy Mahmoudi <samy.mahmoudi@gmail.com> for the report.
+
+ * awk.h [ELIDE_BACK_NL]: New constant.
+ * awkgram.y (yylex): Disallow any physical newlines in a string
+ even if escaped, in POSIX mode.
+ * main.c (arg_assign): In POSIX mode disallow physical newline
+ in a -v value. Otherwise call make_str_node() with ELIDE_BACK_NL.
+ * node.c (make_str_node): Handle ELIDE_BACK_NL.
+
+2018-07-31 Arnold D. Robbins <arnold@skeeve.com>
+
* array.c (cmp_strings): Preserve value of lmin so it can be passed
to memcmp() if IGNORECASE comparison failed. Thanks to
M. Rashid Zamani <rashid.z@gmail.com> for the report.
diff --git a/awk.h b/awk.h
index a6d8193d..a848e39e 100644
--- a/awk.h
+++ b/awk.h
@@ -1328,8 +1328,10 @@ DEREF(NODE *r)
#define make_string(s, l) make_str_node((s), (l), 0)
+// Flags for making string nodes
#define SCAN 1
#define ALREADY_MALLOCED 2
+#define ELIDE_BACK_NL 4
#define cant_happen() r_fatal("internal error line %d, file: %s", \
__LINE__, __FILE__)
diff --git a/awkgram.c b/awkgram.c
index 36947c41..10c55fa4 100644
--- a/awkgram.c
+++ b/awkgram.c
@@ -6272,6 +6272,8 @@ retry:
if (c == '\r') /* allow MS-DOS files. bleah */
c = nextc(true);
if (c == '\n') {
+ if (do_posix)
+ fatal(_("POSIX does not allow physical newlines in string values"));
sourceline++;
continue;
}
diff --git a/awkgram.y b/awkgram.y
index 95353c15..f3d3b38b 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -3852,6 +3852,8 @@ retry:
if (c == '\r') /* allow MS-DOS files. bleah */
c = nextc(true);
if (c == '\n') {
+ if (do_posix)
+ fatal(_("POSIX does not allow physical newlines in string values"));
sourceline++;
continue;
}
diff --git a/doc/ChangeLog b/doc/ChangeLog
index 51f19120..0821be14 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,5 +1,10 @@
2018-07-31 Arnold D. Robbins <arnold@skeeve.com>
+ * gawktexi.in (Scalar Constants): Document what happens with
+ physical newlines in strings, escaped and otherwise.
+
+2018-07-31 Arnold D. Robbins <arnold@skeeve.com>
+
* gawktexi.in (Two-way I/O): Fix some typos.
* gawkworkflow.texi (Configuring git): Correct some
command usages. Thanks to Antonio Columbo for the fix.
diff --git a/doc/gawk.info b/doc/gawk.info
index bc245e3e..b4f59c53 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -7669,6 +7669,55 @@ can be of any length, and they can contain any of the possible eight-bit
ASCII characters, including ASCII NUL (character code zero). Other
'awk' implementations may have difficulty with some character codes.
+ Some languages allow you to continue long strings across multiple
+lines by ending the line with a backslash. For example in C:
+
+ #include <stdio.h>
+
+ int main()
+ {
+ printf("hello, \
+ world\n");
+ return 0;
+ }
+
+In such a case, the C compiler removes both the backslash and the
+newline, producing a string as if it had been typed '"hello, world\n"'.
+This is useful when a single string needs to contain a large amount of
+text.
+
+ The POSIX standard says explicitly that newlines are not allowed
+inside string constants. And indeed, all 'awk' implementations report
+an error if you try to use one. For example:
+
+ $ gawk 'BEGIN { print "hello,
+ > world" }'
+ -| gawk: cmd. line:1: BEGIN { print "hello,
+ -| gawk: cmd. line:1: ^ unterminated string
+ -| gawk: cmd. line:1: BEGIN { print "hello,
+ -| gawk: cmd. line:1: ^ syntax error
+
+ Although POSIX doesn't define what happens if you use an escaped
+newline, as in the previous C example, all known versions of 'awk' allow
+you to do so. Unfortunately, what each one does with such a string
+varies. (d.c.) 'gawk', 'mawk', and the OpenSolaris POSIX 'awk' (*note
+Other Versions::) elide the backslash and newline, as in C:
+
+ $ gawk 'BEGIN { print "hello, \
+ > world" }'
+ -| hello, world
+
+ Brian Kernighan's 'awk' and Busybox 'awk' remove the backslash but
+leave the newline intact, as part of the string:
+
+ $ nawk 'BEGIN { print "hello, \
+ > world" }'
+ -| hello,
+ -| world
+
+ In POSIX mode (*note Options::), 'gawk' does not allow escaped
+newlines. Otherwise, it behaves as just described.
+
---------- Footnotes ----------
(1) The internal representation of all numbers, including integers,
@@ -33879,6 +33928,7 @@ Index
* dark corner, regexp constants, as arguments to user-defined functions: Standard Regexp Constants.
(line 43)
* dark corner, split() function: String Functions. (line 364)
+* dark corner, string continuation: Scalar Constants. (line 53)
* dark corner, strings, storing: gawk split records. (line 82)
* dark corner, value of ARGV[0]: Auto-set. (line 39)
* dark corner, ^, in FS: Regexp Field Splitting.
@@ -34096,6 +34146,7 @@ Index
* differences in awk and gawk, split() function: String Functions.
(line 351)
* differences in awk and gawk, strings: Scalar Constants. (line 20)
+* differences in awk and gawk, strings <1>: Scalar Constants. (line 53)
* differences in awk and gawk, strings, storing: gawk split records.
(line 76)
* differences in awk and gawk, SYMTAB variable: Auto-set. (line 331)
@@ -35810,6 +35861,7 @@ Index
* string-matching operators: Regexp Usage. (line 19)
* string-translation functions: I18N Functions. (line 6)
* strings splitting, example: String Functions. (line 337)
+* strings, continuation across lines: Scalar Constants. (line 53)
* strings, converting: Strings And Numbers. (line 6)
* strings, converting <1>: Bitwise Functions. (line 109)
* strings, converting letter case: String Functions. (line 526)
@@ -36246,436 +36298,436 @@ Node: Expressions332646
Node: Values333834
Node: Constants334512
Node: Scalar Constants335203
-Ref: Scalar Constants-Footnote-1336067
-Node: Nondecimal-numbers336317
-Node: Regexp Constants339318
-Node: Using Constant Regexps339844
-Node: Standard Regexp Constants340466
-Node: Strong Regexp Constants343654
-Node: Variables346612
-Node: Using Variables347269
-Node: Assignment Options349179
-Node: Conversion351052
-Node: Strings And Numbers351576
-Ref: Strings And Numbers-Footnote-1354639
-Node: Locale influences conversions354748
-Ref: table-locale-affects357506
-Node: All Operators358124
-Node: Arithmetic Ops358753
-Node: Concatenation361259
-Ref: Concatenation-Footnote-1364106
-Node: Assignment Ops364213
-Ref: table-assign-ops369204
-Node: Increment Ops370517
-Node: Truth Values and Conditions373977
-Node: Truth Values375051
-Node: Typing and Comparison376099
-Node: Variable Typing376919
-Ref: Variable Typing-Footnote-1383382
-Ref: Variable Typing-Footnote-2383454
-Node: Comparison Operators383531
-Ref: table-relational-ops383950
-Node: POSIX String Comparison387445
-Ref: POSIX String Comparison-Footnote-1389140
-Ref: POSIX String Comparison-Footnote-2389279
-Node: Boolean Ops389363
-Ref: Boolean Ops-Footnote-1393845
-Node: Conditional Exp393937
-Node: Function Calls395673
-Node: Precedence399550
-Node: Locales403209
-Node: Expressions Summary404841
-Node: Patterns and Actions407414
-Node: Pattern Overview408534
-Node: Regexp Patterns410211
-Node: Expression Patterns410753
-Node: Ranges414534
-Node: BEGIN/END417642
-Node: Using BEGIN/END418403
-Ref: Using BEGIN/END-Footnote-1421139
-Node: I/O And BEGIN/END421245
-Node: BEGINFILE/ENDFILE423559
-Node: Empty426472
-Node: Using Shell Variables426789
-Node: Action Overview429063
-Node: Statements431388
-Node: If Statement433236
-Node: While Statement434731
-Node: Do Statement436759
-Node: For Statement437907
-Node: Switch Statement441078
-Node: Break Statement443464
-Node: Continue Statement445556
-Node: Next Statement447383
-Node: Nextfile Statement449766
-Node: Exit Statement452418
-Node: Built-in Variables454821
-Node: User-modified455954
-Node: Auto-set463721
-Ref: Auto-set-Footnote-1480023
-Ref: Auto-set-Footnote-2480229
-Node: ARGC and ARGV480285
-Node: Pattern Action Summary484498
-Node: Arrays486928
-Node: Array Basics488257
-Node: Array Intro489101
-Ref: figure-array-elements491076
-Ref: Array Intro-Footnote-1493780
-Node: Reference to Elements493908
-Node: Assigning Elements496372
-Node: Array Example496863
-Node: Scanning an Array498622
-Node: Controlling Scanning501644
-Ref: Controlling Scanning-Footnote-1507043
-Node: Numeric Array Subscripts507359
-Node: Uninitialized Subscripts509543
-Node: Delete511162
-Ref: Delete-Footnote-1513914
-Node: Multidimensional513971
-Node: Multiscanning517066
-Node: Arrays of Arrays518657
-Node: Arrays Summary523424
-Node: Functions525517
-Node: Built-in526555
-Node: Calling Built-in527636
-Node: Numeric Functions529632
-Ref: Numeric Functions-Footnote-1533660
-Ref: Numeric Functions-Footnote-2534017
-Ref: Numeric Functions-Footnote-3534065
-Node: String Functions534337
-Ref: String Functions-Footnote-1558046
-Ref: String Functions-Footnote-2558174
-Ref: String Functions-Footnote-3558422
-Node: Gory Details558509
-Ref: table-sub-escapes560300
-Ref: table-sub-proposed561819
-Ref: table-posix-sub563182
-Ref: table-gensub-escapes564723
-Ref: Gory Details-Footnote-1565546
-Node: I/O Functions565700
-Ref: table-system-return-values572168
-Ref: I/O Functions-Footnote-1574148
-Ref: I/O Functions-Footnote-2574296
-Node: Time Functions574416
-Ref: Time Functions-Footnote-1585087
-Ref: Time Functions-Footnote-2585155
-Ref: Time Functions-Footnote-3585313
-Ref: Time Functions-Footnote-4585424
-Ref: Time Functions-Footnote-5585536
-Ref: Time Functions-Footnote-6585763
-Node: Bitwise Functions586029
-Ref: table-bitwise-ops586623
-Ref: Bitwise Functions-Footnote-1592686
-Ref: Bitwise Functions-Footnote-2592859
-Node: Type Functions593050
-Node: I18N Functions595801
-Node: User-defined597452
-Node: Definition Syntax598257
-Ref: Definition Syntax-Footnote-1603944
-Node: Function Example604015
-Ref: Function Example-Footnote-1606937
-Node: Function Caveats606959
-Node: Calling A Function607477
-Node: Variable Scope608435
-Node: Pass By Value/Reference611429
-Node: Return Statement614928
-Node: Dynamic Typing617907
-Node: Indirect Calls618837
-Ref: Indirect Calls-Footnote-1629089
-Node: Functions Summary629217
-Node: Library Functions631922
-Ref: Library Functions-Footnote-1635529
-Ref: Library Functions-Footnote-2635672
-Node: Library Names635843
-Ref: Library Names-Footnote-1639303
-Ref: Library Names-Footnote-2639526
-Node: General Functions639612
-Node: Strtonum Function640715
-Node: Assert Function643737
-Node: Round Function647063
-Node: Cliff Random Function648603
-Node: Ordinal Functions649619
-Ref: Ordinal Functions-Footnote-1652682
-Ref: Ordinal Functions-Footnote-2652934
-Node: Join Function653144
-Ref: Join Function-Footnote-1654914
-Node: Getlocaltime Function655114
-Node: Readfile Function658856
-Node: Shell Quoting660833
-Node: Data File Management662234
-Node: Filetrans Function662866
-Node: Rewind Function666962
-Node: File Checking668872
-Ref: File Checking-Footnote-1670206
-Node: Empty Files670407
-Node: Ignoring Assigns672386
-Node: Getopt Function673936
-Ref: Getopt Function-Footnote-1685405
-Node: Passwd Functions685605
-Ref: Passwd Functions-Footnote-1694444
-Node: Group Functions694532
-Ref: Group Functions-Footnote-1702430
-Node: Walking Arrays702637
-Node: Library Functions Summary705645
-Node: Library Exercises707051
-Node: Sample Programs707516
-Node: Running Examples708286
-Node: Clones709014
-Node: Cut Program710238
-Node: Egrep Program720167
-Ref: Egrep Program-Footnote-1727679
-Node: Id Program727789
-Node: Split Program731469
-Ref: Split Program-Footnote-1734927
-Node: Tee Program735056
-Node: Uniq Program737846
-Node: Wc Program745272
-Ref: Wc Program-Footnote-1749527
-Node: Miscellaneous Programs749621
-Node: Dupword Program750834
-Node: Alarm Program752864
-Node: Translate Program757719
-Ref: Translate Program-Footnote-1762284
-Node: Labels Program762554
-Ref: Labels Program-Footnote-1765905
-Node: Word Sorting765989
-Node: History Sorting770061
-Node: Extract Program771896
-Node: Simple Sed779950
-Node: Igawk Program783024
-Ref: Igawk Program-Footnote-1797355
-Ref: Igawk Program-Footnote-2797557
-Ref: Igawk Program-Footnote-3797679
-Node: Anagram Program797794
-Node: Signature Program800856
-Node: Programs Summary802103
-Node: Programs Exercises803317
-Ref: Programs Exercises-Footnote-1807446
-Node: Advanced Features807537
-Node: Nondecimal Data809527
-Node: Array Sorting811118
-Node: Controlling Array Traversal811818
-Ref: Controlling Array Traversal-Footnote-1820186
-Node: Array Sorting Functions820304
-Ref: Array Sorting Functions-Footnote-1825395
-Node: Two-way I/O825591
-Ref: Two-way I/O-Footnote-1833311
-Ref: Two-way I/O-Footnote-2833498
-Node: TCP/IP Networking833580
-Node: Profiling836698
-Ref: Profiling-Footnote-1845370
-Node: Advanced Features Summary845693
-Node: Internationalization847537
-Node: I18N and L10N849017
-Node: Explaining gettext849704
-Ref: Explaining gettext-Footnote-1855596
-Ref: Explaining gettext-Footnote-2855781
-Node: Programmer i18n855946
-Ref: Programmer i18n-Footnote-1860895
-Node: Translator i18n860944
-Node: String Extraction861738
-Ref: String Extraction-Footnote-1862870
-Node: Printf Ordering862956
-Ref: Printf Ordering-Footnote-1865742
-Node: I18N Portability865806
-Ref: I18N Portability-Footnote-1868262
-Node: I18N Example868325
-Ref: I18N Example-Footnote-1871131
-Node: Gawk I18N871204
-Node: I18N Summary871849
-Node: Debugger873190
-Node: Debugging874213
-Node: Debugging Concepts874654
-Node: Debugging Terms876463
-Node: Awk Debugging879038
-Node: Sample Debugging Session879944
-Node: Debugger Invocation880478
-Node: Finding The Bug881864
-Node: List of Debugger Commands888342
-Node: Breakpoint Control889675
-Node: Debugger Execution Control893369
-Node: Viewing And Changing Data896731
-Node: Execution Stack900105
-Node: Debugger Info901742
-Node: Miscellaneous Debugger Commands905813
-Node: Readline Support910875
-Node: Limitations911771
-Node: Debugging Summary913880
-Node: Arbitrary Precision Arithmetic915159
-Node: Computer Arithmetic916644
-Ref: table-numeric-ranges920410
-Ref: table-floating-point-ranges920903
-Ref: Computer Arithmetic-Footnote-1921561
-Node: Math Definitions921618
-Ref: table-ieee-formats924934
-Ref: Math Definitions-Footnote-1925537
-Node: MPFR features925642
-Node: FP Math Caution927360
-Ref: FP Math Caution-Footnote-1928432
-Node: Inexactness of computations928801
-Node: Inexact representation929761
-Node: Comparing FP Values931121
-Node: Errors accumulate932203
-Node: Getting Accuracy933636
-Node: Try To Round936346
-Node: Setting precision937245
-Ref: table-predefined-precision-strings937942
-Node: Setting the rounding mode939772
-Ref: table-gawk-rounding-modes940146
-Ref: Setting the rounding mode-Footnote-1944077
-Node: Arbitrary Precision Integers944256
-Ref: Arbitrary Precision Integers-Footnote-1947431
-Node: Checking for MPFR947580
-Node: POSIX Floating Point Problems949054
-Ref: POSIX Floating Point Problems-Footnote-1953339
-Node: Floating point summary953377
-Node: Dynamic Extensions955567
-Node: Extension Intro957120
-Node: Plugin License958386
-Node: Extension Mechanism Outline959183
-Ref: figure-load-extension959622
-Ref: figure-register-new-function961187
-Ref: figure-call-new-function962279
-Node: Extension API Description964341
-Node: Extension API Functions Introduction965983
-Node: General Data Types971523
-Ref: General Data Types-Footnote-1979884
-Node: Memory Allocation Functions980183
-Ref: Memory Allocation Functions-Footnote-1984393
-Node: Constructor Functions984492
-Node: Registration Functions988078
-Node: Extension Functions988763
-Node: Exit Callback Functions993978
-Node: Extension Version String995228
-Node: Input Parsers995891
-Node: Output Wrappers1008612
-Node: Two-way processors1013124
-Node: Printing Messages1015389
-Ref: Printing Messages-Footnote-11016560
-Node: Updating ERRNO1016713
-Node: Requesting Values1017452
-Ref: table-value-types-returned1018189
-Node: Accessing Parameters1019125
-Node: Symbol Table Access1020360
-Node: Symbol table by name1020872
-Node: Symbol table by cookie1022661
-Ref: Symbol table by cookie-Footnote-11026846
-Node: Cached values1026910
-Ref: Cached values-Footnote-11030446
-Node: Array Manipulation1030599
-Ref: Array Manipulation-Footnote-11031690
-Node: Array Data Types1031727
-Ref: Array Data Types-Footnote-11034385
-Node: Array Functions1034477
-Node: Flattening Arrays1038975
-Node: Creating Arrays1045951
-Node: Redirection API1050718
-Node: Extension API Variables1053551
-Node: Extension Versioning1054262
-Ref: gawk-api-version1054691
-Node: Extension GMP/MPFR Versioning1056422
-Node: Extension API Informational Variables1058050
-Node: Extension API Boilerplate1059123
-Node: Changes from API V11063097
-Node: Finding Extensions1064669
-Node: Extension Example1065228
-Node: Internal File Description1066026
-Node: Internal File Ops1070106
-Ref: Internal File Ops-Footnote-11081456
-Node: Using Internal File Ops1081596
-Ref: Using Internal File Ops-Footnote-11083979
-Node: Extension Samples1084253
-Node: Extension Sample File Functions1085782
-Node: Extension Sample Fnmatch1093431
-Node: Extension Sample Fork1094918
-Node: Extension Sample Inplace1096136
-Node: Extension Sample Ord1099353
-Node: Extension Sample Readdir1100189
-Ref: table-readdir-file-types1101078
-Node: Extension Sample Revout1101883
-Node: Extension Sample Rev2way1102472
-Node: Extension Sample Read write array1103212
-Node: Extension Sample Readfile1105154
-Node: Extension Sample Time1106249
-Node: Extension Sample API Tests1107597
-Node: gawkextlib1108089
-Node: Extension summary1111007
-Node: Extension Exercises1114709
-Node: Language History1116207
-Node: V7/SVR3.11117863
-Node: SVR41120015
-Node: POSIX1121449
-Node: BTL1122829
-Node: POSIX/GNU1123558
-Node: Feature History1129336
-Node: Common Extensions1145195
-Node: Ranges and Locales1146478
-Ref: Ranges and Locales-Footnote-11151094
-Ref: Ranges and Locales-Footnote-21151121
-Ref: Ranges and Locales-Footnote-31151356
-Node: Contributors1151577
-Node: History summary1157522
-Node: Installation1158902
-Node: Gawk Distribution1159846
-Node: Getting1160330
-Node: Extracting1161293
-Node: Distribution contents1162931
-Node: Unix Installation1169411
-Node: Quick Installation1170093
-Node: Shell Startup Files1172507
-Node: Additional Configuration Options1173596
-Node: Configuration Philosophy1175889
-Node: Non-Unix Installation1178258
-Node: PC Installation1178718
-Node: PC Binary Installation1179556
-Node: PC Compiling1179991
-Node: PC Using1181108
-Node: Cygwin1184323
-Node: MSYS1185422
-Node: VMS Installation1185923
-Node: VMS Compilation1186714
-Ref: VMS Compilation-Footnote-11187943
-Node: VMS Dynamic Extensions1188001
-Node: VMS Installation Details1189686
-Node: VMS Running1191939
-Node: VMS GNV1196218
-Node: VMS Old Gawk1196953
-Node: Bugs1197424
-Node: Bug address1198087
-Node: Usenet1200879
-Node: Maintainers1201656
-Node: Other Versions1202917
-Node: Installation summary1209679
-Node: Notes1210881
-Node: Compatibility Mode1211746
-Node: Additions1212528
-Node: Accessing The Source1213453
-Node: Adding Code1214890
-Node: New Ports1221109
-Node: Derived Files1225597
-Ref: Derived Files-Footnote-11231243
-Ref: Derived Files-Footnote-21231278
-Ref: Derived Files-Footnote-31231876
-Node: Future Extensions1231990
-Node: Implementation Limitations1232648
-Node: Extension Design1233831
-Node: Old Extension Problems1234985
-Ref: Old Extension Problems-Footnote-11236503
-Node: Extension New Mechanism Goals1236560
-Ref: Extension New Mechanism Goals-Footnote-11239924
-Node: Extension Other Design Decisions1240113
-Node: Extension Future Growth1242226
-Node: Old Extension Mechanism1243062
-Node: Notes summary1244825
-Node: Basic Concepts1246007
-Node: Basic High Level1246688
-Ref: figure-general-flow1246970
-Ref: figure-process-flow1247655
-Ref: Basic High Level-Footnote-11250956
-Node: Basic Data Typing1251141
-Node: Glossary1254469
-Node: Copying1286307
-Node: GNU Free Documentation License1323850
-Node: Index1348970
+Ref: Scalar Constants-Footnote-1337726
+Node: Nondecimal-numbers337976
+Node: Regexp Constants340977
+Node: Using Constant Regexps341503
+Node: Standard Regexp Constants342125
+Node: Strong Regexp Constants345313
+Node: Variables348271
+Node: Using Variables348928
+Node: Assignment Options350838
+Node: Conversion352711
+Node: Strings And Numbers353235
+Ref: Strings And Numbers-Footnote-1356298
+Node: Locale influences conversions356407
+Ref: table-locale-affects359165
+Node: All Operators359783
+Node: Arithmetic Ops360412
+Node: Concatenation362918
+Ref: Concatenation-Footnote-1365765
+Node: Assignment Ops365872
+Ref: table-assign-ops370863
+Node: Increment Ops372176
+Node: Truth Values and Conditions375636
+Node: Truth Values376710
+Node: Typing and Comparison377758
+Node: Variable Typing378578
+Ref: Variable Typing-Footnote-1385041
+Ref: Variable Typing-Footnote-2385113
+Node: Comparison Operators385190
+Ref: table-relational-ops385609
+Node: POSIX String Comparison389104
+Ref: POSIX String Comparison-Footnote-1390799
+Ref: POSIX String Comparison-Footnote-2390938
+Node: Boolean Ops391022
+Ref: Boolean Ops-Footnote-1395504
+Node: Conditional Exp395596
+Node: Function Calls397332
+Node: Precedence401209
+Node: Locales404868
+Node: Expressions Summary406500
+Node: Patterns and Actions409073
+Node: Pattern Overview410193
+Node: Regexp Patterns411870
+Node: Expression Patterns412412
+Node: Ranges416193
+Node: BEGIN/END419301
+Node: Using BEGIN/END420062
+Ref: Using BEGIN/END-Footnote-1422798
+Node: I/O And BEGIN/END422904
+Node: BEGINFILE/ENDFILE425218
+Node: Empty428131
+Node: Using Shell Variables428448
+Node: Action Overview430722
+Node: Statements433047
+Node: If Statement434895
+Node: While Statement436390
+Node: Do Statement438418
+Node: For Statement439566
+Node: Switch Statement442737
+Node: Break Statement445123
+Node: Continue Statement447215
+Node: Next Statement449042
+Node: Nextfile Statement451425
+Node: Exit Statement454077
+Node: Built-in Variables456480
+Node: User-modified457613
+Node: Auto-set465380
+Ref: Auto-set-Footnote-1481682
+Ref: Auto-set-Footnote-2481888
+Node: ARGC and ARGV481944
+Node: Pattern Action Summary486157
+Node: Arrays488587
+Node: Array Basics489916
+Node: Array Intro490760
+Ref: figure-array-elements492735
+Ref: Array Intro-Footnote-1495439
+Node: Reference to Elements495567
+Node: Assigning Elements498031
+Node: Array Example498522
+Node: Scanning an Array500281
+Node: Controlling Scanning503303
+Ref: Controlling Scanning-Footnote-1508702
+Node: Numeric Array Subscripts509018
+Node: Uninitialized Subscripts511202
+Node: Delete512821
+Ref: Delete-Footnote-1515573
+Node: Multidimensional515630
+Node: Multiscanning518725
+Node: Arrays of Arrays520316
+Node: Arrays Summary525083
+Node: Functions527176
+Node: Built-in528214
+Node: Calling Built-in529295
+Node: Numeric Functions531291
+Ref: Numeric Functions-Footnote-1535319
+Ref: Numeric Functions-Footnote-2535676
+Ref: Numeric Functions-Footnote-3535724
+Node: String Functions535996
+Ref: String Functions-Footnote-1559705
+Ref: String Functions-Footnote-2559833
+Ref: String Functions-Footnote-3560081
+Node: Gory Details560168
+Ref: table-sub-escapes561959
+Ref: table-sub-proposed563478
+Ref: table-posix-sub564841
+Ref: table-gensub-escapes566382
+Ref: Gory Details-Footnote-1567205
+Node: I/O Functions567359
+Ref: table-system-return-values573827
+Ref: I/O Functions-Footnote-1575807
+Ref: I/O Functions-Footnote-2575955
+Node: Time Functions576075
+Ref: Time Functions-Footnote-1586746
+Ref: Time Functions-Footnote-2586814
+Ref: Time Functions-Footnote-3586972
+Ref: Time Functions-Footnote-4587083
+Ref: Time Functions-Footnote-5587195
+Ref: Time Functions-Footnote-6587422
+Node: Bitwise Functions587688
+Ref: table-bitwise-ops588282
+Ref: Bitwise Functions-Footnote-1594345
+Ref: Bitwise Functions-Footnote-2594518
+Node: Type Functions594709
+Node: I18N Functions597460
+Node: User-defined599111
+Node: Definition Syntax599916
+Ref: Definition Syntax-Footnote-1605603
+Node: Function Example605674
+Ref: Function Example-Footnote-1608596
+Node: Function Caveats608618
+Node: Calling A Function609136
+Node: Variable Scope610094
+Node: Pass By Value/Reference613088
+Node: Return Statement616587
+Node: Dynamic Typing619566
+Node: Indirect Calls620496
+Ref: Indirect Calls-Footnote-1630748
+Node: Functions Summary630876
+Node: Library Functions633581
+Ref: Library Functions-Footnote-1637188
+Ref: Library Functions-Footnote-2637331
+Node: Library Names637502
+Ref: Library Names-Footnote-1640962
+Ref: Library Names-Footnote-2641185
+Node: General Functions641271
+Node: Strtonum Function642374
+Node: Assert Function645396
+Node: Round Function648722
+Node: Cliff Random Function650262
+Node: Ordinal Functions651278
+Ref: Ordinal Functions-Footnote-1654341
+Ref: Ordinal Functions-Footnote-2654593
+Node: Join Function654803
+Ref: Join Function-Footnote-1656573
+Node: Getlocaltime Function656773
+Node: Readfile Function660515
+Node: Shell Quoting662492
+Node: Data File Management663893
+Node: Filetrans Function664525
+Node: Rewind Function668621
+Node: File Checking670531
+Ref: File Checking-Footnote-1671865
+Node: Empty Files672066
+Node: Ignoring Assigns674045
+Node: Getopt Function675595
+Ref: Getopt Function-Footnote-1687064
+Node: Passwd Functions687264
+Ref: Passwd Functions-Footnote-1696103
+Node: Group Functions696191
+Ref: Group Functions-Footnote-1704089
+Node: Walking Arrays704296
+Node: Library Functions Summary707304
+Node: Library Exercises708710
+Node: Sample Programs709175
+Node: Running Examples709945
+Node: Clones710673
+Node: Cut Program711897
+Node: Egrep Program721826
+Ref: Egrep Program-Footnote-1729338
+Node: Id Program729448
+Node: Split Program733128
+Ref: Split Program-Footnote-1736586
+Node: Tee Program736715
+Node: Uniq Program739505
+Node: Wc Program746931
+Ref: Wc Program-Footnote-1751186
+Node: Miscellaneous Programs751280
+Node: Dupword Program752493
+Node: Alarm Program754523
+Node: Translate Program759378
+Ref: Translate Program-Footnote-1763943
+Node: Labels Program764213
+Ref: Labels Program-Footnote-1767564
+Node: Word Sorting767648
+Node: History Sorting771720
+Node: Extract Program773555
+Node: Simple Sed781609
+Node: Igawk Program784683
+Ref: Igawk Program-Footnote-1799014
+Ref: Igawk Program-Footnote-2799216
+Ref: Igawk Program-Footnote-3799338
+Node: Anagram Program799453
+Node: Signature Program802515
+Node: Programs Summary803762
+Node: Programs Exercises804976
+Ref: Programs Exercises-Footnote-1809105
+Node: Advanced Features809196
+Node: Nondecimal Data811186
+Node: Array Sorting812777
+Node: Controlling Array Traversal813477
+Ref: Controlling Array Traversal-Footnote-1821845
+Node: Array Sorting Functions821963
+Ref: Array Sorting Functions-Footnote-1827054
+Node: Two-way I/O827250
+Ref: Two-way I/O-Footnote-1834970
+Ref: Two-way I/O-Footnote-2835157
+Node: TCP/IP Networking835239
+Node: Profiling838357
+Ref: Profiling-Footnote-1847029
+Node: Advanced Features Summary847352
+Node: Internationalization849196
+Node: I18N and L10N850676
+Node: Explaining gettext851363
+Ref: Explaining gettext-Footnote-1857255
+Ref: Explaining gettext-Footnote-2857440
+Node: Programmer i18n857605
+Ref: Programmer i18n-Footnote-1862554
+Node: Translator i18n862603
+Node: String Extraction863397
+Ref: String Extraction-Footnote-1864529
+Node: Printf Ordering864615
+Ref: Printf Ordering-Footnote-1867401
+Node: I18N Portability867465
+Ref: I18N Portability-Footnote-1869921
+Node: I18N Example869984
+Ref: I18N Example-Footnote-1872790
+Node: Gawk I18N872863
+Node: I18N Summary873508
+Node: Debugger874849
+Node: Debugging875872
+Node: Debugging Concepts876313
+Node: Debugging Terms878122
+Node: Awk Debugging880697
+Node: Sample Debugging Session881603
+Node: Debugger Invocation882137
+Node: Finding The Bug883523
+Node: List of Debugger Commands890001
+Node: Breakpoint Control891334
+Node: Debugger Execution Control895028
+Node: Viewing And Changing Data898390
+Node: Execution Stack901764
+Node: Debugger Info903401
+Node: Miscellaneous Debugger Commands907472
+Node: Readline Support912534
+Node: Limitations913430
+Node: Debugging Summary915539
+Node: Arbitrary Precision Arithmetic916818
+Node: Computer Arithmetic918303
+Ref: table-numeric-ranges922069
+Ref: table-floating-point-ranges922562
+Ref: Computer Arithmetic-Footnote-1923220
+Node: Math Definitions923277
+Ref: table-ieee-formats926593
+Ref: Math Definitions-Footnote-1927196
+Node: MPFR features927301
+Node: FP Math Caution929019
+Ref: FP Math Caution-Footnote-1930091
+Node: Inexactness of computations930460
+Node: Inexact representation931420
+Node: Comparing FP Values932780
+Node: Errors accumulate933862
+Node: Getting Accuracy935295
+Node: Try To Round938005
+Node: Setting precision938904
+Ref: table-predefined-precision-strings939601
+Node: Setting the rounding mode941431
+Ref: table-gawk-rounding-modes941805
+Ref: Setting the rounding mode-Footnote-1945736
+Node: Arbitrary Precision Integers945915
+Ref: Arbitrary Precision Integers-Footnote-1949090
+Node: Checking for MPFR949239
+Node: POSIX Floating Point Problems950713
+Ref: POSIX Floating Point Problems-Footnote-1954998
+Node: Floating point summary955036
+Node: Dynamic Extensions957226
+Node: Extension Intro958779
+Node: Plugin License960045
+Node: Extension Mechanism Outline960842
+Ref: figure-load-extension961281
+Ref: figure-register-new-function962846
+Ref: figure-call-new-function963938
+Node: Extension API Description966000
+Node: Extension API Functions Introduction967642
+Node: General Data Types973182
+Ref: General Data Types-Footnote-1981543
+Node: Memory Allocation Functions981842
+Ref: Memory Allocation Functions-Footnote-1986052
+Node: Constructor Functions986151
+Node: Registration Functions989737
+Node: Extension Functions990422
+Node: Exit Callback Functions995637
+Node: Extension Version String996887
+Node: Input Parsers997550
+Node: Output Wrappers1010271
+Node: Two-way processors1014783
+Node: Printing Messages1017048
+Ref: Printing Messages-Footnote-11018219
+Node: Updating ERRNO1018372
+Node: Requesting Values1019111
+Ref: table-value-types-returned1019848
+Node: Accessing Parameters1020784
+Node: Symbol Table Access1022019
+Node: Symbol table by name1022531
+Node: Symbol table by cookie1024320
+Ref: Symbol table by cookie-Footnote-11028505
+Node: Cached values1028569
+Ref: Cached values-Footnote-11032105
+Node: Array Manipulation1032258
+Ref: Array Manipulation-Footnote-11033349
+Node: Array Data Types1033386
+Ref: Array Data Types-Footnote-11036044
+Node: Array Functions1036136
+Node: Flattening Arrays1040634
+Node: Creating Arrays1047610
+Node: Redirection API1052377
+Node: Extension API Variables1055210
+Node: Extension Versioning1055921
+Ref: gawk-api-version1056350
+Node: Extension GMP/MPFR Versioning1058081
+Node: Extension API Informational Variables1059709
+Node: Extension API Boilerplate1060782
+Node: Changes from API V11064756
+Node: Finding Extensions1066328
+Node: Extension Example1066887
+Node: Internal File Description1067685
+Node: Internal File Ops1071765
+Ref: Internal File Ops-Footnote-11083115
+Node: Using Internal File Ops1083255
+Ref: Using Internal File Ops-Footnote-11085638
+Node: Extension Samples1085912
+Node: Extension Sample File Functions1087441
+Node: Extension Sample Fnmatch1095090
+Node: Extension Sample Fork1096577
+Node: Extension Sample Inplace1097795
+Node: Extension Sample Ord1101012
+Node: Extension Sample Readdir1101848
+Ref: table-readdir-file-types1102737
+Node: Extension Sample Revout1103542
+Node: Extension Sample Rev2way1104131
+Node: Extension Sample Read write array1104871
+Node: Extension Sample Readfile1106813
+Node: Extension Sample Time1107908
+Node: Extension Sample API Tests1109256
+Node: gawkextlib1109748
+Node: Extension summary1112666
+Node: Extension Exercises1116368
+Node: Language History1117866
+Node: V7/SVR3.11119522
+Node: SVR41121674
+Node: POSIX1123108
+Node: BTL1124488
+Node: POSIX/GNU1125217
+Node: Feature History1130995
+Node: Common Extensions1146854
+Node: Ranges and Locales1148137
+Ref: Ranges and Locales-Footnote-11152753
+Ref: Ranges and Locales-Footnote-21152780
+Ref: Ranges and Locales-Footnote-31153015
+Node: Contributors1153236
+Node: History summary1159181
+Node: Installation1160561
+Node: Gawk Distribution1161505
+Node: Getting1161989
+Node: Extracting1162952
+Node: Distribution contents1164590
+Node: Unix Installation1171070
+Node: Quick Installation1171752
+Node: Shell Startup Files1174166
+Node: Additional Configuration Options1175255
+Node: Configuration Philosophy1177548
+Node: Non-Unix Installation1179917
+Node: PC Installation1180377
+Node: PC Binary Installation1181215
+Node: PC Compiling1181650
+Node: PC Using1182767
+Node: Cygwin1185982
+Node: MSYS1187081
+Node: VMS Installation1187582
+Node: VMS Compilation1188373
+Ref: VMS Compilation-Footnote-11189602
+Node: VMS Dynamic Extensions1189660
+Node: VMS Installation Details1191345
+Node: VMS Running1193598
+Node: VMS GNV1197877
+Node: VMS Old Gawk1198612
+Node: Bugs1199083
+Node: Bug address1199746
+Node: Usenet1202538
+Node: Maintainers1203315
+Node: Other Versions1204576
+Node: Installation summary1211338
+Node: Notes1212540
+Node: Compatibility Mode1213405
+Node: Additions1214187
+Node: Accessing The Source1215112
+Node: Adding Code1216549
+Node: New Ports1222768
+Node: Derived Files1227256
+Ref: Derived Files-Footnote-11232902
+Ref: Derived Files-Footnote-21232937
+Ref: Derived Files-Footnote-31233535
+Node: Future Extensions1233649
+Node: Implementation Limitations1234307
+Node: Extension Design1235490
+Node: Old Extension Problems1236644
+Ref: Old Extension Problems-Footnote-11238162
+Node: Extension New Mechanism Goals1238219
+Ref: Extension New Mechanism Goals-Footnote-11241583
+Node: Extension Other Design Decisions1241772
+Node: Extension Future Growth1243885
+Node: Old Extension Mechanism1244721
+Node: Notes summary1246484
+Node: Basic Concepts1247666
+Node: Basic High Level1248347
+Ref: figure-general-flow1248629
+Ref: figure-process-flow1249314
+Ref: Basic High Level-Footnote-11252615
+Node: Basic Data Typing1252800
+Node: Glossary1256128
+Node: Copying1287966
+Node: GNU Free Documentation License1325509
+Node: Index1350629

End Tag Table
diff --git a/doc/gawk.texi b/doc/gawk.texi
index 6db3a2af..68ba82dd 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -11025,6 +11025,69 @@ eight-bit ASCII characters, including ASCII @sc{nul} (character code zero).
Other @command{awk}
implementations may have difficulty with some character codes.
+Some languages allow you to continue long strings across
+multiple lines by ending the line with a backslash. For example, in C:
+
+@example
+#include <stdio.h>
+
+int main()
+@{
+ printf("hello, \
+world\n");
+ return 0;
+@}
+@end example
+
+@noindent
+In such a case, the C compiler removes both the backslash and the newline,
+producing a string as if it had been typed @samp{"hello, world\n"}.
+This is useful when a single string needs to contain a large amount of text.
+
+The POSIX standard says explicitly that newlines are not allowed inside string
+constants. And indeed, all @command{awk} implementations report an error
+if you try to put a newline inside a string constant. For example:
+
+@example
+$ @kbd{gawk 'BEGIN @{ print "hello, }
+> @kbd{world" @}'}
+@print{} gawk: cmd. line:1: BEGIN @{ print "hello,
+@print{} gawk: cmd. line:1: ^ unterminated string
+@print{} gawk: cmd. line:1: BEGIN @{ print "hello,
+@print{} gawk: cmd. line:1: ^ syntax error
+@end example
+
+@cindex dark corner, string continuation
+@cindex strings, continuation across lines
+@cindex differences in @command{awk} and @command{gawk}, strings
+Although POSIX doesn't define what happens if you use an escaped
+newline, as in the previous C example, all known versions of
+@command{awk} allow you to do so. Unfortunately, what each one
+does with such a string varies. @value{DARKCORNER} @command{gawk},
+@command{mawk}, and the OpenSolaris POSIX @command{awk}
+(@pxref{Other Versions}) elide the backslash and newline, as in C:
+
+@example
+$ @kbd{gawk 'BEGIN @{ print "hello, \}
+> @kbd{world" @}'}
+@print{} hello, world
+@end example
+
+Brian Kernighan's @command{awk} and Busybox @command{awk}
+remove the backslash but leave the newline
+intact, as part of the string:
+
+@example
+$ @kbd{nawk 'BEGIN @{ print "hello, \}
+> @kbd{world" @}'}
+@print{} hello,
+@print{} world
+@end example
+
+In POSIX mode (@pxref{Options}), @command{gawk} does not
+allow escaped newlines. Otherwise, it elides both the backslash
+and the newline, as shown in the first example above.
+
@node Nondecimal-numbers
@subsubsection Octal and Hexadecimal Numbers
@cindex octal numbers
diff --git a/doc/gawktexi.in b/doc/gawktexi.in
index 2a2fdd4b..3ba511ba 100644
--- a/doc/gawktexi.in
+++ b/doc/gawktexi.in
@@ -10511,6 +10511,69 @@ eight-bit ASCII characters, including ASCII @sc{nul} (character code zero).
Other @command{awk}
implementations may have difficulty with some character codes.
+Some languages allow you to continue long strings across
+multiple lines by ending the line with a backslash. For example, in C:
+
+@example
+#include <stdio.h>
+
+int main()
+@{
+ printf("hello, \
+world\n");
+ return 0;
+@}
+@end example
+
+@noindent
+In such a case, the C compiler removes both the backslash and the newline,
+producing a string as if it had been typed @samp{"hello, world\n"}.
+This is useful when a single string needs to contain a large amount of text.
+
+The POSIX standard says explicitly that newlines are not allowed inside string
+constants. And indeed, all @command{awk} implementations report an error
+if you try to put a newline inside a string constant. For example:
+
+@example
+$ @kbd{gawk 'BEGIN @{ print "hello, }
+> @kbd{world" @}'}
+@print{} gawk: cmd. line:1: BEGIN @{ print "hello,
+@print{} gawk: cmd. line:1: ^ unterminated string
+@print{} gawk: cmd. line:1: BEGIN @{ print "hello,
+@print{} gawk: cmd. line:1: ^ syntax error
+@end example
+
+@cindex dark corner, string continuation
+@cindex strings, continuation across lines
+@cindex differences in @command{awk} and @command{gawk}, strings
+Although POSIX doesn't define what happens if you use an escaped
+newline, as in the previous C example, all known versions of
+@command{awk} allow you to do so. Unfortunately, what each one
+does with such a string varies. @value{DARKCORNER} @command{gawk},
+@command{mawk}, and the OpenSolaris POSIX @command{awk}
+(@pxref{Other Versions}) elide the backslash and newline, as in C:
+
+@example
+$ @kbd{gawk 'BEGIN @{ print "hello, \}
+> @kbd{world" @}'}
+@print{} hello, world
+@end example
+
+Brian Kernighan's @command{awk} and Busybox @command{awk}
+remove the backslash but leave the newline
+intact, as part of the string:
+
+@example
+$ @kbd{nawk 'BEGIN @{ print "hello, \}
+> @kbd{world" @}'}
+@print{} hello,
+@print{} world
+@end example
+
+In POSIX mode (@pxref{Options}), @command{gawk} does not
+allow escaped newlines. Otherwise, it elides both the backslash
+and the newline, as shown in the first example above.
+
@node Nondecimal-numbers
@subsubsection Octal and Hexadecimal Numbers
@cindex octal numbers
diff --git a/main.c b/main.c
index 25a628ba..4e94bddd 100644
--- a/main.c
+++ b/main.c
@@ -1165,11 +1165,18 @@ arg_assign(char *arg, bool initing)
fatal(_("cannot use function `%s' as variable name"), arg);
}
+ // POSIX disallows any newlines inside strings
+ // The scanner handles that for program files.
+ // We have to check here for strings passed to -v.
+ if (do_posix && strchr(cp, '\n') != NULL)
+ fatal(_("POSIX does not allow physical newlines in string values"));
+
/*
* BWK awk expands escapes inside assignments.
* This makes sense, so we do it too.
+ * In addition, remove \-<newline> as in scanning.
*/
- it = make_str_node(cp, strlen(cp), SCAN);
+ it = make_str_node(cp, strlen(cp), SCAN | ELIDE_BACK_NL);
it->flags |= USER_INPUT;
#ifdef LC_NUMERIC
/*
diff --git a/node.c b/node.c
index 54ea6627..4ad41ef1 100644
--- a/node.c
+++ b/node.c
@@ -451,6 +451,8 @@ make_str_node(const char *s, size_t len, int flags)
if (c < 0) {
if (do_lint)
lintwarn(_("backslash at end of string"));
+ if ((flags & ELIDE_BACK_NL) != 0)
+ continue;
c = '\\';
}
*ptm++ = c;