diff options
-rw-r--r-- | ChangeLog | 38 | ||||
-rw-r--r-- | NEWS | 20 | ||||
-rw-r--r-- | awk.h | 3 | ||||
-rw-r--r-- | awkgram.c | 742 | ||||
-rw-r--r-- | awkgram.y | 24 | ||||
-rw-r--r-- | awklib/eg/lib/assert.awk | 2 | ||||
-rw-r--r-- | awklib/eg/lib/bits2str.awk | 2 | ||||
-rw-r--r-- | awklib/eg/lib/quicksort.awk | 5 | ||||
-rw-r--r-- | awklib/eg/prog/anagram.awk | 6 | ||||
-rw-r--r-- | awklib/eg/prog/extract.awk | 2 | ||||
-rw-r--r-- | awklib/eg/prog/translate.awk | 2 | ||||
-rw-r--r-- | doc/ChangeLog | 37 | ||||
-rw-r--r-- | doc/gawk.info | 1910 | ||||
-rw-r--r-- | doc/gawk.texi | 687 | ||||
-rw-r--r-- | doc/gawktexi.in | 680 | ||||
-rw-r--r-- | eval.c | 6 | ||||
-rw-r--r-- | gawkapi.c | 2 | ||||
-rw-r--r-- | main.c | 4 | ||||
-rw-r--r-- | symbol.c | 63 | ||||
-rw-r--r-- | test/ChangeLog | 13 | ||||
-rw-r--r-- | test/Makefile.am | 18 | ||||
-rw-r--r-- | test/Makefile.in | 24 | ||||
-rw-r--r-- | test/Maketests | 5 | ||||
-rw-r--r-- | test/callparam.awk | 6 | ||||
-rw-r--r-- | test/callparam.ok | 2 | ||||
-rwxr-xr-x | test/exit.sh | 2 | ||||
-rw-r--r-- | test/paramasfunc1.awk | 9 | ||||
-rw-r--r-- | test/paramasfunc1.ok | 3 | ||||
-rw-r--r-- | test/paramasfunc2.awk | 10 | ||||
-rw-r--r-- | test/paramasfunc2.ok | 3 |
30 files changed, 2396 insertions, 1934 deletions
@@ -1,3 +1,41 @@ +2015-02-05 Andrew J. Schorr <aschorr@telemetry-investments.com> + + * eval.c (set_IGNORECASE): If IGNORECASE has a numeric value, try + using that before treating it as a string. This fixes a problem + where setting -v IGNORECASE=0 on the command line was not working + properly. + +2015-02-01 Arnold D. Robbins <arnold@skeeve.com> + + Move POSIX requirement for disallowing parameter names with the + same name as a function into --posix. + + * NEWS: Document it. + * awkgram.y (parse_program): Check do_posix before calling + check_param_names(). + * symbol.c (check_param_names): Set up a fake node and call + in_array() for function parameter names instead of linear + searching the function list a second time. Thanks to Andrew + Schorr for the motivation. + +2015-01-30 Arnold D. Robbins <arnold@skeeve.com> + + Don't allow function parameter names to be the same as function + names - required by POSIX. Bug first reported in comp.lang.awk. + + In addition, don't allow use of a parameter as a function name + in a call (but it's ok in indirect calls). + + * NEWS: Updated. + * awk.h (check_param_names): Add declaration. + * awkgram.y (at_seen): New variable. Communicates between + yylex() and the parser. + (FUNC_CALL production): Check at_seen and check that the identifier + is a function name. + (parse_program): Call check_param_names() and set errcount. + (yylex): Set at_seen after seeing an at-sign. + * symbol.c (check_param_names): New function. + 2015-01-24 Arnold D. Robbins <arnold@skeeve.com> Infrastructure updates. @@ -53,6 +53,7 @@ Changes from 4.1.1 to 4.1.2 - Chapter 15 on MPFR reworked. - Summary sections added to all chapters. - Exercises added in several chapters. + - Heavily proof-read and copyedited. 2. The debugger's "restart" command now works again. @@ -60,14 +61,14 @@ Changes from 4.1.1 to 4.1.2 4. A number of bugs have been fixed in the MPFR code. -5. Indirect function calls now work for both built-in and - extension functions. +5. 
Indirect function calls now work for both built-in and extension functions. 6. Built-in functions are now included in FUNCTAB. -7. In non-English locales, it was accidentally possible to use "letters" - beside those of the English alphabet in identifiers. This has - been fixed. (isalpha and isalnum are NOT our friends.) +7. POSIX and historical practice require the exclusive use of the English + alphabet in identifiers. In non-English locales, it was accidentally + possible to use "letters" beside those of the English alphabet. This + has been fixed. (isalpha and isalnum are NOT our friends.) If you feel that you must have this misfeature, use `configure --help' to see what option to use when configuring gawk to reenable it. @@ -84,6 +85,15 @@ Changes from 4.1.1 to 4.1.2 10. Infrastructure upgrades: Automake 1.15, Gettext 0.19.4, Libtool 2.4.5, Bison 3.0.4. +11. If a user-defined function has a parameter with the same name as another + user-defined function, it is no longer possible to call the second + function from inside the first. + +12. POSIX requires that the names of function parameters not be the + same as any of the special built-in variables and also not conflict + with the names of any functions. Gawk has checked for the former + since 3.1.7. With --posix, it now also checks for the latter. + XX. A number of bugs have been fixed. See the ChangeLog. Changes from 4.1.0 to 4.1.1 @@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1986, 1988, 1989, 1991-2014 the Free Software Foundation, Inc. + * Copyright (C) 1986, 1988, 1989, 1991-2015 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. 
@@ -1616,6 +1616,7 @@ extern void free_context(AWK_CONTEXT *ctxt, bool keep_globals); extern NODE **variable_list(); extern NODE **function_list(bool sort); extern void print_vars(NODE **table, Func_print print_func, FILE *fp); +extern bool check_param_names(void); /* floatcomp.c */ #ifdef HAVE_UINTMAX_T @@ -127,6 +127,7 @@ static int one_line_close(int fd); static void split_comment(void); static void check_comment(void); +static bool at_seen = false; static bool want_source = false; static bool want_regexp = false; /* lexical scanning kludge */ static char *in_function; /* parsing kludge */ @@ -201,7 +202,7 @@ extern double fmod(double x, double y); #define YYSTYPE INSTRUCTION * -#line 205 "awkgram.c" /* yacc.c:339 */ +#line 206 "awkgram.c" /* yacc.c:339 */ # ifndef YY_NULLPTR # if defined __cplusplus && 201103L <= __cplusplus @@ -355,7 +356,7 @@ int yyparse (void); /* Copy the second part of user declarations. */ -#line 359 "awkgram.c" /* yacc.c:358 */ +#line 360 "awkgram.c" /* yacc.c:358 */ #ifdef short # undef short @@ -657,25 +658,25 @@ static const yytype_uint8 yytranslate[] = /* YYRLINE[YYN] -- Source line where rule number YYN was defined. 
*/ static const yytype_uint16 yyrline[] = { - 0, 204, 204, 206, 211, 212, 216, 228, 233, 244, - 250, 255, 263, 271, 273, 278, 286, 288, 294, 302, - 312, 342, 356, 370, 378, 389, 401, 403, 405, 411, - 416, 417, 421, 456, 455, 489, 491, 496, 502, 530, - 535, 536, 540, 542, 544, 551, 641, 683, 725, 838, - 845, 852, 862, 871, 880, 889, 900, 916, 915, 939, - 951, 951, 1049, 1049, 1082, 1112, 1118, 1119, 1125, 1126, - 1133, 1138, 1150, 1164, 1166, 1174, 1179, 1181, 1189, 1191, - 1200, 1201, 1209, 1214, 1214, 1225, 1229, 1237, 1238, 1241, - 1243, 1248, 1249, 1258, 1259, 1264, 1269, 1275, 1277, 1279, - 1286, 1287, 1293, 1294, 1299, 1301, 1306, 1308, 1316, 1321, - 1330, 1337, 1339, 1341, 1357, 1367, 1374, 1376, 1381, 1383, - 1385, 1393, 1395, 1400, 1402, 1407, 1409, 1411, 1461, 1463, - 1465, 1467, 1469, 1471, 1473, 1475, 1489, 1494, 1499, 1524, - 1530, 1532, 1534, 1536, 1538, 1540, 1545, 1549, 1581, 1583, - 1589, 1595, 1608, 1609, 1610, 1615, 1620, 1624, 1628, 1643, - 1656, 1661, 1697, 1715, 1716, 1722, 1723, 1728, 1730, 1737, - 1754, 1771, 1773, 1780, 1785, 1793, 1803, 1815, 1824, 1828, - 1832, 1836, 1840, 1844, 1847, 1849, 1853, 1857, 1861 + 0, 205, 205, 207, 212, 213, 217, 229, 234, 245, + 251, 257, 266, 274, 276, 281, 289, 291, 297, 305, + 315, 345, 359, 373, 381, 392, 404, 406, 408, 414, + 422, 423, 427, 462, 461, 495, 497, 502, 508, 536, + 541, 542, 546, 548, 550, 557, 647, 689, 731, 844, + 851, 858, 868, 877, 886, 895, 906, 922, 921, 945, + 957, 957, 1055, 1055, 1088, 1118, 1124, 1125, 1131, 1132, + 1139, 1144, 1156, 1170, 1172, 1180, 1185, 1187, 1195, 1197, + 1206, 1207, 1215, 1220, 1220, 1231, 1235, 1243, 1244, 1247, + 1249, 1254, 1255, 1264, 1265, 1270, 1275, 1281, 1283, 1285, + 1292, 1293, 1299, 1300, 1305, 1307, 1312, 1314, 1322, 1327, + 1336, 1343, 1345, 1347, 1363, 1373, 1380, 1382, 1387, 1389, + 1391, 1399, 1401, 1406, 1408, 1413, 1415, 1417, 1467, 1469, + 1471, 1473, 1475, 1477, 1479, 1481, 1495, 1500, 1505, 1530, + 1536, 1538, 1540, 1542, 1544, 1546, 1551, 
1555, 1587, 1589, + 1595, 1601, 1614, 1615, 1616, 1621, 1626, 1630, 1634, 1649, + 1662, 1667, 1704, 1733, 1734, 1740, 1741, 1746, 1748, 1755, + 1772, 1789, 1791, 1798, 1803, 1811, 1821, 1833, 1842, 1846, + 1850, 1854, 1858, 1862, 1865, 1867, 1871, 1875, 1879 }; #endif @@ -1848,24 +1849,24 @@ yyreduce: switch (yyn) { case 3: -#line 207 "awkgram.y" /* yacc.c:1646 */ +#line 208 "awkgram.y" /* yacc.c:1646 */ { rule = 0; yyerrok; } -#line 1857 "awkgram.c" /* yacc.c:1646 */ +#line 1858 "awkgram.c" /* yacc.c:1646 */ break; case 5: -#line 213 "awkgram.y" /* yacc.c:1646 */ +#line 214 "awkgram.y" /* yacc.c:1646 */ { next_sourcefile(); } -#line 1865 "awkgram.c" /* yacc.c:1646 */ +#line 1866 "awkgram.c" /* yacc.c:1646 */ break; case 6: -#line 217 "awkgram.y" /* yacc.c:1646 */ +#line 218 "awkgram.y" /* yacc.c:1646 */ { rule = 0; /* @@ -1874,20 +1875,20 @@ yyreduce: */ /* yyerrok; */ } -#line 1878 "awkgram.c" /* yacc.c:1646 */ +#line 1879 "awkgram.c" /* yacc.c:1646 */ break; case 7: -#line 229 "awkgram.y" /* yacc.c:1646 */ +#line 230 "awkgram.y" /* yacc.c:1646 */ { (void) append_rule((yyvsp[-1]), (yyvsp[0])); first_rule = false; } -#line 1887 "awkgram.c" /* yacc.c:1646 */ +#line 1888 "awkgram.c" /* yacc.c:1646 */ break; case 8: -#line 234 "awkgram.y" /* yacc.c:1646 */ +#line 235 "awkgram.y" /* yacc.c:1646 */ { if (rule != Rule) { msg(_("%s blocks must have an action part"), ruletab[rule]); @@ -1898,39 +1899,41 @@ yyreduce: } else /* pattern rule with non-empty pattern */ (void) append_rule((yyvsp[-1]), NULL); } -#line 1902 "awkgram.c" /* yacc.c:1646 */ +#line 1903 "awkgram.c" /* yacc.c:1646 */ break; case 9: -#line 245 "awkgram.y" /* yacc.c:1646 */ +#line 246 "awkgram.y" /* yacc.c:1646 */ { in_function = NULL; (void) mk_function((yyvsp[-1]), (yyvsp[0])); yyerrok; } -#line 1912 "awkgram.c" /* yacc.c:1646 */ +#line 1913 "awkgram.c" /* yacc.c:1646 */ break; case 10: -#line 251 "awkgram.y" /* yacc.c:1646 */ +#line 252 "awkgram.y" /* yacc.c:1646 */ { want_source = false; + at_seen = 
false; yyerrok; } -#line 1921 "awkgram.c" /* yacc.c:1646 */ +#line 1923 "awkgram.c" /* yacc.c:1646 */ break; case 11: -#line 256 "awkgram.y" /* yacc.c:1646 */ +#line 258 "awkgram.y" /* yacc.c:1646 */ { want_source = false; + at_seen = false; yyerrok; } -#line 1930 "awkgram.c" /* yacc.c:1646 */ +#line 1933 "awkgram.c" /* yacc.c:1646 */ break; case 12: -#line 264 "awkgram.y" /* yacc.c:1646 */ +#line 267 "awkgram.y" /* yacc.c:1646 */ { if (include_source((yyvsp[0])) < 0) YYABORT; @@ -1938,23 +1941,23 @@ yyreduce: bcfree((yyvsp[0])); (yyval) = NULL; } -#line 1942 "awkgram.c" /* yacc.c:1646 */ +#line 1945 "awkgram.c" /* yacc.c:1646 */ break; case 13: -#line 272 "awkgram.y" /* yacc.c:1646 */ +#line 275 "awkgram.y" /* yacc.c:1646 */ { (yyval) = NULL; } -#line 1948 "awkgram.c" /* yacc.c:1646 */ +#line 1951 "awkgram.c" /* yacc.c:1646 */ break; case 14: -#line 274 "awkgram.y" /* yacc.c:1646 */ +#line 277 "awkgram.y" /* yacc.c:1646 */ { (yyval) = NULL; } -#line 1954 "awkgram.c" /* yacc.c:1646 */ +#line 1957 "awkgram.c" /* yacc.c:1646 */ break; case 15: -#line 279 "awkgram.y" /* yacc.c:1646 */ +#line 282 "awkgram.y" /* yacc.c:1646 */ { if (load_library((yyvsp[0])) < 0) YYABORT; @@ -1962,23 +1965,23 @@ yyreduce: bcfree((yyvsp[0])); (yyval) = NULL; } -#line 1966 "awkgram.c" /* yacc.c:1646 */ +#line 1969 "awkgram.c" /* yacc.c:1646 */ break; case 16: -#line 287 "awkgram.y" /* yacc.c:1646 */ +#line 290 "awkgram.y" /* yacc.c:1646 */ { (yyval) = NULL; } -#line 1972 "awkgram.c" /* yacc.c:1646 */ +#line 1975 "awkgram.c" /* yacc.c:1646 */ break; case 17: -#line 289 "awkgram.y" /* yacc.c:1646 */ +#line 292 "awkgram.y" /* yacc.c:1646 */ { (yyval) = NULL; } -#line 1978 "awkgram.c" /* yacc.c:1646 */ +#line 1981 "awkgram.c" /* yacc.c:1646 */ break; case 18: -#line 294 "awkgram.y" /* yacc.c:1646 */ +#line 297 "awkgram.y" /* yacc.c:1646 */ { rule = Rule; if (comment != NULL) { @@ -1987,11 +1990,11 @@ yyreduce: } else (yyval) = NULL; } -#line 1991 "awkgram.c" /* yacc.c:1646 */ +#line 1994 
"awkgram.c" /* yacc.c:1646 */ break; case 19: -#line 303 "awkgram.y" /* yacc.c:1646 */ +#line 306 "awkgram.y" /* yacc.c:1646 */ { rule = Rule; if (comment != NULL) { @@ -2000,11 +2003,11 @@ yyreduce: } else (yyval) = (yyvsp[0]); } -#line 2004 "awkgram.c" /* yacc.c:1646 */ +#line 2007 "awkgram.c" /* yacc.c:1646 */ break; case 20: -#line 313 "awkgram.y" /* yacc.c:1646 */ +#line 316 "awkgram.y" /* yacc.c:1646 */ { INSTRUCTION *tp; @@ -2034,11 +2037,11 @@ yyreduce: (yyval) = list_append(list_merge((yyvsp[-3]), (yyvsp[0])), tp); rule = Rule; } -#line 2038 "awkgram.c" /* yacc.c:1646 */ +#line 2041 "awkgram.c" /* yacc.c:1646 */ break; case 21: -#line 343 "awkgram.y" /* yacc.c:1646 */ +#line 346 "awkgram.y" /* yacc.c:1646 */ { static int begin_seen = 0; @@ -2052,11 +2055,11 @@ yyreduce: check_comment(); (yyval) = (yyvsp[0]); } -#line 2056 "awkgram.c" /* yacc.c:1646 */ +#line 2059 "awkgram.c" /* yacc.c:1646 */ break; case 22: -#line 357 "awkgram.y" /* yacc.c:1646 */ +#line 360 "awkgram.y" /* yacc.c:1646 */ { static int end_seen = 0; @@ -2070,11 +2073,11 @@ yyreduce: check_comment(); (yyval) = (yyvsp[0]); } -#line 2074 "awkgram.c" /* yacc.c:1646 */ +#line 2077 "awkgram.c" /* yacc.c:1646 */ break; case 23: -#line 371 "awkgram.y" /* yacc.c:1646 */ +#line 374 "awkgram.y" /* yacc.c:1646 */ { func_first = false; (yyvsp[0])->in_rule = rule = BEGINFILE; @@ -2082,11 +2085,11 @@ yyreduce: check_comment(); (yyval) = (yyvsp[0]); } -#line 2086 "awkgram.c" /* yacc.c:1646 */ +#line 2089 "awkgram.c" /* yacc.c:1646 */ break; case 24: -#line 379 "awkgram.y" /* yacc.c:1646 */ +#line 382 "awkgram.y" /* yacc.c:1646 */ { func_first = false; (yyvsp[0])->in_rule = rule = ENDFILE; @@ -2094,11 +2097,11 @@ yyreduce: check_comment(); (yyval) = (yyvsp[0]); } -#line 2098 "awkgram.c" /* yacc.c:1646 */ +#line 2101 "awkgram.c" /* yacc.c:1646 */ break; case 25: -#line 390 "awkgram.y" /* yacc.c:1646 */ +#line 393 "awkgram.y" /* yacc.c:1646 */ { INSTRUCTION *ip; if ((yyvsp[-3]) == NULL) @@ -2107,39 +2110,42 
@@ yyreduce: ip = (yyvsp[-3]); (yyval) = ip; } -#line 2111 "awkgram.c" /* yacc.c:1646 */ +#line 2114 "awkgram.c" /* yacc.c:1646 */ break; case 26: -#line 402 "awkgram.y" /* yacc.c:1646 */ +#line 405 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 2117 "awkgram.c" /* yacc.c:1646 */ +#line 2120 "awkgram.c" /* yacc.c:1646 */ break; case 27: -#line 404 "awkgram.y" /* yacc.c:1646 */ +#line 407 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 2123 "awkgram.c" /* yacc.c:1646 */ +#line 2126 "awkgram.c" /* yacc.c:1646 */ break; case 28: -#line 406 "awkgram.y" /* yacc.c:1646 */ +#line 409 "awkgram.y" /* yacc.c:1646 */ { yyerror(_("`%s' is a built-in function, it cannot be redefined"), tokstart); YYABORT; } -#line 2133 "awkgram.c" /* yacc.c:1646 */ +#line 2136 "awkgram.c" /* yacc.c:1646 */ break; case 29: -#line 412 "awkgram.y" /* yacc.c:1646 */ - { (yyval) = (yyvsp[0]); } -#line 2139 "awkgram.c" /* yacc.c:1646 */ +#line 415 "awkgram.y" /* yacc.c:1646 */ + { + (yyval) = (yyvsp[0]); + at_seen = false; + } +#line 2145 "awkgram.c" /* yacc.c:1646 */ break; case 32: -#line 422 "awkgram.y" /* yacc.c:1646 */ +#line 428 "awkgram.y" /* yacc.c:1646 */ { /* * treat any comments between BOF and the first function @@ -2166,17 +2172,17 @@ yyreduce: /* $4 already free'd in install_function */ (yyval) = (yyvsp[-5]); } -#line 2170 "awkgram.c" /* yacc.c:1646 */ +#line 2176 "awkgram.c" /* yacc.c:1646 */ break; case 33: -#line 456 "awkgram.y" /* yacc.c:1646 */ +#line 462 "awkgram.y" /* yacc.c:1646 */ { want_regexp = true; } -#line 2176 "awkgram.c" /* yacc.c:1646 */ +#line 2182 "awkgram.c" /* yacc.c:1646 */ break; case 34: -#line 458 "awkgram.y" /* yacc.c:1646 */ +#line 464 "awkgram.y" /* yacc.c:1646 */ { NODE *n, *exp; char *re; @@ -2205,28 +2211,28 @@ yyreduce: (yyval)->opcode = Op_match_rec; (yyval)->memory = n; } -#line 2209 "awkgram.c" /* yacc.c:1646 */ +#line 2215 "awkgram.c" /* yacc.c:1646 */ break; case 35: -#line 490 "awkgram.y" /* yacc.c:1646 */ +#line 496 
"awkgram.y" /* yacc.c:1646 */ { bcfree((yyvsp[0])); } -#line 2215 "awkgram.c" /* yacc.c:1646 */ +#line 2221 "awkgram.c" /* yacc.c:1646 */ break; case 37: -#line 496 "awkgram.y" /* yacc.c:1646 */ +#line 502 "awkgram.y" /* yacc.c:1646 */ { if (comment != NULL) { (yyval) = list_create(comment); comment = NULL; } else (yyval) = NULL; } -#line 2226 "awkgram.c" /* yacc.c:1646 */ +#line 2232 "awkgram.c" /* yacc.c:1646 */ break; case 38: -#line 503 "awkgram.y" /* yacc.c:1646 */ +#line 509 "awkgram.y" /* yacc.c:1646 */ { if ((yyvsp[0]) == NULL) { if (comment == NULL) @@ -2254,40 +2260,40 @@ yyreduce: } yyerrok; } -#line 2258 "awkgram.c" /* yacc.c:1646 */ +#line 2264 "awkgram.c" /* yacc.c:1646 */ break; case 39: -#line 531 "awkgram.y" /* yacc.c:1646 */ +#line 537 "awkgram.y" /* yacc.c:1646 */ { (yyval) = NULL; } -#line 2264 "awkgram.c" /* yacc.c:1646 */ +#line 2270 "awkgram.c" /* yacc.c:1646 */ break; case 42: -#line 541 "awkgram.y" /* yacc.c:1646 */ +#line 547 "awkgram.y" /* yacc.c:1646 */ { (yyval) = NULL; } -#line 2270 "awkgram.c" /* yacc.c:1646 */ +#line 2276 "awkgram.c" /* yacc.c:1646 */ break; case 43: -#line 543 "awkgram.y" /* yacc.c:1646 */ +#line 549 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[-1]); } -#line 2276 "awkgram.c" /* yacc.c:1646 */ +#line 2282 "awkgram.c" /* yacc.c:1646 */ break; case 44: -#line 545 "awkgram.y" /* yacc.c:1646 */ +#line 551 "awkgram.y" /* yacc.c:1646 */ { if (do_pretty_print) (yyval) = list_prepend((yyvsp[0]), instruction(Op_exec_count)); else (yyval) = (yyvsp[0]); } -#line 2287 "awkgram.c" /* yacc.c:1646 */ +#line 2293 "awkgram.c" /* yacc.c:1646 */ break; case 45: -#line 552 "awkgram.y" /* yacc.c:1646 */ +#line 558 "awkgram.y" /* yacc.c:1646 */ { INSTRUCTION *dflt, *curr = NULL, *cexp, *cstmt; INSTRUCTION *ip, *nextc, *tbreak; @@ -2377,11 +2383,11 @@ yyreduce: break_allowed--; fix_break_continue(ip, tbreak, NULL); } -#line 2381 "awkgram.c" /* yacc.c:1646 */ +#line 2387 "awkgram.c" /* yacc.c:1646 */ break; case 46: -#line 642 
"awkgram.y" /* yacc.c:1646 */ +#line 648 "awkgram.y" /* yacc.c:1646 */ { /* * ----------------- @@ -2423,11 +2429,11 @@ yyreduce: continue_allowed--; fix_break_continue(ip, tbreak, tcont); } -#line 2427 "awkgram.c" /* yacc.c:1646 */ +#line 2433 "awkgram.c" /* yacc.c:1646 */ break; case 47: -#line 684 "awkgram.y" /* yacc.c:1646 */ +#line 690 "awkgram.y" /* yacc.c:1646 */ { /* * ----------------- @@ -2469,11 +2475,11 @@ yyreduce: } /* else $1 and $4 are NULLs */ } -#line 2473 "awkgram.c" /* yacc.c:1646 */ +#line 2479 "awkgram.c" /* yacc.c:1646 */ break; case 48: -#line 726 "awkgram.y" /* yacc.c:1646 */ +#line 732 "awkgram.y" /* yacc.c:1646 */ { INSTRUCTION *ip; char *var_name = (yyvsp[-5])->lextok; @@ -2586,44 +2592,44 @@ regular_loop: break_allowed--; continue_allowed--; } -#line 2590 "awkgram.c" /* yacc.c:1646 */ +#line 2596 "awkgram.c" /* yacc.c:1646 */ break; case 49: -#line 839 "awkgram.y" /* yacc.c:1646 */ +#line 845 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_for_loop((yyvsp[-11]), (yyvsp[-9]), (yyvsp[-6]), (yyvsp[-3]), (yyvsp[0])); break_allowed--; continue_allowed--; } -#line 2601 "awkgram.c" /* yacc.c:1646 */ +#line 2607 "awkgram.c" /* yacc.c:1646 */ break; case 50: -#line 846 "awkgram.y" /* yacc.c:1646 */ +#line 852 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_for_loop((yyvsp[-10]), (yyvsp[-8]), (INSTRUCTION *) NULL, (yyvsp[-3]), (yyvsp[0])); break_allowed--; continue_allowed--; } -#line 2612 "awkgram.c" /* yacc.c:1646 */ +#line 2618 "awkgram.c" /* yacc.c:1646 */ break; case 51: -#line 853 "awkgram.y" /* yacc.c:1646 */ +#line 859 "awkgram.y" /* yacc.c:1646 */ { if (do_pretty_print) (yyval) = list_prepend((yyvsp[0]), instruction(Op_exec_count)); else (yyval) = (yyvsp[0]); } -#line 2623 "awkgram.c" /* yacc.c:1646 */ +#line 2629 "awkgram.c" /* yacc.c:1646 */ break; case 52: -#line 863 "awkgram.y" /* yacc.c:1646 */ +#line 869 "awkgram.y" /* yacc.c:1646 */ { if (! 
break_allowed) error_ln((yyvsp[-1])->source_line, @@ -2632,11 +2638,11 @@ regular_loop: (yyval) = list_create((yyvsp[-1])); } -#line 2636 "awkgram.c" /* yacc.c:1646 */ +#line 2642 "awkgram.c" /* yacc.c:1646 */ break; case 53: -#line 872 "awkgram.y" /* yacc.c:1646 */ +#line 878 "awkgram.y" /* yacc.c:1646 */ { if (! continue_allowed) error_ln((yyvsp[-1])->source_line, @@ -2645,11 +2651,11 @@ regular_loop: (yyval) = list_create((yyvsp[-1])); } -#line 2649 "awkgram.c" /* yacc.c:1646 */ +#line 2655 "awkgram.c" /* yacc.c:1646 */ break; case 54: -#line 881 "awkgram.y" /* yacc.c:1646 */ +#line 887 "awkgram.y" /* yacc.c:1646 */ { /* if inside function (rule = 0), resolve context at run-time */ if (rule && rule != Rule) @@ -2658,11 +2664,11 @@ regular_loop: (yyvsp[-1])->target_jmp = ip_rec; (yyval) = list_create((yyvsp[-1])); } -#line 2662 "awkgram.c" /* yacc.c:1646 */ +#line 2668 "awkgram.c" /* yacc.c:1646 */ break; case 55: -#line 890 "awkgram.y" /* yacc.c:1646 */ +#line 896 "awkgram.y" /* yacc.c:1646 */ { /* if inside function (rule = 0), resolve context at run-time */ if (rule == BEGIN || rule == END || rule == ENDFILE) @@ -2673,11 +2679,11 @@ regular_loop: (yyvsp[-1])->target_endfile = ip_endfile; (yyval) = list_create((yyvsp[-1])); } -#line 2677 "awkgram.c" /* yacc.c:1646 */ +#line 2683 "awkgram.c" /* yacc.c:1646 */ break; case 56: -#line 901 "awkgram.y" /* yacc.c:1646 */ +#line 907 "awkgram.y" /* yacc.c:1646 */ { /* Initialize the two possible jump targets, the actual target * is resolved at run-time. @@ -2692,20 +2698,20 @@ regular_loop: } else (yyval) = list_append((yyvsp[-1]), (yyvsp[-2])); } -#line 2696 "awkgram.c" /* yacc.c:1646 */ +#line 2702 "awkgram.c" /* yacc.c:1646 */ break; case 57: -#line 916 "awkgram.y" /* yacc.c:1646 */ +#line 922 "awkgram.y" /* yacc.c:1646 */ { if (! 
in_function) yyerror(_("`return' used outside function context")); } -#line 2705 "awkgram.c" /* yacc.c:1646 */ +#line 2711 "awkgram.c" /* yacc.c:1646 */ break; case 58: -#line 919 "awkgram.y" /* yacc.c:1646 */ +#line 925 "awkgram.y" /* yacc.c:1646 */ { if ((yyvsp[-1]) == NULL) { (yyval) = list_create((yyvsp[-3])); @@ -2726,17 +2732,17 @@ regular_loop: (yyval) = list_append((yyvsp[-1]), (yyvsp[-3])); } } -#line 2730 "awkgram.c" /* yacc.c:1646 */ +#line 2736 "awkgram.c" /* yacc.c:1646 */ break; case 60: -#line 951 "awkgram.y" /* yacc.c:1646 */ +#line 957 "awkgram.y" /* yacc.c:1646 */ { in_print = true; in_parens = 0; } -#line 2736 "awkgram.c" /* yacc.c:1646 */ +#line 2742 "awkgram.c" /* yacc.c:1646 */ break; case 61: -#line 952 "awkgram.y" /* yacc.c:1646 */ +#line 958 "awkgram.y" /* yacc.c:1646 */ { /* * Optimization: plain `print' has no expression list, so $3 is null. @@ -2833,17 +2839,17 @@ regular_print: } } } -#line 2837 "awkgram.c" /* yacc.c:1646 */ +#line 2843 "awkgram.c" /* yacc.c:1646 */ break; case 62: -#line 1049 "awkgram.y" /* yacc.c:1646 */ +#line 1055 "awkgram.y" /* yacc.c:1646 */ { sub_counter = 0; } -#line 2843 "awkgram.c" /* yacc.c:1646 */ +#line 2849 "awkgram.c" /* yacc.c:1646 */ break; case 63: -#line 1050 "awkgram.y" /* yacc.c:1646 */ +#line 1056 "awkgram.y" /* yacc.c:1646 */ { char *arr = (yyvsp[-2])->lextok; @@ -2876,11 +2882,11 @@ regular_print: (yyval) = list_append(list_append((yyvsp[0]), (yyvsp[-2])), (yyvsp[-3])); } } -#line 2880 "awkgram.c" /* yacc.c:1646 */ +#line 2886 "awkgram.c" /* yacc.c:1646 */ break; case 64: -#line 1087 "awkgram.y" /* yacc.c:1646 */ +#line 1093 "awkgram.y" /* yacc.c:1646 */ { static bool warned = false; char *arr = (yyvsp[-1])->lextok; @@ -2906,52 +2912,52 @@ regular_print: fatal(_("`delete' is not allowed with FUNCTAB")); } } -#line 2910 "awkgram.c" /* yacc.c:1646 */ +#line 2916 "awkgram.c" /* yacc.c:1646 */ break; case 65: -#line 1113 "awkgram.y" /* yacc.c:1646 */ +#line 1119 "awkgram.y" /* yacc.c:1646 */ { 
(yyval) = optimize_assignment((yyvsp[0])); } -#line 2916 "awkgram.c" /* yacc.c:1646 */ +#line 2922 "awkgram.c" /* yacc.c:1646 */ break; case 66: -#line 1118 "awkgram.y" /* yacc.c:1646 */ +#line 1124 "awkgram.y" /* yacc.c:1646 */ { (yyval) = NULL; } -#line 2922 "awkgram.c" /* yacc.c:1646 */ +#line 2928 "awkgram.c" /* yacc.c:1646 */ break; case 67: -#line 1120 "awkgram.y" /* yacc.c:1646 */ +#line 1126 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 2928 "awkgram.c" /* yacc.c:1646 */ +#line 2934 "awkgram.c" /* yacc.c:1646 */ break; case 68: -#line 1125 "awkgram.y" /* yacc.c:1646 */ +#line 1131 "awkgram.y" /* yacc.c:1646 */ { (yyval) = NULL; } -#line 2934 "awkgram.c" /* yacc.c:1646 */ +#line 2940 "awkgram.c" /* yacc.c:1646 */ break; case 69: -#line 1127 "awkgram.y" /* yacc.c:1646 */ +#line 1133 "awkgram.y" /* yacc.c:1646 */ { if ((yyvsp[-1]) == NULL) (yyval) = list_create((yyvsp[0])); else (yyval) = list_prepend((yyvsp[-1]), (yyvsp[0])); } -#line 2945 "awkgram.c" /* yacc.c:1646 */ +#line 2951 "awkgram.c" /* yacc.c:1646 */ break; case 70: -#line 1134 "awkgram.y" /* yacc.c:1646 */ +#line 1140 "awkgram.y" /* yacc.c:1646 */ { (yyval) = NULL; } -#line 2951 "awkgram.c" /* yacc.c:1646 */ +#line 2957 "awkgram.c" /* yacc.c:1646 */ break; case 71: -#line 1139 "awkgram.y" /* yacc.c:1646 */ +#line 1145 "awkgram.y" /* yacc.c:1646 */ { INSTRUCTION *casestmt = (yyvsp[0]); if ((yyvsp[0]) == NULL) @@ -2963,11 +2969,11 @@ regular_print: bcfree((yyvsp[-2])); (yyval) = (yyvsp[-4]); } -#line 2967 "awkgram.c" /* yacc.c:1646 */ +#line 2973 "awkgram.c" /* yacc.c:1646 */ break; case 72: -#line 1151 "awkgram.y" /* yacc.c:1646 */ +#line 1157 "awkgram.y" /* yacc.c:1646 */ { INSTRUCTION *casestmt = (yyvsp[0]); if ((yyvsp[0]) == NULL) @@ -2978,17 +2984,17 @@ regular_print: (yyvsp[-3])->case_stmt = casestmt; (yyval) = (yyvsp[-3]); } -#line 2982 "awkgram.c" /* yacc.c:1646 */ +#line 2988 "awkgram.c" /* yacc.c:1646 */ break; case 73: -#line 1165 "awkgram.y" /* yacc.c:1646 */ +#line 1171 
"awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 2988 "awkgram.c" /* yacc.c:1646 */ +#line 2994 "awkgram.c" /* yacc.c:1646 */ break; case 74: -#line 1167 "awkgram.y" /* yacc.c:1646 */ +#line 1173 "awkgram.y" /* yacc.c:1646 */ { NODE *n = (yyvsp[0])->memory; (void) force_number(n); @@ -2996,71 +3002,71 @@ regular_print: bcfree((yyvsp[-1])); (yyval) = (yyvsp[0]); } -#line 3000 "awkgram.c" /* yacc.c:1646 */ +#line 3006 "awkgram.c" /* yacc.c:1646 */ break; case 75: -#line 1175 "awkgram.y" /* yacc.c:1646 */ +#line 1181 "awkgram.y" /* yacc.c:1646 */ { bcfree((yyvsp[-1])); (yyval) = (yyvsp[0]); } -#line 3009 "awkgram.c" /* yacc.c:1646 */ +#line 3015 "awkgram.c" /* yacc.c:1646 */ break; case 76: -#line 1180 "awkgram.y" /* yacc.c:1646 */ +#line 1186 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 3015 "awkgram.c" /* yacc.c:1646 */ +#line 3021 "awkgram.c" /* yacc.c:1646 */ break; case 77: -#line 1182 "awkgram.y" /* yacc.c:1646 */ +#line 1188 "awkgram.y" /* yacc.c:1646 */ { (yyvsp[0])->opcode = Op_push_re; (yyval) = (yyvsp[0]); } -#line 3024 "awkgram.c" /* yacc.c:1646 */ +#line 3030 "awkgram.c" /* yacc.c:1646 */ break; case 78: -#line 1190 "awkgram.y" /* yacc.c:1646 */ +#line 1196 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 3030 "awkgram.c" /* yacc.c:1646 */ +#line 3036 "awkgram.c" /* yacc.c:1646 */ break; case 79: -#line 1192 "awkgram.y" /* yacc.c:1646 */ +#line 1198 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 3036 "awkgram.c" /* yacc.c:1646 */ +#line 3042 "awkgram.c" /* yacc.c:1646 */ break; case 81: -#line 1202 "awkgram.y" /* yacc.c:1646 */ +#line 1208 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[-1]); } -#line 3044 "awkgram.c" /* yacc.c:1646 */ +#line 3050 "awkgram.c" /* yacc.c:1646 */ break; case 82: -#line 1209 "awkgram.y" /* yacc.c:1646 */ +#line 1215 "awkgram.y" /* yacc.c:1646 */ { in_print = false; in_parens = 0; (yyval) = NULL; } -#line 3054 "awkgram.c" /* yacc.c:1646 */ +#line 3060 "awkgram.c" 
/* yacc.c:1646 */ break; case 83: -#line 1214 "awkgram.y" /* yacc.c:1646 */ +#line 1220 "awkgram.y" /* yacc.c:1646 */ { in_print = false; in_parens = 0; } -#line 3060 "awkgram.c" /* yacc.c:1646 */ +#line 3066 "awkgram.c" /* yacc.c:1646 */ break; case 84: -#line 1215 "awkgram.y" /* yacc.c:1646 */ +#line 1221 "awkgram.y" /* yacc.c:1646 */ { if ((yyvsp[-2])->redir_type == redirect_twoway && (yyvsp[0])->lasti->opcode == Op_K_getline_redir @@ -3068,136 +3074,136 @@ regular_print: yyerror(_("multistage two-way pipelines don't work")); (yyval) = list_prepend((yyvsp[0]), (yyvsp[-2])); } -#line 3072 "awkgram.c" /* yacc.c:1646 */ +#line 3078 "awkgram.c" /* yacc.c:1646 */ break; case 85: -#line 1226 "awkgram.y" /* yacc.c:1646 */ +#line 1232 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_condition((yyvsp[-3]), (yyvsp[-5]), (yyvsp[0]), NULL, NULL); } -#line 3080 "awkgram.c" /* yacc.c:1646 */ +#line 3086 "awkgram.c" /* yacc.c:1646 */ break; case 86: -#line 1231 "awkgram.y" /* yacc.c:1646 */ +#line 1237 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_condition((yyvsp[-6]), (yyvsp[-8]), (yyvsp[-3]), (yyvsp[-2]), (yyvsp[0])); } -#line 3088 "awkgram.c" /* yacc.c:1646 */ +#line 3094 "awkgram.c" /* yacc.c:1646 */ break; case 91: -#line 1248 "awkgram.y" /* yacc.c:1646 */ +#line 1254 "awkgram.y" /* yacc.c:1646 */ { (yyval) = NULL; } -#line 3094 "awkgram.c" /* yacc.c:1646 */ +#line 3100 "awkgram.c" /* yacc.c:1646 */ break; case 92: -#line 1250 "awkgram.y" /* yacc.c:1646 */ +#line 1256 "awkgram.y" /* yacc.c:1646 */ { bcfree((yyvsp[-1])); (yyval) = (yyvsp[0]); } -#line 3103 "awkgram.c" /* yacc.c:1646 */ +#line 3109 "awkgram.c" /* yacc.c:1646 */ break; case 93: -#line 1258 "awkgram.y" /* yacc.c:1646 */ +#line 1264 "awkgram.y" /* yacc.c:1646 */ { (yyval) = NULL; } -#line 3109 "awkgram.c" /* yacc.c:1646 */ +#line 3115 "awkgram.c" /* yacc.c:1646 */ break; case 94: -#line 1260 "awkgram.y" /* yacc.c:1646 */ +#line 1266 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 3115 "awkgram.c" /* 
yacc.c:1646 */ +#line 3121 "awkgram.c" /* yacc.c:1646 */ break; case 95: -#line 1265 "awkgram.y" /* yacc.c:1646 */ +#line 1271 "awkgram.y" /* yacc.c:1646 */ { (yyvsp[0])->param_count = 0; (yyval) = list_create((yyvsp[0])); } -#line 3124 "awkgram.c" /* yacc.c:1646 */ +#line 3130 "awkgram.c" /* yacc.c:1646 */ break; case 96: -#line 1270 "awkgram.y" /* yacc.c:1646 */ +#line 1276 "awkgram.y" /* yacc.c:1646 */ { (yyvsp[0])->param_count = (yyvsp[-2])->lasti->param_count + 1; (yyval) = list_append((yyvsp[-2]), (yyvsp[0])); yyerrok; } -#line 3134 "awkgram.c" /* yacc.c:1646 */ +#line 3140 "awkgram.c" /* yacc.c:1646 */ break; case 97: -#line 1276 "awkgram.y" /* yacc.c:1646 */ +#line 1282 "awkgram.y" /* yacc.c:1646 */ { (yyval) = NULL; } -#line 3140 "awkgram.c" /* yacc.c:1646 */ +#line 3146 "awkgram.c" /* yacc.c:1646 */ break; case 98: -#line 1278 "awkgram.y" /* yacc.c:1646 */ +#line 1284 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[-1]); } -#line 3146 "awkgram.c" /* yacc.c:1646 */ +#line 3152 "awkgram.c" /* yacc.c:1646 */ break; case 99: -#line 1280 "awkgram.y" /* yacc.c:1646 */ +#line 1286 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[-2]); } -#line 3152 "awkgram.c" /* yacc.c:1646 */ +#line 3158 "awkgram.c" /* yacc.c:1646 */ break; case 100: -#line 1286 "awkgram.y" /* yacc.c:1646 */ +#line 1292 "awkgram.y" /* yacc.c:1646 */ { (yyval) = NULL; } -#line 3158 "awkgram.c" /* yacc.c:1646 */ +#line 3164 "awkgram.c" /* yacc.c:1646 */ break; case 101: -#line 1288 "awkgram.y" /* yacc.c:1646 */ +#line 1294 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 3164 "awkgram.c" /* yacc.c:1646 */ +#line 3170 "awkgram.c" /* yacc.c:1646 */ break; case 102: -#line 1293 "awkgram.y" /* yacc.c:1646 */ +#line 1299 "awkgram.y" /* yacc.c:1646 */ { (yyval) = NULL; } -#line 3170 "awkgram.c" /* yacc.c:1646 */ +#line 3176 "awkgram.c" /* yacc.c:1646 */ break; case 103: -#line 1295 "awkgram.y" /* yacc.c:1646 */ +#line 1301 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 
3176 "awkgram.c" /* yacc.c:1646 */ +#line 3182 "awkgram.c" /* yacc.c:1646 */ break; case 104: -#line 1300 "awkgram.y" /* yacc.c:1646 */ +#line 1306 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_expression_list(NULL, (yyvsp[0])); } -#line 3182 "awkgram.c" /* yacc.c:1646 */ +#line 3188 "awkgram.c" /* yacc.c:1646 */ break; case 105: -#line 1302 "awkgram.y" /* yacc.c:1646 */ +#line 1308 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_expression_list((yyvsp[-2]), (yyvsp[0])); yyerrok; } -#line 3191 "awkgram.c" /* yacc.c:1646 */ +#line 3197 "awkgram.c" /* yacc.c:1646 */ break; case 106: -#line 1307 "awkgram.y" /* yacc.c:1646 */ +#line 1313 "awkgram.y" /* yacc.c:1646 */ { (yyval) = NULL; } -#line 3197 "awkgram.c" /* yacc.c:1646 */ +#line 3203 "awkgram.c" /* yacc.c:1646 */ break; case 107: -#line 1309 "awkgram.y" /* yacc.c:1646 */ +#line 1315 "awkgram.y" /* yacc.c:1646 */ { /* * Returning the expression list instead of NULL lets @@ -3205,52 +3211,52 @@ regular_print: */ (yyval) = (yyvsp[-1]); } -#line 3209 "awkgram.c" /* yacc.c:1646 */ +#line 3215 "awkgram.c" /* yacc.c:1646 */ break; case 108: -#line 1317 "awkgram.y" /* yacc.c:1646 */ +#line 1323 "awkgram.y" /* yacc.c:1646 */ { /* Ditto */ (yyval) = mk_expression_list((yyvsp[-2]), (yyvsp[0])); } -#line 3218 "awkgram.c" /* yacc.c:1646 */ +#line 3224 "awkgram.c" /* yacc.c:1646 */ break; case 109: -#line 1322 "awkgram.y" /* yacc.c:1646 */ +#line 1328 "awkgram.y" /* yacc.c:1646 */ { /* Ditto */ (yyval) = (yyvsp[-2]); } -#line 3227 "awkgram.c" /* yacc.c:1646 */ +#line 3233 "awkgram.c" /* yacc.c:1646 */ break; case 110: -#line 1331 "awkgram.y" /* yacc.c:1646 */ +#line 1337 "awkgram.y" /* yacc.c:1646 */ { if (do_lint && (yyvsp[0])->lasti->opcode == Op_match_rec) lintwarn_ln((yyvsp[-1])->source_line, _("regular expression on right of assignment")); (yyval) = mk_assignment((yyvsp[-2]), (yyvsp[0]), (yyvsp[-1])); } -#line 3238 "awkgram.c" /* yacc.c:1646 */ +#line 3244 "awkgram.c" /* yacc.c:1646 */ break; case 111: -#line 1338 "awkgram.y" 
/* yacc.c:1646 */ +#line 1344 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_boolean((yyvsp[-2]), (yyvsp[0]), (yyvsp[-1])); } -#line 3244 "awkgram.c" /* yacc.c:1646 */ +#line 3250 "awkgram.c" /* yacc.c:1646 */ break; case 112: -#line 1340 "awkgram.y" /* yacc.c:1646 */ +#line 1346 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_boolean((yyvsp[-2]), (yyvsp[0]), (yyvsp[-1])); } -#line 3250 "awkgram.c" /* yacc.c:1646 */ +#line 3256 "awkgram.c" /* yacc.c:1646 */ break; case 113: -#line 1342 "awkgram.y" /* yacc.c:1646 */ +#line 1348 "awkgram.y" /* yacc.c:1646 */ { if ((yyvsp[-2])->lasti->opcode == Op_match_rec) warning_ln((yyvsp[-1])->source_line, @@ -3266,11 +3272,11 @@ regular_print: (yyval) = list_append(list_merge((yyvsp[-2]), (yyvsp[0])), (yyvsp[-1])); } } -#line 3270 "awkgram.c" /* yacc.c:1646 */ +#line 3276 "awkgram.c" /* yacc.c:1646 */ break; case 114: -#line 1358 "awkgram.y" /* yacc.c:1646 */ +#line 1364 "awkgram.y" /* yacc.c:1646 */ { if (do_lint_old) warning_ln((yyvsp[-1])->source_line, @@ -3280,91 +3286,91 @@ regular_print: (yyvsp[-1])->expr_count = 1; (yyval) = list_append(list_merge((yyvsp[-2]), (yyvsp[0])), (yyvsp[-1])); } -#line 3284 "awkgram.c" /* yacc.c:1646 */ +#line 3290 "awkgram.c" /* yacc.c:1646 */ break; case 115: -#line 1368 "awkgram.y" /* yacc.c:1646 */ +#line 1374 "awkgram.y" /* yacc.c:1646 */ { if (do_lint && (yyvsp[0])->lasti->opcode == Op_match_rec) lintwarn_ln((yyvsp[-1])->source_line, _("regular expression on right of comparison")); (yyval) = list_append(list_merge((yyvsp[-2]), (yyvsp[0])), (yyvsp[-1])); } -#line 3295 "awkgram.c" /* yacc.c:1646 */ +#line 3301 "awkgram.c" /* yacc.c:1646 */ break; case 116: -#line 1375 "awkgram.y" /* yacc.c:1646 */ +#line 1381 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_condition((yyvsp[-4]), (yyvsp[-3]), (yyvsp[-2]), (yyvsp[-1]), (yyvsp[0])); } -#line 3301 "awkgram.c" /* yacc.c:1646 */ +#line 3307 "awkgram.c" /* yacc.c:1646 */ break; case 117: -#line 1377 "awkgram.y" /* yacc.c:1646 */ +#line 1383 "awkgram.y" /* 
yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 3307 "awkgram.c" /* yacc.c:1646 */ +#line 3313 "awkgram.c" /* yacc.c:1646 */ break; case 118: -#line 1382 "awkgram.y" /* yacc.c:1646 */ +#line 1388 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 3313 "awkgram.c" /* yacc.c:1646 */ +#line 3319 "awkgram.c" /* yacc.c:1646 */ break; case 119: -#line 1384 "awkgram.y" /* yacc.c:1646 */ +#line 1390 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 3319 "awkgram.c" /* yacc.c:1646 */ +#line 3325 "awkgram.c" /* yacc.c:1646 */ break; case 120: -#line 1386 "awkgram.y" /* yacc.c:1646 */ +#line 1392 "awkgram.y" /* yacc.c:1646 */ { (yyvsp[0])->opcode = Op_assign_quotient; (yyval) = (yyvsp[0]); } -#line 3328 "awkgram.c" /* yacc.c:1646 */ +#line 3334 "awkgram.c" /* yacc.c:1646 */ break; case 121: -#line 1394 "awkgram.y" /* yacc.c:1646 */ +#line 1400 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 3334 "awkgram.c" /* yacc.c:1646 */ +#line 3340 "awkgram.c" /* yacc.c:1646 */ break; case 122: -#line 1396 "awkgram.y" /* yacc.c:1646 */ +#line 1402 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 3340 "awkgram.c" /* yacc.c:1646 */ +#line 3346 "awkgram.c" /* yacc.c:1646 */ break; case 123: -#line 1401 "awkgram.y" /* yacc.c:1646 */ +#line 1407 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 3346 "awkgram.c" /* yacc.c:1646 */ +#line 3352 "awkgram.c" /* yacc.c:1646 */ break; case 124: -#line 1403 "awkgram.y" /* yacc.c:1646 */ +#line 1409 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 3352 "awkgram.c" /* yacc.c:1646 */ +#line 3358 "awkgram.c" /* yacc.c:1646 */ break; case 125: -#line 1408 "awkgram.y" /* yacc.c:1646 */ +#line 1414 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 3358 "awkgram.c" /* yacc.c:1646 */ +#line 3364 "awkgram.c" /* yacc.c:1646 */ break; case 126: -#line 1410 "awkgram.y" /* yacc.c:1646 */ +#line 1416 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 3364 
"awkgram.c" /* yacc.c:1646 */ +#line 3370 "awkgram.c" /* yacc.c:1646 */ break; case 127: -#line 1412 "awkgram.y" /* yacc.c:1646 */ +#line 1418 "awkgram.y" /* yacc.c:1646 */ { int count = 2; bool is_simple_var = false; @@ -3411,47 +3417,47 @@ regular_print: max_args = count; } } -#line 3415 "awkgram.c" /* yacc.c:1646 */ +#line 3421 "awkgram.c" /* yacc.c:1646 */ break; case 129: -#line 1464 "awkgram.y" /* yacc.c:1646 */ +#line 1470 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_binary((yyvsp[-2]), (yyvsp[0]), (yyvsp[-1])); } -#line 3421 "awkgram.c" /* yacc.c:1646 */ +#line 3427 "awkgram.c" /* yacc.c:1646 */ break; case 130: -#line 1466 "awkgram.y" /* yacc.c:1646 */ +#line 1472 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_binary((yyvsp[-2]), (yyvsp[0]), (yyvsp[-1])); } -#line 3427 "awkgram.c" /* yacc.c:1646 */ +#line 3433 "awkgram.c" /* yacc.c:1646 */ break; case 131: -#line 1468 "awkgram.y" /* yacc.c:1646 */ +#line 1474 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_binary((yyvsp[-2]), (yyvsp[0]), (yyvsp[-1])); } -#line 3433 "awkgram.c" /* yacc.c:1646 */ +#line 3439 "awkgram.c" /* yacc.c:1646 */ break; case 132: -#line 1470 "awkgram.y" /* yacc.c:1646 */ +#line 1476 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_binary((yyvsp[-2]), (yyvsp[0]), (yyvsp[-1])); } -#line 3439 "awkgram.c" /* yacc.c:1646 */ +#line 3445 "awkgram.c" /* yacc.c:1646 */ break; case 133: -#line 1472 "awkgram.y" /* yacc.c:1646 */ +#line 1478 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_binary((yyvsp[-2]), (yyvsp[0]), (yyvsp[-1])); } -#line 3445 "awkgram.c" /* yacc.c:1646 */ +#line 3451 "awkgram.c" /* yacc.c:1646 */ break; case 134: -#line 1474 "awkgram.y" /* yacc.c:1646 */ +#line 1480 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_binary((yyvsp[-2]), (yyvsp[0]), (yyvsp[-1])); } -#line 3451 "awkgram.c" /* yacc.c:1646 */ +#line 3457 "awkgram.c" /* yacc.c:1646 */ break; case 135: -#line 1476 "awkgram.y" /* yacc.c:1646 */ +#line 1482 "awkgram.y" /* yacc.c:1646 */ { /* * In BEGINFILE/ENDFILE, allow `getline 
[var] < file' @@ -3465,29 +3471,29 @@ regular_print: _("non-redirected `getline' undefined inside END action")); (yyval) = mk_getline((yyvsp[-2]), (yyvsp[-1]), (yyvsp[0]), redirect_input); } -#line 3469 "awkgram.c" /* yacc.c:1646 */ +#line 3475 "awkgram.c" /* yacc.c:1646 */ break; case 136: -#line 1490 "awkgram.y" /* yacc.c:1646 */ +#line 1496 "awkgram.y" /* yacc.c:1646 */ { (yyvsp[0])->opcode = Op_postincrement; (yyval) = mk_assignment((yyvsp[-1]), NULL, (yyvsp[0])); } -#line 3478 "awkgram.c" /* yacc.c:1646 */ +#line 3484 "awkgram.c" /* yacc.c:1646 */ break; case 137: -#line 1495 "awkgram.y" /* yacc.c:1646 */ +#line 1501 "awkgram.y" /* yacc.c:1646 */ { (yyvsp[0])->opcode = Op_postdecrement; (yyval) = mk_assignment((yyvsp[-1]), NULL, (yyvsp[0])); } -#line 3487 "awkgram.c" /* yacc.c:1646 */ +#line 3493 "awkgram.c" /* yacc.c:1646 */ break; case 138: -#line 1500 "awkgram.y" /* yacc.c:1646 */ +#line 1506 "awkgram.y" /* yacc.c:1646 */ { if (do_lint_old) { warning_ln((yyvsp[-1])->source_line, @@ -3507,64 +3513,64 @@ regular_print: (yyval) = list_append(list_merge(t, (yyvsp[0])), (yyvsp[-1])); } } -#line 3511 "awkgram.c" /* yacc.c:1646 */ +#line 3517 "awkgram.c" /* yacc.c:1646 */ break; case 139: -#line 1525 "awkgram.y" /* yacc.c:1646 */ +#line 1531 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_getline((yyvsp[-1]), (yyvsp[0]), (yyvsp[-3]), (yyvsp[-2])->redir_type); bcfree((yyvsp[-2])); } -#line 3520 "awkgram.c" /* yacc.c:1646 */ +#line 3526 "awkgram.c" /* yacc.c:1646 */ break; case 140: -#line 1531 "awkgram.y" /* yacc.c:1646 */ +#line 1537 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_binary((yyvsp[-2]), (yyvsp[0]), (yyvsp[-1])); } -#line 3526 "awkgram.c" /* yacc.c:1646 */ +#line 3532 "awkgram.c" /* yacc.c:1646 */ break; case 141: -#line 1533 "awkgram.y" /* yacc.c:1646 */ +#line 1539 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_binary((yyvsp[-2]), (yyvsp[0]), (yyvsp[-1])); } -#line 3532 "awkgram.c" /* yacc.c:1646 */ +#line 3538 "awkgram.c" /* yacc.c:1646 */ break; case 142: 
-#line 1535 "awkgram.y" /* yacc.c:1646 */ +#line 1541 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_binary((yyvsp[-2]), (yyvsp[0]), (yyvsp[-1])); } -#line 3538 "awkgram.c" /* yacc.c:1646 */ +#line 3544 "awkgram.c" /* yacc.c:1646 */ break; case 143: -#line 1537 "awkgram.y" /* yacc.c:1646 */ +#line 1543 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_binary((yyvsp[-2]), (yyvsp[0]), (yyvsp[-1])); } -#line 3544 "awkgram.c" /* yacc.c:1646 */ +#line 3550 "awkgram.c" /* yacc.c:1646 */ break; case 144: -#line 1539 "awkgram.y" /* yacc.c:1646 */ +#line 1545 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_binary((yyvsp[-2]), (yyvsp[0]), (yyvsp[-1])); } -#line 3550 "awkgram.c" /* yacc.c:1646 */ +#line 3556 "awkgram.c" /* yacc.c:1646 */ break; case 145: -#line 1541 "awkgram.y" /* yacc.c:1646 */ +#line 1547 "awkgram.y" /* yacc.c:1646 */ { (yyval) = mk_binary((yyvsp[-2]), (yyvsp[0]), (yyvsp[-1])); } -#line 3556 "awkgram.c" /* yacc.c:1646 */ +#line 3562 "awkgram.c" /* yacc.c:1646 */ break; case 146: -#line 1546 "awkgram.y" /* yacc.c:1646 */ +#line 1552 "awkgram.y" /* yacc.c:1646 */ { (yyval) = list_create((yyvsp[0])); } -#line 3564 "awkgram.c" /* yacc.c:1646 */ +#line 3570 "awkgram.c" /* yacc.c:1646 */ break; case 147: -#line 1550 "awkgram.y" /* yacc.c:1646 */ +#line 1556 "awkgram.y" /* yacc.c:1646 */ { if ((yyvsp[0])->opcode == Op_match_rec) { (yyvsp[0])->opcode = Op_nomatch; @@ -3596,37 +3602,37 @@ regular_print: } } } -#line 3600 "awkgram.c" /* yacc.c:1646 */ +#line 3606 "awkgram.c" /* yacc.c:1646 */ break; case 148: -#line 1582 "awkgram.y" /* yacc.c:1646 */ +#line 1588 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[-1]); } -#line 3606 "awkgram.c" /* yacc.c:1646 */ +#line 3612 "awkgram.c" /* yacc.c:1646 */ break; case 149: -#line 1584 "awkgram.y" /* yacc.c:1646 */ +#line 1590 "awkgram.y" /* yacc.c:1646 */ { (yyval) = snode((yyvsp[-1]), (yyvsp[-3])); if ((yyval) == NULL) YYABORT; } -#line 3616 "awkgram.c" /* yacc.c:1646 */ +#line 3622 "awkgram.c" /* yacc.c:1646 */ break; case 150: 
-#line 1590 "awkgram.y" /* yacc.c:1646 */ +#line 1596 "awkgram.y" /* yacc.c:1646 */ { (yyval) = snode((yyvsp[-1]), (yyvsp[-3])); if ((yyval) == NULL) YYABORT; } -#line 3626 "awkgram.c" /* yacc.c:1646 */ +#line 3632 "awkgram.c" /* yacc.c:1646 */ break; case 151: -#line 1596 "awkgram.y" /* yacc.c:1646 */ +#line 1602 "awkgram.y" /* yacc.c:1646 */ { static bool warned = false; @@ -3639,45 +3645,45 @@ regular_print: if ((yyval) == NULL) YYABORT; } -#line 3643 "awkgram.c" /* yacc.c:1646 */ +#line 3649 "awkgram.c" /* yacc.c:1646 */ break; case 154: -#line 1611 "awkgram.y" /* yacc.c:1646 */ +#line 1617 "awkgram.y" /* yacc.c:1646 */ { (yyvsp[-1])->opcode = Op_preincrement; (yyval) = mk_assignment((yyvsp[0]), NULL, (yyvsp[-1])); } -#line 3652 "awkgram.c" /* yacc.c:1646 */ +#line 3658 "awkgram.c" /* yacc.c:1646 */ break; case 155: -#line 1616 "awkgram.y" /* yacc.c:1646 */ +#line 1622 "awkgram.y" /* yacc.c:1646 */ { (yyvsp[-1])->opcode = Op_predecrement; (yyval) = mk_assignment((yyvsp[0]), NULL, (yyvsp[-1])); } -#line 3661 "awkgram.c" /* yacc.c:1646 */ +#line 3667 "awkgram.c" /* yacc.c:1646 */ break; case 156: -#line 1621 "awkgram.y" /* yacc.c:1646 */ +#line 1627 "awkgram.y" /* yacc.c:1646 */ { (yyval) = list_create((yyvsp[0])); } -#line 3669 "awkgram.c" /* yacc.c:1646 */ +#line 3675 "awkgram.c" /* yacc.c:1646 */ break; case 157: -#line 1625 "awkgram.y" /* yacc.c:1646 */ +#line 1631 "awkgram.y" /* yacc.c:1646 */ { (yyval) = list_create((yyvsp[0])); } -#line 3677 "awkgram.c" /* yacc.c:1646 */ +#line 3683 "awkgram.c" /* yacc.c:1646 */ break; case 158: -#line 1629 "awkgram.y" /* yacc.c:1646 */ +#line 1635 "awkgram.y" /* yacc.c:1646 */ { if ((yyvsp[0])->lasti->opcode == Op_push_i && ((yyvsp[0])->lasti->memory->flags & (STRCUR|STRING)) == 0 @@ -3692,11 +3698,11 @@ regular_print: (yyval) = list_append((yyvsp[0]), (yyvsp[-1])); } } -#line 3696 "awkgram.c" /* yacc.c:1646 */ +#line 3702 "awkgram.c" /* yacc.c:1646 */ break; case 159: -#line 1644 "awkgram.y" /* yacc.c:1646 */ +#line 1650 
"awkgram.y" /* yacc.c:1646 */ { /* * was: $$ = $2 @@ -3706,20 +3712,20 @@ regular_print: (yyvsp[-1])->memory = make_number(0.0); (yyval) = list_append((yyvsp[0]), (yyvsp[-1])); } -#line 3710 "awkgram.c" /* yacc.c:1646 */ +#line 3716 "awkgram.c" /* yacc.c:1646 */ break; case 160: -#line 1657 "awkgram.y" /* yacc.c:1646 */ +#line 1663 "awkgram.y" /* yacc.c:1646 */ { func_use((yyvsp[0])->lasti->func_name, FUNC_USE); (yyval) = (yyvsp[0]); } -#line 3719 "awkgram.c" /* yacc.c:1646 */ +#line 3725 "awkgram.c" /* yacc.c:1646 */ break; case 161: -#line 1662 "awkgram.y" /* yacc.c:1646 */ +#line 1668 "awkgram.y" /* yacc.c:1646 */ { /* indirect function call */ INSTRUCTION *f, *t; @@ -3751,13 +3757,25 @@ regular_print: */ (yyval) = list_prepend((yyvsp[0]), t); + at_seen = false; } -#line 3756 "awkgram.c" /* yacc.c:1646 */ +#line 3763 "awkgram.c" /* yacc.c:1646 */ break; case 162: -#line 1698 "awkgram.y" /* yacc.c:1646 */ +#line 1705 "awkgram.y" /* yacc.c:1646 */ { + NODE *n; + + if (! at_seen) { + n = lookup((yyvsp[-3])->func_name); + if (n != NULL && n->type != Node_func + && n->type != Node_ext_func && n->type != Node_old_ext_func) { + error_ln((yyvsp[-3])->source_line, + _("attempt to use non-function `%s' in function call"), + (yyvsp[-3])->func_name); + } + } param_sanity((yyvsp[-1])); (yyvsp[-3])->opcode = Op_func_call; (yyvsp[-3])->func_body = NULL; @@ -3770,49 +3788,49 @@ regular_print: (yyval) = list_append(t, (yyvsp[-3])); } } -#line 3774 "awkgram.c" /* yacc.c:1646 */ +#line 3792 "awkgram.c" /* yacc.c:1646 */ break; case 163: -#line 1715 "awkgram.y" /* yacc.c:1646 */ +#line 1733 "awkgram.y" /* yacc.c:1646 */ { (yyval) = NULL; } -#line 3780 "awkgram.c" /* yacc.c:1646 */ +#line 3798 "awkgram.c" /* yacc.c:1646 */ break; case 164: -#line 1717 "awkgram.y" /* yacc.c:1646 */ +#line 1735 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 3786 "awkgram.c" /* yacc.c:1646 */ +#line 3804 "awkgram.c" /* yacc.c:1646 */ break; case 165: -#line 1722 "awkgram.y" /* 
yacc.c:1646 */ +#line 1740 "awkgram.y" /* yacc.c:1646 */ { (yyval) = NULL; } -#line 3792 "awkgram.c" /* yacc.c:1646 */ +#line 3810 "awkgram.c" /* yacc.c:1646 */ break; case 166: -#line 1724 "awkgram.y" /* yacc.c:1646 */ +#line 1742 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[-1]); } -#line 3798 "awkgram.c" /* yacc.c:1646 */ +#line 3816 "awkgram.c" /* yacc.c:1646 */ break; case 167: -#line 1729 "awkgram.y" /* yacc.c:1646 */ +#line 1747 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 3804 "awkgram.c" /* yacc.c:1646 */ +#line 3822 "awkgram.c" /* yacc.c:1646 */ break; case 168: -#line 1731 "awkgram.y" /* yacc.c:1646 */ +#line 1749 "awkgram.y" /* yacc.c:1646 */ { (yyval) = list_merge((yyvsp[-1]), (yyvsp[0])); } -#line 3812 "awkgram.c" /* yacc.c:1646 */ +#line 3830 "awkgram.c" /* yacc.c:1646 */ break; case 169: -#line 1738 "awkgram.y" /* yacc.c:1646 */ +#line 1756 "awkgram.y" /* yacc.c:1646 */ { INSTRUCTION *ip = (yyvsp[0])->lasti; int count = ip->sub_count; /* # of SUBSEP-seperated expressions */ @@ -3826,11 +3844,11 @@ regular_print: sub_counter++; /* count # of dimensions */ (yyval) = (yyvsp[0]); } -#line 3830 "awkgram.c" /* yacc.c:1646 */ +#line 3848 "awkgram.c" /* yacc.c:1646 */ break; case 170: -#line 1755 "awkgram.y" /* yacc.c:1646 */ +#line 1773 "awkgram.y" /* yacc.c:1646 */ { INSTRUCTION *t = (yyvsp[-1]); if ((yyvsp[-1]) == NULL) { @@ -3844,31 +3862,31 @@ regular_print: (yyvsp[0])->sub_count = count_expressions(&t, false); (yyval) = list_append(t, (yyvsp[0])); } -#line 3848 "awkgram.c" /* yacc.c:1646 */ +#line 3866 "awkgram.c" /* yacc.c:1646 */ break; case 171: -#line 1772 "awkgram.y" /* yacc.c:1646 */ +#line 1790 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); } -#line 3854 "awkgram.c" /* yacc.c:1646 */ +#line 3872 "awkgram.c" /* yacc.c:1646 */ break; case 172: -#line 1774 "awkgram.y" /* yacc.c:1646 */ +#line 1792 "awkgram.y" /* yacc.c:1646 */ { (yyval) = list_merge((yyvsp[-1]), (yyvsp[0])); } -#line 3862 "awkgram.c" /* yacc.c:1646 */ 
+#line 3880 "awkgram.c" /* yacc.c:1646 */ break; case 173: -#line 1781 "awkgram.y" /* yacc.c:1646 */ +#line 1799 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[-1]); } -#line 3868 "awkgram.c" /* yacc.c:1646 */ +#line 3886 "awkgram.c" /* yacc.c:1646 */ break; case 174: -#line 1786 "awkgram.y" /* yacc.c:1646 */ +#line 1804 "awkgram.y" /* yacc.c:1646 */ { char *var_name = (yyvsp[0])->lextok; @@ -3876,22 +3894,22 @@ regular_print: (yyvsp[0])->memory = variable((yyvsp[0])->source_line, var_name, Node_var_new); (yyval) = list_create((yyvsp[0])); } -#line 3880 "awkgram.c" /* yacc.c:1646 */ +#line 3898 "awkgram.c" /* yacc.c:1646 */ break; case 175: -#line 1794 "awkgram.y" /* yacc.c:1646 */ +#line 1812 "awkgram.y" /* yacc.c:1646 */ { char *arr = (yyvsp[-1])->lextok; (yyvsp[-1])->memory = variable((yyvsp[-1])->source_line, arr, Node_var_new); (yyvsp[-1])->opcode = Op_push_array; (yyval) = list_prepend((yyvsp[0]), (yyvsp[-1])); } -#line 3891 "awkgram.c" /* yacc.c:1646 */ +#line 3909 "awkgram.c" /* yacc.c:1646 */ break; case 176: -#line 1804 "awkgram.y" /* yacc.c:1646 */ +#line 1822 "awkgram.y" /* yacc.c:1646 */ { INSTRUCTION *ip = (yyvsp[0])->nexti; if (ip->opcode == Op_push @@ -3903,73 +3921,73 @@ regular_print: } else (yyval) = (yyvsp[0]); } -#line 3907 "awkgram.c" /* yacc.c:1646 */ +#line 3925 "awkgram.c" /* yacc.c:1646 */ break; case 177: -#line 1816 "awkgram.y" /* yacc.c:1646 */ +#line 1834 "awkgram.y" /* yacc.c:1646 */ { (yyval) = list_append((yyvsp[-1]), (yyvsp[-2])); if ((yyvsp[0]) != NULL) mk_assignment((yyvsp[-1]), NULL, (yyvsp[0])); } -#line 3917 "awkgram.c" /* yacc.c:1646 */ +#line 3935 "awkgram.c" /* yacc.c:1646 */ break; case 178: -#line 1825 "awkgram.y" /* yacc.c:1646 */ +#line 1843 "awkgram.y" /* yacc.c:1646 */ { (yyvsp[0])->opcode = Op_postincrement; } -#line 3925 "awkgram.c" /* yacc.c:1646 */ +#line 3943 "awkgram.c" /* yacc.c:1646 */ break; case 179: -#line 1829 "awkgram.y" /* yacc.c:1646 */ +#line 1847 "awkgram.y" /* yacc.c:1646 */ { (yyvsp[0])->opcode = 
Op_postdecrement; } -#line 3933 "awkgram.c" /* yacc.c:1646 */ +#line 3951 "awkgram.c" /* yacc.c:1646 */ break; case 180: -#line 1832 "awkgram.y" /* yacc.c:1646 */ +#line 1850 "awkgram.y" /* yacc.c:1646 */ { (yyval) = NULL; } -#line 3939 "awkgram.c" /* yacc.c:1646 */ +#line 3957 "awkgram.c" /* yacc.c:1646 */ break; case 182: -#line 1840 "awkgram.y" /* yacc.c:1646 */ +#line 1858 "awkgram.y" /* yacc.c:1646 */ { yyerrok; } -#line 3945 "awkgram.c" /* yacc.c:1646 */ +#line 3963 "awkgram.c" /* yacc.c:1646 */ break; case 183: -#line 1844 "awkgram.y" /* yacc.c:1646 */ +#line 1862 "awkgram.y" /* yacc.c:1646 */ { yyerrok; } -#line 3951 "awkgram.c" /* yacc.c:1646 */ +#line 3969 "awkgram.c" /* yacc.c:1646 */ break; case 186: -#line 1853 "awkgram.y" /* yacc.c:1646 */ +#line 1871 "awkgram.y" /* yacc.c:1646 */ { yyerrok; } -#line 3957 "awkgram.c" /* yacc.c:1646 */ +#line 3975 "awkgram.c" /* yacc.c:1646 */ break; case 187: -#line 1857 "awkgram.y" /* yacc.c:1646 */ +#line 1875 "awkgram.y" /* yacc.c:1646 */ { (yyval) = (yyvsp[0]); yyerrok; } -#line 3963 "awkgram.c" /* yacc.c:1646 */ +#line 3981 "awkgram.c" /* yacc.c:1646 */ break; case 188: -#line 1861 "awkgram.y" /* yacc.c:1646 */ +#line 1879 "awkgram.y" /* yacc.c:1646 */ { yyerrok; } -#line 3969 "awkgram.c" /* yacc.c:1646 */ +#line 3987 "awkgram.c" /* yacc.c:1646 */ break; -#line 3973 "awkgram.c" /* yacc.c:1646 */ +#line 3991 "awkgram.c" /* yacc.c:1646 */ default: break; } /* User semantic actions sometimes alter yychar, and that requires @@ -4197,7 +4215,7 @@ yyreturn: #endif return yyresult; } -#line 1863 "awkgram.y" /* yacc.c:1906 */ +#line 1881 "awkgram.y" /* yacc.c:1906 */ struct token { @@ -4736,6 +4754,9 @@ parse_program(INSTRUCTION **pcode) if (ret == 0) /* avoid spurious warning if parser aborted with YYABORT */ check_funcs(); + if (do_posix && ! 
check_param_names()) + errcount++; + if (args_array == NULL) emalloc(args_array, NODE **, (max_args + 2) * sizeof(NODE *), "parse_program"); else @@ -5665,6 +5686,7 @@ retry: return lasttok = NEWLINE; case '@': + at_seen = true; return lasttok = '@'; case '\\': @@ -87,6 +87,7 @@ static int one_line_close(int fd); static void split_comment(void); static void check_comment(void); +static bool at_seen = false; static bool want_source = false; static bool want_regexp = false; /* lexical scanning kludge */ static char *in_function; /* parsing kludge */ @@ -250,11 +251,13 @@ rule | '@' LEX_INCLUDE source statement_term { want_source = false; + at_seen = false; yyerrok; } | '@' LEX_LOAD library statement_term { want_source = false; + at_seen = false; yyerrok; } ; @@ -409,7 +412,10 @@ func_name YYABORT; } | '@' LEX_EVAL - { $$ = $2; } + { + $$ = $2; + at_seen = false; + } ; lex_builtin @@ -1690,12 +1696,24 @@ func_call */ $$ = list_prepend($2, t); + at_seen = false; } ; direct_func_call : FUNC_CALL '(' opt_expression_list r_paren { + NODE *n; + + if (! at_seen) { + n = lookup($1->func_name); + if (n != NULL && n->type != Node_func + && n->type != Node_ext_func && n->type != Node_old_ext_func) { + error_ln($1->source_line, + _("attempt to use non-function `%s' in function call"), + $1->func_name); + } + } param_sanity($3); $1->opcode = Op_func_call; $1->func_body = NULL; @@ -2398,6 +2416,9 @@ parse_program(INSTRUCTION **pcode) if (ret == 0) /* avoid spurious warning if parser aborted with YYABORT */ check_funcs(); + if (do_posix && ! 
check_param_names()) + errcount++; + if (args_array == NULL) emalloc(args_array, NODE **, (max_args + 2) * sizeof(NODE *), "parse_program"); else @@ -3327,6 +3348,7 @@ retry: return lasttok = NEWLINE; case '@': + at_seen = true; return lasttok = '@'; case '\\': diff --git a/awklib/eg/lib/assert.awk b/awklib/eg/lib/assert.awk index 75fd8853..c8e13490 100644 --- a/awklib/eg/lib/assert.awk +++ b/awklib/eg/lib/assert.awk @@ -1,4 +1,4 @@ -# assert --- assert that a condition is true. Otherwise exit. +# assert --- assert that a condition is true. Otherwise, exit. # # Arnold Robbins, arnold@skeeve.com, Public Domain diff --git a/awklib/eg/lib/bits2str.awk b/awklib/eg/lib/bits2str.awk index 9725ee8f..a10ffad1 100644 --- a/awklib/eg/lib/bits2str.awk +++ b/awklib/eg/lib/bits2str.awk @@ -1,4 +1,4 @@ -# bits2str --- turn a byte into readable 1's and 0's +# bits2str --- turn a byte into readable ones and zeros function bits2str(bits, data, mask) { diff --git a/awklib/eg/lib/quicksort.awk b/awklib/eg/lib/quicksort.awk index 3ba2d6e3..e0ed8bc7 100644 --- a/awklib/eg/lib/quicksort.awk +++ b/awklib/eg/lib/quicksort.awk @@ -4,8 +4,9 @@ # Arnold Robbins, arnold@skeeve.com, Public Domain # January 2009 -# quicksort --- C.A.R. Hoare's quick sort algorithm. See Wikipedia -# or almost any algorithms or computer science text + +# quicksort --- C.A.R. Hoare's quicksort algorithm. See Wikipedia +# or almost any algorithms or computer science text. # # Adapted from K&R-II, page 110 diff --git a/awklib/eg/prog/anagram.awk b/awklib/eg/prog/anagram.awk index 7ca14559..df2768d9 100644 --- a/awklib/eg/prog/anagram.awk +++ b/awklib/eg/prog/anagram.awk @@ -1,5 +1,5 @@ -# anagram.awk --- An implementation of the anagram finding algorithm -# from Jon Bentley's "Programming Pearls", 2nd edition. +# anagram.awk --- An implementation of the anagram-finding algorithm +# from Jon Bentley's "Programming Pearls," 2nd edition. # Addison Wesley, 2000, ISBN 0-201-65788-0. 
# Column 2, Problem C, section 2.8, pp 18-20. # @@ -21,7 +21,7 @@ key = word2key($1) # Build signature data[key][$1] = $1 # Store word with signature } -# word2key --- split word apart into letters, sort, joining back together +# word2key --- split word apart into letters, sort, and join back together function word2key(word, a, i, n, result) { diff --git a/awklib/eg/prog/extract.awk b/awklib/eg/prog/extract.awk index 24f40ce5..f5dfcf40 100644 --- a/awklib/eg/prog/extract.awk +++ b/awklib/eg/prog/extract.awk @@ -1,4 +1,4 @@ -# extract.awk --- extract files and run programs from texinfo files +# extract.awk --- extract files and run programs from Texinfo files # # Arnold Robbins, arnold@skeeve.com, Public Domain # May 1993 diff --git a/awklib/eg/prog/translate.awk b/awklib/eg/prog/translate.awk index cf7f3897..e7403717 100644 --- a/awklib/eg/prog/translate.awk +++ b/awklib/eg/prog/translate.awk @@ -4,7 +4,7 @@ # August 1989 # February 2009 - bug fix -# Bugs: does not handle things like: tr A-Z a-z, it has +# Bugs: does not handle things like tr A-Z a-z; it has # to be spelled out. However, if `to' is shorter than `from', # the last character in `to' is used for the rest of `from'. diff --git a/doc/ChangeLog b/doc/ChangeLog index ba31b052..1e3f1551 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,38 @@ +2015-02-06 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: O'Reilly fixes. + +2015-02-04 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: O'Reilly fixes. + * gawktexi.in: Update various version-related bits of info. + +2015-02-02 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: O'Reilly fixes. + +2015-02-01 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: POSIX requirement that function parameters cannot + have the same name as a function is now --posix. + Restore indirectcall example. + + More O'Reilly fixes. + +2015-01-30 Arnold D. 
Robbins <arnold@skeeve.com> + + * gawktexi.in: Document POSIX requirement that function parameters + cannot have the same name as a function. Fix indirectcall example. + +2015-01-27 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: O'Reilly fixes. + And still more. Also, fix @code --> @command in a number of places. + +2015-01-26 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in: O'Reilly fixes. + 2015-01-25 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in: Fix a bad URL. And another one. @@ -20,7 +55,7 @@ 2015-01-19 Arnold D. Robbins <arnold@skeeve.com> * gawkinet.texi: Fix capitalization in document title. - * gawktexi.in: Here we again: Starting on more O'Reilly fixes. + * gawktexi.in: Here we go again: Starting on more O'Reilly fixes. 2014-12-26 Antonio Giovanni Colombo <azc100@gmail.com> diff --git a/doc/gawk.info b/doc/gawk.info index 59ce5ce6..fad0fce3 100644 --- a/doc/gawk.info +++ b/doc/gawk.info @@ -3453,8 +3453,8 @@ sequences apply to both string constants and regexp constants: would continue incorporating hexadecimal digits into the value until a non-hexadecimal digit or the end of the string was encountered. However, using more than two hexadecimal - digits produced undefined results. As of version *FIXME:* - 4.3.0, only two digits are processed. + digits produced undefined results. As of version 4.2, only + two digits are processed. `\/' A literal slash (necessary for regexp constants only). This @@ -10126,11 +10126,11 @@ description of each variable.) use binary I/O. Any other string value is treated the same as `"rw"', but causes `gawk' to generate a warning message. `BINMODE' is described in more detail in *note PC Using::. `mawk' - (*note Other Versions::), also supports this variable, but only + (*note Other Versions::) also supports this variable, but only using numeric values. 
``CONVFMT'' - This string controls conversion of numbers to strings (*note + A string that controls the conversion of numbers to strings (*note Conversion::). It works by being passed, in effect, as the first argument to the `sprintf()' function (*note String Functions::). Its default value is `"%.6g"'. `CONVFMT' was introduced by the @@ -10177,7 +10177,7 @@ description of each variable.) `IGNORECASE #' If `IGNORECASE' is nonzero or non-null, then all string comparisons - and all regular expression matching are case independent. Thus, + and all regular expression matching are case-independent. Thus, regexp matching with `~' and `!~', as well as the `gensub()', `gsub()', `index()', `match()', `patsplit()', `split()', and `sub()' functions, record termination with `RS', and field @@ -10197,7 +10197,7 @@ description of each variable.) Assigning a false value to `LINT' turns off the lint warnings. This variable is a `gawk' extension. It is not special in other - `awk' implementations. Unlike the other special variables, + `awk' implementations. Unlike with the other special variables, changing `LINT' does affect the production of lint warnings, even if `gawk' is in compatibility mode. Much as the `--lint' and `--traditional' options independently control different aspects of @@ -10205,17 +10205,18 @@ description of each variable.) execution is independent of the flavor of `awk' being executed. `OFMT' - Controls conversion of numbers to strings (*note Conversion::) for - printing with the `print' statement. It works by being passed as - the first argument to the `sprintf()' function (*note String - Functions::). Its default value is `"%.6g"'. Earlier versions of - `awk' used `OFMT' to specify the format for converting numbers to - strings in general expressions; this is now done by `CONVFMT'. + A string that controls conversion of numbers to strings (*note + Conversion::) for printing with the `print' statement. 
It works + by being passed as the first argument to the `sprintf()' function + (*note String Functions::). Its default value is `"%.6g"'. + Earlier versions of `awk' used `OFMT' to specify the format for + converting numbers to strings in general expressions; this is now + done by `CONVFMT'. `OFS' - This is the output field separator (*note Output Separators::). - It is output between the fields printed by a `print' statement. - Its default value is `" "', a string consisting of a single space. + The output field separator (*note Output Separators::). It is + output between the fields printed by a `print' statement. Its + default value is `" "', a string consisting of a single space. `ORS' The output record separator. It is output at the end of every @@ -10265,7 +10266,7 @@ description of each variable.) File: gawk.info, Node: Auto-set, Next: ARGC and ARGV, Prev: User-modified, Up: Built-in Variables -7.5.2 Built-In Variables That Convey Information +7.5.2 Built-in Variables That Convey Information ------------------------------------------------ The following is an alphabetical list of variables that `awk' sets @@ -10383,14 +10384,14 @@ Options::), they are not special: `NF' The number of fields in the current input record. `NF' is set - each time a new record is read, when a new field is created or + each time a new record is read, when a new field is created, or when `$0' changes (*note Fields::). Unlike most of the variables described in this node, assigning a value to `NF' has the potential to affect `awk''s internal workings. In particular, assignments to `NF' can be used to - create or remove fields from the current record. *Note Changing - Fields::. + create fields in or remove fields from the current record. *Note + Changing Fields::. 
`FUNCTAB #' An array whose indices and corresponding values are the names of @@ -10425,7 +10426,7 @@ Options::), they are not special: `PROCINFO["identifiers"]' A subarray, indexed by the names of all identifiers used in - the text of the AWK program. An "identifier" is simply the + the text of the `awk' program. An "identifier" is simply the name of a variable (be it scalar or array), built-in function, user-defined function, or extension function. For each identifier, the value of the element is one of the @@ -10446,7 +10447,7 @@ Options::), they are not special: `"untyped"' The identifier is untyped (could be used as a scalar or - array, `gawk' doesn't know yet). + an array; `gawk' doesn't know yet). `"user"' The identifier is a user-defined function. @@ -10535,7 +10536,7 @@ Options::), they are not special: string, or -1 if no match is found. `RSTART' - The start-index in characters of the substring that is matched by + The start index in characters of the substring that is matched by the `match()' function (*note String Functions::). `RSTART' is set by invoking the `match()' function. Its value is the position of the string where the matched substring starts, or zero if no @@ -10585,7 +10586,7 @@ Options::), they are not special: } NOTE: In order to avoid severe time-travel paradoxes,(2) - neither `FUNCTAB' nor `SYMTAB' are available as elements + neither `FUNCTAB' nor `SYMTAB' is available as an element within the `SYMTAB' array. Changing `NR' and `FNR' @@ -10724,7 +10725,7 @@ are passed on to the `awk' program. (*Note Getopt Function::, for an When designing your program, you should choose options that don't conflict with `gawk''s, because it will process any options that it accepts before passing the rest of the command line on to your program. -Using `#!' with the `-E' option may help (*Note Executable Scripts::, +Using `#!' with the `-E' option may help (*note Executable Scripts::, and *note Options::,). 
@@ -10735,14 +10736,14 @@ File: gawk.info, Node: Pattern Action Summary, Prev: Built-in Variables, Up: * Pattern-action pairs make up the basic elements of an `awk' program. Patterns are either normal expressions, range - expressions, regexp constants, one of the special keywords - `BEGIN', `END', `BEGINFILE', `ENDFILE', or empty. The action + expressions, or regexp constants; one of the special keywords + `BEGIN', `END', `BEGINFILE', or `ENDFILE'; or empty. The action executes if the current record matches the pattern. Empty (missing) patterns match all records. - * I/O from `BEGIN' and `END' rules have certain constraints. This - is also true, only more so, for `BEGINFILE' and `ENDFILE' rules. - The latter two give you "hooks" into `gawk''s file processing, + * I/O from `BEGIN' and `END' rules has certain constraints. This is + also true, only more so, for `BEGINFILE' and `ENDFILE' rules. The + latter two give you "hooks" into `gawk''s file processing, allowing you to recover from a file that otherwise would cause a fatal error (such as a file that cannot be opened). @@ -10763,11 +10764,11 @@ File: gawk.info, Node: Pattern Action Summary, Prev: Built-in Variables, Up: iteration of a loop (or get out of a `switch'). * `next' and `nextfile' let you read the next record and start over - at the top of your program, or skip to the next input file and + at the top of your program or skip to the next input file and start over, respectively. * The `exit' statement terminates your program. When executed from - an action (or function body) it transfers control to the `END' + an action (or function body), it transfers control to the `END' statements. From an `END' statement body, it exits immediately. You may pass an optional numeric value to be used as `awk''s exit status. @@ -10869,9 +10870,9 @@ languages allow arbitrary starting and ending indices--e.g., `15 .. 27'--but the size of the array is still fixed when the array is declared.) 
- A contiguous array of four elements might look like the following -example, conceptually, if the element values are 8, `"foo"', `""', and -30 as shown in *note figure-array-elements::: + A contiguous array of four elements might look like *note +figure-array-elements::, conceptually, if the element values are eight, +`"foo"', `""', and 30. +---------+---------+--------+---------+ | 8 | "foo" | "" | 30 | @r{Value} @@ -10880,17 +10881,19 @@ example, conceptually, if the element values are 8, `"foo"', `""', and Figure 8.1: A contiguous array Only the values are stored; the indices are implicit from the order of -the values. Here, 8 is the value at index zero, because 8 appears in the -position with zero elements before it. +the values. Here, eight is the value at index zero, because eight +appears in the position with zero elements before it. Arrays in `awk' are different--they are "associative". This means that each array is a collection of pairs--an index and its corresponding array element value: - Index 3 Value 30 - Index 1 Value "foo" - Index 0 Value 8 - Index 2 Value "" + Index Value +------------------------ + `3' `30' + `1' `"foo"' + `0' `8' + `2' `""' The pairs are shown in jumbled order because their order is irrelevant.(1) @@ -10899,11 +10902,13 @@ irrelevant.(1) at any time. For example, suppose a tenth element is added to the array whose value is `"number ten"'. The result is: - Index 10 Value "number ten" - Index 3 Value 30 - Index 1 Value "foo" - Index 0 Value 8 - Index 2 Value "" + Index Value +------------------------------- + `10' `"number ten"' + `3' `30' + `1' `"foo"' + `0' `8' + `2' `""' Now the array is "sparse", which just means some indices are missing. It has elements 0-3 and 10, but doesn't have elements 4, 5, 6, 7, 8, or @@ -10914,17 +10919,19 @@ have to be positive integers. Any number, or even a string, can be an index. 
For example, the following is an array that translates words from English to French: - Index "dog" Value "chien" - Index "cat" Value "chat" - Index "one" Value "un" - Index 1 Value "un" + Index Value +------------------------ + `"dog"' `"chien"' + `"cat"' `"chat"' + `"one"' `"un"' + `1' `"un"' Here we decided to translate the number one in both spelled-out and numeric form--thus illustrating that a single array can have both numbers and strings as indices. (In fact, array subscripts are always strings. There are some subtleties to how numbers work when used as array subscripts; this is discussed in more detail in *note Numeric -Array Subscripts::.) Here, the number `1' isn't double quoted, because +Array Subscripts::.) Here, the number `1' isn't double-quoted, because `awk' automatically converts it to a string. The value of `IGNORECASE' has no effect upon array subscripting. @@ -10948,7 +10955,7 @@ File: gawk.info, Node: Reference to Elements, Next: Assigning Elements, Prev: ----------------------------------- The principal way to use an array is to refer to one of its elements. -An array reference is an expression as follows: +An "array reference" is an expression as follows: ARRAY[INDEX-EXPRESSION] @@ -10956,8 +10963,8 @@ Here, ARRAY is the name of an array. The expression INDEX-EXPRESSION is the index of the desired element of the array. The value of the array reference is the current value of that array -element. For example, `foo[4.3]' is an expression for the element of -array `foo' at index `4.3'. +element. For example, `foo[4.3]' is an expression referencing the +element of array `foo' at index `4.3'. A reference to an array element that has no recorded value yields a value of `""', the null string. 
This includes elements that have not @@ -11024,7 +11031,7 @@ File: gawk.info, Node: Array Example, Next: Scanning an Array, Prev: Assignin The following program takes a list of lines, each beginning with a line number, and prints them out in order of line number. The line numbers -are not in order when they are first read--instead they are scrambled. +are not in order when they are first read--instead, they are scrambled. This program sorts the lines by making an array using the line numbers as subscripts. The program then prints out the lines in sorted order of their numbers. It is a very simple program and gets confused upon @@ -11095,7 +11102,7 @@ has previously used, with the variable VAR set to that index. The following program uses this form of the `for' statement. The first rule scans the input records and notes which words appear (at least once) in the input, by storing a one into the array `used' with -the word as index. The second rule scans the elements of `used' to +the word as the index. The second rule scans the elements of `used' to find all the distinct words that appear in the input. It prints each word that is more than 10 characters long and also prints the number of such words. *Note String Functions::, for more information on the @@ -11178,7 +11185,7 @@ internal implementation of arrays and will vary from one version of Often, though, you may wish to do something simple, such as "traverse the array by comparing the indices in ascending order," or "traverse the array by comparing the values in descending order." -`gawk' provides two mechanisms which give you this control. +`gawk' provides two mechanisms that give you this control: * Set `PROCINFO["sorted_in"]' to one of a set of predefined values. We describe this now. @@ -11226,22 +11233,26 @@ available: which `gawk' uses internally to perform the sorting. `"@ind_str_desc"' - String indices ordered from high to low. + Like `"@ind_str_asc"', but the string indices are ordered from + high to low. 
`"@ind_num_desc"' - Numeric indices ordered from high to low. + Like `"@ind_num_asc"', but the numeric indices are ordered from + high to low. `"@val_type_desc"' - Element values, based on type, ordered from high to low. - Subarrays, if present, come out first. + Like `"@val_type_asc"', but the element values, based on type, are + ordered from high to low. Subarrays, if present, come out first. `"@val_str_desc"' - Element values, treated as strings, ordered from high to low. - Subarrays, if present, come out first. + Like `"@val_str_asc"', but the element values, treated as strings, + are ordered from high to low. Subarrays, if present, come out + first. `"@val_num_desc"' - Element values, treated as numbers, ordered from high to low. - Subarrays, if present, come out first. + Like `"@val_num_asc"', but the element values, treated as numbers, + are ordered from high to low. Subarrays, if present, come out + first. The array traversal order is determined before the `for' loop starts to run. Changing `PROCINFO["sorted_in"]' in the loop body does not @@ -11427,8 +11438,8 @@ deleting elements in an array: This example removes all the elements from the array `frequencies'. Once an element is deleted, a subsequent `for' statement to scan the -array does not report that element and the `in' operator to check for -the presence of that element returns zero (i.e., false): +array does not report that element and using the `in' operator to check +for the presence of that element returns zero (i.e., false): delete foo[4] if (4 in foo) @@ -11631,7 +11642,7 @@ two-element subarray at index `1' of the main array `a': This simulates a true two-dimensional array. Each subarray element can contain another subarray as a value, which in turn can hold other arrays as well. In this way, you can create arrays of three or more -dimensions. The indices can be any `awk' expression, including scalars +dimensions. 
The indices can be any `awk' expressions, including scalars separated by commas (i.e., a regular `awk' simulated multidimensional subscript). So the following is valid in `gawk': @@ -11640,7 +11651,7 @@ subscript). So the following is valid in `gawk': Each subarray and the main array can be of different length. In fact, the elements of an array or its subarray do not all have to have the same type. This means that the main array and any of its subarrays -can be non-rectangular, or jagged in structure. You can assign a scalar +can be nonrectangular, or jagged in structure. You can assign a scalar value to the index `4' of the main array `a', even though `a[1]' is itself an array and not a scalar: @@ -11658,8 +11669,8 @@ the element at that index: a[4][5][6][7] = "An element in a four-dimensional array" This removes the scalar value from index `4' and then inserts a -subarray of subarray of subarray containing a scalar. You can also -delete an entire subarray or subarray of subarrays: +three-level nested subarray containing a scalar. You can also delete an +entire subarray or subarray of subarrays: delete a[4][5] a[4][5] = "An element in subarray a[4]" @@ -11667,7 +11678,7 @@ delete an entire subarray or subarray of subarrays: But recall that you can not delete the main array `a' and then use it as a scalar. - The built-in functions which take array arguments can also be used + The built-in functions that take array arguments can also be used with subarrays. For example, the following code fragment uses `length()' (*note String Functions::) to determine the number of elements in the main array `a' and its subarrays: @@ -11688,7 +11699,7 @@ be nested to scan all the elements of an array of arrays if it is rectangular in structure. 
In order to print the contents (scalar values) of a two-dimensional array of arrays (i.e., in which each first-level element is itself an array, not necessarily of the same -length) you could use the following code: +length), you could use the following code: for (i in array) for (j in array[i]) @@ -11770,9 +11781,9 @@ File: gawk.info, Node: Arrays Summary, Prev: Arrays of Arrays, Up: Arrays of `awk'. * Standard `awk' simulates multidimensional arrays by separating - subscript values with a comma. The values are concatenated into a + subscript values with commas. The values are concatenated into a single string, separated by the value of `SUBSEP'. The fact that - such a subscript was created in this way is not retained; thus + such a subscript was created in this way is not retained; thus, changing `SUBSEP' may have unexpected consequences. You can use `(SUB1, SUB2, ...) in ARRAY' to see if such a multidimensional subscript exists in ARRAY. @@ -11780,7 +11791,7 @@ File: gawk.info, Node: Arrays Summary, Prev: Arrays of Arrays, Up: Arrays * `gawk' provides true arrays of arrays. You use a separate set of square brackets for each dimension in such an array: `data[row][col]', for example. Array elements may thus be either - scalar values (number or string) or another array. + scalar values (number or string) or other arrays. * Use the `isarray()' built-in function to determine if an array element is itself a subarray. @@ -11800,7 +11811,9 @@ internationalize and localize programs. Besides the built-in functions, `awk' has provisions for writing new functions that the rest of a program can use. The second half of this -major node describes these "user-defined" functions. +major node describes these "user-defined" functions. Finally, we +explore indirect function calls, a `gawk'-specific extension that lets +you determine at runtime what function is to be called. * Menu: @@ -11812,7 +11825,7 @@ major node describes these "user-defined" functions. 
File: gawk.info, Node: Built-in, Next: User-defined, Up: Functions -9.1 Built-In Functions +9.1 Built-in Functions ====================== "Built-in" functions are always available for your `awk' program to @@ -11837,7 +11850,7 @@ for your convenience. File: gawk.info, Node: Calling Built-in, Next: Numeric Functions, Up: Built-in -9.1.1 Calling Built-In Functions +9.1.1 Calling Built-in Functions -------------------------------- To call one of `awk''s built-in functions, write the name of the @@ -11874,9 +11887,10 @@ are evaluated from left to right or from right to left. For example: j = atan2(++i, i *= 2) If the order of evaluation is left to right, then `i' first becomes -6, and then 12, and `atan2()' is called with the two arguments 6 and -12. But if the order of evaluation is right to left, `i' first becomes -10, then 11, and `atan2()' is called with the two arguments 11 and 10. +six, and then 12, and `atan2()' is called with the two arguments six +and 12. But if the order of evaluation is right to left, `i' first +becomes 10, then 11, and `atan2()' is called with the two arguments 11 +and 10. File: gawk.info, Node: Numeric Functions, Next: String Functions, Prev: Calling Built-in, Up: Built-in @@ -11932,7 +11946,7 @@ brackets ([ ]): Often random integers are needed instead. Following is a user-defined function that can be used to obtain a random - non-negative integer less than N: + nonnegative integer less than N: function randint(n) { @@ -12022,7 +12036,7 @@ File: gawk.info, Node: String Functions, Next: I/O Functions, Prev: Numeric F The functions in this minor node look at or change the text of one or more strings. - `gawk' understands locales (*note Locales::), and does all string + `gawk' understands locales (*note Locales::) and does all string processing in terms of _characters_, not _bytes_. This distinction is particularly important to understand for locales where one character may be represented by multiple bytes. 
Thus, for example, `length()' @@ -12093,7 +12107,7 @@ Options::): a[2] = "de" a[3] = "sac" - The `asorti()' function works similarly to `asort()', however, the + The `asorti()' function works similarly to `asort()'; however, the _indices_ are sorted, instead of the values. Thus, in the previous example, starting with the same initial set of indices and values in `a', calling `asorti(a)' would yield: @@ -12181,7 +12195,7 @@ Options::): With BWK `awk' and `gawk', it is a fatal error to use a regexp constant for FIND. Other implementations allow it, simply treating the regexp constant as an expression meaning `$0 ~ - /regexp/'. (d.c.). + /regexp/'. (d.c.) `length('[STRING]`)' Return the number of characters in STRING. If STRING is a number, @@ -12225,9 +12239,9 @@ Options::): `match(STRING, REGEXP' [`, ARRAY']`)' Search STRING for the longest, leftmost substring matched by the - regular expression, REGEXP and return the character position - (index) at which that substring begins (one, if it starts at the - beginning of STRING). If no match is found, return zero. + regular expression REGEXP and return the character position (index) + at which that substring begins (one, if it starts at the beginning + of STRING). If no match is found, return zero. The REGEXP argument may be either a regexp constant (`/'...`/') or a string constant (`"'...`"'). In the latter case, the string is @@ -12235,7 +12249,7 @@ Options::): discussion of the difference between the two forms, and the implications for writing your program correctly. - The order of the first two arguments is backwards from most other + The order of the first two arguments is the opposite of most other string functions that work with regular expressions, such as `sub()' and `gsub()'. 
It might help to remember that for `match()', the order is the same as for the `~' operator: `STRING @@ -12302,8 +12316,8 @@ Options::): There may not be subscripts for the start and index for every parenthesized subexpression, because they may not all have matched - text; thus they should be tested for with the `in' operator (*note - Reference to Elements::). + text; thus, they should be tested for with the `in' operator + (*note Reference to Elements::). The ARRAY argument to `match()' is a `gawk' extension. In compatibility mode (*note Options::), using a third argument is a @@ -12336,19 +12350,19 @@ Options::): FIELDSEP, is a regexp describing where to split STRING (much as `FS' can be a regexp describing where to split input records). If FIELDSEP is omitted, the value of `FS' is used. `split()' returns - the number of elements created. SEPS is a `gawk' extension with + the number of elements created. SEPS is a `gawk' extension, with `SEPS[I]' being the separator string between `ARRAY[I]' and - `ARRAY[I+1]'. If FIELDSEP is a single space then any leading + `ARRAY[I+1]'. If FIELDSEP is a single space, then any leading whitespace goes into `SEPS[0]' and any trailing whitespace goes - into `SEPS[N]' where N is the return value of `split()' (i.e., the - number of elements in ARRAY). + into `SEPS[N]', where N is the return value of `split()' (i.e., + the number of elements in ARRAY). The `split()' function splits strings into pieces in a manner similar to the way input lines are split into fields. For example: split("cul-de-sac", a, "-", seps) - splits the string `cul-de-sac' into three fields using `-' as the + splits the string `"cul-de-sac"' into three fields using `-' as the separator. 
It sets the contents of the array `a' as follows: a[1] = "cul" @@ -12365,17 +12379,18 @@ Options::): As with input field-splitting, when the value of FIELDSEP is `" "', leading and trailing whitespace is ignored in values assigned to the elements of ARRAY but not in SEPS, and the elements - are separated by runs of whitespace. Also, as with input - field-splitting, if FIELDSEP is the null string, each individual + are separated by runs of whitespace. Also, as with input field + splitting, if FIELDSEP is the null string, each individual character in the string is split into its own array element. (c.e.) Note, however, that `RS' has no effect on the way `split()' works. - Even though `RS = ""' causes newline to also be an input field - separator, this does not affect how `split()' splits strings. + Even though `RS = ""' causes the newline character to also be an + input field separator, this does not affect how `split()' splits + strings. Modern implementations of `awk', including `gawk', allow the third - argument to be a regexp constant (`/abc/') as well as a string. + argument to be a regexp constant (`/'...`/') as well as a string. (d.c.) The POSIX standard allows this as well. *Note Computed Regexps::, for a discussion of the difference between using a string constant or a regexp constant, and the implications for @@ -12476,7 +12491,7 @@ Options::): { sub(/\|/, "\\&"); print } As mentioned, the third argument to `sub()' must be a variable, - field or array element. Some versions of `awk' allow the third + field, or array element. Some versions of `awk' allow the third argument to be an expression that is not an lvalue. In such a case, `sub()' still searches for the pattern and returns zero or one, but the result of the substitution (if any) is thrown away @@ -12601,11 +12616,11 @@ example, `"a\qb"' is treated as `"aqb"'. At the runtime level, the various functions handle sequences of `\' and `&' differently. The situation is (sadly) somewhat complex. 
-Historically, the `sub()' and `gsub()' functions treated the two -character sequence `\&' specially; this sequence was replaced in the -generated text with a single `&'. Any other `\' within the REPLACEMENT -string that did not precede an `&' was passed through unchanged. This -is illustrated in *note table-sub-escapes::. +Historically, the `sub()' and `gsub()' functions treated the +two-character sequence `\&' specially; this sequence was replaced in +the generated text with a single `&'. Any other `\' within the +REPLACEMENT string that did not precede an `&' was passed through +unchanged. This is illustrated in *note table-sub-escapes::. You type `sub()' sees `sub()' generates ------- --------- -------------- @@ -12620,10 +12635,10 @@ is illustrated in *note table-sub-escapes::. Table 9.1: Historical escape sequence processing for `sub()' and `gsub()' -This table shows both the lexical-level processing, where an odd number -of backslashes becomes an even number at the runtime level, as well as -the runtime processing done by `sub()'. (For the sake of simplicity, -the rest of the following tables only show the case of even numbers of +This table shows the lexical-level processing, where an odd number of +backslashes becomes an even number at the runtime level, as well as the +runtime processing done by `sub()'. (For the sake of simplicity, the +rest of the following tables only show the case of even numbers of backslashes entered at the lexical level.) The problem with the historical approach is that there is no way to @@ -12647,10 +12662,10 @@ This is shown in *note table-sub-proposed::. `\\q' `\q' A literal `\q' `\\\\' `\\' `\\' -Table 9.2: GNU `awk' rules for `sub()' and backslash +Table 9.2: `gawk' rules for `sub()' and backslash In a nutshell, at the runtime level, there are now three special -sequences of characters (`\\\&', `\\&' and `\&') whereas historically +sequences of characters (`\\\&', `\\&', and `\&') whereas historically there was only one. 
However, as in the historical case, any `\' that is not part of one of these three sequences is not special and appears in the output literally. @@ -12680,7 +12695,7 @@ Table 9.3: POSIX rules for `sub()' and `gsub()' `\\\\' is seen as `\\' and produces `\' instead of `\\'. Starting with version 3.1.4, `gawk' followed the POSIX rules when -`--posix' is specified (*note Options::). Otherwise, it continued to +`--posix' was specified (*note Options::). Otherwise, it continued to follow the proposed rules, as that had been its behavior for many years. When version 4.0.0 was released, the `gawk' maintainer made the @@ -12707,9 +12722,9 @@ the `\' does not, as shown in *note table-gensub-escapes::. Table 9.4: Escape sequence processing for `gensub()' - Because of the complexity of the lexical and runtime level processing -and the special cases for `sub()' and `gsub()', we recommend the use of -`gawk' and `gensub()' when you have to do substitutions. + Because of the complexity of the lexical- and runtime-level +processing and the special cases for `sub()' and `gsub()', we recommend +the use of `gawk' and `gensub()' when you have to do substitutions. ---------- Footnotes ---------- @@ -12736,10 +12751,10 @@ parameters are enclosed in square brackets ([ ]): When closing a coprocess, it is occasionally useful to first close one end of the two-way pipe and then to close the other. This is done by providing a second argument to `close()'. This second - argument should be one of the two string values `"to"' or `"from"', - indicating which end of the pipe to close. Case in the string does - not matter. *Note Two-way I/O::, which discusses this feature in - more detail and gives an example. + argument (HOW) should be one of the two string values `"to"' or + `"from"', indicating which end of the pipe to close. Case in the + string does not matter. *Note Two-way I/O::, which discusses this + feature in more detail and gives an example. 
Note that the second argument to `close()' is a `gawk' extension; it is not available in compatibility mode (*note Options::). @@ -12757,7 +12772,7 @@ parameters are enclosed in square brackets ([ ]): sometimes it is necessary to force a program to "flush" its buffers (i.e., write the information to its destination, even if a buffer is not full). This is the purpose of the `fflush()' - function--`gawk' also buffers its output and the `fflush()' + function--`gawk' also buffers its output, and the `fflush()' function forces `gawk' to flush its buffers. Brian Kernighan added `fflush()' to his `awk' in April 1992. For @@ -12774,16 +12789,17 @@ parameters are enclosed in square brackets ([ ]): output files and pipes if the argument was the null string. This was changed in order to be compatible with Brian Kernighan's `awk', in the hope that standardizing this - feature in POSIX would then be easier (which indeed helped). + feature in POSIX would then be easier (which indeed proved to + be the case). With `gawk', you can use `fflush("/dev/stdout")' if you wish to flush only the standard output. `fflush()' returns zero if the buffer is successfully flushed; - otherwise, it returns non-zero. (`gawk' returns -1.) In the case - where all buffers are flushed, the return value is zero only if - all buffers were flushed successfully. Otherwise, it is -1, and - `gawk' warns about the problem FILENAME. + otherwise, it returns a nonzero value. (`gawk' returns -1.) In + the case where all buffers are flushed, the return value is zero + only if all buffers were flushed successfully. Otherwise, it is + -1, and `gawk' warns about the problem FILENAME. 
`gawk' also issues a warning message if you attempt to flush a file or pipe that was opened for reading (such as with `getline'), @@ -12792,9 +12808,9 @@ parameters are enclosed in square brackets ([ ]): Interactive Versus Noninteractive Buffering - As a side point, buffering issues can be even more confusing, - depending upon whether your program is "interactive" (i.e., - communicating with a user sitting at a keyboard).(1) + As a side point, buffering issues can be even more confusing if + your program is "interactive" (i.e., communicating with a user + sitting at a keyboard).(1) Interactive programs generally "line buffer" their output (i.e., they write out every line). Noninteractive programs wait until @@ -12823,7 +12839,7 @@ parameters are enclosed in square brackets ([ ]): shot. `system(COMMAND)' - Execute the operating-system command COMMAND and then return to + Execute the operating system command COMMAND and then return to the `awk' program. Return COMMAND's exit status. For example, if the following fragment of code is put in your `awk' @@ -12912,14 +12928,14 @@ File: gawk.info, Node: Time Functions, Next: Bitwise Functions, Prev: I/O Fun `awk' programs are commonly used to process log files containing timestamp information, indicating when a particular log record was -written. Many programs log their timestamp in the form returned by the -`time()' system call, which is the number of seconds since a particular -epoch. On POSIX-compliant systems, it is the number of seconds since -1970-01-01 00:00:00 UTC, not counting leap seconds.(1) All known -POSIX-compliant systems support timestamps from 0 through 2^31 - 1, -which is sufficient to represent times through 2038-01-19 03:14:07 UTC. -Many systems support a wider range of timestamps, including negative -timestamps that represent times before the epoch. +written. Many programs log their timestamps in the form returned by +the `time()' system call, which is the number of seconds since a +particular epoch. 
On POSIX-compliant systems, it is the number of +seconds since 1970-01-01 00:00:00 UTC, not counting leap seconds.(1) +All known POSIX-compliant systems support timestamps from 0 through +2^31 - 1, which is sufficient to represent times through 2038-01-19 +03:14:07 UTC. Many systems support a wider range of timestamps, +including negative timestamps that represent times before the epoch. In order to make it easier to process such log files and to produce useful reports, `gawk' provides the following functions for working @@ -12942,9 +12958,9 @@ enclosed in square brackets ([ ]): specified; for example, an hour of -1 means 1 hour before midnight. The origin-zero Gregorian calendar is assumed, with year 0 preceding year 1 and year -1 preceding year 0. The time is - assumed to be in the local timezone. If the daylight-savings flag - is positive, the time is assumed to be daylight savings time; if - zero, the time is assumed to be standard time; and if negative + assumed to be in the local time zone. If the daylight-savings + flag is positive, the time is assumed to be daylight savings time; + if zero, the time is assumed to be standard time; and if negative (the default), `mktime()' attempts to determine whether daylight savings time is in effect for the specified time. @@ -13085,23 +13101,23 @@ the following date format specifications: The weekday as a decimal number (1-7). Monday is day one. `%U' - The week number of the year (the first Sunday as the first day of - week one) as a decimal number (00-53). + The week number of the year (with the first Sunday as the first + day of week one) as a decimal number (00-53). `%V' - The week number of the year (the first Monday as the first day of - week one) as a decimal number (01-53). The method for determining - the week number is as specified by ISO 8601. 
(To wit: if the week - containing January 1 has four or more days in the new year, then - it is week one; otherwise it is week 53 of the previous year and - the next week is week one.) + The week number of the year (with the first Monday as the first + day of week one) as a decimal number (01-53). The method for + determining the week number is as specified by ISO 8601. (To wit: + if the week containing January 1 has four or more days in the new + year, then it is week one; otherwise it is week 53 of the previous + year and the next week is week one.) `%w' The weekday as a decimal number (0-6). Sunday is day zero. `%W' - The week number of the year (the first Monday as the first day of - week one) as a decimal number (00-53). + The week number of the year (with the first Monday as the first + day of week one) as a decimal number (00-53). `%x' The locale's "appropriate" date representation. (This is `%A %B @@ -13118,8 +13134,8 @@ the following date format specifications: The full year as a decimal number (e.g., 2015). `%z' - The timezone offset in a +HHMM format (e.g., the format necessary - to produce RFC 822/RFC 1036 date headers). + The time zone offset in a `+HHMM' format (e.g., the format + necessary to produce RFC 822/RFC 1036 date headers). `%Z' The time zone name or abbreviation; no characters if no time zone @@ -13236,7 +13252,7 @@ each successive pair of bits in the operands. Three common operations are bitwise AND, OR, and XOR. The operations are described in *note table-bitwise-ops::. - Bit Operator + Bit operator | AND | OR | XOR |--+--+--+--+--+-- Operands | 0 | 1 | 0 | 1 | 0 | 1 @@ -13292,7 +13308,7 @@ paragraph, don't worry about it.) 
Here is a user-defined function (*note User-defined::) that illustrates the use of these functions: - # bits2str --- turn a byte into readable 1's and 0's + # bits2str --- turn a byte into readable ones and zeros function bits2str(bits, data, mask) { @@ -13331,13 +13347,14 @@ This program produces the following output when run: -| lshift(0x99, 2) = 0x264 = 0000001001100100 -| rshift(0x99, 2) = 0x26 = 00100110 - The `bits2str()' function turns a binary number into a string. The -number `1' represents a binary value where the rightmost bit is set to -1. Using this mask, the function repeatedly checks the rightmost bit. -ANDing the mask with the value indicates whether the rightmost bit is 1 -or not. If so, a `"1"' is concatenated onto the front of the string. -Otherwise, a `"0"' is added. The value is then shifted right by one -bit and the loop continues until there are no more 1 bits. + The `bits2str()' function turns a binary number into a string. +Initializing `mask' to one creates a binary value where the rightmost +bit is set to one. Using this mask, the function repeatedly checks the +rightmost bit. ANDing the mask with the value indicates whether the +rightmost bit is one or not. If so, a `"1"' is concatenated onto the +front of the string. Otherwise, a `"0"' is added. The value is then +shifted right by one bit and the loop continues until there are no more +one bits. If the initial value is zero, it returns a simple `"0"'. Otherwise, at the end, it pads the value with zeros to represent multiples of @@ -13350,9 +13367,9 @@ Nondecimal-numbers::), and then demonstrates the results of the ---------- Footnotes ---------- - (1) This example shows that 0's come in on the left side. For + (1) This example shows that zeros come in on the left side. For `gawk', this is always true, but in some languages, it's possible to -have the left side fill with 1's. +have the left side fill with ones. 
File: gawk.info, Node: Type Functions, Next: I18N Functions, Prev: Bitwise Functions, Up: Built-in @@ -13366,7 +13383,7 @@ traverses every element of an array of arrays (*note Arrays of Arrays::). `isarray(X)' - Return a true value if X is an array. Otherwise return false. + Return a true value if X is an array. Otherwise, return false. `isarray()' is meant for use in two circumstances. The first is when traversing a multidimensional array: you can test if an element is @@ -13413,8 +13430,8 @@ brackets ([ ]): Return the plural form used for NUMBER of the translation of STRING1 and STRING2 in text domain DOMAIN for locale category CATEGORY. STRING1 is the English singular variant of a message, - and STRING2 the English plural variant of the same message. The - default value for DOMAIN is the current value of `TEXTDOMAIN'. + and STRING2 is the English plural variant of the same message. + The default value for DOMAIN is the current value of `TEXTDOMAIN'. The default value for CATEGORY is `"LC_MESSAGES"'. @@ -13443,7 +13460,7 @@ File: gawk.info, Node: Definition Syntax, Next: Function Example, Up: User-de 9.2.1 Function Definition Syntax -------------------------------- - It's entirely fair to say that the `awk' syntax for local variable + It's entirely fair to say that the awk syntax for local variable definitions is appallingly awful. -- Brian Kernighan Definitions of functions can appear anywhere between the rules of an @@ -13473,17 +13490,22 @@ the argument names are used to hold the argument values given in the call. A function cannot have two parameters with the same name, nor may it -have a parameter with the same name as the function itself. In -addition, according to the POSIX standard, function parameters cannot -have the same name as one of the special predefined variables (*note -Built-in Variables::). Not all versions of `awk' enforce this -restriction. +have a parameter with the same name as the function itself. 
+ + CAUTION: According to the POSIX standard, function parameters + cannot have the same name as one of the special predefined + variables (*note Built-in Variables::), nor may a function + parameter have the same name as another function. + + Not all versions of `awk' enforce these restrictions. `gawk' + always enforces the first restriction. With `--posix' (*note + Options::), it also enforces the second restriction. Local variables act like the empty string if referenced where a string value is required, and like zero if referenced where a numeric -value is required. This is the same as regular variables that have -never been assigned a value. (There is more to understand about local -variables; *note Dynamic Typing::.) +value is required. This is the same as the behavior of regular +variables that have never been assigned a value. (There is more to +understand about local variables; *note Dynamic Typing::.) The BODY-OF-FUNCTION consists of `awk' statements. It is the most important part of the definition, because it says what the function @@ -13512,9 +13534,9 @@ function is supposed to be used. variable values hide, or "shadow", any variables of the same names used in the rest of the program. The shadowed variables are not accessible in the function definition, because there is no way to name them while -their names have been taken away for the local variables. All other -variables used in the `awk' program can be referenced or set normally -in the function's body. +their names have been taken away for the arguments and local variables. +All other variables used in the `awk' program can be referenced or set +normally in the function's body. The arguments and local variables last only as long as the function body is executing. 
Once the body finishes, you can once again access @@ -13567,7 +13589,7 @@ takes a number and prints it in a specific format: printf "%6.3g\n", num } -To illustrate, here is an `awk' rule that uses our `myprint' function: +To illustrate, here is an `awk' rule that uses our `myprint()' function: $3 > 0 { myprint($3) } @@ -13596,13 +13618,13 @@ extra whitespace signifies the start of the local variable list): When working with arrays, it is often necessary to delete all the elements in an array and start over with a new list of elements (*note Delete::). Instead of having to repeat this loop everywhere that you -need to clear out an array, your program can just call `delarray'. +need to clear out an array, your program can just call `delarray()'. (This guarantees portability. The use of `delete ARRAY' to delete the contents of an entire array is a relatively recent(1) addition to the POSIX standard.) The following is an example of a recursive function. It takes a -string as an input parameter and returns the string in backwards order. +string as an input parameter and returns the string in reverse order. Recursive functions must always have a test that stops the recursion. In this case, the recursion terminates when the input string is already empty: @@ -13693,14 +13715,14 @@ File: gawk.info, Node: Variable Scope, Next: Pass By Value/Reference, Prev: C 9.2.3.2 Controlling Variable Scope .................................. -Unlike many languages, there is no way to make a variable local to a +Unlike in many languages, there is no way to make a variable local to a `{' ... `}' block in `awk', but you can make a variable local to a function. It is good practice to do so whenever a variable is needed only in that function. To make a variable local to a function, simply declare the variable as an argument after the actual function arguments (*note Definition -Syntax::). Look at the following example where variable `i' is a +Syntax::). 
Look at the following example, where variable `i' is a global variable used by both functions `foo()' and `bar()': function bar() @@ -13736,7 +13758,7 @@ variable instance: foo's i=3 top's i=3 - If you want `i' to be local to both `foo()' and `bar()' do as + If you want `i' to be local to both `foo()' and `bar()', do as follows (the extra space before `i' is a coding convention to indicate that `i' is a local variable, not an argument): @@ -13818,7 +13840,7 @@ explicitly whether the arguments are passed "by value" or "by reference". Instead, the passing convention is determined at runtime when the -function is called according to the following rule: if the argument is +function is called, according to the following rule: if the argument is an array variable, then it is passed by reference. Otherwise, the argument is passed by value. @@ -13876,7 +13898,7 @@ function _are_ visible outside that function. stores `"two"' in the second element of `a'. Some `awk' implementations allow you to call a function that has not -been defined. They only report a problem at runtime when the program +been defined. They only report a problem at runtime, when the program actually tries to call the function. For example: BEGIN { @@ -13921,15 +13943,15 @@ undefined, and therefore, unpredictable. In practice, though, all versions of `awk' simply return the null string, which acts like zero if used in a numeric context. - A `return' statement with no value expression is assumed at the end -of every function definition. So if control reaches the end of the -function body, then technically, the function returns an unpredictable + A `return' statement without an EXPRESSION is assumed at the end of +every function definition. So, if control reaches the end of the +function body, then technically the function returns an unpredictable value. In practice, it returns the empty string. `awk' does _not_ warn you if you use the return value of such a function. 
Sometimes, you want to write a function for what it does, not for what it returns. Such a function corresponds to a `void' function in -C, C++ or Java, or to a `procedure' in Ada. Thus, it may be +C, C++, or Java, or to a `procedure' in Ada. Thus, it may be appropriate to not return any value; simply bear in mind that you should not be using the return value of such a function. @@ -14035,13 +14057,13 @@ you can specify the name of the function to call as a string variable, and then call the function. Let's look at an example. Suppose you have a file with your test scores for the classes you -are taking. The first field is the class name. The following fields -are the functions to call to process the data, up to a "marker" field +are taking, and you wish to get the sum and the average of your test +scores. The first field is the class name. The following fields are +the functions to call to process the data, up to a "marker" field `data:'. Following the marker, to the end of the record, are the various numeric test scores. - Here is the initial file; you wish to get the sum and the average of -your test scores: + Here is the initial file: Biology_101 sum average data: 87.0 92.4 78.5 94.9 Chemistry_305 sum average data: 75.2 98.3 94.7 88.2 @@ -14099,9 +14121,9 @@ using indirect function calls: return ret } - These two functions expect to work on fields; thus the parameters + These two functions expect to work on fields; thus, the parameters `first' and `last' indicate where in the fields to start and end. -Otherwise they perform the expected computations and are not unusual: +Otherwise, they perform the expected computations and are not unusual: # For each record, print the class name and the requested statistics { @@ -14154,18 +14176,19 @@ to force it to be a string value.) may think at first. The C and C++ languages provide "function pointers," which are a mechanism for calling a function chosen at runtime. 
One of the most well-known uses of this ability is the C -`qsort()' function, which sorts an array using the famous "quick sort" +`qsort()' function, which sorts an array using the famous "quicksort" algorithm (see the Wikipedia article -(http://en.wikipedia.org/wiki/Quick_sort) for more information). To -use this function, you supply a pointer to a comparison function. This +(http://en.wikipedia.org/wiki/Quicksort) for more information). To use +this function, you supply a pointer to a comparison function. This mechanism allows you to sort arbitrary data in an arbitrary fashion. We can do something similar using `gawk', like this: # quicksort.awk --- Quicksort algorithm, with user-supplied # comparison function - # quicksort --- C.A.R. Hoare's quick sort algorithm. See Wikipedia - # or almost any algorithms or computer science text + + # quicksort --- C.A.R. Hoare's quicksort algorithm. See Wikipedia + # or almost any algorithms or computer science text. function quicksort(data, left, right, less_than, i, last) { @@ -14194,7 +14217,7 @@ mechanism allows you to sort arbitrary data in an arbitrary fashion. The `quicksort()' function receives the `data' array, the starting and ending indices to sort (`left' and `right'), and the name of a function that performs a "less than" comparison. It then implements -the quick sort algorithm. +the quicksort algorithm. To make use of the sorting function, we return to our previous example. The first thing to do is write some comparison functions: @@ -14434,7 +14457,7 @@ File: gawk.info, Node: Library Functions, Next: Sample Programs, Prev: Functi *note User-defined::, describes how to write your own `awk' functions. Writing functions is important, because it allows you to encapsulate algorithms and program tasks in a single place. It simplifies -programming, making program development more manageable, and making +programming, making program development more manageable and making programs more readable. 
In their seminal 1976 book, `Software Tools',(1) Brian Kernighan and @@ -14539,7 +14562,7 @@ often use variable names like these for their own purposes. The example programs shown in this major node all start the names of their private variables with an underscore (`_'). Users generally don't use leading underscores in their variable names, so this -convention immediately decreases the chances that the variable name +convention immediately decreases the chances that the variable names will be accidentally shared with the user's program. In addition, several of the library functions use a prefix that helps @@ -14552,7 +14575,7 @@ for private function names.(1) As a final note on variable naming, if a function makes global variables available for use by a main program, it is a good convention -to start that variable's name with a capital letter--for example, +to start those variables' names with a capital letter--for example, `getopt()''s `Opterr' and `Optind' variables (*note Getopt Function::). The leading capital letter indicates that it is global, while the fact that the variable name is not all capital letters indicates that the @@ -14560,7 +14583,7 @@ variable is not one of `awk''s predefined variables, such as `FS'. It is also important that _all_ variables in library functions that do not need to save state are, in fact, declared local.(2) If this is -not done, the variable could accidentally be used in the user's +not done, the variables could accidentally be used in the user's program, leading to bugs that are very difficult to track down: function lib_func(x, y, l1, l2) @@ -14738,7 +14761,7 @@ for use in printing the diagnostic message. This is not possible in `awk', so this `assert()' function also requires a string version of the condition that is being tested. Following is the function: - # assert --- assert that a condition is true. Otherwise exit. + # assert --- assert that a condition is true. Otherwise, exit. 
function assert(condition, string) { @@ -14759,7 +14782,7 @@ the condition that is being tested. Following is the function: false, it prints a message to standard error, using the `string' parameter to describe the failed condition. It then sets the variable `_assert_exit' to one and executes the `exit' statement. The `exit' -statement jumps to the `END' rule. If the `END' rules finds +statement jumps to the `END' rule. If the `END' rule finds `_assert_exit' to be true, it exits immediately. The purpose of the test in the `END' rule is to keep any other `END' @@ -14974,9 +14997,9 @@ the strings in an array into one long string. The following function, `join()', accomplishes this task. It is used later in several of the application programs (*note Sample Programs::). - Good function design is important; this function needs to be general -but it should also have a reasonable default behavior. It is called -with an array as well as the beginning and ending indices of the + Good function design is important; this function needs to be +general, but it should also have a reasonable default behavior. It is +called with an array as well as the beginning and ending indices of the elements in the array to be merged. This assumes that the array indices are numeric--a reasonable assumption, as the array was likely created with `split()' (*note String Functions::): @@ -15095,7 +15118,7 @@ optional timestamp value to use instead of the current time. File: gawk.info, Node: Readfile Function, Next: Shell Quoting, Prev: Getlocaltime Function, Up: General Functions -10.2.8 Reading a Whole File At Once +10.2.8 Reading a Whole File at Once ----------------------------------- Often, it is convenient to have the entire contents of a file available @@ -15137,13 +15160,13 @@ reads the entire contents of the named file in one shot: It works by setting `RS' to `^$', a regular expression that will never match if the file has contents. 
`gawk' reads data from the file -into `tmp' attempting to match `RS'. The match fails after each read, +into `tmp', attempting to match `RS'. The match fails after each read, but fails quickly, such that `gawk' fills `tmp' with the entire contents of the file. (*Note Records::, for information on `RT' and `RS'.) In the case that `file' is empty, the return value is the null -string. Thus calling code may use something like: +string. Thus, calling code may use something like: contents = readfile("/some/path") if (length(contents) == 0) @@ -15233,8 +15256,9 @@ File: gawk.info, Node: Filetrans Function, Next: Rewind Function, Up: Data Fi The `BEGIN' and `END' rules are each executed exactly once, at the beginning and end of your `awk' program, respectively (*note BEGIN/END::). We (the `gawk' authors) once had a user who mistakenly -thought that the `BEGIN' rule is executed at the beginning of each data -file and the `END' rule is executed at the end of each data file. +thought that the `BEGIN' rules were executed at the beginning of each +data file and the `END' rules were executed at the end of each data +file. When informed that this was not the case, the user requested that we add new special patterns to `gawk', named `BEGIN_FILE' and `END_FILE', @@ -15268,7 +15292,7 @@ does so _portably_; this works with any implementation of `awk': This file must be loaded before the user's "main" program, so that the rule it supplies is executed first. - This rule relies on `awk''s `FILENAME' variable that automatically + This rule relies on `awk''s `FILENAME' variable, which automatically changes for each new data file. The current file name is saved in a private variable, `_oldfilename'. If `FILENAME' does not equal `_oldfilename', then a new data file is being processed and it is @@ -15283,7 +15307,7 @@ correctly even for the first data file. The program also supplies an `END' rule to do the final processing for the last file. 
Because this `END' rule comes before any `END' rules supplied in the "main" program, `endfile()' is called first. Once -again the value of multiple `BEGIN' and `END' rules should be clear. +again, the value of multiple `BEGIN' and `END' rules should be clear. If the same data file occurs twice in a row on the command line, then `endfile()' and `beginfile()' are not executed at the end of the first @@ -15310,7 +15334,7 @@ how it simplifies writing the main program. You are probably wondering, if `beginfile()' and `endfile()' functions can do the job, why does `gawk' have `BEGINFILE' and -`ENDFILE' patterns (*note BEGINFILE/ENDFILE::)? +`ENDFILE' patterns? Good question. Normally, if `awk' cannot open a file, this causes an immediate fatal error. In this case, there is no way for a @@ -15318,7 +15342,8 @@ user-defined function to deal with the problem, as the mechanism for calling it relies on the file being open and at the first record. Thus, the main reason for `BEGINFILE' is to give you a "hook" to catch files that cannot be processed. `ENDFILE' exists for symmetry, and because -it provides an easy way to do per-file cleanup processing. +it provides an easy way to do per-file cleanup processing. For more +information, refer to *note BEGINFILE/ENDFILE::. File: gawk.info, Node: Rewind Function, Next: File Checking, Prev: Filetrans Function, Up: Data File Management @@ -15326,15 +15351,14 @@ File: gawk.info, Node: Rewind Function, Next: File Checking, Prev: Filetrans 10.3.2 Rereading the Current File --------------------------------- -Another request for a new built-in function was for a `rewind()' -function that would make it possible to reread the current file. The -requesting user didn't want to have to use `getline' (*note Getline::) -inside a loop. +Another request for a new built-in function was for a function that +would make it possible to reread the current file. The requesting user +didn't want to have to use `getline' (*note Getline::) inside a loop. 
However, as long as you are not in the `END' rule, it is quite easy to arrange to immediately close the current input file and then start -over with it from the top. For lack of a better name, we'll call it -`rewind()': +over with it from the top. For lack of a better name, we'll call the +function `rewind()': # rewind.awk --- rewind the current file and start over @@ -15392,7 +15416,7 @@ longer in the list). See also *note ARGC and ARGV::. Because `awk' variable names only allow the English letters, the regular expression check purposely does not use character classes such -as `[:alpha:]' and `[:alnum:]' (*note Bracket Expressions::) +as `[:alpha:]' and `[:alnum:]' (*note Bracket Expressions::). ---------- Footnotes ---------- @@ -15403,14 +15427,14 @@ opened. However, the code here provides a portable solution. File: gawk.info, Node: Empty Files, Next: Ignoring Assigns, Prev: File Checking, Up: Data File Management -10.3.4 Checking for Zero-length Files +10.3.4 Checking for Zero-Length Files ------------------------------------- All known `awk' implementations silently skip over zero-length files. This is a by-product of `awk''s implicit read-a-record-and-match-against-the-rules loop: when `awk' tries to -read a record from an empty file, it immediately receives an end of -file indication, closes the file, and proceeds on to the next +read a record from an empty file, it immediately receives an +end-of-file indication, closes the file, and proceeds on to the next command-line data file, _without_ executing any user-level `awk' program code. @@ -15460,7 +15484,7 @@ File: gawk.info, Node: Ignoring Assigns, Prev: Empty Files, Up: Data File Man Occasionally, you might not want `awk' to process command-line variable assignments (*note Assignment Options::). In particular, if you have a file name that contains an `=' character, `awk' treats the file name as -an assignment, and does not process it. +an assignment and does not process it. 
Some users have suggested an additional command-line option for `gawk' to disable command-line assignments. However, some simple @@ -15750,8 +15774,8 @@ which is in `ARGV[0]': } } - The rest of the `BEGIN' rule is a simple test program. Here is the -result of two sample runs of the test program: + The rest of the `BEGIN' rule is a simple test program. Here are the +results of two sample runs of the test program: $ awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x -| c = <a>, Optarg = <> @@ -15797,10 +15821,10 @@ File: gawk.info, Node: Passwd Functions, Next: Group Functions, Prev: Getopt ============================== The `PROCINFO' array (*note Built-in Variables::) provides access to -the current user's real and effective user and group ID numbers, and if -available, the user's supplementary group set. However, because these -are numbers, they do not provide very useful information to the average -user. There needs to be some way to find the user information +the current user's real and effective user and group ID numbers, and, +if available, the user's supplementary group set. However, because +these are numbers, they do not provide very useful information to the +average user. There needs to be some way to find the user information associated with the user and group ID numbers. This minor node presents a suite of functions for retrieving information from the user database. *Note Group Functions::, for a similar suite that retrieves @@ -15811,7 +15835,7 @@ kept. Instead, it provides the `<pwd.h>' header file and several C language subroutines for obtaining user information. The primary function is `getpwent()', for "get password entry." The "password" comes from the original user database file, `/etc/passwd', which stores -user information, along with the encrypted passwords (hence the name). +user information along with the encrypted passwords (hence the name). 
Although an `awk' program could simply read `/etc/passwd' directly, this file may not contain complete information about the system's set @@ -15859,7 +15883,7 @@ Encrypted password User-ID The user's numeric user ID number. (On some systems, it's a C - `long', and not an `int'. Thus we cast it to `long' for all + `long', and not an `int'. Thus, we cast it to `long' for all cases.) Group-ID @@ -15958,8 +15982,8 @@ or on some other `awk' implementation. `PROCINFO["FS"]', is similar. The main part of the function uses a loop to read database lines, -split the line into fields, and then store the line into each array as -necessary. When the loop is done, `_pw_init()' cleans up by closing +split the lines into fields, and then store the lines into each array +as necessary. When the loop is done, `_pw_init()' cleans up by closing the pipeline, setting `_pw_inited' to one, and restoring `FS' (and `FIELDWIDTHS' or `FPAT' if necessary), `RS', and `$0'. The use of `_pw_count' is explained shortly. @@ -16087,7 +16111,7 @@ Group Password Group ID Number The group's numeric group ID number; the association of name to number must be unique within the file. (On some systems it's a C - `long', and not an `int'. Thus we cast it to `long' for all + `long', and not an `int'. Thus, we cast it to `long' for all cases.) Group Member List @@ -16177,29 +16201,30 @@ to ensure that the database is scanned no more than once. The `_gr_init()' function first saves `FS', `RS', and `$0', and then sets `FS' and `RS' to the correct values for scanning the group information. It also takes care to note whether `FIELDWIDTHS' or `FPAT' is being -used, and to restore the appropriate field splitting mechanism. +used, and to restore the appropriate field-splitting mechanism. - The group information is stored is several associative arrays. The + The group information is stored in several associative arrays. 
The arrays are indexed by group name (`_gr_byname'), by group ID number (`_gr_bygid'), and by position in the database (`_gr_bycount'). There is an additional array indexed by username (`_gr_groupsbyuser'), which is a space-separated list of groups to which each user belongs. - Unlike the user database, it is possible to have multiple records in -the database for the same group. This is common when a group has a + Unlike in the user database, it is possible to have multiple records +in the database for the same group. This is common when a group has a large number of members. A pair of such entries might look like the following: - tvpeople:*:101:johny,jay,arsenio + tvpeople:*:101:johnny,jay,arsenio tvpeople:*:101:david,conan,tom,joan For this reason, `_gr_init()' looks to see if a group name or group -ID number is already seen. If it is, the usernames are simply +ID number is already seen. If so, the usernames are simply concatenated onto the previous list of users.(1) Finally, `_gr_init()' closes the pipeline to `grcat', restores `FS' -(and `FIELDWIDTHS' or `FPAT' if necessary), `RS', and `$0', initializes -`_gr_count' to zero (it is used later), and makes `_gr_inited' nonzero. +(and `FIELDWIDTHS' or `FPAT', if necessary), `RS', and `$0', +initializes `_gr_count' to zero (it is used later), and makes +`_gr_inited' nonzero. The `getgrnam()' function takes a group name as its argument, and if that group exists, it is returned. Otherwise, it relies on the array @@ -16262,9 +16287,9 @@ very simple, relying on `awk''s associative arrays to do work. ---------- Footnotes ---------- - (1) There is actually a subtle problem with the code just presented. -Suppose that the first time there were no names. This code adds the -names with a leading comma. It also doesn't check that there is a `$4'. + (1) There is a subtle problem with the code just presented. Suppose +that the first time there were no names. This code adds the names with +a leading comma. 
It also doesn't check that there is a `$4'. File: gawk.info, Node: Walking Arrays, Next: Library Functions Summary, Prev: Group Functions, Up: Library Functions @@ -16273,11 +16298,11 @@ File: gawk.info, Node: Walking Arrays, Next: Library Functions Summary, Prev: ================================ *note Arrays of Arrays::, described how `gawk' provides arrays of -arrays. In particular, any element of an array may be either a scalar, +arrays. In particular, any element of an array may be either a scalar or another array. The `isarray()' function (*note Type Functions::) lets you distinguish an array from a scalar. The following function, -`walk_array()', recursively traverses an array, printing each element's -indices and value. You call it with the array and a string +`walk_array()', recursively traverses an array, printing the element +indices and values. You call it with the array and a string representing the name of the array: function walk_array(arr, name, i) @@ -16334,24 +16359,24 @@ File: gawk.info, Node: Library Functions Summary, Next: Library Exercises, Pr * The functions presented here fit into the following categories: General problems - Number-to-string conversion, assertions, rounding, random - number generation, converting characters to numbers, joining - strings, getting easily usable time-of-day information, and - reading a whole file in one shot. + Number-to-string conversion, testing assertions, rounding, + random number generation, converting characters to numbers, + joining strings, getting easily usable time-of-day + information, and reading a whole file in one shot Managing data files Noting data file boundaries, rereading the current file, checking for readable files, checking for zero-length files, - and treating assignments as file names. + and treating assignments as file names Processing command-line options - An `awk' version of the standard C `getopt()' function. 
+ An `awk' version of the standard C `getopt()' function Reading the user and group databases - Two sets of routines that parallel the C library versions. + Two sets of routines that parallel the C library versions Traversing arrays of arrays - A simple function to traverse an array of arrays to any depth. + A simple function to traverse an array of arrays to any depth @@ -16446,7 +16471,7 @@ you. to replace the installed versions on your system. Nor may all of these programs be fully compliant with the most recent POSIX standard. This is not a problem; their purpose is to illustrate `awk' language -programming for "real world" tasks. +programming for "real-world" tasks. The programs are presented in alphabetical order. @@ -16472,7 +16497,7 @@ separated by TABs by default, but you may supply a command-line option to change the field "delimiter" (i.e., the field-separator character). `cut''s definition of fields is less general than `awk''s. - A common use of `cut' might be to pull out just the login name of + A common use of `cut' might be to pull out just the login names of logged-on users from the output of `who'. For example, the following pipeline generates a sorted, unique list of the logged-on users: @@ -16881,7 +16906,7 @@ unsuccessful match. If the line does not match, the `next' statement just moves on to the next record. A number of additional tests are made, but they are only done if we -are not counting lines. First, if the user only wants exit status +are not counting lines. First, if the user only wants the exit status (`no_print' is true), then it is enough to know that _one_ line in this file matched, and we can skip on to the next file with `nextfile'. Similarly, if we are only printing file names, we can print the file @@ -16915,7 +16940,7 @@ line is printed, with a leading file name and colon if necessary: } The `END' rule takes care of producing the correct exit status. 
If -there are no matches, the exit status is one; otherwise it is zero: +there are no matches, the exit status is one; otherwise, it is zero: END { exit (total == 0) @@ -16957,7 +16982,8 @@ a more palatable output than just individual numbers. Here is a simple version of `id' written in `awk'. It uses the user database library functions (*note Passwd Functions::) and the group -database library functions (*note Group Functions::): +database library functions (*note Group Functions::) from *note Library +Functions::. The program is fairly straightforward. All the work is done in the `BEGIN' rule. The user and group ID numbers are obtained from @@ -17054,8 +17080,8 @@ is as follows:(1) By default, the output files are named `xaa', `xab', and so on. Each file has 1,000 lines in it, with the likely exception of the last file. To change the number of lines in each file, supply a number on the -command line preceded with a minus (e.g., `-500' for files with 500 -lines in them instead of 1,000). To change the name of the output +command line preceded with a minus sign (e.g., `-500' for files with +500 lines in them instead of 1,000). To change the names of the output files to something like `myfileaa', `myfileab', and so on, supply an additional argument that specifies the file name prefix. @@ -17692,7 +17718,7 @@ checking and setting of defaults: the delay, the count, and the message to print. If the user supplied a message without the ASCII BEL character (known as the "alert" character, `"\a"'), then it is added to the message. (On many systems, printing the ASCII BEL generates an -audible alert. Thus when the alarm goes off, the system calls attention +audible alert. Thus, when the alarm goes off, the system calls attention to itself in case the user is not looking at the computer.) Just for a change, this program uses a `switch' statement (*note Switch Statement::), but the processing could be done with a series of @@ -17824,7 +17850,7 @@ the "from" list. 
Once upon a time, a user proposed adding a transliteration function to `gawk'. The following program was written to prove that character transliteration could be done with a user-level function. This program -is not as complete as the system `tr' utility but it does most of the +is not as complete as the system `tr' utility, but it does most of the job. The `translate' program was written long before `gawk' acquired the @@ -17834,13 +17860,13 @@ and `gsub()' built-in functions (*note String Functions::). There are two functions. The first, `stranslate()', takes three arguments: `from' - A list of characters from which to translate. + A list of characters from which to translate `to' - A list of characters to which to translate. + A list of characters to which to translate `target' - The string on which to do the translation. + The string on which to do the translation Associative arrays make the translation part fairly easy. `t_ar' holds the "to" characters, indexed by the "from" characters. Then a @@ -17848,7 +17874,7 @@ simple loop goes through `from', one character at a time. For each character in `from', if the character appears in `target', it is replaced with the corresponding `to' character. - The `translate()' function calls `stranslate()' using `$0' as the + The `translate()' function calls `stranslate()', using `$0' as the target. The main program sets two global variables, `FROM' and `TO', from the command line, and then changes `ARGV' so that `awk' reads from the standard input. @@ -17857,7 +17883,7 @@ the standard input. record: # translate.awk --- do tr-like stuff - # Bugs: does not handle things like: tr A-Z a-z, it has + # Bugs: does not handle things like tr A-Z a-z; it has # to be spelled out. However, if `to' is shorter than `from', # the last character in `to' is used for the rest of `from'. 
@@ -17935,13 +17961,13 @@ File: gawk.info, Node: Labels Program, Next: Word Sorting, Prev: Translate Pr 11.3.4 Printing Mailing Labels ------------------------------ -Here is a "real world"(1) program. This script reads lists of names and +Here is a "real-world"(1) program. This script reads lists of names and addresses and generates mailing labels. Each page of labels has 20 labels on it, two across and 10 down. The addresses are guaranteed to be no more than five lines of data. Each address is separated from the next by a blank line. - The basic idea is to read 20 labels worth of data. Each line of + The basic idea is to read 20 labels' worth of data. Each line of each label is stored in the `line' array. The single rule takes care of filling the `line' array and printing the page when 20 labels have been read. @@ -17953,13 +17979,13 @@ splits records at blank lines (*note Records::). It sets `MAXLINES' to Most of the work is done in the `printpage()' function. The label lines are stored sequentially in the `line' array. But they have to -print horizontally; `line[1]' next to `line[6]', `line[2]' next to +print horizontally: `line[1]' next to `line[6]', `line[2]' next to `line[7]', and so on. Two loops accomplish this. The outer loop, controlled by `i', steps through every 10 lines of data; this is each row of labels. The inner loop, controlled by `j', goes through the -lines within the row. As `j' goes from 0 to 4, `i+j' is the `j'-th -line in the row, and `i+j+5' is the entry next to it. The output ends -up looking something like this: +lines within the row. As `j' goes from 0 to 4, `i+j' is the `j'th line +in the row, and `i+j+5' is the entry next to it. The output ends up +looking something like this: line 1 line 6 line 2 line 7 @@ -18062,8 +18088,8 @@ a useful format. 
printf "%s\t%d\n", word, freq[word] } - The program relies on `awk''s default field splitting mechanism to -break each line up into "words," and uses an associative array named + The program relies on `awk''s default field-splitting mechanism to +break each line up into "words" and uses an associative array named `freq', indexed by each word, to count the number of times the word occurs. In the `END' rule, it prints the counts. @@ -18149,7 +18175,7 @@ File: gawk.info, Node: History Sorting, Next: Extract Program, Prev: Word Sor 11.3.6 Removing Duplicates from Unsorted Text --------------------------------------------- -The `uniq' program (*note Uniq Program::), removes duplicate lines from +The `uniq' program (*note Uniq Program::) removes duplicate lines from _sorted_ data. Suppose, however, you need to remove duplicate lines from a data @@ -18202,7 +18228,7 @@ hand. Here we present a program that can extract parts of a Texinfo input file into separate files. This Info file is written in Texinfo -(http://www.gnu.org/software/texinfo/), the GNU project's document +(http://www.gnu.org/software/texinfo/), the GNU Project's document formatting language. A single Texinfo source file can be used to produce both printed documentation, with TeX, and online documentation. (The Texinfo language is described fully, starting with *note @@ -18243,7 +18269,7 @@ them in a standard directory where `gawk' can find them. The Texinfo file looks something like this: ... - This program has a @code{BEGIN} rule, + This program has a @code{BEGIN} rule that prints a nice message: @example @@ -18268,7 +18294,7 @@ upper- and lowercase letters in the directives won't matter. 
given (`NF' is at least three) and also checking that the command exits with a zero exit status, signifying OK: - # extract.awk --- extract files and run programs from texinfo files + # extract.awk --- extract files and run programs from Texinfo files BEGIN { IGNORECASE = 1 } @@ -18295,11 +18321,11 @@ The variable `e' is used so that the rule fits nicely on the screen. file name is given in the directive. If the file named is not the current file, then the current file is closed. Keeping the current file open until a new file is encountered allows the use of the `>' -redirection for printing the contents, keeping open file management +redirection for printing the contents, keeping open-file management simple. The `for' loop does the work. It reads lines using `getline' (*note -Getline::). For an unexpected end of file, it calls the +Getline::). For an unexpected end-of-file, it calls the `unexpected_eof()' function. If the line is an "endfile" line, then it breaks out of the loop. If the line is an `@group' or `@end group' line, then it ignores it and goes on to the next line. Similarly, @@ -18389,10 +18415,10 @@ File: gawk.info, Node: Simple Sed, Next: Igawk Program, Prev: Extract Program 11.3.8 A Simple Stream Editor ----------------------------- -The `sed' utility is a stream editor, a program that reads a stream of -data, makes changes to it, and passes it on. It is often used to make -global changes to a large file or to a stream of data generated by a -pipeline of commands. Although `sed' is a complicated program in its +The `sed' utility is a "stream editor", a program that reads a stream +of data, makes changes to it, and passes it on. It is often used to +make global changes to a large file or to a stream of data generated by +a pipeline of commands. Although `sed' is a complicated program in its own right, its most common use is to perform global substitutions in the middle of a pipeline: @@ -18506,7 +18532,7 @@ include a library function twice. 
`igawk' should behave just like `gawk' externally. This means it should accept all of `gawk''s command-line arguments, including the -ability to have multiple source files specified via `-f', and the +ability to have multiple source files specified via `-f' and the ability to mix command-line and library source files. The program is written using the POSIX Shell (`sh') command @@ -18536,8 +18562,8 @@ language.(1) It works as follows: file names). This program uses shell variables extensively: for storing -command-line arguments, the text of the `awk' program that will expand -the user's program, for the user's original program, and for the +command-line arguments and the text of the `awk' program that will +expand the user's program, for the user's original program, and for the expanded program. Doing so removes some potential problems that might arise were we to use temporary files instead, at the cost of making the script somewhat more complicated. @@ -18795,7 +18821,7 @@ It's done in these steps: The last step is to call `gawk' with the expanded program, along with the original options and command-line arguments that the user -supplied. +supplied: eval gawk $opts -- '"$processed_program"' '"$@"' @@ -18858,15 +18884,15 @@ One word is an anagram of another if both words contain the same letters Column 2, Problem C, of Jon Bentley's `Programming Pearls', Second Edition, presents an elegant algorithm. The idea is to give words that are anagrams a common signature, sort all the words together by their -signature, and then print them. Dr. Bentley observes that taking the -letters in each word and sorting them produces that common signature. +signatures, and then print them. Dr. Bentley observes that taking the +letters in each word and sorting them produces those common signatures. 
The following program uses arrays of arrays to bring together words with the same signature and array sorting to print the words in sorted order: - # anagram.awk --- An implementation of the anagram finding algorithm - # from Jon Bentley's "Programming Pearls", 2nd edition. + # anagram.awk --- An implementation of the anagram-finding algorithm + # from Jon Bentley's "Programming Pearls," 2nd edition. # Addison Wesley, 2000, ISBN 0-201-65788-0. # Column 2, Problem C, section 2.8, pp 18-20. @@ -18886,7 +18912,7 @@ signature; the second dimension is the word itself: apart into individual letters, sorts the letters, and then joins them back together: - # word2key --- split word apart into letters, sort, joining back together + # word2key --- split word apart into letters, sort, and join back together function word2key(word, a, i, n, result) { @@ -18984,12 +19010,13 @@ File: gawk.info, Node: Programs Summary, Next: Programs Exercises, Prev: Misc characters. The ability to use `split()' with the empty string as the separator can considerably simplify such tasks. - * The library functions from *note Library Functions::, proved their - usefulness for a number of real (if small) programs. + * The examples here demonstrate the usefulness of the library + functions from *note Library Functions::, for a number of real (if + small) programs. * Besides reinventing POSIX wheels, other programs solved a - selection of interesting problems, such as finding duplicates - words in text, printing mailing labels, and finding anagrams. + selection of interesting problems, such as finding duplicate words + in text, printing mailing labels, and finding anagrams. @@ -19106,16 +19133,16 @@ File: gawk.info, Node: Advanced Features, Next: Internationalization, Prev: S This major node discusses advanced features in `gawk'. It's a bit of a "grab bag" of items that are otherwise unrelated to each other. 
-First, a command-line option allows `gawk' to recognize nondecimal -numbers in input data, not just in `awk' programs. Then, `gawk''s -special features for sorting arrays are presented. Next, two-way I/O, -discussed briefly in earlier parts of this Info file, is described in -full detail, along with the basics of TCP/IP networking. Finally, -`gawk' can "profile" an `awk' program, making it possible to tune it -for performance. +First, we look at a command-line option that allows `gawk' to recognize +nondecimal numbers in input data, not just in `awk' programs. Then, +`gawk''s special features for sorting arrays are presented. Next, +two-way I/O, discussed briefly in earlier parts of this Info file, is +described in full detail, along with the basics of TCP/IP networking. +Finally, we see how `gawk' can "profile" an `awk' program, making it +possible to tune it for performance. - A number of advanced features require separate major nodes of their -own: + Additional advanced features are discussed in separate major nodes +of their own: * *note Internationalization::, discusses how to internationalize your `awk' programs, so that they can speak multiple national @@ -19189,7 +19216,7 @@ File: gawk.info, Node: Array Sorting, Next: Two-way I/O, Prev: Nondecimal Dat 12.2 Controlling Array Traversal and Array Sorting ================================================== -`gawk' lets you control the order in which a `for (i in array)' loop +`gawk' lets you control the order in which a `for (INDX in ARRAY)' loop traverses an array. In addition, two built-in functions, `asort()' and `asorti()', let @@ -19208,9 +19235,9 @@ File: gawk.info, Node: Controlling Array Traversal, Next: Array Sorting Functi 12.2.1 Controlling Array Traversal ---------------------------------- -By default, the order in which a `for (i in array)' loop scans an array -is not defined; it is generally based upon the internal implementation -of arrays inside `awk'. 
+By default, the order in which a `for (INDX in ARRAY)' loop scans an +array is not defined; it is generally based upon the internal +implementation of arrays inside `awk'. Often, though, it is desirable to be able to loop over the elements in a particular order that you, the programmer, choose. `gawk' lets @@ -19232,21 +19259,22 @@ arguments: RETURN < 0; 0; OR > 0 } - Here, I1 and I2 are the indices, and V1 and V2 are the corresponding -values of the two elements being compared. Either V1 or V2, or both, -can be arrays if the array being traversed contains subarrays as values. -(*Note Arrays of Arrays::, for more information about subarrays.) The -three possible return values are interpreted as follows: + Here, `i1' and `i2' are the indices, and `v1' and `v2' are the +corresponding values of the two elements being compared. Either `v1' +or `v2', or both, can be arrays if the array being traversed contains +subarrays as values. (*Note Arrays of Arrays::, for more information +about subarrays.) The three possible return values are interpreted as +follows: `comp_func(i1, v1, i2, v2) < 0' - Index I1 comes before index I2 during loop traversal. + Index `i1' comes before index `i2' during loop traversal. `comp_func(i1, v1, i2, v2) == 0' - Indices I1 and I2 come together but the relative order with + Indices `i1' and `i2' come together, but the relative order with respect to each other is undefined. `comp_func(i1, v1, i2, v2) > 0' - Index I1 comes after index I2 during loop traversal. + Index `i1' comes after index `i2' during loop traversal. Our first comparison function can be used to scan an array in numerical order of the indices: @@ -19389,7 +19417,7 @@ elements compare equal. This is usually not a problem, but letting the tied elements come out in arbitrary order can be an issue, especially when comparing item values. The partial ordering of the equal elements may change the next time the array is traversed, if other elements are -added or removed from the array. 
One way to resolve ties when +added to or removed from the array. One way to resolve ties when comparing elements with otherwise equal values is to include the indices in the comparison rules. Note that doing this may make the loop traversal less efficient, so consider it only if necessary. The @@ -19423,14 +19451,14 @@ lowercase letters as equivalent or distinct. Another point to keep in mind is that in the case of subarrays, the element values can themselves be arrays; a production comparison -function should use the `isarray()' function (*note Type Functions::), +function should use the `isarray()' function (*note Type Functions::) to check for this, and choose a defined sorting order for subarrays. All sorting based on `PROCINFO["sorted_in"]' is disabled in POSIX mode, because the `PROCINFO' array is not special in that case. As a side note, sorting the array indices before traversing the -array has been reported to add 15% to 20% overhead to the execution +array has been reported to add a 15% to 20% overhead to the execution time of `awk' programs. For this reason, sorted array traversal is not the default. @@ -19479,8 +19507,8 @@ array is not affected. Often, what's needed is to sort on the values of the _indices_ instead of the values of the elements. To do that, use the `asorti()' function. The interface and behavior are identical to that of -`asort()', except that the index values are used for sorting, and -become the values of the result array: +`asort()', except that the index values are used for sorting and become +the values of the result array: { source[$0] = some_func($0) } @@ -19512,8 +19540,8 @@ chooses_, taking into account just the indices, just the values, or both. This is extremely powerful. 
Once the array is sorted, `asort()' takes the _values_ in their -final order, and uses them to fill in the result array, whereas -`asorti()' takes the _indices_ in their final order, and uses them to +final order and uses them to fill in the result array, whereas +`asorti()' takes the _indices_ in their final order and uses them to fill in the result array. NOTE: Copying array indices and elements isn't expensive in terms @@ -19711,7 +19739,7 @@ REMOTE-PORT name. NOTE: Failure in opening a two-way socket will result in a - non-fatal error being returned to the calling code. The value of + nonfatal error being returned to the calling code. The value of `ERRNO' indicates the error (*note Auto-set::). Consider the following very simple example: @@ -19792,8 +19820,8 @@ First, the `awk' program: junk Here is the `awkprof.out' that results from running the `gawk' -profiler on this program and data. (This example also illustrates that -`awk' programmers sometimes get up very early in the morning to work.) +profiler on this program and data (this example also illustrates that +`awk' programmers sometimes get up very early in the morning to work): # gawk profile, created Mon Sep 29 05:16:21 2014 @@ -19846,7 +19874,7 @@ profiler on this program and data. (This example also illustrates that output. They are as follows: * The program is printed in the order `BEGIN' rules, `BEGINFILE' - rules, pattern/action rules, `ENDFILE' rules, `END' rules and + rules, pattern-action rules, `ENDFILE' rules, `END' rules, and functions, listed alphabetically. Multiple `BEGIN' and `END' rules retain their separate identities, as do multiple `BEGINFILE' and `ENDFILE' rules. @@ -19891,13 +19919,13 @@ output. They are as follows: scalar, it gets parenthesized. * `gawk' supplies leading comments in front of the `BEGIN' and `END' - rules, the `BEGINFILE' and `ENDFILE' rules, the pattern/action + rules, the `BEGINFILE' and `ENDFILE' rules, the pattern-action rules, and the functions. 
The profiled version of your program may not look exactly like what you typed when you wrote it. This is because `gawk' creates the -profiled version by "pretty printing" its internal representation of +profiled version by "pretty-printing" its internal representation of the program. The advantage to this is that `gawk' can produce a standard representation. Also, things such as: @@ -19947,15 +19975,15 @@ output profile file. produces the profile and the function call trace and then exits. When `gawk' runs on MS-Windows systems, it uses the `INT' and `QUIT' -signals for producing the profile and, in the case of the `INT' signal, +signals for producing the profile, and in the case of the `INT' signal, `gawk' exits. This is because these systems don't support the `kill' command, so the only signals you can deliver to a program are those generated by the keyboard. The `INT' signal is generated by the -`Ctrl-<C>' or `Ctrl-<BREAK>' key, while the `QUIT' signal is generated -by the `Ctrl-<\>' key. +`Ctrl-c' or `Ctrl-BREAK' key, while the `QUIT' signal is generated by +the `Ctrl-\' key. Finally, `gawk' also accepts another option, `--pretty-print'. When -called this way, `gawk' "pretty prints" the program into `awkprof.out', +called this way, `gawk' "pretty-prints" the program into `awkprof.out', without any execution counts. NOTE: Once upon a time, the `--pretty-print' option would also run @@ -20007,7 +20035,7 @@ File: gawk.info, Node: Advanced Features Summary, Prev: Profiling, Up: Advanc two-way communications. * By using special file names with the `|&' operator, you can open a - TCP/IP (or UDP/IP) connection to remote hosts in the Internet. + TCP/IP (or UDP/IP) connection to remote hosts on the Internet. `gawk' supports both IPv4 and IPv6. * You can generate statement count profiles of your program. 
This @@ -20016,7 +20044,7 @@ File: gawk.info, Node: Advanced Features Summary, Prev: Profiling, Up: Advanc `USR1' signal while profiling causes `gawk' to dump the profile and keep going, including a function call stack. - * You can also just "pretty print" the program. This currently also + * You can also just "pretty-print" the program. This currently also runs the program, but that will change in the next major release. @@ -26492,6 +26520,9 @@ the current version of `gawk'. - The `bindtextdomain()', `dcgettext()' and `dcngettext()' functions for internationalization (*note Programmer i18n::). + - The `div()' function for doing integer division and remainder + (*note Numeric Functions::). + * Changes and/or additions in the command-line options: - The `AWKPATH' environment variable for specifying a path @@ -26546,7 +26577,10 @@ the current version of `gawk'. - Ultrix - * Support for MirBSD was removed at `gawk' version 4.2. + * Support for the following systems was removed from the code for + `gawk' version 4.2: + + - MirBSD @@ -26933,6 +26967,26 @@ in POSIX `awk', in the order they were added to `gawk'. * The dynamic extension interface was completely redone (*note Dynamic Extensions::). + * Support for Ultrix was removed. + + + Version 4.2 introduced the following changes: + + * Changes to `ENVIRON' are reflected into `gawk''s environment and + that of programs that it runs. *Note Auto-set::. + + * The `--pretty-print' option no longer runs the `awk' program too. + *Note Options::. + + * The `igawk' program and its manual page are no longer installed + when `gawk' is built. *Note Igawk Program::. + + * The `div()' function. *Note Numeric Functions::. + + * The maximum number of hexadecimal digits in `\x' escapes is now two. + *Note Escape Sequences::. + + * Support for MirBSD was removed. 
File: gawk.info, Node: Common Extensions, Next: Ranges and Locales, Prev: Feature History, Up: Language History @@ -31588,7 +31642,7 @@ Index * * (asterisk), * operator, as regexp operator: Regexp Operators. (line 89) * * (asterisk), * operator, null strings, matching: String Functions. - (line 536) + (line 537) * * (asterisk), ** operator <1>: Precedence. (line 49) * * (asterisk), ** operator: Arithmetic Ops. (line 81) * * (asterisk), **= operator <1>: Precedence. (line 95) @@ -31647,7 +31701,7 @@ Index * --re-interval option: Options. (line 279) * --sandbox option: Options. (line 286) * --sandbox option, disabling system() function: I/O Functions. - (line 128) + (line 129) * --sandbox option, input redirection with getline: Getline. (line 19) * --sandbox option, output redirection with print, printf: Redirection. (line 6) @@ -31888,7 +31942,7 @@ Index * arrays: Arrays. (line 6) * arrays of arrays: Arrays of Arrays. (line 6) * arrays, an example of using: Array Example. (line 6) -* arrays, and IGNORECASE variable: Array Intro. (line 94) +* arrays, and IGNORECASE variable: Array Intro. (line 100) * arrays, as parameters to functions: Pass By Value/Reference. (line 44) * arrays, associative: Array Intro. (line 50) @@ -31915,7 +31969,7 @@ Index (line 6) * arrays, sorting, and IGNORECASE variable: Array Sorting Functions. (line 83) -* arrays, sparse: Array Intro. (line 72) +* arrays, sparse: Array Intro. (line 76) * arrays, subscripts, uninitialized variables as: Uninitialized Subscripts. (line 6) * arrays, unassigned elements: Reference to Elements. @@ -31949,7 +32003,7 @@ Index * asterisk (*), * operator, as regexp operator: Regexp Operators. (line 89) * asterisk (*), * operator, null strings, matching: String Functions. - (line 536) + (line 537) * asterisk (*), ** operator <1>: Precedence. (line 49) * asterisk (*), ** operator: Arithmetic Ops. (line 81) * asterisk (*), **= operator <1>: Precedence. 
(line 95) @@ -32093,7 +32147,7 @@ Index * BEGINFILE pattern: BEGINFILE/ENDFILE. (line 6) * BEGINFILE pattern, Boolean patterns and: Expression Patterns. (line 69) -* beginfile() user-defined function: Filetrans Function. (line 61) +* beginfile() user-defined function: Filetrans Function. (line 62) * Bentley, Jon: Glossary. (line 207) * Benzinger, Michael: Contributors. (line 97) * Berry, Karl <1>: Ranges and Locales. (line 74) @@ -32156,7 +32210,7 @@ Index * Brennan, Michael: Foreword3. (line 84) * Brian Kernighan's awk <1>: I/O Functions. (line 43) * Brian Kernighan's awk <2>: Gory Details. (line 19) -* Brian Kernighan's awk <3>: String Functions. (line 492) +* Brian Kernighan's awk <3>: String Functions. (line 493) * Brian Kernighan's awk <4>: Delete. (line 51) * Brian Kernighan's awk <5>: Nextfile Statement. (line 47) * Brian Kernighan's awk <6>: Continue Statement. (line 44) @@ -32182,8 +32236,8 @@ Index * Buening, Andreas <2>: Contributors. (line 92) * Buening, Andreas: Acknowledgments. (line 60) * buffering, input/output <1>: Two-way I/O. (line 52) -* buffering, input/output: I/O Functions. (line 140) -* buffering, interactive vs. noninteractive: I/O Functions. (line 75) +* buffering, input/output: I/O Functions. (line 141) +* buffering, interactive vs. noninteractive: I/O Functions. (line 76) * buffers, flushing: I/O Functions. (line 32) * buffers, operators for: GNU Regexp Operators. (line 48) @@ -32208,8 +32262,8 @@ Index * case keyword: Switch Statement. (line 6) * case sensitivity, and regexps: User-modified. (line 76) * case sensitivity, and string comparisons: User-modified. (line 76) -* case sensitivity, array indices and: Array Intro. (line 94) -* case sensitivity, converting case: String Functions. (line 522) +* case sensitivity, array indices and: Array Intro. (line 100) +* case sensitivity, converting case: String Functions. (line 523) * case sensitivity, example programs: Library Functions. (line 53) * case sensitivity, gawk: Case-sensitivity. 
(line 26) * case sensitivity, regexps and: Case-sensitivity. (line 6) @@ -32287,7 +32341,7 @@ Index * common extensions, \x escape sequence: Escape Sequences. (line 61) * common extensions, BINMODE variable: PC Using. (line 33) * common extensions, delete to delete entire arrays: Delete. (line 39) -* common extensions, func keyword: Definition Syntax. (line 93) +* common extensions, func keyword: Definition Syntax. (line 98) * common extensions, length() applied to an array: String Functions. (line 201) * common extensions, RS as a regexp: gawk split records. (line 6) @@ -32338,9 +32392,9 @@ Index * control statements: Statements. (line 6) * controlling array scanning order: Controlling Scanning. (line 14) -* convert string to lower case: String Functions. (line 523) -* convert string to number: String Functions. (line 390) -* convert string to upper case: String Functions. (line 529) +* convert string to lower case: String Functions. (line 524) +* convert string to number: String Functions. (line 391) +* convert string to upper case: String Functions. (line 530) * converting integer array subscripts: Numeric Array Subscripts. (line 31) * converting, dates to timestamps: Time Functions. (line 76) @@ -32418,7 +32472,7 @@ Index (line 148) * dark corner, regexp constants, as arguments to user-defined functions: Using Constant Regexps. (line 43) -* dark corner, split() function: String Functions. (line 361) +* dark corner, split() function: String Functions. (line 362) * dark corner, strings, storing: gawk split records. (line 83) * dark corner, value of ARGV[0]: Auto-set. (line 39) * data, fixed-width: Constant Size. (line 6) @@ -32635,7 +32689,7 @@ Index (line 77) * differences in awk and gawk, SYMTAB variable: Auto-set. (line 283) * differences in awk and gawk, TEXTDOMAIN variable: User-modified. - (line 151) + (line 152) * differences in awk and gawk, trunc-mod operation: Arithmetic Ops. (line 66) * directories, command-line: Command-line directories. 
@@ -32713,9 +32767,9 @@ Index * END pattern, print statement and: I/O And BEGIN/END. (line 16) * ENDFILE pattern: BEGINFILE/ENDFILE. (line 6) * ENDFILE pattern, Boolean patterns and: Expression Patterns. (line 69) -* endfile() user-defined function: Filetrans Function. (line 61) -* endgrent() function (C library): Group Functions. (line 211) -* endgrent() user-defined function: Group Functions. (line 214) +* endfile() user-defined function: Filetrans Function. (line 62) +* endgrent() function (C library): Group Functions. (line 212) +* endgrent() user-defined function: Group Functions. (line 215) * endpwent() function (C library): Passwd Functions. (line 207) * endpwent() user-defined function: Passwd Functions. (line 210) * English, Steve: Advanced Features. (line 6) @@ -32809,7 +32863,7 @@ Index * extensions, common, BINMODE variable: PC Using. (line 33) * extensions, common, delete to delete entire arrays: Delete. (line 39) * extensions, common, fflush() function: I/O Functions. (line 43) -* extensions, common, func keyword: Definition Syntax. (line 93) +* extensions, common, func keyword: Definition Syntax. (line 98) * extensions, common, length() applied to an array: String Functions. (line 201) * extensions, common, RS as a regexp: gawk split records. (line 6) @@ -32955,7 +33009,7 @@ Index * format time string: Time Functions. (line 48) * formats, numeric output: OFMT. (line 6) * formatting output: Printf. (line 6) -* formatting strings: String Functions. (line 383) +* formatting strings: String Functions. (line 384) * forward slash (/) to enclose regular expressions: Regexp. (line 10) * forward slash (/), / operator: Precedence. (line 55) * forward slash (/), /= operator <1>: Precedence. (line 95) @@ -33026,7 +33080,7 @@ Index * functions, library, user database, reading: Passwd Functions. (line 6) * functions, names of: Definition Syntax. (line 23) -* functions, recursive: Definition Syntax. (line 83) +* functions, recursive: Definition Syntax. 
(line 88) * functions, string-translation: I18N Functions. (line 6) * functions, undefined: Pass By Value/Reference. (line 68) @@ -33082,7 +33136,7 @@ Index * gawk, IGNORECASE variable in <1>: Array Sorting Functions. (line 83) * gawk, IGNORECASE variable in <2>: String Functions. (line 58) -* gawk, IGNORECASE variable in <3>: Array Intro. (line 94) +* gawk, IGNORECASE variable in <3>: Array Intro. (line 100) * gawk, IGNORECASE variable in <4>: User-modified. (line 76) * gawk, IGNORECASE variable in: Case-sensitivity. (line 26) * gawk, implementation issues: Notes. (line 6) @@ -33124,7 +33178,7 @@ Index * gawk, splitting fields and: Constant Size. (line 87) * gawk, string-translation functions: I18N Functions. (line 6) * gawk, SYMTAB array in: Auto-set. (line 283) -* gawk, TEXTDOMAIN variable in: User-modified. (line 151) +* gawk, TEXTDOMAIN variable in: User-modified. (line 152) * gawk, timestamps: Time Functions. (line 6) * gawk, uses for: Preface. (line 34) * gawk, versions of, information about, printing: Options. (line 300) @@ -33149,12 +33203,12 @@ Index * getaddrinfo() function (C library): TCP/IP Networking. (line 38) * getgrent() function (C library): Group Functions. (line 6) * getgrent() user-defined function: Group Functions. (line 6) -* getgrgid() function (C library): Group Functions. (line 182) -* getgrgid() user-defined function: Group Functions. (line 185) -* getgrnam() function (C library): Group Functions. (line 171) -* getgrnam() user-defined function: Group Functions. (line 176) -* getgruser() function (C library): Group Functions. (line 191) -* getgruser() function, user-defined: Group Functions. (line 194) +* getgrgid() function (C library): Group Functions. (line 183) +* getgrgid() user-defined function: Group Functions. (line 186) +* getgrnam() function (C library): Group Functions. (line 172) +* getgrnam() user-defined function: Group Functions. (line 177) +* getgruser() function (C library): Group Functions. 
(line 192) +* getgruser() function, user-defined: Group Functions. (line 195) * getline command: Reading Files. (line 20) * getline command, _gr_init() user-defined function: Group Functions. (line 83) @@ -33220,7 +33274,7 @@ Index * gsub <1>: String Functions. (line 140) * gsub: Using Constant Regexps. (line 43) -* gsub() function, arguments of: String Functions. (line 462) +* gsub() function, arguments of: String Functions. (line 463) * gsub() function, escape processing: Gory Details. (line 6) * h debugger command (alias for help): Miscellaneous Debugger Commands. (line 66) @@ -33247,7 +33301,7 @@ Index * hyphen (-), in bracket expressions: Bracket Expressions. (line 17) * i debugger command (alias for info): Debugger Info. (line 13) * id utility: Id Program. (line 6) -* id.awk program: Id Program. (line 30) +* id.awk program: Id Program. (line 31) * if statement: If Statement. (line 6) * if statement, actions, changing: Ranges. (line 25) * if statement, use of regexps in: Regexp Usage. (line 19) @@ -33255,7 +33309,7 @@ Index * ignore breakpoint: Breakpoint Control. (line 87) * ignore debugger command: Breakpoint Control. (line 87) * IGNORECASE variable: User-modified. (line 76) -* IGNORECASE variable, and array indices: Array Intro. (line 94) +* IGNORECASE variable, and array indices: Array Intro. (line 100) * IGNORECASE variable, and array sorting functions: Array Sorting Functions. (line 83) * IGNORECASE variable, in example programs: Library Functions. @@ -33323,12 +33377,12 @@ Index * integers, arbitrary precision: Arbitrary Precision Integers. (line 6) * integers, unsigned: Computer Arithmetic. (line 41) -* interacting with other programs: I/O Functions. (line 106) +* interacting with other programs: I/O Functions. (line 107) * internationalization <1>: I18N and L10N. (line 6) * internationalization: I18N Functions. (line 6) * internationalization, localization <1>: Internationalization. (line 13) -* internationalization, localization: User-modified. 
(line 151) +* internationalization, localization: User-modified. (line 152) * internationalization, localization, character classes: Bracket Expressions. (line 101) * internationalization, localization, gawk and: Internationalization. @@ -33344,7 +33398,7 @@ Index * interpreted programs: Basic High Level. (line 15) * interval expressions, regexp operator: Regexp Operators. (line 116) * inventory-shipped file: Sample Data Files. (line 32) -* invoke shell command: I/O Functions. (line 106) +* invoke shell command: I/O Functions. (line 107) * isarray: Type Functions. (line 11) * ISO: Glossary. (line 461) * ISO 8859-1: Glossary. (line 197) @@ -33504,7 +33558,7 @@ Index * matching, expressions, See comparison expressions: Typing and Comparison. (line 9) * matching, leftmost longest: Multiple Line. (line 26) -* matching, null strings: String Functions. (line 536) +* matching, null strings: String Functions. (line 537) * mawk utility <1>: Other Versions. (line 48) * mawk utility <2>: Nextfile Statement. (line 47) * mawk utility <3>: Concatenation. (line 36) @@ -33595,7 +33649,7 @@ Index (line 43) * null strings, converting numbers to strings: Strings And Numbers. (line 21) -* null strings, matching: String Functions. (line 536) +* null strings, matching: String Functions. (line 537) * number as string of bits: Bitwise Functions. (line 110) * number of array elements: String Functions. (line 201) * number sign (#), #! (executable scripts): Executable Scripts. @@ -33624,7 +33678,7 @@ Index * OFMT variable <2>: Strings And Numbers. (line 57) * OFMT variable: OFMT. (line 15) * OFMT variable, POSIX awk and: OFMT. (line 27) -* OFS variable <1>: User-modified. (line 113) +* OFS variable <1>: User-modified. (line 114) * OFS variable <2>: Output Separators. (line 6) * OFS variable: Changing Fields. (line 64) * OpenBSD: Glossary. (line 753) @@ -33677,7 +33731,7 @@ Index (line 12) * ord() user-defined function: Ordinal Functions. 
(line 16) * order of evaluation, concatenation: Concatenation. (line 41) -* ORS variable <1>: User-modified. (line 118) +* ORS variable <1>: User-modified. (line 119) * ORS variable: Output Separators. (line 21) * output field separator, See OFS variable: Changing Fields. (line 64) * output record separator, See ORS variable: Output Separators. @@ -33753,7 +33807,7 @@ Index (line 65) * portability, deleting array elements: Delete. (line 56) * portability, example programs: Library Functions. (line 42) -* portability, functions, defining: Definition Syntax. (line 109) +* portability, functions, defining: Definition Syntax. (line 114) * portability, gawk: New Ports. (line 6) * portability, gettext library and: Explaining gettext. (line 11) * portability, internationalization and: I18N Portability. (line 6) @@ -33765,7 +33819,7 @@ Index * portability, operators: Increment Ops. (line 60) * portability, operators, not in POSIX awk: Precedence. (line 98) * portability, POSIXLY_CORRECT environment variable: Options. (line 359) -* portability, substr() function: String Functions. (line 512) +* portability, substr() function: String Functions. (line 513) * portable object files <1>: Translator i18n. (line 6) * portable object files: Explaining gettext. (line 37) * portable object files, converting to message object files: I18N Example. @@ -33798,7 +33852,7 @@ Index * POSIX awk, field separators and <1>: Full Line Fields. (line 16) * POSIX awk, field separators and: Fields. (line 6) * POSIX awk, FS variable and: User-modified. (line 60) -* POSIX awk, function keyword in: Definition Syntax. (line 93) +* POSIX awk, function keyword in: Definition Syntax. (line 98) * POSIX awk, functions and, gsub()/sub(): Gory Details. (line 90) * POSIX awk, functions and, length(): String Functions. (line 180) * POSIX awk, GNU long options and: Options. (line 15) @@ -33817,7 +33871,7 @@ Index * POSIX, gawk extensions not included in: POSIX/GNU. 
(line 6) * POSIX, programs, implementing in awk: Clones. (line 6) * POSIXLY_CORRECT environment variable: Options. (line 339) -* PREC variable: User-modified. (line 123) +* PREC variable: User-modified. (line 124) * precedence <1>: Precedence. (line 6) * precedence: Increment Ops. (line 60) * precedence, regexp operators: Regexp Operators. (line 156) @@ -33832,7 +33886,7 @@ Index * print statement, commas, omitting: Print Examples. (line 31) * print statement, I/O operators in: Precedence. (line 71) * print statement, line continuations and: Print Examples. (line 76) -* print statement, OFMT variable and: User-modified. (line 113) +* print statement, OFMT variable and: User-modified. (line 114) * print statement, See Also redirection, of output: Redirection. (line 17) * print statement, sprintf() function and: Round Function. (line 6) @@ -33891,7 +33945,7 @@ Index * programming conventions, functions, calling: Calling Built-in. (line 10) * programming conventions, functions, writing: Definition Syntax. - (line 65) + (line 70) * programming conventions, gawk extensions: Internal File Ops. (line 45) * programming conventions, private variable names: Library Names. @@ -33947,7 +34001,7 @@ Index * readfile() user-defined function: Readfile Function. (line 30) * reading input files: Reading Files. (line 6) * recipe for a programming language: History. (line 6) -* record separators <1>: User-modified. (line 132) +* record separators <1>: User-modified. (line 133) * record separators: awk split records. (line 6) * record separators, changing: awk split records. (line 85) * record separators, regular expressions as: awk split records. @@ -33960,7 +34014,7 @@ Index * records, splitting input into: Records. (line 6) * records, terminating: awk split records. (line 125) * records, treating files as: gawk split records. (line 93) -* recursive functions: Definition Syntax. (line 83) +* recursive functions: Definition Syntax. 
(line 88) * redirect gawk output, in debugger: Debugger Info. (line 72) * redirection of input: Getline/File. (line 6) * redirection of output: Redirection. (line 6) @@ -34014,7 +34068,7 @@ Index * regular expressions, searching for: Egrep Program. (line 6) * relational operators, See comparison operators: Typing and Comparison. (line 9) -* replace in string: String Functions. (line 408) +* replace in string: String Functions. (line 409) * return debugger command: Debugger Execution Control. (line 54) * return statement, user-defined functions: Return Statement. (line 6) @@ -34025,7 +34079,7 @@ Index (line 11) * revtwoway extension: Extension Sample Rev2way. (line 12) -* rewind() user-defined function: Rewind Function. (line 16) +* rewind() user-defined function: Rewind Function. (line 15) * right angle bracket (>), > operator <1>: Precedence. (line 65) * right angle bracket (>), > operator: Comparison Operators. (line 11) @@ -34059,8 +34113,8 @@ Index * round to nearest integer: Numeric Functions. (line 38) * round() user-defined function: Round Function. (line 16) * rounding numbers: Round Function. (line 6) -* ROUNDMODE variable: User-modified. (line 127) -* RS variable <1>: User-modified. (line 132) +* ROUNDMODE variable: User-modified. (line 128) +* RS variable <1>: User-modified. (line 133) * RS variable: awk split records. (line 12) * RS variable, multiline records and: Multiple Line. (line 17) * rshift: Bitwise Functions. (line 53) @@ -34117,19 +34171,19 @@ Index * separators, field, FIELDWIDTHS variable and: User-modified. (line 37) * separators, field, FPAT variable and: User-modified. (line 43) * separators, field, POSIX and: Fields. (line 6) -* separators, for records <1>: User-modified. (line 132) +* separators, for records <1>: User-modified. (line 133) * separators, for records: awk split records. (line 6) * separators, for records, regular expressions as: awk split records. (line 125) * separators, for statements in actions: Action Overview. 
(line 19) -* separators, subscript: User-modified. (line 145) +* separators, subscript: User-modified. (line 146) * set breakpoint: Breakpoint Control. (line 11) * set debugger command: Viewing And Changing Data. (line 59) * set directory of message catalogs: I18N Functions. (line 12) * set watchpoint: Viewing And Changing Data. (line 67) -* shadowing of variable values: Definition Syntax. (line 71) +* shadowing of variable values: Definition Syntax. (line 76) * shell quoting, rules for: Quoting. (line 6) * shells, piping commands into: Redirection. (line 136) * shells, quoting: Using Shell Variables. @@ -34171,14 +34225,14 @@ Index (line 14) * sidebar, Changing NR and FNR: Auto-set. (line 326) * sidebar, Controlling Output Buffering with system(): I/O Functions. - (line 138) + (line 139) * sidebar, Escape Sequences for Metacharacters: Escape Sequences. (line 137) * sidebar, FS and IGNORECASE: Field Splitting Summary. (line 38) * sidebar, Interactive Versus Noninteractive Buffering: I/O Functions. - (line 73) -* sidebar, Matching the Null String: String Functions. (line 534) + (line 74) +* sidebar, Matching the Null String: String Functions. (line 535) * sidebar, Operator Evaluation Order: Increment Ops. (line 58) * sidebar, Piping into sh: Redirection. (line 134) * sidebar, Pre-POSIX awk Used OFMT for String Conversion: Strings And Numbers. @@ -34186,7 +34240,7 @@ Index * sidebar, Recipe for a Programming Language: History. (line 6) * sidebar, RS = "\0" Is Not Portable: gawk split records. (line 63) * sidebar, So Why Does gawk Have BEGINFILE and ENDFILE?: Filetrans Function. - (line 82) + (line 83) * sidebar, Syntactic Ambiguities Between /= and Regular Expressions: Assignment Ops. (line 146) * sidebar, Understanding #!: Executable Scripts. (line 31) @@ -34245,16 +34299,16 @@ Index * source code, QuikTrim Awk: Other Versions. (line 139) * source code, Solaris awk: Other Versions. (line 100) * source files, search path for: Programs Exercises. 
(line 70) -* sparse arrays: Array Intro. (line 72) +* sparse arrays: Array Intro. (line 76) * Spencer, Henry: Glossary. (line 16) * split: String Functions. (line 316) * split string into array: String Functions. (line 297) * split utility: Split Program. (line 6) * split() function, array elements, deleting: Delete. (line 61) * split.awk program: Split Program. (line 30) -* sprintf <1>: String Functions. (line 383) +* sprintf <1>: String Functions. (line 384) * sprintf: OFMT. (line 15) -* sprintf() function, OFMT variable and: User-modified. (line 113) +* sprintf() function, OFMT variable and: User-modified. (line 114) * sprintf() function, print/printf statements and: Round Function. (line 6) * sqrt: Numeric Functions. (line 92) @@ -34298,7 +34352,7 @@ Index * strings splitting, example: String Functions. (line 335) * strings, converting <1>: Bitwise Functions. (line 110) * strings, converting: Strings And Numbers. (line 6) -* strings, converting letter case: String Functions. (line 522) +* strings, converting letter case: String Functions. (line 523) * strings, converting, numbers to: User-modified. (line 30) * strings, empty, See null strings: awk split records. (line 115) * strings, extracting: String Extraction. (line 6) @@ -34308,15 +34362,15 @@ Index * strings, null: Regexp Field Splitting. (line 43) * strings, numeric: Variable Typing. (line 6) -* strtonum: String Functions. (line 390) +* strtonum: String Functions. (line 391) * strtonum() function (gawk), --non-decimal-data option and: Nondecimal Data. (line 35) -* sub <1>: String Functions. (line 408) +* sub <1>: String Functions. (line 409) * sub: Using Constant Regexps. (line 43) -* sub() function, arguments of: String Functions. (line 462) +* sub() function, arguments of: String Functions. (line 463) * sub() function, escape processing: Gory Details. (line 6) -* subscript separators: User-modified. (line 145) +* subscript separators: User-modified. 
(line 146) * subscripts in arrays, multidimensional: Multidimensional. (line 10) * subscripts in arrays, multidimensional, scanning: Multiscanning. (line 11) @@ -34324,19 +34378,19 @@ Index (line 6) * subscripts in arrays, uninitialized variables as: Uninitialized Subscripts. (line 6) -* SUBSEP variable: User-modified. (line 145) +* SUBSEP variable: User-modified. (line 146) * SUBSEP variable, and multidimensional arrays: Multidimensional. (line 16) * substitute in string: String Functions. (line 90) -* substr: String Functions. (line 481) -* substring: String Functions. (line 481) +* substr: String Functions. (line 482) +* substring: String Functions. (line 482) * Sumner, Andrew: Other Versions. (line 68) * supplementary groups of gawk process: Auto-set. (line 251) * switch statement: Switch Statement. (line 6) * SYMTAB array: Auto-set. (line 283) * syntactic ambiguity: /= operator vs. /=.../ regexp constant: Assignment Ops. (line 148) -* system: I/O Functions. (line 106) +* system: I/O Functions. (line 107) * systime: Time Functions. (line 66) * t debugger command (alias for tbreak): Breakpoint Control. (line 90) * tbreak debugger command: Breakpoint Control. (line 90) @@ -34363,7 +34417,7 @@ Index * text, printing: Print. (line 22) * text, printing, unduplicated lines of: Uniq Program. (line 6) * TEXTDOMAIN variable <1>: Programmer i18n. (line 9) -* TEXTDOMAIN variable: User-modified. (line 151) +* TEXTDOMAIN variable: User-modified. (line 152) * TEXTDOMAIN variable, BEGIN pattern and: Programmer i18n. (line 60) * TEXTDOMAIN variable, portability and: I18N Portability. (line 20) * textdomain() function (C library): Explaining gettext. (line 28) @@ -34386,8 +34440,8 @@ Index * timestamps, converting dates to: Time Functions. (line 76) * timestamps, formatted: Getlocaltime Function. (line 6) -* tolower: String Functions. (line 523) -* toupper: String Functions. (line 529) +* tolower: String Functions. (line 524) +* toupper: String Functions. 
(line 530) * tr utility: Translate Program. (line 6) * trace debugger command: Miscellaneous Debugger Commands. (line 108) @@ -34406,14 +34460,14 @@ Index (line 22) * troubleshooting, fatal errors, printf format strings: Format Modifiers. (line 158) -* troubleshooting, fflush() function: I/O Functions. (line 62) +* troubleshooting, fflush() function: I/O Functions. (line 63) * troubleshooting, function call syntax: Function Calls. (line 30) * troubleshooting, gawk: Compatibility Mode. (line 6) * troubleshooting, gawk, bug reports: Bugs. (line 9) * troubleshooting, gawk, fatal errors, function arguments: Calling Built-in. (line 16) * troubleshooting, getline function: File Checking. (line 25) -* troubleshooting, gsub()/sub() functions: String Functions. (line 472) +* troubleshooting, gsub()/sub() functions: String Functions. (line 473) * troubleshooting, match() function: String Functions. (line 292) * troubleshooting, print statement, omitting commas: Print Examples. (line 31) @@ -34423,8 +34477,8 @@ Index * troubleshooting, regexp constants vs. string constants: Computed Regexps. (line 40) * troubleshooting, string concatenation: Concatenation. (line 26) -* troubleshooting, substr() function: String Functions. (line 499) -* troubleshooting, system() function: I/O Functions. (line 128) +* troubleshooting, substr() function: String Functions. (line 500) +* troubleshooting, system() function: I/O Functions. (line 129) * troubleshooting, typographical errors, global variables: Options. (line 98) * true, logical: Truth Values. (line 6) @@ -34503,7 +34557,7 @@ Index * variables, predefined conveying information: Auto-set. (line 6) * variables, private: Library Names. (line 11) * variables, setting: Options. (line 32) -* variables, shadowing: Definition Syntax. (line 71) +* variables, shadowing: Definition Syntax. (line 76) * variables, types of: Assignment Ops. (line 40) * variables, types of, comparison expressions and: Typing and Comparison. 
(line 9) @@ -34662,501 +34716,501 @@ Node: Invoking Summary151836 Node: Regexp153499 Node: Regexp Usage154953 Node: Escape Sequences156990 -Node: Regexp Operators163230 -Ref: Regexp Operators-Footnote-1170640 -Ref: Regexp Operators-Footnote-2170787 -Node: Bracket Expressions170885 -Ref: table-char-classes172900 -Node: Leftmost Longest175842 -Node: Computed Regexps177144 -Node: GNU Regexp Operators180573 -Node: Case-sensitivity184245 -Ref: Case-sensitivity-Footnote-1187130 -Ref: Case-sensitivity-Footnote-2187365 -Node: Regexp Summary187473 -Node: Reading Files188940 -Node: Records191033 -Node: awk split records191766 -Node: gawk split records196695 -Ref: gawk split records-Footnote-1201234 -Node: Fields201271 -Ref: Fields-Footnote-1204049 -Node: Nonconstant Fields204135 -Ref: Nonconstant Fields-Footnote-1206373 -Node: Changing Fields206576 -Node: Field Separators212507 -Node: Default Field Splitting215211 -Node: Regexp Field Splitting216328 -Node: Single Character Fields219678 -Node: Command Line Field Separator220737 -Node: Full Line Fields223954 -Ref: Full Line Fields-Footnote-1225475 -Ref: Full Line Fields-Footnote-2225521 -Node: Field Splitting Summary225622 -Node: Constant Size227696 -Node: Splitting By Content232279 -Ref: Splitting By Content-Footnote-1236244 -Node: Multiple Line236407 -Ref: Multiple Line-Footnote-1242288 -Node: Getline242467 -Node: Plain Getline244674 -Node: Getline/Variable247314 -Node: Getline/File248463 -Node: Getline/Variable/File249848 -Ref: Getline/Variable/File-Footnote-1251451 -Node: Getline/Pipe251538 -Node: Getline/Variable/Pipe254216 -Node: Getline/Coprocess255347 -Node: Getline/Variable/Coprocess256611 -Node: Getline Notes257350 -Node: Getline Summary260144 -Ref: table-getline-variants260556 -Node: Read Timeout261385 -Ref: Read Timeout-Footnote-1265222 -Node: Command-line directories265280 -Node: Input Summary266185 -Node: Input Exercises269570 -Node: Printing270298 -Node: Print272075 -Node: Print Examples273532 -Node: Output 
Separators276311 -Node: OFMT278329 -Node: Printf279684 -Node: Basic Printf280469 -Node: Control Letters282041 -Node: Format Modifiers286026 -Node: Printf Examples292036 -Node: Redirection294522 -Node: Special FD301360 -Ref: Special FD-Footnote-1304526 -Node: Special Files304600 -Node: Other Inherited Files305217 -Node: Special Network306217 -Node: Special Caveats307079 -Node: Close Files And Pipes308028 -Ref: Close Files And Pipes-Footnote-1315219 -Ref: Close Files And Pipes-Footnote-2315367 -Node: Output Summary315517 -Node: Output Exercises316515 -Node: Expressions317195 -Node: Values318384 -Node: Constants319061 -Node: Scalar Constants319752 -Ref: Scalar Constants-Footnote-1320614 -Node: Nondecimal-numbers320864 -Node: Regexp Constants323874 -Node: Using Constant Regexps324400 -Node: Variables327563 -Node: Using Variables328220 -Node: Assignment Options330131 -Node: Conversion332006 -Node: Strings And Numbers332530 -Ref: Strings And Numbers-Footnote-1335595 -Node: Locale influences conversions335704 -Ref: table-locale-affects338450 -Node: All Operators339042 -Node: Arithmetic Ops339671 -Node: Concatenation342176 -Ref: Concatenation-Footnote-1344995 -Node: Assignment Ops345102 -Ref: table-assign-ops350081 -Node: Increment Ops351391 -Node: Truth Values and Conditions354822 -Node: Truth Values355905 -Node: Typing and Comparison356954 -Node: Variable Typing357770 -Node: Comparison Operators361437 -Ref: table-relational-ops361847 -Node: POSIX String Comparison365342 -Ref: POSIX String Comparison-Footnote-1366414 -Node: Boolean Ops366553 -Ref: Boolean Ops-Footnote-1371031 -Node: Conditional Exp371122 -Node: Function Calls372860 -Node: Precedence376740 -Node: Locales380400 -Node: Expressions Summary382032 -Node: Patterns and Actions384603 -Node: Pattern Overview385723 -Node: Regexp Patterns387402 -Node: Expression Patterns387945 -Node: Ranges391654 -Node: BEGIN/END394761 -Node: Using BEGIN/END395522 -Ref: Using BEGIN/END-Footnote-1398258 -Node: I/O And BEGIN/END398364 
-Node: BEGINFILE/ENDFILE400679 -Node: Empty403576 -Node: Using Shell Variables403893 -Node: Action Overview406166 -Node: Statements408492 -Node: If Statement410340 -Node: While Statement411835 -Node: Do Statement413863 -Node: For Statement415011 -Node: Switch Statement418169 -Node: Break Statement420551 -Node: Continue Statement422592 -Node: Next Statement424419 -Node: Nextfile Statement426800 -Node: Exit Statement429428 -Node: Built-in Variables431839 -Node: User-modified432972 -Ref: User-modified-Footnote-1440653 -Node: Auto-set440715 -Ref: Auto-set-Footnote-1454407 -Ref: Auto-set-Footnote-2454612 -Node: ARGC and ARGV454668 -Node: Pattern Action Summary458886 -Node: Arrays461313 -Node: Array Basics462642 -Node: Array Intro463486 -Ref: figure-array-elements465450 -Ref: Array Intro-Footnote-1467976 -Node: Reference to Elements468104 -Node: Assigning Elements470556 -Node: Array Example471047 -Node: Scanning an Array472805 -Node: Controlling Scanning475821 -Ref: Controlling Scanning-Footnote-1481017 -Node: Numeric Array Subscripts481333 -Node: Uninitialized Subscripts483518 -Node: Delete485135 -Ref: Delete-Footnote-1487878 -Node: Multidimensional487935 -Node: Multiscanning491032 -Node: Arrays of Arrays492621 -Node: Arrays Summary497380 -Node: Functions499472 -Node: Built-in500371 -Node: Calling Built-in501449 -Node: Numeric Functions503440 -Ref: Numeric Functions-Footnote-1508259 -Ref: Numeric Functions-Footnote-2508616 -Ref: Numeric Functions-Footnote-3508664 -Node: String Functions508936 -Ref: String Functions-Footnote-1532411 -Ref: String Functions-Footnote-2532540 -Ref: String Functions-Footnote-3532788 -Node: Gory Details532875 -Ref: table-sub-escapes534656 -Ref: table-sub-proposed536176 -Ref: table-posix-sub537540 -Ref: table-gensub-escapes539076 -Ref: Gory Details-Footnote-1539908 -Node: I/O Functions540059 -Ref: I/O Functions-Footnote-1547277 -Node: Time Functions547424 -Ref: Time Functions-Footnote-1557912 -Ref: Time Functions-Footnote-2557980 -Ref: Time 
Functions-Footnote-3558138 -Ref: Time Functions-Footnote-4558249 -Ref: Time Functions-Footnote-5558361 -Ref: Time Functions-Footnote-6558588 -Node: Bitwise Functions558854 -Ref: table-bitwise-ops559416 -Ref: Bitwise Functions-Footnote-1563725 -Node: Type Functions563894 -Node: I18N Functions565045 -Node: User-defined566690 -Node: Definition Syntax567495 -Ref: Definition Syntax-Footnote-1572902 -Node: Function Example572973 -Ref: Function Example-Footnote-1575892 -Node: Function Caveats575914 -Node: Calling A Function576432 -Node: Variable Scope577390 -Node: Pass By Value/Reference580378 -Node: Return Statement583873 -Node: Dynamic Typing586854 -Node: Indirect Calls587783 -Ref: Indirect Calls-Footnote-1599085 -Node: Functions Summary599213 -Node: Library Functions601915 -Ref: Library Functions-Footnote-1605524 -Ref: Library Functions-Footnote-2605667 -Node: Library Names605838 -Ref: Library Names-Footnote-1609292 -Ref: Library Names-Footnote-2609515 -Node: General Functions609601 -Node: Strtonum Function610704 -Node: Assert Function613726 -Node: Round Function617050 -Node: Cliff Random Function618591 -Node: Ordinal Functions619607 -Ref: Ordinal Functions-Footnote-1622670 -Ref: Ordinal Functions-Footnote-2622922 -Node: Join Function623133 -Ref: Join Function-Footnote-1624902 -Node: Getlocaltime Function625102 -Node: Readfile Function628846 -Node: Shell Quoting630816 -Node: Data File Management632217 -Node: Filetrans Function632849 -Node: Rewind Function636905 -Node: File Checking638292 -Ref: File Checking-Footnote-1639624 -Node: Empty Files639825 -Node: Ignoring Assigns641804 -Node: Getopt Function643355 -Ref: Getopt Function-Footnote-1654817 -Node: Passwd Functions655017 -Ref: Passwd Functions-Footnote-1663854 -Node: Group Functions663942 -Ref: Group Functions-Footnote-1671836 -Node: Walking Arrays672049 -Node: Library Functions Summary673652 -Node: Library Exercises675053 -Node: Sample Programs676333 -Node: Running Examples677103 -Node: Clones677831 -Node: Cut 
Program679055 -Node: Egrep Program688774 -Ref: Egrep Program-Footnote-1696272 -Node: Id Program696382 -Node: Split Program700027 -Ref: Split Program-Footnote-1703475 -Node: Tee Program703603 -Node: Uniq Program706392 -Node: Wc Program713811 -Ref: Wc Program-Footnote-1718061 -Node: Miscellaneous Programs718155 -Node: Dupword Program719368 -Node: Alarm Program721399 -Node: Translate Program726203 -Ref: Translate Program-Footnote-1730768 -Node: Labels Program731038 -Ref: Labels Program-Footnote-1734389 -Node: Word Sorting734473 -Node: History Sorting738544 -Node: Extract Program740380 -Node: Simple Sed747905 -Node: Igawk Program750973 -Ref: Igawk Program-Footnote-1765297 -Ref: Igawk Program-Footnote-2765498 -Ref: Igawk Program-Footnote-3765620 -Node: Anagram Program765735 -Node: Signature Program768792 -Node: Programs Summary770039 -Node: Programs Exercises771232 -Ref: Programs Exercises-Footnote-1775363 -Node: Advanced Features775454 -Node: Nondecimal Data777402 -Node: Array Sorting778992 -Node: Controlling Array Traversal779689 -Ref: Controlling Array Traversal-Footnote-1788022 -Node: Array Sorting Functions788140 -Ref: Array Sorting Functions-Footnote-1792029 -Node: Two-way I/O792225 -Ref: Two-way I/O-Footnote-1797170 -Ref: Two-way I/O-Footnote-2797356 -Node: TCP/IP Networking797438 -Node: Profiling800311 -Node: Advanced Features Summary808588 -Node: Internationalization810521 -Node: I18N and L10N812001 -Node: Explaining gettext812687 -Ref: Explaining gettext-Footnote-1817712 -Ref: Explaining gettext-Footnote-2817896 -Node: Programmer i18n818061 -Ref: Programmer i18n-Footnote-1822927 -Node: Translator i18n822976 -Node: String Extraction823770 -Ref: String Extraction-Footnote-1824901 -Node: Printf Ordering824987 -Ref: Printf Ordering-Footnote-1827773 -Node: I18N Portability827837 -Ref: I18N Portability-Footnote-1830292 -Node: I18N Example830355 -Ref: I18N Example-Footnote-1833158 -Node: Gawk I18N833230 -Node: I18N Summary833868 -Node: Debugger835207 -Node: 
Debugging836229 -Node: Debugging Concepts836670 -Node: Debugging Terms838523 -Node: Awk Debugging841095 -Node: Sample Debugging Session841989 -Node: Debugger Invocation842509 -Node: Finding The Bug843893 -Node: List of Debugger Commands850368 -Node: Breakpoint Control851701 -Node: Debugger Execution Control855397 -Node: Viewing And Changing Data858761 -Node: Execution Stack862139 -Node: Debugger Info863776 -Node: Miscellaneous Debugger Commands867793 -Node: Readline Support872822 -Node: Limitations873714 -Node: Debugging Summary875828 -Node: Arbitrary Precision Arithmetic876996 -Node: Computer Arithmetic878412 -Ref: table-numeric-ranges882010 -Ref: Computer Arithmetic-Footnote-1882869 -Node: Math Definitions882926 -Ref: table-ieee-formats886214 -Ref: Math Definitions-Footnote-1886818 -Node: MPFR features886923 -Node: FP Math Caution888594 -Ref: FP Math Caution-Footnote-1889644 -Node: Inexactness of computations890013 -Node: Inexact representation890972 -Node: Comparing FP Values892329 -Node: Errors accumulate893411 -Node: Getting Accuracy894844 -Node: Try To Round897506 -Node: Setting precision898405 -Ref: table-predefined-precision-strings899089 -Node: Setting the rounding mode900878 -Ref: table-gawk-rounding-modes901242 -Ref: Setting the rounding mode-Footnote-1904697 -Node: Arbitrary Precision Integers904876 -Ref: Arbitrary Precision Integers-Footnote-1909776 -Node: POSIX Floating Point Problems909925 -Ref: POSIX Floating Point Problems-Footnote-1913798 -Node: Floating point summary913836 -Node: Dynamic Extensions916030 -Node: Extension Intro917582 -Node: Plugin License918848 -Node: Extension Mechanism Outline919645 -Ref: figure-load-extension920073 -Ref: figure-register-new-function921553 -Ref: figure-call-new-function922557 -Node: Extension API Description924543 -Node: Extension API Functions Introduction925993 -Node: General Data Types930817 -Ref: General Data Types-Footnote-1936556 -Node: Memory Allocation Functions936855 -Ref: Memory Allocation 
Functions-Footnote-1939694 -Node: Constructor Functions939790 -Node: Registration Functions941524 -Node: Extension Functions942209 -Node: Exit Callback Functions944506 -Node: Extension Version String945754 -Node: Input Parsers946419 -Node: Output Wrappers956298 -Node: Two-way processors960813 -Node: Printing Messages963017 -Ref: Printing Messages-Footnote-1964093 -Node: Updating `ERRNO'964245 -Node: Requesting Values964985 -Ref: table-value-types-returned965713 -Node: Accessing Parameters966670 -Node: Symbol Table Access967901 -Node: Symbol table by name968415 -Node: Symbol table by cookie970396 -Ref: Symbol table by cookie-Footnote-1974540 -Node: Cached values974603 -Ref: Cached values-Footnote-1978102 -Node: Array Manipulation978193 -Ref: Array Manipulation-Footnote-1979291 -Node: Array Data Types979328 -Ref: Array Data Types-Footnote-1981983 -Node: Array Functions982075 -Node: Flattening Arrays985929 -Node: Creating Arrays992821 -Node: Extension API Variables997592 -Node: Extension Versioning998228 -Node: Extension API Informational Variables1000129 -Node: Extension API Boilerplate1001194 -Node: Finding Extensions1005003 -Node: Extension Example1005563 -Node: Internal File Description1006335 -Node: Internal File Ops1010402 -Ref: Internal File Ops-Footnote-11022072 -Node: Using Internal File Ops1022212 -Ref: Using Internal File Ops-Footnote-11024595 -Node: Extension Samples1024868 -Node: Extension Sample File Functions1026394 -Node: Extension Sample Fnmatch1034032 -Node: Extension Sample Fork1035523 -Node: Extension Sample Inplace1036738 -Node: Extension Sample Ord1038413 -Node: Extension Sample Readdir1039249 -Ref: table-readdir-file-types1040125 -Node: Extension Sample Revout1040936 -Node: Extension Sample Rev2way1041526 -Node: Extension Sample Read write array1042266 -Node: Extension Sample Readfile1044206 -Node: Extension Sample Time1045301 -Node: Extension Sample API Tests1046650 -Node: gawkextlib1047141 -Node: Extension summary1049799 -Node: Extension 
Exercises1053488 -Node: Language History1054210 -Node: V7/SVR3.11055866 -Node: SVR41058047 -Node: POSIX1059492 -Node: BTL1060881 -Node: POSIX/GNU1061615 -Node: Feature History1067239 -Node: Common Extensions1080337 -Node: Ranges and Locales1081661 -Ref: Ranges and Locales-Footnote-11086279 -Ref: Ranges and Locales-Footnote-21086306 -Ref: Ranges and Locales-Footnote-31086540 -Node: Contributors1086761 -Node: History summary1092302 -Node: Installation1093672 -Node: Gawk Distribution1094618 -Node: Getting1095102 -Node: Extracting1095925 -Node: Distribution contents1097560 -Node: Unix Installation1103625 -Node: Quick Installation1104308 -Node: Shell Startup Files1106719 -Node: Additional Configuration Options1107798 -Node: Configuration Philosophy1109537 -Node: Non-Unix Installation1111906 -Node: PC Installation1112364 -Node: PC Binary Installation1113683 -Node: PC Compiling1115531 -Ref: PC Compiling-Footnote-11118552 -Node: PC Testing1118661 -Node: PC Using1119837 -Node: Cygwin1123952 -Node: MSYS1124775 -Node: VMS Installation1125275 -Node: VMS Compilation1126067 -Ref: VMS Compilation-Footnote-11127289 -Node: VMS Dynamic Extensions1127347 -Node: VMS Installation Details1129031 -Node: VMS Running1131283 -Node: VMS GNV1134119 -Node: VMS Old Gawk1134853 -Node: Bugs1135323 -Node: Other Versions1139206 -Node: Installation summary1145630 -Node: Notes1146686 -Node: Compatibility Mode1147551 -Node: Additions1148333 -Node: Accessing The Source1149258 -Node: Adding Code1150693 -Node: New Ports1156850 -Node: Derived Files1161332 -Ref: Derived Files-Footnote-11166807 -Ref: Derived Files-Footnote-21166841 -Ref: Derived Files-Footnote-31167437 -Node: Future Extensions1167551 -Node: Implementation Limitations1168157 -Node: Extension Design1169405 -Node: Old Extension Problems1170559 -Ref: Old Extension Problems-Footnote-11172076 -Node: Extension New Mechanism Goals1172133 -Ref: Extension New Mechanism Goals-Footnote-11175493 -Node: Extension Other Design Decisions1175682 -Node: 
Extension Future Growth1177790 -Node: Old Extension Mechanism1178626 -Node: Notes summary1180388 -Node: Basic Concepts1181574 -Node: Basic High Level1182255 -Ref: figure-general-flow1182527 -Ref: figure-process-flow1183126 -Ref: Basic High Level-Footnote-11186355 -Node: Basic Data Typing1186540 -Node: Glossary1189868 -Node: Copying1221797 -Node: GNU Free Documentation License1259353 -Node: Index1284489 +Node: Regexp Operators163219 +Ref: Regexp Operators-Footnote-1170629 +Ref: Regexp Operators-Footnote-2170776 +Node: Bracket Expressions170874 +Ref: table-char-classes172889 +Node: Leftmost Longest175831 +Node: Computed Regexps177133 +Node: GNU Regexp Operators180562 +Node: Case-sensitivity184234 +Ref: Case-sensitivity-Footnote-1187119 +Ref: Case-sensitivity-Footnote-2187354 +Node: Regexp Summary187462 +Node: Reading Files188929 +Node: Records191022 +Node: awk split records191755 +Node: gawk split records196684 +Ref: gawk split records-Footnote-1201223 +Node: Fields201260 +Ref: Fields-Footnote-1204038 +Node: Nonconstant Fields204124 +Ref: Nonconstant Fields-Footnote-1206362 +Node: Changing Fields206565 +Node: Field Separators212496 +Node: Default Field Splitting215200 +Node: Regexp Field Splitting216317 +Node: Single Character Fields219667 +Node: Command Line Field Separator220726 +Node: Full Line Fields223943 +Ref: Full Line Fields-Footnote-1225464 +Ref: Full Line Fields-Footnote-2225510 +Node: Field Splitting Summary225611 +Node: Constant Size227685 +Node: Splitting By Content232268 +Ref: Splitting By Content-Footnote-1236233 +Node: Multiple Line236396 +Ref: Multiple Line-Footnote-1242277 +Node: Getline242456 +Node: Plain Getline244663 +Node: Getline/Variable247303 +Node: Getline/File248452 +Node: Getline/Variable/File249837 +Ref: Getline/Variable/File-Footnote-1251440 +Node: Getline/Pipe251527 +Node: Getline/Variable/Pipe254205 +Node: Getline/Coprocess255336 +Node: Getline/Variable/Coprocess256600 +Node: Getline Notes257339 +Node: Getline Summary260133 +Ref: 
table-getline-variants260545 +Node: Read Timeout261374 +Ref: Read Timeout-Footnote-1265211 +Node: Command-line directories265269 +Node: Input Summary266174 +Node: Input Exercises269559 +Node: Printing270287 +Node: Print272064 +Node: Print Examples273521 +Node: Output Separators276300 +Node: OFMT278318 +Node: Printf279673 +Node: Basic Printf280458 +Node: Control Letters282030 +Node: Format Modifiers286015 +Node: Printf Examples292025 +Node: Redirection294511 +Node: Special FD301349 +Ref: Special FD-Footnote-1304515 +Node: Special Files304589 +Node: Other Inherited Files305206 +Node: Special Network306206 +Node: Special Caveats307068 +Node: Close Files And Pipes308017 +Ref: Close Files And Pipes-Footnote-1315208 +Ref: Close Files And Pipes-Footnote-2315356 +Node: Output Summary315506 +Node: Output Exercises316504 +Node: Expressions317184 +Node: Values318373 +Node: Constants319050 +Node: Scalar Constants319741 +Ref: Scalar Constants-Footnote-1320603 +Node: Nondecimal-numbers320853 +Node: Regexp Constants323863 +Node: Using Constant Regexps324389 +Node: Variables327552 +Node: Using Variables328209 +Node: Assignment Options330120 +Node: Conversion331995 +Node: Strings And Numbers332519 +Ref: Strings And Numbers-Footnote-1335584 +Node: Locale influences conversions335693 +Ref: table-locale-affects338439 +Node: All Operators339031 +Node: Arithmetic Ops339660 +Node: Concatenation342165 +Ref: Concatenation-Footnote-1344984 +Node: Assignment Ops345091 +Ref: table-assign-ops350070 +Node: Increment Ops351380 +Node: Truth Values and Conditions354811 +Node: Truth Values355894 +Node: Typing and Comparison356943 +Node: Variable Typing357759 +Node: Comparison Operators361426 +Ref: table-relational-ops361836 +Node: POSIX String Comparison365331 +Ref: POSIX String Comparison-Footnote-1366403 +Node: Boolean Ops366542 +Ref: Boolean Ops-Footnote-1371020 +Node: Conditional Exp371111 +Node: Function Calls372849 +Node: Precedence376729 +Node: Locales380389 +Node: Expressions Summary382021 
+Node: Patterns and Actions384592 +Node: Pattern Overview385712 +Node: Regexp Patterns387391 +Node: Expression Patterns387934 +Node: Ranges391643 +Node: BEGIN/END394750 +Node: Using BEGIN/END395511 +Ref: Using BEGIN/END-Footnote-1398247 +Node: I/O And BEGIN/END398353 +Node: BEGINFILE/ENDFILE400668 +Node: Empty403565 +Node: Using Shell Variables403882 +Node: Action Overview406155 +Node: Statements408481 +Node: If Statement410329 +Node: While Statement411824 +Node: Do Statement413852 +Node: For Statement415000 +Node: Switch Statement418158 +Node: Break Statement420540 +Node: Continue Statement422581 +Node: Next Statement424408 +Node: Nextfile Statement426789 +Node: Exit Statement429417 +Node: Built-in Variables431828 +Node: User-modified432961 +Ref: User-modified-Footnote-1440664 +Node: Auto-set440726 +Ref: Auto-set-Footnote-1454435 +Ref: Auto-set-Footnote-2454640 +Node: ARGC and ARGV454696 +Node: Pattern Action Summary458914 +Node: Arrays461347 +Node: Array Basics462676 +Node: Array Intro463520 +Ref: figure-array-elements465454 +Ref: Array Intro-Footnote-1468074 +Node: Reference to Elements468202 +Node: Assigning Elements470664 +Node: Array Example471155 +Node: Scanning an Array472914 +Node: Controlling Scanning475934 +Ref: Controlling Scanning-Footnote-1481328 +Node: Numeric Array Subscripts481644 +Node: Uninitialized Subscripts483829 +Node: Delete485446 +Ref: Delete-Footnote-1488195 +Node: Multidimensional488252 +Node: Multiscanning491349 +Node: Arrays of Arrays492938 +Node: Arrays Summary497692 +Node: Functions499783 +Node: Built-in500822 +Node: Calling Built-in501900 +Node: Numeric Functions503895 +Ref: Numeric Functions-Footnote-1508713 +Ref: Numeric Functions-Footnote-2509070 +Ref: Numeric Functions-Footnote-3509118 +Node: String Functions509390 +Ref: String Functions-Footnote-1532891 +Ref: String Functions-Footnote-2533020 +Ref: String Functions-Footnote-3533268 +Node: Gory Details533355 +Ref: table-sub-escapes535136 +Ref: table-sub-proposed536651 +Ref: 
table-posix-sub538013 +Ref: table-gensub-escapes539550 +Ref: Gory Details-Footnote-1540383 +Node: I/O Functions540534 +Ref: I/O Functions-Footnote-1547770 +Node: Time Functions547917 +Ref: Time Functions-Footnote-1558426 +Ref: Time Functions-Footnote-2558494 +Ref: Time Functions-Footnote-3558652 +Ref: Time Functions-Footnote-4558763 +Ref: Time Functions-Footnote-5558875 +Ref: Time Functions-Footnote-6559102 +Node: Bitwise Functions559368 +Ref: table-bitwise-ops559930 +Ref: Bitwise Functions-Footnote-1564258 +Node: Type Functions564430 +Node: I18N Functions565582 +Node: User-defined567229 +Node: Definition Syntax568034 +Ref: Definition Syntax-Footnote-1573693 +Node: Function Example573764 +Ref: Function Example-Footnote-1576685 +Node: Function Caveats576707 +Node: Calling A Function577225 +Node: Variable Scope578183 +Node: Pass By Value/Reference581176 +Node: Return Statement584673 +Node: Dynamic Typing587652 +Node: Indirect Calls588581 +Ref: Indirect Calls-Footnote-1599887 +Node: Functions Summary600015 +Node: Library Functions602717 +Ref: Library Functions-Footnote-1606325 +Ref: Library Functions-Footnote-2606468 +Node: Library Names606639 +Ref: Library Names-Footnote-1610097 +Ref: Library Names-Footnote-2610320 +Node: General Functions610406 +Node: Strtonum Function611509 +Node: Assert Function614531 +Node: Round Function617855 +Node: Cliff Random Function619396 +Node: Ordinal Functions620412 +Ref: Ordinal Functions-Footnote-1623475 +Ref: Ordinal Functions-Footnote-2623727 +Node: Join Function623938 +Ref: Join Function-Footnote-1625708 +Node: Getlocaltime Function625908 +Node: Readfile Function629652 +Node: Shell Quoting631624 +Node: Data File Management633025 +Node: Filetrans Function633657 +Node: Rewind Function637753 +Node: File Checking639139 +Ref: File Checking-Footnote-1640472 +Node: Empty Files640673 +Node: Ignoring Assigns642652 +Node: Getopt Function644202 +Ref: Getopt Function-Footnote-1655666 +Node: Passwd Functions655866 +Ref: Passwd 
Functions-Footnote-1664706 +Node: Group Functions664794 +Ref: Group Functions-Footnote-1672691 +Node: Walking Arrays672896 +Node: Library Functions Summary674496 +Node: Library Exercises675900 +Node: Sample Programs677180 +Node: Running Examples677950 +Node: Clones678678 +Node: Cut Program679902 +Node: Egrep Program689622 +Ref: Egrep Program-Footnote-1697125 +Node: Id Program697235 +Node: Split Program700911 +Ref: Split Program-Footnote-1704365 +Node: Tee Program704493 +Node: Uniq Program707282 +Node: Wc Program714701 +Ref: Wc Program-Footnote-1718951 +Node: Miscellaneous Programs719045 +Node: Dupword Program720258 +Node: Alarm Program722289 +Node: Translate Program727094 +Ref: Translate Program-Footnote-1731657 +Node: Labels Program731927 +Ref: Labels Program-Footnote-1735278 +Node: Word Sorting735362 +Node: History Sorting739432 +Node: Extract Program741267 +Node: Simple Sed748791 +Node: Igawk Program751861 +Ref: Igawk Program-Footnote-1766187 +Ref: Igawk Program-Footnote-2766388 +Ref: Igawk Program-Footnote-3766510 +Node: Anagram Program766625 +Node: Signature Program769686 +Node: Programs Summary770933 +Node: Programs Exercises772154 +Ref: Programs Exercises-Footnote-1776285 +Node: Advanced Features776376 +Node: Nondecimal Data778358 +Node: Array Sorting779948 +Node: Controlling Array Traversal780648 +Ref: Controlling Array Traversal-Footnote-1789014 +Node: Array Sorting Functions789132 +Ref: Array Sorting Functions-Footnote-1793018 +Node: Two-way I/O793214 +Ref: Two-way I/O-Footnote-1798159 +Ref: Two-way I/O-Footnote-2798345 +Node: TCP/IP Networking798427 +Node: Profiling801299 +Node: Advanced Features Summary809570 +Node: Internationalization811503 +Node: I18N and L10N812983 +Node: Explaining gettext813669 +Ref: Explaining gettext-Footnote-1818694 +Ref: Explaining gettext-Footnote-2818878 +Node: Programmer i18n819043 +Ref: Programmer i18n-Footnote-1823909 +Node: Translator i18n823958 +Node: String Extraction824752 +Ref: String Extraction-Footnote-1825883 
+Node: Printf Ordering825969 +Ref: Printf Ordering-Footnote-1828755 +Node: I18N Portability828819 +Ref: I18N Portability-Footnote-1831274 +Node: I18N Example831337 +Ref: I18N Example-Footnote-1834140 +Node: Gawk I18N834212 +Node: I18N Summary834850 +Node: Debugger836189 +Node: Debugging837211 +Node: Debugging Concepts837652 +Node: Debugging Terms839505 +Node: Awk Debugging842077 +Node: Sample Debugging Session842971 +Node: Debugger Invocation843491 +Node: Finding The Bug844875 +Node: List of Debugger Commands851350 +Node: Breakpoint Control852683 +Node: Debugger Execution Control856379 +Node: Viewing And Changing Data859743 +Node: Execution Stack863121 +Node: Debugger Info864758 +Node: Miscellaneous Debugger Commands868775 +Node: Readline Support873804 +Node: Limitations874696 +Node: Debugging Summary876810 +Node: Arbitrary Precision Arithmetic877978 +Node: Computer Arithmetic879394 +Ref: table-numeric-ranges882992 +Ref: Computer Arithmetic-Footnote-1883851 +Node: Math Definitions883908 +Ref: table-ieee-formats887196 +Ref: Math Definitions-Footnote-1887800 +Node: MPFR features887905 +Node: FP Math Caution889576 +Ref: FP Math Caution-Footnote-1890626 +Node: Inexactness of computations890995 +Node: Inexact representation891954 +Node: Comparing FP Values893311 +Node: Errors accumulate894393 +Node: Getting Accuracy895826 +Node: Try To Round898488 +Node: Setting precision899387 +Ref: table-predefined-precision-strings900071 +Node: Setting the rounding mode901860 +Ref: table-gawk-rounding-modes902224 +Ref: Setting the rounding mode-Footnote-1905679 +Node: Arbitrary Precision Integers905858 +Ref: Arbitrary Precision Integers-Footnote-1910758 +Node: POSIX Floating Point Problems910907 +Ref: POSIX Floating Point Problems-Footnote-1914780 +Node: Floating point summary914818 +Node: Dynamic Extensions917012 +Node: Extension Intro918564 +Node: Plugin License919830 +Node: Extension Mechanism Outline920627 +Ref: figure-load-extension921055 +Ref: figure-register-new-function922535 
+Ref: figure-call-new-function923539 +Node: Extension API Description925525 +Node: Extension API Functions Introduction926975 +Node: General Data Types931799 +Ref: General Data Types-Footnote-1937538 +Node: Memory Allocation Functions937837 +Ref: Memory Allocation Functions-Footnote-1940676 +Node: Constructor Functions940772 +Node: Registration Functions942506 +Node: Extension Functions943191 +Node: Exit Callback Functions945488 +Node: Extension Version String946736 +Node: Input Parsers947401 +Node: Output Wrappers957280 +Node: Two-way processors961795 +Node: Printing Messages963999 +Ref: Printing Messages-Footnote-1965075 +Node: Updating `ERRNO'965227 +Node: Requesting Values965967 +Ref: table-value-types-returned966695 +Node: Accessing Parameters967652 +Node: Symbol Table Access968883 +Node: Symbol table by name969397 +Node: Symbol table by cookie971378 +Ref: Symbol table by cookie-Footnote-1975522 +Node: Cached values975585 +Ref: Cached values-Footnote-1979084 +Node: Array Manipulation979175 +Ref: Array Manipulation-Footnote-1980273 +Node: Array Data Types980310 +Ref: Array Data Types-Footnote-1982965 +Node: Array Functions983057 +Node: Flattening Arrays986911 +Node: Creating Arrays993803 +Node: Extension API Variables998574 +Node: Extension Versioning999210 +Node: Extension API Informational Variables1001111 +Node: Extension API Boilerplate1002176 +Node: Finding Extensions1005985 +Node: Extension Example1006545 +Node: Internal File Description1007317 +Node: Internal File Ops1011384 +Ref: Internal File Ops-Footnote-11023054 +Node: Using Internal File Ops1023194 +Ref: Using Internal File Ops-Footnote-11025577 +Node: Extension Samples1025850 +Node: Extension Sample File Functions1027376 +Node: Extension Sample Fnmatch1035014 +Node: Extension Sample Fork1036505 +Node: Extension Sample Inplace1037720 +Node: Extension Sample Ord1039395 +Node: Extension Sample Readdir1040231 +Ref: table-readdir-file-types1041107 +Node: Extension Sample Revout1041918 +Node: Extension 
Sample Rev2way1042508 +Node: Extension Sample Read write array1043248 +Node: Extension Sample Readfile1045188 +Node: Extension Sample Time1046283 +Node: Extension Sample API Tests1047632 +Node: gawkextlib1048123 +Node: Extension summary1050781 +Node: Extension Exercises1054470 +Node: Language History1055192 +Node: V7/SVR3.11056848 +Node: SVR41059029 +Node: POSIX1060474 +Node: BTL1061863 +Node: POSIX/GNU1062597 +Node: Feature History1068386 +Node: Common Extensions1082112 +Node: Ranges and Locales1083436 +Ref: Ranges and Locales-Footnote-11088054 +Ref: Ranges and Locales-Footnote-21088081 +Ref: Ranges and Locales-Footnote-31088315 +Node: Contributors1088536 +Node: History summary1094077 +Node: Installation1095447 +Node: Gawk Distribution1096393 +Node: Getting1096877 +Node: Extracting1097700 +Node: Distribution contents1099335 +Node: Unix Installation1105400 +Node: Quick Installation1106083 +Node: Shell Startup Files1108494 +Node: Additional Configuration Options1109573 +Node: Configuration Philosophy1111312 +Node: Non-Unix Installation1113681 +Node: PC Installation1114139 +Node: PC Binary Installation1115458 +Node: PC Compiling1117306 +Ref: PC Compiling-Footnote-11120327 +Node: PC Testing1120436 +Node: PC Using1121612 +Node: Cygwin1125727 +Node: MSYS1126550 +Node: VMS Installation1127050 +Node: VMS Compilation1127842 +Ref: VMS Compilation-Footnote-11129064 +Node: VMS Dynamic Extensions1129122 +Node: VMS Installation Details1130806 +Node: VMS Running1133058 +Node: VMS GNV1135894 +Node: VMS Old Gawk1136628 +Node: Bugs1137098 +Node: Other Versions1140981 +Node: Installation summary1147405 +Node: Notes1148461 +Node: Compatibility Mode1149326 +Node: Additions1150108 +Node: Accessing The Source1151033 +Node: Adding Code1152468 +Node: New Ports1158625 +Node: Derived Files1163107 +Ref: Derived Files-Footnote-11168582 +Ref: Derived Files-Footnote-21168616 +Ref: Derived Files-Footnote-31169212 +Node: Future Extensions1169326 +Node: Implementation Limitations1169932 +Node: 
Extension Design1171180 +Node: Old Extension Problems1172334 +Ref: Old Extension Problems-Footnote-11173851 +Node: Extension New Mechanism Goals1173908 +Ref: Extension New Mechanism Goals-Footnote-11177268 +Node: Extension Other Design Decisions1177457 +Node: Extension Future Growth1179565 +Node: Old Extension Mechanism1180401 +Node: Notes summary1182163 +Node: Basic Concepts1183349 +Node: Basic High Level1184030 +Ref: figure-general-flow1184302 +Ref: figure-process-flow1184901 +Ref: Basic High Level-Footnote-11188130 +Node: Basic Data Typing1188315 +Node: Glossary1191643 +Node: Copying1223572 +Node: GNU Free Documentation License1261128 +Node: Index1286264 End Tag Table diff --git a/doc/gawk.texi b/doc/gawk.texi index ac973b9b..e702f407 100644 --- a/doc/gawk.texi +++ b/doc/gawk.texi @@ -55,6 +55,7 @@ @set VERSION 4.1 @set PATCHLEVEL 2 +@set GAWKINETTITLE TCP/IP Internetworking with @command{gawk} @ifset FOR_PRINT @set TITLE Effective awk Programming @end ifset @@ -472,7 +473,7 @@ particular records in a file and perform operations upon them. @command{gawk}. * Internationalization:: Getting @command{gawk} to speak your language. -* Debugger:: The @code{gawk} debugger. +* Debugger:: The @command{gawk} debugger. * Arbitrary Precision Arithmetic:: Arbitrary precision arithmetic with @command{gawk}. * Dynamic Extensions:: Adding new built-in functions to @@ -955,7 +956,7 @@ particular records in a file and perform operations upon them. * Internal File Ops:: The code for internal file operations. * Using Internal File Ops:: How to use an external extension. * Extension Samples:: The sample extensions that ship with - @code{gawk}. + @command{gawk}. * Extension Sample File Functions:: The file functions sample. * Extension Sample Fnmatch:: An interface to @code{fnmatch()}. 
* Extension Sample Fork:: An interface to @code{fork()} and @@ -1496,7 +1497,7 @@ In May 1997, J@"urgen Kahrs felt the need for network access from @command{awk}, and with a little help from me, set about adding features to do this for @command{gawk}. At that time, he also wrote the bulk of -@cite{TCP/IP Internetworking with @command{gawk}} +@cite{@value{GAWKINETTITLE}} (a separate document, available as part of the @command{gawk} distribution). His code finally became part of the main @command{gawk} distribution with @command{gawk} @value{PVERSION} 3.1. @@ -4677,7 +4678,7 @@ $ @kbd{gawk -f test2} @print{} This is script test2. @end example -@code{gawk} runs the @file{test2} script, which includes @file{test1} +@command{gawk} runs the @file{test2} script, which includes @file{test1} using the @code{@@include} keyword. So, to include external @command{awk} source files, you just use @code{@@include} followed by the name of the file to be included, @@ -4886,7 +4887,7 @@ This seems to have been a long-undocumented feature in Unix @command{awk}. Similarly, you may use @code{print} or @code{printf} statements in the @var{init} and @var{increment} parts of a @code{for} loop. This is another -long-undocumented ``feature'' of Unix @code{awk}. +long-undocumented ``feature'' of Unix @command{awk}. @end ignore @@ -5178,13 +5179,12 @@ letters or numbers. @value{COMMONEXT} @quotation CAUTION In ISO C, the escape sequence continues until the first nonhexadecimal digit is seen. -@c FIXME: Add exact version here. For many years, @command{gawk} would continue incorporating hexadecimal digits into the value until a non-hexadecimal digit or the end of the string was encountered. However, using more than two hexadecimal digits produced undefined results. -As of @value{PVERSION} @strong{FIXME:} 4.3.0, only two digits +As of @value{PVERSION} 4.2, only two digits are processed. @end quotation @@ -14508,7 +14508,7 @@ respectively, should use binary I/O. 
A string value of @code{"rw"} or @code{"wr"} indicates that all files should use binary I/O. Any other string value is treated the same as @code{"rw"}, but causes @command{gawk} to generate a warning message. @code{BINMODE} is described in more -detail in @ref{PC Using}. @command{mawk} (@pxref{Other Versions}), +detail in @ref{PC Using}. @command{mawk} (@pxref{Other Versions}) also supports this variable, but only using numeric values. @cindex @code{CONVFMT} variable @@ -14516,7 +14516,7 @@ also supports this variable, but only using numeric values. @cindex numbers, converting, to strings @cindex strings, converting, numbers to @item @code{CONVFMT} -This string controls conversion of numbers to +A string that controls the conversion of numbers to strings (@pxref{Conversion}). It works by being passed, in effect, as the first argument to the @code{sprintf()} function @@ -14591,7 +14591,7 @@ is to simply say @samp{FS = FS}, perhaps with an explanatory comment. @cindex regular expressions, case sensitivity @item IGNORECASE # If @code{IGNORECASE} is nonzero or non-null, then all string comparisons -and all regular expression matching are case independent. Thus, regexp +and all regular expression matching are case-independent. Thus, regexp matching with @samp{~} and @samp{!~}, as well as the @code{gensub()}, @code{gsub()}, @code{index()}, @code{match()}, @code{patsplit()}, @code{split()}, and @code{sub()} @@ -14617,7 +14617,7 @@ Any other true value prints nonfatal warnings. Assigning a false value to @code{LINT} turns off the lint warnings. This variable is a @command{gawk} extension. It is not special -in other @command{awk} implementations. Unlike the other special variables, +in other @command{awk} implementations. Unlike with the other special variables, changing @code{LINT} does affect the production of lint warnings, even if @command{gawk} is in compatibility mode. 
Much as the @option{--lint} and @option{--traditional} options independently @@ -14629,7 +14629,7 @@ of @command{awk} being executed. @cindex numbers, converting, to strings @cindex strings, converting, numbers to @item OFMT -Controls conversion of numbers to +A string that controls conversion of numbers to strings (@pxref{Conversion}) for printing with the @code{print} statement. It works by being passed as the first argument to the @code{sprintf()} function @@ -14644,7 +14644,7 @@ strings in general expressions; this is now done by @code{CONVFMT}. @cindex separators, field @cindex field separators @item OFS -This is the output field separator (@pxref{Output Separators}). It is +The output field separator (@pxref{Output Separators}). It is output between the fields printed by a @code{print} statement. Its default value is @w{@code{" "}}, a string consisting of a single space. @@ -14662,7 +14662,7 @@ The working precision of arbitrary-precision floating-point numbers, @cindex @code{ROUNDMODE} variable @item ROUNDMODE # The rounding mode to use for arbitrary-precision arithmetic on -numbers, by default @code{"N"} (@samp{roundTiesToEven} in +numbers, by default @code{"N"} (@code{roundTiesToEven} in the IEEE 754 standard; @pxref{Setting the rounding mode}). @cindex @code{RS} variable @@ -14691,7 +14691,7 @@ just the first character of @code{RS}'s value is used. @item @code{SUBSEP} The subscript separator. It has the default value of @code{"\034"} and is used to separate the parts of the indices of a -multidimensional array. Thus, the expression @code{@w{foo["A", "B"]}} +multidimensional array. Thus, the expression @samp{@w{foo["A", "B"]}} really accesses @code{foo["A\034B"]} (@pxref{Multidimensional}). @@ -14709,7 +14709,7 @@ The default value of @code{TEXTDOMAIN} is @code{"messages"}. 
@end table @node Auto-set -@subsection Built-In Variables That Convey Information +@subsection Built-in Variables That Convey Information @cindex predefined variables, conveying information @cindex variables, predefined conveying information @@ -14867,12 +14867,12 @@ input file. @item @code{NF} The number of fields in the current input record. @code{NF} is set each time a new record is read, when a new field is -created or when @code{$0} changes (@pxref{Fields}). +created, or when @code{$0} changes (@pxref{Fields}). Unlike most of the variables described in this @value{SUBSECTION}, assigning a value to @code{NF} has the potential to affect @command{awk}'s internal workings. In particular, assignments -to @code{NF} can be used to create or remove fields from the +to @code{NF} can be used to create fields in or remove fields from the current record. @xref{Changing Fields}. @cindex @code{FUNCTAB} array @@ -14922,7 +14922,7 @@ or @code{"FPAT"} if field matching with @code{FPAT} is in effect. @item PROCINFO["identifiers"] @cindex program identifiers A subarray, indexed by the names of all identifiers used in the text of -the AWK program. An @dfn{identifier} is simply the name of a variable +the @command{awk} program. An @dfn{identifier} is simply the name of a variable (be it scalar or array), built-in function, user-defined function, or extension function. For each identifier, the value of the element is one of the following: @@ -14942,7 +14942,7 @@ The identifier is an extension function loaded via The identifier is a scalar. @item "untyped" -The identifier is untyped (could be used as a scalar or array, +The identifier is untyped (could be used as a scalar or an array; @command{gawk} doesn't know yet). @item "user" @@ -15063,7 +15063,7 @@ is the length of the matched string, or @minus{}1 if no match is found. 
@cindex @code{RSTART} variable @item @code{RSTART} -The start-index in characters of the substring that is matched by the +The start index in characters of the substring that is matched by the @code{match()} function (@pxref{String Functions}). @code{RSTART} is set by invoking the @code{match()} function. Its value @@ -15130,7 +15130,7 @@ function multiply(variable, amount) @quotation NOTE In order to avoid severe time-travel paradoxes,@footnote{Not to mention difficult implementation issues.} neither @code{FUNCTAB} nor @code{SYMTAB} -are available as elements within the @code{SYMTAB} array. +is available as an element within the @code{SYMTAB} array. @end quotation @end table @@ -15350,7 +15350,7 @@ When designing your program, you should choose options that don't conflict with @command{gawk}'s, because it will process any options that it accepts before passing the rest of the command line on to your program. Using @samp{#!} with the @option{-E} option may help -(@DBXREF{Executable Scripts} +(@DBPXREF{Executable Scripts} and @ifnotdocbook @DBPXREF{Options}). @@ -15364,15 +15364,15 @@ and @itemize @value{BULLET} @item -Pattern-action pairs make up the basic elements of an @command{awk} +Pattern--action pairs make up the basic elements of an @command{awk} program. Patterns are either normal expressions, range expressions, -regexp constants, one of the special keywords @code{BEGIN}, @code{END}, -@code{BEGINFILE}, @code{ENDFILE}, or empty. The action executes if +or regexp constants; one of the special keywords @code{BEGIN}, @code{END}, +@code{BEGINFILE}, or @code{ENDFILE}; or empty. The action executes if the current record matches the pattern. Empty (missing) patterns match all records. @item -I/O from @code{BEGIN} and @code{END} rules have certain constraints. +I/O from @code{BEGIN} and @code{END} rules has certain constraints. This is also true, only more so, for @code{BEGINFILE} and @code{ENDFILE} rules. 
The latter two give you ``hooks'' into @command{gawk}'s file processing, allowing you to recover from a file that otherwise would @@ -15402,12 +15402,12 @@ iteration of a loop (or get out of a @code{switch}). @item @code{next} and @code{nextfile} let you read the next record and start -over at the top of your program, or skip to the next input file and +over at the top of your program or skip to the next input file and start over, respectively. @item The @code{exit} statement terminates your program. When executed -from an action (or function body) it transfers control to the +from an action (or function body), it transfers control to the @code{END} statements. From an @code{END} statement body, it exits immediately. You may pass an optional numeric value to be used as @command{awk}'s exit status. @@ -15510,15 +15510,17 @@ the declaration. indices---e.g., @samp{15 .. 27}---but the size of the array is still fixed when the array is declared.) -A contiguous array of four elements might look like the following example, -conceptually, if the element values are 8, @code{"foo"}, -@code{""}, and 30 +@c 1/2015: Do not put the numeric values into @code. Array element +@c values are no different than scalar variable values. +A contiguous array of four elements might look like @ifnotdocbook -as shown in @ref{figure-array-elements}: +@ref{figure-array-elements}, @end ifnotdocbook @ifdocbook -as shown in @inlineraw{docbook, <xref linkend="figure-array-elements"/>}: +@inlineraw{docbook, <xref linkend="figure-array-elements"/>}, @end ifdocbook +conceptually, if the element values are eight, @code{"foo"}, +@code{""}, and 30. @ifnotdocbook @float Figure,figure-array-elements @@ -15543,7 +15545,7 @@ as shown in @inlineraw{docbook, <xref linkend="figure-array-elements"/>}: @noindent Only the values are stored; the indices are implicit from the order of -the values. Here, 8 is the value at index zero, because 8 appears in the +the values. 
Here, eight is the value at index zero, because eight appears in the position with zero elements before it. @cindex arrays, indexing @@ -15555,19 +15557,21 @@ that each array is a collection of pairs---an index and its corresponding array element value: @ifnotdocbook -@example -@r{Index} 3 @r{Value} 30 -@r{Index} 1 @r{Value} "foo" -@r{Index} 0 @r{Value} 8 -@r{Index} 2 @r{Value} "" -@end example +@c extra empty column to indent it right +@multitable @columnfractions .1 .1 .1 +@headitem @tab Index @tab Value +@item @tab @code{3} @tab @code{30} +@item @tab @code{1} @tab @code{"foo"} +@item @tab @code{0} @tab @code{8} +@item @tab @code{2} @tab @code{""} +@end multitable @end ifnotdocbook @docbook <informaltable> <tgroup cols="2"> -<colspec colname="1" align="center"/> -<colspec colname="2" align="center"/> +<colspec colname="1" align="left"/> +<colspec colname="2" align="left"/> <thead> <row> <entry>Index</entry> @@ -15613,20 +15617,22 @@ at any time. For example, suppose a tenth element is added to the array whose value is @w{@code{"number ten"}}. The result is: @ifnotdocbook -@example -@r{Index} 10 @r{Value} "number ten" -@r{Index} 3 @r{Value} 30 -@r{Index} 1 @r{Value} "foo" -@r{Index} 0 @r{Value} 8 -@r{Index} 2 @r{Value} "" -@end example +@c extra empty column to indent it right +@multitable @columnfractions .1 .1 .2 +@headitem @tab Index @tab Value +@item @tab @code{10} @tab @code{"number ten"} +@item @tab @code{3} @tab @code{30} +@item @tab @code{1} @tab @code{"foo"} +@item @tab @code{0} @tab @code{8} +@item @tab @code{2} @tab @code{""} +@end multitable @end ifnotdocbook @docbook <informaltable> <tgroup cols="2"> -<colspec colname="1" align="center"/> -<colspec colname="2" align="center"/> +<colspec colname="1" align="left"/> +<colspec colname="2" align="left"/> <thead> <row> <entry>Index</entry> @@ -15678,19 +15684,20 @@ an index. 
For example, the following is an array that translates words from English to French: @ifnotdocbook -@example -@r{Index} "dog" @r{Value} "chien" -@r{Index} "cat" @r{Value} "chat" -@r{Index} "one" @r{Value} "un" -@r{Index} 1 @r{Value} "un" -@end example +@multitable @columnfractions .1 .1 .1 +@headitem @tab Index @tab Value +@item @tab @code{"dog"} @tab @code{"chien"} +@item @tab @code{"cat"} @tab @code{"chat"} +@item @tab @code{"one"} @tab @code{"un"} +@item @tab @code{1} @tab @code{"un"} +@end multitable @end ifnotdocbook @docbook <informaltable> <tgroup cols="2"> -<colspec colname="1" align="center"/> -<colspec colname="2" align="center"/> +<colspec colname="1" align="left"/> +<colspec colname="2" align="left"/> <thead> <row> <entry>Index</entry> @@ -15732,7 +15739,7 @@ numbers and strings as indices. There are some subtleties to how numbers work when used as array subscripts; this is discussed in more detail in @ref{Numeric Array Subscripts}.) -Here, the number @code{1} isn't double quoted, because @command{awk} +Here, the number @code{1} isn't double-quoted, because @command{awk} automatically converts it to a string. @cindex @command{gawk}, @code{IGNORECASE} variable in @@ -15757,7 +15764,7 @@ is independent of the number of elements in the array. @cindex elements of arrays The principal way to use an array is to refer to one of its elements. -An array reference is an expression as follows: +An @dfn{array reference} is an expression as follows: @example @var{array}[@var{index-expression}] @@ -15767,8 +15774,11 @@ An array reference is an expression as follows: Here, @var{array} is the name of an array. The expression @var{index-expression} is the index of the desired element of the array. +@c 1/2015: Having the 4.3 in @samp is a little iffy. It's essentially +@c an expression though, so leave be. It's too early in the discussion +@c to mention that it's really a string. The value of the array reference is the current value of that array -element. 
For example, @code{foo[4.3]} is an expression for the element +element. For example, @code{foo[4.3]} is an expression referencing the element of array @code{foo} at index @samp{4.3}. @cindex arrays, unassigned elements @@ -15860,7 +15870,7 @@ assign to that element of the array. The following program takes a list of lines, each beginning with a line number, and prints them out in order of line number. The line numbers -are not in order when they are first read---instead they +are not in order when they are first read---instead, they are scrambled. This program sorts the lines by making an array using the line numbers as subscripts. The program then prints out the lines in sorted order of their numbers. It is a very simple program and gets @@ -15954,7 +15964,7 @@ program has previously used, with the variable @var{var} set to that index. The following program uses this form of the @code{for} statement. The first rule scans the input records and notes which words appear (at least once) in the input, by storing a one into the array @code{used} with -the word as index. The second rule scans the elements of @code{used} to +the word as the index. The second rule scans the elements of @code{used} to find all the distinct words that appear in the input. It prints each word that is more than 10 characters long and also prints the number of such words. @@ -16051,7 +16061,7 @@ and will vary from one version of @command{awk} to the next. Often, though, you may wish to do something simple, such as ``traverse the array by comparing the indices in ascending order,'' or ``traverse the array by comparing the values in descending order.'' -@command{gawk} provides two mechanisms which give you this control. +@command{gawk} provides two mechanisms that give you this control: @itemize @value{BULLET} @item @@ -16108,21 +16118,26 @@ across different environments.} which @command{gawk} uses internally to perform the sorting. @item "@@ind_str_desc" -String indices ordered from high to low. 
+Like @code{"@@ind_str_asc"}, but the +string indices are ordered from high to low. @item "@@ind_num_desc" -Numeric indices ordered from high to low. +Like @code{"@@ind_num_asc"}, but the +numeric indices are ordered from high to low. @item "@@val_type_desc" -Element values, based on type, ordered from high to low. +Like @code{"@@val_type_asc"}, but the +element values, based on type, are ordered from high to low. Subarrays, if present, come out first. @item "@@val_str_desc" -Element values, treated as strings, ordered from high to low. +Like @code{"@@val_str_asc"}, but the +element values, treated as strings, are ordered from high to low. Subarrays, if present, come out first. @item "@@val_num_desc" -Element values, treated as numbers, ordered from high to low. +Like @code{"@@val_num_asc"}, but the +element values, treated as numbers, are ordered from high to low. Subarrays, if present, come out first. @end table @@ -16345,7 +16360,7 @@ for (i in frequencies) @noindent This example removes all the elements from the array @code{frequencies}. Once an element is deleted, a subsequent @code{for} statement to scan the array -does not report that element and the @code{in} operator to check for +does not report that element and using the @code{in} operator to check for the presence of that element returns zero (i.e., false): @example @@ -16605,7 +16620,7 @@ a[1][2] = 2 This simulates a true two-dimensional array. Each subarray element can contain another subarray as a value, which in turn can hold other arrays as well. In this way, you can create arrays of three or more dimensions. -The indices can be any @command{awk} expression, including scalars +The indices can be any @command{awk} expressions, including scalars separated by commas (i.e., a regular @command{awk} simulated multidimensional subscript). So the following is valid in @command{gawk}: @@ -16617,7 +16632,7 @@ a[1][3][1, "name"] = "barney" Each subarray and the main array can be of different length. 
In fact, the elements of an array or its subarray do not all have to have the same type. This means that the main array and any of its subarrays can be -non-rectangular, or jagged in structure. You can assign a scalar value to +nonrectangular, or jagged in structure. You can assign a scalar value to the index @code{4} of the main array @code{a}, even though @code{a[1]} is itself an array and not a scalar: @@ -16641,7 +16656,8 @@ a[4][5][6][7] = "An element in a four-dimensional array" @noindent This removes the scalar value from index @code{4} and then inserts a -subarray of subarray of subarray containing a scalar. You can also +three-level nested subarray +containing a scalar. You can also delete an entire subarray or subarray of subarrays: @example @@ -16652,7 +16668,7 @@ a[4][5] = "An element in subarray a[4]" But recall that you can not delete the main array @code{a} and then use it as a scalar. -The built-in functions which take array arguments can also be used +The built-in functions that take array arguments can also be used with subarrays. For example, the following code fragment uses @code{length()} (@pxref{String Functions}) to determine the number of elements in the main array @code{a} and @@ -16682,7 +16698,7 @@ can be nested to scan all the elements of an array of arrays if it is rectangular in structure. In order to print the contents (scalar values) of a two-dimensional array of arrays (i.e., in which each first-level element is itself an -array, not necessarily of the same length) +array, not necessarily of the same length), you could use the following code: @example @@ -16782,9 +16798,9 @@ versions of @command{awk}. @item Standard @command{awk} simulates multidimensional arrays by separating -subscript values with a comma. The values are concatenated into a +subscript values with commas. The values are concatenated into a single string, separated by the value of @code{SUBSEP}. 
The fact -that such a subscript was created in this way is not retained; thus +that such a subscript was created in this way is not retained; thus, changing @code{SUBSEP} may have unexpected consequences. You can use @samp{(@var{sub1}, @var{sub2}, @dots{}) in @var{array}} to see if such a multidimensional subscript exists in @var{array}. @@ -16793,7 +16809,7 @@ a multidimensional subscript exists in @var{array}. @command{gawk} provides true arrays of arrays. You use a separate set of square brackets for each dimension in such an array: @code{data[row][col]}, for example. Array elements may thus be either -scalar values (number or string) or another array. +scalar values (number or string) or other arrays. @item Use the @code{isarray()} built-in function to determine if an array @@ -16818,6 +16834,9 @@ Besides the built-in functions, @command{awk} has provisions for writing new functions that the rest of a program can use. The second half of this @value{CHAPTER} describes these @dfn{user-defined} functions. +Finally, we explore indirect function calls, a @command{gawk}-specific +extension that lets you determine at runtime what function is to +be called. @menu * Built-in:: Summarizes the built-in functions. @@ -16827,7 +16846,7 @@ The second half of this @value{CHAPTER} describes these @end menu @node Built-in -@section Built-In Functions +@section Built-in Functions @dfn{Built-in} functions are always available for your @command{awk} program to call. This @value{SECTION} defines all @@ -16850,7 +16869,7 @@ but are summarized here for your convenience. 
@end menu @node Calling Built-in -@subsection Calling Built-In Functions +@subsection Calling Built-in Functions To call one of @command{awk}'s built-in functions, write the name of the function followed @@ -16901,7 +16920,7 @@ j = atan2(++i, i *= 2) @end example If the order of evaluation is left to right, then @code{i} first becomes -6, and then 12, and @code{atan2()} is called with the two arguments 6 +six, and then 12, and @code{atan2()} is called with the two arguments six and 12. But if the order of evaluation is right to left, @code{i} first becomes 10, then 11, and @code{atan2()} is called with the two arguments 11 and 10. @@ -16982,7 +17001,7 @@ In fact, @command{gawk} uses the BSD @code{random()} function, which is considerably better than @code{rand()}, to produce random numbers.} Often random integers are needed instead. Following is a user-defined function -that can be used to obtain a random non-negative integer less than @var{n}: +that can be used to obtain a random nonnegative integer less than @var{n}: @example function randint(n) @@ -17077,7 +17096,7 @@ implementations. The functions in this @value{SECTION} look at or change the text of one or more strings. -@code{gawk} understands locales (@pxref{Locales}), and does all +@command{gawk} understands locales (@pxref{Locales}) and does all string processing in terms of @emph{characters}, not @emph{bytes}. This distinction is particularly important to understand for locales where one character may be represented by multiple bytes. Thus, for @@ -17166,7 +17185,7 @@ a[2] = "de" a[3] = "sac" @end example -The @code{asorti()} function works similarly to @code{asort()}, however, +The @code{asorti()} function works similarly to @code{asort()}; however, the @emph{indices} are sorted, instead of the values. 
Thus, in the previous example, starting with the same initial set of indices and values in @code{a}, calling @samp{asorti(a)} would yield: @@ -17281,7 +17300,7 @@ If @var{find} is not found, @code{index()} returns zero. With BWK @command{awk} and @command{gawk}, it is a fatal error to use a regexp constant for @var{find}. Other implementations allow it, simply treating the regexp -constant as an expression meaning @samp{$0 ~ /regexp/}. @value{DARKCORNER}. +constant as an expression meaning @samp{$0 ~ /regexp/}. @value{DARKCORNER} @item @code{length(}[@var{string}]@code{)} @cindexawkfunc{length} @@ -17364,7 +17383,7 @@ If @option{--posix} is supplied, using an array argument is a fatal error @cindex string, regular expression match @cindex match regexp in string Search @var{string} for the -longest, leftmost substring matched by the regular expression, +longest, leftmost substring matched by the regular expression @var{regexp} and return the character position (index) at which that substring begins (one, if it starts at the beginning of @var{string}). If no match is found, return zero. @@ -17376,7 +17395,7 @@ In the latter case, the string is treated as a regexp to be matched. discussion of the difference between the two forms, and the implications for writing your program correctly. -The order of the first two arguments is backwards from most other string +The order of the first two arguments is the opposite of most other string functions that work with regular expressions, such as @code{sub()} and @code{gsub()}. 
It might help to remember that for @code{match()}, the order is the same as for the @samp{~} operator: @@ -17465,7 +17484,7 @@ $ @kbd{echo foooobazbarrrrr |} @end example There may not be subscripts for the start and index for every parenthesized -subexpression, because they may not all have matched text; thus they +subexpression, because they may not all have matched text; thus, they should be tested for with the @code{in} operator (@pxref{Reference to Elements}). @@ -17512,13 +17531,13 @@ a regexp describing where to split @var{string} (much as @code{FS} can be a regexp describing where to split input records). If @var{fieldsep} is omitted, the value of @code{FS} is used. @code{split()} returns the number of elements created. -@var{seps} is a @command{gawk} extension with @code{@var{seps}[@var{i}]} +@var{seps} is a @command{gawk} extension, with @code{@var{seps}[@var{i}]} being the separator string between @code{@var{array}[@var{i}]} and @code{@var{array}[@var{i}+1]}. If @var{fieldsep} is a single -space then any leading whitespace goes into @code{@var{seps}[0]} and +space, then any leading whitespace goes into @code{@var{seps}[0]} and any trailing -whitespace goes into @code{@var{seps}[@var{n}]} where @var{n} is the +whitespace goes into @code{@var{seps}[@var{n}]}, where @var{n} is the return value of @code{split()} (i.e., the number of elements in @var{array}). @@ -17531,7 +17550,7 @@ split("cul-de-sac", a, "-", seps) @noindent @cindex strings splitting, example -splits the string @samp{cul-de-sac} into three fields using @samp{-} as the +splits the string @code{"cul-de-sac"} into three fields using @samp{-} as the separator. It sets the contents of the array @code{a} as follows: @example @@ -17556,19 +17575,18 @@ As with input field-splitting, when the value of @var{fieldsep} is the elements of @var{array} but not in @var{seps}, and the elements are separated by runs of whitespace. 
-Also, as with input field-splitting, if @var{fieldsep} is the null string, each +Also, as with input field splitting, if @var{fieldsep} is the null string, each individual character in the string is split into its own array element. @value{COMMONEXT} Note, however, that @code{RS} has no effect on the way @code{split()} -works. Even though @samp{RS = ""} causes newline to also be an input +works. Even though @samp{RS = ""} causes the newline character to also be an input field separator, this does not affect how @code{split()} splits strings. @cindex dark corner, @code{split()} function Modern implementations of @command{awk}, including @command{gawk}, allow -the third argument to be a regexp constant (@code{/abc/}) as well as a -string. -@value{DARKCORNER} +the third argument to be a regexp constant (@w{@code{/}@dots{}@code{/}}) +as well as a string. @value{DARKCORNER} The POSIX standard allows this as well. @DBXREF{Computed Regexps} for a discussion of the difference between using a string constant or a regexp constant, @@ -17705,7 +17723,7 @@ an @samp{&}: @cindex @code{sub()} function, arguments of @cindex @code{gsub()} function, arguments of As mentioned, the third argument to @code{sub()} must -be a variable, field or array element. +be a variable, field, or array element. Some versions of @command{awk} allow the third argument to be an expression that is not an lvalue. In such a case, @code{sub()} still searches for the pattern and returns zero or one, but the result of @@ -17897,8 +17915,8 @@ example, @code{"a\qb"} is treated as @code{"aqb"}. At the runtime level, the various functions handle sequences of @samp{\} and @samp{&} differently. The situation is (sadly) somewhat complex. 
-Historically, the @code{sub()} and @code{gsub()} functions treated the two -character sequence @samp{\&} specially; this sequence was replaced in +Historically, the @code{sub()} and @code{gsub()} functions treated the +two-character sequence @samp{\&} specially; this sequence was replaced in the generated text with a single @samp{&}. Any other @samp{\} within the @var{replacement} string that did not precede an @samp{&} was passed through unchanged. This is illustrated in @ref{table-sub-escapes}. @@ -17956,7 +17974,7 @@ _bigskip} @end float @noindent -This table shows both the lexical-level processing, where +This table shows the lexical-level processing, where an odd number of backslashes becomes an even number at the runtime level, as well as the runtime processing done by @code{sub()}. (For the sake of simplicity, the rest of the following tables only show the @@ -17977,7 +17995,7 @@ This is shown in @ref{table-sub-proposed}. @float Table,table-sub-proposed -@caption{GNU @command{awk} rules for @code{sub()} and backslash} +@caption{@command{gawk} rules for @code{sub()} and backslash} @tex \vbox{\bigskip % We need more characters for escape and tab ... @@ -18022,7 +18040,7 @@ _bigskip} @end float In a nutshell, at the runtime level, there are now three special sequences -of characters (@samp{\\\&}, @samp{\\&} and @samp{\&}) whereas historically +of characters (@samp{\\\&}, @samp{\\&}, and @samp{\&}) whereas historically there was only one. However, as in the historical case, any @samp{\} that is not part of one of these three sequences is not special and appears in the output literally. @@ -18088,7 +18106,7 @@ The only case where the difference is noticeable is the last one: @samp{\\\\} is seen as @samp{\\} and produces @samp{\} instead of @samp{\\}. Starting with @value{PVERSION} 3.1.4, @command{gawk} followed the POSIX rules -when @option{--posix} is specified (@pxref{Options}). Otherwise, +when @option{--posix} was specified (@pxref{Options}). 
Otherwise, it continued to follow the proposed rules, as that had been its behavior for many years. @@ -18156,7 +18174,7 @@ _bigskip} @end ifnottex @end float -Because of the complexity of the lexical and runtime level processing +Because of the complexity of the lexical- and runtime-level processing and the special cases for @code{sub()} and @code{gsub()}, we recommend the use of @command{gawk} and @code{gensub()} when you have to do substitutions. @@ -18182,6 +18200,7 @@ for more information. When closing a coprocess, it is occasionally useful to first close one end of the two-way pipe and then to close the other. This is done by providing a second argument to @code{close()}. This second argument +(@var{how}) should be one of the two string values @code{"to"} or @code{"from"}, indicating which end of the pipe to close. Case in the string does not matter. @@ -18208,7 +18227,7 @@ every little bit of information as soon as it is ready. However, sometimes it is necessary to force a program to @dfn{flush} its buffers (i.e., write the information to its destination, even if a buffer is not full). This is the purpose of the @code{fflush()} function---@command{gawk} also -buffers its output and the @code{fflush()} function forces +buffers its output, and the @code{fflush()} function forces @command{gawk} to flush its buffers. @cindex extensions, common@comma{} @code{fflush()} function @@ -18229,7 +18248,7 @@ would flush only the standard output if there was no argument, and flush all output files and pipes if the argument was the null string. This was changed in order to be compatible with Brian Kernighan's @command{awk}, in the hope that standardizing this -feature in POSIX would then be easier (which indeed helped). +feature in POSIX would then be easier (which indeed proved to be the case). With @command{gawk}, you can use @samp{fflush("/dev/stdout")} if you wish to flush @@ -18240,7 +18259,7 @@ only the standard output. 
@c @cindex warnings, automatic @cindex troubleshooting, @code{fflush()} function @code{fflush()} returns zero if the buffer is successfully flushed; -otherwise, it returns non-zero. (@command{gawk} returns @minus{}1.) +otherwise, it returns a nonzero value. (@command{gawk} returns @minus{}1.) In the case where all buffers are flushed, the return value is zero only if all buffers were flushed successfully. Otherwise, it is @minus{}1, and @command{gawk} warns about the problem @var{filename}. @@ -18258,8 +18277,8 @@ In such a case, @code{fflush()} returns @minus{}1, as well. @cindex buffering, interactive vs.@: noninteractive -As a side point, buffering issues can be even more confusing, depending -upon whether your program is @dfn{interactive} (i.e., communicating +As a side point, buffering issues can be even more confusing if +your program is @dfn{interactive} (i.e., communicating with a user sitting at a keyboard).@footnote{A program is interactive if the standard output is connected to a terminal device. On modern systems, this means your keyboard and screen.} @@ -18309,8 +18328,8 @@ it is all buffered and sent down the pipe to @command{cat} in one shot. @cindex buffering, interactive vs.@: noninteractive -As a side point, buffering issues can be even more confusing, depending -upon whether your program is @dfn{interactive} (i.e., communicating +As a side point, buffering issues can be even more confusing if +your program is @dfn{interactive} (i.e., communicating with a user sitting at a keyboard).@footnote{A program is interactive if the standard output is connected to a terminal device. On modern systems, this means your keyboard and screen.} @@ -18354,7 +18373,7 @@ it is all buffered and sent down the pipe to @command{cat} in one shot. @cindexawkfunc{system} @cindex invoke shell command @cindex interacting with other programs -Execute the operating-system +Execute the operating system command @var{command} and then return to the @command{awk} program. 
Return @var{command}'s exit status. @@ -18534,9 +18553,9 @@ you would see the latter (undesirable) output. @cindex files, log@comma{} timestamps in @cindex @command{gawk}, timestamps @cindex POSIX @command{awk}, timestamps and -@code{awk} programs are commonly used to process log files +@command{awk} programs are commonly used to process log files containing timestamp information, indicating when a -particular log record was written. Many programs log their timestamp +particular log record was written. Many programs log their timestamps in the form returned by the @code{time()} system call, which is the number of seconds since a particular epoch. On POSIX-compliant systems, it is the number of seconds since @@ -18597,7 +18616,7 @@ The values of these numbers need not be within the ranges specified; for example, an hour of @minus{}1 means 1 hour before midnight. The origin-zero Gregorian calendar is assumed, with year 0 preceding year 1 and year @minus{}1 preceding year 0. -The time is assumed to be in the local timezone. +The time is assumed to be in the local time zone. If the daylight-savings flag is positive, the time is assumed to be daylight savings time; if zero, the time is assumed to be standard time; and if negative (the default), @code{mktime()} attempts to determine @@ -18757,12 +18776,12 @@ Equivalent to specifying @samp{%H:%M:%S}. The weekday as a decimal number (1--7). Monday is day one. @item %U -The week number of the year (the first Sunday as the first day of week one) +The week number of the year (with the first Sunday as the first day of week one) as a decimal number (00--53). @c @cindex ISO 8601 @item %V -The week number of the year (the first Monday as the first +The week number of the year (with the first Monday as the first day of week one) as a decimal number (01--53). The method for determining the week number is as specified by ISO 8601. 
(To wit: if the week containing January 1 has four or more days in the @@ -18773,7 +18792,7 @@ and the next week is week one.) The weekday as a decimal number (0--6). Sunday is day zero. @item %W -The week number of the year (the first Monday as the first day of week one) +The week number of the year (with the first Monday as the first day of week one) as a decimal number (00--53). @item %x @@ -18793,8 +18812,8 @@ The full year as a decimal number (e.g., 2015). @c @cindex RFC 822 @c @cindex RFC 1036 @item %z -The timezone offset in a +HHMM format (e.g., the format necessary to -produce RFC 822/RFC 1036 date headers). +The time zone offset in a @samp{+@var{HHMM}} format (e.g., the format +necessary to produce RFC 822/RFC 1036 date headers). @item %Z The time zone name or abbreviation; no characters if @@ -18934,7 +18953,7 @@ The operations are described in @ref{table-bitwise-ops}. @ifnottex @ifnotdocbook @display - Bit Operator + Bit operator | AND | OR | XOR |---+---+---+---+---+--- Operands | 0 | 1 | 0 | 1 | 0 | 1 @@ -18992,7 +19011,7 @@ Operands | 0 | 1 | 0 | 1 | 0 | 1 <tbody> <row> <entry colsep="0"></entry> -<entry spanname="optitle"><emphasis role="bold">Bit Operator</emphasis></entry> +<entry spanname="optitle"><emphasis role="bold">Bit operator</emphasis></entry> </row> <row rowsep="1"> @@ -19056,10 +19075,9 @@ of a given value. Finally, two other common operations are to shift the bits left or right. For example, if you have a bit string @samp{10111001} and you shift it right by three bits, you end up with @samp{00010111}.@footnote{This example -shows that 0's come in on the left side. For @command{gawk}, this is +shows that zeros come in on the left side. For @command{gawk}, this is always true, but in some languages, it's possible to have the left side -fill with 1's.} -@c Purposely decided to use 0's and 1's here. 2/2001. +fill with ones.} If you start over again with @samp{10111001} and shift it left by three bits, you end up with @samp{11001000}. 
The following list describes @command{gawk}'s built-in functions that implement the bitwise operations. @@ -19113,7 +19131,7 @@ that illustrates the use of these functions: @example @group @c file eg/lib/bits2str.awk -# bits2str --- turn a byte into readable 1's and 0's +# bits2str --- turn a byte into readable ones and zeros function bits2str(bits, data, mask) @{ @@ -19187,15 +19205,16 @@ $ @kbd{gawk -f testbits.awk} @cindex converting, numbers to strings @cindex number as string of bits The @code{bits2str()} function turns a binary number into a string. -The number @code{1} represents a binary value where the rightmost bit -is set to 1. Using this mask, +Initializing @code{mask} to one creates +a binary value where the rightmost bit +is set to one. Using this mask, the function repeatedly checks the rightmost bit. ANDing the mask with the value indicates whether the -rightmost bit is 1 or not. If so, a @code{"1"} is concatenated onto the front +rightmost bit is one or not. If so, a @code{"1"} is concatenated onto the front of the string. Otherwise, a @code{"0"} is added. The value is then shifted right by one bit and the loop continues -until there are no more 1 bits. +until there are no more one bits. If the initial value is zero, it returns a simple @code{"0"}. Otherwise, at the end, it pads the value with zeros to represent multiples @@ -19219,7 +19238,7 @@ that traverses every element of an array of arrays @cindexgawkfunc{isarray} @cindex scalar or array @item isarray(@var{x}) -Return a true value if @var{x} is an array. Otherwise return false. +Return a true value if @var{x} is an array. Otherwise, return false. @end table @code{isarray()} is meant for use in two circumstances. The first is when @@ -19280,7 +19299,7 @@ The default value for @var{category} is @code{"LC_MESSAGES"}. Return the plural form used for @var{number} of the translation of @var{string1} and @var{string2} in text domain @var{domain} for locale category @var{category}. 
@var{string1} is the -English singular variant of a message, and @var{string2} the English plural +English singular variant of a message, and @var{string2} is the English plural variant of the same message. The default value for @var{domain} is the current value of @code{TEXTDOMAIN}. The default value for @var{category} is @code{"LC_MESSAGES"}. @@ -19309,7 +19328,7 @@ them (i.e., to tell @command{awk} what they should do). @subsection Function Definition Syntax @quotation -@i{It's entirely fair to say that the @command{awk} syntax for local +@i{It's entirely fair to say that the awk syntax for local variable definitions is appallingly awful.} @author Brian Kernighan @end quotation @@ -19351,14 +19370,23 @@ the call. A function cannot have two parameters with the same name, nor may it have a parameter with the same name as the function itself. -In addition, according to the POSIX standard, function parameters + +@quotation CAUTION +According to the POSIX standard, function parameters cannot have the same name as one of the special predefined variables -(@pxref{Built-in Variables}). Not all versions of @command{awk} enforce -this restriction. +(@pxref{Built-in Variables}), nor may a function parameter have the +same name as another function. + +Not all versions of @command{awk} enforce +these restrictions. +@command{gawk} always enforces the first restriction. +With @option{--posix} (@pxref{Options}), +it also enforces the second restriction. +@end quotation Local variables act like the empty string if referenced where a string value is required, and like zero if referenced where a numeric value -is required. This is the same as regular variables that have never been +is required. This is the same as the behavior of regular variables that have never been assigned a value. (There is more to understand about local variables; @pxref{Dynamic Typing}.) 
@@ -19392,7 +19420,7 @@ During execution of the function body, the arguments and local variable values hide, or @dfn{shadow}, any variables of the same names used in the rest of the program. The shadowed variables are not accessible in the function definition, because there is no way to name them while their -names have been taken away for the local variables. All other variables +names have been taken away for the arguments and local variables. All other variables used in the @command{awk} program can be referenced or set normally in the function's body. @@ -19459,7 +19487,7 @@ function myprint(num) @end example @noindent -To illustrate, here is an @command{awk} rule that uses our @code{myprint} +To illustrate, here is an @command{awk} rule that uses our @code{myprint()} function: @example @@ -19500,13 +19528,13 @@ in an array and start over with a new list of elements (@pxref{Delete}). Instead of having to repeat this loop everywhere that you need to clear out -an array, your program can just call @code{delarray}. +an array, your program can just call @code{delarray()}. (This guarantees portability. The use of @samp{delete @var{array}} to delete the contents of an entire array is a relatively recent@footnote{Late in 2012.} addition to the POSIX standard.) The following is an example of a recursive function. It takes a string -as an input parameter and returns the string in backwards order. +as an input parameter and returns the string in reverse order. Recursive functions must always have a test that stops the recursion. In this case, the recursion terminates when the input string is already empty: @@ -19603,7 +19631,7 @@ an error. @cindex local variables, in a function @cindex variables, local to a function -Unlike many languages, +Unlike in many languages, there is no way to make a variable local to a @code{@{} @dots{} @code{@}} block in @command{awk}, but you can make a variable local to a function. 
It is good practice to do so whenever a variable is needed only in that @@ -19612,7 +19640,7 @@ function. To make a variable local to a function, simply declare the variable as an argument after the actual function arguments (@pxref{Definition Syntax}). -Look at the following example where variable +Look at the following example, where variable @code{i} is a global variable used by both functions @code{foo()} and @code{bar()}: @@ -19653,7 +19681,7 @@ foo's i=3 top's i=3 @end example -If you want @code{i} to be local to both @code{foo()} and @code{bar()} do as +If you want @code{i} to be local to both @code{foo()} and @code{bar()}, do as follows (the extra space before @code{i} is a coding convention to indicate that @code{i} is a local variable, not an argument): @@ -19741,7 +19769,7 @@ declare explicitly whether the arguments are passed @dfn{by value} or @dfn{by reference}. Instead, the passing convention is determined at runtime when -the function is called according to the following rule: +the function is called, according to the following rule: if the argument is an array variable, then it is passed by reference. Otherwise, the argument is passed by value. @@ -19818,7 +19846,7 @@ prints @samp{a[1] = 1, a[2] = two, a[3] = 3}, because @cindex undefined functions @cindex functions, undefined Some @command{awk} implementations allow you to call a function that -has not been defined. They only report a problem at runtime when the +has not been defined. They only report a problem at runtime, when the program actually tries to call the function. For example: @example @@ -19877,15 +19905,15 @@ makes the returned value undefined, and therefore, unpredictable. In practice, though, all versions of @command{awk} simply return the null string, which acts like zero if used in a numeric context. -A @code{return} statement with no value expression is assumed at the end of -every function definition. 
So if control reaches the end of the function -body, then technically, the function returns an unpredictable value. +A @code{return} statement without an @var{expression} is assumed at the end of +every function definition. So, if control reaches the end of the function +body, then technically the function returns an unpredictable value. In practice, it returns the empty string. @command{awk} does @emph{not} warn you if you use the return value of such a function. Sometimes, you want to write a function for what it does, not for what it returns. Such a function corresponds to a @code{void} function -in C, C++ or Java, or to a @code{procedure} in Ada. Thus, it may be appropriate to not +in C, C++, or Java, or to a @code{procedure} in Ada. Thus, it may be appropriate to not return any value; simply bear in mind that you should not be using the return value of such a function. @@ -20004,13 +20032,15 @@ function calls, you can specify the name of the function to call as a string variable, and then call the function. Let's look at an example. Suppose you have a file with your test scores for the classes you -are taking. The first field is the class name. The following fields +are taking, and +you wish to get the sum and the average of +your test scores. +The first field is the class name. The following fields are the functions to call to process the data, up to a ``marker'' field @samp{data:}. Following the marker, to the end of the record, are the various numeric test scores. -Here is the initial file; you wish to get the sum and the average of -your test scores: +Here is the initial file: @example @c file eg/data/class_data1 @@ -20093,9 +20123,9 @@ function sum(first, last, ret, i) @c endfile @end example -These two functions expect to work on fields; thus the parameters +These two functions expect to work on fields; thus, the parameters @code{first} and @code{last} indicate where in the fields to start and end. 
-Otherwise they perform the expected computations and are not unusual: +Otherwise, they perform the expected computations and are not unusual: @example @c file eg/prog/indirectcall.awk @@ -20154,8 +20184,8 @@ The ability to use indirect function calls is more powerful than you may think at first. The C and C++ languages provide ``function pointers,'' which are a mechanism for calling a function chosen at runtime. One of the most well-known uses of this ability is the C @code{qsort()} function, which sorts -an array using the famous ``quick sort'' algorithm -(see @uref{http://en.wikipedia.org/wiki/Quick_sort, the Wikipedia article} +an array using the famous ``quicksort'' algorithm +(see @uref{http://en.wikipedia.org/wiki/Quicksort, the Wikipedia article} for more information). To use this function, you supply a pointer to a comparison function. This mechanism allows you to sort arbitrary data in an arbitrary fashion. @@ -20174,11 +20204,11 @@ We can do something similar using @command{gawk}, like this: # January 2009 @c endfile - @end ignore @c file eg/lib/quicksort.awk -# quicksort --- C.A.R. Hoare's quick sort algorithm. See Wikipedia -# or almost any algorithms or computer science text + +# quicksort --- C.A.R. Hoare's quicksort algorithm. See Wikipedia +# or almost any algorithms or computer science text. @c endfile @ignore @c file eg/lib/quicksort.awk @@ -20216,7 +20246,7 @@ function quicksort_swap(data, i, j, temp) The @code{quicksort()} function receives the @code{data} array, the starting and ending indices to sort (@code{left} and @code{right}), and the name of a function that -performs a ``less than'' comparison. It then implements the quick sort algorithm. +performs a ``less than'' comparison. It then implements the quicksort algorithm. To make use of the sorting function, we return to our previous example. 
The first thing to do is write some comparison functions: @@ -20407,7 +20437,7 @@ for (i = 1; i <= n; i++) @end example @noindent -@code{gawk} looks up the actual function to call only once. +@command{gawk} looks up the actual function to call only once. @node Functions Summary @section Summary @@ -20503,7 +20533,7 @@ It contains the following chapters: your own @command{awk} functions. Writing functions is important, because it allows you to encapsulate algorithms and program tasks in a single place. It simplifies programming, making program development more -manageable, and making programs more readable. +manageable and making programs more readable. @cindex Kernighan, Brian @cindex Plauger, P.J.@: @@ -20632,7 +20662,7 @@ often use variable names like these for their own purposes. The example programs shown in this @value{CHAPTER} all start the names of their private variables with an underscore (@samp{_}). Users generally don't use leading underscores in their variable names, so this convention immediately -decreases the chances that the variable name will be accidentally shared +decreases the chances that the variable names will be accidentally shared with the user's program. @cindex @code{_} (underscore), in names of private variables @@ -20650,8 +20680,8 @@ show how our own @command{awk} programming style has evolved and to provide some basis for this discussion.} As a final note on variable naming, if a function makes global variables -available for use by a main program, it is a good convention to start that -variable's name with a capital letter---for +available for use by a main program, it is a good convention to start those +variables' names with a capital letter---for example, @code{getopt()}'s @code{Opterr} and @code{Optind} variables (@pxref{Getopt Function}). The leading capital letter indicates that it is global, while the fact that @@ -20662,7 +20692,7 @@ not one of @command{awk}'s predefined variables, such as @code{FS}. 
It is also important that @emph{all} variables in library functions that do not need to save state are, in fact, declared local.@footnote{@command{gawk}'s @option{--dump-variables} command-line -option is useful for verifying this.} If this is not done, the variable +option is useful for verifying this.} If this is not done, the variables could accidentally be used in the user's program, leading to bugs that are very difficult to track down: @@ -20860,7 +20890,7 @@ Following is the function: @example @c file eg/lib/assert.awk -# assert --- assert that a condition is true. Otherwise exit. +# assert --- assert that a condition is true. Otherwise, exit. @c endfile @ignore @@ -20896,7 +20926,7 @@ is false, it prints a message to standard error, using the @code{string} parameter to describe the failed condition. It then sets the variable @code{_assert_exit} to one and executes the @code{exit} statement. The @code{exit} statement jumps to the @code{END} rule. If the @code{END} -rules finds @code{_assert_exit} to be true, it exits immediately. +rule finds @code{_assert_exit} to be true, it exits immediately. The purpose of the test in the @code{END} rule is to keep any other @code{END} rules from running. When an assertion fails, the @@ -21188,7 +21218,7 @@ all the strings in an array into one long string. The following function, the application programs (@pxref{Sample Programs}). -Good function design is important; this function needs to be general but it +Good function design is important; this function needs to be general, but it should also have a reasonable default behavior. It is called with an array as well as the beginning and ending indices of the elements in the array to be merged. This assumes that the array indices are numeric---a reasonable @@ -21336,7 +21366,7 @@ allowed the user to supply an optional timestamp value to use instead of the current time. 
@node Readfile Function -@subsection Reading a Whole File At Once +@subsection Reading a Whole File at Once Often, it is convenient to have the entire contents of a file available in memory as a single string. A straightforward but naive way to @@ -21393,13 +21423,13 @@ function readfile(file, tmp, save_rs) It works by setting @code{RS} to @samp{^$}, a regular expression that will never match if the file has contents. @command{gawk} reads data from -the file into @code{tmp} attempting to match @code{RS}. The match fails +the file into @code{tmp}, attempting to match @code{RS}. The match fails after each read, but fails quickly, such that @command{gawk} fills @code{tmp} with the entire contents of the file. (@DBXREF{Records} for information on @code{RT} and @code{RS}.) In the case that @code{file} is empty, the return value is the null -string. Thus calling code may use something like: +string. Thus, calling code may use something like: @example contents = readfile("/some/path") @@ -21410,7 +21440,7 @@ if (length(contents) == 0) This tests the result to see if it is empty or not. An equivalent test would be @samp{contents == ""}. -@xref{Extension Sample Readfile}, for an extension function that +@DBXREF{Extension Sample Readfile} for an extension function that also reads an entire file into memory. @node Shell Quoting @@ -21517,8 +21547,8 @@ The @code{BEGIN} and @code{END} rules are each executed exactly once, at the beginning and end of your @command{awk} program, respectively (@pxref{BEGIN/END}). We (the @command{gawk} authors) once had a user who mistakenly thought that the -@code{BEGIN} rule is executed at the beginning of each @value{DF} and the -@code{END} rule is executed at the end of each @value{DF}. +@code{BEGIN} rules were executed at the beginning of each @value{DF} and the +@code{END} rules were executed at the end of each @value{DF}. 
When informed that this was not the case, the user requested that we add new special @@ -21558,7 +21588,7 @@ END @{ endfile(FILENAME) @} This file must be loaded before the user's ``main'' program, so that the rule it supplies is executed first. -This rule relies on @command{awk}'s @code{FILENAME} variable that +This rule relies on @command{awk}'s @code{FILENAME} variable, which automatically changes for each new @value{DF}. The current @value{FN} is saved in a private variable, @code{_oldfilename}. If @code{FILENAME} does not equal @code{_oldfilename}, then a new @value{DF} is being processed and @@ -21574,7 +21604,7 @@ first @value{DF}. The program also supplies an @code{END} rule to do the final processing for the last file. Because this @code{END} rule comes before any @code{END} rules supplied in the ``main'' program, @code{endfile()} is called first. Once -again the value of multiple @code{BEGIN} and @code{END} rules should be clear. +again, the value of multiple @code{BEGIN} and @code{END} rules should be clear. @cindex @code{beginfile()} user-defined function @cindex @code{endfile()} user-defined function @@ -21622,7 +21652,7 @@ how it simplifies writing the main program. You are probably wondering, if @code{beginfile()} and @code{endfile()} functions can do the job, why does @command{gawk} have -@code{BEGINFILE} and @code{ENDFILE} patterns (@pxref{BEGINFILE/ENDFILE})? +@code{BEGINFILE} and @code{ENDFILE} patterns? Good question. Normally, if @command{awk} cannot open a file, this causes an immediate fatal error. In this case, there is no way for a @@ -21631,6 +21661,7 @@ calling it relies on the file being open and at the first record. Thus, the main reason for @code{BEGINFILE} is to give you a ``hook'' to catch files that cannot be processed. @code{ENDFILE} exists for symmetry, and because it provides an easy way to do per-file cleanup processing. +For more information, refer to @ref{BEGINFILE/ENDFILE}. 
@docbook </sidebar> @@ -21645,7 +21676,7 @@ and because it provides an easy way to do per-file cleanup processing. You are probably wondering, if @code{beginfile()} and @code{endfile()} functions can do the job, why does @command{gawk} have -@code{BEGINFILE} and @code{ENDFILE} patterns (@pxref{BEGINFILE/ENDFILE})? +@code{BEGINFILE} and @code{ENDFILE} patterns? Good question. Normally, if @command{awk} cannot open a file, this causes an immediate fatal error. In this case, there is no way for a @@ -21654,6 +21685,7 @@ calling it relies on the file being open and at the first record. Thus, the main reason for @code{BEGINFILE} is to give you a ``hook'' to catch files that cannot be processed. @code{ENDFILE} exists for symmetry, and because it provides an easy way to do per-file cleanup processing. +For more information, refer to @ref{BEGINFILE/ENDFILE}. @end cartouche @end ifnotdocbook @@ -21661,7 +21693,7 @@ and because it provides an easy way to do per-file cleanup processing. @subsection Rereading the Current File @cindex files, reading -Another request for a new built-in function was for a @code{rewind()} +Another request for a new built-in function was for a function that would make it possible to reread the current file. The requesting user didn't want to have to use @code{getline} (@pxref{Getline}) @@ -21670,7 +21702,7 @@ inside a loop. However, as long as you are not in the @code{END} rule, it is quite easy to arrange to immediately close the current input file and then start over with it from the top. -For lack of a better name, we'll call it @code{rewind()}: +For lack of a better name, we'll call the function @code{rewind()}: @cindex @code{rewind()} user-defined function @example @@ -21763,16 +21795,16 @@ See also @ref{ARGC and ARGV}. 
Because @command{awk} variable names only allow the English letters, the regular expression check purposely does not use character classes such as @samp{[:alpha:]} and @samp{[:alnum:]} -(@pxref{Bracket Expressions}) +(@pxref{Bracket Expressions}). @node Empty Files -@subsection Checking for Zero-length Files +@subsection Checking for Zero-Length Files All known @command{awk} implementations silently skip over zero-length files. This is a by-product of @command{awk}'s implicit read-a-record-and-match-against-the-rules loop: when @command{awk} tries to read a record from an empty file, it immediately receives an -end of file indication, closes the file, and proceeds on to the next +end-of-file indication, closes the file, and proceeds on to the next command-line @value{DF}, @emph{without} executing any user-level @command{awk} program code. @@ -21837,7 +21869,7 @@ Occasionally, you might not want @command{awk} to process command-line variable assignments (@pxref{Assignment Options}). In particular, if you have a @value{FN} that contains an @samp{=} character, -@command{awk} treats the @value{FN} as an assignment, and does not process it. +@command{awk} treats the @value{FN} as an assignment and does not process it. Some users have suggested an additional command-line option for @command{gawk} to disable command-line assignments. However, some simple programming with @@ -22199,8 +22231,8 @@ BEGIN @{ @c endfile @end example -The rest of the @code{BEGIN} rule is a simple test program. Here is the -result of two sample runs of the test program: +The rest of the @code{BEGIN} rule is a simple test program. Here are the +results of two sample runs of the test program: @example $ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x} @@ -22258,7 +22290,7 @@ use @code{getopt()} to process their arguments. 
The @code{PROCINFO} array (@pxref{Built-in Variables}) provides access to the current user's real and effective user and group ID -numbers, and if available, the user's supplementary group set. +numbers, and, if available, the user's supplementary group set. However, because these are numbers, they do not provide very useful information to the average user. There needs to be some way to find the user information associated with the user and group ID numbers. This @@ -22278,7 +22310,7 @@ kept. Instead, it provides the @code{<pwd.h>} header file and several C language subroutines for obtaining user information. The primary function is @code{getpwent()}, for ``get password entry.'' The ``password'' comes from the original user database file, -@file{/etc/passwd}, which stores user information, along with the +@file{/etc/passwd}, which stores user information along with the encrypted passwords (hence the name). @cindex @command{pwcat} program @@ -22377,7 +22409,7 @@ The user's encrypted password. This may not be available on some systems. @item User-ID The user's numeric user ID number. -(On some systems, it's a C @code{long}, and not an @code{int}. Thus +(On some systems, it's a C @code{long}, and not an @code{int}. Thus, we cast it to @code{long} for all cases.) @item Group-ID @@ -22504,7 +22536,7 @@ The code that checks for using @code{FPAT}, using @code{using_fpat} and @code{PROCINFO["FS"]}, is similar. The main part of the function uses a loop to read database lines, split -the line into fields, and then store the line into each array as necessary. +the lines into fields, and then store the lines into each array as necessary. When the loop is done, @code{@w{_pw_init()}} cleans up by closing the pipeline, setting @code{@w{_pw_inited}} to one, and restoring @code{FS} (and @code{FIELDWIDTHS} or @code{FPAT} @@ -22721,7 +22753,7 @@ it is usually empty or set to @samp{*}. 
@item Group ID Number The group's numeric group ID number; the association of name to number must be unique within the file. -(On some systems it's a C @code{long}, and not an @code{int}. Thus +(On some systems it's a C @code{long}, and not an @code{int}. Thus, we cast it to @code{long} for all cases.) @item Group Member List @@ -22835,32 +22867,32 @@ The @code{@w{_gr_init()}} function first saves @code{FS}, @code{$0}, and then sets @code{FS} and @code{RS} to the correct values for scanning the group information. It also takes care to note whether @code{FIELDWIDTHS} or @code{FPAT} -is being used, and to restore the appropriate field splitting mechanism. +is being used, and to restore the appropriate field-splitting mechanism. -The group information is stored is several associative arrays. +The group information is stored in several associative arrays. The arrays are indexed by group name (@code{@w{_gr_byname}}), by group ID number (@code{@w{_gr_bygid}}), and by position in the database (@code{@w{_gr_bycount}}). There is an additional array indexed by username (@code{@w{_gr_groupsbyuser}}), which is a space-separated list of groups to which each user belongs. -Unlike the user database, it is possible to have multiple records in the +Unlike in the user database, it is possible to have multiple records in the database for the same group. This is common when a group has a large number of members. A pair of such entries might look like the following: @example -tvpeople:*:101:johny,jay,arsenio +tvpeople:*:101:johnny,jay,arsenio tvpeople:*:101:david,conan,tom,joan @end example For this reason, @code{_gr_init()} looks to see if a group name or -group ID number is already seen. If it is, the usernames are -simply concatenated onto the previous list of users.@footnote{There is actually a +group ID number is already seen. If so, the usernames are +simply concatenated onto the previous list of users.@footnote{There is a subtle problem with the code just presented. 
Suppose that the first time there were no names. This code adds the names with a leading comma. It also doesn't check that there is a @code{$4}.} Finally, @code{_gr_init()} closes the pipeline to @command{grcat}, restores -@code{FS} (and @code{FIELDWIDTHS} or @code{FPAT} if necessary), @code{RS}, and @code{$0}, +@code{FS} (and @code{FIELDWIDTHS} or @code{FPAT}, if necessary), @code{RS}, and @code{$0}, initializes @code{_gr_count} to zero (it is used later), and makes @code{_gr_inited} nonzero. @@ -22960,12 +22992,12 @@ uses these functions. @DBREF{Arrays of Arrays} described how @command{gawk} provides arrays of arrays. In particular, any element of -an array may be either a scalar, or another array. The +an array may be either a scalar or another array. The @code{isarray()} function (@pxref{Type Functions}) lets you distinguish an array from a scalar. The following function, @code{walk_array()}, recursively traverses -an array, printing each element's indices and value. +an array, printing the element indices and values. You call it with the array and a string representing the name of the array: @@ -23037,24 +23069,24 @@ The functions presented here fit into the following categories: @c nested list @table @asis @item General problems -Number-to-string conversion, assertions, rounding, random number +Number-to-string conversion, testing assertions, rounding, random number generation, converting characters to numbers, joining strings, getting easily usable time-of-day information, and reading a whole file in -one shot. +one shot @item Managing @value{DF}s Noting @value{DF} boundaries, rereading the current file, checking for readable files, checking for zero-length files, and treating assignments -as @value{FN}s. +as @value{FN}s @item Processing command-line options -An @command{awk} version of the standard C @code{getopt()} function. 
+An @command{awk} version of the standard C @code{getopt()} function @item Reading the user and group databases -Two sets of routines that parallel the C library versions. +Two sets of routines that parallel the C library versions @item Traversing arrays of arrays -A simple function to traverse an array of arrays to any depth. +A simple function to traverse an array of arrays to any depth @end table @c end nested list @@ -23149,10 +23181,10 @@ in this @value{CHAPTER}. The second presents @command{awk} versions of several common POSIX utilities. These are programs that you are hopefully already familiar with, -and therefore, whose problems are understood. +and therefore whose problems are understood. By reimplementing these programs in @command{awk}, you can focus on the @command{awk}-related aspects of solving -the programming problem. +the programming problems. The third is a grab bag of interesting programs. These solve a number of different data-manipulation and management @@ -23212,7 +23244,7 @@ It should be noted that these programs are not necessarily intended to replace the installed versions on your system. Nor may all of these programs be fully compliant with the most recent POSIX standard. This is not a problem; their -purpose is to illustrate @command{awk} language programming for ``real world'' +purpose is to illustrate @command{awk} language programming for ``real-world'' tasks. The programs are presented in alphabetical order. @@ -23241,7 +23273,7 @@ but you may supply a command-line option to change the field @dfn{delimiter} (i.e., the field-separator character). @command{cut}'s definition of fields is less general than @command{awk}'s. -A common use of @command{cut} might be to pull out just the login name of +A common use of @command{cut} might be to pull out just the login names of logged-on users from the output of @command{who}. 
For example, the following pipeline generates a sorted, unique list of the logged-on users: @@ -23750,7 +23782,7 @@ successful or unsuccessful match. If the line does not match, the @code{next} statement just moves on to the next record. A number of additional tests are made, but they are only done if we -are not counting lines. First, if the user only wants exit status +are not counting lines. First, if the user only wants the exit status (@code{no_print} is true), then it is enough to know that @emph{one} line in this file matched, and we can skip on to the next file with @code{nextfile}. Similarly, if we are only printing @value{FN}s, we can @@ -23791,7 +23823,7 @@ if necessary: @end example The @code{END} rule takes care of producing the correct exit status. If -there are no matches, the exit status is one; otherwise it is zero: +there are no matches, the exit status is one; otherwise, it is zero: @example @c file eg/prog/egrep.awk @@ -23843,7 +23875,8 @@ Here is a simple version of @command{id} written in @command{awk}. It uses the user database library functions (@pxref{Passwd Functions}) and the group database library functions -(@pxref{Group Functions}): +(@pxref{Group Functions}) +from @ref{Library Functions}. The program is fairly straightforward. All the work is done in the @code{BEGIN} rule. The user and group ID numbers are obtained from @@ -23970,8 +24003,8 @@ By default, the output files are named @file{xaa}, @file{xab}, and so on. Each file has 1,000 lines in it, with the likely exception of the last file. To change the number of lines in each file, supply a number on the command line -preceded with a minus (e.g., @samp{-500} for files with 500 lines in them -instead of 1,000). To change the name of the output files to something like +preceded with a minus sign (e.g., @samp{-500} for files with 500 lines in them +instead of 1,000). 
To change the names of the output files to something like @file{myfileaa}, @file{myfileab}, and so on, supply an additional argument that specifies the @value{FN} prefix. @@ -24810,7 +24843,7 @@ checking and setting of defaults: the delay, the count, and the message to print. If the user supplied a message without the ASCII BEL character (known as the ``alert'' character, @code{"\a"}), then it is added to the message. (On many systems, printing the ASCII BEL generates an -audible alert. Thus when the alarm goes off, the system calls attention +audible alert. Thus, when the alarm goes off, the system calls attention to itself in case the user is not looking at the computer.) Just for a change, this program uses a @code{switch} statement (@pxref{Switch Statement}), but the processing could be done with a series of @@ -24979,7 +25012,7 @@ to @command{gawk}. @c at least theoretically The following program was written to prove that character transliteration could be done with a user-level -function. This program is not as complete as the system @command{tr} utility +function. This program is not as complete as the system @command{tr} utility, but it does most of the job. The @command{translate} program was written long before @command{gawk} @@ -24991,13 +25024,13 @@ takes three arguments: @table @code @item from -A list of characters from which to translate. +A list of characters from which to translate @item to -A list of characters to which to translate. +A list of characters to which to translate @item target -The string on which to do the translation. +The string on which to do the translation @end table Associative arrays make the translation part fairly easy. @code{t_ar} holds @@ -25006,7 +25039,7 @@ loop goes through @code{from}, one character at a time. For each character in @code{from}, if the character appears in @code{target}, it is replaced with the corresponding @code{to} character. 
-The @code{translate()} function calls @code{stranslate()} using @code{$0} +The @code{translate()} function calls @code{stranslate()}, using @code{$0} as the target. The main program sets two global variables, @code{FROM} and @code{TO}, from the command line, and then changes @code{ARGV} so that @command{awk} reads from the standard input. @@ -25028,7 +25061,7 @@ Finally, the processing rule simply calls @code{translate()} for each record: @c endfile @end ignore @c file eg/prog/translate.awk -# Bugs: does not handle things like: tr A-Z a-z, it has +# Bugs: does not handle things like tr A-Z a-z; it has # to be spelled out. However, if `to' is shorter than `from', # the last character in `to' is used for the rest of `from'. @@ -25104,7 +25137,7 @@ for inspiration. @cindex printing, mailing labels @cindex mailing labels@comma{} printing -Here is a ``real world''@footnote{``Real world'' is defined as +Here is a ``real-world''@footnote{``Real world'' is defined as ``a program actually used to get something done.''} program. This script reads lists of names and @@ -25113,7 +25146,7 @@ on it, two across and 10 down. The addresses are guaranteed to be no more than five lines of data. Each address is separated from the next by a blank line. -The basic idea is to read 20 labels worth of data. Each line of each label +The basic idea is to read 20 labels' worth of data. Each line of each label is stored in the @code{line} array. The single rule takes care of filling the @code{line} array and printing the page when 20 labels have been read. @@ -25136,12 +25169,12 @@ of lines on the page Most of the work is done in the @code{printpage()} function. The label lines are stored sequentially in the @code{line} array. But they -have to print horizontally; @code{line[1]} next to @code{line[6]}, +have to print horizontally: @code{line[1]} next to @code{line[6]}, @code{line[2]} next to @code{line[7]}, and so on. Two loops accomplish this. 
The outer loop, controlled by @code{i}, steps through every 10 lines of data; this is each row of labels. The inner loop, controlled by @code{j}, goes through the lines within the row. -As @code{j} goes from 0 to 4, @samp{i+j} is the @code{j}-th line in +As @code{j} goes from 0 to 4, @samp{i+j} is the @code{j}th line in the row, and @samp{i+j+5} is the entry next to it. The output ends up looking something like this: @@ -25259,8 +25292,8 @@ END @{ @} @end example -The program relies on @command{awk}'s default field splitting -mechanism to break each line up into ``words,'' and uses an +The program relies on @command{awk}'s default field-splitting +mechanism to break each line up into ``words'' and uses an associative array named @code{freq}, indexed by each word, to count the number of times the word occurs. In the @code{END} rule, it prints the counts. @@ -25365,7 +25398,7 @@ to use the @command{sort} program. @cindex lines, duplicate@comma{} removing The @command{uniq} program -(@pxref{Uniq Program}), +(@pxref{Uniq Program}) removes duplicate lines from @emph{sorted} data. Suppose, however, you need to remove duplicate lines from a @value{DF} but @@ -25452,7 +25485,7 @@ Texinfo input file into separate files. @cindex Texinfo This @value{DOCUMENT} is written in @uref{http://www.gnu.org/software/texinfo/, Texinfo}, -the GNU project's document formatting language. +the GNU Project's document formatting language. A single Texinfo source file can be used to produce both printed documentation, with @TeX{}, and online documentation. 
@ifnotinfo @@ -25511,7 +25544,7 @@ The Texinfo file looks something like this: @example @dots{} -This program has a @@code@{BEGIN@} rule, +This program has a @@code@{BEGIN@} rule that prints a nice message: @@example @@ -25540,7 +25573,7 @@ exits with a zero exit status, signifying OK: @cindex @code{extract.awk} program @example @c file eg/prog/extract.awk -# extract.awk --- extract files and run programs from texinfo files +# extract.awk --- extract files and run programs from Texinfo files @c endfile @ignore @c file eg/prog/extract.awk @@ -25581,12 +25614,12 @@ The second rule handles moving data into files. It verifies that a @value{FN} is given in the directive. If the file named is not the current file, then the current file is closed. Keeping the current file open until a new file is encountered allows the use of the @samp{>} -redirection for printing the contents, keeping open file management +redirection for printing the contents, keeping open-file management simple. The @code{for} loop does the work. It reads lines using @code{getline} (@pxref{Getline}). -For an unexpected end of file, it calls the @code{@w{unexpected_eof()}} +For an unexpected end-of-file, it calls the @code{@w{unexpected_eof()}} function. If the line is an ``endfile'' line, then it breaks out of the loop. If the line is an @samp{@@group} or @samp{@@end group} line, then it @@ -25688,7 +25721,7 @@ END @{ @cindex @command{sed} utility @cindex stream editors -The @command{sed} utility is a stream editor, a program that reads a +The @command{sed} utility is a @dfn{stream editor}, a program that reads a stream of data, makes changes to it, and passes it on. It is often used to make global changes to a large file or to a stream of data generated by a pipeline of commands. @@ -25833,7 +25866,7 @@ includes don't accidentally include a library function twice. @command{igawk} should behave just like @command{gawk} externally. 
This means it should accept all of @command{gawk}'s command-line arguments, including the ability to have multiple source files specified via -@option{-f}, and the ability to mix command-line and library source files. +@option{-f} and the ability to mix command-line and library source files. The program is written using the POSIX Shell (@command{sh}) command language.@footnote{Fully explaining the @command{sh} language is beyond @@ -25872,7 +25905,7 @@ Run the expanded program with @command{gawk} and any other original command-line arguments that the user supplied (such as the @value{DF} names). @end enumerate -This program uses shell variables extensively: for storing command-line arguments, +This program uses shell variables extensively: for storing command-line arguments and the text of the @command{awk} program that will expand the user's program, for the user's original program, and for the expanded program. Doing so removes some potential problems that might arise were we to use temporary files instead, @@ -26189,22 +26222,7 @@ Save the results of this processing in the shell variable The last step is to call @command{gawk} with the expanded program, along with the original -options and command-line arguments that the user supplied. - -@c this causes more problems than it solves, so leave it out. -@ignore -The special file @file{/dev/null} is passed as a @value{DF} to @command{gawk} -to handle an interesting case. Suppose that the user's program only has -a @code{BEGIN} rule and there are no @value{DF}s to read. -The program should exit without reading any @value{DF}s. -However, suppose that an included library file defines an @code{END} -rule of its own. In this case, @command{gawk} will hang, reading standard -input. In order to avoid this, @file{/dev/null} is explicitly added to the -command line. Reading from @file{/dev/null} always returns an immediate -end of file indication. - -@c Hmm. Add /dev/null if $# is 0? Still messes up ARGV. Sigh. 
-@end ignore +options and command-line arguments that the user supplied: @example @c file eg/prog/igawk.sh @@ -26270,8 +26288,8 @@ the same letters Column 2, Problem C, of Jon Bentley's @cite{Programming Pearls}, Second Edition, presents an elegant algorithm. The idea is to give words that are anagrams a common signature, sort all the words together by their -signature, and then print them. Dr.@: Bentley observes that taking the -letters in each word and sorting them produces that common signature. +signatures, and then print them. Dr.@: Bentley observes that taking the +letters in each word and sorting them produces those common signatures. The following program uses arrays of arrays to bring together words with the same signature and array sorting to print the words @@ -26280,8 +26298,8 @@ in sorted order: @cindex @code{anagram.awk} program @example @c file eg/prog/anagram.awk -# anagram.awk --- An implementation of the anagram finding algorithm -# from Jon Bentley's "Programming Pearls", 2nd edition. +# anagram.awk --- An implementation of the anagram-finding algorithm +# from Jon Bentley's "Programming Pearls," 2nd edition. # Addison Wesley, 2000, ISBN 0-201-65788-0. # Column 2, Problem C, section 2.8, pp 18-20. @c endfile @@ -26329,7 +26347,7 @@ sorts the letters, and then joins them back together: @example @c file eg/prog/anagram.awk -# word2key --- split word apart into letters, sort, joining back together +# word2key --- split word apart into letters, sort, and join back together function word2key(word, a, i, n, result) @{ @@ -26524,12 +26542,13 @@ characters. The ability to use @code{split()} with the empty string as the separator can considerably simplify such tasks. @item -The library functions from @ref{Library Functions}, proved their -usefulness for a number of real (if small) programs. +The examples here demonstrate the usefulness of the library +functions from @DBREF{Library Functions} +for a number of real (if small) programs. 
@item Besides reinventing POSIX wheels, other programs solved a selection of -interesting problems, such as finding duplicates words in text, printing +interesting problems, such as finding duplicate words in text, printing mailing labels, and finding anagrams. @end itemize @@ -26725,18 +26744,18 @@ a violent psychopath who knows where you live.} This @value{CHAPTER} discusses advanced features in @command{gawk}. It's a bit of a ``grab bag'' of items that are otherwise unrelated to each other. -First, a command-line option allows @command{gawk} to recognize +First, we look at a command-line option that allows @command{gawk} to recognize nondecimal numbers in input data, not just in @command{awk} programs. Then, @command{gawk}'s special features for sorting arrays are presented. Next, two-way I/O, discussed briefly in earlier parts of this @value{DOCUMENT}, is described in full detail, along with the basics -of TCP/IP networking. Finally, @command{gawk} +of TCP/IP networking. Finally, we see how @command{gawk} can @dfn{profile} an @command{awk} program, making it possible to tune it for performance. @c FULLXREF ON -A number of advanced features require separate @value{CHAPTER}s of their +Additional advanced features are discussed in separate @value{CHAPTER}s of their own: @itemize @value{BULLET} @@ -26830,7 +26849,8 @@ This option may disappear in a future version of @command{gawk}. @node Array Sorting @section Controlling Array Traversal and Array Sorting -@command{gawk} lets you control the order in which a @samp{for (i in array)} +@command{gawk} lets you control the order in which a +@samp{for (@var{indx} in @var{array})} loop traverses an array. In addition, two built-in functions, @code{asort()} and @code{asorti()}, @@ -26846,7 +26866,7 @@ to order the elements during sorting. 
@node Controlling Array Traversal @subsection Controlling Array Traversal -By default, the order in which a @samp{for (i in array)} loop +By default, the order in which a @samp{for (@var{indx} in @var{array})} loop scans an array is not defined; it is generally based upon the internal implementation of arrays inside @command{awk}. @@ -26875,23 +26895,23 @@ function comp_func(i1, v1, i2, v2) @} @end example -Here, @var{i1} and @var{i2} are the indices, and @var{v1} and @var{v2} +Here, @code{i1} and @code{i2} are the indices, and @code{v1} and @code{v2} are the corresponding values of the two elements being compared. -Either @var{v1} or @var{v2}, or both, can be arrays if the array being +Either @code{v1} or @code{v2}, or both, can be arrays if the array being traversed contains subarrays as values. (@DBXREF{Arrays of Arrays} for more information about subarrays.) The three possible return values are interpreted as follows: @table @code @item comp_func(i1, v1, i2, v2) < 0 -Index @var{i1} comes before index @var{i2} during loop traversal. +Index @code{i1} comes before index @code{i2} during loop traversal. @item comp_func(i1, v1, i2, v2) == 0 -Indices @var{i1} and @var{i2} -come together but the relative order with respect to each other is undefined. +Indices @code{i1} and @code{i2} +come together, but the relative order with respect to each other is undefined. @item comp_func(i1, v1, i2, v2) > 0 -Index @var{i1} comes after index @var{i2} during loop traversal. +Index @code{i1} comes after index @code{i2} during loop traversal. @end table Our first comparison function can be used to scan an array in @@ -27052,7 +27072,7 @@ As already mentioned, the order of the indices is arbitrary if two elements compare equal. This is usually not a problem, but letting the tied elements come out in arbitrary order can be an issue, especially when comparing item values. 
The partial ordering of the equal elements -may change the next time the array is traversed, if other elements are added or +may change the next time the array is traversed, if other elements are added to or removed from the array. One way to resolve ties when comparing elements with otherwise equal values is to include the indices in the comparison rules. Note that doing this may make the loop traversal less efficient, @@ -27095,7 +27115,7 @@ equivalent or distinct. Another point to keep in mind is that in the case of subarrays, the element values can themselves be arrays; a production comparison function should use the @code{isarray()} function -(@pxref{Type Functions}), +(@pxref{Type Functions}) to check for this, and choose a defined sorting order for subarrays. All sorting based on @code{PROCINFO["sorted_in"]} @@ -27103,7 +27123,7 @@ is disabled in POSIX mode, because the @code{PROCINFO} array is not special in that case. As a side note, sorting the array indices before traversing -the array has been reported to add 15% to 20% overhead to the +the array has been reported to add a 15% to 20% overhead to the execution time of @command{awk} programs. For this reason, sorted array traversal is not the default. @@ -27162,7 +27182,7 @@ However, the @code{source} array is not affected. Often, what's needed is to sort on the values of the @emph{indices} instead of the values of the elements. To do that, use the @code{asorti()} function. The interface and behavior are identical to -that of @code{asort()}, except that the index values are used for sorting, +that of @code{asort()}, except that the index values are used for sorting and become the values of the result array: @example @@ -27197,8 +27217,8 @@ it chooses}, taking into account just the indices, just the values, or both. This is extremely powerful. 
Once the array is sorted, @code{asort()} takes the @emph{values} in -their final order, and uses them to fill in the result array, whereas -@code{asorti()} takes the @emph{indices} in their final order, and uses +their final order and uses them to fill in the result array, whereas +@code{asorti()} takes the @emph{indices} in their final order and uses them to fill in the result array. @cindex reference counting, sorting arrays @@ -27495,7 +27515,7 @@ service name. @cindex @command{gawk}, @code{ERRNO} variable in @cindex @code{ERRNO} variable @quotation NOTE -Failure in opening a two-way socket will result in a non-fatal error +Failure in opening a two-way socket will result in a nonfatal error being returned to the calling code. The value of @code{ERRNO} indicates the error (@pxref{Auto-set}). @end quotation @@ -27512,19 +27532,19 @@ BEGIN @{ @end example This program reads the current date and time from the local system's -TCP @samp{daytime} server. +TCP @code{daytime} server. It then prints the results and closes the connection. Because this topic is extensive, the use of @command{gawk} for TCP/IP programming is documented separately. @ifinfo See -@inforef{Top, , General Introduction, gawkinet, TCP/IP Internetworking with @command{gawk}}, +@inforef{Top, , General Introduction, gawkinet, @value{GAWKINETTITLE}}, @end ifinfo @ifnotinfo See @uref{http://www.gnu.org/software/gawk/manual/gawkinet/, -@cite{TCP/IP Internetworking with @command{gawk}}}, +@cite{@value{GAWKINETTITLE}}}, which comes as part of the @command{gawk} distribution, @end ifnotinfo for a much more complete introduction and discussion, as well as @@ -27600,9 +27620,9 @@ junk @end example Here is the @file{awkprof.out} that results from running the -@command{gawk} profiler on this program and data. (This example also +@command{gawk} profiler on this program and data (this example also illustrates that @command{awk} programmers sometimes get up very early -in the morning to work.) 
+in the morning to work): @cindex @code{BEGIN} pattern, and profiling @cindex @code{END} pattern, and profiling @@ -27662,8 +27682,8 @@ They are as follows: @item The program is printed in the order @code{BEGIN} rules, @code{BEGINFILE} rules, -pattern/action rules, -@code{ENDFILE} rules, @code{END} rules and functions, listed +pattern--action rules, +@code{ENDFILE} rules, @code{END} rules, and functions, listed alphabetically. Multiple @code{BEGIN} and @code{END} rules retain their separate identities, as do @@ -27671,7 +27691,7 @@ multiple @code{BEGINFILE} and @code{ENDFILE} rules. @cindex patterns, counts, in a profile @item -Pattern-action rules have two counts. +Pattern--action rules have two counts. The first count, to the left of the rule, shows how many times the rule's pattern was @emph{tested}. The second count, to the right of the rule's opening left brace @@ -27738,13 +27758,13 @@ the target of a redirection isn't a scalar, it gets parenthesized. @command{gawk} supplies leading comments in front of the @code{BEGIN} and @code{END} rules, the @code{BEGINFILE} and @code{ENDFILE} rules, -the pattern/action rules, and the functions. +the pattern--action rules, and the functions. @end itemize The profiled version of your program may not look exactly like what you typed when you wrote it. This is because @command{gawk} creates the -profiled version by ``pretty printing'' its internal representation of +profiled version by ``pretty-printing'' its internal representation of the program. The advantage to this is that @command{gawk} can produce a standard representation. 
Also, things such as: @@ -27827,16 +27847,16 @@ If you use the @code{HUP} signal instead of the @code{USR1} signal, @cindex @code{SIGQUIT} signal (MS-Windows) @cindex signals, @code{QUIT}/@code{SIGQUIT} (MS-Windows) When @command{gawk} runs on MS-Windows systems, it uses the -@code{INT} and @code{QUIT} signals for producing the profile and, in +@code{INT} and @code{QUIT} signals for producing the profile, and in the case of the @code{INT} signal, @command{gawk} exits. This is because these systems don't support the @command{kill} command, so the only signals you can deliver to a program are those generated by the keyboard. The @code{INT} signal is generated by the -@kbd{Ctrl-@key{C}} or @kbd{Ctrl-@key{BREAK}} key, while the -@code{QUIT} signal is generated by the @kbd{Ctrl-@key{\}} key. +@kbd{Ctrl-c} or @kbd{Ctrl-BREAK} key, while the +@code{QUIT} signal is generated by the @kbd{Ctrl-\} key. Finally, @command{gawk} also accepts another option, @option{--pretty-print}. -When called this way, @command{gawk} ``pretty prints'' the program into +When called this way, @command{gawk} ``pretty-prints'' the program into @file{awkprof.out}, without any execution counts. @quotation NOTE @@ -27890,7 +27910,7 @@ optionally, close off one side of the two-way communications. @item By using special @value{FN}s with the @samp{|&} operator, you can open a -TCP/IP (or UDP/IP) connection to remote hosts in the Internet. @command{gawk} +TCP/IP (or UDP/IP) connection to remote hosts on the Internet. @command{gawk} supports both IPv4 and IPv6. @item @@ -27900,7 +27920,7 @@ you tune them more easily. Sending the @code{USR1} signal while profiling cause @command{gawk} to dump the profile and keep going, including a function call stack. @item -You can also just ``pretty print'' the program. This currently also runs +You can also just ``pretty-print'' the program. This currently also runs the program, but that will change in the next major release. 
@end itemize @@ -31062,7 +31082,7 @@ Allowing completely alphabetic strings to have valid numeric values is also a very severe departure from historical practice. @end itemize -The second problem is that the @code{gawk} maintainer feels that this +The second problem is that the @command{gawk} maintainer feels that this interpretation of the standard, which requires a certain amount of ``language lawyering'' to arrive at in the first place, was not even intended by the standard developers. In other words, ``we see how you @@ -31221,7 +31241,7 @@ When @option{--sandbox} is specified, extensions are disabled * Finding Extensions:: How @command{gawk} finds compiled extensions. * Extension Example:: Example C code for an extension. * Extension Samples:: The sample extensions that ship with - @code{gawk}. + @command{gawk}. * gawkextlib:: The @code{gawkextlib} project. * Extension summary:: Extension summary. * Extension Exercises:: Exercises. @@ -32185,7 +32205,7 @@ If the concept of a ``record terminator'' makes sense, then @code{*rt_start} should be set to point to the data to be used for @code{RT}, and @code{*rt_len} should be set to the length of the data. Otherwise, @code{*rt_len} should be set to zero. -@code{gawk} makes its own copy of this data, so the +@command{gawk} makes its own copy of this data, so the extension must manage this storage. @end table @@ -32231,7 +32251,7 @@ When writing an input parser, you should think about (and document) how it is expected to interact with @command{awk} code. You may want it to always be called, and take effect as appropriate (as the @code{readdir} extension does). Or you may want it to take effect -based upon the value of an @code{awk} variable, as the XML extension +based upon the value of an @command{awk} variable, as the XML extension from the @code{gawkextlib} project does (@pxref{gawkextlib}). 
In the latter case, code in a @code{BEGINFILE} section can look at @code{FILENAME} and @code{ERRNO} to decide whether or @@ -33014,7 +33034,7 @@ converts it to a string. Using non-integral values is possible, but requires that you understand how such values are converted to strings (@pxref{Conversion}); thus using integral values is safest. -As with @emph{all} strings passed into @code{gawk} from an extension, +As with @emph{all} strings passed into @command{gawk} from an extension, the string value of @code{index} must come from @code{gawk_malloc()}, @code{gawk_calloc()} or @code{gawk_realloc()}, and @command{gawk} releases the storage. @@ -35721,6 +35741,11 @@ The @code{isarray()} function to check if a variable is an array or not The @code{bindtextdomain()}, @code{dcgettext()} and @code{dcngettext()} functions for internationalization (@pxref{Programmer i18n}). + +@item +The @code{div()} function for doing integer +division and remainder +(@pxref{Numeric Functions}). @end itemize @item @@ -35854,8 +35879,14 @@ Ultrix @end itemize @item -@c FIXME: Verify the version here. -Support for MirBSD was removed at @command{gawk} @value{PVERSION} 4.2. +Support for the following systems was removed from the code +for @command{gawk} @value{PVERSION} 4.2: + +@c nested table +@itemize @value{MINUS} +@item +MirBSD +@end itemize @end itemize @@ -36469,6 +36500,40 @@ with a minimum of two The dynamic extension interface was completely redone (@pxref{Dynamic Extensions}). +@item +Support for Ultrix was removed. + +@end itemize + +Version 4.2 introduced the following changes: + +@itemize @bullet +@item +Changes to @code{ENVIRON} are reflected into @command{gawk}'s +environment and that of programs that it runs. +@xref{Auto-set}. + +@item +The @option{--pretty-print} option no longer runs the @command{awk} +program too. +@xref{Options}. + +@item +The @command{igawk} program and its manual page are no longer +installed when @command{gawk} is built. +@xref{Igawk Program}. 
+ +@item +The @code{div()} function. +@xref{Numeric Functions}. + +@item +The maximum number of hexadecimal digits in @samp{\x} escapes +is now two. +@xref{Escape Sequences}. + +@item +Support for MirBSD was removed. @end itemize @c XXX ADD MORE STUFF HERE @@ -37116,10 +37181,10 @@ The generated Info file for this @value{DOCUMENT}. @item doc/gawkinet.texi The Texinfo source file for @ifinfo -@inforef{Top, , General Introduction, gawkinet, TCP/IP Internetworking with @command{gawk}}. +@inforef{Top, , General Introduction, gawkinet, @value{GAWKINETTITLE}}. @end ifinfo @ifnotinfo -@cite{TCP/IP Internetworking with @command{gawk}}. +@cite{@value{GAWKINETTITLE}}. @end ifnotinfo It should be processed with @TeX{} (via @command{texi2dvi} or @command{texi2pdf}) @@ -37128,7 +37193,7 @@ with @command{makeinfo} to produce an Info or HTML file. @item doc/gawkinet.info The generated Info file for -@cite{TCP/IP Internetworking with @command{gawk}}. +@cite{@value{GAWKINETTITLE}}. @item doc/igawk.1 The @command{troff} source for a manual page describing the @command{igawk} @@ -37367,7 +37432,7 @@ can be configured and compiled. @cindex @option{--disable-lint} configuration option @cindex configuration option, @code{--disable-lint} @item --disable-lint -Disable all lint checking within @code{gawk}. The +Disable all lint checking within @command{gawk}. The @option{--lint} and @option{--lint-old} options (@pxref{Options}) are accepted, but silently do nothing. diff --git a/doc/gawktexi.in b/doc/gawktexi.in index 9f20f608..0e100f69 100644 --- a/doc/gawktexi.in +++ b/doc/gawktexi.in @@ -50,6 +50,7 @@ @set VERSION 4.1 @set PATCHLEVEL 2 +@set GAWKINETTITLE TCP/IP Internetworking with @command{gawk} @ifset FOR_PRINT @set TITLE Effective awk Programming @end ifset @@ -467,7 +468,7 @@ particular records in a file and perform operations upon them. @command{gawk}. * Internationalization:: Getting @command{gawk} to speak your language. -* Debugger:: The @code{gawk} debugger.
+* Debugger:: The @command{gawk} debugger. * Arbitrary Precision Arithmetic:: Arbitrary precision arithmetic with @command{gawk}. * Dynamic Extensions:: Adding new built-in functions to @@ -950,7 +951,7 @@ particular records in a file and perform operations upon them. * Internal File Ops:: The code for internal file operations. * Using Internal File Ops:: How to use an external extension. * Extension Samples:: The sample extensions that ship with - @code{gawk}. + @command{gawk}. * Extension Sample File Functions:: The file functions sample. * Extension Sample Fnmatch:: An interface to @code{fnmatch()}. * Extension Sample Fork:: An interface to @code{fork()} and @@ -1463,7 +1464,7 @@ In May 1997, J@"urgen Kahrs felt the need for network access from @command{awk}, and with a little help from me, set about adding features to do this for @command{gawk}. At that time, he also wrote the bulk of -@cite{TCP/IP Internetworking with @command{gawk}} +@cite{@value{GAWKINETTITLE}} (a separate document, available as part of the @command{gawk} distribution). His code finally became part of the main @command{gawk} distribution with @command{gawk} @value{PVERSION} 3.1. @@ -4588,7 +4589,7 @@ $ @kbd{gawk -f test2} @print{} This is script test2. @end example -@code{gawk} runs the @file{test2} script, which includes @file{test1} +@command{gawk} runs the @file{test2} script, which includes @file{test1} using the @code{@@include} keyword. So, to include external @command{awk} source files, you just use @code{@@include} followed by the name of the file to be included, @@ -4797,7 +4798,7 @@ This seems to have been a long-undocumented feature in Unix @command{awk}. Similarly, you may use @code{print} or @code{printf} statements in the @var{init} and @var{increment} parts of a @code{for} loop. This is another -long-undocumented ``feature'' of Unix @code{awk}. +long-undocumented ``feature'' of Unix @command{awk}. @end ignore @@ -5089,13 +5090,12 @@ letters or numbers. 
@value{COMMONEXT} @quotation CAUTION In ISO C, the escape sequence continues until the first nonhexadecimal digit is seen. -@c FIXME: Add exact version here. For many years, @command{gawk} would continue incorporating hexadecimal digits into the value until a non-hexadecimal digit or the end of the string was encountered. However, using more than two hexadecimal digits produced undefined results. -As of @value{PVERSION} @strong{FIXME:} 4.3.0, only two digits +As of @value{PVERSION} 4.2, only two digits are processed. @end quotation @@ -13836,7 +13836,7 @@ respectively, should use binary I/O. A string value of @code{"rw"} or @code{"wr"} indicates that all files should use binary I/O. Any other string value is treated the same as @code{"rw"}, but causes @command{gawk} to generate a warning message. @code{BINMODE} is described in more -detail in @ref{PC Using}. @command{mawk} (@pxref{Other Versions}), +detail in @ref{PC Using}. @command{mawk} (@pxref{Other Versions}) also supports this variable, but only using numeric values. @cindex @code{CONVFMT} variable @@ -13844,7 +13844,7 @@ also supports this variable, but only using numeric values. @cindex numbers, converting, to strings @cindex strings, converting, numbers to @item @code{CONVFMT} -This string controls conversion of numbers to +A string that controls the conversion of numbers to strings (@pxref{Conversion}). It works by being passed, in effect, as the first argument to the @code{sprintf()} function @@ -13919,7 +13919,7 @@ is to simply say @samp{FS = FS}, perhaps with an explanatory comment. @cindex regular expressions, case sensitivity @item IGNORECASE # If @code{IGNORECASE} is nonzero or non-null, then all string comparisons -and all regular expression matching are case independent. Thus, regexp +and all regular expression matching are case-independent. 
Thus, regexp matching with @samp{~} and @samp{!~}, as well as the @code{gensub()}, @code{gsub()}, @code{index()}, @code{match()}, @code{patsplit()}, @code{split()}, and @code{sub()} @@ -13945,7 +13945,7 @@ Any other true value prints nonfatal warnings. Assigning a false value to @code{LINT} turns off the lint warnings. This variable is a @command{gawk} extension. It is not special -in other @command{awk} implementations. Unlike the other special variables, +in other @command{awk} implementations. Unlike with the other special variables, changing @code{LINT} does affect the production of lint warnings, even if @command{gawk} is in compatibility mode. Much as the @option{--lint} and @option{--traditional} options independently @@ -13957,7 +13957,7 @@ of @command{awk} being executed. @cindex numbers, converting, to strings @cindex strings, converting, numbers to @item OFMT -Controls conversion of numbers to +A string that controls conversion of numbers to strings (@pxref{Conversion}) for printing with the @code{print} statement. It works by being passed as the first argument to the @code{sprintf()} function @@ -13972,7 +13972,7 @@ strings in general expressions; this is now done by @code{CONVFMT}. @cindex separators, field @cindex field separators @item OFS -This is the output field separator (@pxref{Output Separators}). It is +The output field separator (@pxref{Output Separators}). It is output between the fields printed by a @code{print} statement. Its default value is @w{@code{" "}}, a string consisting of a single space. @@ -13990,7 +13990,7 @@ The working precision of arbitrary-precision floating-point numbers, @cindex @code{ROUNDMODE} variable @item ROUNDMODE # The rounding mode to use for arbitrary-precision arithmetic on -numbers, by default @code{"N"} (@samp{roundTiesToEven} in +numbers, by default @code{"N"} (@code{roundTiesToEven} in the IEEE 754 standard; @pxref{Setting the rounding mode}). 
@cindex @code{RS} variable @@ -14019,7 +14019,7 @@ just the first character of @code{RS}'s value is used. @item @code{SUBSEP} The subscript separator. It has the default value of @code{"\034"} and is used to separate the parts of the indices of a -multidimensional array. Thus, the expression @code{@w{foo["A", "B"]}} +multidimensional array. Thus, the expression @samp{@w{foo["A", "B"]}} really accesses @code{foo["A\034B"]} (@pxref{Multidimensional}). @@ -14037,7 +14037,7 @@ The default value of @code{TEXTDOMAIN} is @code{"messages"}. @end table @node Auto-set -@subsection Built-In Variables That Convey Information +@subsection Built-in Variables That Convey Information @cindex predefined variables, conveying information @cindex variables, predefined conveying information @@ -14195,12 +14195,12 @@ input file. @item @code{NF} The number of fields in the current input record. @code{NF} is set each time a new record is read, when a new field is -created or when @code{$0} changes (@pxref{Fields}). +created, or when @code{$0} changes (@pxref{Fields}). Unlike most of the variables described in this @value{SUBSECTION}, assigning a value to @code{NF} has the potential to affect @command{awk}'s internal workings. In particular, assignments -to @code{NF} can be used to create or remove fields from the +to @code{NF} can be used to create fields in or remove fields from the current record. @xref{Changing Fields}. @cindex @code{FUNCTAB} array @@ -14250,7 +14250,7 @@ or @code{"FPAT"} if field matching with @code{FPAT} is in effect. @item PROCINFO["identifiers"] @cindex program identifiers A subarray, indexed by the names of all identifiers used in the text of -the AWK program. An @dfn{identifier} is simply the name of a variable +the @command{awk} program. An @dfn{identifier} is simply the name of a variable (be it scalar or array), built-in function, user-defined function, or extension function. 
For each identifier, the value of the element is one of the following: @@ -14270,7 +14270,7 @@ The identifier is an extension function loaded via The identifier is a scalar. @item "untyped" -The identifier is untyped (could be used as a scalar or array, +The identifier is untyped (could be used as a scalar or an array; @command{gawk} doesn't know yet). @item "user" @@ -14391,7 +14391,7 @@ is the length of the matched string, or @minus{}1 if no match is found. @cindex @code{RSTART} variable @item @code{RSTART} -The start-index in characters of the substring that is matched by the +The start index in characters of the substring that is matched by the @code{match()} function (@pxref{String Functions}). @code{RSTART} is set by invoking the @code{match()} function. Its value @@ -14458,7 +14458,7 @@ function multiply(variable, amount) @quotation NOTE In order to avoid severe time-travel paradoxes,@footnote{Not to mention difficult implementation issues.} neither @code{FUNCTAB} nor @code{SYMTAB} -are available as elements within the @code{SYMTAB} array. +is available as an element within the @code{SYMTAB} array. @end quotation @end table @@ -14632,7 +14632,7 @@ When designing your program, you should choose options that don't conflict with @command{gawk}'s, because it will process any options that it accepts before passing the rest of the command line on to your program. Using @samp{#!} with the @option{-E} option may help -(@DBXREF{Executable Scripts} +(@DBPXREF{Executable Scripts} and @ifnotdocbook @DBPXREF{Options}). @@ -14646,15 +14646,15 @@ and @itemize @value{BULLET} @item -Pattern-action pairs make up the basic elements of an @command{awk} +Pattern--action pairs make up the basic elements of an @command{awk} program. Patterns are either normal expressions, range expressions, -regexp constants, one of the special keywords @code{BEGIN}, @code{END}, -@code{BEGINFILE}, @code{ENDFILE}, or empty. 
The action executes if +or regexp constants; one of the special keywords @code{BEGIN}, @code{END}, +@code{BEGINFILE}, or @code{ENDFILE}; or empty. The action executes if the current record matches the pattern. Empty (missing) patterns match all records. @item -I/O from @code{BEGIN} and @code{END} rules have certain constraints. +I/O from @code{BEGIN} and @code{END} rules has certain constraints. This is also true, only more so, for @code{BEGINFILE} and @code{ENDFILE} rules. The latter two give you ``hooks'' into @command{gawk}'s file processing, allowing you to recover from a file that otherwise would @@ -14684,12 +14684,12 @@ iteration of a loop (or get out of a @code{switch}). @item @code{next} and @code{nextfile} let you read the next record and start -over at the top of your program, or skip to the next input file and +over at the top of your program or skip to the next input file and start over, respectively. @item The @code{exit} statement terminates your program. When executed -from an action (or function body) it transfers control to the +from an action (or function body), it transfers control to the @code{END} statements. From an @code{END} statement body, it exits immediately. You may pass an optional numeric value to be used as @command{awk}'s exit status. @@ -14792,15 +14792,17 @@ the declaration. indices---e.g., @samp{15 .. 27}---but the size of the array is still fixed when the array is declared.) -A contiguous array of four elements might look like the following example, -conceptually, if the element values are 8, @code{"foo"}, -@code{""}, and 30 +@c 1/2015: Do not put the numeric values into @code. Array element +@c values are no different than scalar variable values. 
+A contiguous array of four elements might look like @ifnotdocbook -as shown in @ref{figure-array-elements}: +@ref{figure-array-elements}, @end ifnotdocbook @ifdocbook -as shown in @inlineraw{docbook, <xref linkend="figure-array-elements"/>}: +@inlineraw{docbook, <xref linkend="figure-array-elements"/>}, @end ifdocbook +conceptually, if the element values are eight, @code{"foo"}, +@code{""}, and 30. @ifnotdocbook @float Figure,figure-array-elements @@ -14825,7 +14827,7 @@ as shown in @inlineraw{docbook, <xref linkend="figure-array-elements"/>}: @noindent Only the values are stored; the indices are implicit from the order of -the values. Here, 8 is the value at index zero, because 8 appears in the +the values. Here, eight is the value at index zero, because eight appears in the position with zero elements before it. @cindex arrays, indexing @@ -14837,19 +14839,21 @@ that each array is a collection of pairs---an index and its corresponding array element value: @ifnotdocbook -@example -@r{Index} 3 @r{Value} 30 -@r{Index} 1 @r{Value} "foo" -@r{Index} 0 @r{Value} 8 -@r{Index} 2 @r{Value} "" -@end example +@c extra empty column to indent it right +@multitable @columnfractions .1 .1 .1 +@headitem @tab Index @tab Value +@item @tab @code{3} @tab @code{30} +@item @tab @code{1} @tab @code{"foo"} +@item @tab @code{0} @tab @code{8} +@item @tab @code{2} @tab @code{""} +@end multitable @end ifnotdocbook @docbook <informaltable> <tgroup cols="2"> -<colspec colname="1" align="center"/> -<colspec colname="2" align="center"/> +<colspec colname="1" align="left"/> +<colspec colname="2" align="left"/> <thead> <row> <entry>Index</entry> @@ -14895,20 +14899,22 @@ at any time. For example, suppose a tenth element is added to the array whose value is @w{@code{"number ten"}}. 
The result is: @ifnotdocbook -@example -@r{Index} 10 @r{Value} "number ten" -@r{Index} 3 @r{Value} 30 -@r{Index} 1 @r{Value} "foo" -@r{Index} 0 @r{Value} 8 -@r{Index} 2 @r{Value} "" -@end example +@c extra empty column to indent it right +@multitable @columnfractions .1 .1 .2 +@headitem @tab Index @tab Value +@item @tab @code{10} @tab @code{"number ten"} +@item @tab @code{3} @tab @code{30} +@item @tab @code{1} @tab @code{"foo"} +@item @tab @code{0} @tab @code{8} +@item @tab @code{2} @tab @code{""} +@end multitable @end ifnotdocbook @docbook <informaltable> <tgroup cols="2"> -<colspec colname="1" align="center"/> -<colspec colname="2" align="center"/> +<colspec colname="1" align="left"/> +<colspec colname="2" align="left"/> <thead> <row> <entry>Index</entry> @@ -14960,19 +14966,20 @@ an index. For example, the following is an array that translates words from English to French: @ifnotdocbook -@example -@r{Index} "dog" @r{Value} "chien" -@r{Index} "cat" @r{Value} "chat" -@r{Index} "one" @r{Value} "un" -@r{Index} 1 @r{Value} "un" -@end example +@multitable @columnfractions .1 .1 .1 +@headitem @tab Index @tab Value +@item @tab @code{"dog"} @tab @code{"chien"} +@item @tab @code{"cat"} @tab @code{"chat"} +@item @tab @code{"one"} @tab @code{"un"} +@item @tab @code{1} @tab @code{"un"} +@end multitable @end ifnotdocbook @docbook <informaltable> <tgroup cols="2"> -<colspec colname="1" align="center"/> -<colspec colname="2" align="center"/> +<colspec colname="1" align="left"/> +<colspec colname="2" align="left"/> <thead> <row> <entry>Index</entry> @@ -15014,7 +15021,7 @@ numbers and strings as indices. There are some subtleties to how numbers work when used as array subscripts; this is discussed in more detail in @ref{Numeric Array Subscripts}.) -Here, the number @code{1} isn't double quoted, because @command{awk} +Here, the number @code{1} isn't double-quoted, because @command{awk} automatically converts it to a string. 
@cindex @command{gawk}, @code{IGNORECASE} variable in @@ -15039,7 +15046,7 @@ is independent of the number of elements in the array. @cindex elements of arrays The principal way to use an array is to refer to one of its elements. -An array reference is an expression as follows: +An @dfn{array reference} is an expression as follows: @example @var{array}[@var{index-expression}] @@ -15049,8 +15056,11 @@ An array reference is an expression as follows: Here, @var{array} is the name of an array. The expression @var{index-expression} is the index of the desired element of the array. +@c 1/2015: Having the 4.3 in @samp is a little iffy. It's essentially +@c an expression though, so leave be. It's too early in the discussion +@c to mention that it's really a string. The value of the array reference is the current value of that array -element. For example, @code{foo[4.3]} is an expression for the element +element. For example, @code{foo[4.3]} is an expression referencing the element of array @code{foo} at index @samp{4.3}. @cindex arrays, unassigned elements @@ -15142,7 +15152,7 @@ assign to that element of the array. The following program takes a list of lines, each beginning with a line number, and prints them out in order of line number. The line numbers -are not in order when they are first read---instead they +are not in order when they are first read---instead, they are scrambled. This program sorts the lines by making an array using the line numbers as subscripts. The program then prints out the lines in sorted order of their numbers. It is a very simple program and gets @@ -15236,7 +15246,7 @@ program has previously used, with the variable @var{var} set to that index. The following program uses this form of the @code{for} statement. The first rule scans the input records and notes which words appear (at least once) in the input, by storing a one into the array @code{used} with -the word as index. 
The second rule scans the elements of @code{used} to +the word as the index. The second rule scans the elements of @code{used} to find all the distinct words that appear in the input. It prints each word that is more than 10 characters long and also prints the number of such words. @@ -15333,7 +15343,7 @@ and will vary from one version of @command{awk} to the next. Often, though, you may wish to do something simple, such as ``traverse the array by comparing the indices in ascending order,'' or ``traverse the array by comparing the values in descending order.'' -@command{gawk} provides two mechanisms which give you this control. +@command{gawk} provides two mechanisms that give you this control: @itemize @value{BULLET} @item @@ -15390,21 +15400,26 @@ across different environments.} which @command{gawk} uses internally to perform the sorting. @item "@@ind_str_desc" -String indices ordered from high to low. +Like @code{"@@ind_str_asc"}, but the +string indices are ordered from high to low. @item "@@ind_num_desc" -Numeric indices ordered from high to low. +Like @code{"@@ind_num_asc"}, but the +numeric indices are ordered from high to low. @item "@@val_type_desc" -Element values, based on type, ordered from high to low. +Like @code{"@@val_type_asc"}, but the +element values, based on type, are ordered from high to low. Subarrays, if present, come out first. @item "@@val_str_desc" -Element values, treated as strings, ordered from high to low. +Like @code{"@@val_str_asc"}, but the +element values, treated as strings, are ordered from high to low. Subarrays, if present, come out first. @item "@@val_num_desc" -Element values, treated as numbers, ordered from high to low. +Like @code{"@@val_num_asc"}, but the +element values, treated as numbers, are ordered from high to low. Subarrays, if present, come out first. @end table @@ -15627,7 +15642,7 @@ for (i in frequencies) @noindent This example removes all the elements from the array @code{frequencies}. 
Once an element is deleted, a subsequent @code{for} statement to scan the array -does not report that element and the @code{in} operator to check for +does not report that element and using the @code{in} operator to check for the presence of that element returns zero (i.e., false): @example @@ -15887,7 +15902,7 @@ a[1][2] = 2 This simulates a true two-dimensional array. Each subarray element can contain another subarray as a value, which in turn can hold other arrays as well. In this way, you can create arrays of three or more dimensions. -The indices can be any @command{awk} expression, including scalars +The indices can be any @command{awk} expressions, including scalars separated by commas (i.e., a regular @command{awk} simulated multidimensional subscript). So the following is valid in @command{gawk}: @@ -15899,7 +15914,7 @@ a[1][3][1, "name"] = "barney" Each subarray and the main array can be of different length. In fact, the elements of an array or its subarray do not all have to have the same type. This means that the main array and any of its subarrays can be -non-rectangular, or jagged in structure. You can assign a scalar value to +nonrectangular, or jagged in structure. You can assign a scalar value to the index @code{4} of the main array @code{a}, even though @code{a[1]} is itself an array and not a scalar: @@ -15923,7 +15938,8 @@ a[4][5][6][7] = "An element in a four-dimensional array" @noindent This removes the scalar value from index @code{4} and then inserts a -subarray of subarray of subarray containing a scalar. You can also +three-level nested subarray +containing a scalar. You can also delete an entire subarray or subarray of subarrays: @example @@ -15934,7 +15950,7 @@ a[4][5] = "An element in subarray a[4]" But recall that you can not delete the main array @code{a} and then use it as a scalar. -The built-in functions which take array arguments can also be used +The built-in functions that take array arguments can also be used with subarrays. 
For example, the following code fragment uses @code{length()} (@pxref{String Functions}) to determine the number of elements in the main array @code{a} and @@ -15964,7 +15980,7 @@ can be nested to scan all the elements of an array of arrays if it is rectangular in structure. In order to print the contents (scalar values) of a two-dimensional array of arrays (i.e., in which each first-level element is itself an -array, not necessarily of the same length) +array, not necessarily of the same length), you could use the following code: @example @@ -16064,9 +16080,9 @@ versions of @command{awk}. @item Standard @command{awk} simulates multidimensional arrays by separating -subscript values with a comma. The values are concatenated into a +subscript values with commas. The values are concatenated into a single string, separated by the value of @code{SUBSEP}. The fact -that such a subscript was created in this way is not retained; thus +that such a subscript was created in this way is not retained; thus, changing @code{SUBSEP} may have unexpected consequences. You can use @samp{(@var{sub1}, @var{sub2}, @dots{}) in @var{array}} to see if such a multidimensional subscript exists in @var{array}. @@ -16075,7 +16091,7 @@ a multidimensional subscript exists in @var{array}. @command{gawk} provides true arrays of arrays. You use a separate set of square brackets for each dimension in such an array: @code{data[row][col]}, for example. Array elements may thus be either -scalar values (number or string) or another array. +scalar values (number or string) or other arrays. @item Use the @code{isarray()} built-in function to determine if an array @@ -16100,6 +16116,9 @@ Besides the built-in functions, @command{awk} has provisions for writing new functions that the rest of a program can use. The second half of this @value{CHAPTER} describes these @dfn{user-defined} functions. 
+Finally, we explore indirect function calls, a @command{gawk}-specific +extension that lets you determine at runtime what function is to +be called. @menu * Built-in:: Summarizes the built-in functions. @@ -16109,7 +16128,7 @@ The second half of this @value{CHAPTER} describes these @end menu @node Built-in -@section Built-In Functions +@section Built-in Functions @dfn{Built-in} functions are always available for your @command{awk} program to call. This @value{SECTION} defines all @@ -16132,7 +16151,7 @@ but are summarized here for your convenience. @end menu @node Calling Built-in -@subsection Calling Built-In Functions +@subsection Calling Built-in Functions To call one of @command{awk}'s built-in functions, write the name of the function followed @@ -16183,7 +16202,7 @@ j = atan2(++i, i *= 2) @end example If the order of evaluation is left to right, then @code{i} first becomes -6, and then 12, and @code{atan2()} is called with the two arguments 6 +six, and then 12, and @code{atan2()} is called with the two arguments six and 12. But if the order of evaluation is right to left, @code{i} first becomes 10, then 11, and @code{atan2()} is called with the two arguments 11 and 10. @@ -16264,7 +16283,7 @@ In fact, @command{gawk} uses the BSD @code{random()} function, which is considerably better than @code{rand()}, to produce random numbers.} Often random integers are needed instead. Following is a user-defined function -that can be used to obtain a random non-negative integer less than @var{n}: +that can be used to obtain a random nonnegative integer less than @var{n}: @example function randint(n) @@ -16359,7 +16378,7 @@ implementations. The functions in this @value{SECTION} look at or change the text of one or more strings. -@code{gawk} understands locales (@pxref{Locales}), and does all +@command{gawk} understands locales (@pxref{Locales}) and does all string processing in terms of @emph{characters}, not @emph{bytes}. 
This distinction is particularly important to understand for locales where one character may be represented by multiple bytes. Thus, for @@ -16448,7 +16467,7 @@ a[2] = "de" a[3] = "sac" @end example -The @code{asorti()} function works similarly to @code{asort()}, however, +The @code{asorti()} function works similarly to @code{asort()}; however, the @emph{indices} are sorted, instead of the values. Thus, in the previous example, starting with the same initial set of indices and values in @code{a}, calling @samp{asorti(a)} would yield: @@ -16563,7 +16582,7 @@ If @var{find} is not found, @code{index()} returns zero. With BWK @command{awk} and @command{gawk}, it is a fatal error to use a regexp constant for @var{find}. Other implementations allow it, simply treating the regexp -constant as an expression meaning @samp{$0 ~ /regexp/}. @value{DARKCORNER}. +constant as an expression meaning @samp{$0 ~ /regexp/}. @value{DARKCORNER} @item @code{length(}[@var{string}]@code{)} @cindexawkfunc{length} @@ -16646,7 +16665,7 @@ If @option{--posix} is supplied, using an array argument is a fatal error @cindex string, regular expression match @cindex match regexp in string Search @var{string} for the -longest, leftmost substring matched by the regular expression, +longest, leftmost substring matched by the regular expression @var{regexp} and return the character position (index) at which that substring begins (one, if it starts at the beginning of @var{string}). If no match is found, return zero. @@ -16658,7 +16677,7 @@ In the latter case, the string is treated as a regexp to be matched. discussion of the difference between the two forms, and the implications for writing your program correctly. -The order of the first two arguments is backwards from most other string +The order of the first two arguments is the opposite of most other string functions that work with regular expressions, such as @code{sub()} and @code{gsub()}. 
It might help to remember that for @code{match()}, the order is the same as for the @samp{~} operator: @@ -16747,7 +16766,7 @@ $ @kbd{echo foooobazbarrrrr |} @end example There may not be subscripts for the start and index for every parenthesized -subexpression, because they may not all have matched text; thus they +subexpression, because they may not all have matched text; thus, they should be tested for with the @code{in} operator (@pxref{Reference to Elements}). @@ -16794,13 +16813,13 @@ a regexp describing where to split @var{string} (much as @code{FS} can be a regexp describing where to split input records). If @var{fieldsep} is omitted, the value of @code{FS} is used. @code{split()} returns the number of elements created. -@var{seps} is a @command{gawk} extension with @code{@var{seps}[@var{i}]} +@var{seps} is a @command{gawk} extension, with @code{@var{seps}[@var{i}]} being the separator string between @code{@var{array}[@var{i}]} and @code{@var{array}[@var{i}+1]}. If @var{fieldsep} is a single -space then any leading whitespace goes into @code{@var{seps}[0]} and +space, then any leading whitespace goes into @code{@var{seps}[0]} and any trailing -whitespace goes into @code{@var{seps}[@var{n}]} where @var{n} is the +whitespace goes into @code{@var{seps}[@var{n}]}, where @var{n} is the return value of @code{split()} (i.e., the number of elements in @var{array}). @@ -16813,7 +16832,7 @@ split("cul-de-sac", a, "-", seps) @noindent @cindex strings splitting, example -splits the string @samp{cul-de-sac} into three fields using @samp{-} as the +splits the string @code{"cul-de-sac"} into three fields using @samp{-} as the separator. It sets the contents of the array @code{a} as follows: @example @@ -16838,19 +16857,18 @@ As with input field-splitting, when the value of @var{fieldsep} is the elements of @var{array} but not in @var{seps}, and the elements are separated by runs of whitespace. 
-Also, as with input field-splitting, if @var{fieldsep} is the null string, each +Also, as with input field splitting, if @var{fieldsep} is the null string, each individual character in the string is split into its own array element. @value{COMMONEXT} Note, however, that @code{RS} has no effect on the way @code{split()} -works. Even though @samp{RS = ""} causes newline to also be an input +works. Even though @samp{RS = ""} causes the newline character to also be an input field separator, this does not affect how @code{split()} splits strings. @cindex dark corner, @code{split()} function Modern implementations of @command{awk}, including @command{gawk}, allow -the third argument to be a regexp constant (@code{/abc/}) as well as a -string. -@value{DARKCORNER} +the third argument to be a regexp constant (@w{@code{/}@dots{}@code{/}}) +as well as a string. @value{DARKCORNER} The POSIX standard allows this as well. @DBXREF{Computed Regexps} for a discussion of the difference between using a string constant or a regexp constant, @@ -16987,7 +17005,7 @@ an @samp{&}: @cindex @code{sub()} function, arguments of @cindex @code{gsub()} function, arguments of As mentioned, the third argument to @code{sub()} must -be a variable, field or array element. +be a variable, field, or array element. Some versions of @command{awk} allow the third argument to be an expression that is not an lvalue. In such a case, @code{sub()} still searches for the pattern and returns zero or one, but the result of @@ -17146,8 +17164,8 @@ example, @code{"a\qb"} is treated as @code{"aqb"}. At the runtime level, the various functions handle sequences of @samp{\} and @samp{&} differently. The situation is (sadly) somewhat complex. 
-Historically, the @code{sub()} and @code{gsub()} functions treated the two -character sequence @samp{\&} specially; this sequence was replaced in +Historically, the @code{sub()} and @code{gsub()} functions treated the +two-character sequence @samp{\&} specially; this sequence was replaced in the generated text with a single @samp{&}. Any other @samp{\} within the @var{replacement} string that did not precede an @samp{&} was passed through unchanged. This is illustrated in @ref{table-sub-escapes}. @@ -17205,7 +17223,7 @@ _bigskip} @end float @noindent -This table shows both the lexical-level processing, where +This table shows the lexical-level processing, where an odd number of backslashes becomes an even number at the runtime level, as well as the runtime processing done by @code{sub()}. (For the sake of simplicity, the rest of the following tables only show the @@ -17226,7 +17244,7 @@ This is shown in @ref{table-sub-proposed}. @float Table,table-sub-proposed -@caption{GNU @command{awk} rules for @code{sub()} and backslash} +@caption{@command{gawk} rules for @code{sub()} and backslash} @tex \vbox{\bigskip % We need more characters for escape and tab ... @@ -17271,7 +17289,7 @@ _bigskip} @end float In a nutshell, at the runtime level, there are now three special sequences -of characters (@samp{\\\&}, @samp{\\&} and @samp{\&}) whereas historically +of characters (@samp{\\\&}, @samp{\\&}, and @samp{\&}) whereas historically there was only one. However, as in the historical case, any @samp{\} that is not part of one of these three sequences is not special and appears in the output literally. @@ -17337,7 +17355,7 @@ The only case where the difference is noticeable is the last one: @samp{\\\\} is seen as @samp{\\} and produces @samp{\} instead of @samp{\\}. Starting with @value{PVERSION} 3.1.4, @command{gawk} followed the POSIX rules -when @option{--posix} is specified (@pxref{Options}). Otherwise, +when @option{--posix} was specified (@pxref{Options}). 
Otherwise, it continued to follow the proposed rules, as that had been its behavior for many years. @@ -17405,7 +17423,7 @@ _bigskip} @end ifnottex @end float -Because of the complexity of the lexical and runtime level processing +Because of the complexity of the lexical- and runtime-level processing and the special cases for @code{sub()} and @code{gsub()}, we recommend the use of @command{gawk} and @code{gensub()} when you have to do substitutions. @@ -17431,6 +17449,7 @@ for more information. When closing a coprocess, it is occasionally useful to first close one end of the two-way pipe and then to close the other. This is done by providing a second argument to @code{close()}. This second argument +(@var{how}) should be one of the two string values @code{"to"} or @code{"from"}, indicating which end of the pipe to close. Case in the string does not matter. @@ -17457,7 +17476,7 @@ every little bit of information as soon as it is ready. However, sometimes it is necessary to force a program to @dfn{flush} its buffers (i.e., write the information to its destination, even if a buffer is not full). This is the purpose of the @code{fflush()} function---@command{gawk} also -buffers its output and the @code{fflush()} function forces +buffers its output, and the @code{fflush()} function forces @command{gawk} to flush its buffers. @cindex extensions, common@comma{} @code{fflush()} function @@ -17478,7 +17497,7 @@ would flush only the standard output if there was no argument, and flush all output files and pipes if the argument was the null string. This was changed in order to be compatible with Brian Kernighan's @command{awk}, in the hope that standardizing this -feature in POSIX would then be easier (which indeed helped). +feature in POSIX would then be easier (which indeed proved to be the case). With @command{gawk}, you can use @samp{fflush("/dev/stdout")} if you wish to flush @@ -17489,7 +17508,7 @@ only the standard output. 
@c @cindex warnings, automatic @cindex troubleshooting, @code{fflush()} function @code{fflush()} returns zero if the buffer is successfully flushed; -otherwise, it returns non-zero. (@command{gawk} returns @minus{}1.) +otherwise, it returns a nonzero value. (@command{gawk} returns @minus{}1.) In the case where all buffers are flushed, the return value is zero only if all buffers were flushed successfully. Otherwise, it is @minus{}1, and @command{gawk} warns about the problem @var{filename}. @@ -17502,8 +17521,8 @@ In such a case, @code{fflush()} returns @minus{}1, as well. @sidebar Interactive Versus Noninteractive Buffering @cindex buffering, interactive vs.@: noninteractive -As a side point, buffering issues can be even more confusing, depending -upon whether your program is @dfn{interactive} (i.e., communicating +As a side point, buffering issues can be even more confusing if +your program is @dfn{interactive} (i.e., communicating with a user sitting at a keyboard).@footnote{A program is interactive if the standard output is connected to a terminal device. On modern systems, this means your keyboard and screen.} @@ -17546,7 +17565,7 @@ it is all buffered and sent down the pipe to @command{cat} in one shot. @cindexawkfunc{system} @cindex invoke shell command @cindex interacting with other programs -Execute the operating-system +Execute the operating system command @var{command} and then return to the @command{awk} program. Return @var{command}'s exit status. @@ -17655,9 +17674,9 @@ you would see the latter (undesirable) output. @cindex files, log@comma{} timestamps in @cindex @command{gawk}, timestamps @cindex POSIX @command{awk}, timestamps and -@code{awk} programs are commonly used to process log files +@command{awk} programs are commonly used to process log files containing timestamp information, indicating when a -particular log record was written. Many programs log their timestamp +particular log record was written. 
Many programs log their timestamps in the form returned by the @code{time()} system call, which is the number of seconds since a particular epoch. On POSIX-compliant systems, it is the number of seconds since @@ -17718,7 +17737,7 @@ The values of these numbers need not be within the ranges specified; for example, an hour of @minus{}1 means 1 hour before midnight. The origin-zero Gregorian calendar is assumed, with year 0 preceding year 1 and year @minus{}1 preceding year 0. -The time is assumed to be in the local timezone. +The time is assumed to be in the local time zone. If the daylight-savings flag is positive, the time is assumed to be daylight savings time; if zero, the time is assumed to be standard time; and if negative (the default), @code{mktime()} attempts to determine @@ -17878,12 +17897,12 @@ Equivalent to specifying @samp{%H:%M:%S}. The weekday as a decimal number (1--7). Monday is day one. @item %U -The week number of the year (the first Sunday as the first day of week one) +The week number of the year (with the first Sunday as the first day of week one) as a decimal number (00--53). @c @cindex ISO 8601 @item %V -The week number of the year (the first Monday as the first +The week number of the year (with the first Monday as the first day of week one) as a decimal number (01--53). The method for determining the week number is as specified by ISO 8601. (To wit: if the week containing January 1 has four or more days in the @@ -17894,7 +17913,7 @@ and the next week is week one.) The weekday as a decimal number (0--6). Sunday is day zero. @item %W -The week number of the year (the first Monday as the first day of week one) +The week number of the year (with the first Monday as the first day of week one) as a decimal number (00--53). @item %x @@ -17914,8 +17933,8 @@ The full year as a decimal number (e.g., 2015). 
@c @cindex RFC 822 @c @cindex RFC 1036 @item %z -The timezone offset in a +HHMM format (e.g., the format necessary to -produce RFC 822/RFC 1036 date headers). +The time zone offset in a @samp{+@var{HHMM}} format (e.g., the format +necessary to produce RFC 822/RFC 1036 date headers). @item %Z The time zone name or abbreviation; no characters if @@ -18055,7 +18074,7 @@ The operations are described in @ref{table-bitwise-ops}. @ifnottex @ifnotdocbook @display - Bit Operator + Bit operator | AND | OR | XOR |---+---+---+---+---+--- Operands | 0 | 1 | 0 | 1 | 0 | 1 @@ -18113,7 +18132,7 @@ Operands | 0 | 1 | 0 | 1 | 0 | 1 <tbody> <row> <entry colsep="0"></entry> -<entry spanname="optitle"><emphasis role="bold">Bit Operator</emphasis></entry> +<entry spanname="optitle"><emphasis role="bold">Bit operator</emphasis></entry> </row> <row rowsep="1"> @@ -18177,10 +18196,9 @@ of a given value. Finally, two other common operations are to shift the bits left or right. For example, if you have a bit string @samp{10111001} and you shift it right by three bits, you end up with @samp{00010111}.@footnote{This example -shows that 0's come in on the left side. For @command{gawk}, this is +shows that zeros come in on the left side. For @command{gawk}, this is always true, but in some languages, it's possible to have the left side -fill with 1's.} -@c Purposely decided to use 0's and 1's here. 2/2001. +fill with ones.} If you start over again with @samp{10111001} and shift it left by three bits, you end up with @samp{11001000}. The following list describes @command{gawk}'s built-in functions that implement the bitwise operations. 
@@ -18234,7 +18252,7 @@ that illustrates the use of these functions: @example @group @c file eg/lib/bits2str.awk -# bits2str --- turn a byte into readable 1's and 0's +# bits2str --- turn a byte into readable ones and zeros function bits2str(bits, data, mask) @{ @@ -18308,15 +18326,16 @@ $ @kbd{gawk -f testbits.awk} @cindex converting, numbers to strings @cindex number as string of bits The @code{bits2str()} function turns a binary number into a string. -The number @code{1} represents a binary value where the rightmost bit -is set to 1. Using this mask, +Initializing @code{mask} to one creates +a binary value where the rightmost bit +is set to one. Using this mask, the function repeatedly checks the rightmost bit. ANDing the mask with the value indicates whether the -rightmost bit is 1 or not. If so, a @code{"1"} is concatenated onto the front +rightmost bit is one or not. If so, a @code{"1"} is concatenated onto the front of the string. Otherwise, a @code{"0"} is added. The value is then shifted right by one bit and the loop continues -until there are no more 1 bits. +until there are no more one bits. If the initial value is zero, it returns a simple @code{"0"}. Otherwise, at the end, it pads the value with zeros to represent multiples @@ -18340,7 +18359,7 @@ that traverses every element of an array of arrays @cindexgawkfunc{isarray} @cindex scalar or array @item isarray(@var{x}) -Return a true value if @var{x} is an array. Otherwise return false. +Return a true value if @var{x} is an array. Otherwise, return false. @end table @code{isarray()} is meant for use in two circumstances. The first is when @@ -18401,7 +18420,7 @@ The default value for @var{category} is @code{"LC_MESSAGES"}. Return the plural form used for @var{number} of the translation of @var{string1} and @var{string2} in text domain @var{domain} for locale category @var{category}. 
@var{string1} is the -English singular variant of a message, and @var{string2} the English plural +English singular variant of a message, and @var{string2} is the English plural variant of the same message. The default value for @var{domain} is the current value of @code{TEXTDOMAIN}. The default value for @var{category} is @code{"LC_MESSAGES"}. @@ -18430,7 +18449,7 @@ them (i.e., to tell @command{awk} what they should do). @subsection Function Definition Syntax @quotation -@i{It's entirely fair to say that the @command{awk} syntax for local +@i{It's entirely fair to say that the awk syntax for local variable definitions is appallingly awful.} @author Brian Kernighan @end quotation @@ -18472,14 +18491,23 @@ the call. A function cannot have two parameters with the same name, nor may it have a parameter with the same name as the function itself. -In addition, according to the POSIX standard, function parameters + +@quotation CAUTION +According to the POSIX standard, function parameters cannot have the same name as one of the special predefined variables -(@pxref{Built-in Variables}). Not all versions of @command{awk} enforce -this restriction. +(@pxref{Built-in Variables}), nor may a function parameter have the +same name as another function. + +Not all versions of @command{awk} enforce +these restrictions. +@command{gawk} always enforces the first restriction. +With @option{--posix} (@pxref{Options}), +it also enforces the second restriction. +@end quotation Local variables act like the empty string if referenced where a string value is required, and like zero if referenced where a numeric value -is required. This is the same as regular variables that have never been +is required. This is the same as the behavior of regular variables that have never been assigned a value. (There is more to understand about local variables; @pxref{Dynamic Typing}.) 
@@ -18513,7 +18541,7 @@ During execution of the function body, the arguments and local variable values hide, or @dfn{shadow}, any variables of the same names used in the rest of the program. The shadowed variables are not accessible in the function definition, because there is no way to name them while their -names have been taken away for the local variables. All other variables +names have been taken away for the arguments and local variables. All other variables used in the @command{awk} program can be referenced or set normally in the function's body. @@ -18580,7 +18608,7 @@ function myprint(num) @end example @noindent -To illustrate, here is an @command{awk} rule that uses our @code{myprint} +To illustrate, here is an @command{awk} rule that uses our @code{myprint()} function: @example @@ -18621,13 +18649,13 @@ in an array and start over with a new list of elements (@pxref{Delete}). Instead of having to repeat this loop everywhere that you need to clear out -an array, your program can just call @code{delarray}. +an array, your program can just call @code{delarray()}. (This guarantees portability. The use of @samp{delete @var{array}} to delete the contents of an entire array is a relatively recent@footnote{Late in 2012.} addition to the POSIX standard.) The following is an example of a recursive function. It takes a string -as an input parameter and returns the string in backwards order. +as an input parameter and returns the string in reverse order. Recursive functions must always have a test that stops the recursion. In this case, the recursion terminates when the input string is already empty: @@ -18724,7 +18752,7 @@ an error. @cindex local variables, in a function @cindex variables, local to a function -Unlike many languages, +Unlike in many languages, there is no way to make a variable local to a @code{@{} @dots{} @code{@}} block in @command{awk}, but you can make a variable local to a function. 
It is good practice to do so whenever a variable is needed only in that @@ -18733,7 +18761,7 @@ function. To make a variable local to a function, simply declare the variable as an argument after the actual function arguments (@pxref{Definition Syntax}). -Look at the following example where variable +Look at the following example, where variable @code{i} is a global variable used by both functions @code{foo()} and @code{bar()}: @@ -18774,7 +18802,7 @@ foo's i=3 top's i=3 @end example -If you want @code{i} to be local to both @code{foo()} and @code{bar()} do as +If you want @code{i} to be local to both @code{foo()} and @code{bar()}, do as follows (the extra space before @code{i} is a coding convention to indicate that @code{i} is a local variable, not an argument): @@ -18862,7 +18890,7 @@ declare explicitly whether the arguments are passed @dfn{by value} or @dfn{by reference}. Instead, the passing convention is determined at runtime when -the function is called according to the following rule: +the function is called, according to the following rule: if the argument is an array variable, then it is passed by reference. Otherwise, the argument is passed by value. @@ -18939,7 +18967,7 @@ prints @samp{a[1] = 1, a[2] = two, a[3] = 3}, because @cindex undefined functions @cindex functions, undefined Some @command{awk} implementations allow you to call a function that -has not been defined. They only report a problem at runtime when the +has not been defined. They only report a problem at runtime, when the program actually tries to call the function. For example: @example @@ -18998,15 +19026,15 @@ makes the returned value undefined, and therefore, unpredictable. In practice, though, all versions of @command{awk} simply return the null string, which acts like zero if used in a numeric context. -A @code{return} statement with no value expression is assumed at the end of -every function definition. 
So if control reaches the end of the function -body, then technically, the function returns an unpredictable value. +A @code{return} statement without an @var{expression} is assumed at the end of +every function definition. So, if control reaches the end of the function +body, then technically the function returns an unpredictable value. In practice, it returns the empty string. @command{awk} does @emph{not} warn you if you use the return value of such a function. Sometimes, you want to write a function for what it does, not for what it returns. Such a function corresponds to a @code{void} function -in C, C++ or Java, or to a @code{procedure} in Ada. Thus, it may be appropriate to not +in C, C++, or Java, or to a @code{procedure} in Ada. Thus, it may be appropriate to not return any value; simply bear in mind that you should not be using the return value of such a function. @@ -19125,13 +19153,15 @@ function calls, you can specify the name of the function to call as a string variable, and then call the function. Let's look at an example. Suppose you have a file with your test scores for the classes you -are taking. The first field is the class name. The following fields +are taking, and +you wish to get the sum and the average of +your test scores. +The first field is the class name. The following fields are the functions to call to process the data, up to a ``marker'' field @samp{data:}. Following the marker, to the end of the record, are the various numeric test scores. -Here is the initial file; you wish to get the sum and the average of -your test scores: +Here is the initial file: @example @c file eg/data/class_data1 @@ -19214,9 +19244,9 @@ function sum(first, last, ret, i) @c endfile @end example -These two functions expect to work on fields; thus the parameters +These two functions expect to work on fields; thus, the parameters @code{first} and @code{last} indicate where in the fields to start and end. 
-Otherwise they perform the expected computations and are not unusual: +Otherwise, they perform the expected computations and are not unusual: @example @c file eg/prog/indirectcall.awk @@ -19275,8 +19305,8 @@ The ability to use indirect function calls is more powerful than you may think at first. The C and C++ languages provide ``function pointers,'' which are a mechanism for calling a function chosen at runtime. One of the most well-known uses of this ability is the C @code{qsort()} function, which sorts -an array using the famous ``quick sort'' algorithm -(see @uref{http://en.wikipedia.org/wiki/Quick_sort, the Wikipedia article} +an array using the famous ``quicksort'' algorithm +(see @uref{http://en.wikipedia.org/wiki/Quicksort, the Wikipedia article} for more information). To use this function, you supply a pointer to a comparison function. This mechanism allows you to sort arbitrary data in an arbitrary fashion. @@ -19295,11 +19325,11 @@ We can do something similar using @command{gawk}, like this: # January 2009 @c endfile - @end ignore @c file eg/lib/quicksort.awk -# quicksort --- C.A.R. Hoare's quick sort algorithm. See Wikipedia -# or almost any algorithms or computer science text + +# quicksort --- C.A.R. Hoare's quicksort algorithm. See Wikipedia +# or almost any algorithms or computer science text. @c endfile @ignore @c file eg/lib/quicksort.awk @@ -19337,7 +19367,7 @@ function quicksort_swap(data, i, j, temp) The @code{quicksort()} function receives the @code{data} array, the starting and ending indices to sort (@code{left} and @code{right}), and the name of a function that -performs a ``less than'' comparison. It then implements the quick sort algorithm. +performs a ``less than'' comparison. It then implements the quicksort algorithm. To make use of the sorting function, we return to our previous example. 
The first thing to do is write some comparison functions: @@ -19528,7 +19558,7 @@ for (i = 1; i <= n; i++) @end example @noindent -@code{gawk} looks up the actual function to call only once. +@command{gawk} looks up the actual function to call only once. @node Functions Summary @section Summary @@ -19624,7 +19654,7 @@ It contains the following chapters: your own @command{awk} functions. Writing functions is important, because it allows you to encapsulate algorithms and program tasks in a single place. It simplifies programming, making program development more -manageable, and making programs more readable. +manageable and making programs more readable. @cindex Kernighan, Brian @cindex Plauger, P.J.@: @@ -19753,7 +19783,7 @@ often use variable names like these for their own purposes. The example programs shown in this @value{CHAPTER} all start the names of their private variables with an underscore (@samp{_}). Users generally don't use leading underscores in their variable names, so this convention immediately -decreases the chances that the variable name will be accidentally shared +decreases the chances that the variable names will be accidentally shared with the user's program. @cindex @code{_} (underscore), in names of private variables @@ -19771,8 +19801,8 @@ show how our own @command{awk} programming style has evolved and to provide some basis for this discussion.} As a final note on variable naming, if a function makes global variables -available for use by a main program, it is a good convention to start that -variable's name with a capital letter---for +available for use by a main program, it is a good convention to start those +variables' names with a capital letter---for example, @code{getopt()}'s @code{Opterr} and @code{Optind} variables (@pxref{Getopt Function}). The leading capital letter indicates that it is global, while the fact that @@ -19783,7 +19813,7 @@ not one of @command{awk}'s predefined variables, such as @code{FS}. 
It is also important that @emph{all} variables in library functions that do not need to save state are, in fact, declared local.@footnote{@command{gawk}'s @option{--dump-variables} command-line -option is useful for verifying this.} If this is not done, the variable +option is useful for verifying this.} If this is not done, the variables could accidentally be used in the user's program, leading to bugs that are very difficult to track down: @@ -19981,7 +20011,7 @@ Following is the function: @example @c file eg/lib/assert.awk -# assert --- assert that a condition is true. Otherwise exit. +# assert --- assert that a condition is true. Otherwise, exit. @c endfile @ignore @@ -20017,7 +20047,7 @@ is false, it prints a message to standard error, using the @code{string} parameter to describe the failed condition. It then sets the variable @code{_assert_exit} to one and executes the @code{exit} statement. The @code{exit} statement jumps to the @code{END} rule. If the @code{END} -rules finds @code{_assert_exit} to be true, it exits immediately. +rule finds @code{_assert_exit} to be true, it exits immediately. The purpose of the test in the @code{END} rule is to keep any other @code{END} rules from running. When an assertion fails, the @@ -20309,7 +20339,7 @@ all the strings in an array into one long string. The following function, the application programs (@pxref{Sample Programs}). -Good function design is important; this function needs to be general but it +Good function design is important; this function needs to be general, but it should also have a reasonable default behavior. It is called with an array as well as the beginning and ending indices of the elements in the array to be merged. This assumes that the array indices are numeric---a reasonable @@ -20457,7 +20487,7 @@ allowed the user to supply an optional timestamp value to use instead of the current time. 
@node Readfile Function -@subsection Reading a Whole File At Once +@subsection Reading a Whole File at Once Often, it is convenient to have the entire contents of a file available in memory as a single string. A straightforward but naive way to @@ -20514,13 +20544,13 @@ function readfile(file, tmp, save_rs) It works by setting @code{RS} to @samp{^$}, a regular expression that will never match if the file has contents. @command{gawk} reads data from -the file into @code{tmp} attempting to match @code{RS}. The match fails +the file into @code{tmp}, attempting to match @code{RS}. The match fails after each read, but fails quickly, such that @command{gawk} fills @code{tmp} with the entire contents of the file. (@DBXREF{Records} for information on @code{RT} and @code{RS}.) In the case that @code{file} is empty, the return value is the null -string. Thus calling code may use something like: +string. Thus, calling code may use something like: @example contents = readfile("/some/path") @@ -20531,7 +20561,7 @@ if (length(contents) == 0) This tests the result to see if it is empty or not. An equivalent test would be @samp{contents == ""}. -@xref{Extension Sample Readfile}, for an extension function that +@DBXREF{Extension Sample Readfile} for an extension function that also reads an entire file into memory. @node Shell Quoting @@ -20638,8 +20668,8 @@ The @code{BEGIN} and @code{END} rules are each executed exactly once, at the beginning and end of your @command{awk} program, respectively (@pxref{BEGIN/END}). We (the @command{gawk} authors) once had a user who mistakenly thought that the -@code{BEGIN} rule is executed at the beginning of each @value{DF} and the -@code{END} rule is executed at the end of each @value{DF}. +@code{BEGIN} rules were executed at the beginning of each @value{DF} and the +@code{END} rules were executed at the end of each @value{DF}. 
When informed that this was not the case, the user requested that we add new special @@ -20679,7 +20709,7 @@ END @{ endfile(FILENAME) @} This file must be loaded before the user's ``main'' program, so that the rule it supplies is executed first. -This rule relies on @command{awk}'s @code{FILENAME} variable that +This rule relies on @command{awk}'s @code{FILENAME} variable, which automatically changes for each new @value{DF}. The current @value{FN} is saved in a private variable, @code{_oldfilename}. If @code{FILENAME} does not equal @code{_oldfilename}, then a new @value{DF} is being processed and @@ -20695,7 +20725,7 @@ first @value{DF}. The program also supplies an @code{END} rule to do the final processing for the last file. Because this @code{END} rule comes before any @code{END} rules supplied in the ``main'' program, @code{endfile()} is called first. Once -again the value of multiple @code{BEGIN} and @code{END} rules should be clear. +again, the value of multiple @code{BEGIN} and @code{END} rules should be clear. @cindex @code{beginfile()} user-defined function @cindex @code{endfile()} user-defined function @@ -20738,7 +20768,7 @@ how it simplifies writing the main program. You are probably wondering, if @code{beginfile()} and @code{endfile()} functions can do the job, why does @command{gawk} have -@code{BEGINFILE} and @code{ENDFILE} patterns (@pxref{BEGINFILE/ENDFILE})? +@code{BEGINFILE} and @code{ENDFILE} patterns? Good question. Normally, if @command{awk} cannot open a file, this causes an immediate fatal error. In this case, there is no way for a @@ -20747,13 +20777,14 @@ calling it relies on the file being open and at the first record. Thus, the main reason for @code{BEGINFILE} is to give you a ``hook'' to catch files that cannot be processed. @code{ENDFILE} exists for symmetry, and because it provides an easy way to do per-file cleanup processing. +For more information, refer to @ref{BEGINFILE/ENDFILE}. 
@end sidebar @node Rewind Function @subsection Rereading the Current File @cindex files, reading -Another request for a new built-in function was for a @code{rewind()} +Another request for a new built-in function was for a function that would make it possible to reread the current file. The requesting user didn't want to have to use @code{getline} (@pxref{Getline}) @@ -20762,7 +20793,7 @@ inside a loop. However, as long as you are not in the @code{END} rule, it is quite easy to arrange to immediately close the current input file and then start over with it from the top. -For lack of a better name, we'll call it @code{rewind()}: +For lack of a better name, we'll call the function @code{rewind()}: @cindex @code{rewind()} user-defined function @example @@ -20855,16 +20886,16 @@ See also @ref{ARGC and ARGV}. Because @command{awk} variable names only allow the English letters, the regular expression check purposely does not use character classes such as @samp{[:alpha:]} and @samp{[:alnum:]} -(@pxref{Bracket Expressions}) +(@pxref{Bracket Expressions}). @node Empty Files -@subsection Checking for Zero-length Files +@subsection Checking for Zero-Length Files All known @command{awk} implementations silently skip over zero-length files. This is a by-product of @command{awk}'s implicit read-a-record-and-match-against-the-rules loop: when @command{awk} tries to read a record from an empty file, it immediately receives an -end of file indication, closes the file, and proceeds on to the next +end-of-file indication, closes the file, and proceeds on to the next command-line @value{DF}, @emph{without} executing any user-level @command{awk} program code. @@ -20929,7 +20960,7 @@ Occasionally, you might not want @command{awk} to process command-line variable assignments (@pxref{Assignment Options}). In particular, if you have a @value{FN} that contains an @samp{=} character, -@command{awk} treats the @value{FN} as an assignment, and does not process it. 
+@command{awk} treats the @value{FN} as an assignment and does not process it. Some users have suggested an additional command-line option for @command{gawk} to disable command-line assignments. However, some simple programming with @@ -21291,8 +21322,8 @@ BEGIN @{ @c endfile @end example -The rest of the @code{BEGIN} rule is a simple test program. Here is the -result of two sample runs of the test program: +The rest of the @code{BEGIN} rule is a simple test program. Here are the +results of two sample runs of the test program: @example $ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x} @@ -21350,7 +21381,7 @@ use @code{getopt()} to process their arguments. The @code{PROCINFO} array (@pxref{Built-in Variables}) provides access to the current user's real and effective user and group ID -numbers, and if available, the user's supplementary group set. +numbers, and, if available, the user's supplementary group set. However, because these are numbers, they do not provide very useful information to the average user. There needs to be some way to find the user information associated with the user and group ID numbers. This @@ -21370,7 +21401,7 @@ kept. Instead, it provides the @code{<pwd.h>} header file and several C language subroutines for obtaining user information. The primary function is @code{getpwent()}, for ``get password entry.'' The ``password'' comes from the original user database file, -@file{/etc/passwd}, which stores user information, along with the +@file{/etc/passwd}, which stores user information along with the encrypted passwords (hence the name). @cindex @command{pwcat} program @@ -21469,7 +21500,7 @@ The user's encrypted password. This may not be available on some systems. @item User-ID The user's numeric user ID number. -(On some systems, it's a C @code{long}, and not an @code{int}. Thus +(On some systems, it's a C @code{long}, and not an @code{int}. Thus, we cast it to @code{long} for all cases.) 
@item Group-ID @@ -21596,7 +21627,7 @@ The code that checks for using @code{FPAT}, using @code{using_fpat} and @code{PROCINFO["FS"]}, is similar. The main part of the function uses a loop to read database lines, split -the line into fields, and then store the line into each array as necessary. +the lines into fields, and then store the lines into each array as necessary. When the loop is done, @code{@w{_pw_init()}} cleans up by closing the pipeline, setting @code{@w{_pw_inited}} to one, and restoring @code{FS} (and @code{FIELDWIDTHS} or @code{FPAT} @@ -21813,7 +21844,7 @@ it is usually empty or set to @samp{*}. @item Group ID Number The group's numeric group ID number; the association of name to number must be unique within the file. -(On some systems it's a C @code{long}, and not an @code{int}. Thus +(On some systems it's a C @code{long}, and not an @code{int}. Thus, we cast it to @code{long} for all cases.) @item Group Member List @@ -21927,32 +21958,32 @@ The @code{@w{_gr_init()}} function first saves @code{FS}, @code{$0}, and then sets @code{FS} and @code{RS} to the correct values for scanning the group information. It also takes care to note whether @code{FIELDWIDTHS} or @code{FPAT} -is being used, and to restore the appropriate field splitting mechanism. +is being used, and to restore the appropriate field-splitting mechanism. -The group information is stored is several associative arrays. +The group information is stored in several associative arrays. The arrays are indexed by group name (@code{@w{_gr_byname}}), by group ID number (@code{@w{_gr_bygid}}), and by position in the database (@code{@w{_gr_bycount}}). There is an additional array indexed by username (@code{@w{_gr_groupsbyuser}}), which is a space-separated list of groups to which each user belongs. -Unlike the user database, it is possible to have multiple records in the +Unlike in the user database, it is possible to have multiple records in the database for the same group. 
This is common when a group has a large number of members. A pair of such entries might look like the following: @example -tvpeople:*:101:johny,jay,arsenio +tvpeople:*:101:johnny,jay,arsenio tvpeople:*:101:david,conan,tom,joan @end example For this reason, @code{_gr_init()} looks to see if a group name or -group ID number is already seen. If it is, the usernames are -simply concatenated onto the previous list of users.@footnote{There is actually a +group ID number is already seen. If so, the usernames are +simply concatenated onto the previous list of users.@footnote{There is a subtle problem with the code just presented. Suppose that the first time there were no names. This code adds the names with a leading comma. It also doesn't check that there is a @code{$4}.} Finally, @code{_gr_init()} closes the pipeline to @command{grcat}, restores -@code{FS} (and @code{FIELDWIDTHS} or @code{FPAT} if necessary), @code{RS}, and @code{$0}, +@code{FS} (and @code{FIELDWIDTHS} or @code{FPAT}, if necessary), @code{RS}, and @code{$0}, initializes @code{_gr_count} to zero (it is used later), and makes @code{_gr_inited} nonzero. @@ -22052,12 +22083,12 @@ uses these functions. @DBREF{Arrays of Arrays} described how @command{gawk} provides arrays of arrays. In particular, any element of -an array may be either a scalar, or another array. The +an array may be either a scalar or another array. The @code{isarray()} function (@pxref{Type Functions}) lets you distinguish an array from a scalar. The following function, @code{walk_array()}, recursively traverses -an array, printing each element's indices and value. +an array, printing the element indices and values. 
You call it with the array and a string representing the name of the array: @@ -22129,24 +22160,24 @@ The functions presented here fit into the following categories: @c nested list @table @asis @item General problems -Number-to-string conversion, assertions, rounding, random number +Number-to-string conversion, testing assertions, rounding, random number generation, converting characters to numbers, joining strings, getting easily usable time-of-day information, and reading a whole file in -one shot. +one shot @item Managing @value{DF}s Noting @value{DF} boundaries, rereading the current file, checking for readable files, checking for zero-length files, and treating assignments -as @value{FN}s. +as @value{FN}s @item Processing command-line options -An @command{awk} version of the standard C @code{getopt()} function. +An @command{awk} version of the standard C @code{getopt()} function @item Reading the user and group databases -Two sets of routines that parallel the C library versions. +Two sets of routines that parallel the C library versions @item Traversing arrays of arrays -A simple function to traverse an array of arrays to any depth. +A simple function to traverse an array of arrays to any depth @end table @c end nested list @@ -22241,10 +22272,10 @@ in this @value{CHAPTER}. The second presents @command{awk} versions of several common POSIX utilities. These are programs that you are hopefully already familiar with, -and therefore, whose problems are understood. +and therefore whose problems are understood. By reimplementing these programs in @command{awk}, you can focus on the @command{awk}-related aspects of solving -the programming problem. +the programming problems. The third is a grab bag of interesting programs. These solve a number of different data-manipulation and management @@ -22304,7 +22335,7 @@ It should be noted that these programs are not necessarily intended to replace the installed versions on your system. 
Nor may all of these programs be fully compliant with the most recent POSIX standard. This is not a problem; their -purpose is to illustrate @command{awk} language programming for ``real world'' +purpose is to illustrate @command{awk} language programming for ``real-world'' tasks. The programs are presented in alphabetical order. @@ -22333,7 +22364,7 @@ but you may supply a command-line option to change the field @dfn{delimiter} (i.e., the field-separator character). @command{cut}'s definition of fields is less general than @command{awk}'s. -A common use of @command{cut} might be to pull out just the login name of +A common use of @command{cut} might be to pull out just the login names of logged-on users from the output of @command{who}. For example, the following pipeline generates a sorted, unique list of the logged-on users: @@ -22842,7 +22873,7 @@ successful or unsuccessful match. If the line does not match, the @code{next} statement just moves on to the next record. A number of additional tests are made, but they are only done if we -are not counting lines. First, if the user only wants exit status +are not counting lines. First, if the user only wants the exit status (@code{no_print} is true), then it is enough to know that @emph{one} line in this file matched, and we can skip on to the next file with @code{nextfile}. Similarly, if we are only printing @value{FN}s, we can @@ -22883,7 +22914,7 @@ if necessary: @end example The @code{END} rule takes care of producing the correct exit status. If -there are no matches, the exit status is one; otherwise it is zero: +there are no matches, the exit status is one; otherwise, it is zero: @example @c file eg/prog/egrep.awk @@ -22935,7 +22966,8 @@ Here is a simple version of @command{id} written in @command{awk}. It uses the user database library functions (@pxref{Passwd Functions}) and the group database library functions -(@pxref{Group Functions}): +(@pxref{Group Functions}) +from @ref{Library Functions}. 
The program is fairly straightforward. All the work is done in the @code{BEGIN} rule. The user and group ID numbers are obtained from @@ -23062,8 +23094,8 @@ By default, the output files are named @file{xaa}, @file{xab}, and so on. Each file has 1,000 lines in it, with the likely exception of the last file. To change the number of lines in each file, supply a number on the command line -preceded with a minus (e.g., @samp{-500} for files with 500 lines in them -instead of 1,000). To change the name of the output files to something like +preceded with a minus sign (e.g., @samp{-500} for files with 500 lines in them +instead of 1,000). To change the names of the output files to something like @file{myfileaa}, @file{myfileab}, and so on, supply an additional argument that specifies the @value{FN} prefix. @@ -23902,7 +23934,7 @@ checking and setting of defaults: the delay, the count, and the message to print. If the user supplied a message without the ASCII BEL character (known as the ``alert'' character, @code{"\a"}), then it is added to the message. (On many systems, printing the ASCII BEL generates an -audible alert. Thus when the alarm goes off, the system calls attention +audible alert. Thus, when the alarm goes off, the system calls attention to itself in case the user is not looking at the computer.) Just for a change, this program uses a @code{switch} statement (@pxref{Switch Statement}), but the processing could be done with a series of @@ -24071,7 +24103,7 @@ to @command{gawk}. @c at least theoretically The following program was written to prove that character transliteration could be done with a user-level -function. This program is not as complete as the system @command{tr} utility +function. This program is not as complete as the system @command{tr} utility, but it does most of the job. 
The @command{translate} program was written long before @command{gawk} @@ -24083,13 +24115,13 @@ takes three arguments: @table @code @item from -A list of characters from which to translate. +A list of characters from which to translate @item to -A list of characters to which to translate. +A list of characters to which to translate @item target -The string on which to do the translation. +The string on which to do the translation @end table Associative arrays make the translation part fairly easy. @code{t_ar} holds @@ -24098,7 +24130,7 @@ loop goes through @code{from}, one character at a time. For each character in @code{from}, if the character appears in @code{target}, it is replaced with the corresponding @code{to} character. -The @code{translate()} function calls @code{stranslate()} using @code{$0} +The @code{translate()} function calls @code{stranslate()}, using @code{$0} as the target. The main program sets two global variables, @code{FROM} and @code{TO}, from the command line, and then changes @code{ARGV} so that @command{awk} reads from the standard input. @@ -24120,7 +24152,7 @@ Finally, the processing rule simply calls @code{translate()} for each record: @c endfile @end ignore @c file eg/prog/translate.awk -# Bugs: does not handle things like: tr A-Z a-z, it has +# Bugs: does not handle things like tr A-Z a-z; it has # to be spelled out. However, if `to' is shorter than `from', # the last character in `to' is used for the rest of `from'. @@ -24196,7 +24228,7 @@ for inspiration. @cindex printing, mailing labels @cindex mailing labels@comma{} printing -Here is a ``real world''@footnote{``Real world'' is defined as +Here is a ``real-world''@footnote{``Real world'' is defined as ``a program actually used to get something done.''} program. This script reads lists of names and @@ -24205,7 +24237,7 @@ on it, two across and 10 down. The addresses are guaranteed to be no more than five lines of data. Each address is separated from the next by a blank line. 
-The basic idea is to read 20 labels worth of data. Each line of each label +The basic idea is to read 20 labels' worth of data. Each line of each label is stored in the @code{line} array. The single rule takes care of filling the @code{line} array and printing the page when 20 labels have been read. @@ -24228,12 +24260,12 @@ of lines on the page Most of the work is done in the @code{printpage()} function. The label lines are stored sequentially in the @code{line} array. But they -have to print horizontally; @code{line[1]} next to @code{line[6]}, +have to print horizontally: @code{line[1]} next to @code{line[6]}, @code{line[2]} next to @code{line[7]}, and so on. Two loops accomplish this. The outer loop, controlled by @code{i}, steps through every 10 lines of data; this is each row of labels. The inner loop, controlled by @code{j}, goes through the lines within the row. -As @code{j} goes from 0 to 4, @samp{i+j} is the @code{j}-th line in +As @code{j} goes from 0 to 4, @samp{i+j} is the @code{j}th line in the row, and @samp{i+j+5} is the entry next to it. The output ends up looking something like this: @@ -24351,8 +24383,8 @@ END @{ @} @end example -The program relies on @command{awk}'s default field splitting -mechanism to break each line up into ``words,'' and uses an +The program relies on @command{awk}'s default field-splitting +mechanism to break each line up into ``words'' and uses an associative array named @code{freq}, indexed by each word, to count the number of times the word occurs. In the @code{END} rule, it prints the counts. @@ -24457,7 +24489,7 @@ to use the @command{sort} program. @cindex lines, duplicate@comma{} removing The @command{uniq} program -(@pxref{Uniq Program}), +(@pxref{Uniq Program}) removes duplicate lines from @emph{sorted} data. Suppose, however, you need to remove duplicate lines from a @value{DF} but @@ -24544,7 +24576,7 @@ Texinfo input file into separate files. 
@cindex Texinfo This @value{DOCUMENT} is written in @uref{http://www.gnu.org/software/texinfo/, Texinfo}, -the GNU project's document formatting language. +the GNU Project's document formatting language. A single Texinfo source file can be used to produce both printed documentation, with @TeX{}, and online documentation. @ifnotinfo @@ -24603,7 +24635,7 @@ The Texinfo file looks something like this: @example @dots{} -This program has a @@code@{BEGIN@} rule, +This program has a @@code@{BEGIN@} rule that prints a nice message: @@example @@ -24632,7 +24664,7 @@ exits with a zero exit status, signifying OK: @cindex @code{extract.awk} program @example @c file eg/prog/extract.awk -# extract.awk --- extract files and run programs from texinfo files +# extract.awk --- extract files and run programs from Texinfo files @c endfile @ignore @c file eg/prog/extract.awk @@ -24673,12 +24705,12 @@ The second rule handles moving data into files. It verifies that a @value{FN} is given in the directive. If the file named is not the current file, then the current file is closed. Keeping the current file open until a new file is encountered allows the use of the @samp{>} -redirection for printing the contents, keeping open file management +redirection for printing the contents, keeping open-file management simple. The @code{for} loop does the work. It reads lines using @code{getline} (@pxref{Getline}). -For an unexpected end of file, it calls the @code{@w{unexpected_eof()}} +For an unexpected end-of-file, it calls the @code{@w{unexpected_eof()}} function. If the line is an ``endfile'' line, then it breaks out of the loop. If the line is an @samp{@@group} or @samp{@@end group} line, then it @@ -24780,7 +24812,7 @@ END @{ @cindex @command{sed} utility @cindex stream editors -The @command{sed} utility is a stream editor, a program that reads a +The @command{sed} utility is a @dfn{stream editor}, a program that reads a stream of data, makes changes to it, and passes it on. 
It is often used to make global changes to a large file or to a stream of data generated by a pipeline of commands. @@ -24925,7 +24957,7 @@ includes don't accidentally include a library function twice. @command{igawk} should behave just like @command{gawk} externally. This means it should accept all of @command{gawk}'s command-line arguments, including the ability to have multiple source files specified via -@option{-f}, and the ability to mix command-line and library source files. +@option{-f} and the ability to mix command-line and library source files. The program is written using the POSIX Shell (@command{sh}) command language.@footnote{Fully explaining the @command{sh} language is beyond @@ -24964,7 +24996,7 @@ Run the expanded program with @command{gawk} and any other original command-line arguments that the user supplied (such as the @value{DF} names). @end enumerate -This program uses shell variables extensively: for storing command-line arguments, +This program uses shell variables extensively: for storing command-line arguments and the text of the @command{awk} program that will expand the user's program, for the user's original program, and for the expanded program. Doing so removes some potential problems that might arise were we to use temporary files instead, @@ -25281,22 +25313,7 @@ Save the results of this processing in the shell variable The last step is to call @command{gawk} with the expanded program, along with the original -options and command-line arguments that the user supplied. - -@c this causes more problems than it solves, so leave it out. -@ignore -The special file @file{/dev/null} is passed as a @value{DF} to @command{gawk} -to handle an interesting case. Suppose that the user's program only has -a @code{BEGIN} rule and there are no @value{DF}s to read. -The program should exit without reading any @value{DF}s. -However, suppose that an included library file defines an @code{END} -rule of its own. 
In this case, @command{gawk} will hang, reading standard -input. In order to avoid this, @file{/dev/null} is explicitly added to the -command line. Reading from @file{/dev/null} always returns an immediate -end of file indication. - -@c Hmm. Add /dev/null if $# is 0? Still messes up ARGV. Sigh. -@end ignore +options and command-line arguments that the user supplied: @example @c file eg/prog/igawk.sh @@ -25362,8 +25379,8 @@ the same letters Column 2, Problem C, of Jon Bentley's @cite{Programming Pearls}, Second Edition, presents an elegant algorithm. The idea is to give words that are anagrams a common signature, sort all the words together by their -signature, and then print them. Dr.@: Bentley observes that taking the -letters in each word and sorting them produces that common signature. +signatures, and then print them. Dr.@: Bentley observes that taking the +letters in each word and sorting them produces those common signatures. The following program uses arrays of arrays to bring together words with the same signature and array sorting to print the words @@ -25372,8 +25389,8 @@ in sorted order: @cindex @code{anagram.awk} program @example @c file eg/prog/anagram.awk -# anagram.awk --- An implementation of the anagram finding algorithm -# from Jon Bentley's "Programming Pearls", 2nd edition. +# anagram.awk --- An implementation of the anagram-finding algorithm +# from Jon Bentley's "Programming Pearls," 2nd edition. # Addison Wesley, 2000, ISBN 0-201-65788-0. # Column 2, Problem C, section 2.8, pp 18-20. @c endfile @@ -25421,7 +25438,7 @@ sorts the letters, and then joins them back together: @example @c file eg/prog/anagram.awk -# word2key --- split word apart into letters, sort, joining back together +# word2key --- split word apart into letters, sort, and join back together function word2key(word, a, i, n, result) @{ @@ -25616,12 +25633,13 @@ characters. 
The ability to use @code{split()} with the empty string as the separator can considerably simplify such tasks. @item -The library functions from @ref{Library Functions}, proved their -usefulness for a number of real (if small) programs. +The examples here demonstrate the usefulness of the library +functions from @DBREF{Library Functions} +for a number of real (if small) programs. @item Besides reinventing POSIX wheels, other programs solved a selection of -interesting problems, such as finding duplicates words in text, printing +interesting problems, such as finding duplicate words in text, printing mailing labels, and finding anagrams. @end itemize @@ -25817,18 +25835,18 @@ a violent psychopath who knows where you live.} This @value{CHAPTER} discusses advanced features in @command{gawk}. It's a bit of a ``grab bag'' of items that are otherwise unrelated to each other. -First, a command-line option allows @command{gawk} to recognize +First, we look at a command-line option that allows @command{gawk} to recognize nondecimal numbers in input data, not just in @command{awk} programs. Then, @command{gawk}'s special features for sorting arrays are presented. Next, two-way I/O, discussed briefly in earlier parts of this @value{DOCUMENT}, is described in full detail, along with the basics -of TCP/IP networking. Finally, @command{gawk} +of TCP/IP networking. Finally, we see how @command{gawk} can @dfn{profile} an @command{awk} program, making it possible to tune it for performance. @c FULLXREF ON -A number of advanced features require separate @value{CHAPTER}s of their +Additional advanced features are discussed in separate @value{CHAPTER}s of their own: @itemize @value{BULLET} @@ -25922,7 +25940,8 @@ This option may disappear in a future version of @command{gawk}. 
@node Array Sorting @section Controlling Array Traversal and Array Sorting -@command{gawk} lets you control the order in which a @samp{for (i in array)} +@command{gawk} lets you control the order in which a +@samp{for (@var{indx} in @var{array})} loop traverses an array. In addition, two built-in functions, @code{asort()} and @code{asorti()}, @@ -25938,7 +25957,7 @@ to order the elements during sorting. @node Controlling Array Traversal @subsection Controlling Array Traversal -By default, the order in which a @samp{for (i in array)} loop +By default, the order in which a @samp{for (@var{indx} in @var{array})} loop scans an array is not defined; it is generally based upon the internal implementation of arrays inside @command{awk}. @@ -25967,23 +25986,23 @@ function comp_func(i1, v1, i2, v2) @} @end example -Here, @var{i1} and @var{i2} are the indices, and @var{v1} and @var{v2} +Here, @code{i1} and @code{i2} are the indices, and @code{v1} and @code{v2} are the corresponding values of the two elements being compared. -Either @var{v1} or @var{v2}, or both, can be arrays if the array being +Either @code{v1} or @code{v2}, or both, can be arrays if the array being traversed contains subarrays as values. (@DBXREF{Arrays of Arrays} for more information about subarrays.) The three possible return values are interpreted as follows: @table @code @item comp_func(i1, v1, i2, v2) < 0 -Index @var{i1} comes before index @var{i2} during loop traversal. +Index @code{i1} comes before index @code{i2} during loop traversal. @item comp_func(i1, v1, i2, v2) == 0 -Indices @var{i1} and @var{i2} -come together but the relative order with respect to each other is undefined. +Indices @code{i1} and @code{i2} +come together, but the relative order with respect to each other is undefined. @item comp_func(i1, v1, i2, v2) > 0 -Index @var{i1} comes after index @var{i2} during loop traversal. +Index @code{i1} comes after index @code{i2} during loop traversal. 
@end table Our first comparison function can be used to scan an array in @@ -26144,7 +26163,7 @@ As already mentioned, the order of the indices is arbitrary if two elements compare equal. This is usually not a problem, but letting the tied elements come out in arbitrary order can be an issue, especially when comparing item values. The partial ordering of the equal elements -may change the next time the array is traversed, if other elements are added or +may change the next time the array is traversed, if other elements are added to or removed from the array. One way to resolve ties when comparing elements with otherwise equal values is to include the indices in the comparison rules. Note that doing this may make the loop traversal less efficient, @@ -26187,7 +26206,7 @@ equivalent or distinct. Another point to keep in mind is that in the case of subarrays, the element values can themselves be arrays; a production comparison function should use the @code{isarray()} function -(@pxref{Type Functions}), +(@pxref{Type Functions}) to check for this, and choose a defined sorting order for subarrays. All sorting based on @code{PROCINFO["sorted_in"]} @@ -26195,7 +26214,7 @@ is disabled in POSIX mode, because the @code{PROCINFO} array is not special in that case. As a side note, sorting the array indices before traversing -the array has been reported to add 15% to 20% overhead to the +the array has been reported to add a 15% to 20% overhead to the execution time of @command{awk} programs. For this reason, sorted array traversal is not the default. @@ -26254,7 +26273,7 @@ However, the @code{source} array is not affected. Often, what's needed is to sort on the values of the @emph{indices} instead of the values of the elements. To do that, use the @code{asorti()} function. 
The interface and behavior are identical to -that of @code{asort()}, except that the index values are used for sorting, +that of @code{asort()}, except that the index values are used for sorting and become the values of the result array: @example @@ -26289,8 +26308,8 @@ it chooses}, taking into account just the indices, just the values, or both. This is extremely powerful. Once the array is sorted, @code{asort()} takes the @emph{values} in -their final order, and uses them to fill in the result array, whereas -@code{asorti()} takes the @emph{indices} in their final order, and uses +their final order and uses them to fill in the result array, whereas +@code{asorti()} takes the @emph{indices} in their final order and uses them to fill in the result array. @cindex reference counting, sorting arrays @@ -26587,7 +26606,7 @@ service name. @cindex @command{gawk}, @code{ERRNO} variable in @cindex @code{ERRNO} variable @quotation NOTE -Failure in opening a two-way socket will result in a non-fatal error +Failure in opening a two-way socket will result in a nonfatal error being returned to the calling code. The value of @code{ERRNO} indicates the error (@pxref{Auto-set}). @end quotation @@ -26604,19 +26623,19 @@ BEGIN @{ @end example This program reads the current date and time from the local system's -TCP @samp{daytime} server. +TCP @code{daytime} server. It then prints the results and closes the connection. Because this topic is extensive, the use of @command{gawk} for TCP/IP programming is documented separately. 
@ifinfo See -@inforef{Top, , General Introduction, gawkinet, TCP/IP Internetworking with @command{gawk}}, +@inforef{Top, , General Introduction, gawkinet, @value{GAWKINETTITLE}}, @end ifinfo @ifnotinfo See @uref{http://www.gnu.org/software/gawk/manual/gawkinet/, -@cite{TCP/IP Internetworking with @command{gawk}}}, +@cite{@value{GAWKINETTITLE}}}, which comes as part of the @command{gawk} distribution, @end ifnotinfo for a much more complete introduction and discussion, as well as @@ -26692,9 +26711,9 @@ junk @end example Here is the @file{awkprof.out} that results from running the -@command{gawk} profiler on this program and data. (This example also +@command{gawk} profiler on this program and data (this example also illustrates that @command{awk} programmers sometimes get up very early -in the morning to work.) +in the morning to work): @cindex @code{BEGIN} pattern, and profiling @cindex @code{END} pattern, and profiling @@ -26754,8 +26773,8 @@ They are as follows: @item The program is printed in the order @code{BEGIN} rules, @code{BEGINFILE} rules, -pattern/action rules, -@code{ENDFILE} rules, @code{END} rules and functions, listed +pattern--action rules, +@code{ENDFILE} rules, @code{END} rules, and functions, listed alphabetically. Multiple @code{BEGIN} and @code{END} rules retain their separate identities, as do @@ -26763,7 +26782,7 @@ multiple @code{BEGINFILE} and @code{ENDFILE} rules. @cindex patterns, counts, in a profile @item -Pattern-action rules have two counts. +Pattern--action rules have two counts. The first count, to the left of the rule, shows how many times the rule's pattern was @emph{tested}. The second count, to the right of the rule's opening left brace @@ -26830,13 +26849,13 @@ the target of a redirection isn't a scalar, it gets parenthesized. @command{gawk} supplies leading comments in front of the @code{BEGIN} and @code{END} rules, the @code{BEGINFILE} and @code{ENDFILE} rules, -the pattern/action rules, and the functions. 
+the pattern--action rules, and the functions. @end itemize The profiled version of your program may not look exactly like what you typed when you wrote it. This is because @command{gawk} creates the -profiled version by ``pretty printing'' its internal representation of +profiled version by ``pretty-printing'' its internal representation of the program. The advantage to this is that @command{gawk} can produce a standard representation. Also, things such as: @@ -26919,16 +26938,16 @@ If you use the @code{HUP} signal instead of the @code{USR1} signal, @cindex @code{SIGQUIT} signal (MS-Windows) @cindex signals, @code{QUIT}/@code{SIGQUIT} (MS-Windows) When @command{gawk} runs on MS-Windows systems, it uses the -@code{INT} and @code{QUIT} signals for producing the profile and, in +@code{INT} and @code{QUIT} signals for producing the profile, and in the case of the @code{INT} signal, @command{gawk} exits. This is because these systems don't support the @command{kill} command, so the only signals you can deliver to a program are those generated by the keyboard. The @code{INT} signal is generated by the -@kbd{Ctrl-@key{C}} or @kbd{Ctrl-@key{BREAK}} key, while the -@code{QUIT} signal is generated by the @kbd{Ctrl-@key{\}} key. +@kbd{Ctrl-c} or @kbd{Ctrl-BREAK} key, while the +@code{QUIT} signal is generated by the @kbd{Ctrl-\} key. Finally, @command{gawk} also accepts another option, @option{--pretty-print}. -When called this way, @command{gawk} ``pretty prints'' the program into +When called this way, @command{gawk} ``pretty-prints'' the program into @file{awkprof.out}, without any execution counts. @quotation NOTE @@ -26982,7 +27001,7 @@ optionally, close off one side of the two-way communications. @item By using special @value{FN}s with the @samp{|&} operator, you can open a -TCP/IP (or UDP/IP) connection to remote hosts in the Internet. @command{gawk} +TCP/IP (or UDP/IP) connection to remote hosts on the Internet. @command{gawk} supports both IPv4 and IPv6. 
@item @@ -26992,7 +27011,7 @@ you tune them more easily. Sending the @code{USR1} signal while profiling cause @command{gawk} to dump the profile and keep going, including a function call stack. @item -You can also just ``pretty print'' the program. This currently also runs +You can also just ``pretty-print'' the program. This currently also runs the program, but that will change in the next major release. @end itemize @@ -30154,7 +30173,7 @@ Allowing completely alphabetic strings to have valid numeric values is also a very severe departure from historical practice. @end itemize -The second problem is that the @code{gawk} maintainer feels that this +The second problem is that the @command{gawk} maintainer feels that this interpretation of the standard, which requires a certain amount of ``language lawyering'' to arrive at in the first place, was not even intended by the standard developers. In other words, ``we see how you @@ -30313,7 +30332,7 @@ When @option{--sandbox} is specified, extensions are disabled * Finding Extensions:: How @command{gawk} finds compiled extensions. * Extension Example:: Example C code for an extension. * Extension Samples:: The sample extensions that ship with - @code{gawk}. + @command{gawk}. * gawkextlib:: The @code{gawkextlib} project. * Extension summary:: Extension summary. * Extension Exercises:: Exercises. @@ -31277,7 +31296,7 @@ If the concept of a ``record terminator'' makes sense, then @code{*rt_start} should be set to point to the data to be used for @code{RT}, and @code{*rt_len} should be set to the length of the data. Otherwise, @code{*rt_len} should be set to zero. -@code{gawk} makes its own copy of this data, so the +@command{gawk} makes its own copy of this data, so the extension must manage this storage. @end table @@ -31323,7 +31342,7 @@ When writing an input parser, you should think about (and document) how it is expected to interact with @command{awk} code. 
You may want it to always be called, and take effect as appropriate (as the @code{readdir} extension does). Or you may want it to take effect -based upon the value of an @code{awk} variable, as the XML extension +based upon the value of an @command{awk} variable, as the XML extension from the @code{gawkextlib} project does (@pxref{gawkextlib}). In the latter case, code in a @code{BEGINFILE} section can look at @code{FILENAME} and @code{ERRNO} to decide whether or @@ -32106,7 +32125,7 @@ converts it to a string. Using non-integral values is possible, but requires that you understand how such values are converted to strings (@pxref{Conversion}); thus using integral values is safest. -As with @emph{all} strings passed into @code{gawk} from an extension, +As with @emph{all} strings passed into @command{gawk} from an extension, the string value of @code{index} must come from @code{gawk_malloc()}, @code{gawk_calloc()} or @code{gawk_realloc()}, and @command{gawk} releases the storage. @@ -34813,6 +34832,11 @@ The @code{isarray()} function to check if a variable is an array or not The @code{bindtextdomain()}, @code{dcgettext()} and @code{dcngettext()} functions for internationalization (@pxref{Programmer i18n}). + +@item +The @code{div()} function for doing integer +division and remainder +(@pxref{Numeric Functions}). @end itemize @item @@ -34946,8 +34970,14 @@ Ultrix @end itemize @item -@c FIXME: Verify the version here. -Support for MirBSD was removed at @command{gawk} @value{PVERSION} 4.2. +Support for the following systems was removed from the code +for @command{gawk} @value{PVERSION} 4.2: + +@c nested table +@itemize @value{MINUS} +@item +MirBSD +@end itemize @end itemize @@ -35561,6 +35591,40 @@ with a minimum of two The dynamic extension interface was completely redone (@pxref{Dynamic Extensions}). +@item +Support for Ultrix was removed. 
+ +@end itemize + +Version 4.2 introduced the following changes: + +@itemize @bullet +@item +Changes to @code{ENVIRON} are reflected into @command{gawk}'s +environment and that of programs that it runs. +@xref{Auto-set}. + +@item +The @option{--pretty-print} option no longer runs the @command{awk} +program too. +@xref{Options}. + +@item +The @command{igawk} program and its manual page are no longer +installed when @command{gawk} is built. +@xref{Igawk Program}. + +@item +The @code{div()} function. +@xref{Numeric Functions}. + +@item +The maximum number of hexadecimal digits in @samp{\x} escapes +is now two. +@xref{Escape Sequences}. + +@item +Support for MirBSD was removed. @end itemize @c XXX ADD MORE STUFF HERE @@ -36208,10 +36272,10 @@ The generated Info file for this @value{DOCUMENT}. @item doc/gawkinet.texi The Texinfo source file for @ifinfo -@inforef{Top, , General Introduction, gawkinet, TCP/IP Internetworking with @command{gawk}}. +@inforef{Top, , General Introduction, gawkinet, @value{GAWKINETTITLE}}. @end ifinfo @ifnotinfo -@cite{TCP/IP Internetworking with @command{gawk}}. +@cite{@value{GAWKINETTITLE}}. @end ifnotinfo It should be processed with @TeX{} (via @command{texi2dvi} or @command{texi2pdf}) @@ -36220,7 +36284,7 @@ with @command{makeinfo} to produce an Info or HTML file. @item doc/gawkinet.info The generated Info file for -@cite{TCP/IP Internetworking with @command{gawk}}. +@cite{@value{GAWKINETTITLE}}. @item doc/igawk.1 The @command{troff} source for a manual page describing the @command{igawk} @@ -36459,7 +36523,7 @@ can be configured and compiled. @cindex @option{--disable-lint} configuration option @cindex configuration option, @code{--disable-lint} @item --disable-lint -Disable all lint checking within @code{gawk}. The +Disable all lint checking within @command{gawk}. The @option{--lint} and @option{--lint-old} options (@pxref{Options}) are accepted, but silently do nothing.
@@ -708,6 +708,8 @@ set_IGNORECASE() load_casetable(); if (do_traditional) IGNORECASE = false; + else if ((n->flags & (NUMCUR|NUMBER)) != 0) + IGNORECASE = ! iszero(n); else if ((n->flags & (STRING|STRCUR)) != 0) { if ((n->flags & MAYBE_NUM) == 0) { (void) force_string(n); @@ -716,9 +718,7 @@ set_IGNORECASE() (void) force_number(n); IGNORECASE = ! iszero(n); } - } else if ((n->flags & (NUMCUR|NUMBER)) != 0) - IGNORECASE = ! iszero(n); - else + } else IGNORECASE = false; /* shouldn't happen */ set_RS(); /* set_RS() calls set_FS() if need be, for us */ @@ -3,7 +3,7 @@ */ /* - * Copyright (C) 2012-2014 the Free Software Foundation, Inc. + * Copyright (C) 2012-2015 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. @@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1986, 1988, 1989, 1991-2014 the Free Software Foundation, Inc. + * Copyright (C) 1986, 1988, 1989, 1991-2015 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. @@ -24,7 +24,7 @@ */ /* FIX THIS BEFORE EVERY RELEASE: */ -#define UPDATE_YEAR 2014 +#define UPDATE_YEAR 2015 #include "awk.h" #include "getopt.h" @@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1986, 1988, 1989, 1991-2013 the Free Software Foundation, Inc. + * Copyright (C) 1986, 1988, 1989, 1991-2015 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. 
@@ -625,6 +625,67 @@ load_symbols() unref(array); } +/* check_param_names --- make sure no parameter is the name of a function */ + +bool +check_param_names(void) +{ + int i, j; + NODE **list; + NODE *f; + long max; + bool result = true; + NODE n; + + if (func_table->table_size == 0) + return result; + + max = func_table->table_size * 2; + + memset(& n, sizeof n, 0); + n.type = Node_val; + n.flags = STRING|STRCUR; + n.stfmt = -1; + + /* + * assoc_list() returns an array with two elements per awk array + * element. Elements i and i+1 in the C array represent the key + * and value of element j in the awk array. Thus the loops use += 2 + * to go through the awk array. + * + * In this case, the name is in list[i], and the function is + * in list[i+1]. Just what we need. + */ + + list = assoc_list(func_table, "@unsorted", ASORTI); + + for (i = 0; i < max; i += 2) { + f = list[i+1]; + if (f->type == Node_builtin_func || f->param_cnt == 0) + continue; + + /* loop over each param in function i */ + for (j = 0; j < f->param_cnt; j++) { + /* compare to function names */ + + /* use a fake node to avoid malloc/free of make_string */ + n.stptr = f->fparms[j].param; + n.stlen = strlen(f->fparms[j].param); + + if (in_array(func_table, & n)) { + error( + _("function `%s': can't use function `%s' as a parameter name"), + list[i]->stptr, + f->fparms[j].param); + result = false; + } + } + } + + efree(list); + return result; +} + #define pool_size d.dl #define freei x.xi static INSTRUCTION *pool_list; diff --git a/test/ChangeLog b/test/ChangeLog index 19105027..e9d5620a 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,16 @@ +2015-02-01 Arnold D. Robbins <arnold@skeeve.com> + + * Makefile.am (paramasfunc1, paramasfunc2): Now need --posix. + * indirectcall.awk: Restore after code change. + +2015-01-30 Arnold D. Robbins <arnold@skeeve.com> + + * Makefile.am (callparam, paramasfunc1, paramasfunc2): New tests. + * callparam.awk, callparam.ok: New files. 
+ * paramasfunc1.awk, paramasfunc1.ok: New files. + * paramasfunc2.awk, paramasfunc2.ok: New files. + * exit.sh, indirectcall.awk: Update after code change. + 2015-01-19 Arnold D. Robbins <arnold@skeeve.com> * Makefile.am (profile8): Actually add the test and the files. diff --git a/test/Makefile.am b/test/Makefile.am index 8f501b56..c4c0b8b3 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -132,6 +132,8 @@ EXTRA_DIST = \ beginfile2.ok \ beginfile2.sh \ binmode1.ok \ + callparam.awk \ + callparam.ok \ charasbytes.awk \ charasbytes.in \ charasbytes.ok \ @@ -642,6 +644,10 @@ EXTRA_DIST = \ out1.ok \ out2.ok \ out3.ok \ + paramasfunc1.awk \ + paramasfunc1.ok \ + paramasfunc2.awk \ + paramasfunc2.ok \ paramdup.awk \ paramdup.ok \ paramres.awk \ @@ -987,7 +993,7 @@ BASIC_TESTS = \ arynocls aryprm1 aryprm2 aryprm3 aryprm4 aryprm5 aryprm6 aryprm7 \ aryprm8 arysubnm asgext awkpath \ back89 backgsub badassign1 badbuild \ - childin clobber closebad clsflnam compare compare2 concat1 concat2 \ + callparam childin clobber closebad clsflnam compare compare2 concat1 concat2 \ concat3 concat4 convfmt \ datanonl defref delargv delarpm2 delarprm delfunc dfamb1 dfastress dynlj \ eofsplit exit2 exitval1 exitval2 \ @@ -1005,6 +1011,7 @@ BASIC_TESTS = \ nlinstr nlstrina noeffect nofile nofmtch noloop1 noloop2 nonl \ noparms nors nulrsend numindex numsubstr \ octsub ofmt ofmta ofmtbig ofmtfidl ofmts ofs1 onlynl opasnidx opasnslf \ + paramasfunc1 paramasfunc2 \ paramdup paramres paramtyp paramuninitglobal parse1 parsefld parseme \ pcntplus posix2008sub prdupval prec printf0 printf1 prmarscl prmreuse \ prt1eval prtoeval \ @@ -2019,6 +2026,15 @@ genpot: @AWKPATH="$(srcdir)" $(AWK) -f $@.awk --gen-pot >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +paramasfunc1:: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk --posix >_$@ 2>&1 || echo EXIT CODE: $$? 
>>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + +paramasfunc2:: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk --posix >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ # Targets generated for other tests: include Maketests diff --git a/test/Makefile.in b/test/Makefile.in index 4660de76..212cb779 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -389,6 +389,8 @@ EXTRA_DIST = \ beginfile2.ok \ beginfile2.sh \ binmode1.ok \ + callparam.awk \ + callparam.ok \ charasbytes.awk \ charasbytes.in \ charasbytes.ok \ @@ -899,6 +901,10 @@ EXTRA_DIST = \ out1.ok \ out2.ok \ out3.ok \ + paramasfunc1.awk \ + paramasfunc1.ok \ + paramasfunc2.awk \ + paramasfunc2.ok \ paramdup.awk \ paramdup.ok \ paramres.awk \ @@ -1243,7 +1249,7 @@ BASIC_TESTS = \ arynocls aryprm1 aryprm2 aryprm3 aryprm4 aryprm5 aryprm6 aryprm7 \ aryprm8 arysubnm asgext awkpath \ back89 backgsub badassign1 badbuild \ - childin clobber closebad clsflnam compare compare2 concat1 concat2 \ + callparam childin clobber closebad clsflnam compare compare2 concat1 concat2 \ concat3 concat4 convfmt \ datanonl defref delargv delarpm2 delarprm delfunc dfamb1 dfastress dynlj \ eofsplit exit2 exitval1 exitval2 \ @@ -1261,6 +1267,7 @@ BASIC_TESTS = \ nlinstr nlstrina noeffect nofile nofmtch noloop1 noloop2 nonl \ noparms nors nulrsend numindex numsubstr \ octsub ofmt ofmta ofmtbig ofmtfidl ofmts ofs1 onlynl opasnidx opasnslf \ + paramasfunc1 paramasfunc2 \ paramdup paramres paramtyp paramuninitglobal parse1 parsefld parseme \ pcntplus posix2008sub prdupval prec printf0 printf1 prmarscl prmreuse \ prt1eval prtoeval \ @@ -2454,6 +2461,16 @@ genpot: @echo $@ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk --gen-pot >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + +paramasfunc1:: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk --posix >_$@ 2>&1 || echo EXIT CODE: $$? 
>>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + +paramasfunc2:: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk --posix >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ Gt-dummy: # file Maketests, generated from Makefile.am by the Gentests program addcomma: @@ -2586,6 +2603,11 @@ badbuild: @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +callparam: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + childin: @echo $@ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/Maketests b/test/Maketests index 5c4c40f9..f3639b0f 100644 --- a/test/Maketests +++ b/test/Maketests @@ -130,6 +130,11 @@ badbuild: @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +callparam: + @echo $@ + @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + childin: @echo $@ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? 
>>_$@ diff --git a/test/callparam.awk b/test/callparam.awk new file mode 100644 index 00000000..b925db01 --- /dev/null +++ b/test/callparam.awk @@ -0,0 +1,6 @@ +BEGIN { f() } + +function f( a, b) +{ + a = b() +} diff --git a/test/callparam.ok b/test/callparam.ok new file mode 100644 index 00000000..00a027e8 --- /dev/null +++ b/test/callparam.ok @@ -0,0 +1,2 @@ +gawk: callparam.awk:5: error: attempt to use non-function `b' in function call +EXIT CODE: 1 diff --git a/test/exit.sh b/test/exit.sh index 9510dcdc..3922f83c 100755 --- a/test/exit.sh +++ b/test/exit.sh @@ -30,7 +30,7 @@ x='function f(){ exit}; BEGINFILE {f()}; NR>1{ f()}; END{print NR}' $AWK 'BEGIN { print "a\nb" }' | $AWK "$x" echo "-- 5" -y='function strip(f) { sub(/.*\//, "", f); return f };' +y='function strip(val) { sub(/.*\//, "", val); return val };' x='BEGINFILE{if(++i==1) exit;}; END{print i, strip(FILENAME)}' $AWK "$y$x" /dev/null $0 diff --git a/test/paramasfunc1.awk b/test/paramasfunc1.awk new file mode 100644 index 00000000..b0d06849 --- /dev/null +++ b/test/paramasfunc1.awk @@ -0,0 +1,9 @@ +BEGIN{ X() } + +function X( abc) +{ + abc = "stamp out " + print abc abc() +} + +function abc() { return "dark corners" } diff --git a/test/paramasfunc1.ok b/test/paramasfunc1.ok new file mode 100644 index 00000000..9ee95116 --- /dev/null +++ b/test/paramasfunc1.ok @@ -0,0 +1,3 @@ +gawk: paramasfunc1.awk:6: error: attempt to use non-function `abc' in function call +gawk: error: function `X': can't use function `abc' as a parameter name +EXIT CODE: 1 diff --git a/test/paramasfunc2.awk b/test/paramasfunc2.awk new file mode 100644 index 00000000..849b3d1b --- /dev/null +++ b/test/paramasfunc2.awk @@ -0,0 +1,10 @@ +BEGIN{ X() } + +function abc() { return "dark corners" } + +function X( abc) +{ + abc = "stamp out " + print abc abc() +} + diff --git a/test/paramasfunc2.ok b/test/paramasfunc2.ok new file mode 100644 index 00000000..2cdf4f66 --- /dev/null +++ b/test/paramasfunc2.ok @@ -0,0 +1,3 @@ +gawk: 
paramasfunc2.awk:8: error: attempt to use non-function `abc' in function call +gawk: error: function `X': can't use function `abc' as a parameter name +EXIT CODE: 1 |