diff options
-rw-r--r-- | ChangeLog | 16 | ||||
-rw-r--r-- | array.c | 276 | ||||
-rw-r--r-- | awk.h | 2 | ||||
-rw-r--r-- | debug.c | 2 | ||||
-rw-r--r-- | doc/ChangeLog | 5 | ||||
-rw-r--r-- | doc/gawk.info | 2483 | ||||
-rw-r--r-- | doc/gawk.texi | 1710 | ||||
-rw-r--r-- | eval.c | 9 | ||||
-rw-r--r-- | test/ChangeLog | 7 | ||||
-rw-r--r-- | test/arraysort.awk | 20 | ||||
-rw-r--r-- | test/arraysort.ok | 8 | ||||
-rw-r--r-- | test/sort1.awk | 46 | ||||
-rw-r--r-- | test/sort1.ok | 146 | ||||
-rw-r--r-- | test/sortfor.awk | 4 |
14 files changed, 2508 insertions, 2226 deletions
@@ -1,3 +1,19 @@ +Wed May 4 23:31:14 2011 Arnold D. Robbins <arnold@skeeve.com> + + Move array sorting to using predefined strings, add value sorting + by type of assignment. + + * array.c (sort_up_value_type, sort_down_value_type): New routines. + (asort_actual): Pass string value to assoc_list, not NODE *. + Make sure indices of new arrays have numeric value set also. + (sort_up_value_number): Don't break the tie based on string value. + (sort_selection): Removed. + (assoc_list): Third arg is string constant. Add name to table of + functions. Linear search it. + * awk.h (assoc_list): Fix declaration. + * debug.c (print_array): And use of assoc_list. + * eval.c (r_interpret): Ditto. + Wed May 4 23:06:17 2011 John Haque <j.eh@mchsi.com> * eval.c (setup_frame): Handle a Node_var in stack. Fixes @@ -63,6 +63,8 @@ static int sort_up_value_string(const void *, const void *); static int sort_down_value_string(const void *, const void *); static int sort_up_value_number(const void *, const void *); static int sort_down_value_number(const void *, const void *); +static int sort_up_value_type(const void *, const void *); +static int sort_down_value_type(const void *, const void *); /* array_init --- check relevant environment variables */ @@ -1030,16 +1032,27 @@ static NODE * asort_actual(int nargs, SORT_CTXT ctxt) { NODE *array, *dest = NULL, *result; - NODE *r, *subs, *sort_str; + NODE *r, *subs, *s; NODE **list, **ptr; #define TSIZE 100 /* an arbitrary amount */ static char buf[TSIZE+2]; unsigned long num_elems, i; + const char *sort_str; if (nargs == 3) /* 3rd optional arg */ - sort_str = POP_STRING(); + s = POP_STRING(); else - sort_str = Nnull_string; /* "" => default sorting */ + s = Nnull_string; /* "" => default sorting */ + + s = force_string(s); + sort_str = s->stptr; + if (s->stlen == 0) { /* default sorting */ + if (ctxt == ASORT) + sort_str = "@val_type_asc"; + else + sort_str = "@ind_str_asc"; + } + if (nargs >= 2) { /* 2nd optional arg */ dest = 
POP_PARAM(); @@ -1081,7 +1094,7 @@ asort_actual(int nargs, SORT_CTXT ctxt) /* sorting happens inside assoc_list */ list = assoc_list(array, sort_str, ctxt); - DEREF(sort_str); + DEREF(s); /* * Must not assoc_clear() the source array before constructing @@ -1106,12 +1119,15 @@ asort_actual(int nargs, SORT_CTXT ctxt) for (i = 1, ptr = list; i <= num_elems; i++) { sprintf(buf, "%lu", i); subs->stlen = strlen(buf); + /* make number valid in case this array gets sorted later */ + subs->numbr = i; + subs->flags |= NUMCUR; r = *ptr++; if (ctxt == ASORTI) { - /* We want the indices of the source array as values + /* + * We want the indices of the source array as values * of the 'result' array. */ - *assoc_lookup(result, subs, FALSE) = make_string(r->ahname_str, r->ahname_len); } else { @@ -1367,15 +1383,7 @@ sort_up_value_number(const void *p1, const void *p2) else ret = (n1->numbr > n2->numbr); - if (ret != 0) - return ret; - - /* break a tie using string comparison. First, make sure both - * n1 and n2 have string values. - */ - (void) force_string(n1); - (void) force_string(n2); - return cmp_string(n1, n2); + return ret; } /* sort_down_value_number --- descending value number */ @@ -1386,6 +1394,67 @@ sort_down_value_number(const void *p1, const void *p2) return -sort_up_value_number(p1, p2); } +/* sort_up_value_type --- qsort comparison function; ascending value type */ + +static int +sort_up_value_type(const void *p1, const void *p2) +{ + const NODE *t1, *t2; + NODE *n1, *n2; + int ret; + + /* we're passed a pair of index (array subscript) nodes */ + t1 = *(const NODE *const *) p1; + t2 = *(const NODE *const *) p2; + + /* and we want to compare the element values they refer to */ + n1 = t1->ahvalue; + n2 = t2->ahvalue; + + /* 1. Arrays vs. 
scalar, scalar is less than array */ + if (n1->type == Node_var_array) { + /* return 0 if n2 is a sub-array too, else return 1 */ + return (n2->type != Node_var_array); + } + if (n2->type == Node_var_array) { + return -1; /* n1 (scalar) < n2 (sub-array) */ + } + + /* two scalars */ + /* 2. Resolve MAYBE_NUM, so that we have only NUMBER or STRING */ + if ((n1->flags & MAYBE_NUM) != 0) + (void) force_number(n1); + if ((n2->flags & MAYBE_NUM) != 0) + (void) force_number(n2); + + if ((n1->flags & NUMBER) != 0 && (n2->flags & NUMBER) != 0) { + if (n1->numbr < n2->numbr) + return -1; + else if (n1->numbr > n2->numbr) + return 1; + else + return 0; + } + + /* 3. All numbers are less than all strings. This is arbitrary. */ + if ((n1->flags & NUMBER) != 0 && (n2->flags & STRING) != 0) { + return -1; + } else if ((n1->flags & STRING) != 0 && (n2->flags & NUMBER) != 0) { + return 1; + } + + /* 4. Two strings */ + return cmp_string(n1, n2); +} + +/* sort_down_value_type --- descending value type */ + +static int +sort_down_value_type(const void *p1, const void *p2) +{ + return -sort_up_value_type(p1, p2); +} + /* sort_user_func --- user defined qsort comparison function */ static int @@ -1429,134 +1498,6 @@ sort_user_func(const void *p1, const void *p2) return (ret < 0.0) ? -1 : (ret > 0.0); } - -/* - * sort_selection --- parse user-specified sort specification; - * returns an index into sort_funcs table located in assoc_list(). - */ - -static int -sort_selection(NODE *sort_str, SORT_CTXT sort_ctxt) -{ - /* first 4 bits used to calculate index into sort_funcs table in assoc_list(), - * next 4 bits for grouping individual components. Note that "Unsorted" - * belongs to all the groups. 
- */ - - enum sort_bits { - Unrecognized = 0xFF, /* not part of a sort phrase */ - Unsorted = 0xF8, - Ascending = 0x40, - Descending = 0x44, - by_Index = 0x20, - by_Value = 0x22, - as_String = 0x10, - as_Number = 0x11 - }; - -#define INDEX_MASK 0x0F -#define GROUP_MASK 0xF0 - - /* - * The documented values are singular, but it's trivial to accept - * "index numbers" and "descending values" since we're using a - * prefix match. Latin plural of index is the only complication. - */ - static const struct sort_keys { - const char *const keyword; - const size_t keyword_len; - enum sort_bits keybit; - } sorts[] = { - { "unsorted", 8, Unsorted }, - { "ascending", 9, Ascending }, /* ascending vs descending */ - { "descending", 10, Descending }, - { "indexes", 7, by_Index }, /* by_Index vs by_Value */ - { "indices", 7, by_Index }, /* synonym for plural match */ - { "values", 6, by_Value }, - { "strings", 7, as_String }, /* as_String vs as_Number */ - { "numbers", 7, as_Number }, - { "numeric", 7, as_Number }, /* synonym; singular only */ - }; - static int num_keys = sizeof(sorts) / sizeof(sorts[0]); - int num_words, i; - char *word, *s; - size_t word_len; - enum sort_bits allparts, bval; - - if (sort_str == NULL) { - assert(sort_ctxt == SORTED_IN); - return (Unsorted & INDEX_MASK); /* no sorting */ - } - - (void) force_string(sort_str); - sort_str->stptr[sort_str->stlen] = '\0'; /* safety */ - - /* Initialize with the context-sensitive defaults; Note that group-bits aren't - * copied, doing so will prevent the user from explicitely specifying the defaults. 
- */ - - if (sort_ctxt == ASORT) - allparts = (Ascending|by_Value|as_String) & INDEX_MASK; - else - allparts = (Ascending|by_Index|as_String) & INDEX_MASK; - - num_words = 0; - - for (s = sort_str->stptr; *s != '\0'; ) { - /* skip leading spaces */ - while (*s == ' ') - s++; - if (*s == '\0') - break; - word = s; - /* find the end of the word */ - while (*s && *s != ' ') - s++; - word_len = (size_t) (s - word); - - if (++num_words > 3) /* too many words in phrase */ - return -1; - - bval = Unrecognized; - for (i = 0; i < num_keys; i++) { - if (word_len <= sorts[i].keyword_len - && strncasecmp(sorts[i].keyword, word, word_len) == 0) { - bval = sorts[i].keybit; - break; - } - } - - if (bval == Unrecognized /* invalid word in phrase */ - || (sort_ctxt == ASORT - && (bval == by_Index || bval == Unsorted) - ) /* "index" used in phrase for asort etc. */ - || (sort_ctxt == ASORTI - && (bval == by_Value || bval == Unsorted) - ) /* "value" used in phrase for asorti etc. */ - || ((allparts & bval) & GROUP_MASK - ) /* invalid grouping of words e.g. 
"str num" */ - ) - return -1; - - allparts |= bval; - } - - if (allparts == Unsorted) { - NODE *f; - /* user-defined function overrides default */ - - if ((f = lookup(sort_str->stptr)) != NULL && f->type == Node_func) - return -1; - } - - /* num_words <= 3 */ - return (allparts & INDEX_MASK); - -#undef INDEX_MASK -#undef GROUP_MASK -} - - /* sort_force_index_number -- pre-process list items for sorting indices as numbers */ static void @@ -1569,7 +1510,7 @@ sort_force_index_number(NODE **list, size_t num_elems) for (i = 0; i < num_elems; i++) { r = list[i]; - if ((r->flags & NUMIND) != 0) /* once in a lifetime is more than enough */ + if ((r->flags & NUMIND) != 0) /* once in a lifetime is plenty */ continue; temp_node.type = Node_val; temp_node.stptr = r->ahname_str; @@ -1615,24 +1556,27 @@ sort_force_value_string(NODE **list, size_t num_elems) /* assoc_list -- construct, and optionally sort, a list of array elements */ NODE ** -assoc_list(NODE *array, NODE *sort_str, SORT_CTXT sort_ctxt) +assoc_list(NODE *array, const char *sort_str, SORT_CTXT sort_ctxt) { typedef void (*qsort_prefunc)(NODE **, size_t); typedef int (*qsort_compfunc)(const void *, const void *); static const struct qsort_funcs { + const char *name; qsort_compfunc comp_func; qsort_prefunc pre_func; /* pre-processing of list items */ } sort_funcs[] = { - { sort_up_index_string, 0 }, /* ascending index string */ - { sort_up_index_number, sort_force_index_number }, /* ascending index number */ - { sort_up_value_string, sort_force_value_string }, /* ascending value string */ - { sort_up_value_number, sort_force_value_number }, /* ascending value number */ - { sort_down_index_string, 0 }, /* descending index string */ - { sort_down_index_number, sort_force_index_number }, /* descending index number */ - { sort_down_value_string, sort_force_value_string }, /* descending value string */ - { sort_down_value_number, sort_force_value_number }, /* descending value number */ - { 0, 0 } /* unsorted */ + { 
"@ind_str_asc", sort_up_index_string, 0 }, + { "@ind_num_asc", sort_up_index_number, sort_force_index_number }, + { "@val_str_asc", sort_up_value_string, sort_force_value_string }, + { "@val_num_asc", sort_up_value_number, sort_force_value_number }, + { "@ind_str_desc", sort_down_index_string, 0 }, + { "@ind_num_desc", sort_down_index_number, sort_force_index_number }, + { "@val_str_desc", sort_down_value_string, sort_force_value_string }, + { "@val_num_desc", sort_down_value_number, sort_force_value_number }, + { "@val_type_asc", sort_up_value_type, 0 }, + { "@val_type_desc", sort_down_value_type, 0 }, + { "@unsorted", 0, 0 }, }; NODE **list; NODE *r; @@ -1646,30 +1590,32 @@ assoc_list(NODE *array, NODE *sort_str, SORT_CTXT sort_ctxt) num_elems = array->table_size; assert(num_elems > 0); - qi = sort_selection(sort_str, sort_ctxt); + for (qi = 0, j = sizeof(sort_funcs)/sizeof(sort_funcs[0]); qi < j; qi++) { + if (strcmp(sort_funcs[qi].name, sort_str) == 0) + break; + } - if (qi >= 0) { + if (qi >= 0 && qi < j) { cmp_func = sort_funcs[qi].comp_func; pre_func = sort_funcs[qi].pre_func; } else { /* unrecognized */ NODE *f; - char *sp; + const char *sp; assert(sort_str != NULL); - (void) force_string(sort_str); - for (sp = sort_str->stptr; *sp != '\0' - && ! isspace((unsigned char) *sp); sp++) - ; + for (sp = sort_str; *sp != '\0' + && ! 
isspace((unsigned char) *sp); sp++) + continue; /* empty string or string with space(s) not valid as function name */ - if (sp == sort_str->stptr || *sp != '\0') - fatal(_("`%s' is invalid as a function name"), sort_str->stptr); + if (sp == sort_str || *sp != '\0') + fatal(_("`%s' is invalid as a function name"), sort_str); - f = lookup(sort_str->stptr); + f = lookup(sort_str); if (f == NULL || f->type != Node_func) - fatal(_("sort comparison function `%s' is not defined"), sort_str->stptr); + fatal(_("sort comparison function `%s' is not defined"), sort_str); cmp_func = sort_user_func; /* pre_func is still NULL */ @@ -1110,7 +1110,7 @@ typedef int (*Func_print)(FILE *, const char *, ...); /* array.c */ typedef enum sort_context { SORTED_IN = 1, ASORT, ASORTI } SORT_CTXT; -extern NODE **assoc_list(NODE *array, NODE *sort_str, SORT_CTXT sort_ctxt); +extern NODE **assoc_list(NODE *array, const char *sort_str, SORT_CTXT sort_ctxt); extern NODE *get_array(NODE *symbol, int canfatal); extern char *array_vname(const NODE *symbol); extern char *make_aname(NODE *array, NODE *subs); @@ -1074,7 +1074,7 @@ print_array(volatile NODE *arr, char *arr_name) num_elems = arr->table_size; /* sort indices, sub_arrays are also sorted! */ - list = assoc_list((NODE *) arr, Nnull_string, SORTED_IN); + list = assoc_list((NODE *) arr, "@unsorted", SORTED_IN); PUSH_BINDING(pager_quit_tag_stack, pager_quit_tag, pager_quit_tag_valid); if (setjmp(pager_quit_tag) == 0) { diff --git a/doc/ChangeLog b/doc/ChangeLog index d6581376..2ac2cce8 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,8 @@ +Wed May 4 23:39:09 2011 Arnold D. Robbins <arnold@skeeve.com> + + * gawk.texi: Start at revamping array sorting doc. Still + needs work. + Wed Apr 27 21:49:23 2011 Arnold D. Robbins <arnold@skeeve.com> * gawk.1: Minor edit. 
diff --git a/doc/gawk.info b/doc/gawk.info index f5f3413c..e34309cb 100644 --- a/doc/gawk.info +++ b/doc/gawk.info @@ -109,394 +109,437 @@ texts being (a) (see below), and with the Back-Cover Texts being (b) * GNU Free Documentation License:: The license for this Info file. * Index:: Concept and Variable Index. -* History:: The history of `gawk' and - `awk'. -* Names:: What name to use to find `awk'. -* This Manual:: Using this Info file. Includes - sample input files that you can use. -* Conventions:: Typographical Conventions. -* Manual History:: Brief history of the GNU project and this - Info file. -* How To Contribute:: Helping to save the world. -* Acknowledgments:: Acknowledgments. -* Running gawk:: How to run `gawk' programs; - includes command-line syntax. -* One-shot:: Running a short throwaway `awk' - program. -* Read Terminal:: Using no input files (input from terminal - instead). -* Long:: Putting permanent `awk' programs in - files. -* Executable Scripts:: Making self-contained `awk' - programs. -* Comments:: Adding documentation to `gawk' - programs. -* Quoting:: More discussion of shell quoting issues. -* DOS Quoting:: Quoting in Windows Batch Files. -* Sample Data Files:: Sample data files for use in the - `awk' programs illustrated in this - Info file. -* Very Simple:: A very simple example. -* Two Rules:: A less simple one-line example using two - rules. -* More Complex:: A more complex example. -* Statements/Lines:: Subdividing or combining statements into - lines. -* Other Features:: Other Features of `awk'. -* When:: When to use `gawk' and when to use - other things. -* Command Line:: How to run `awk'. -* Options:: Command-line options and their meanings. -* Other Arguments:: Input file names and variable assignments. -* Naming Standard Input:: How to specify standard input with other - files. -* Environment Variables:: The environment variables `gawk' - uses. -* AWKPATH Variable:: Searching directories for `awk' - programs. 
-* Other Environment Variables:: The environment variables. -* Exit Status:: `gawk''s exit status. -* Include Files:: Including other files into your program. -* Obsolete:: Obsolete Options and/or features. -* Undocumented:: Undocumented Options and Features. -* Regexp Usage:: How to Use Regular Expressions. -* Escape Sequences:: How to write nonprinting characters. -* Regexp Operators:: Regular Expression Operators. -* Bracket Expressions:: What can go between `[...]'. -* GNU Regexp Operators:: Operators specific to GNU software. -* Case-sensitivity:: How to do case-insensitive matching. -* Leftmost Longest:: How much text matches. -* Computed Regexps:: Using Dynamic Regexps. -* Locales:: How the locale affects things. -* Records:: Controlling how data is split into records. -* Fields:: An introduction to fields. -* Nonconstant Fields:: Nonconstant Field Numbers. -* Changing Fields:: Changing the Contents of a Field. -* Field Separators:: The field separator and how to change it. -* Default Field Splitting:: How fields are normally separated. -* Regexp Field Splitting:: Using regexps as the field separator. -* Single Character Fields:: Making each character a separate field. -* Command Line Field Separator:: Setting `FS' from the command-line. -* Field Splitting Summary:: Some final points and a summary table. -* Constant Size:: Reading constant width data. -* Splitting By Content:: Defining Fields By Content -* Multiple Line:: Reading multi-line records. -* Getline:: Reading files under explicit program - control using the `getline' function. -* Plain Getline:: Using `getline' with no arguments. -* Getline/Variable:: Using `getline' into a variable. -* Getline/File:: Using `getline' from a file. -* Getline/Variable/File:: Using `getline' into a variable from a - file. -* Getline/Pipe:: Using `getline' from a pipe. -* Getline/Variable/Pipe:: Using `getline' into a variable from a - pipe. -* Getline/Coprocess:: Using `getline' from a coprocess. 
-* Getline/Variable/Coprocess:: Using `getline' into a variable from a - coprocess. -* Getline Notes:: Important things to know about - `getline'. -* Getline Summary:: Summary of `getline' Variants. -* Command line directories:: What happens if you put a directory on the - command line. -* Print:: The `print' statement. -* Print Examples:: Simple examples of `print' statements. -* Output Separators:: The output separators and how to change - them. -* OFMT:: Controlling Numeric Output With - `print'. -* Printf:: The `printf' statement. -* Basic Printf:: Syntax of the `printf' statement. -* Control Letters:: Format-control letters. -* Format Modifiers:: Format-specification modifiers. -* Printf Examples:: Several examples. -* Redirection:: How to redirect output to multiple files - and pipes. -* Special Files:: File name interpretation in `gawk'. - `gawk' allows access to inherited - file descriptors. -* Special FD:: Special files for I/O. -* Special Network:: Special files for network communications. -* Special Caveats:: Things to watch out for. -* Close Files And Pipes:: Closing Input and Output Files and Pipes. -* Values:: Constants, Variables, and Regular - Expressions. -* Constants:: String, numeric and regexp constants. -* Scalar Constants:: Numeric and string constants. -* Nondecimal-numbers:: What are octal and hex numbers. -* Regexp Constants:: Regular Expression constants. -* Using Constant Regexps:: When and how to use a regexp constant. -* Variables:: Variables give names to values for later - use. -* Using Variables:: Using variables in your programs. -* Assignment Options:: Setting variables on the command-line and a - summary of command-line syntax. This is an - advanced method of input. -* Conversion:: The conversion of strings to numbers and - vice versa. -* All Operators:: `gawk''s operators. -* Arithmetic Ops:: Arithmetic operations (`+', `-', - etc.) -* Concatenation:: Concatenating strings. 
-* Assignment Ops:: Changing the value of a variable or a - field. -* Increment Ops:: Incrementing the numeric value of a - variable. -* Truth Values and Conditions:: Testing for true and false. -* Truth Values:: What is ``true'' and what is ``false''. -* Typing and Comparison:: How variables acquire types and how this - affects comparison of numbers and strings - with `<', etc. -* Variable Typing:: String type versus numeric type. -* Comparison Operators:: The comparison operators. -* POSIX String Comparison:: String comparison with POSIX rules. -* Boolean Ops:: Combining comparison expressions using - boolean operators `||' (``or''), - `&&' (``and'') and `!' (``not''). -* Conditional Exp:: Conditional expressions select between two - subexpressions under control of a third - subexpression. -* Function Calls:: A function call is an expression. -* Precedence:: How various operators nest. -* Pattern Overview:: What goes into a pattern. -* Regexp Patterns:: Using regexps as patterns. -* Expression Patterns:: Any expression can be used as a pattern. -* Ranges:: Pairs of patterns specify record ranges. -* BEGIN/END:: Specifying initialization and cleanup - rules. -* Using BEGIN/END:: How and why to use BEGIN/END rules. -* I/O And BEGIN/END:: I/O issues in BEGIN/END rules. -* Empty:: The empty pattern, which matches every - record. -* BEGINFILE/ENDFILE:: Two special patterns for advanced control. -* Using Shell Variables:: How to use shell variables with - `awk'. -* Action Overview:: What goes into an action. -* Statements:: Describes the various control statements in - detail. -* If Statement:: Conditionally execute some `awk' - statements. -* While Statement:: Loop until some condition is satisfied. -* Do Statement:: Do specified action while looping until - some condition is satisfied. -* For Statement:: Another looping statement, that provides - initialization and increment clauses. 
-* Switch Statement:: Switch/case evaluation for conditional - execution of statements based on a value. -* Break Statement:: Immediately exit the innermost enclosing - loop. -* Continue Statement:: Skip to the end of the innermost enclosing - loop. -* Next Statement:: Stop processing the current input record. -* Nextfile Statement:: Stop processing the current file. -* Exit Statement:: Stop execution of `awk'. -* Built-in Variables:: Summarizes the built-in variables. -* User-modified:: Built-in variables that you change to - control `awk'. -* Auto-set:: Built-in variables where `awk' - gives you information. -* ARGC and ARGV:: Ways to use `ARGC' and `ARGV'. -* Array Basics:: The basics of arrays. -* Array Intro:: Introduction to Arrays -* Reference to Elements:: How to examine one element of an array. -* Assigning Elements:: How to change an element of an array. -* Array Example:: Basic Example of an Array -* Scanning an Array:: A variation of the `for' statement. It - loops through the indices of an array's - existing elements. -* Controlling Scanning:: Controlling the order in which arrays - are scanned. -* Delete:: The `delete' statement removes an - element from an array. -* Numeric Array Subscripts:: How to use numbers as subscripts in - `awk'. -* Uninitialized Subscripts:: Using Uninitialized variables as - subscripts. -* Multi-dimensional:: Emulating multidimensional arrays in - `awk'. -* Multi-scanning:: Scanning multidimensional arrays. -* Array Sorting:: Sorting array values and indices. -* Arrays of Arrays:: True multidimensional arrays. -* Built-in:: Summarizes the built-in functions. -* Calling Built-in:: How to call built-in functions. -* Numeric Functions:: Functions that work with numbers, including - `int()', `sin()' and - `rand()'. -* String Functions:: Functions for string manipulation, such as - `split()', `match()' and - `sprintf()'. -* Gory Details:: More than you want to know about `\' - and `&' with `sub()', - `gsub()', and `gensub()'. 
-* I/O Functions:: Functions for files and shell commands. -* Time Functions:: Functions for dealing with timestamps. -* Bitwise Functions:: Functions for bitwise operations. -* Type Functions:: Functions for type information. -* I18N Functions:: Functions for string translation. -* User-defined:: Describes User-defined functions in detail. -* Definition Syntax:: How to write definitions and what they - mean. -* Function Example:: An example function definition and what it - does. -* Function Caveats:: Things to watch out for. -* Calling A Function:: Don't use spaces. -* Variable Scope:: Controlling variable scope. -* Pass By Value/Reference:: Passing parameters. -* Return Statement:: Specifying the value a function returns. -* Dynamic Typing:: How variable types can change at runtime. -* Indirect Calls:: Choosing the function to call at runtime. -* I18N and L10N:: Internationalization and Localization. -* Explaining gettext:: How GNU `gettext' works. -* Programmer i18n:: Features for the programmer. -* Translator i18n:: Features for the translator. -* String Extraction:: Extracting marked strings. -* Printf Ordering:: Rearranging `printf' arguments. -* I18N Portability:: `awk'-level portability issues. -* I18N Example:: A simple i18n example. -* Gawk I18N:: `gawk' is also internationalized. -* Nondecimal Data:: Allowing nondecimal input data. -* Two-way I/O:: Two-way communications with another - process. -* TCP/IP Networking:: Using `gawk' for network - programming. -* Profiling:: Profiling your `awk' programs. -* Library Names:: How to best name private global variables - in library functions. -* General Functions:: Functions that are of general use. -* Strtonum Function:: A replacement for the built-in - `strtonum()' function. -* Assert Function:: A function for assertions in `awk' - programs. -* Round Function:: A function for rounding if `sprintf()' - does not do it correctly. -* Cliff Random Function:: The Cliff Random Number Generator. 
-* Ordinal Functions:: Functions for using characters as numbers - and vice versa. -* Join Function:: A function to join an array into a string. -* Gettimeofday Function:: A function to get formatted times. -* Data File Management:: Functions for managing command-line data - files. -* Filetrans Function:: A function for handling data file - transitions. -* Rewind Function:: A function for rereading the current file. -* File Checking:: Checking that data files are readable. -* Empty Files:: Checking for zero-length files. -* Ignoring Assigns:: Treating assignments as file names. -* Getopt Function:: A function for processing command-line - arguments. -* Passwd Functions:: Functions for getting user information. -* Group Functions:: Functions for getting group information. -* Running Examples:: How to run these examples. -* Clones:: Clones of common utilities. -* Cut Program:: The `cut' utility. -* Egrep Program:: The `egrep' utility. -* Id Program:: The `id' utility. -* Split Program:: The `split' utility. -* Tee Program:: The `tee' utility. -* Uniq Program:: The `uniq' utility. -* Wc Program:: The `wc' utility. -* Miscellaneous Programs:: Some interesting `awk' programs. -* Dupword Program:: Finding duplicated words in a document. -* Alarm Program:: An alarm clock. -* Translate Program:: A program similar to the `tr' - utility. -* Labels Program:: Printing mailing labels. -* Word Sorting:: A program to produce a word usage count. -* History Sorting:: Eliminating duplicate entries from a - history file. -* Extract Program:: Pulling out programs from Texinfo source - files. -* Simple Sed:: A Simple Stream Editor. -* Igawk Program:: A wrapper for `awk' that includes - files. -* Anagram Program:: Finding anagrams from a dictionary. -* Signature Program:: People do amazing things with too much time - on their hands. -* Debugging:: Introduction to `dgawk'. -* Debugging Concepts:: Debugging In General. -* Debugging Terms:: Additional Debugging Concepts. 
-* Awk Debugging:: Awk Debugging. -* Sample dgawk session:: Sample `dgawk' session. -* dgawk invocation:: `dgawk' Invocation. -* Finding The Bug:: Finding The Bug. -* List of Debugger Commands:: Main `dgawk' Commands. -* Breakpoint Control:: Control of breakpoints. -* Dgawk Execution Control:: Control of execution. -* Viewing And Changing Data:: Viewing and changing data. -* Dgawk Stack:: Dealing with the stack. -* Dgawk Info:: Obtaining information about the program and - the debugger state. -* Miscellaneous Dgawk Commands:: Miscellaneous Commands. -* Readline Support:: Readline Support. -* Dgawk Limitations:: Limitations and future plans. -* V7/SVR3.1:: The major changes between V7 and System V - Release 3.1. -* SVR4:: Minor changes between System V Releases 3.1 - and 4. -* POSIX:: New features from the POSIX standard. -* BTL:: New features from Brian Kernighan's - version of `awk'. -* POSIX/GNU:: The extensions in `gawk' not in - POSIX `awk'. -* Contributors:: The major contributors to `gawk'. -* Common Extensions:: Common Extensions Summary. -* Gawk Distribution:: What is in the `gawk' distribution. -* Getting:: How to get the distribution. -* Extracting:: How to extract the distribution. -* Distribution contents:: What is in the distribution. -* Unix Installation:: Installing `gawk' under various - versions of Unix. -* Quick Installation:: Compiling `gawk' under Unix. -* Additional Configuration Options:: Other compile-time options. -* Configuration Philosophy:: How it's all supposed to work. -* Non-Unix Installation:: Installation on Other Operating Systems. -* PC Installation:: Installing and Compiling `gawk' on - MS-DOS and OS/2. -* PC Binary Installation:: Installing a prepared distribution. -* PC Compiling:: Compiling `gawk' for MS-DOS, - Windows32, and OS/2. -* PC Testing:: Testing `gawk' on PC - Operating Systems. -* PC Using:: Running `gawk' on MS-DOS, Windows32 - and OS/2. -* Cygwin:: Building and running `gawk' for - Cygwin. 
-* MSYS:: Using `gawk' In The MSYS - Environment. -* VMS Installation:: Installing `gawk' on VMS. -* VMS Compilation:: How to compile `gawk' under VMS. -* VMS Installation Details:: How to install `gawk' under VMS. -* VMS Running:: How to run `gawk' under VMS. -* VMS Old Gawk:: An old version comes with some VMS systems. -* Bugs:: Reporting Problems and Bugs. -* Other Versions:: Other freely available `awk' - implementations. -* Compatibility Mode:: How to disable certain `gawk' - extensions. -* Additions:: Making Additions To `gawk'. -* Accessing The Source:: Accessing the Git repository. -* Adding Code:: Adding code to the main body of - `gawk'. -* New Ports:: Porting `gawk' to a new operating - system. -* Dynamic Extensions:: Adding new built-in functions to - `gawk'. -* Internals:: A brief look at some `gawk' - internals. -* Plugin License:: A note about licensing. -* Sample Library:: A example of new functions. -* Internal File Description:: What the new functions will do. -* Internal File Ops:: The code for internal file operations. -* Using Internal File Ops:: How to use an external extension. -* Future Extensions:: New features that may be implemented one - day. -* Basic High Level:: The high level view. -* Basic Data Typing:: A very quick intro to data types. -* Floating Point Issues:: Stuff to know about floating-point numbers. -* String Conversion Precision:: The String Value Can Lie. -* Unexpected Results:: Floating Point Numbers Are Not Abstract - Numbers. -* POSIX Floating Point Problems:: Standards Versus Existing Practice. +* History:: The history of `gawk' and + `awk'. +* Names:: What name to use to find `awk'. +* This Manual:: Using this Info file. Includes + sample input files that you can use. +* Conventions:: Typographical Conventions. +* Manual History:: Brief history of the GNU project and + this Info file. +* How To Contribute:: Helping to save the world. +* Acknowledgments:: Acknowledgments. 
+* Running gawk:: How to run `gawk' programs; + includes command-line syntax. +* One-shot:: Running a short throwaway `awk' + program. +* Read Terminal:: Using no input files (input from + terminal instead). +* Long:: Putting permanent `awk' + programs in files. +* Executable Scripts:: Making self-contained `awk' + programs. +* Comments:: Adding documentation to `gawk' + programs. +* Quoting:: More discussion of shell quoting + issues. +* DOS Quoting:: Quoting in Windows Batch Files. +* Sample Data Files:: Sample data files for use in the + `awk' programs illustrated in + this Info file. +* Very Simple:: A very simple example. +* Two Rules:: A less simple one-line example using + two rules. +* More Complex:: A more complex example. +* Statements/Lines:: Subdividing or combining statements + into lines. +* Other Features:: Other Features of `awk'. +* When:: When to use `gawk' and when to + use other things. +* Command Line:: How to run `awk'. +* Options:: Command-line options and their + meanings. +* Other Arguments:: Input file names and variable + assignments. +* Naming Standard Input:: How to specify standard input with + other files. +* Environment Variables:: The environment variables + `gawk' uses. +* AWKPATH Variable:: Searching directories for `awk' + programs. +* Other Environment Variables:: The environment variables. +* Exit Status:: `gawk''s exit status. +* Include Files:: Including other files into your + program. +* Obsolete:: Obsolete Options and/or features. +* Undocumented:: Undocumented Options and Features. +* Regexp Usage:: How to Use Regular Expressions. +* Escape Sequences:: How to write nonprinting characters. +* Regexp Operators:: Regular Expression Operators. +* Bracket Expressions:: What can go between `[...]'. +* GNU Regexp Operators:: Operators specific to GNU software. +* Case-sensitivity:: How to do case-insensitive matching. +* Leftmost Longest:: How much text matches. +* Computed Regexps:: Using Dynamic Regexps. 
+* Locales:: How the locale affects things. +* Records:: Controlling how data is split into + records. +* Fields:: An introduction to fields. +* Nonconstant Fields:: Nonconstant Field Numbers. +* Changing Fields:: Changing the Contents of a Field. +* Field Separators:: The field separator and how to change + it. +* Default Field Splitting:: How fields are normally separated. +* Regexp Field Splitting:: Using regexps as the field separator. +* Single Character Fields:: Making each character a separate field. +* Command Line Field Separator:: Setting `FS' from the + command-line. +* Field Splitting Summary:: Some final points and a summary table. +* Constant Size:: Reading constant width data. +* Splitting By Content:: Defining Fields By Content +* Multiple Line:: Reading multi-line records. +* Getline:: Reading files under explicit program + control using the `getline' + function. +* Plain Getline:: Using `getline' with no arguments. +* Getline/Variable:: Using `getline' into a variable. +* Getline/File:: Using `getline' from a file. +* Getline/Variable/File:: Using `getline' into a variable + from a file. +* Getline/Pipe:: Using `getline' from a pipe. +* Getline/Variable/Pipe:: Using `getline' into a variable + from a pipe. +* Getline/Coprocess:: Using `getline' from a coprocess. +* Getline/Variable/Coprocess:: Using `getline' into a variable + from a coprocess. +* Getline Notes:: Important things to know about + `getline'. +* Getline Summary:: Summary of `getline' Variants. +* Command line directories:: What happens if you put a directory on + the command line. +* Print:: The `print' statement. +* Print Examples:: Simple examples of `print' + statements. +* Output Separators:: The output separators and how to change + them. +* OFMT:: Controlling Numeric Output With + `print'. +* Printf:: The `printf' statement. +* Basic Printf:: Syntax of the `printf' statement. +* Control Letters:: Format-control letters. +* Format Modifiers:: Format-specification modifiers. 
+* Printf Examples:: Several examples. +* Redirection:: How to redirect output to multiple + files and pipes. +* Special Files:: File name interpretation in + `gawk'. `gawk' allows + access to inherited file descriptors. +* Special FD:: Special files for I/O. +* Special Network:: Special files for network + communications. +* Special Caveats:: Things to watch out for. +* Close Files And Pipes:: Closing Input and Output Files and + Pipes. +* Values:: Constants, Variables, and Regular + Expressions. +* Constants:: String, numeric and regexp constants. +* Scalar Constants:: Numeric and string constants. +* Nondecimal-numbers:: What are octal and hex numbers. +* Regexp Constants:: Regular Expression constants. +* Using Constant Regexps:: When and how to use a regexp constant. +* Variables:: Variables give names to values for + later use. +* Using Variables:: Using variables in your programs. +* Assignment Options:: Setting variables on the command-line + and a summary of command-line syntax. + This is an advanced method of input. +* Conversion:: The conversion of strings to numbers + and vice versa. +* All Operators:: `gawk''s operators. +* Arithmetic Ops:: Arithmetic operations (`+', + `-', etc.) +* Concatenation:: Concatenating strings. +* Assignment Ops:: Changing the value of a variable or a + field. +* Increment Ops:: Incrementing the numeric value of a + variable. +* Truth Values and Conditions:: Testing for true and false. +* Truth Values:: What is ``true'' and what is ``false''. +* Typing and Comparison:: How variables acquire types and how + this affects comparison of numbers and + strings with `<', etc. +* Variable Typing:: String type versus numeric type. +* Comparison Operators:: The comparison operators. +* POSIX String Comparison:: String comparison with POSIX rules. +* Boolean Ops:: Combining comparison expressions using + boolean operators `||' (``or''), + `&&' (``and'') and `!' + (``not''). 
+* Conditional Exp:: Conditional expressions select between + two subexpressions under control of a + third subexpression. +* Function Calls:: A function call is an expression. +* Precedence:: How various operators nest. +* Pattern Overview:: What goes into a pattern. +* Regexp Patterns:: Using regexps as patterns. +* Expression Patterns:: Any expression can be used as a + pattern. +* Ranges:: Pairs of patterns specify record + ranges. +* BEGIN/END:: Specifying initialization and cleanup + rules. +* Using BEGIN/END:: How and why to use BEGIN/END rules. +* I/O And BEGIN/END:: I/O issues in BEGIN/END rules. +* BEGINFILE/ENDFILE:: Two special patterns for advanced + control. +* Empty:: The empty pattern, which matches every + record. +* Using Shell Variables:: How to use shell variables with + `awk'. +* Action Overview:: What goes into an action. +* Statements:: Describes the various control + statements in detail. +* If Statement:: Conditionally execute some + `awk' statements. +* While Statement:: Loop until some condition is satisfied. +* Do Statement:: Do specified action while looping until + some condition is satisfied. +* For Statement:: Another looping statement, that + provides initialization and increment + clauses. +* Switch Statement:: Switch/case evaluation for conditional + execution of statements based on a + value. +* Break Statement:: Immediately exit the innermost + enclosing loop. +* Continue Statement:: Skip to the end of the innermost + enclosing loop. +* Next Statement:: Stop processing the current input + record. +* Nextfile Statement:: Stop processing the current file. +* Exit Statement:: Stop execution of `awk'. +* Built-in Variables:: Summarizes the built-in variables. +* User-modified:: Built-in variables that you change to + control `awk'. +* Auto-set:: Built-in variables where `awk' + gives you information. +* ARGC and ARGV:: Ways to use `ARGC' and + `ARGV'. +* Array Basics:: The basics of arrays. 
+* Array Intro:: Introduction to Arrays +* Reference to Elements:: How to examine one element of an array. +* Assigning Elements:: How to change an element of an array. +* Array Example:: Basic Example of an Array +* Scanning an Array:: A variation of the `for' + statement. It loops through the indices + of an array's existing elements. +* Delete:: The `delete' statement removes an + element from an array. +* Numeric Array Subscripts:: How to use numbers as subscripts in + `awk'. +* Uninitialized Subscripts:: Using Uninitialized variables as + subscripts. +* Multi-dimensional:: Emulating multidimensional arrays in + `awk'. +* Multi-scanning:: Scanning multidimensional arrays. +* Arrays of Arrays:: True multidimensional arrays. +* Built-in:: Summarizes the built-in functions. +* Calling Built-in:: How to call built-in functions. +* Numeric Functions:: Functions that work with numbers, + including `int()', `sin()' + and `rand()'. +* String Functions:: Functions for string manipulation, such + as `split()', `match()' and + `sprintf()'. +* Gory Details:: More than you want to know about + `\' and `&' with + `sub()', `gsub()', and + `gensub()'. +* I/O Functions:: Functions for files and shell commands. +* Time Functions:: Functions for dealing with timestamps. +* Bitwise Functions:: Functions for bitwise operations. +* Type Functions:: Functions for type information. +* I18N Functions:: Functions for string translation. +* User-defined:: Describes User-defined functions in + detail. +* Definition Syntax:: How to write definitions and what they + mean. +* Function Example:: An example function definition and what + it does. +* Function Caveats:: Things to watch out for. +* Calling A Function:: Don't use spaces. +* Variable Scope:: Controlling variable scope. +* Pass By Value/Reference:: Passing parameters. +* Return Statement:: Specifying the value a function + returns. +* Dynamic Typing:: How variable types can change at + runtime. 
+* Indirect Calls:: Choosing the function to call at + runtime. +* I18N and L10N:: Internationalization and Localization. +* Explaining gettext:: How GNU `gettext' works. +* Programmer i18n:: Features for the programmer. +* Translator i18n:: Features for the translator. +* String Extraction:: Extracting marked strings. +* Printf Ordering:: Rearranging `printf' arguments. +* I18N Portability:: `awk'-level portability issues. +* I18N Example:: A simple i18n example. +* Gawk I18N:: `gawk' is also + internationalized. +* Nondecimal Data:: Allowing nondecimal input data. +* Array Sorting:: Facilities for controlling array + traversal and sorting arrays. +* Controlling Array Traversal:: How to use PROCINFO["sorted_in"]. +* Controlling Scanning With A Function:: Using a function to control scanning. +* Controlling Scanning:: Controlling the order in which arrays + are scanned. +* Array Sorting Functions:: How to use `asort()' and + `asorti()'. +* Two-way I/O:: Two-way communications with another + process. +* TCP/IP Networking:: Using `gawk' for network + programming. +* Profiling:: Profiling your `awk' programs. +* Library Names:: How to best name private global + variables in library functions. +* General Functions:: Functions that are of general use. +* Strtonum Function:: A replacement for the built-in + `strtonum()' function. +* Assert Function:: A function for assertions in + `awk' programs. +* Round Function:: A function for rounding if + `sprintf()' does not do it + correctly. +* Cliff Random Function:: The Cliff Random Number Generator. +* Ordinal Functions:: Functions for using characters as + numbers and vice versa. +* Join Function:: A function to join an array into a + string. +* Gettimeofday Function:: A function to get formatted times. +* Data File Management:: Functions for managing command-line + data files. +* Filetrans Function:: A function for handling data file + transitions. +* Rewind Function:: A function for rereading the current + file. 
+* File Checking:: Checking that data files are readable. +* Empty Files:: Checking for zero-length files. +* Ignoring Assigns:: Treating assignments as file names. +* Getopt Function:: A function for processing command-line + arguments. +* Passwd Functions:: Functions for getting user information. +* Group Functions:: Functions for getting group + information. +* Walking Arrays:: A function to walk arrays of arrays. +* Running Examples:: How to run these examples. +* Clones:: Clones of common utilities. +* Cut Program:: The `cut' utility. +* Egrep Program:: The `egrep' utility. +* Id Program:: The `id' utility. +* Split Program:: The `split' utility. +* Tee Program:: The `tee' utility. +* Uniq Program:: The `uniq' utility. +* Wc Program:: The `wc' utility. +* Miscellaneous Programs:: Some interesting `awk' + programs. +* Dupword Program:: Finding duplicated words in a document. +* Alarm Program:: An alarm clock. +* Translate Program:: A program similar to the `tr' + utility. +* Labels Program:: Printing mailing labels. +* Word Sorting:: A program to produce a word usage + count. +* History Sorting:: Eliminating duplicate entries from a + history file. +* Extract Program:: Pulling out programs from Texinfo + source files. +* Simple Sed:: A Simple Stream Editor. +* Igawk Program:: A wrapper for `awk' that + includes files. +* Anagram Program:: Finding anagrams from a dictionary. +* Signature Program:: People do amazing things with too much + time on their hands. +* Debugging:: Introduction to `dgawk'. +* Debugging Concepts:: Debugging In General. +* Debugging Terms:: Additional Debugging Concepts. +* Awk Debugging:: Awk Debugging. +* Sample dgawk session:: Sample `dgawk' session. +* dgawk invocation:: `dgawk' Invocation. +* Finding The Bug:: Finding The Bug. +* List of Debugger Commands:: Main `dgawk' Commands. +* Breakpoint Control:: Control of breakpoints. +* Dgawk Execution Control:: Control of execution. +* Viewing And Changing Data:: Viewing and changing data. 
+* Dgawk Stack:: Dealing with the stack. +* Dgawk Info:: Obtaining information about the program + and the debugger state. +* Miscellaneous Dgawk Commands:: Miscellaneous Commands. +* Readline Support:: Readline Support. +* Dgawk Limitations:: Limitations and future plans. +* V7/SVR3.1:: The major changes between V7 and System + V Release 3.1. +* SVR4:: Minor changes between System V Releases + 3.1 and 4. +* POSIX:: New features from the POSIX standard. +* BTL:: New features from Brian Kernighan's + version of `awk'. +* POSIX/GNU:: The extensions in `gawk' not in + POSIX `awk'. +* Common Extensions:: Common Extensions Summary. +* Contributors:: The major contributors to + `gawk'. +* Gawk Distribution:: What is in the `gawk' + distribution. +* Getting:: How to get the distribution. +* Extracting:: How to extract the distribution. +* Distribution contents:: What is in the distribution. +* Unix Installation:: Installing `gawk' under various + versions of Unix. +* Quick Installation:: Compiling `gawk' under Unix. +* Additional Configuration Options:: Other compile-time options. +* Configuration Philosophy:: How it's all supposed to work. +* Non-Unix Installation:: Installation on Other Operating + Systems. +* PC Installation:: Installing and Compiling `gawk' + on MS-DOS and OS/2. +* PC Binary Installation:: Installing a prepared distribution. +* PC Compiling:: Compiling `gawk' for MS-DOS, + Windows32, and OS/2. +* PC Testing:: Testing `gawk' on PC systems. +* PC Using:: Running `gawk' on MS-DOS, + Windows32 and OS/2. +* Cygwin:: Building and running `gawk' for + Cygwin. +* MSYS:: Using `gawk' In The MSYS + Environment. +* VMS Installation:: Installing `gawk' on VMS. +* VMS Compilation:: How to compile `gawk' under + VMS. +* VMS Installation Details:: How to install `gawk' under + VMS. +* VMS Running:: How to run `gawk' under VMS. +* VMS Old Gawk:: An old version comes with some VMS + systems. +* Bugs:: Reporting Problems and Bugs. 
+* Other Versions:: Other freely available `awk' + implementations. +* Compatibility Mode:: How to disable certain `gawk' + extensions. +* Additions:: Making Additions To `gawk'. +* Accessing The Source:: Accessing the Git repository. +* Adding Code:: Adding code to the main body of + `gawk'. +* New Ports:: Porting `gawk' to a new + operating system. +* Dynamic Extensions:: Adding new built-in functions to + `gawk'. +* Internals:: A brief look at some `gawk' + internals. +* Plugin License:: A note about licensing. +* Sample Library:: An example of new functions. +* Internal File Description:: What the new functions will do. +* Internal File Ops:: The code for internal file operations. +* Using Internal File Ops:: How to use an external extension. +* Future Extensions:: New features that may be implemented + one day. +* Basic High Level:: The high level view. +* Basic Data Typing:: A very quick intro to data types. +* Floating Point Issues:: Stuff to know about floating-point + numbers. +* String Conversion Precision:: The String Value Can Lie. +* Unexpected Results:: Floating Point Numbers Are Not Abstract + Numbers. +* POSIX Floating Point Problems:: Standards Versus Existing Practice. To Miriam, for making me complete. @@ -9638,7 +9681,6 @@ cannot have a variable and an array with the same name in the same * Uninitialized Subscripts:: Using Uninitialized variables as subscripts. * Multi-dimensional:: Emulating multidimensional arrays in `awk'. -* Array Sorting:: Sorting array values and indices. * Arrays of Arrays:: True multidimensional arrays. @@ -9896,12 +9938,7 @@ File: gawk.info, Node: Scanning an Array, Prev: Array Example, Up: Array Basi 8.1.5 Scanning All Elements of an Array --------------------------------------- -* Menu: - -* Controlling Scanning:: Controlling the order in which arrays are scanned. -* Controlling Scanning With A Function:: Using a function to control scanning.
- - In programs that use arrays, it is often necessary to use a loop that +In programs that use arrays, it is often necessary to use a loop that executes once for each element of an array. In other languages, where arrays are contiguous and indices are limited to positive integers, this is easy: all the valid indices can be found by counting from the @@ -9951,256 +9988,14 @@ statements in the loop body; it is not predictable whether the `for' loop will reach them. Similarly, changing VAR inside the loop may produce strange results. It is best to avoid such things. - -File: gawk.info, Node: Controlling Scanning, Next: Controlling Scanning With A Function, Up: Scanning an Array - -8.1.5.1 Controlling Array Scanning Order -........................................ - -As an extension, `gawk' makes it possible for you to loop over the + As an extension, `gawk' makes it possible for you to loop over the elements of an array in order, based on the value of -`PROCINFO["sorted_in"]' (*note Auto-set::). Several sorting options -are available: - -`ascending index string' - Order by indices compared as strings; this is the most basic sort. - (Internally, array indices are always strings, so with `a[2*5] = 1' - the index is actually `"10"' rather than numeric 10.) - -`ascending index number' - Order by indices but force them to be treated as numbers in the - process. Any index with non-numeric value will end up positioned - as if it were zero. - -`ascending value string' - Order by element values rather than by indices. Scalar values are - compared as strings. Subarrays, if present, come out last. - -`ascending value number' - Order by values but force scalar values to be treated as numbers - for the purpose of comparison. If there are subarrays, those - appear at the end of the sorted list. - -`descending index string' - Reverse order from the most basic sort. - -`descending index number' - Numeric indices ordered from high to low. 
- -`descending value string' - Element values, treated as strings, ordered from high to low. - Subarrays, if present, come out first. - -`descending value number' - Element values, treated as numbers, ordered from high to low. - Subarrays, if present, come out first. - -`unsorted' - Array elements are processed in arbitrary order, the normal `awk' - behavior. You can also get the normal behavior by just deleting - the `"sorted_in"' item from the `PROCINFO' array, if it previously - had a value assigned to it. - - The array traversal order is determined before the `for' loop starts -to run. Changing `PROCINFO["sorted_in"]' in the loop body will not -affect the loop. - - Portions of the sort specification string may be truncated or -omitted. The default is `ascending' for direction, `index' for sort -key type, and `string' for comparison mode. This implies that one can -simply assign the empty string, "", instead of "ascending index string" -to `PROCINFO["sorted_in"]' for the same effect. - - For example: - - $ gawk 'BEGIN { - > a[4] = 4 - > a[3] = 3 - > for (i in a) - > print i, a[i] - > }' - -| 4 4 - -| 3 3 - $ gawk 'BEGIN { - > PROCINFO["sorted_in"] = "asc index" - > a[4] = 4 - > a[3] = 3 - > for (i in a) - > print i, a[i] - > }' - -| 3 3 - -| 4 4 - - When sorting an array by element values, if a value happens to be a -subarray then it is considered to be greater than any string or numeric -value, regardless of what the subarray itself contains, and all -subarrays are treated as being equal to each other. Their order -relative to each other is determined by their index strings. - - -File: gawk.info, Node: Controlling Scanning With A Function, Prev: Controlling Scanning, Up: Scanning an Array - -8.1.5.2 Controlling Array Scanning Order With a User-defined Function -..................................................................... - -The value of `PROCINFO["sorted_in"]' can also be a function name. This -lets you traverse an array based on any custom criterion. 
The array -elements are ordered according to the return value of this function. -This comparison function should be defined with at least four arguments: - - function comp_func(i1, v1, i2, v2) - { - COMPARE ELEMENTS 1 AND 2 IN SOME FASHION - RETURN < 0; 0; OR > 0 - } - - Here, I1 and I2 are the indices, and V1 and V2 are the corresponding -values of the two elements being compared. Either V1 or V2, or both, -can be arrays if the array being traversed contains subarrays as -values. The three possible return values are interpreted this way: - - * If the return value of `comp_func(i1, v1, i2, v2)' is less than - zero, index I1 comes before index I2 during loop traversal. - - * If `comp_func(i1, v1, i2, v2)' returns zero, I1 and I2 come - together but the relative order with respect to each other is - undefined. - - * If the return value of `comp_func(i1, v1, i2, v2)' is greater than - zero, I1 comes after I2. - - The following comparison function can be used to scan an array in -numerical order of the indices: - - function cmp_num_idx(i1, v1, i2, v2) - { - # numerical index comparison, ascending order - return (i1 - i2) - } - - This function traverses an array based on an order by element values -rather than by indices: - - function cmp_str_val(i1, v1, i2, v2) - { - # string value comparison, ascending order - v1 = v1 "" - v2 = v2 "" - if (v1 < v2) - return -1 - return (v1 != v2) - } - - Here is a comparison function to make all numbers, and numeric -strings without any leading or trailing spaces, come out first during -loop traversal: - - function cmp_num_str_val(i1, v1, i2, v2, n1, n2) - { - # numbers before string value comparison, ascending order - n1 = v1 + 0 - n2 = v2 + 0 - if (n1 == v1) - return (n2 == v2) ? (n1 - n2) : -1 - else if (n2 == v2) - return 1 - return (v1 < v2) ? -1 : (v1 != v2) - } - - Consider sorting the entries of a GNU/Linux system password file -according to login names. 
The following program which sorts records by -a specific field position can be used for this purpose: - - # sort.awk --- simple program to sort by field position - # field position is specified by the global variable POS - - function cmp_field(i1, v1, i2, v2) - { - # comparison by value, as string, and ascending order - return v1[POS] < v2[POS] ? -1 : (v1[POS] != v2[POS]) - } - - { - for (i = 1; i <= NF; i++) - a[NR][i] = $i - } - - END { - PROCINFO["sorted_in"] = "cmp_field" - if (POS < 1 || POS > NF) - POS = 1 - for (i in a) { - for (j = 1; j <= NF; j++) - printf("%s%c", a[i][j], j < NF ? ":" : "") - print "" - } - } - - The first field in each entry of the password file is the user's -login name, and the fields are separated by colons. Running the -program produces the following output: - - $ gawk -vPOS=1 -F: -f sort.awk /etc/passwd - -| adm:x:3:4:adm:/var/adm:/sbin/nologin - -| apache:x:48:48:Apache:/var/www:/sbin/nologin - -| avahi:x:70:70:Avahi daemon:/:/sbin/nologin - ... - - The comparison normally should always return the same value when -given a specific pair of array elements as its arguments. If -inconsistent results are returned then the order is undefined. This -behavior is sometimes exploited to introduce random order in otherwise -seemingly ordered data: - - function cmp_randomize(i1, v1, i2, v2) - { - # random order - return (2 - 4 * rand()) - } - - As mentioned above, the order of the indices is arbitrary if two -elements compare equal. This is usually not a problem, but letting the -tied elements come out in arbitrary order can be an issue, especially -when comparing item values. The partial ordering of the equal elements -may change during the next loop traversal, if other elements are added -or removed from the array. One way to resolve ties when comparing -elements with otherwise equal values is to include the indices in the -comparison rules. Note that doing this may make the loop traversal -less efficient, so consider it only if necessary.
The following -comparison functions force a deterministic order, and are based on the -fact that the indices of two elements are never equal: +`PROCINFO["sorted_in"]' (*note Auto-set::). This is an advanced +feature, so discussion of it is delayed until *note Controlling Array +Traversal::. - function cmp_numeric(i1, v1, i2, v2) - { - # numerical value (and index) comparison, descending order - return (v1 != v2) ? (v2 - v1) : (i2 - i1) - } - - function cmp_string(i1, v1, i2, v2) - { - # string value (and index) comparison, descending order - v1 = v1 i1 - v2 = v2 i2 - return (v1 > v2) ? -1 : (v1 != v2) - } - - A custom comparison function can often simplify ordered loop -traversal, and the sky is really the limit when it comes to -designing such a function. - - When string comparisons are made during a sort, either for element -values where one or both aren't numbers, or for element indices handled -as strings, the value of `IGNORECASE' (*note Built-in Variables::) -controls whether the comparisons treat corresponding uppercase and -lowercase letters as equivalent or distinct. - - All sorting based on `PROCINFO["sorted_in"]' is disabled in POSIX -mode, since the `PROCINFO' array is not special in that case. - - As a side note, sorting the array indices before traversing the -array has been reported to add 15% to 20% overhead to the execution -time of `awk' programs. For this reason, sorted array traversal is not -the default. + In addition, `gawk' provides built-in functions for sorting arrays; +see *note Array Sorting Functions::. File: gawk.info, Node: Delete, Next: Numeric Array Subscripts, Prev: Array Basics, Up: Arrays @@ -10376,7 +10171,7 @@ string as a subscript if `--lint' is provided on the command line (*note Options::).
-File: gawk.info, Node: Multi-dimensional, Next: Array Sorting, Prev: Uninitialized Subscripts, Up: Arrays +File: gawk.info, Node: Multi-dimensional, Next: Arrays of Arrays, Prev: Uninitialized Subscripts, Up: Arrays 8.5 Multidimensional Arrays =========================== @@ -10502,105 +10297,9 @@ The result is to set `separate[1]' to `"1"' and `separate[2]' to recovered. -File: gawk.info, Node: Array Sorting, Next: Arrays of Arrays, Prev: Multi-dimensional, Up: Arrays - -8.6 Sorting Array Values and Indices with `gawk' -================================================ - -The order in which an array is scanned with a `for (i in array)' loop -is essentially arbitrary. In most `awk' implementations, sorting an -array requires writing a `sort' function. While this can be -educational for exploring different sorting algorithms, usually that's -not the point of the program. `gawk' provides the built-in `asort()' -and `asorti()' functions (*note String Functions::) for sorting arrays. -For example: - - POPULATE THE ARRAY data - n = asort(data) - for (i = 1; i <= n; i++) - DO SOMETHING WITH data[i] - - After the call to `asort()', the array `data' is indexed from 1 to -some number N, the total number of elements in `data'. (This count is -`asort()''s return value.) `data[1]' <= `data[2]' <= `data[3]', and so -on. The array elements are compared as strings. - - An important side effect of calling `asort()' is that _the array's -original indices are irrevocably lost_. As this isn't always -desirable, `asort()' accepts a second argument: - - POPULATE THE ARRAY source - n = asort(source, dest) - for (i = 1; i <= n; i++) - DO SOMETHING WITH dest[i] - - In this case, `gawk' copies the `source' array into the `dest' array -and then sorts `dest', destroying its indices. However, the `source' -array is not affected. - - `asort()' and `asorti()' accept a third string argument to control -the comparison rule for the array elements, and the direction of the -sorted results. 
The valid comparison modes are `string' and `number', -and the direction can be either `ascending' or `descending'. Either -mode or direction, or both, can be omitted in which case the defaults, -`string' or `ascending' is assumed for the comparison mode and the -direction, respectively. Separate comparison mode from direction with -a single space, and they can appear in any order. To compare the -elements as numbers, and to reverse the elements of the `dest' array, -the call to asort in the above example can be replaced with: - - asort(source, dest, "descending number") - - The third argument to `asort()' can also be a user-defined function -name which is used to order the array elements before constructing the -result array. *Note Scanning an Array::, for more information. - - Often, what's needed is to sort on the values of the _indices_ -instead of the values of the elements. To do that, use the `asorti()' -function. The interface is identical to that of `asort()', except that -the index values are used for sorting, and become the values of the -result array: - - { source[$0] = some_func($0) } - - END { - n = asorti(source, dest) - for (i = 1; i <= n; i++) { - Work with sorted indices directly: - DO SOMETHING WITH dest[i] - ... - Access original array via sorted indices: - DO SOMETHING WITH source[dest[i]] - } - } - - Sorting the array by replacing the indices provides maximal -flexibility. To traverse the elements in decreasing order, use a loop -that goes from N down to 1, either over the elements or over the -indices. This is an alternative to specifying `descending' for the -sorting order using the optional third argument. - - Copying array indices and elements isn't expensive in terms of -memory. Internally, `gawk' maintains "reference counts" to data. For -example, when `asort()' copies the first array to the second one, there -is only one copy of the original array elements' data, even though both -arrays use the values.
- - Because `IGNORECASE' affects string comparisons, the value of -`IGNORECASE' also affects sorting for both `asort()' and `asorti()'. -Note also that the locale's sorting order does _not_ come into play; -comparisons are based on character values only.(1) Caveat Emptor. - - ---------- Footnotes ---------- - - (1) This is true because locale-based comparison occurs only when in -POSIX compatibility mode, and since `asort()' and `asorti()' are `gawk' -extensions, they are not available in that case. - - -File: gawk.info, Node: Arrays of Arrays, Prev: Array Sorting, Up: Arrays +File: gawk.info, Node: Arrays of Arrays, Prev: Multi-dimensional, Up: Arrays -8.7 Arrays of Arrays +8.6 Arrays of Arrays ==================== `gawk' supports arrays of arrays. Elements of a subarray are referred @@ -10972,8 +10671,8 @@ pound sign (`#'): `IGNORECASE' affects the sorting. The third argument can also be a user-defined function name in which case the value returned by the function is used to order the array elements before - constructing the result array. *Note Scanning an Array::, for - more information. + constructing the result array. *Note Array Sorting Functions::, + for more information. For example, if the contents of `a' are as follows: @@ -10998,8 +10697,8 @@ pound sign (`#'): asort(a, a, "descending") The `asort()' function is described in more detail in *note Array - Sorting::. `asort()' is a `gawk' extension; it is not available - in compatibility mode (*note Options::). + Sorting Functions::. `asort()' is a `gawk' extension; it is not + available in compatibility mode (*note Options::). `asorti(SOURCE [, DEST [, HOW ] ]) #' Return the number of elements in the array SOURCE. It works @@ -11007,8 +10706,8 @@ pound sign (`#'): of the values. (Here too, `IGNORECASE' affects the sorting.) The `asorti()' function is described in more detail in *note Array - Sorting::. `asorti()' is a `gawk' extension; it is not available - in compatibility mode (*note Options::). 
+ Sorting Functions::. `asorti()' is a `gawk' extension; it is not + available in compatibility mode (*note Options::). `gensub(REGEXP, REPLACEMENT, HOW [, TARGET]) #' Search the target string TARGET for matches of the regular @@ -13678,7 +13377,8 @@ File: gawk.info, Node: Advanced Features, Next: Library Functions, Prev: Inte This major node discusses advanced features in `gawk'. It's a bit of a "grab bag" of items that are otherwise unrelated to each other. First, a command-line option allows `gawk' to recognize nondecimal -numbers in input data, not just in `awk' programs. Next, two-way I/O, +numbers in input data, not just in `awk' programs. Then, `gawk''s +special features for sorting arrays are presented. Next, two-way I/O, discussed briefly in earlier parts of this Info file, is described in full detail, along with the basics of TCP/IP networking. Finally, `gawk' can "profile" an `awk' program, making it possible to tune it @@ -13691,12 +13391,14 @@ and likely to change, its description is relegated to an appendix. * Menu: * Nondecimal Data:: Allowing nondecimal input data. +* Array Sorting:: Facilities for controlling array traversal and + sorting arrays. * Two-way I/O:: Two-way communications with another process. * TCP/IP Networking:: Using `gawk' for network programming. * Profiling:: Profiling your `awk' programs. -File: gawk.info, Node: Nondecimal Data, Next: Two-way I/O, Up: Advanced Features +File: gawk.info, Node: Nondecimal Data, Next: Array Sorting, Up: Advanced Features 11.1 Allowing Nondecimal Input Data =================================== @@ -13738,9 +13440,418 @@ request it. surprising results. 
-File: gawk.info, Node: Two-way I/O, Next: TCP/IP Networking, Prev: Nondecimal Data, Up: Advanced Features +File: gawk.info, Node: Array Sorting, Next: Two-way I/O, Prev: Nondecimal Data, Up: Advanced Features + +11.2 Controlling Array Traversal and Array Sorting +================================================== + +`gawk' lets you control the order in which `for (i in array)' loops +will traverse an array. + + In addition, two built-in functions, `asort()' and `asorti()', let +you sort arrays based on the array values and indices, respectively. +These two functions also provide control over the sorting criteria used +to order the elements during sorting. + +* Menu: + +* Controlling Array Traversal:: How to use PROCINFO["sorted_in"]. +* Array Sorting Functions:: How to use `asort()' and `asorti()'. + + +File: gawk.info, Node: Controlling Array Traversal, Next: Array Sorting Functions, Up: Array Sorting + +11.2.1 Controlling Array Traversal +---------------------------------- + +By default, the order in which a `for (i in array)' loop will scan an +array is not defined; it is generally based upon the internal +implementation of arrays inside `awk'. + + Often, though, it is desirable to be able to loop over the elements +in a particular order that you, the programmer, choose. `gawk' lets +you do this; this node describes how. + +* Menu: + +* Controlling Scanning With A Function:: Using a function to control scanning. +* Controlling Scanning:: Controlling the order in which arrays + are scanned. + + +File: gawk.info, Node: Controlling Scanning With A Function, Next: Controlling Scanning, Up: Controlling Array Traversal + +11.2.1.1 Controlling Array Scanning Order With a User-defined Function +...................................................................... + +The value of `PROCINFO["sorted_in"]' can be a function name. This lets +you traverse an array based on any custom criterion. The array +elements are ordered according to the return value of this function. 
+This comparison function should be defined with at least four arguments: + + function comp_func(i1, v1, i2, v2) + { + COMPARE ELEMENTS 1 AND 2 IN SOME FASHION + RETURN < 0; 0; OR > 0 + } + + Here, I1 and I2 are the indices, and V1 and V2 are the corresponding +values of the two elements being compared. Either V1 or V2, or both, +can be arrays if the array being traversed contains subarrays as +values. The three possible return values are interpreted this way: + + * If the return value of `comp_func(i1, v1, i2, v2)' is less than + zero, index I1 comes before index I2 during loop traversal. + + * If `comp_func(i1, v1, i2, v2)' returns zero, I1 and I2 come + together but the relative order with respect to each other is + undefined. + + * If the return value of `comp_func(i1, v1, i2, v2)' is greater than + zero, I1 comes after I2. + + The following comparison function can be used to scan an array in +numerical order of the indices: + + function cmp_num_idx(i1, v1, i2, v2) + { + # numerical index comparison, ascending order + return (i1 - i2) + } + + This function traverses an array based on the string order of the +element values rather than by indices: + + function cmp_str_val(i1, v1, i2, v2) + { + # string value comparison, ascending order + v1 = v1 "" + v2 = v2 "" + if (v1 < v2) + return -1 + return (v1 != v2) + } + + Here is a comparison function to make all numbers, and numeric +strings without any leading or trailing spaces, come out first during +loop traversal: + + function cmp_num_str_val(i1, v1, i2, v2, n1, n2) + { + # numbers before string value comparison, ascending order + n1 = v1 + 0 + n2 = v2 + 0 + if (n1 == v1) + return (n2 == v2) ? (n1 - n2) : -1 + else if (n2 == v2) + return 1 + return (v1 < v2) ? -1 : (v1 != v2) + } + + *FIXME*: Put in a fuller example here of some data and show the +different results when traversing. + + Consider sorting the entries of a GNU/Linux system password file +according to login names. 
The following program which sorts records by +a specific field position can be used for this purpose: + + # sort.awk --- simple program to sort by field position + # field position is specified by the global variable POS + + function cmp_field(i1, v1, i2, v2) + { + # comparison by value, as string, and ascending order + return v1[POS] < v2[POS] ? -1 : (v1[POS] != v2[POS]) + } + + { + for (i = 1; i <= NF; i++) + a[NR][i] = $i + } + + END { + PROCINFO["sorted_in"] = "cmp_field" + if (POS < 1 || POS > NF) + POS = 1 + for (i in a) { + for (j = 1; j <= NF; j++) + printf("%s%c", a[i][j], j < NF ? ":" : "") + print "" + } + } + + The first field in each entry of the password file is the user's +login name, and the fields are separated by colons. Each record +defines a subarray, with each field as an element in the subarray. +Running the program produces the following output: + + $ gawk -vPOS=1 -F: -f sort.awk /etc/passwd + -| adm:x:3:4:adm:/var/adm:/sbin/nologin + -| apache:x:48:48:Apache:/var/www:/sbin/nologin + -| avahi:x:70:70:Avahi daemon:/:/sbin/nologin + ... + + The comparison normally should always return the same value when +given a specific pair of array elements as its arguments. If +inconsistent results are returned then the order is undefined. This +behavior is sometimes exploited to introduce random order in otherwise +seemingly ordered data: + + function cmp_randomize(i1, v1, i2, v2) + { + # random order + return (2 - 4 * rand()) + } + + As mentioned above, the order of the indices is arbitrary if two +elements compare equal. This is usually not a problem, but letting the +tied elements come out in arbitrary order can be an issue, especially +when comparing item values. The partial ordering of the equal elements +may change during the next loop traversal, if other elements are added +or removed from the array. One way to resolve ties when comparing +elements with otherwise equal values is to include the indices in the +comparison rules.
Note that doing this may make the loop traversal +less efficient, so consider it only if necessary. The following +comparison functions force a deterministic order, and are based on the +fact that the indices of two elements are never equal: + + function cmp_numeric(i1, v1, i2, v2) + { + # numerical value (and index) comparison, descending order + return (v1 != v2) ? (v2 - v1) : (i2 - i1) + } + + function cmp_string(i1, v1, i2, v2) + { + # string value (and index) comparison, descending order + v1 = v1 i1 + v2 = v2 i2 + return (v1 > v2) ? -1 : (v1 != v2) + } + + A custom comparison function can often simplify ordered loop +traversal, and the sky is really the limit when it comes to +designing such a function. + + When string comparisons are made during a sort, either for element +values where one or both aren't numbers, or for element indices handled +as strings, the value of `IGNORECASE' (*note Built-in Variables::) +controls whether the comparisons treat corresponding uppercase and +lowercase letters as equivalent or distinct. + + Another point to keep in mind is that in the case of subarrays the +element values can themselves be arrays; a production comparison +function should use the `isarray()' function (*note Type Functions::), +to check for this, and choose a defined sorting order for subarrays. + + All sorting based on `PROCINFO["sorted_in"]' is disabled in POSIX +mode, since the `PROCINFO' array is not special in that case. + + As a side note, sorting the array indices before traversing the +array has been reported to add 15% to 20% overhead to the execution +time of `awk' programs. For this reason, sorted array traversal is not +the default. + + +File: gawk.info, Node: Controlling Scanning, Prev: Controlling Scanning With A Function, Up: Controlling Array Traversal + +11.2.1.2 Controlling Array Scanning Order +.........................................
+ +As described in *note Controlling Scanning With A Function::, you can +provide the name of a function as the value of `PROCINFO["sorted_in"]' +to specify custom sorting criteria. + + Often, though, you may wish to do something simple, such as "sort +based on comparing the indices in ascending order," or "sort based on +comparing the values in descending order." Having to write a simple +comparison function for this purpose for use in all of your programs +becomes tedious. For the most likely simple cases `gawk' provides the +option of supplying special names that do the requested sorting for you. +You can think of them as "predefined" sorting functions, if you like, +although the names purposely include characters that are not valid in +real `awk' function names. + + The following special values are available: + +`"@ind_str_asc"' + Order by indices compared as strings; this is the most basic sort. + (Internally, array indices are always strings, so with `a[2*5] = 1' + the index is actually `"10"' rather than numeric 10.) + +`"@ind_num_asc"' + Order by indices but force them to be treated as numbers in the + process. Any index with non-numeric value will end up positioned + as if it were zero. + +`"@val_type_asc"' + Order by element values rather than indices. Ordering is by the + type assigned to the element (*note Typing and Comparison::). All + numeric values come before all string values, which in turn come + before all subarrays. + +`"@val_str_asc"' + Order by element values rather than by indices. Scalar values are + compared as strings. Subarrays, if present, come out last. + +`"@val_num_asc"' + Order by values but force scalar values to be treated as numbers + for the purpose of comparison. If there are subarrays, those + appear at the end of the sorted list. + +`"@ind_str_desc"' + Reverse order from the most basic sort. + +`"@ind_num_desc"' + Numeric indices ordered from high to low. 
+ +`"@val_type_desc"' + Element values, based on type, in descending order. + +`"@val_str_desc"' + Element values, treated as strings, ordered from high to low. + Subarrays, if present, come out first. + +`"@val_num_desc"' + Element values, treated as numbers, ordered from high to low. + Subarrays, if present, come out first. + +`"@unsorted"' + Array elements are processed in arbitrary order, which is the + normal `awk' behavior. You can also get the normal behavior by just + deleting the `"sorted_in"' element from the `PROCINFO' array, if + it previously had a value assigned to it. + + The array traversal order is determined before the `for' loop starts +to run. Changing `PROCINFO["sorted_in"]' in the loop body will not +affect the loop. + + For example: + + $ gawk 'BEGIN { + > a[4] = 4 + > a[3] = 3 + > for (i in a) + > print i, a[i] + > }' + -| 4 4 + -| 3 3 + $ gawk 'BEGIN { + > PROCINFO["sorted_in"] = "@ind_str_asc" + > a[4] = 4 + > a[3] = 3 + > for (i in a) + > print i, a[i] + > }' + -| 3 3 + -| 4 4 + + When sorting an array by element values, if a value happens to be a +subarray then it is considered to be greater than any string or numeric +value, regardless of what the subarray itself contains, and all +subarrays are treated as being equal to each other. Their order +relative to each other is determined by their index strings. + + +File: gawk.info, Node: Array Sorting Functions, Prev: Controlling Array Traversal, Up: Array Sorting + +11.2.2 Sorting Array Values and Indices with `gawk' +--------------------------------------------------- + +The order in which an array is scanned with a `for (i in array)' loop +is essentially arbitrary. In most `awk' implementations, sorting an +array requires writing a `sort' function. While this can be +educational for exploring different sorting algorithms, usually that's +not the point of the program. `gawk' provides the built-in `asort()' +and `asorti()' functions (*note String Functions::) for sorting arrays.
+For example: + + POPULATE THE ARRAY data + n = asort(data) + for (i = 1; i <= n; i++) + DO SOMETHING WITH data[i] + + After the call to `asort()', the array `data' is indexed from 1 to +some number N, the total number of elements in `data'. (This count is +`asort()''s return value.) `data[1]' <= `data[2]' <= `data[3]', and so +on. The array elements are compared as strings. + + An important side effect of calling `asort()' is that _the array's +original indices are irrevocably lost_. As this isn't always +desirable, `asort()' accepts a second argument: + + POPULATE THE ARRAY source + n = asort(source, dest) + for (i = 1; i <= n; i++) + DO SOMETHING WITH dest[i] + + In this case, `gawk' copies the `source' array into the `dest' array +and then sorts `dest', destroying its indices. However, the `source' +array is not affected. + + `asort()' and `asorti()' accept a third string argument to control +the comparison rule for the array elements, and the direction of the +sorted results. The valid comparison modes are `string' and `number', +and the direction can be either `ascending' or `descending'. Either +mode or direction, or both, can be omitted in which case the defaults, +`string' or `ascending' is assumed for the comparison mode and the +direction, respectively. Separate comparison mode from direction with +a single space, and they can appear in any order. To compare the +elements as numbers, and to reverse the elements of the `dest' array, +the call to asort in the above example can be replaced with: + + asort(source, dest, "descending number") + + The third argument to `asort()' can also be a user-defined function +name which is used to order the array elements before constructing the +result array. *Note Scanning an Array::, for more information. + + Often, what's needed is to sort on the values of the _indices_ +instead of the values of the elements. To do that, use the `asorti()' +function.
The interface is identical to that of `asort()', except that +the index values are used for sorting, and become the values of the +result array: + + { source[$0] = some_func($0) } + + END { + n = asorti(source, dest) + for (i = 1; i <= n; i++) { + Work with sorted indices directly: + DO SOMETHING WITH dest[i] + ... + Access original array via sorted indices: + DO SOMETHING WITH source[dest[i]] + } + } + + Sorting the array by replacing the indices provides maximal +flexibility. To traverse the elements in decreasing order, use a loop +that goes from N down to 1, either over the elements or over the +indices. This is an alternative to specifying `descending' for the +sorting order using the optional third argument. + + Copying array indices and elements isn't expensive in terms of +memory. Internally, `gawk' maintains "reference counts" to data. For +example, when `asort()' copies the first array to the second one, there +is only one copy of the original array elements' data, even though both +arrays use the values. + + Because `IGNORECASE' affects string comparisons, the value of +`IGNORECASE' also affects sorting for both `asort()' and `asorti()'. +Note also that the locale's sorting order does _not_ come into play; +comparisons are based on character values only.(1) Caveat Emptor. + + ---------- Footnotes ---------- + + (1) This is true because locale-based comparison occurs only when in +POSIX compatibility mode, and since `asort()' and `asorti()' are `gawk' +extensions, they are not available in that case. + + +File: gawk.info, Node: Two-way I/O, Next: TCP/IP Networking, Prev: Array Sorting, Up: Advanced Features -11.2 Two-Way Communications with Another Process +11.3 Two-Way Communications with Another Process ================================================ From: brennan@whidbey.com (Mike Brennan) @@ -13875,7 +13986,7 @@ regular pipes. 
File: gawk.info, Node: TCP/IP Networking, Next: Profiling, Prev: Two-way I/O, Up: Advanced Features -11.3 Using `gawk' for Network Programming +11.4 Using `gawk' for Network Programming ========================================= `EMISTERED': @@ -13952,7 +14063,7 @@ examples. File: gawk.info, Node: Profiling, Prev: TCP/IP Networking, Up: Advanced Features -11.4 Profiling Your `awk' Programs +11.5 Profiling Your `awk' Programs ================================== You may produce execution traces of your `awk' programs. This is done @@ -19603,8 +19714,8 @@ you can find more information. * SVR4:: Minor changes between System V Releases 3.1 and 4. * POSIX:: New features from the POSIX standard. -* BTL:: New features from Brian Kernighan's - version of `awk'. +* BTL:: New features from Brian Kernighan's version of + `awk'. * POSIX/GNU:: The extensions in `gawk' not in POSIX `awk'. * Common Extensions:: Common Extensions Summary. @@ -19916,6 +20027,8 @@ the current version of `gawk'. - Tandem (non-POSIX) + - Prestandard VAX C compiler for VAX/VMS + @@ -19991,9 +20104,9 @@ Info file, in approximate chronological order: documentation. * Michal Jaegermann provided the port to Atari systems and its - documentation. He continues to provide portability checking with - DEC Alpha systems, and has done a lot of work to make sure `gawk' - works on non-32-bit systems. + documentation. (This port is no longer supported.) He continues + to provide portability checking with DEC Alpha systems, and has + done a lot of work to make sure `gawk' works on non-32-bit systems. * Fred Fish provided the port to Amiga systems and its documentation. (With Fred's sad passing, this is no longer supported.) @@ -24696,11 +24809,11 @@ Index * arrays, elements, assigning: Assigning Elements. (line 6) * arrays, elements, deleting: Delete. (line 6) * arrays, elements, installing: Internals. (line 79) -* arrays, elements, order of: Scanning an Array. (line 53) +* arrays, elements, order of: Scanning an Array. 
(line 48) * arrays, elements, referencing: Reference to Elements. (line 6) * arrays, elements, retrieving number of: String Functions. (line 29) -* arrays, for statement and: Scanning an Array. (line 25) +* arrays, for statement and: Scanning an Array. (line 20) * arrays, IGNORECASE variable and: Array Intro. (line 92) * arrays, indexing: Array Intro. (line 50) * arrays, merging into strings: Join Function. (line 6) @@ -24708,8 +24821,10 @@ Index * arrays, multidimensional, scanning: Multi-scanning. (line 11) * arrays, names of: Arrays. (line 18) * arrays, scanning: Scanning an Array. (line 6) -* arrays, sorting: Array Sorting. (line 6) -* arrays, sorting, IGNORECASE variable and: Array Sorting. (line 85) +* arrays, sorting: Array Sorting Functions. + (line 6) +* arrays, sorting, IGNORECASE variable and: Array Sorting Functions. + (line 85) * arrays, sparse: Array Intro. (line 71) * arrays, subscripts: Numeric Array Subscripts. (line 6) @@ -24719,9 +24834,11 @@ Index (line 55) * ASCII <1>: Glossary. (line 137) * ASCII: Ordinal Functions. (line 45) -* asort() function (gawk) <1>: String Functions. (line 29) -* asort() function (gawk): Array Sorting. (line 6) -* asort() function (gawk), arrays, sorting: Array Sorting. (line 6) +* asort() function (gawk) <1>: Array Sorting Functions. + (line 6) +* asort() function (gawk): String Functions. (line 29) +* asort() function (gawk), arrays, sorting: Array Sorting Functions. + (line 6) * asorti() function (gawk): String Functions. (line 77) * assert() function (C library): Assert Function. (line 6) * assert() user-defined function: Assert Function. (line 28) @@ -25386,7 +25503,7 @@ Index (line 6) * elements in arrays, assigning: Assigning Elements. (line 6) * elements in arrays, deleting: Delete. (line 6) -* elements in arrays, order of: Scanning an Array. (line 53) +* elements in arrays, order of: Scanning an Array. (line 48) * elements in arrays, scanning: Scanning an Array. 
(line 6) * email address for bug reports, bug-gawk@gnu.org: Bugs. (line 30) * EMISTERED: TCP/IP Networking. (line 6) @@ -25617,7 +25734,7 @@ Index * FNR variable: Records. (line 6) * FNR variable, changing: Auto-set. (line 229) * for statement: For Statement. (line 6) -* for statement, in arrays: Scanning an Array. (line 25) +* for statement, in arrays: Scanning an Array. (line 20) * force_number() internal function: Internals. (line 27) * force_string() internal function: Internals. (line 32) * force_wstring() internal function: Internals. (line 37) @@ -25751,8 +25868,9 @@ Index * gawk, function arguments and: Calling Built-in. (line 16) * gawk, functions, adding: Dynamic Extensions. (line 10) * gawk, hexadecimal numbers and: Nondecimal-numbers. (line 42) -* gawk, IGNORECASE variable in <1>: String Functions. (line 29) -* gawk, IGNORECASE variable in <2>: Array Sorting. (line 85) +* gawk, IGNORECASE variable in <1>: Array Sorting Functions. + (line 85) +* gawk, IGNORECASE variable in <2>: String Functions. (line 29) * gawk, IGNORECASE variable in <3>: Array Intro. (line 92) * gawk, IGNORECASE variable in <4>: User-modified. (line 82) * gawk, IGNORECASE variable in: Case-sensitivity. (line 26) @@ -25910,12 +26028,14 @@ Index * if statement, actions, changing: Ranges. (line 25) * igawk.sh program: Igawk Program. (line 124) * ignore debugger command: Breakpoint Control. (line 86) -* IGNORECASE variable <1>: String Functions. (line 29) -* IGNORECASE variable <2>: Array Sorting. (line 85) +* IGNORECASE variable <1>: Array Sorting Functions. + (line 85) +* IGNORECASE variable <2>: String Functions. (line 29) * IGNORECASE variable <3>: Array Intro. (line 92) * IGNORECASE variable <4>: User-modified. (line 82) * IGNORECASE variable: Case-sensitivity. (line 26) -* IGNORECASE variable, array sorting and: Array Sorting. (line 85) +* IGNORECASE variable, array sorting and: Array Sorting Functions. + (line 85) * IGNORECASE variable, array subscripts and: Array Intro. 
(line 92) * IGNORECASE variable, in example programs: Library Functions. (line 42) @@ -25928,7 +26048,7 @@ Index * in operator <3>: Precedence. (line 83) * in operator: Comparison Operators. (line 11) -* in operator, arrays and <1>: Scanning an Array. (line 22) +* in operator, arrays and <1>: Scanning an Array. (line 17) * in operator, arrays and: Reference to Elements. (line 37) * increment operators: Increment Ops. (line 6) @@ -26593,7 +26713,8 @@ Index * recursive functions: Definition Syntax. (line 73) * redirection of input: Getline/File. (line 6) * redirection of output: Redirection. (line 6) -* reference counting, sorting arrays: Array Sorting. (line 79) +* reference counting, sorting arrays: Array Sorting Functions. + (line 79) * regexp constants <1>: Comparison Operators. (line 103) * regexp constants <2>: Regexp Constants. (line 6) @@ -26751,7 +26872,8 @@ Index * side effects: Concatenation. (line 42) * side effects, array indexing: Reference to Elements. (line 42) -* side effects, asort() function: Array Sorting. (line 24) +* side effects, asort() function: Array Sorting Functions. + (line 24) * side effects, assignment expressions: Assignment Ops. (line 23) * side effects, Boolean operators: Boolean Ops. (line 30) * side effects, conditional expressions: Conditional Exp. (line 22) @@ -26781,7 +26903,8 @@ Index * Skywalker, Luke: Undocumented. (line 6) * sleep utility: Alarm Program. (line 109) * Solaris, POSIX-compliant awk: Other Versions. (line 86) -* sort function, arrays, sorting: Array Sorting. (line 6) +* sort function, arrays, sorting: Array Sorting Functions. + (line 6) * sort utility: Word Sorting. (line 50) * sort utility, coprocesses and: Two-way I/O. (line 83) * sorting characters in different languages: Explaining gettext. 
@@ -27132,407 +27255,409 @@ Index Tag Table: Node: Top1346 -Node: Foreword29926 -Node: Preface34271 -Ref: Preface-Footnote-137238 -Ref: Preface-Footnote-237344 -Node: History37576 -Node: Names39967 -Ref: Names-Footnote-141444 -Node: This Manual41516 -Ref: This Manual-Footnote-146464 -Node: Conventions46564 -Node: Manual History48698 -Ref: Manual History-Footnote-151968 -Ref: Manual History-Footnote-252009 -Node: How To Contribute52083 -Node: Acknowledgments53227 -Node: Getting Started57558 -Node: Running gawk59937 -Node: One-shot61123 -Node: Read Terminal62348 -Ref: Read Terminal-Footnote-163998 -Ref: Read Terminal-Footnote-264274 -Node: Long64445 -Node: Executable Scripts65821 -Ref: Executable Scripts-Footnote-167690 -Ref: Executable Scripts-Footnote-267792 -Node: Comments68243 -Node: Quoting70710 -Node: DOS Quoting75333 -Node: Sample Data Files76008 -Node: Very Simple79040 -Node: Two Rules83639 -Node: More Complex85786 -Ref: More Complex-Footnote-188716 -Node: Statements/Lines88801 -Ref: Statements/Lines-Footnote-193263 -Node: Other Features93528 -Node: When94456 -Node: Invoking Gawk96603 -Node: Command Line97988 -Node: Options98771 -Ref: Options-Footnote-1111903 -Node: Other Arguments111928 -Node: Naming Standard Input114586 -Node: Environment Variables115680 -Node: AWKPATH Variable116124 -Ref: AWKPATH Variable-Footnote-1118721 -Node: Other Environment Variables118981 -Node: Exit Status121321 -Node: Include Files121996 -Node: Obsolete125481 -Node: Undocumented126167 -Node: Regexp126408 -Node: Regexp Usage127860 -Node: Escape Sequences129886 -Node: Regexp Operators135649 -Ref: Regexp Operators-Footnote-1142846 -Ref: Regexp Operators-Footnote-2142993 -Node: Bracket Expressions143091 -Ref: table-char-classes144894 -Node: GNU Regexp Operators147538 -Node: Case-sensitivity151261 -Ref: Case-sensitivity-Footnote-1154229 -Ref: Case-sensitivity-Footnote-2154464 -Node: Leftmost Longest154572 -Node: Computed Regexps155773 -Node: Locales159199 -Node: Reading Files162906 
-Node: Records164847 -Ref: Records-Footnote-1173521 -Node: Fields173558 -Ref: Fields-Footnote-1176591 -Node: Nonconstant Fields176677 -Node: Changing Fields178879 -Node: Field Separators184857 -Node: Default Field Splitting187486 -Node: Regexp Field Splitting188603 -Node: Single Character Fields191945 -Node: Command Line Field Separator193004 -Node: Field Splitting Summary196445 -Ref: Field Splitting Summary-Footnote-1199637 -Node: Constant Size199738 -Node: Splitting By Content204322 -Ref: Splitting By Content-Footnote-1208048 -Node: Multiple Line208088 -Ref: Multiple Line-Footnote-1213935 -Node: Getline214114 -Node: Plain Getline216342 -Node: Getline/Variable218431 -Node: Getline/File219572 -Node: Getline/Variable/File220894 -Ref: Getline/Variable/File-Footnote-1222493 -Node: Getline/Pipe222580 -Node: Getline/Variable/Pipe225140 -Node: Getline/Coprocess226247 -Node: Getline/Variable/Coprocess227490 -Node: Getline Notes228204 -Node: Getline Summary230146 -Ref: table-getline-variants230489 -Node: Command line directories231345 -Node: Printing231970 -Node: Print233601 -Node: Print Examples234938 -Node: Output Separators237722 -Node: OFMT239482 -Node: Printf240840 -Node: Basic Printf241746 -Node: Control Letters243285 -Node: Format Modifiers247097 -Node: Printf Examples253106 -Node: Redirection255821 -Node: Special Files262805 -Node: Special FD263338 -Ref: Special FD-Footnote-1266962 -Node: Special Network267036 -Node: Special Caveats267886 -Node: Close Files And Pipes268682 -Ref: Close Files And Pipes-Footnote-1275705 -Ref: Close Files And Pipes-Footnote-2275853 -Node: Expressions276003 -Node: Values277072 -Node: Constants277748 -Node: Scalar Constants278428 -Ref: Scalar Constants-Footnote-1279287 -Node: Nondecimal-numbers279469 -Node: Regexp Constants282528 -Node: Using Constant Regexps283003 -Node: Variables286058 -Node: Using Variables286713 -Node: Assignment Options288437 -Node: Conversion290309 -Ref: table-locale-affects295685 -Ref: Conversion-Footnote-1296309 
-Node: All Operators296418 -Node: Arithmetic Ops297048 -Node: Concatenation299553 -Ref: Concatenation-Footnote-1302346 -Node: Assignment Ops302466 -Ref: table-assign-ops307454 -Node: Increment Ops308862 -Node: Truth Values and Conditions312332 -Node: Truth Values313415 -Node: Typing and Comparison314464 -Node: Variable Typing315253 -Ref: Variable Typing-Footnote-1319150 -Node: Comparison Operators319272 -Ref: table-relational-ops319682 -Node: POSIX String Comparison323231 -Ref: POSIX String Comparison-Footnote-1324187 -Node: Boolean Ops324325 -Ref: Boolean Ops-Footnote-1328403 -Node: Conditional Exp328494 -Node: Function Calls330226 -Node: Precedence333820 -Node: Patterns and Actions337473 -Node: Pattern Overview338527 -Node: Regexp Patterns340193 -Node: Expression Patterns340736 -Node: Ranges344310 -Node: BEGIN/END347276 -Node: Using BEGIN/END348038 -Ref: Using BEGIN/END-Footnote-1350769 -Node: I/O And BEGIN/END350875 -Node: BEGINFILE/ENDFILE353157 -Node: Empty355988 -Node: Using Shell Variables356304 -Node: Action Overview358589 -Node: Statements360946 -Node: If Statement362800 -Node: While Statement364299 -Node: Do Statement366343 -Node: For Statement367499 -Node: Switch Statement370651 -Node: Break Statement372748 -Node: Continue Statement374738 -Node: Next Statement376525 -Node: Nextfile Statement378915 -Node: Exit Statement381212 -Node: Built-in Variables383628 -Node: User-modified384723 -Ref: User-modified-Footnote-1392749 -Node: Auto-set392811 -Ref: Auto-set-Footnote-1403524 -Node: ARGC and ARGV403729 -Node: Arrays407580 -Node: Array Basics409151 -Node: Array Intro409862 -Node: Reference to Elements414180 -Node: Assigning Elements416450 -Node: Array Example416941 -Node: Scanning an Array418673 -Node: Controlling Scanning421129 -Node: Controlling Scanning With A Function424172 -Node: Delete430174 -Ref: Delete-Footnote-1432609 -Node: Numeric Array Subscripts432666 -Node: Uninitialized Subscripts434849 -Node: Multi-dimensional436477 -Node: Multi-scanning439568 
-Node: Array Sorting441152 -Ref: Array Sorting-Footnote-1445138 -Node: Arrays of Arrays445332 -Node: Functions449905 -Node: Built-in450727 -Node: Calling Built-in451805 -Node: Numeric Functions453793 -Ref: Numeric Functions-Footnote-1457558 -Ref: Numeric Functions-Footnote-2457915 -Ref: Numeric Functions-Footnote-3457963 -Node: String Functions458232 -Ref: String Functions-Footnote-1481703 -Ref: String Functions-Footnote-2481832 -Ref: String Functions-Footnote-3482080 -Node: Gory Details482167 -Ref: table-sub-escapes483846 -Ref: table-posix-sub485160 -Ref: table-gensub-escapes486073 -Node: I/O Functions487244 -Ref: I/O Functions-Footnote-1493899 -Node: Time Functions494046 -Ref: Time Functions-Footnote-1504938 -Ref: Time Functions-Footnote-2505006 -Ref: Time Functions-Footnote-3505164 -Ref: Time Functions-Footnote-4505275 -Ref: Time Functions-Footnote-5505387 -Ref: Time Functions-Footnote-6505614 -Node: Bitwise Functions505880 -Ref: table-bitwise-ops506438 -Ref: Bitwise Functions-Footnote-1510598 -Node: Type Functions510782 -Node: I18N Functions511252 -Node: User-defined512879 -Node: Definition Syntax513683 -Ref: Definition Syntax-Footnote-1518593 -Node: Function Example518662 -Node: Function Caveats521256 -Node: Calling A Function521677 -Node: Variable Scope522792 -Node: Pass By Value/Reference524767 -Node: Return Statement528207 -Node: Dynamic Typing531188 -Node: Indirect Calls531923 -Node: Internationalization541608 -Node: I18N and L10N543034 -Node: Explaining gettext543720 -Ref: Explaining gettext-Footnote-1548786 -Ref: Explaining gettext-Footnote-2548970 -Node: Programmer i18n549135 -Node: Translator i18n553335 -Node: String Extraction554128 -Ref: String Extraction-Footnote-1555089 -Node: Printf Ordering555175 -Ref: Printf Ordering-Footnote-1557959 -Node: I18N Portability558023 -Ref: I18N Portability-Footnote-1560472 -Node: I18N Example560535 -Ref: I18N Example-Footnote-1563170 -Node: Gawk I18N563242 -Node: Advanced Features563859 -Node: Nondecimal Data565178 
-Node: Two-way I/O566759 -Ref: Two-way I/O-Footnote-1572193 -Node: TCP/IP Networking572263 -Node: Profiling575107 -Node: Library Functions582581 -Ref: Library Functions-Footnote-1585588 -Node: Library Names585759 -Ref: Library Names-Footnote-1589230 -Ref: Library Names-Footnote-2589450 -Node: General Functions589536 -Node: Strtonum Function590489 -Node: Assert Function593419 -Node: Round Function596745 -Node: Cliff Random Function598288 -Node: Ordinal Functions599304 -Ref: Ordinal Functions-Footnote-1602374 -Ref: Ordinal Functions-Footnote-2602626 -Node: Join Function602835 -Ref: Join Function-Footnote-1604606 -Node: Gettimeofday Function604806 -Node: Data File Management608521 -Node: Filetrans Function609153 -Node: Rewind Function613292 -Node: File Checking614679 -Node: Empty Files615773 -Node: Ignoring Assigns618003 -Node: Getopt Function619556 -Ref: Getopt Function-Footnote-1630860 -Node: Passwd Functions631063 -Ref: Passwd Functions-Footnote-1640038 -Node: Group Functions640126 -Node: Walking Arrays648210 -Node: Sample Programs649779 -Node: Running Examples650444 -Node: Clones651172 -Node: Cut Program652396 -Node: Egrep Program662241 -Ref: Egrep Program-Footnote-1670014 -Node: Id Program670124 -Node: Split Program673740 -Ref: Split Program-Footnote-1677259 -Node: Tee Program677387 -Node: Uniq Program680190 -Node: Wc Program687619 -Ref: Wc Program-Footnote-1691885 -Ref: Wc Program-Footnote-2692085 -Node: Miscellaneous Programs692177 -Node: Dupword Program693365 -Node: Alarm Program695396 -Node: Translate Program700145 -Ref: Translate Program-Footnote-1704532 -Ref: Translate Program-Footnote-2704760 -Node: Labels Program704894 -Ref: Labels Program-Footnote-1708265 -Node: Word Sorting708349 -Node: History Sorting712233 -Node: Extract Program714072 -Ref: Extract Program-Footnote-1721555 -Node: Simple Sed721683 -Node: Igawk Program724745 -Ref: Igawk Program-Footnote-1739778 -Ref: Igawk Program-Footnote-2739979 -Node: Anagram Program740117 -Node: Signature 
Program743185 -Node: Debugger744285 -Node: Debugging745196 -Node: Debugging Concepts745609 -Node: Debugging Terms747465 -Node: Awk Debugging750087 -Node: Sample dgawk session750979 -Node: dgawk invocation751471 -Node: Finding The Bug752653 -Node: List of Debugger Commands759139 -Node: Breakpoint Control760450 -Node: Dgawk Execution Control764086 -Node: Viewing And Changing Data767437 -Node: Dgawk Stack770774 -Node: Dgawk Info772234 -Node: Miscellaneous Dgawk Commands776182 -Node: Readline Support781610 -Node: Dgawk Limitations782448 -Node: Language History784637 -Node: V7/SVR3.1786075 -Node: SVR4788396 -Node: POSIX789838 -Node: BTL790846 -Node: POSIX/GNU791580 -Node: Common Extensions796681 -Node: Contributors797782 -Node: Installation801921 -Node: Gawk Distribution802815 -Node: Getting803299 -Node: Extracting804125 -Node: Distribution contents805817 -Node: Unix Installation811039 -Node: Quick Installation811656 -Node: Additional Configuration Options813618 -Node: Configuration Philosophy815095 -Node: Non-Unix Installation817437 -Node: PC Installation817895 -Node: PC Binary Installation819194 -Node: PC Compiling821042 -Node: PC Testing823986 -Node: PC Using825162 -Node: Cygwin829347 -Node: MSYS830347 -Node: VMS Installation830861 -Node: VMS Compilation831464 -Ref: VMS Compilation-Footnote-1832471 -Node: VMS Installation Details832529 -Node: VMS Running834164 -Node: VMS Old Gawk835771 -Node: Bugs836245 -Node: Other Versions840155 -Node: Notes845434 -Node: Compatibility Mode846126 -Node: Additions846909 -Node: Accessing The Source847721 -Node: Adding Code849146 -Node: New Ports855113 -Node: Dynamic Extensions859226 -Node: Internals860602 -Node: Plugin License869705 -Node: Sample Library870339 -Node: Internal File Description871025 -Node: Internal File Ops874740 -Ref: Internal File Ops-Footnote-1879521 -Node: Using Internal File Ops879661 -Node: Future Extensions882038 -Node: Basic Concepts884542 -Node: Basic High Level885299 -Ref: Basic High Level-Footnote-1889334 
-Node: Basic Data Typing889519 -Node: Floating Point Issues894044 -Node: String Conversion Precision895127 -Ref: String Conversion Precision-Footnote-1896821 -Node: Unexpected Results896930 -Node: POSIX Floating Point Problems898756 -Ref: POSIX Floating Point Problems-Footnote-1902458 -Node: Glossary902496 -Node: Copying926639 -Node: GNU Free Documentation License964196 -Node: Index989333 +Node: Foreword33320 +Node: Preface37665 +Ref: Preface-Footnote-140632 +Ref: Preface-Footnote-240738 +Node: History40970 +Node: Names43361 +Ref: Names-Footnote-144838 +Node: This Manual44910 +Ref: This Manual-Footnote-149858 +Node: Conventions49958 +Node: Manual History52092 +Ref: Manual History-Footnote-155362 +Ref: Manual History-Footnote-255403 +Node: How To Contribute55477 +Node: Acknowledgments56621 +Node: Getting Started60952 +Node: Running gawk63331 +Node: One-shot64517 +Node: Read Terminal65742 +Ref: Read Terminal-Footnote-167392 +Ref: Read Terminal-Footnote-267668 +Node: Long67839 +Node: Executable Scripts69215 +Ref: Executable Scripts-Footnote-171084 +Ref: Executable Scripts-Footnote-271186 +Node: Comments71637 +Node: Quoting74104 +Node: DOS Quoting78727 +Node: Sample Data Files79402 +Node: Very Simple82434 +Node: Two Rules87033 +Node: More Complex89180 +Ref: More Complex-Footnote-192110 +Node: Statements/Lines92195 +Ref: Statements/Lines-Footnote-196657 +Node: Other Features96922 +Node: When97850 +Node: Invoking Gawk99997 +Node: Command Line101382 +Node: Options102165 +Ref: Options-Footnote-1115297 +Node: Other Arguments115322 +Node: Naming Standard Input117980 +Node: Environment Variables119074 +Node: AWKPATH Variable119518 +Ref: AWKPATH Variable-Footnote-1122115 +Node: Other Environment Variables122375 +Node: Exit Status124715 +Node: Include Files125390 +Node: Obsolete128875 +Node: Undocumented129561 +Node: Regexp129802 +Node: Regexp Usage131254 +Node: Escape Sequences133280 +Node: Regexp Operators139043 +Ref: Regexp Operators-Footnote-1146240 +Ref: Regexp 
Operators-Footnote-2146387 +Node: Bracket Expressions146485 +Ref: table-char-classes148288 +Node: GNU Regexp Operators150932 +Node: Case-sensitivity154655 +Ref: Case-sensitivity-Footnote-1157623 +Ref: Case-sensitivity-Footnote-2157858 +Node: Leftmost Longest157966 +Node: Computed Regexps159167 +Node: Locales162593 +Node: Reading Files166300 +Node: Records168241 +Ref: Records-Footnote-1176915 +Node: Fields176952 +Ref: Fields-Footnote-1179985 +Node: Nonconstant Fields180071 +Node: Changing Fields182273 +Node: Field Separators188251 +Node: Default Field Splitting190880 +Node: Regexp Field Splitting191997 +Node: Single Character Fields195339 +Node: Command Line Field Separator196398 +Node: Field Splitting Summary199839 +Ref: Field Splitting Summary-Footnote-1203031 +Node: Constant Size203132 +Node: Splitting By Content207716 +Ref: Splitting By Content-Footnote-1211442 +Node: Multiple Line211482 +Ref: Multiple Line-Footnote-1217329 +Node: Getline217508 +Node: Plain Getline219736 +Node: Getline/Variable221825 +Node: Getline/File222966 +Node: Getline/Variable/File224288 +Ref: Getline/Variable/File-Footnote-1225887 +Node: Getline/Pipe225974 +Node: Getline/Variable/Pipe228534 +Node: Getline/Coprocess229641 +Node: Getline/Variable/Coprocess230884 +Node: Getline Notes231598 +Node: Getline Summary233540 +Ref: table-getline-variants233883 +Node: Command line directories234739 +Node: Printing235364 +Node: Print236995 +Node: Print Examples238332 +Node: Output Separators241116 +Node: OFMT242876 +Node: Printf244234 +Node: Basic Printf245140 +Node: Control Letters246679 +Node: Format Modifiers250491 +Node: Printf Examples256500 +Node: Redirection259215 +Node: Special Files266199 +Node: Special FD266732 +Ref: Special FD-Footnote-1270356 +Node: Special Network270430 +Node: Special Caveats271280 +Node: Close Files And Pipes272076 +Ref: Close Files And Pipes-Footnote-1279099 +Ref: Close Files And Pipes-Footnote-2279247 +Node: Expressions279397 +Node: Values280466 +Node: Constants281142 
+Node: Scalar Constants281822 +Ref: Scalar Constants-Footnote-1282681 +Node: Nondecimal-numbers282863 +Node: Regexp Constants285922 +Node: Using Constant Regexps286397 +Node: Variables289452 +Node: Using Variables290107 +Node: Assignment Options291831 +Node: Conversion293703 +Ref: table-locale-affects299079 +Ref: Conversion-Footnote-1299703 +Node: All Operators299812 +Node: Arithmetic Ops300442 +Node: Concatenation302947 +Ref: Concatenation-Footnote-1305740 +Node: Assignment Ops305860 +Ref: table-assign-ops310848 +Node: Increment Ops312256 +Node: Truth Values and Conditions315726 +Node: Truth Values316809 +Node: Typing and Comparison317858 +Node: Variable Typing318647 +Ref: Variable Typing-Footnote-1322544 +Node: Comparison Operators322666 +Ref: table-relational-ops323076 +Node: POSIX String Comparison326625 +Ref: POSIX String Comparison-Footnote-1327581 +Node: Boolean Ops327719 +Ref: Boolean Ops-Footnote-1331797 +Node: Conditional Exp331888 +Node: Function Calls333620 +Node: Precedence337214 +Node: Patterns and Actions340867 +Node: Pattern Overview341921 +Node: Regexp Patterns343587 +Node: Expression Patterns344130 +Node: Ranges347704 +Node: BEGIN/END350670 +Node: Using BEGIN/END351432 +Ref: Using BEGIN/END-Footnote-1354163 +Node: I/O And BEGIN/END354269 +Node: BEGINFILE/ENDFILE356551 +Node: Empty359382 +Node: Using Shell Variables359698 +Node: Action Overview361983 +Node: Statements364340 +Node: If Statement366194 +Node: While Statement367693 +Node: Do Statement369737 +Node: For Statement370893 +Node: Switch Statement374045 +Node: Break Statement376142 +Node: Continue Statement378132 +Node: Next Statement379919 +Node: Nextfile Statement382309 +Node: Exit Statement384606 +Node: Built-in Variables387022 +Node: User-modified388117 +Ref: User-modified-Footnote-1396143 +Node: Auto-set396205 +Ref: Auto-set-Footnote-1406918 +Node: ARGC and ARGV407123 +Node: Arrays410974 +Node: Array Basics412479 +Node: Array Intro413190 +Node: Reference to Elements417508 +Node: 
Assigning Elements419778 +Node: Array Example420269 +Node: Scanning an Array422001 +Node: Delete424667 +Ref: Delete-Footnote-1427102 +Node: Numeric Array Subscripts427159 +Node: Uninitialized Subscripts429342 +Node: Multi-dimensional430970 +Node: Multi-scanning434064 +Node: Arrays of Arrays435648 +Node: Functions440225 +Node: Built-in441047 +Node: Calling Built-in442125 +Node: Numeric Functions444113 +Ref: Numeric Functions-Footnote-1447878 +Ref: Numeric Functions-Footnote-2448235 +Ref: Numeric Functions-Footnote-3448283 +Node: String Functions448552 +Ref: String Functions-Footnote-1472049 +Ref: String Functions-Footnote-2472178 +Ref: String Functions-Footnote-3472426 +Node: Gory Details472513 +Ref: table-sub-escapes474192 +Ref: table-posix-sub475506 +Ref: table-gensub-escapes476419 +Node: I/O Functions477590 +Ref: I/O Functions-Footnote-1484245 +Node: Time Functions484392 +Ref: Time Functions-Footnote-1495284 +Ref: Time Functions-Footnote-2495352 +Ref: Time Functions-Footnote-3495510 +Ref: Time Functions-Footnote-4495621 +Ref: Time Functions-Footnote-5495733 +Ref: Time Functions-Footnote-6495960 +Node: Bitwise Functions496226 +Ref: table-bitwise-ops496784 +Ref: Bitwise Functions-Footnote-1500944 +Node: Type Functions501128 +Node: I18N Functions501598 +Node: User-defined503225 +Node: Definition Syntax504029 +Ref: Definition Syntax-Footnote-1508939 +Node: Function Example509008 +Node: Function Caveats511602 +Node: Calling A Function512023 +Node: Variable Scope513138 +Node: Pass By Value/Reference515113 +Node: Return Statement518553 +Node: Dynamic Typing521534 +Node: Indirect Calls522269 +Node: Internationalization531954 +Node: I18N and L10N533380 +Node: Explaining gettext534066 +Ref: Explaining gettext-Footnote-1539132 +Ref: Explaining gettext-Footnote-2539316 +Node: Programmer i18n539481 +Node: Translator i18n543681 +Node: String Extraction544474 +Ref: String Extraction-Footnote-1545435 +Node: Printf Ordering545521 +Ref: Printf Ordering-Footnote-1548305 +Node: I18N 
Portability548369 +Ref: I18N Portability-Footnote-1550818 +Node: I18N Example550881 +Ref: I18N Example-Footnote-1553516 +Node: Gawk I18N553588 +Node: Advanced Features554205 +Node: Nondecimal Data555718 +Node: Array Sorting557301 +Node: Controlling Array Traversal558001 +Node: Controlling Scanning With A Function558752 +Node: Controlling Scanning565232 +Node: Array Sorting Functions568860 +Ref: Array Sorting Functions-Footnote-1572854 +Node: Two-way I/O573048 +Ref: Two-way I/O-Footnote-1578480 +Node: TCP/IP Networking578550 +Node: Profiling581394 +Node: Library Functions588868 +Ref: Library Functions-Footnote-1591875 +Node: Library Names592046 +Ref: Library Names-Footnote-1595517 +Ref: Library Names-Footnote-2595737 +Node: General Functions595823 +Node: Strtonum Function596776 +Node: Assert Function599706 +Node: Round Function603032 +Node: Cliff Random Function604575 +Node: Ordinal Functions605591 +Ref: Ordinal Functions-Footnote-1608661 +Ref: Ordinal Functions-Footnote-2608913 +Node: Join Function609122 +Ref: Join Function-Footnote-1610893 +Node: Gettimeofday Function611093 +Node: Data File Management614808 +Node: Filetrans Function615440 +Node: Rewind Function619579 +Node: File Checking620966 +Node: Empty Files622060 +Node: Ignoring Assigns624290 +Node: Getopt Function625843 +Ref: Getopt Function-Footnote-1637147 +Node: Passwd Functions637350 +Ref: Passwd Functions-Footnote-1646325 +Node: Group Functions646413 +Node: Walking Arrays654497 +Node: Sample Programs656066 +Node: Running Examples656731 +Node: Clones657459 +Node: Cut Program658683 +Node: Egrep Program668528 +Ref: Egrep Program-Footnote-1676301 +Node: Id Program676411 +Node: Split Program680027 +Ref: Split Program-Footnote-1683546 +Node: Tee Program683674 +Node: Uniq Program686477 +Node: Wc Program693906 +Ref: Wc Program-Footnote-1698172 +Ref: Wc Program-Footnote-2698372 +Node: Miscellaneous Programs698464 +Node: Dupword Program699652 +Node: Alarm Program701683 +Node: Translate Program706432 +Ref: 
Translate Program-Footnote-1710819 +Ref: Translate Program-Footnote-2711047 +Node: Labels Program711181 +Ref: Labels Program-Footnote-1714552 +Node: Word Sorting714636 +Node: History Sorting718520 +Node: Extract Program720359 +Ref: Extract Program-Footnote-1727842 +Node: Simple Sed727970 +Node: Igawk Program731032 +Ref: Igawk Program-Footnote-1746065 +Ref: Igawk Program-Footnote-2746266 +Node: Anagram Program746404 +Node: Signature Program749472 +Node: Debugger750572 +Node: Debugging751483 +Node: Debugging Concepts751896 +Node: Debugging Terms753752 +Node: Awk Debugging756374 +Node: Sample dgawk session757266 +Node: dgawk invocation757758 +Node: Finding The Bug758940 +Node: List of Debugger Commands765426 +Node: Breakpoint Control766737 +Node: Dgawk Execution Control770373 +Node: Viewing And Changing Data773724 +Node: Dgawk Stack777061 +Node: Dgawk Info778521 +Node: Miscellaneous Dgawk Commands782469 +Node: Readline Support787897 +Node: Dgawk Limitations788735 +Node: Language History790924 +Node: V7/SVR3.1792362 +Node: SVR4794683 +Node: POSIX796125 +Node: BTL797133 +Node: POSIX/GNU797867 +Node: Common Extensions803018 +Node: Contributors804119 +Node: Installation808295 +Node: Gawk Distribution809189 +Node: Getting809673 +Node: Extracting810499 +Node: Distribution contents812191 +Node: Unix Installation817413 +Node: Quick Installation818030 +Node: Additional Configuration Options819992 +Node: Configuration Philosophy821469 +Node: Non-Unix Installation823811 +Node: PC Installation824269 +Node: PC Binary Installation825568 +Node: PC Compiling827416 +Node: PC Testing830360 +Node: PC Using831536 +Node: Cygwin835721 +Node: MSYS836721 +Node: VMS Installation837235 +Node: VMS Compilation837838 +Ref: VMS Compilation-Footnote-1838845 +Node: VMS Installation Details838903 +Node: VMS Running840538 +Node: VMS Old Gawk842145 +Node: Bugs842619 +Node: Other Versions846529 +Node: Notes851808 +Node: Compatibility Mode852500 +Node: Additions853283 +Node: Accessing The Source854095 
+Node: Adding Code855520 +Node: New Ports861487 +Node: Dynamic Extensions865600 +Node: Internals866976 +Node: Plugin License876079 +Node: Sample Library876713 +Node: Internal File Description877399 +Node: Internal File Ops881114 +Ref: Internal File Ops-Footnote-1885895 +Node: Using Internal File Ops886035 +Node: Future Extensions888412 +Node: Basic Concepts890916 +Node: Basic High Level891673 +Ref: Basic High Level-Footnote-1895708 +Node: Basic Data Typing895893 +Node: Floating Point Issues900418 +Node: String Conversion Precision901501 +Ref: String Conversion Precision-Footnote-1903195 +Node: Unexpected Results903304 +Node: POSIX Floating Point Problems905130 +Ref: POSIX Floating Point Problems-Footnote-1908832 +Node: Glossary908870 +Node: Copying933013 +Node: GNU Free Documentation License970570 +Node: Index995707 End Tag Table diff --git a/doc/gawk.texi b/doc/gawk.texi index 60cfd1d7..49229d19 100644 --- a/doc/gawk.texi +++ b/doc/gawk.texi @@ -306,394 +306,437 @@ particular records in a file and perform operations upon them. * Index:: Concept and Variable Index. @detailmenu -* History:: The history of @command{gawk} and - @command{awk}. -* Names:: What name to use to find @command{awk}. -* This Manual:: Using this @value{DOCUMENT}. Includes - sample input files that you can use. -* Conventions:: Typographical Conventions. -* Manual History:: Brief history of the GNU project and this - @value{DOCUMENT}. -* How To Contribute:: Helping to save the world. -* Acknowledgments:: Acknowledgments. -* Running gawk:: How to run @command{gawk} programs; - includes command-line syntax. -* One-shot:: Running a short throwaway @command{awk} - program. -* Read Terminal:: Using no input files (input from terminal - instead). -* Long:: Putting permanent @command{awk} programs in - files. -* Executable Scripts:: Making self-contained @command{awk} - programs. -* Comments:: Adding documentation to @command{gawk} - programs. -* Quoting:: More discussion of shell quoting issues. 
-* DOS Quoting:: Quoting in Windows Batch Files. -* Sample Data Files:: Sample data files for use in the - @command{awk} programs illustrated in this - @value{DOCUMENT}. -* Very Simple:: A very simple example. -* Two Rules:: A less simple one-line example using two - rules. -* More Complex:: A more complex example. -* Statements/Lines:: Subdividing or combining statements into - lines. -* Other Features:: Other Features of @command{awk}. -* When:: When to use @command{gawk} and when to use - other things. -* Command Line:: How to run @command{awk}. -* Options:: Command-line options and their meanings. -* Other Arguments:: Input file names and variable assignments. -* Naming Standard Input:: How to specify standard input with other - files. -* Environment Variables:: The environment variables @command{gawk} - uses. -* AWKPATH Variable:: Searching directories for @command{awk} - programs. -* Other Environment Variables:: The environment variables. -* Exit Status:: @command{gawk}'s exit status. -* Include Files:: Including other files into your program. -* Obsolete:: Obsolete Options and/or features. -* Undocumented:: Undocumented Options and Features. -* Regexp Usage:: How to Use Regular Expressions. -* Escape Sequences:: How to write nonprinting characters. -* Regexp Operators:: Regular Expression Operators. -* Bracket Expressions:: What can go between @samp{[...]}. -* GNU Regexp Operators:: Operators specific to GNU software. -* Case-sensitivity:: How to do case-insensitive matching. -* Leftmost Longest:: How much text matches. -* Computed Regexps:: Using Dynamic Regexps. -* Locales:: How the locale affects things. -* Records:: Controlling how data is split into records. -* Fields:: An introduction to fields. -* Nonconstant Fields:: Nonconstant Field Numbers. -* Changing Fields:: Changing the Contents of a Field. -* Field Separators:: The field separator and how to change it. -* Default Field Splitting:: How fields are normally separated. 
-* Regexp Field Splitting:: Using regexps as the field separator. -* Single Character Fields:: Making each character a separate field. -* Command Line Field Separator:: Setting @code{FS} from the command-line. -* Field Splitting Summary:: Some final points and a summary table. -* Constant Size:: Reading constant width data. -* Splitting By Content:: Defining Fields By Content -* Multiple Line:: Reading multi-line records. -* Getline:: Reading files under explicit program - control using the @code{getline} function. -* Plain Getline:: Using @code{getline} with no arguments. -* Getline/Variable:: Using @code{getline} into a variable. -* Getline/File:: Using @code{getline} from a file. -* Getline/Variable/File:: Using @code{getline} into a variable from a - file. -* Getline/Pipe:: Using @code{getline} from a pipe. -* Getline/Variable/Pipe:: Using @code{getline} into a variable from a - pipe. -* Getline/Coprocess:: Using @code{getline} from a coprocess. -* Getline/Variable/Coprocess:: Using @code{getline} into a variable from a - coprocess. -* Getline Notes:: Important things to know about - @code{getline}. -* Getline Summary:: Summary of @code{getline} Variants. -* Command line directories:: What happens if you put a directory on the - command line. -* Print:: The @code{print} statement. -* Print Examples:: Simple examples of @code{print} statements. -* Output Separators:: The output separators and how to change - them. -* OFMT:: Controlling Numeric Output With - @code{print}. -* Printf:: The @code{printf} statement. -* Basic Printf:: Syntax of the @code{printf} statement. -* Control Letters:: Format-control letters. -* Format Modifiers:: Format-specification modifiers. -* Printf Examples:: Several examples. -* Redirection:: How to redirect output to multiple files - and pipes. -* Special Files:: File name interpretation in @command{gawk}. - @command{gawk} allows access to inherited - file descriptors. -* Special FD:: Special files for I/O. 
-* Special Network:: Special files for network communications. -* Special Caveats:: Things to watch out for. -* Close Files And Pipes:: Closing Input and Output Files and Pipes. -* Values:: Constants, Variables, and Regular - Expressions. -* Constants:: String, numeric and regexp constants. -* Scalar Constants:: Numeric and string constants. -* Nondecimal-numbers:: What are octal and hex numbers. -* Regexp Constants:: Regular Expression constants. -* Using Constant Regexps:: When and how to use a regexp constant. -* Variables:: Variables give names to values for later - use. -* Using Variables:: Using variables in your programs. -* Assignment Options:: Setting variables on the command-line and a - summary of command-line syntax. This is an - advanced method of input. -* Conversion:: The conversion of strings to numbers and - vice versa. -* All Operators:: @command{gawk}'s operators. -* Arithmetic Ops:: Arithmetic operations (@samp{+}, @samp{-}, - etc.) -* Concatenation:: Concatenating strings. -* Assignment Ops:: Changing the value of a variable or a - field. -* Increment Ops:: Incrementing the numeric value of a - variable. -* Truth Values and Conditions:: Testing for true and false. -* Truth Values:: What is ``true'' and what is ``false''. -* Typing and Comparison:: How variables acquire types and how this - affects comparison of numbers and strings - with @samp{<}, etc. -* Variable Typing:: String type versus numeric type. -* Comparison Operators:: The comparison operators. -* POSIX String Comparison:: String comparison with POSIX rules. -* Boolean Ops:: Combining comparison expressions using - boolean operators @samp{||} (``or''), - @samp{&&} (``and'') and @samp{!} (``not''). -* Conditional Exp:: Conditional expressions select between two - subexpressions under control of a third - subexpression. -* Function Calls:: A function call is an expression. -* Precedence:: How various operators nest. -* Pattern Overview:: What goes into a pattern. 
-* Regexp Patterns:: Using regexps as patterns. -* Expression Patterns:: Any expression can be used as a pattern. -* Ranges:: Pairs of patterns specify record ranges. -* BEGIN/END:: Specifying initialization and cleanup - rules. -* Using BEGIN/END:: How and why to use BEGIN/END rules. -* I/O And BEGIN/END:: I/O issues in BEGIN/END rules. -* Empty:: The empty pattern, which matches every - record. -* BEGINFILE/ENDFILE:: Two special patterns for advanced control. -* Using Shell Variables:: How to use shell variables with - @command{awk}. -* Action Overview:: What goes into an action. -* Statements:: Describes the various control statements in - detail. -* If Statement:: Conditionally execute some @command{awk} - statements. -* While Statement:: Loop until some condition is satisfied. -* Do Statement:: Do specified action while looping until - some condition is satisfied. -* For Statement:: Another looping statement, that provides - initialization and increment clauses. -* Switch Statement:: Switch/case evaluation for conditional - execution of statements based on a value. -* Break Statement:: Immediately exit the innermost enclosing - loop. -* Continue Statement:: Skip to the end of the innermost enclosing - loop. -* Next Statement:: Stop processing the current input record. -* Nextfile Statement:: Stop processing the current file. -* Exit Statement:: Stop execution of @command{awk}. -* Built-in Variables:: Summarizes the built-in variables. -* User-modified:: Built-in variables that you change to - control @command{awk}. -* Auto-set:: Built-in variables where @command{awk} - gives you information. -* ARGC and ARGV:: Ways to use @code{ARGC} and @code{ARGV}. -* Array Basics:: The basics of arrays. -* Array Intro:: Introduction to Arrays -* Reference to Elements:: How to examine one element of an array. -* Assigning Elements:: How to change an element of an array. 
-* Array Example:: Basic Example of an Array -* Scanning an Array:: A variation of the @code{for} statement. It - loops through the indices of an array's - existing elements. -* Controlling Scanning:: Controlling the order in which arrays - are scanned. -* Delete:: The @code{delete} statement removes an - element from an array. -* Numeric Array Subscripts:: How to use numbers as subscripts in - @command{awk}. -* Uninitialized Subscripts:: Using Uninitialized variables as - subscripts. -* Multi-dimensional:: Emulating multidimensional arrays in - @command{awk}. -* Multi-scanning:: Scanning multidimensional arrays. -* Array Sorting:: Sorting array values and indices. -* Arrays of Arrays:: True multidimensional arrays. -* Built-in:: Summarizes the built-in functions. -* Calling Built-in:: How to call built-in functions. -* Numeric Functions:: Functions that work with numbers, including - @code{int()}, @code{sin()} and - @code{rand()}. -* String Functions:: Functions for string manipulation, such as - @code{split()}, @code{match()} and - @code{sprintf()}. -* Gory Details:: More than you want to know about @samp{\} - and @samp{&} with @code{sub()}, - @code{gsub()}, and @code{gensub()}. -* I/O Functions:: Functions for files and shell commands. -* Time Functions:: Functions for dealing with timestamps. -* Bitwise Functions:: Functions for bitwise operations. -* Type Functions:: Functions for type information. -* I18N Functions:: Functions for string translation. -* User-defined:: Describes User-defined functions in detail. -* Definition Syntax:: How to write definitions and what they - mean. -* Function Example:: An example function definition and what it - does. -* Function Caveats:: Things to watch out for. -* Calling A Function:: Don't use spaces. -* Variable Scope:: Controlling variable scope. -* Pass By Value/Reference:: Passing parameters. -* Return Statement:: Specifying the value a function returns. -* Dynamic Typing:: How variable types can change at runtime. 
-* Indirect Calls:: Choosing the function to call at runtime. -* I18N and L10N:: Internationalization and Localization. -* Explaining gettext:: How GNU @code{gettext} works. -* Programmer i18n:: Features for the programmer. -* Translator i18n:: Features for the translator. -* String Extraction:: Extracting marked strings. -* Printf Ordering:: Rearranging @code{printf} arguments. -* I18N Portability:: @command{awk}-level portability issues. -* I18N Example:: A simple i18n example. -* Gawk I18N:: @command{gawk} is also internationalized. -* Nondecimal Data:: Allowing nondecimal input data. -* Two-way I/O:: Two-way communications with another - process. -* TCP/IP Networking:: Using @command{gawk} for network - programming. -* Profiling:: Profiling your @command{awk} programs. -* Library Names:: How to best name private global variables - in library functions. -* General Functions:: Functions that are of general use. -* Strtonum Function:: A replacement for the built-in - @code{strtonum()} function. -* Assert Function:: A function for assertions in @command{awk} - programs. -* Round Function:: A function for rounding if @code{sprintf()} - does not do it correctly. -* Cliff Random Function:: The Cliff Random Number Generator. -* Ordinal Functions:: Functions for using characters as numbers - and vice versa. -* Join Function:: A function to join an array into a string. -* Gettimeofday Function:: A function to get formatted times. -* Data File Management:: Functions for managing command-line data - files. -* Filetrans Function:: A function for handling data file - transitions. -* Rewind Function:: A function for rereading the current file. -* File Checking:: Checking that data files are readable. -* Empty Files:: Checking for zero-length files. -* Ignoring Assigns:: Treating assignments as file names. -* Getopt Function:: A function for processing command-line - arguments. -* Passwd Functions:: Functions for getting user information. 
-* Group Functions:: Functions for getting group information. -* Running Examples:: How to run these examples. -* Clones:: Clones of common utilities. -* Cut Program:: The @command{cut} utility. -* Egrep Program:: The @command{egrep} utility. -* Id Program:: The @command{id} utility. -* Split Program:: The @command{split} utility. -* Tee Program:: The @command{tee} utility. -* Uniq Program:: The @command{uniq} utility. -* Wc Program:: The @command{wc} utility. -* Miscellaneous Programs:: Some interesting @command{awk} programs. -* Dupword Program:: Finding duplicated words in a document. -* Alarm Program:: An alarm clock. -* Translate Program:: A program similar to the @command{tr} - utility. -* Labels Program:: Printing mailing labels. -* Word Sorting:: A program to produce a word usage count. -* History Sorting:: Eliminating duplicate entries from a - history file. -* Extract Program:: Pulling out programs from Texinfo source - files. -* Simple Sed:: A Simple Stream Editor. -* Igawk Program:: A wrapper for @command{awk} that includes - files. -* Anagram Program:: Finding anagrams from a dictionary. -* Signature Program:: People do amazing things with too much time - on their hands. -* Debugging:: Introduction to @command{dgawk}. -* Debugging Concepts:: Debugging In General. -* Debugging Terms:: Additional Debugging Concepts. -* Awk Debugging:: Awk Debugging. -* Sample dgawk session:: Sample @command{dgawk} session. -* dgawk invocation:: @command{dgawk} Invocation. -* Finding The Bug:: Finding The Bug. -* List of Debugger Commands:: Main @command{dgawk} Commands. -* Breakpoint Control:: Control of breakpoints. -* Dgawk Execution Control:: Control of execution. -* Viewing And Changing Data:: Viewing and changing data. -* Dgawk Stack:: Dealing with the stack. -* Dgawk Info:: Obtaining information about the program and - the debugger state. -* Miscellaneous Dgawk Commands:: Miscellaneous Commands. -* Readline Support:: Readline Support. 
-* Dgawk Limitations:: Limitations and future plans. -* V7/SVR3.1:: The major changes between V7 and System V - Release 3.1. -* SVR4:: Minor changes between System V Releases 3.1 - and 4. -* POSIX:: New features from the POSIX standard. -* BTL:: New features from Brian Kernighan's - version of @command{awk}. -* POSIX/GNU:: The extensions in @command{gawk} not in - POSIX @command{awk}. -* Contributors:: The major contributors to @command{gawk}. -* Common Extensions:: Common Extensions Summary. -* Gawk Distribution:: What is in the @command{gawk} distribution. -* Getting:: How to get the distribution. -* Extracting:: How to extract the distribution. -* Distribution contents:: What is in the distribution. -* Unix Installation:: Installing @command{gawk} under various - versions of Unix. -* Quick Installation:: Compiling @command{gawk} under Unix. -* Additional Configuration Options:: Other compile-time options. -* Configuration Philosophy:: How it's all supposed to work. -* Non-Unix Installation:: Installation on Other Operating Systems. -* PC Installation:: Installing and Compiling @command{gawk} on - MS-DOS and OS/2. -* PC Binary Installation:: Installing a prepared distribution. -* PC Compiling:: Compiling @command{gawk} for MS-DOS, - Windows32, and OS/2. -* PC Testing:: Testing @command{gawk} on PC - Operating Systems. -* PC Using:: Running @command{gawk} on MS-DOS, Windows32 - and OS/2. -* Cygwin:: Building and running @command{gawk} for - Cygwin. -* MSYS:: Using @command{gawk} In The MSYS - Environment. -* VMS Installation:: Installing @command{gawk} on VMS. -* VMS Compilation:: How to compile @command{gawk} under VMS. -* VMS Installation Details:: How to install @command{gawk} under VMS. -* VMS Running:: How to run @command{gawk} under VMS. -* VMS Old Gawk:: An old version comes with some VMS systems. -* Bugs:: Reporting Problems and Bugs. -* Other Versions:: Other freely available @command{awk} - implementations. 
-* Compatibility Mode:: How to disable certain @command{gawk} - extensions. -* Additions:: Making Additions To @command{gawk}. -* Accessing The Source:: Accessing the Git repository. -* Adding Code:: Adding code to the main body of - @command{gawk}. -* New Ports:: Porting @command{gawk} to a new operating - system. -* Dynamic Extensions:: Adding new built-in functions to - @command{gawk}. -* Internals:: A brief look at some @command{gawk} - internals. -* Plugin License:: A note about licensing. -* Sample Library:: A example of new functions. -* Internal File Description:: What the new functions will do. -* Internal File Ops:: The code for internal file operations. -* Using Internal File Ops:: How to use an external extension. -* Future Extensions:: New features that may be implemented one - day. -* Basic High Level:: The high level view. -* Basic Data Typing:: A very quick intro to data types. -* Floating Point Issues:: Stuff to know about floating-point numbers. -* String Conversion Precision:: The String Value Can Lie. -* Unexpected Results:: Floating Point Numbers Are Not Abstract - Numbers. -* POSIX Floating Point Problems:: Standards Versus Existing Practice. +* History:: The history of @command{gawk} and + @command{awk}. +* Names:: What name to use to find @command{awk}. +* This Manual:: Using this @value{DOCUMENT}. Includes + sample input files that you can use. +* Conventions:: Typographical Conventions. +* Manual History:: Brief history of the GNU project and + this @value{DOCUMENT}. +* How To Contribute:: Helping to save the world. +* Acknowledgments:: Acknowledgments. +* Running gawk:: How to run @command{gawk} programs; + includes command-line syntax. +* One-shot:: Running a short throwaway @command{awk} + program. +* Read Terminal:: Using no input files (input from + terminal instead). +* Long:: Putting permanent @command{awk} + programs in files. +* Executable Scripts:: Making self-contained @command{awk} + programs. 
+* Comments:: Adding documentation to @command{gawk} + programs. +* Quoting:: More discussion of shell quoting + issues. +* DOS Quoting:: Quoting in Windows Batch Files. +* Sample Data Files:: Sample data files for use in the + @command{awk} programs illustrated in + this @value{DOCUMENT}. +* Very Simple:: A very simple example. +* Two Rules:: A less simple one-line example using + two rules. +* More Complex:: A more complex example. +* Statements/Lines:: Subdividing or combining statements + into lines. +* Other Features:: Other Features of @command{awk}. +* When:: When to use @command{gawk} and when to + use other things. +* Command Line:: How to run @command{awk}. +* Options:: Command-line options and their + meanings. +* Other Arguments:: Input file names and variable + assignments. +* Naming Standard Input:: How to specify standard input with + other files. +* Environment Variables:: The environment variables + @command{gawk} uses. +* AWKPATH Variable:: Searching directories for @command{awk} + programs. +* Other Environment Variables:: The environment variables. +* Exit Status:: @command{gawk}'s exit status. +* Include Files:: Including other files into your + program. +* Obsolete:: Obsolete Options and/or features. +* Undocumented:: Undocumented Options and Features. +* Regexp Usage:: How to Use Regular Expressions. +* Escape Sequences:: How to write nonprinting characters. +* Regexp Operators:: Regular Expression Operators. +* Bracket Expressions:: What can go between @samp{[...]}. +* GNU Regexp Operators:: Operators specific to GNU software. +* Case-sensitivity:: How to do case-insensitive matching. +* Leftmost Longest:: How much text matches. +* Computed Regexps:: Using Dynamic Regexps. +* Locales:: How the locale affects things. +* Records:: Controlling how data is split into + records. +* Fields:: An introduction to fields. +* Nonconstant Fields:: Nonconstant Field Numbers. +* Changing Fields:: Changing the Contents of a Field. 
+* Field Separators:: The field separator and how to change + it. +* Default Field Splitting:: How fields are normally separated. +* Regexp Field Splitting:: Using regexps as the field separator. +* Single Character Fields:: Making each character a separate field. +* Command Line Field Separator:: Setting @code{FS} from the + command-line. +* Field Splitting Summary:: Some final points and a summary table. +* Constant Size:: Reading constant width data. +* Splitting By Content:: Defining Fields By Content +* Multiple Line:: Reading multi-line records. +* Getline:: Reading files under explicit program + control using the @code{getline} + function. +* Plain Getline:: Using @code{getline} with no arguments. +* Getline/Variable:: Using @code{getline} into a variable. +* Getline/File:: Using @code{getline} from a file. +* Getline/Variable/File:: Using @code{getline} into a variable + from a file. +* Getline/Pipe:: Using @code{getline} from a pipe. +* Getline/Variable/Pipe:: Using @code{getline} into a variable + from a pipe. +* Getline/Coprocess:: Using @code{getline} from a coprocess. +* Getline/Variable/Coprocess:: Using @code{getline} into a variable + from a coprocess. +* Getline Notes:: Important things to know about + @code{getline}. +* Getline Summary:: Summary of @code{getline} Variants. +* Command line directories:: What happens if you put a directory on + the command line. +* Print:: The @code{print} statement. +* Print Examples:: Simple examples of @code{print} + statements. +* Output Separators:: The output separators and how to change + them. +* OFMT:: Controlling Numeric Output With + @code{print}. +* Printf:: The @code{printf} statement. +* Basic Printf:: Syntax of the @code{printf} statement. +* Control Letters:: Format-control letters. +* Format Modifiers:: Format-specification modifiers. +* Printf Examples:: Several examples. +* Redirection:: How to redirect output to multiple + files and pipes. 
+* Special Files:: File name interpretation in + @command{gawk}. @command{gawk} allows + access to inherited file descriptors. +* Special FD:: Special files for I/O. +* Special Network:: Special files for network + communications. +* Special Caveats:: Things to watch out for. +* Close Files And Pipes:: Closing Input and Output Files and + Pipes. +* Values:: Constants, Variables, and Regular + Expressions. +* Constants:: String, numeric and regexp constants. +* Scalar Constants:: Numeric and string constants. +* Nondecimal-numbers:: What are octal and hex numbers. +* Regexp Constants:: Regular Expression constants. +* Using Constant Regexps:: When and how to use a regexp constant. +* Variables:: Variables give names to values for + later use. +* Using Variables:: Using variables in your programs. +* Assignment Options:: Setting variables on the command-line + and a summary of command-line syntax. + This is an advanced method of input. +* Conversion:: The conversion of strings to numbers + and vice versa. +* All Operators:: @command{gawk}'s operators. +* Arithmetic Ops:: Arithmetic operations (@samp{+}, + @samp{-}, etc.) +* Concatenation:: Concatenating strings. +* Assignment Ops:: Changing the value of a variable or a + field. +* Increment Ops:: Incrementing the numeric value of a + variable. +* Truth Values and Conditions:: Testing for true and false. +* Truth Values:: What is ``true'' and what is ``false''. +* Typing and Comparison:: How variables acquire types and how + this affects comparison of numbers and + strings with @samp{<}, etc. +* Variable Typing:: String type versus numeric type. +* Comparison Operators:: The comparison operators. +* POSIX String Comparison:: String comparison with POSIX rules. +* Boolean Ops:: Combining comparison expressions using + boolean operators @samp{||} (``or''), + @samp{&&} (``and'') and @samp{!} + (``not''). 
+* Conditional Exp:: Conditional expressions select between + two subexpressions under control of a + third subexpression. +* Function Calls:: A function call is an expression. +* Precedence:: How various operators nest. +* Pattern Overview:: What goes into a pattern. +* Regexp Patterns:: Using regexps as patterns. +* Expression Patterns:: Any expression can be used as a + pattern. +* Ranges:: Pairs of patterns specify record + ranges. +* BEGIN/END:: Specifying initialization and cleanup + rules. +* Using BEGIN/END:: How and why to use BEGIN/END rules. +* I/O And BEGIN/END:: I/O issues in BEGIN/END rules. +* BEGINFILE/ENDFILE:: Two special patterns for advanced + control. +* Empty:: The empty pattern, which matches every + record. +* Using Shell Variables:: How to use shell variables with + @command{awk}. +* Action Overview:: What goes into an action. +* Statements:: Describes the various control + statements in detail. +* If Statement:: Conditionally execute some + @command{awk} statements. +* While Statement:: Loop until some condition is satisfied. +* Do Statement:: Do specified action while looping until + some condition is satisfied. +* For Statement:: Another looping statement, that + provides initialization and increment + clauses. +* Switch Statement:: Switch/case evaluation for conditional + execution of statements based on a + value. +* Break Statement:: Immediately exit the innermost + enclosing loop. +* Continue Statement:: Skip to the end of the innermost + enclosing loop. +* Next Statement:: Stop processing the current input + record. +* Nextfile Statement:: Stop processing the current file. +* Exit Statement:: Stop execution of @command{awk}. +* Built-in Variables:: Summarizes the built-in variables. +* User-modified:: Built-in variables that you change to + control @command{awk}. +* Auto-set:: Built-in variables where @command{awk} + gives you information. +* ARGC and ARGV:: Ways to use @code{ARGC} and + @code{ARGV}. 
+* Array Basics:: The basics of arrays. +* Array Intro:: Introduction to Arrays +* Reference to Elements:: How to examine one element of an array. +* Assigning Elements:: How to change an element of an array. +* Array Example:: Basic Example of an Array +* Scanning an Array:: A variation of the @code{for} + statement. It loops through the indices + of an array's existing elements. +* Delete:: The @code{delete} statement removes an + element from an array. +* Numeric Array Subscripts:: How to use numbers as subscripts in + @command{awk}. +* Uninitialized Subscripts:: Using Uninitialized variables as + subscripts. +* Multi-dimensional:: Emulating multidimensional arrays in + @command{awk}. +* Multi-scanning:: Scanning multidimensional arrays. +* Arrays of Arrays:: True multidimensional arrays. +* Built-in:: Summarizes the built-in functions. +* Calling Built-in:: How to call built-in functions. +* Numeric Functions:: Functions that work with numbers, + including @code{int()}, @code{sin()} + and @code{rand()}. +* String Functions:: Functions for string manipulation, such + as @code{split()}, @code{match()} and + @code{sprintf()}. +* Gory Details:: More than you want to know about + @samp{\} and @samp{&} with + @code{sub()}, @code{gsub()}, and + @code{gensub()}. +* I/O Functions:: Functions for files and shell commands. +* Time Functions:: Functions for dealing with timestamps. +* Bitwise Functions:: Functions for bitwise operations. +* Type Functions:: Functions for type information. +* I18N Functions:: Functions for string translation. +* User-defined:: Describes User-defined functions in + detail. +* Definition Syntax:: How to write definitions and what they + mean. +* Function Example:: An example function definition and what + it does. +* Function Caveats:: Things to watch out for. +* Calling A Function:: Don't use spaces. +* Variable Scope:: Controlling variable scope. +* Pass By Value/Reference:: Passing parameters. 
+* Return Statement:: Specifying the value a function + returns. +* Dynamic Typing:: How variable types can change at + runtime. +* Indirect Calls:: Choosing the function to call at + runtime. +* I18N and L10N:: Internationalization and Localization. +* Explaining gettext:: How GNU @code{gettext} works. +* Programmer i18n:: Features for the programmer. +* Translator i18n:: Features for the translator. +* String Extraction:: Extracting marked strings. +* Printf Ordering:: Rearranging @code{printf} arguments. +* I18N Portability:: @command{awk}-level portability issues. +* I18N Example:: A simple i18n example. +* Gawk I18N:: @command{gawk} is also + internationalized. +* Nondecimal Data:: Allowing nondecimal input data. +* Array Sorting:: Facilities for controlling array + traversal and sorting arrays. +* Controlling Array Traversal:: How to use PROCINFO["sorted_in"]. +* Controlling Scanning With A Function:: Using a function to control scanning. +* Controlling Scanning:: Controlling the order in which arrays + are scanned. +* Array Sorting Functions:: How to use @code{asort()} and + @code{asorti()}. +* Two-way I/O:: Two-way communications with another + process. +* TCP/IP Networking:: Using @command{gawk} for network + programming. +* Profiling:: Profiling your @command{awk} programs. +* Library Names:: How to best name private global + variables in library functions. +* General Functions:: Functions that are of general use. +* Strtonum Function:: A replacement for the built-in + @code{strtonum()} function. +* Assert Function:: A function for assertions in + @command{awk} programs. +* Round Function:: A function for rounding if + @code{sprintf()} does not do it + correctly. +* Cliff Random Function:: The Cliff Random Number Generator. +* Ordinal Functions:: Functions for using characters as + numbers and vice versa. +* Join Function:: A function to join an array into a + string. +* Gettimeofday Function:: A function to get formatted times. 
+* Data File Management:: Functions for managing command-line + data files. +* Filetrans Function:: A function for handling data file + transitions. +* Rewind Function:: A function for rereading the current + file. +* File Checking:: Checking that data files are readable. +* Empty Files:: Checking for zero-length files. +* Ignoring Assigns:: Treating assignments as file names. +* Getopt Function:: A function for processing command-line + arguments. +* Passwd Functions:: Functions for getting user information. +* Group Functions:: Functions for getting group + information. +* Walking Arrays:: A function to walk arrays of arrays. +* Running Examples:: How to run these examples. +* Clones:: Clones of common utilities. +* Cut Program:: The @command{cut} utility. +* Egrep Program:: The @command{egrep} utility. +* Id Program:: The @command{id} utility. +* Split Program:: The @command{split} utility. +* Tee Program:: The @command{tee} utility. +* Uniq Program:: The @command{uniq} utility. +* Wc Program:: The @command{wc} utility. +* Miscellaneous Programs:: Some interesting @command{awk} + programs. +* Dupword Program:: Finding duplicated words in a document. +* Alarm Program:: An alarm clock. +* Translate Program:: A program similar to the @command{tr} + utility. +* Labels Program:: Printing mailing labels. +* Word Sorting:: A program to produce a word usage + count. +* History Sorting:: Eliminating duplicate entries from a + history file. +* Extract Program:: Pulling out programs from Texinfo + source files. +* Simple Sed:: A Simple Stream Editor. +* Igawk Program:: A wrapper for @command{awk} that + includes files. +* Anagram Program:: Finding anagrams from a dictionary. +* Signature Program:: People do amazing things with too much + time on their hands. +* Debugging:: Introduction to @command{dgawk}. +* Debugging Concepts:: Debugging In General. +* Debugging Terms:: Additional Debugging Concepts. +* Awk Debugging:: Awk Debugging. 
+* Sample dgawk session:: Sample @command{dgawk} session. +* dgawk invocation:: @command{dgawk} Invocation. +* Finding The Bug:: Finding The Bug. +* List of Debugger Commands:: Main @command{dgawk} Commands. +* Breakpoint Control:: Control of breakpoints. +* Dgawk Execution Control:: Control of execution. +* Viewing And Changing Data:: Viewing and changing data. +* Dgawk Stack:: Dealing with the stack. +* Dgawk Info:: Obtaining information about the program + and the debugger state. +* Miscellaneous Dgawk Commands:: Miscellaneous Commands. +* Readline Support:: Readline Support. +* Dgawk Limitations:: Limitations and future plans. +* V7/SVR3.1:: The major changes between V7 and System + V Release 3.1. +* SVR4:: Minor changes between System V Releases + 3.1 and 4. +* POSIX:: New features from the POSIX standard. +* BTL:: New features from Brian Kernighan's + version of @command{awk}. +* POSIX/GNU:: The extensions in @command{gawk} not in + POSIX @command{awk}. +* Common Extensions:: Common Extensions Summary. +* Contributors:: The major contributors to + @command{gawk}. +* Gawk Distribution:: What is in the @command{gawk} + distribution. +* Getting:: How to get the distribution. +* Extracting:: How to extract the distribution. +* Distribution contents:: What is in the distribution. +* Unix Installation:: Installing @command{gawk} under various + versions of Unix. +* Quick Installation:: Compiling @command{gawk} under Unix. +* Additional Configuration Options:: Other compile-time options. +* Configuration Philosophy:: How it's all supposed to work. +* Non-Unix Installation:: Installation on Other Operating + Systems. +* PC Installation:: Installing and Compiling @command{gawk} + on MS-DOS and OS/2. +* PC Binary Installation:: Installing a prepared distribution. +* PC Compiling:: Compiling @command{gawk} for MS-DOS, + Windows32, and OS/2. +* PC Testing:: Testing @command{gawk} on PC systems. +* PC Using:: Running @command{gawk} on MS-DOS, + Windows32 and OS/2. 
+* Cygwin:: Building and running @command{gawk} for + Cygwin. +* MSYS:: Using @command{gawk} In The MSYS + Environment. +* VMS Installation:: Installing @command{gawk} on VMS. +* VMS Compilation:: How to compile @command{gawk} under + VMS. +* VMS Installation Details:: How to install @command{gawk} under + VMS. +* VMS Running:: How to run @command{gawk} under VMS. +* VMS Old Gawk:: An old version comes with some VMS + systems. +* Bugs:: Reporting Problems and Bugs. +* Other Versions:: Other freely available @command{awk} + implementations. +* Compatibility Mode:: How to disable certain @command{gawk} + extensions. +* Additions:: Making Additions To @command{gawk}. +* Accessing The Source:: Accessing the Git repository. +* Adding Code:: Adding code to the main body of + @command{gawk}. +* New Ports:: Porting @command{gawk} to a new + operating system. +* Dynamic Extensions:: Adding new built-in functions to + @command{gawk}. +* Internals:: A brief look at some @command{gawk} + internals. +* Plugin License:: A note about licensing. +* Sample Library:: An example of new functions. +* Internal File Description:: What the new functions will do. +* Internal File Ops:: The code for internal file operations. +* Using Internal File Ops:: How to use an external extension. +* Future Extensions:: New features that may be implemented + one day. +* Basic High Level:: The high level view. +* Basic Data Typing:: A very quick intro to data types. +* Floating Point Issues:: Stuff to know about floating-point + numbers. +* String Conversion Precision:: The String Value Can Lie. +* Unexpected Results:: Floating Point Numbers Are Not Abstract + Numbers. +* POSIX Floating Point Problems:: Standards Versus Existing Practice. @end detailmenu @end menu @@ -13036,7 +13079,6 @@ same @command{awk} program. * Uninitialized Subscripts:: Using Uninitialized variables as subscripts. * Multi-dimensional:: Emulating multidimensional arrays in @command{awk}. 
-* Array Sorting:: Sorting array values and indices. * Arrays of Arrays:: True multidimensional arrays. @end menu @@ -13378,11 +13420,6 @@ END @{ @cindex elements in arrays, scanning @cindex arrays, scanning -@menu -* Controlling Scanning:: Controlling the order in which arrays are scanned. -* Controlling Scanning With A Function:: Using a function to control scanning. -@end menu - In programs that use arrays, it is often necessary to use a loop that executes once for each element of an array. In other languages, where arrays are contiguous and indices are limited to positive integers, @@ -13447,286 +13484,14 @@ the loop body; it is not predictable whether the @code{for} loop will reach them. Similarly, changing @var{var} inside the loop may produce strange results. It is best to avoid such things. -@node Controlling Scanning -@subsubsection Controlling Array Scanning Order - As an extension, @command{gawk} makes it possible for you to loop over the elements of an array in order, based on the value of @code{PROCINFO["sorted_in"]} (@pxref{Auto-set}). -Several sorting options are available: - -@table @samp -@item ascending index string -Order by indices compared as strings; this is the most basic sort. -(Internally, array indices are always strings, so with @samp{a[2*5] = 1} -the index is actually @code{"10"} rather than numeric 10.) - -@item ascending index number -Order by indices but force them to be treated as numbers in the process. -Any index with non-numeric value will end up positioned as if it were zero. - -@item ascending value string -Order by element values rather than by indices. Scalar values are -compared as strings. Subarrays, if present, come out last. - -@item ascending value number -Order by values but force scalar values to be treated as numbers -for the purpose of comparison. If there are subarrays, those appear -at the end of the sorted list. - -@item descending index string -Reverse order from the most basic sort. 
- -@item descending index number -Numeric indices ordered from high to low. - -@item descending value string -Element values, treated as strings, ordered from high to low. Subarrays, if present, -come out first. - -@item descending value number -Element values, treated as numbers, ordered from high to low. Subarrays, if present, -come out first. - -@item unsorted -Array elements are processed in arbitrary order, the normal @command{awk} -behavior. You can also get the normal behavior by just -deleting the @code{"sorted_in"} item from the @code{PROCINFO} array, if -it previously had a value assigned to it. -@end table - -The array traversal order is determined before the @code{for} loop -starts to run. Changing @code{PROCINFO["sorted_in"]} in the loop body -will not affect the loop. - -Portions of the sort specification string may be truncated or omitted. -The default is @samp{ascending} for direction, @samp{index} for sort key type, -and @samp{string} for comparison mode. This implies that one can -simply assign the empty string, "", instead of "ascending index string" to -@code{PROCINFO["sorted_in"]} for the same effect. - -For example: - -@example -$ @kbd{gawk 'BEGIN @{} -> @kbd{ a[4] = 4} -> @kbd{ a[3] = 3} -> @kbd{ for (i in a)} -> @kbd{ print i, a[i]} -> @kbd{@}'} -@print{} 4 4 -@print{} 3 3 -$ @kbd{gawk 'BEGIN @{} -> @kbd{ PROCINFO["sorted_in"] = "asc index"} -> @kbd{ a[4] = 4} -> @kbd{ a[3] = 3} -> @kbd{ for (i in a)} -> @kbd{ print i, a[i]} -> @kbd{@}'} -@print{} 3 3 -@print{} 4 4 -@end example - -When sorting an array by element values, if a value happens to be -a subarray then it is considered to be greater than any string or -numeric value, regardless of what the subarray itself contains, -and all subarrays are treated as being equal to each other. Their -order relative to each other is determined by their index strings. +This is an advanced feature, so discussion of it is delayed +until @ref{Controlling Array Traversal}. 
-@node Controlling Scanning With A Function -@subsubsection Controlling Array Scanning Order With a User-defined Function - -The value of @code{PROCINFO["sorted_in"]} can also be a function name. -This lets you traverse an array based on any custom criterion. -The array elements are ordered according to the return value of this -function. This comparison function should be defined with at least -four arguments: - -@example -function comp_func(i1, v1, i2, v2) -@{ - @var{compare elements 1 and 2 in some fashion} - @var{return < 0; 0; or > 0} -@} -@end example - -Here, @var{i1} and @var{i2} are the indices, and @var{v1} and @var{v2} -are the corresponding values of the two elements being compared. -Either @var{v1} or @var{v2}, or both, can be arrays if the array being -traversed contains subarrays as values. The three possible return values -are interpreted this way: - -@itemize @bullet -@item -If the return value of @code{comp_func(i1, v1, i2, v2)} is less than zero, -index @var{i1} comes before index @var{i2} during loop traversal. - -@item -If @code{comp_func(i1, v1, i2, v2)} returns zero, @var{i1} and @var{i2} -come together but the relative order with respect to each other is undefined. - -@item -If the return value of @code{comp_func(i1, v1, i2, v2)} is greater than zero, -@var{i1} comes after @var{i2}. 
-@end itemize - -The following comparison function can be used to scan an array in -numerical order of the indices: - -@example -function cmp_num_idx(i1, v1, i2, v2) -@{ - # numerical index comparison, ascending order - return (i1 - i2) -@} -@end example - -This function traverses an array based on an order by element values -rather than by indices: - -@example -function cmp_str_val(i1, v1, i2, v2) -@{ - # string value comparison, ascending order - v1 = v1 "" - v2 = v2 "" - if (v1 < v2) - return -1 - return (v1 != v2) -@} -@end example - -Here is a -comparison function to make all numbers, and numeric strings without -any leading or trailing spaces, come out first during loop traversal: - -@example -function cmp_num_str_val(i1, v1, i2, v2, n1, n2) -@{ - # numbers before string value comparison, ascending order - n1 = v1 + 0 - n2 = v2 + 0 - if (n1 == v1) - return (n2 == v2) ? (n1 - n2) : -1 - else if (n2 == v2) - return 1 - return (v1 < v2) ? -1 : (v1 != v2) -@} -@end example - -Consider sorting the entries of a GNU/Linux system password file -according to login names. The following program which sorts records -by a specific field position can be used for this purpose: - -@example -# sort.awk --- simple program to sort by field position -# field position is specified by the global variable POS - -function cmp_field(i1, v1, i2, v2) -@{ - # comparison by value, as string, and ascending order - return v1[POS] < v2[POS] ? -1 : (v1[POS] != v2[POS]) -@} - -@{ - for (i = 1; i <= NF; i++) - a[NR][i] = $i -@} - -END @{ - PROCINFO["sorted_in"] = "cmp_field" - if (POS < 1 || POS > NF) - POS = 1 - for (i in a) @{ - for (j = 1; j <= NF; j++) - printf("%s%c", a[i][j], j < NF ? ":" : "") - print "" - @} -@} -@end example - -The first field in each entry of the password file is the user's login name, -and the fields are separated by colons. 
Running the program produces the -following output: - -@example -$ @kbd{gawk -vPOS=1 -F: -f sort.awk /etc/passwd} -@print{} adm:x:3:4:adm:/var/adm:/sbin/nologin -@print{} apache:x:48:48:Apache:/var/www:/sbin/nologin -@print{} avahi:x:70:70:Avahi daemon:/:/sbin/nologin -@dots{} -@end example - -The comparison normally should always return the same value when given a -specific pair of array elements as its arguments. If inconsistent -results are returned then the order is undefined. This behavior is -sometimes exploited to introduce random order in otherwise seemingly -ordered data: - -@example -function cmp_randomize(i1, v1, i2, v2) -@{ - # random order - return (2 - 4 * rand()) -@} -@end example - -As mentioned above, the order of the indices is arbitrary if two -elements compare equal. This is usually not a problem, but letting -the tied elements come out in arbitrary order can be an issue, especially -when comparing item values. The partial ordering of the equal elements -may change during the next loop traversal, if other elements are added or -removed from the array. One way to resolve ties when comparing elements -with otherwise equal values is to include the indices in the comparison -rules. Note that doing this may make the loop traversal less efficient, -so consider it only if necessary. The following comparison functions -force a deterministic order, and are based on the fact that the -indices of two elements are never equal: - -@example -function cmp_numeric(i1, v1, i2, v2) -@{ - # numerical value (and index) comparison, descending order - return (v1 != v2) ? (v2 - v1) : (i2 - i1) -@} - -function cmp_string(i1, v1, i2, v2) -@{ - # string value (and index) comparison, descending order - v1 = v1 i1 - v2 = v2 i2 - return (v1 > v2) ? -1 : (v1 != v2) -@} -@end example - -@c Avoid using the term ``stable'' when describing the unpredictable behavior -@c if two items compare equal. 
Usually, the goal of a "stable algorithm" -@c is to maintain the original order of the items, which is a meaningless -@c concept for a list constructed from a hash. - -A custom comparison function can often simplify ordered loop -traversal, and the sky is really the limit when it comes to -designing such a function. - -When string comparisons are made during a sort, either for element -values where one or both aren't numbers, or for element indices -handled as strings, the value of @code{IGNORECASE} -(@pxref{Built-in Variables}) controls whether -the comparisons treat corresponding uppercase and lowercase letters as -equivalent or distinct. - -All sorting based on @code{PROCINFO["sorted_in"]} -is disabled in POSIX mode, -since the @code{PROCINFO} array is not special in that case. - -As a side note, sorting the array indices before traversing -the array has been reported to add 15% to 20% overhead to the -execution time of @command{awk} programs. For this reason, -sorted array traversal is not the default. - -@c The @command{gawk} -@c maintainers believe that only the people who wish to use a -@c feature should have to pay for it. +In addition, @command{gawk} provides built-in functions for +sorting arrays; see @ref{Array Sorting Functions}. @node Delete @section The @code{delete} Statement @@ -14107,124 +13872,6 @@ The result is to set @code{separate[1]} to @code{"1"} and @code{separate[2]} to @code{"foo"}. Presto! The original sequence of separate indices is recovered. -@node Array Sorting -@section Sorting Array Values and Indices with @command{gawk} - -@cindex arrays, sorting -@cindex @code{asort()} function (@command{gawk}) -@cindex @code{asort()} function (@command{gawk}), arrays@comma{} sorting -@cindex sort function, arrays, sorting -The order in which an array is scanned with a @samp{for (i in array)} -loop is essentially arbitrary. -In most @command{awk} implementations, sorting an array requires -writing a @code{sort} function. 
-While this can be educational for exploring different sorting algorithms, -usually that's not the point of the program. -@command{gawk} provides the built-in @code{asort()} -and @code{asorti()} functions -(@pxref{String Functions}) -for sorting arrays. For example: - -@example -@var{populate the array} data -n = asort(data) -for (i = 1; i <= n; i++) - @var{do something with} data[i] -@end example - -After the call to @code{asort()}, the array @code{data} is indexed from 1 -to some number @var{n}, the total number of elements in @code{data}. -(This count is @code{asort()}'s return value.) -@code{data[1]} @value{LEQ} @code{data[2]} @value{LEQ} @code{data[3]}, and so on. -The array elements are compared as strings. - -@cindex side effects, @code{asort()} function -An important side effect of calling @code{asort()} is that -@emph{the array's original indices are irrevocably lost}. -As this isn't always desirable, @code{asort()} accepts a -second argument: - -@example -@var{populate the array} source -n = asort(source, dest) -for (i = 1; i <= n; i++) - @var{do something with} dest[i] -@end example - -In this case, @command{gawk} copies the @code{source} array into the -@code{dest} array and then sorts @code{dest}, destroying its indices. -However, the @code{source} array is not affected. - -@code{asort()} and @code{asorti()} accept a third string argument -to control the comparison rule for the array elements, and the direction -of the sorted results. The valid comparison modes are @samp{string} and @samp{number}, -and the direction can be either @samp{ascending} or @samp{descending}. -Either mode or direction, or both, can be omitted in which -case the defaults, @samp{string} and @samp{ascending}, are assumed -for the comparison mode and the direction, respectively. Separate comparison -mode from direction with a single space, and they can appear in any -order. 
To compare the elements as numbers, and to reverse the elements -of the @code{dest} array, the call to asort in the above example can be -replaced with: - -@example -asort(source, dest, "descending number") -@end example - -The third argument to @code{asort()} can also be a user-defined -function name which is used to order the array elements before -constructing the result array. -@xref{Scanning an Array}, for more information. - - -Often, what's needed is to sort on the values of the @emph{indices} -instead of the values of the elements. -To do that, use the -@code{asorti()} function. The interface is identical to that of -@code{asort()}, except that the index values are used for sorting, and -become the values of the result array: - -@example -@{ source[$0] = some_func($0) @} - -END @{ - n = asorti(source, dest) - for (i = 1; i <= n; i++) @{ - @ii{Work with sorted indices directly:} - @var{do something with} dest[i] - @dots{} - @ii{Access original array via sorted indices:} - @var{do something with} source[dest[i]] - @} -@} -@end example - -Sorting the array by replacing the indices provides maximal flexibility. -To traverse the elements in decreasing order, use a loop that goes from -@var{n} down to 1, either over the elements or over the indices. This -is an alternative to specifying @samp{descending} for the sorting order -using the optional third argument. - -@cindex reference counting, sorting arrays -Copying array indices and elements isn't expensive in terms of memory. -Internally, @command{gawk} maintains @dfn{reference counts} to data. -For example, when @code{asort()} copies the first array to the second one, -there is only one copy of the original array elements' data, even though -both arrays use the values. - -@c Document It And Call It A Feature. Sigh. 
-@cindex @command{gawk}, @code{IGNORECASE} variable in -@cindex @code{IGNORECASE} variable -@cindex arrays, sorting, @code{IGNORECASE} variable and -@cindex @code{IGNORECASE} variable, array sorting and -Because @code{IGNORECASE} affects string comparisons, the value -of @code{IGNORECASE} also affects sorting for both @code{asort()} and @code{asorti()}. -Note also that the locale's sorting order does @emph{not} -come into play; comparisons are based on character values only.@footnote{This -is true because locale-based comparison occurs only when in POSIX -compatibility mode, and since @code{asort()} and @code{asorti()} are -@command{gawk} extensions, they are not available in that case.} -Caveat Emptor. @node Arrays of Arrays @section Arrays of Arrays @@ -14667,7 +14314,7 @@ order specification. The value of @code{IGNORECASE} affects the sorting. The third argument can also be a user-defined function name in which case the value returned by the function is used to order the array elements before constructing the result array. -@xref{Scanning an Array}, for more information. +@xref{Array Sorting Functions}, for more information. For example, if the contents of @code{a} are as follows: @@ -14701,7 +14348,7 @@ asort(a, a, "descending") @end example The @code{asort()} function is described in more detail in -@ref{Array Sorting}. +@ref{Array Sorting Functions}. @code{asort()} is a @command{gawk} extension; it is not available in compatibility mode (@pxref{Options}). @@ -14713,7 +14360,7 @@ are sorted, instead of the values. (Here too, @code{IGNORECASE} affects the sorting.) The @code{asorti()} function is described in more detail in -@ref{Array Sorting}. +@ref{Array Sorting Functions}. @code{asorti()} is a @command{gawk} extension; it is not available in compatibility mode (@pxref{Options}). @@ -18474,7 +18121,9 @@ It's a bit of a ``grab bag'' of items that are otherwise unrelated to each other. 
First, a command-line option allows @command{gawk} to recognize nondecimal numbers in input data, not just in @command{awk} -programs. Next, two-way I/O, discussed briefly in earlier parts of this +programs. +Then, @command{gawk}'s special features for sorting arrays are presented. +Next, two-way I/O, discussed briefly in earlier parts of this @value{DOCUMENT}, is described in full detail, along with the basics of TCP/IP networking. Finally, @command{gawk} can @dfn{profile} an @command{awk} program, making it possible to tune @@ -18487,6 +18136,8 @@ its description is relegated to an appendix. @menu * Nondecimal Data:: Allowing nondecimal input data. +* Array Sorting:: Facilities for controlling array traversal and + sorting arrays. * Two-way I/O:: Two-way communications with another process. * TCP/IP Networking:: Using @command{gawk} for network programming. * Profiling:: Profiling your @command{awk} programs. @@ -18549,6 +18200,473 @@ This makes your programs easier to write and easier to read, and leads to less surprising results. @end quotation +@node Array Sorting +@section Controlling Array Traversal and Array Sorting + +@command{gawk} lets you control the order in which @samp{for (i in array)} loops +will traverse an array. + +In addition, two built-in functions, @code{asort()} and @code{asorti()}, +let you sort arrays based on the array values and indices, respectively. +These two functions also provide control over the sorting criteria used +to order the elements during sorting. + +@menu +* Controlling Array Traversal:: How to use PROCINFO["sorted_in"]. +* Array Sorting Functions:: How to use @code{asort()} and @code{asorti()}. +@end menu + +@node Controlling Array Traversal +@subsection Controlling Array Traversal + +By default, the order in which a @samp{for (i in array)} loop +will scan an array is not defined; it is generally based upon +the internal implementation of arrays inside @command{awk}. 
+ +Often, though, it is desirable to be able to loop over the elements +in a particular order that you, the programmer, choose. @command{gawk} +lets you do this; this @value{SUBSECTION} describes how. + +@menu +* Controlling Scanning With A Function:: Using a function to control scanning. +* Controlling Scanning:: Controlling the order in which arrays + are scanned. +@end menu + +@node Controlling Scanning With A Function +@subsubsection Controlling Array Scanning Order With a User-defined Function + +The value of @code{PROCINFO["sorted_in"]} can be a function name. +This lets you traverse an array based on any custom criterion. +The array elements are ordered according to the return value of this +function. This comparison function should be defined with at least +four arguments: + +@example +function comp_func(i1, v1, i2, v2) +@{ + @var{compare elements 1 and 2 in some fashion} + @var{return < 0; 0; or > 0} +@} +@end example + +Here, @var{i1} and @var{i2} are the indices, and @var{v1} and @var{v2} +are the corresponding values of the two elements being compared. +Either @var{v1} or @var{v2}, or both, can be arrays if the array being +traversed contains subarrays as values. The three possible return values +are interpreted this way: + +@itemize @bullet +@item +If the return value of @code{comp_func(i1, v1, i2, v2)} is less than zero, +index @var{i1} comes before index @var{i2} during loop traversal. + +@item +If @code{comp_func(i1, v1, i2, v2)} returns zero, @var{i1} and @var{i2} +come together but the relative order with respect to each other is undefined. + +@item +If the return value of @code{comp_func(i1, v1, i2, v2)} is greater than zero, +@var{i1} comes after @var{i2}. 
+@end itemize + +The following comparison function can be used to scan an array in +numerical order of the indices: + +@example +function cmp_num_idx(i1, v1, i2, v2) +@{ + # numerical index comparison, ascending order + return (i1 - i2) +@} +@end example + +This function traverses an array based on the string order of the element values +rather than by indices: + +@example +function cmp_str_val(i1, v1, i2, v2) +@{ + # string value comparison, ascending order + v1 = v1 "" + v2 = v2 "" + if (v1 < v2) + return -1 + return (v1 != v2) +@} +@end example + +Here is a +comparison function to make all numbers, and numeric strings without +any leading or trailing spaces, come out first during loop traversal: + +@example +function cmp_num_str_val(i1, v1, i2, v2, n1, n2) +@{ + # numbers before string value comparison, ascending order + n1 = v1 + 0 + n2 = v2 + 0 + if (n1 == v1) + return (n2 == v2) ? (n1 - n2) : -1 + else if (n2 == v2) + return 1 + return (v1 < v2) ? -1 : (v1 != v2) +@} +@end example + +@strong{FIXME}: Put in a fuller example here of some data +and show the different results when traversing. + +Consider sorting the entries of a GNU/Linux system password file +according to login names. The following program which sorts records +by a specific field position can be used for this purpose: + +@example +# sort.awk --- simple program to sort by field position +# field position is specified by the global variable POS + +function cmp_field(i1, v1, i2, v2) +@{ + # comparison by value, as string, and ascending order + return v1[POS] < v2[POS] ? -1 : (v1[POS] != v2[POS]) +@} + +@{ + for (i = 1; i <= NF; i++) + a[NR][i] = $i +@} + +END @{ + PROCINFO["sorted_in"] = "cmp_field" + if (POS < 1 || POS > NF) + POS = 1 + for (i in a) @{ + for (j = 1; j <= NF; j++) + printf("%s%c", a[i][j], j < NF ? ":" : "") + print "" + @} +@} +@end example + +The first field in each entry of the password file is the user's login name, +and the fields are separated by colons. 
+Each record defines a subarray, with each field as an element in the subarray. +Running the program produces the +following output: + +@example +$ @kbd{gawk -vPOS=1 -F: -f sort.awk /etc/passwd} +@print{} adm:x:3:4:adm:/var/adm:/sbin/nologin +@print{} apache:x:48:48:Apache:/var/www:/sbin/nologin +@print{} avahi:x:70:70:Avahi daemon:/:/sbin/nologin +@dots{} +@end example + +The comparison normally should always return the same value when given a +specific pair of array elements as its arguments. If inconsistent +results are returned then the order is undefined. This behavior is +sometimes exploited to introduce random order in otherwise seemingly +ordered data: + +@example +function cmp_randomize(i1, v1, i2, v2) +@{ + # random order + return (2 - 4 * rand()) +@} +@end example + +As mentioned above, the order of the indices is arbitrary if two +elements compare equal. This is usually not a problem, but letting +the tied elements come out in arbitrary order can be an issue, especially +when comparing item values. The partial ordering of the equal elements +may change during the next loop traversal, if other elements are added or +removed from the array. One way to resolve ties when comparing elements +with otherwise equal values is to include the indices in the comparison +rules. Note that doing this may make the loop traversal less efficient, +so consider it only if necessary. The following comparison functions +force a deterministic order, and are based on the fact that the +indices of two elements are never equal: + +@example +function cmp_numeric(i1, v1, i2, v2) +@{ + # numerical value (and index) comparison, descending order + return (v1 != v2) ? (v2 - v1) : (i2 - i1) +@} + +function cmp_string(i1, v1, i2, v2) +@{ + # string value (and index) comparison, descending order + v1 = v1 i1 + v2 = v2 i2 + return (v1 > v2) ? -1 : (v1 != v2) +@} +@end example + +@c Avoid using the term ``stable'' when describing the unpredictable behavior +@c if two items compare equal. 
Usually, the goal of a "stable algorithm" +@c is to maintain the original order of the items, which is a meaningless +@c concept for a list constructed from a hash. + +A custom comparison function can often simplify ordered loop +traversal, and the sky is really the limit when it comes to +designing such a function. + +When string comparisons are made during a sort, either for element +values where one or both aren't numbers, or for element indices +handled as strings, the value of @code{IGNORECASE} +(@pxref{Built-in Variables}) controls whether +the comparisons treat corresponding uppercase and lowercase letters as +equivalent or distinct. + +Another point to keep in mind is that in the case of subarrays +the element values can themselves be arrays; a production comparison +function should use the @code{isarray()} function +(@pxref{Type Functions}), +to check for this, and choose a defined sorting order for subarrays. + +All sorting based on @code{PROCINFO["sorted_in"]} +is disabled in POSIX mode, +since the @code{PROCINFO} array is not special in that case. + +As a side note, sorting the array indices before traversing +the array has been reported to add 15% to 20% overhead to the +execution time of @command{awk} programs. For this reason, +sorted array traversal is not the default. + +@c The @command{gawk} +@c maintainers believe that only the people who wish to use a +@c feature should have to pay for it. + +@node Controlling Scanning +@subsubsection Controlling Array Scanning Order + +As described in +@iftex +the previous subsubsection, +@end iftex +@ref{Controlling Scanning With A Function}, +@ifnottex +@end ifnottex +you can provide the name of a function as the value of +@code{PROCINFO["sorted_in"]} to specify custom sorting criteria. 
+ +Often, though, you may wish to do something simple, such as +``sort based on comparing the indices in ascending order,'' +or ``sort based on comparing the values in descending order.'' +Having to write a simple comparison function for this purpose +for use in all of your programs becomes tedious. +For the most likely simple cases @command{gawk} provides +the option of supplying special names that do the requested +sorting for you. +You can think of them as ``predefined'' sorting functions, +if you like, although the names purposely include characters +that are not valid in real @command{awk} function names. + +The following special values are available: + +@table @code +@item "@@ind_str_asc" +Order by indices compared as strings; this is the most basic sort. +(Internally, array indices are always strings, so with @samp{a[2*5] = 1} +the index is actually @code{"10"} rather than numeric 10.) + +@item "@@ind_num_asc" +Order by indices but force them to be treated as numbers in the process. +Any index with non-numeric value will end up positioned as if it were zero. + +@item "@@val_type_asc" +Order by element values rather than indices. +Ordering is by the type assigned to the element +(@pxref{Typing and Comparison}). +All numeric values come before all string values, +which in turn come before all subarrays. + +@item "@@val_str_asc" +Order by element values rather than by indices. Scalar values are +compared as strings. Subarrays, if present, come out last. + +@item "@@val_num_asc" +Order by values but force scalar values to be treated as numbers +for the purpose of comparison. If there are subarrays, those appear +at the end of the sorted list. + +@item "@@ind_str_desc" +Reverse order from the most basic sort. + +@item "@@ind_num_desc" +Numeric indices ordered from high to low. + +@item "@@val_type_desc" +Element values, based on type, in descending order. + +@item "@@val_str_desc" +Element values, treated as strings, ordered from high to low. 
Subarrays, if present, +come out first. + +@item "@@val_num_desc" +Element values, treated as numbers, ordered from high to low. Subarrays, if present, +come out first. + +@item "@@unsorted" +Array elements are processed in arbitrary order, which is the normal @command{awk} +behavior. You can also get the normal behavior by just +deleting the @code{"sorted_in"} element from the @code{PROCINFO} array, if +it previously had a value assigned to it. +@end table + +The array traversal order is determined before the @code{for} loop +starts to run. Changing @code{PROCINFO["sorted_in"]} in the loop body +will not affect the loop. + +For example: + +@example +$ @kbd{gawk 'BEGIN @{} +> @kbd{ a[4] = 4} +> @kbd{ a[3] = 3} +> @kbd{ for (i in a)} +> @kbd{ print i, a[i]} +> @kbd{@}'} +@print{} 4 4 +@print{} 3 3 +$ @kbd{gawk 'BEGIN @{} +> @kbd{ PROCINFO["sorted_in"] = "@@ind_str_asc"} +> @kbd{ a[4] = 4} +> @kbd{ a[3] = 3} +> @kbd{ for (i in a)} +> @kbd{ print i, a[i]} +> @kbd{@}'} +@print{} 3 3 +@print{} 4 4 +@end example + +When sorting an array by element values, if a value happens to be +a subarray then it is considered to be greater than any string or +numeric value, regardless of what the subarray itself contains, +and all subarrays are treated as being equal to each other. Their +order relative to each other is determined by their index strings. + +@node Array Sorting Functions +@subsection Sorting Array Values and Indices with @command{gawk} + +@cindex arrays, sorting +@cindex @code{asort()} function (@command{gawk}) +@cindex @code{asort()} function (@command{gawk}), arrays@comma{} sorting +@cindex sort function, arrays, sorting +The order in which an array is scanned with a @samp{for (i in array)} +loop is essentially arbitrary. +In most @command{awk} implementations, sorting an array requires +writing a @code{sort} function. +While this can be educational for exploring different sorting algorithms, +usually that's not the point of the program. 
+@command{gawk} provides the built-in @code{asort()} +and @code{asorti()} functions +(@pxref{String Functions}) +for sorting arrays. For example: + +@example +@var{populate the array} data +n = asort(data) +for (i = 1; i <= n; i++) + @var{do something with} data[i] +@end example + +After the call to @code{asort()}, the array @code{data} is indexed from 1 +to some number @var{n}, the total number of elements in @code{data}. +(This count is @code{asort()}'s return value.) +@code{data[1]} @value{LEQ} @code{data[2]} @value{LEQ} @code{data[3]}, and so on. +The array elements are compared as strings. + +@cindex side effects, @code{asort()} function +An important side effect of calling @code{asort()} is that +@emph{the array's original indices are irrevocably lost}. +As this isn't always desirable, @code{asort()} accepts a +second argument: + +@example +@var{populate the array} source +n = asort(source, dest) +for (i = 1; i <= n; i++) + @var{do something with} dest[i] +@end example + +In this case, @command{gawk} copies the @code{source} array into the +@code{dest} array and then sorts @code{dest}, destroying its indices. +However, the @code{source} array is not affected. + +@code{asort()} and @code{asorti()} accept a third string argument +to control the comparison rule for the array elements, and the direction +of the sorted results. The valid comparison modes are @samp{string} and @samp{number}, +and the direction can be either @samp{ascending} or @samp{descending}. +Either mode or direction, or both, can be omitted in which +case the defaults, @samp{string} and @samp{ascending}, are assumed +for the comparison mode and the direction, respectively. Separate comparison +mode from direction with a single space, and they can appear in any +order. 
To compare the elements as numbers, and to reverse the elements +of the @code{dest} array, the call to asort in the above example can be +replaced with: + +@example +asort(source, dest, "descending number") +@end example + +The third argument to @code{asort()} can also be a user-defined +function name which is used to order the array elements before +constructing the result array. +@xref{Scanning an Array}, for more information. + + +Often, what's needed is to sort on the values of the @emph{indices} +instead of the values of the elements. +To do that, use the +@code{asorti()} function. The interface is identical to that of +@code{asort()}, except that the index values are used for sorting, and +become the values of the result array: + +@example +@{ source[$0] = some_func($0) @} + +END @{ + n = asorti(source, dest) + for (i = 1; i <= n; i++) @{ + @ii{Work with sorted indices directly:} + @var{do something with} dest[i] + @dots{} + @ii{Access original array via sorted indices:} + @var{do something with} source[dest[i]] + @} +@} +@end example + +Sorting the array by replacing the indices provides maximal flexibility. +To traverse the elements in decreasing order, use a loop that goes from +@var{n} down to 1, either over the elements or over the indices. This +is an alternative to specifying @samp{descending} for the sorting order +using the optional third argument. + +@cindex reference counting, sorting arrays +Copying array indices and elements isn't expensive in terms of memory. +Internally, @command{gawk} maintains @dfn{reference counts} to data. +For example, when @code{asort()} copies the first array to the second one, +there is only one copy of the original array elements' data, even though +both arrays use the values. + +@c Document It And Call It A Feature. Sigh. 
+@cindex @command{gawk}, @code{IGNORECASE} variable in +@cindex @code{IGNORECASE} variable +@cindex arrays, sorting, @code{IGNORECASE} variable and +@cindex @code{IGNORECASE} variable, array sorting and +Because @code{IGNORECASE} affects string comparisons, the value +of @code{IGNORECASE} also affects sorting for both @code{asort()} and @code{asorti()}. +Note also that the locale's sorting order does @emph{not} +come into play; comparisons are based on character values only.@footnote{This +is true because locale-based comparison occurs only when in POSIX +compatibility mode, and since @code{asort()} and @code{asorti()} are +@command{gawk} extensions, they are not available in that case.} +Caveat Emptor. + @node Two-way I/O @section Two-Way Communications with Another Process @cindex Brennan, Michael @@ -26252,8 +26370,8 @@ of the @value{DOCUMENT} where you can find more information. * SVR4:: Minor changes between System V Releases 3.1 and 4. * POSIX:: New features from the POSIX standard. -* BTL:: New features from Brian Kernighan's - version of @command{awk}. +* BTL:: New features from Brian Kernighan's version of + @command{awk}. * POSIX/GNU:: The extensions in @command{gawk} not in POSIX @command{awk}. * Common Extensions:: Common Extensions Summary. @@ -26762,6 +26880,9 @@ SunOS 3.x, Sun 386 (Road Runner) @item Tandem (non-POSIX) +@item +Prestandard VAX C compiler for VAX/VMS + @end itemize @end itemize @@ -26887,6 +27008,7 @@ provided the initial port to OS/2 and its documentation. @cindex Jaegermann, Michal Michal Jaegermann provided the port to Atari systems and its documentation. +(This port is no longer supported.) He continues to provide portability checking with DEC Alpha systems, and has done a lot of work to make sure @command{gawk} works on non-32-bit systems. 
@@ -2182,6 +2182,7 @@ post: NODE *array, *sort_str; size_t num_elems = 0; static NODE *sorted_in = NULL; + const char *how_to_sort = "@unsorted"; /* get the array */ array = POP_ARRAY(); @@ -2200,7 +2201,13 @@ post: if (PROCINFO_node != NULL) sort_str = in_array(PROCINFO_node, sorted_in); - list = assoc_list(array, sort_str, SORTED_IN); + if (sort_str != NULL) { + sort_str = force_string(sort_str); + if (sort_str->stlen > 0) + how_to_sort = sort_str->stptr; + } + + list = assoc_list(array, how_to_sort, SORTED_IN); list[num_elems] = array; /* actual array for use in * lint warning in Op_arrayfor_incr diff --git a/test/ChangeLog b/test/ChangeLog index b3e99c2c..03c93594 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,10 @@ +Wed May 4 23:37:27 2011 Arnold D. Robbins <arnold@skeeve.com> + + Revise tests for array sorting. + + * arraysort.awk, arraysort.ok, sort1.awk, sort1.ok, + sortfor.awk: Revised. + Wed May 4 23:07:39 2011 Arnold D. Robbins <arnold@skeeve.com> * nastyparm.awk, nastyparm.ok: New files from John Haque. 
diff --git a/test/arraysort.awk b/test/arraysort.awk index 09922044..b053a1d9 100644 --- a/test/arraysort.awk +++ b/test/arraysort.awk @@ -18,7 +18,7 @@ BEGIN { BEGIN { print "--- test2 ---" a[100] = a[1] = a["x"] = a["y"] = 1 - PROCINFO["sorted_in"] = "num" + PROCINFO["sorted_in"] = "@ind_num_asc" for (i in a) print i, a[i] delete a @@ -27,7 +27,7 @@ BEGIN { BEGIN { print "--- test3 ---" a[100] = a[1] = a["x"] = 1 - PROCINFO["sorted_in"] = "num" + PROCINFO["sorted_in"] = "@ind_num_asc" for (i in a) print i, a[i] delete a @@ -36,7 +36,7 @@ BEGIN { BEGIN { print "--- test4 ---" a[0] = a[100] = a[1] = a["x"] = 1 - PROCINFO["sorted_in"] = "num" + PROCINFO["sorted_in"] = "@ind_num_asc" for (i in a) print i, a[i] delete a @@ -45,7 +45,7 @@ BEGIN { BEGIN { print "--- test5 ---" a[""] = a["y"] = a[0] = 1 - PROCINFO["sorted_in"] = "num" + PROCINFO["sorted_in"] = "@ind_num_asc" for (i in a) print i, a[i] delete a @@ -54,7 +54,7 @@ BEGIN { BEGIN { print "--- test6 ---" a[2] = a[1] = a[4] = a["3 "] = 1 - PROCINFO["sorted_in"] = "num" + PROCINFO["sorted_in"] = "@ind_num_asc" for (i in a) print "\""i"\"" delete a @@ -67,24 +67,24 @@ BEGIN { for (i = 1; i <= n; i++) b[a[i]] = a[i] print "--unsorted--" - PROCINFO["sorted_in"] = "unsorted" + PROCINFO["sorted_in"] = "@unsorted" for (i in b) print "|"i"|"b[i]"|" print "--asc ind str--" - PROCINFO["sorted_in"] = "asc ind str" + PROCINFO["sorted_in"] = "@ind_str_asc" for (i in b) print "|"i"|"b[i]"|" print "--asc val str--" - PROCINFO["sorted_in"] = "asc val str" + PROCINFO["sorted_in"] = "@val_str_asc" for (i in b) print "|"i"|"b[i]"|" print "--asc ind num--" - PROCINFO["sorted_in"] = "asc ind num" + PROCINFO["sorted_in"] = "@ind_num_asc" for (i in b) print "|"i"|"b[i]"|" print "--asc val num--" - PROCINFO["sorted_in"] = "asc val num" + PROCINFO["sorted_in"] = "@val_num_asc" for (i in b) print "|"i"|"b[i]"|" } diff --git a/test/arraysort.ok b/test/arraysort.ok index 70af0e01..454badf0 100644 --- a/test/arraysort.ok +++ 
b/test/arraysort.ok @@ -1,7 +1,7 @@ --- test1 --- + 5 5 10 10 3D 3D - 5 5 --- test2 --- x 1 y 1 @@ -71,13 +71,13 @@ y 1 |4|4| |5|5| --asc val num-- -|0|0| |D3|D3| +|0|0| |1|1| |2|2| +|3D|3D| | 3| 3| |3|3| -|3D|3D| -| 4 | 4 | |4|4| +| 4 | 4 | |5|5| diff --git a/test/sort1.awk b/test/sort1.awk index 44af59ff..ef28e9cd 100644 --- a/test/sort1.awk +++ b/test/sort1.awk @@ -16,38 +16,52 @@ BEGIN{ printf("---end asort(a, a), IGNORECASE = %d---\n", IGNORECASE) makea(a) - SORT_STR = "num" + SORT_STR = "@ind_num_asc" asort2(a, "") - printf("---end asort(a, b, \"num\"), IGNORECASE = %d---\n", IGNORECASE) + printf("---end asort(a, b, \"%s\"), IGNORECASE = %d---\n", + SORT_STR, IGNORECASE) makea(a) - SORT_STR = "desc str" + SORT_STR = "@ind_str_desc" asort1(a, "") - printf("---end asort(a, a, \"desc str\"), IGNORECASE = %d---\n", IGNORECASE) + printf("---end asort(a, a, \"%s\"), IGNORECASE = %d---\n", + SORT_STR, IGNORECASE) makea(a) - SORT_STR = "val str" + SORT_STR = "@val_str_asc" proc_sort(a, "") - printf("---end PROCINFO[\"sorted_in\"] = \"val str\", IGNORECASE = %d---\n", - IGNORECASE) + printf("---end PROCINFO[\"sorted_in\"] = \"%s\", IGNORECASE = %d---\n", + SORT_STR, IGNORECASE) makea(a) - SORT_STR = "val num" + SORT_STR = "@val_num_asc" proc_sort(a, "") - printf("---end PROCINFO[\"sorted_in\"] = \"val num\", IGNORECASE = %d---\n", - IGNORECASE) + printf("---end PROCINFO[\"sorted_in\"] = \"%s\", IGNORECASE = %d---\n", + SORT_STR, IGNORECASE) makea(a) - SORT_STR = "desc val str" + SORT_STR = "@val_str_desc" proc_sort(a, "") - printf("---end PROCINFO[\"sorted_in\"] = \"desc val str\", IGNORECASE = %d---\n", - IGNORECASE) + printf("---end PROCINFO[\"sorted_in\"] = \"%s\", IGNORECASE = %d---\n", + SORT_STR, IGNORECASE) makea(a) - SORT_STR = "desc val num" + SORT_STR = "@val_num_desc" proc_sort(a, "") - printf("---end PROCINFO[\"sorted_in\"] = \"desc val num\", IGNORECASE = %d---\n", - IGNORECASE) + printf("---end PROCINFO[\"sorted_in\"] = \"%s\", IGNORECASE = %d---\n", + 
SORT_STR, IGNORECASE) + + makea(a) + SORT_STR = "@val_type_asc" + proc_sort(a, "") + printf("---end PROCINFO[\"sorted_in\"] = \"%s\", IGNORECASE = %d---\n", + SORT_STR, IGNORECASE) + + makea(a) + SORT_STR = "@val_type_desc" + proc_sort(a, "") + printf("---end PROCINFO[\"sorted_in\"] = \"%s\", IGNORECASE = %d---\n", + SORT_STR, IGNORECASE) } } diff --git a/test/sort1.ok b/test/sort1.ok index 8ba2bcd2..2d43e31a 100644 --- a/test/sort1.ok +++ b/test/sort1.ok @@ -1,5 +1,5 @@ - [1]: 1234 - [2]: 234 + [1]: 234 + [2]: 1234 [3]: Zebra [4]: barz [5]: blattt @@ -8,8 +8,8 @@ [6][3]: tttalb [6][4]: zrab ---end asort(a), IGNORECASE = 0--- - [1]: barz 1234 - [2]: blattt 234 + [1]: barz 234 + [2]: blattt 1234 [3]: Zebra Zebra [4]: 1234 barz [5]: 234 blattt @@ -18,8 +18,8 @@ [6][3]: tttalb tttalb [6][4]: zrab zrab ---end asort(a, b), IGNORECASE = 0--- - [1]: 1234 - [2]: 234 + [1]: 234 + [2]: 1234 [3]: Zebra [4]: barz [5]: blattt @@ -28,26 +28,26 @@ [6][3]: tttalb [6][4]: zrab ---end asort(a, a), IGNORECASE = 0--- - [1]: barz Zebra - [2]: blattt barz - [3]: Zebra blattt - [4]: 1234 234 - [5]: 234 1234 - [6][1]: 4321 arbeZ - [6][2]: arbeZ tttalb - [6][3]: tttalb zrab - [6][4]: zrab 4321 ----end asort(a, b, "num"), IGNORECASE = 0--- + [1]: barz barz + [2]: blattt blattt + [3]: Zebra Zebra + [4]: 1234 1234 + [5]: 234 234 + [6][1]: 4321 4321 + [6][2]: arbeZ arbeZ + [6][3]: tttalb tttalb + [6][4]: zrab zrab +---end asort(a, b, "@ind_num_asc"), IGNORECASE = 0--- [1][1]: zrab [1][2]: tttalb [1][3]: arbeZ [1][4]: 4321 - [2]: blattt - [3]: barz + [2]: 234 + [3]: 1234 [4]: Zebra - [5]: 234 - [6]: 1234 ----end asort(a, a, "desc str"), IGNORECASE = 0--- + [5]: blattt + [6]: barz +---end asort(a, a, "@ind_str_desc"), IGNORECASE = 0--- [4]: 1234 [5]: 234 [3]: Zebra @@ -57,17 +57,17 @@ [6][2]: arbeZ [6][3]: tttalb [6][4]: zrab ----end PROCINFO["sorted_in"] = "val str", IGNORECASE = 0--- - [3]: Zebra +---end PROCINFO["sorted_in"] = "@val_str_asc", IGNORECASE = 0--- [1]: barz [2]: blattt + [3]: 
Zebra [5]: 234 [4]: 1234 + [6][4]: zrab [6][2]: arbeZ [6][3]: tttalb - [6][4]: zrab [6][1]: 4321 ----end PROCINFO["sorted_in"] = "val num", IGNORECASE = 0--- +---end PROCINFO["sorted_in"] = "@val_num_asc", IGNORECASE = 0--- [6][4]: zrab [6][3]: tttalb [6][2]: arbeZ @@ -77,19 +77,39 @@ [3]: Zebra [5]: 234 [4]: 1234 ----end PROCINFO["sorted_in"] = "desc val str", IGNORECASE = 0--- +---end PROCINFO["sorted_in"] = "@val_str_desc", IGNORECASE = 0--- [6][1]: 4321 [6][4]: zrab - [6][3]: tttalb [6][2]: arbeZ + [6][3]: tttalb [4]: 1234 [5]: 234 + [1]: barz + [2]: blattt + [3]: Zebra +---end PROCINFO["sorted_in"] = "@val_num_desc", IGNORECASE = 0--- + [5]: 234 + [4]: 1234 + [3]: Zebra + [1]: barz + [2]: blattt + [6][1]: 4321 + [6][2]: arbeZ + [6][3]: tttalb + [6][4]: zrab +---end PROCINFO["sorted_in"] = "@val_type_asc", IGNORECASE = 0--- + [6][4]: zrab + [6][3]: tttalb + [6][2]: arbeZ + [6][1]: 4321 [2]: blattt [1]: barz [3]: Zebra ----end PROCINFO["sorted_in"] = "desc val num", IGNORECASE = 0--- - [1]: 1234 - [2]: 234 + [4]: 1234 + [5]: 234 +---end PROCINFO["sorted_in"] = "@val_type_desc", IGNORECASE = 0--- + [1]: 234 + [2]: 1234 [3]: barz [4]: blattt [5]: Zebra @@ -98,8 +118,8 @@ [6][3]: tttalb [6][4]: zrab ---end asort(a), IGNORECASE = 1--- - [1]: barz 1234 - [2]: blattt 234 + [1]: barz 234 + [2]: blattt 1234 [3]: Zebra barz [4]: 1234 blattt [5]: 234 Zebra @@ -108,8 +128,8 @@ [6][3]: tttalb tttalb [6][4]: zrab zrab ---end asort(a, b), IGNORECASE = 1--- - [1]: 1234 - [2]: 234 + [1]: 234 + [2]: 1234 [3]: barz [4]: blattt [5]: Zebra @@ -121,23 +141,23 @@ [1]: barz barz [2]: blattt blattt [3]: Zebra Zebra - [4]: 1234 234 - [5]: 234 1234 - [6][1]: 4321 arbeZ - [6][2]: arbeZ tttalb - [6][3]: tttalb zrab - [6][4]: zrab 4321 ----end asort(a, b, "num"), IGNORECASE = 1--- + [4]: 1234 1234 + [5]: 234 234 + [6][1]: 4321 4321 + [6][2]: arbeZ arbeZ + [6][3]: tttalb tttalb + [6][4]: zrab zrab +---end asort(a, b, "@ind_num_asc"), IGNORECASE = 1--- [1][1]: zrab [1][2]: tttalb [1][3]: 
arbeZ [1][4]: 4321 - [2]: Zebra - [3]: blattt - [4]: barz - [5]: 234 - [6]: 1234 ----end asort(a, a, "desc str"), IGNORECASE = 1--- + [2]: 234 + [3]: 1234 + [4]: Zebra + [5]: blattt + [6]: barz +---end asort(a, a, "@ind_str_desc"), IGNORECASE = 1--- [4]: 1234 [5]: 234 [1]: barz @@ -147,17 +167,17 @@ [6][2]: arbeZ [6][3]: tttalb [6][4]: zrab ----end PROCINFO["sorted_in"] = "val str", IGNORECASE = 1--- +---end PROCINFO["sorted_in"] = "@val_str_asc", IGNORECASE = 1--- [1]: barz [2]: blattt [3]: Zebra [5]: 234 [4]: 1234 + [6][4]: zrab [6][2]: arbeZ [6][3]: tttalb - [6][4]: zrab [6][1]: 4321 ----end PROCINFO["sorted_in"] = "val num", IGNORECASE = 1--- +---end PROCINFO["sorted_in"] = "@val_num_asc", IGNORECASE = 1--- [6][4]: zrab [6][3]: tttalb [6][2]: arbeZ @@ -167,14 +187,34 @@ [1]: barz [5]: 234 [4]: 1234 ----end PROCINFO["sorted_in"] = "desc val str", IGNORECASE = 1--- +---end PROCINFO["sorted_in"] = "@val_str_desc", IGNORECASE = 1--- [6][1]: 4321 [6][4]: zrab - [6][3]: tttalb [6][2]: arbeZ + [6][3]: tttalb [4]: 1234 [5]: 234 + [1]: barz + [2]: blattt + [3]: Zebra +---end PROCINFO["sorted_in"] = "@val_num_desc", IGNORECASE = 1--- + [5]: 234 + [4]: 1234 + [1]: barz + [2]: blattt + [3]: Zebra + [6][1]: 4321 + [6][2]: arbeZ + [6][3]: tttalb + [6][4]: zrab +---end PROCINFO["sorted_in"] = "@val_type_asc", IGNORECASE = 1--- + [6][4]: zrab + [6][3]: tttalb + [6][2]: arbeZ + [6][1]: 4321 [3]: Zebra [2]: blattt [1]: barz ----end PROCINFO["sorted_in"] = "desc val num", IGNORECASE = 1--- + [4]: 1234 + [5]: 234 +---end PROCINFO["sorted_in"] = "@val_type_desc", IGNORECASE = 1--- diff --git a/test/sortfor.awk b/test/sortfor.awk index 611eca64..922b5e85 100644 --- a/test/sortfor.awk +++ b/test/sortfor.awk @@ -1,9 +1,9 @@ { a[$0]++ } END { - PROCINFO["sorted_in"] = "ascending" + PROCINFO["sorted_in"] = "@ind_str_asc" for (i in a) print i - PROCINFO["sorted_in"] = "descending" + PROCINFO["sorted_in"] = "@ind_str_desc" for (i in a) print i } |