aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog18
-rw-r--r--array.c545
-rw-r--r--awk.h7
-rw-r--r--doc/ChangeLog10
-rw-r--r--doc/gawk.119
-rw-r--r--doc/gawk.info794
-rw-r--r--doc/gawk.texi104
-rw-r--r--eval.c260
8 files changed, 1114 insertions, 643 deletions
diff --git a/ChangeLog b/ChangeLog
index 75010a7b..ee69ed53 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,21 @@
+Tue Mar 29 20:45:49 2011 Pat Rankin <rankin@patechdata.com>
+
+ Move the code to support sorting `for (index in array)' from
+ eval.c to array.c, and implement several additional orderings.
+
+ * array.c (comp_func, sorted_in, sort_ignorecase,
+ sort_up_index_ignrcase, sort_down_index_ignrcase): Move from eval.c.
+ (sort_up_index_string, sort_down_index_string): Move from eval.c
+ and rename from *_str to *_string.
+ (sort_selection, sort_match, sort_maybe_numeric_index,
+ sort_up_index_number, sort_down_index_number,
+ sort_up_value, sort_down_value): New routines.
+ * eval.c (sort_&c): Move to array.c.
+ (r_interpret: case Op_arrayfor_init): Call sort_maybe_numeric_index
+ before and after qsort.
+ * awk.h (qsort_compfunc): New typedef.
+ (sorted_in, sort_maybe_numeric_index): Declare.
+
Fri Mar 25 13:15:36 2011 Arnold D. Robbins <arnold@skeeve.com>
* awk.h: Move libsigsegv portability checks to here from main.c.
diff --git a/array.c b/array.c
index b821b5e4..0b03397c 100644
--- a/array.c
+++ b/array.c
@@ -54,6 +54,24 @@ static unsigned long awk_hash(const char *s, size_t len, unsigned long hsize, si
unsigned long (*hash)(const char *s, size_t len, unsigned long hsize, size_t *code) = awk_hash;
+/* used by sort_maybe_numeric_index() and sort_up_index_number() */
+struct sort_num { AWKNUM sort_numbr; NODE *sort_index; };
+
+/* sorting support code */
+static qsort_compfunc sort_selection(NODE *, const char *, int);
+static size_t sort_match(const char *, size_t, const char *);
+static int sort_ignorecase(const char *, size_t, const char *, size_t);
+
+/* qsort callback routines */
+static int sort_up_index_string(const void *, const void *);
+static int sort_down_index_string(const void *, const void *);
+static int sort_up_index_ignrcase(const void *, const void *);
+static int sort_down_index_ignrcase(const void *, const void *);
+static int sort_up_index_number(const void *, const void *);
+static int sort_down_index_number(const void *, const void *);
+static int sort_up_value(const void *, const void *);
+static int sort_down_value(const void *, const void *);
+
/* array_init --- possibly temporary function for experimentation purposes */
void
@@ -1273,6 +1291,533 @@ do_asorti(int nargs)
return asort_actual(nargs, INDEX);
}
+
+/* comp_func --- array index comparison function for qsort, used in debug.c */
+
+int
+comp_func(const void *p1, const void *p2)
+{
+ return sort_up_index_string(p1, p2);
+}
+
+/* sort_ignorecase --- case insensitive string comparison from cmp_nodes() */
+
+static int
+sort_ignorecase(const char *s1, size_t l1, const char *s2, size_t l2)
+{
+ size_t l;
+ int ret;
+
+ l = (l1 < l2) ? l1 : l2;
+#ifdef MBS_SUPPORT
+ if (gawk_mb_cur_max > 1) {
+ ret = strncasecmpmbs((const unsigned char *) s1,
+ (const unsigned char *) s2, l);
+ } else
+#endif
+ for (ret = 0; l-- > 0 && ret == 0; s1++, s2++)
+ ret = casetable[*(unsigned char *) s1]
+ - casetable[*(unsigned char *) s2];
+ if (ret == 0 && l1 != l2)
+ ret = (l1 < l2) ? -1 : 1;
+ return ret;
+}
+
+/*
+ * sort_up_index_string --- qsort comparison function; ascending index strings;
+ * index strings are distinct within an array, so no comparisons will yield
+ * equal and warrant disambiguation
+ */
+
+static int
+sort_up_index_string(const void *p1, const void *p2)
+{
+ const NODE *t1, *t2;
+ const char *str1, *str2;
+ size_t len1, len2;
+ int ret;
+
+ /* Array indexes are strings and distinct, never equal */
+ t1 = *((const NODE *const *) p1);
+ t2 = *((const NODE *const *) p2);
+
+ str1 = t1->ahname_str;
+ len1 = t1->ahname_len;
+ str2 = t2->ahname_str;
+ len2 = t2->ahname_len;
+
+ ret = memcmp(str1, str2, (len1 < len2) ? len1 : len2);
+ /*
+ * if they compared equal but their lengths differ, the
+ * shorter one is a prefix of the longer and compares as less
+ */
+ if (ret == 0 && len1 != len2)
+ ret = (len1 < len2) ? -1 : 1;
+
+ /* indices are unique within an array, so result should never be 0 */
+ assert(ret != 0);
+ return ret;
+}
+
+/* sort_down_index_string --- descending index strings */
+
+static int
+sort_down_index_string(const void *p1, const void *p2)
+{
+ /*
+ * Negation versus transposed arguments: when all keys are
+ * distinct, as with array indices here, either method will
+ * transform an ascending sort into a descending one. But if
+ * there are equal keys--such as when IGNORECASE is honored--
+ * that get disambiguated into a determisitc order, negation
+ * will reverse those but transposed arguments would retain
+ * their relative order within the rest of the reversed sort.
+ */
+ return -sort_up_index_string(p1, p2);
+}
+
+/*
+ * sort_up_index_ignrcase --- ascending index string, case insensitive;
+ * case insensitive comparison can cause distinct index strings to compare
+ * equal, so disambiguation in necessary
+ */
+
+static int
+sort_up_index_ignrcase(const void *p1, const void *p2)
+{
+ const NODE *t1, *t2;
+ int ret;
+
+ t1 = *((const NODE *const *) p1);
+ t2 = *((const NODE *const *) p2);
+
+ ret = sort_ignorecase(t1->ahname_str, t1->ahname_len,
+ t2->ahname_str, t2->ahname_len);
+
+ /*
+ * if case insensitive result is "they're the same",
+ * use case sensitive comparison to force distinct order
+ */
+ if (ret == 0)
+ ret = sort_up_index_string(p1, p2);
+ return ret;
+}
+
+/* sort_down_index_ignrcase --- descending index strings, case insensitive */
+
+static int
+sort_down_index_ignrcase(const void *p1, const void *p2)
+{
+ return -sort_up_index_ignrcase(p1, p2);
+}
+
+/*
+ * sort_up_index_number --- qsort comparison function; ascending index numbers;
+ * the args have been built for us by sort_maybe_numeric_index() and point
+ * to custom structs rather than to gawk nodes
+ */
+
+static int
+sort_up_index_number(const void *p1, const void *p2)
+{
+ const struct sort_num *n1, *n2;
+ int ret;
+
+ n1 = *((const struct sort_num *const *) p1);
+ n2 = *((const struct sort_num *const *) p2);
+
+ if (n1->sort_numbr < n2->sort_numbr)
+ ret = -1;
+ else
+ ret = (n1->sort_numbr > n2->sort_numbr);
+
+ /* break a tie with the index string itself */
+ if (ret == 0)
+ return sort_up_index_string((const void *) n1->sort_index,
+ (const void *) n2->sort_index);
+ return ret;
+}
+
+/* sort_down_index_number --- descending index numbers */
+
+static int
+sort_down_index_number(const void *p1, const void *p2)
+{
+ return -sort_up_index_number(p1, p2);
+}
+
+/*
+ * sort_up_value --- qsort comparison function; ascending values;
+ * standard awk comparison rules apply, so might be by number or might
+ * be by string depending upon the values involved; if a value happens
+ * to be a sub-array, it will compare greater than any scalar (last in
+ * ascending order) and equal with any other sub-array (disambiguated
+ * by index string, the same as with equal scalar values)
+ */
+
+static int
+sort_up_value(const void *p1, const void *p2)
+{
+ const NODE *idx1, *idx2;
+ NODE *val1, *val2;
+ int ret;
+
+ /* we're passed a pair of index (array subscript) nodes */
+ idx1 = *(const NODE *const *) p1;
+ idx2 = *(const NODE *const *) p2;
+ if (idx1->type != Node_ahash || idx2->type != Node_ahash)
+ cant_happen();
+
+ /* and we want to compare the element values they refer to */
+ val1 = idx1->hvalue;
+ val2 = idx2->hvalue;
+ if (val1->type == Node_var_array) {
+ if (val2->type == Node_var_array)
+ ret = 0; /* both sub-arrays, hence tie */
+ else
+ ret = 1; /* val1 (sub-array) > val2 (scalar) */
+ } else {
+ if (val2->type == Node_var_array)
+ ret = -1; /* val1 (scalar) < val2 (sub-array) */
+ else
+ ret = cmp_nodes(val1, val2); /* both scalars */
+ }
+
+ /* disambiguate ties to force a deterministic order */
+ if (ret == 0)
+ ret = sort_up_index_string(p1, p2);
+ return ret;
+}
+
+/* sort_down_value --- descending value maybe-nums */
+
+static int
+sort_down_value(const void *p1, const void *p2)
+{
+ return -sort_up_value(p1, p2);
+}
+
+/* sort_maybe_numeric_index --- special handling for sorting indices as numbers */
+
+void
+sort_maybe_numeric_index(qsort_compfunc func, NODE **list,
+ size_t num_elems, int setup)
+{
+ static const NODE empty_node;
+
+ size_t i;
+ NODE temp_node;
+ struct sort_num *fake_node;
+
+ /* if we're not sorting indices by numeric value, do nothing */
+ if (func != sort_up_index_number && func != sort_down_index_number)
+ return;
+
+ /*
+ * Insert a new struct in front of each index, so there's somewhere
+ * to store its numeric value after that's calculated once, avoiding
+ * having the qsort callback recalculate it for every comparison.
+ * Our caller will call us back to destroy this intermediate array
+ * as soon as qsort() finishes; the rest of gawk won't see it.
+ * We could use--or perhaps mis-use--gawk nodes here, but they'd tie
+ * up quite a bit more memory during the sort.
+ */
+ if (setup) {
+ temp_node = empty_node; /* init union fields to 0,NULL */
+ for (i = 0; i < num_elems; ++i) {
+ emalloc(fake_node, struct sort_num *,
+ sizeof (struct sort_num), "sort numeric index");
+ /* populate enough fields to satisfy force_number();
+ we don't need to maintain reference counts */
+ temp_node.type = Node_val;
+ temp_node.stptr = list[i]->ahname_str;
+ temp_node.stlen = list[i]->ahname_len;
+ temp_node.flags = PERM | STRING | MAYBE_NUM;
+ /* insert the extra data in front of our input node */
+ fake_node->sort_numbr = force_number(&temp_node);
+ fake_node->sort_index = list[i];
+ list[i] = (NODE *) fake_node;
+ }
+ } else { /* post sort cleanup */
+ for (i = 0; i < num_elems; ++i) {
+ /* undo the setup manipulations */
+ fake_node = (struct sort_num *) list[i];
+ list[i] = fake_node->sort_index;
+ efree((void *) fake_node);
+ }
+ }
+}
+
+/* sort_match --- compare leading part of a string against a sort option */
+
+static size_t
+sort_match(const char *str, size_t slen, const char *target)
+{
+ const char *endofword;
+
+ /*
+ * STR comes from the user and is SLEN characters long;
+ * if it has spaces then we care about the subset up until
+ * its first space rather than the full length.
+ * We check whether STR is the same as--or proper prefix
+ * of--TARGET.
+ * Accept "by-foo" or "as-foo" as a synonym match for "foo".
+ */
+ endofword = strchr(str, ' ');
+ if (endofword != NULL)
+ slen = (size_t) (endofword - str);
+ /* try for exact match */
+ if (slen > 0 && strncasecmp(str, target, slen) == 0)
+ return slen;
+ /* ingore "by-" or "as-" prefix and try again */
+ if (slen > 3
+ && (strncasecmp(str, "by-", 3) == 0
+ || strncasecmp(str, "as-", 3) == 0)
+ && strncasecmp(str + 3, target, slen - 3) == 0)
+ return slen;
+ /* no match */
+ return 0;
+}
+
+/*
+ * sort_selection --- parse user-specified sort ordering
+ * ("ascending index" or "value number" and so forth);
+ * returns a qsort comparison function
+ */
+
+static qsort_compfunc
+sort_selection(NODE *r, const char *warn_arg, int default_by_value)
+{
+ enum sort_bits {
+ unrecognized = 0,
+ Unsorted = 0x01,
+ Ascending = 0x02,
+ Descending = 0x04,
+ by_Index = 0x08,
+ by_Value = 0x10,
+ as_String = 0x20,
+ as_Number = 0x40,
+ as_Inserted = 0x80,
+ allbitsused = 0xff
+ };
+ /*
+ * The documented values are singular, but it's trivial to accept
+ * "index numbers" and "descending values" since we're using a
+ * prefix match. Latin plural of index is the only complication.
+ */
+ static const struct sort_keys {
+ const char *const keyword;
+ enum sort_bits keybit;
+ } sorts[] = {
+ { "unsorted", Unsorted }, /* not part of a three-part key */
+ { "ascending", Ascending }, /* ascending vs descending */
+ { "descending", Descending },
+ { "indexes", by_Index }, /* by_Index vs by_Number */
+ { "indices", by_Index }, /* synonym for plural match */
+ { "values", by_Value },
+ { "strings", as_String }, /* as_String vs as_Number */
+ { "numbers", as_Number },
+ { "numeric", as_Number }, /* synonym; singular only */
+ { "inserted", as_Inserted }, /* not part of a three-part key */
+ { "insertion", as_Inserted }, /* another synonym */
+ };
+
+ static short warned_unrecognized = FALSE;
+ static enum sort_bits prev_invalid = unrecognized;
+
+ char *s;
+ size_t l, matchlen;
+ unsigned i;
+ const char *errfmt;
+ enum sort_bits allparts;
+ qsort_compfunc sort_func, default_sort_func;
+
+ /* deduce our caller from the args provided */
+ if (warn_arg != NULL) /* for-in statement */
+ default_sort_func = (qsort_compfunc) 0; /* unsorted */
+ else if (default_by_value) /* asort() */
+ default_sort_func = sort_up_value;
+ else /* asorti() */
+ default_sort_func = ! IGNORECASE ? sort_up_index_string
+ : sort_up_index_ignrcase;
+
+ r = force_string(r);
+ s = r->stptr;
+ l = r->stlen;
+
+ /* treat empty sort-order string as request for default */
+ if (l == 0)
+ return default_sort_func;
+
+ /* no ordering specification yet */
+ allparts = unrecognized;
+ /*
+ * Scan through S until length L is exhausted, checking its
+ * starting word against each possible ordering choice and
+ * then skipping past that to get to the next word. If no
+ * choice matches, an error has been detected. Duplicate
+ * words are accepted; contradictory ones get noticed and
+ * rejected after the string parsing has completed.
+ */
+ while (l > 0) {
+ matchlen = 0; /* lint suppression */
+ for (i = 0; i < (sizeof sorts / sizeof *sorts); ++i) {
+ matchlen = sort_match(s, l, sorts[i].keyword);
+ if (matchlen > 0) {
+ allparts |= sorts[i].keybit;
+ break;
+ }
+ }
+ if (matchlen == 0) {
+ /* failed to match any possible component */
+ allparts = unrecognized;
+ break;
+ }
+ /* move past the part just handled */
+ s += matchlen, l -= matchlen;
+ /* (l > 0) here would accept a trailing space; (l > 1) won't */
+ if (l > 1 && *s == ' ')
+ ++s, --l;
+ } /* while (l > 0) */
+
+ /*
+ * If we have one or two parts of a three part key, fill in
+ * whichever parts are missing with default values. For key
+ * type the default is specified by our caller, for direction
+ * it is Ascending, and for comparison mode it is as_String
+ * but only when sorting by_Index. When sorting by_Value,
+ * comparison mode is out of the user's control and we don't
+ * need or want to fill in either as_String or as_Number.
+ */
+ if ((allparts & (Ascending|Descending|as_String|as_Number)) != 0
+ && (allparts & (Unsorted|by_Index|by_Value|as_Inserted)) == 0)
+ allparts |= default_by_value ? by_Value : by_Index;
+ if ((allparts & (by_Index|by_Value|as_String|as_Number)) != 0
+ && (allparts & (Unsorted|Ascending|Descending|as_Inserted)) == 0)
+ allparts |= Ascending;
+ /* by_Value is handled differently from by_Index */
+ if ((allparts & (Ascending|Descending|by_Index)) != 0
+ && (allparts & (Unsorted|by_Value|as_String|as_Number|as_Inserted)) == 0)
+ allparts |= as_String;
+
+ /*
+ * ALLPARTS is a bit mask of all the user's specified ordering
+ * choices; partial specifications have been extended into full
+ * three part key. Valid combinations yield a qsort(3) comparison
+ * routine result. Invalid ones are rejected and yield default
+ * sort, with an error message for asort()/asorti(), a warning
+ * for for-in statement if this is a different invalid combination
+ * than the most recently warned about one, or a one-time warning
+ * if this is an unrecognized ordering specification and we're
+ * running in lint checking mode.
+ */
+ sort_func = default_sort_func;
+ errfmt = NULL;
+ switch (allparts) {
+ case Unsorted:
+ sort_func = (qsort_compfunc) 0;
+ break;
+ case Ascending|by_Index|as_String:
+ sort_func = ! IGNORECASE ? sort_up_index_string
+ : sort_up_index_ignrcase;
+ break;
+ case Descending|by_Index|as_String:
+ sort_func = ! IGNORECASE ? sort_down_index_string
+ : sort_down_index_ignrcase;
+ break;
+ case Ascending|by_Index|as_Number:
+ sort_func = sort_up_index_number;
+ break;
+ case Descending|by_Index|as_Number:
+ sort_func = sort_down_index_number;
+ break;
+ case Ascending|by_Value:
+ sort_func = sort_up_value;
+ break;
+ case Descending|by_Value:
+ sort_func = sort_down_value;
+ break;
+ /* for value sorts, distinction between string and number is not
+ allowed; they're always ordered by basic awk MAYBE_NUM comparison */
+ case Ascending|by_Value|as_String:
+ case Descending|by_Value|as_String:
+ if (errfmt == NULL)
+ errfmt = _("%s: sorting by value can't be forced to use string comparison");
+ /* FALL THROUGH */
+ case Ascending|by_Value|as_Number:
+ case Descending|by_Value|as_Number:
+ if (errfmt == NULL)
+ errfmt = _("%s: sorting by value can't be forced to use numeric comparison");
+ /* FALL THROUGH */
+ /* gawk doesn't maintain the data necessary to sort by
+ order of insertion */
+ case as_Inserted:
+ if (errfmt == NULL)
+ errfmt = _("%s: sort ordering \"as-inserted\" is not implemented");
+ /* FALL THROUGH */
+ default:
+ /* sort_func still has default value */
+ if (errfmt == NULL)
+ errfmt = (allparts == unrecognized)
+ ? _("%s: sort specification is not recognized")
+ : _("%s: sort specification is invalid");
+ if (warn_arg == NULL) {
+ /* asort()/asorti() bad sort specification argument */
+ error(errfmt, default_by_value ? "asort" : "asorti");
+ } else if (allparts != unrecognized) {
+ /* invalid combination of valid ordering components */
+ if (allparts != prev_invalid)
+ warning(errfmt, warn_arg);
+ prev_invalid = allparts;
+ } else if (do_lint) {
+ /* unrecognized ordering specification string */
+ if (! warned_unrecognized)
+ lintwarn(errfmt, warn_arg);
+ warned_unrecognized = TRUE;
+ }
+ break;
+ }
+ return sort_func;
+}
+
+/*
+ * sorted_in --- fetch value of PROCINFO["sorted_in"] and use sort_selection()
+ * to parse it; controls whether and how traversal of ``for (index in array)''
+ * should be sorted; returns a qsort comparison function
+ */
+
+qsort_compfunc
+sorted_in(void) /* called from r_interpret(eval.c) */
+{
+ static NODE *sorted_str = NULL;
+ static short warned_extension = FALSE;
+
+ NODE *r;
+
+ /* if there's no PROCINFO[], there's no ["sorted_in"], so no sorting */
+ if (PROCINFO_node == NULL)
+ return (qsort_compfunc) 0;
+
+ if (sorted_str == NULL) /* do this once */
+ sorted_str = make_string("sorted_in", 9);
+
+ r = (NODE *) NULL;
+ if (in_array(PROCINFO_node, sorted_str))
+ r = *assoc_lookup(PROCINFO_node, sorted_str, TRUE);
+ /* if there's no PROCINFO["sorted_in"], there's no sorting */
+ if (r == NULL)
+ return (qsort_compfunc) 0;
+
+ /* we're going to make use of PROCINFO["sorted_in"] */
+ if (do_lint && ! warned_extension) {
+ warned_extension = TRUE;
+ lintwarn(_("sorted array traversal is a gawk extension"));
+ }
+
+ return sort_selection(r, "`PROCINFO[\"sorted_in\"]'", FALSE);
+}
+
+
/*
From bonzini@gnu.org Mon Oct 28 16:05:26 2002
Date: Mon, 28 Oct 2002 13:33:03 +0100
diff --git a/awk.h b/awk.h
index a56b2b29..6dcc0332 100644
--- a/awk.h
+++ b/awk.h
@@ -846,6 +846,9 @@ struct flagtab {
#endif
#define UNLIMITED LONG_MAX
+/* qsort comparison function */
+typedef int (*qsort_compfunc)(const void *,const void *);
+
/* -------------------------- External variables -------------------------- */
/* gawk builtin variables */
extern long NF;
@@ -1123,6 +1126,9 @@ extern NODE *assoc_dump(NODE *symbol, int indent_level);
extern NODE *do_adump(int nargs);
extern NODE *do_asort(int nargs);
extern NODE *do_asorti(int nargs);
+extern int comp_func(const void *p1, const void *p2);
+extern qsort_compfunc sorted_in(void);
+extern void sort_maybe_numeric_index(qsort_compfunc, NODE **, size_t, int);
extern unsigned long (*hash)(const char *s, size_t len, unsigned long hsize, size_t *code);
/* awkgram.c */
extern NODE *mk_symbol(NODETYPE type, NODE *value);
@@ -1205,7 +1211,6 @@ extern void PUSH_CODE(INSTRUCTION *cp);
extern INSTRUCTION *POP_CODE(void);
extern int interpret(INSTRUCTION *);
extern int cmp_nodes(NODE *, NODE *);
-extern int comp_func(const void *p1, const void *p2);
extern void set_IGNORECASE(void);
extern void set_OFS(void);
extern void set_ORS(void);
diff --git a/doc/ChangeLog b/doc/ChangeLog
index 9c2ad6ec..2f25cb6a 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,13 @@
+Sun Mar 27 21:10:55 2011 Pat Rankin <rankin@pactechdata.com>
+
+ * gawk.texi (Builit-in Variables: PROCINFO array, Scanning All
+ Elements of an Array: `for' statement): Update the documentation
+ for PROCINFO["sorted_in"]; add "ascending index number",
+ "descending index string", "ascending value", and "descending
+ value" as supported sort orderings.
+ * gawk.1 (PROCINFO array): Update PROCINFO["sorted_in"] to
+ reflect that the value matters, and list the supported sort orders.
+
Tue Feb 15 17:11:26 2011 Pat Rankin <rankin@pactechdata.com>
* gawk.texi (Builit-in Variables: PROCINFO array, Scanning All
diff --git a/doc/gawk.1 b/doc/gawk.1
index 83a91094..ff3c0a20 100644
--- a/doc/gawk.1
+++ b/doc/gawk.1
@@ -1082,13 +1082,20 @@ system call.
\fBPROCINFO["sorted_in"]\fP
If this element exists in
.BR PROCINFO ,
-.IR "no matter what its value" ,
-then
-.I gawk
-will cause array
+then its value controls the order in which array elements
+are traversed in
.B for
-loops
-to traverse the array indices in sorted order.
+loops.
+Supported values are
+\fB"ascending index string"\fR,
+\fB"ascending index number"\fR,
+\fB"ascending value"\fR,
+\fB"descending index string"\fR,
+\fB"descending index number"\fR,
+\fB"descending value"\fR, and
+\fB"unsorted"\fR.
+The order specification words can be truncated, or omitted (provided
+that at least one is present), or given in any order.
.TP
\fBPROCINFO["version"]\fP
the version of
diff --git a/doc/gawk.info b/doc/gawk.info
index 714df826..f084fa3f 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -3633,7 +3633,7 @@ better performance when reading records. Otherwise, `gawk' has to make
several function calls, _per input character_, to find the record
terminator.
- According to POSIX, string conmparison is also affected by locales
+ According to POSIX, string comparison is also affected by locales
(similar to regular expressions). The details are presented in *note
POSIX String Comparison::.
@@ -9408,18 +9408,32 @@ with a pound sign (`#').
`PROCINFO["sorted_in"]'
If this element exists in `PROCINFO', its value controls the
- order in which array indices will be processed by `for(i in
- arr) ...' loops. A value of `"ascending index string"',
- which may be shortened to `"ascending index"' or just
- `"ascending"', will result in either case sensitive or case
- insensitive ascending order depending upon the value of
- `IGNORECASE'. A value of `"descending index string"', which
- may be shortened in a similar manner, will result in the
- opposite order. The value `"unsorted"' is also recognized,
- yielding the default result of arbitrary order. Any other
- value will be ignored, and warned about (at the time of first
- `for(in in arr) ...' execution) when lint checking is enabled.
- *Note Scanning an Array::, for more information.
+ order in which array indices will be processed by `for (index
+ in array) ...' loops. The value should contain one to three
+ words; separate pairs of words by a single space. One word
+ controls sort direction, "ascending" or "descending;" another
+ controls the sort key, "index" or "value;" and the remaining
+ one, which is only valid for sorting by index, is comparison
+ mode, "string" or "number." When two or three words are
+ present, they may be specified in any order, so `ascending
+ index string' and `string ascending index' are equivalent.
+ Also, each word may be truncated, so `asc index str' and `a i
+ s' are also equivalent. Note that a separating space is
+ required even when the words have been shortened down to one
+ letter each.
+
+ You can omit direction and/or key type and/or comparison
+ mode. Provided that at least one is present, missing parts
+ of a sort specification default to `ascending', `index', and
+ (for indices only) `string', respectively. An empty string,
+ `""', is the same as `unsorted' and will cause `for (index in
+ array) ...' to process the indices in arbitrary order.
+ Another thing to note is that the array sorting takes place
+ at the time `for (... in ...)' is about to start executing,
+ so changing the value of `PROCINFO["sorted_in"]' during loop
+ execution does not have any effect on the order in which any
+ remaining array elements get processed. *Note Scanning an
+ Array::, for more information.
`PROCINFO["strftime"]'
The default time format string for `strftime()'. Assigning a
@@ -9938,15 +9952,43 @@ produce strange results. It is best to avoid such things.
As an extension, `gawk' makes it possible for you to loop over the
elements of an array in order, based on the value of
-`PROCINFO["sorted_in"]' (*note Auto-set::). At present two sorting
-options are available: `"ascending index string"' and `"descending
-index string"'. They can be shortened by omitting `string' or `index
-string'. The value `"unsorted"' can be used as an explicit "no-op" and
-yields the same result as when `PROCINFO["sorted_in"]' has no value at
-all. If the index strings contain letters, the value of `IGNORECASE'
-affects the order of the result. This extension is disabled in POSIX
-mode, since the `PROCINFO' array is not special in that case. For
-example:
+`PROCINFO["sorted_in"]' (*note Auto-set::). Several sorting options
+are available:
+
+`"ascending index string"'
+ Order by indices compared as strings, the most basic sort.
+ (Internally, array indices are always strings, so with `a[2*5] = 1'
+ the index is actually `"10"' rather than numeric 10.)
+
+`"ascending index number"'
+ Order by indices but force them to be treated as numbers in the
+ process. Any index with non-numeric value will end up positioned
+ as if it were 0.
+
+`"ascending value"'
+ Order by element values rather than by indices. Comparisons are
+ done as numeric when both values being compared are numeric, or
+ done as strings when either or both aren't numeric. Sub-arrays,
+ if present, come out last.
+
+`"descending index string"'
+ Reverse order from the most basic sort.
+
+`"descending index number"'
+ Numeric indices ordered from high to low.
+
+`"descending value"'
+ Element values ordered from high to low. Sub-arrays, if present,
+ come out first.
+
+`"unsorted"'
+ Array elements are processed in arbitrary order, the normal `awk'
+ behavior.
+
+ Portions of the sort specification string may be truncated or
+omitted. The default is `ascending' for direction, `index' for sort
+key type, and (when sorting by index only) `string' for comparison mode.
+For example:
$ gawk 'BEGIN {
> a[4] = 4
@@ -9957,7 +9999,7 @@ example:
-| 4 4
-| 3 3
$ gawk 'BEGIN {
- > PROCINFO["sorted_in"] = "ascending index"
+ > PROCINFO["sorted_in"] = "asc index"
> a[4] = 4
> a[3] = 3
> for (i in a)
@@ -9971,6 +10013,26 @@ array has been reported to add 15% to 20% overhead to the execution
time of `awk' programs. For this reason, sorted array traversal is not
the default.
+ When sorting an array by element values, if a value happens to be a
+sub-array then it is considered to be greater than any string or
+numeric value, regardless of what the sub-array itself contains, and
+all sub-arrays are treated as being equal to each other. Their order
+relative to each other is determined by their index strings.
+
+ Sorting by array element values (for values other than sub-arrays)
+always uses basic `awk' comparison mode: if both values happen to be
+numbers then they're compared as numbers, otherwise they're compared as
+strings.
+
+ When string comparisons are made during a sort, either for element
+values where one or both aren't numbers or for element indices handled
+as strings, the value of `IGNORECASE' controls whether the comparisons
+treat corresponding upper and lower case letters as equivalent or
+distinct.
+
+ This sorting extension is disabled in POSIX mode, since the
+`PROCINFO' array is not special in that case.
+

File: gawk.info, Node: Delete, Next: Numeric Array Subscripts, Prev: Array Basics, Up: Arrays
@@ -24437,7 +24499,7 @@ Index
(line 67)
* advanced features, data files as single record: Records. (line 175)
* advanced features, fixed-width data: Constant Size. (line 9)
-* advanced features, FNR/NR variables: Auto-set. (line 215)
+* advanced features, FNR/NR variables: Auto-set. (line 229)
* advanced features, gawk: Advanced Features. (line 6)
* advanced features, gawk, network programming: TCP/IP Networking.
(line 6)
@@ -24937,7 +24999,7 @@ Index
(line 47)
* dark corner, FILENAME variable <1>: Auto-set. (line 92)
* dark corner, FILENAME variable: Getline Notes. (line 19)
-* dark corner, FNR/NR variables: Auto-set. (line 215)
+* dark corner, FNR/NR variables: Auto-set. (line 229)
* dark corner, format-control characters: Control Letters. (line 18)
* dark corner, FS as null string: Single Character Fields.
(line 20)
@@ -25136,7 +25198,7 @@ Index
* differences in awk and gawk, regular expressions: Case-sensitivity.
(line 26)
* differences in awk and gawk, RS/RT variables: Records. (line 167)
-* differences in awk and gawk, RT variable: Auto-set. (line 204)
+* differences in awk and gawk, RT variable: Auto-set. (line 218)
* differences in awk and gawk, single-character fields: Single Character Fields.
(line 6)
* differences in awk and gawk, split() function: String Functions.
@@ -25417,7 +25479,7 @@ Index
* floating-point, numbers, AWKNUM internal type: Internals. (line 19)
* FNR variable <1>: Auto-set. (line 102)
* FNR variable: Records. (line 6)
-* FNR variable, changing: Auto-set. (line 215)
+* FNR variable, changing: Auto-set. (line 229)
* for statement: For Statement. (line 6)
* for statement, in arrays: Scanning an Array. (line 20)
* force_number() internal function: Internals. (line 27)
@@ -25591,7 +25653,7 @@ Index
* gawk, regular expressions, operators: GNU Regexp Operators.
(line 6)
* gawk, regular expressions, precedence: Regexp Operators. (line 157)
-* gawk, RT variable in <1>: Auto-set. (line 204)
+* gawk, RT variable in <1>: Auto-set. (line 218)
* gawk, RT variable in <2>: Getline/Variable/File.
(line 10)
* gawk, RT variable in <3>: Multiple Line. (line 129)
@@ -26038,7 +26100,7 @@ Index
* not Boolean-logic operator: Boolean Ops. (line 6)
* NR variable <1>: Auto-set. (line 118)
* NR variable: Records. (line 6)
-* NR variable, changing: Auto-set. (line 215)
+* NR variable, changing: Auto-set. (line 229)
* null strings <1>: Basic Data Typing. (line 50)
* null strings <2>: Truth Values. (line 6)
* null strings <3>: Regexp Field Splitting.
@@ -26470,7 +26532,7 @@ Index
* right angle bracket (>), >> operator (I/O): Redirection. (line 50)
* right shift, bitwise: Bitwise Functions. (line 32)
* Ritchie, Dennis: Basic Data Typing. (line 74)
-* RLENGTH variable: Auto-set. (line 191)
+* RLENGTH variable: Auto-set. (line 205)
* RLENGTH variable, match() function and: String Functions. (line 205)
* Robbins, Arnold <1>: Future Extensions. (line 6)
* Robbins, Arnold <2>: Bugs. (line 32)
@@ -26495,9 +26557,9 @@ Index
* RS variable: Records. (line 20)
* RS variable, multiline records and: Multiple Line. (line 17)
* rshift() function (gawk): Bitwise Functions. (line 51)
-* RSTART variable: Auto-set. (line 197)
+* RSTART variable: Auto-set. (line 211)
* RSTART variable, match() function and: String Functions. (line 205)
-* RT variable <1>: Auto-set. (line 204)
+* RT variable <1>: Auto-set. (line 218)
* RT variable <2>: Getline/Variable/File.
(line 10)
* RT variable <3>: Multiple Line. (line 129)
@@ -27008,339 +27070,339 @@ Ref: Case-sensitivity-Footnote-2154459
Node: Leftmost Longest154567
Node: Computed Regexps155768
Node: Locales159194
-Node: Reading Files162902
-Node: Records164843
-Ref: Records-Footnote-1173517
-Node: Fields173554
-Ref: Fields-Footnote-1176587
-Node: Nonconstant Fields176673
-Node: Changing Fields178875
-Node: Field Separators184853
-Node: Default Field Splitting187482
-Node: Regexp Field Splitting188599
-Node: Single Character Fields191941
-Node: Command Line Field Separator193000
-Node: Field Splitting Summary196441
-Ref: Field Splitting Summary-Footnote-1199633
-Node: Constant Size199734
-Node: Splitting By Content204318
-Ref: Splitting By Content-Footnote-1208044
-Node: Multiple Line208084
-Ref: Multiple Line-Footnote-1213931
-Node: Getline214110
-Node: Plain Getline216338
-Node: Getline/Variable218427
-Node: Getline/File219568
-Node: Getline/Variable/File220890
-Ref: Getline/Variable/File-Footnote-1222489
-Node: Getline/Pipe222576
-Node: Getline/Variable/Pipe225136
-Node: Getline/Coprocess226243
-Node: Getline/Variable/Coprocess227486
-Node: Getline Notes228200
-Node: Getline Summary230142
-Ref: table-getline-variants230485
-Node: Command line directories231341
-Node: Printing231966
-Node: Print233597
-Node: Print Examples234934
-Node: Output Separators237718
-Node: OFMT239478
-Node: Printf240836
-Node: Basic Printf241742
-Node: Control Letters243281
-Node: Format Modifiers247093
-Node: Printf Examples253102
-Node: Redirection255817
-Node: Special Files262801
-Node: Special FD263334
-Ref: Special FD-Footnote-1266958
-Node: Special Network267032
-Node: Special Caveats267882
-Node: Close Files And Pipes268678
-Ref: Close Files And Pipes-Footnote-1275701
-Ref: Close Files And Pipes-Footnote-2275849
-Node: Expressions275999
-Node: Values277068
-Node: Constants277744
-Node: Scalar Constants278424
-Ref: Scalar Constants-Footnote-1279283
-Node: Nondecimal-numbers279465
-Node: Regexp Constants282524
-Node: Using Constant Regexps282999
-Node: Variables286054
-Node: Using Variables286709
-Node: Assignment Options288433
-Node: Conversion290305
-Ref: table-locale-affects295681
-Ref: Conversion-Footnote-1296305
-Node: All Operators296414
-Node: Arithmetic Ops297044
-Node: Concatenation299549
-Ref: Concatenation-Footnote-1302342
-Node: Assignment Ops302462
-Ref: table-assign-ops307450
-Node: Increment Ops308858
-Node: Truth Values and Conditions312328
-Node: Truth Values313411
-Node: Typing and Comparison314460
-Node: Variable Typing315249
-Ref: Variable Typing-Footnote-1319146
-Node: Comparison Operators319268
-Ref: table-relational-ops319678
-Node: POSIX String Comparison323227
-Ref: POSIX String Comparison-Footnote-1324183
-Node: Boolean Ops324321
-Ref: Boolean Ops-Footnote-1328399
-Node: Conditional Exp328490
-Node: Function Calls330222
-Node: Precedence333816
-Node: Patterns and Actions337469
-Node: Pattern Overview338523
-Node: Regexp Patterns340189
-Node: Expression Patterns340732
-Node: Ranges344306
-Node: BEGIN/END347272
-Node: Using BEGIN/END348034
-Ref: Using BEGIN/END-Footnote-1350765
-Node: I/O And BEGIN/END350871
-Node: BEGINFILE/ENDFILE353153
-Node: Empty355984
-Node: Using Shell Variables356300
-Node: Action Overview358585
-Node: Statements360942
-Node: If Statement362796
-Node: While Statement364295
-Node: Do Statement366339
-Node: For Statement367495
-Node: Switch Statement370647
-Node: Break Statement372744
-Node: Continue Statement374734
-Node: Next Statement376521
-Node: Nextfile Statement378911
-Node: Exit Statement381387
-Node: Built-in Variables383803
-Node: User-modified384898
-Ref: User-modified-Footnote-1392924
-Node: Auto-set392986
-Ref: Auto-set-Footnote-1402861
-Node: ARGC and ARGV403066
-Node: Arrays406917
-Node: Array Basics408488
-Node: Array Intro409199
-Node: Reference to Elements413517
-Node: Assigning Elements415787
-Node: Array Example416278
-Node: Scanning an Array418010
-Node: Delete421542
-Ref: Delete-Footnote-1423977
-Node: Numeric Array Subscripts424034
-Node: Uninitialized Subscripts426217
-Node: Multi-dimensional427845
-Node: Multi-scanning430936
-Node: Array Sorting432520
-Ref: Array Sorting-Footnote-1435614
-Node: Arrays of Arrays435808
-Node: Functions440381
-Node: Built-in441203
-Node: Calling Built-in442281
-Node: Numeric Functions444269
-Ref: Numeric Functions-Footnote-1448034
-Ref: Numeric Functions-Footnote-2448391
-Ref: Numeric Functions-Footnote-3448439
-Node: String Functions448708
-Ref: String Functions-Footnote-1471210
-Ref: String Functions-Footnote-2471339
-Ref: String Functions-Footnote-3471587
-Node: Gory Details471674
-Ref: table-sub-escapes473353
-Ref: table-posix-sub474667
-Ref: table-gensub-escapes475580
-Node: I/O Functions476751
-Ref: I/O Functions-Footnote-1483406
-Node: Time Functions483553
-Ref: Time Functions-Footnote-1494448
-Ref: Time Functions-Footnote-2494516
-Ref: Time Functions-Footnote-3494674
-Ref: Time Functions-Footnote-4494785
-Ref: Time Functions-Footnote-5494897
-Ref: Time Functions-Footnote-6495124
-Node: Bitwise Functions495390
-Ref: table-bitwise-ops495948
-Ref: Bitwise Functions-Footnote-1500108
-Node: Type Functions500292
-Node: I18N Functions500762
-Node: User-defined502389
-Node: Definition Syntax503193
-Ref: Definition Syntax-Footnote-1508103
-Node: Function Example508172
-Node: Function Caveats510766
-Node: Calling A Function511187
-Node: Variable Scope512302
-Node: Pass By Value/Reference514277
-Node: Return Statement517717
-Node: Dynamic Typing520698
-Node: Indirect Calls521433
-Node: Internationalization531118
-Node: I18N and L10N532544
-Node: Explaining gettext533230
-Ref: Explaining gettext-Footnote-1538296
-Ref: Explaining gettext-Footnote-2538480
-Node: Programmer i18n538645
-Node: Translator i18n542845
-Node: String Extraction543638
-Ref: String Extraction-Footnote-1544599
-Node: Printf Ordering544685
-Ref: Printf Ordering-Footnote-1547469
-Node: I18N Portability547533
-Ref: I18N Portability-Footnote-1549982
-Node: I18N Example550045
-Ref: I18N Example-Footnote-1552680
-Node: Gawk I18N552752
-Node: Advanced Features553369
-Node: Nondecimal Data554688
-Node: Two-way I/O556269
-Ref: Two-way I/O-Footnote-1561703
-Node: TCP/IP Networking561773
-Node: Profiling564617
-Node: Library Functions572091
-Ref: Library Functions-Footnote-1575196
-Node: Library Names575367
-Ref: Library Names-Footnote-1578838
-Ref: Library Names-Footnote-2579058
-Node: General Functions579144
-Node: Nextfile Function580207
-Node: Strtonum Function584588
-Node: Assert Function587544
-Node: Round Function590870
-Node: Cliff Random Function592413
-Node: Ordinal Functions593429
-Ref: Ordinal Functions-Footnote-1596499
-Ref: Ordinal Functions-Footnote-2596751
-Node: Join Function596960
-Ref: Join Function-Footnote-1598731
-Node: Gettimeofday Function598931
-Node: Data File Management602646
-Node: Filetrans Function603278
-Node: Rewind Function607514
-Node: File Checking608967
-Node: Empty Files610061
-Node: Ignoring Assigns612291
-Node: Getopt Function613844
-Ref: Getopt Function-Footnote-1625148
-Node: Passwd Functions625351
-Ref: Passwd Functions-Footnote-1634326
-Node: Group Functions634414
-Node: Walking Arrays642498
-Node: Sample Programs644067
-Node: Running Examples644732
-Node: Clones645460
-Node: Cut Program646684
-Node: Egrep Program656533
-Ref: Egrep Program-Footnote-1664304
-Node: Id Program664414
-Node: Split Program668030
-Ref: Split Program-Footnote-1671549
-Node: Tee Program671677
-Node: Uniq Program674480
-Node: Wc Program681903
-Ref: Wc Program-Footnote-1686167
-Node: Miscellaneous Programs686367
-Node: Dupword Program687555
-Node: Alarm Program689586
-Node: Translate Program694343
-Ref: Translate Program-Footnote-1698722
-Ref: Translate Program-Footnote-2698950
-Node: Labels Program699084
-Ref: Labels Program-Footnote-1702455
-Node: Word Sorting702539
-Node: History Sorting706422
-Node: Extract Program708260
-Ref: Extract Program-Footnote-1715741
-Node: Simple Sed715869
-Node: Igawk Program718931
-Ref: Igawk Program-Footnote-1733963
-Ref: Igawk Program-Footnote-2734164
-Node: Anagram Program734302
-Node: Signature Program737400
-Node: Debugger738503
-Node: Debugging739414
-Node: Debugging Concepts739728
-Node: Debugging Terms741584
-Node: Awk Debugging744129
-Node: Sample dgawk session745021
-Node: dgawk invocation745513
-Node: Finding The Bug746695
-Node: List of Debugger Commands753180
-Node: Breakpoint Control754491
-Node: Dgawk Execution Control757967
-Node: Viewing And Changing Data761318
-Node: Dgawk Stack764627
-Node: Dgawk Info766087
-Node: Miscellaneous Dgawk Commands770035
-Node: Readline Support775466
-Node: Dgawk Limitations776293
-Node: Language History778432
-Node: V7/SVR3.1779864
-Node: SVR4782159
-Node: POSIX783601
-Node: BTL784599
-Node: POSIX/GNU785333
-Node: Common Extensions790519
-Node: Contributors791620
-Node: Installation795655
-Node: Gawk Distribution796549
-Node: Getting797033
-Node: Extracting797859
-Node: Distribution contents799550
-Node: Unix Installation804568
-Node: Quick Installation805185
-Node: Additional Configuration Options807147
-Node: Configuration Philosophy808624
-Node: Non-Unix Installation810966
-Node: PC Installation811424
-Node: PC Binary Installation812723
-Node: PC Compiling814571
-Node: PC Testing817515
-Node: PC Using818691
-Node: Cygwin822876
-Node: MSYS823873
-Node: VMS Installation824387
-Node: VMS Compilation824993
-Ref: VMS Compilation-Footnote-1826000
-Node: VMS Installation Details826058
-Node: VMS Running827693
-Node: VMS Old Gawk829300
-Node: Bugs829774
-Node: Other Versions833639
-Node: Notes838918
-Node: Compatibility Mode839610
-Node: Additions840393
-Node: Accessing The Source841205
-Node: Adding Code842628
-Node: New Ports848176
-Node: Dynamic Extensions852289
-Node: Internals853665
-Node: Plugin License862781
-Node: Sample Library863415
-Node: Internal File Description864101
-Node: Internal File Ops867808
-Ref: Internal File Ops-Footnote-1872576
-Node: Using Internal File Ops872724
-Node: Future Extensions875101
-Node: Basic Concepts877605
-Node: Basic High Level878362
-Ref: Basic High Level-Footnote-1882397
-Node: Basic Data Typing882582
-Node: Floating Point Issues887107
-Node: String Conversion Precision888190
-Ref: String Conversion Precision-Footnote-1889884
-Node: Unexpected Results889993
-Node: POSIX Floating Point Problems891819
-Ref: POSIX Floating Point Problems-Footnote-1895521
-Node: Glossary895559
-Node: Copying919702
-Node: GNU Free Documentation License957259
-Node: Index982396
+Node: Reading Files162901
+Node: Records164842
+Ref: Records-Footnote-1173516
+Node: Fields173553
+Ref: Fields-Footnote-1176586
+Node: Nonconstant Fields176672
+Node: Changing Fields178874
+Node: Field Separators184852
+Node: Default Field Splitting187481
+Node: Regexp Field Splitting188598
+Node: Single Character Fields191940
+Node: Command Line Field Separator192999
+Node: Field Splitting Summary196440
+Ref: Field Splitting Summary-Footnote-1199632
+Node: Constant Size199733
+Node: Splitting By Content204317
+Ref: Splitting By Content-Footnote-1208043
+Node: Multiple Line208083
+Ref: Multiple Line-Footnote-1213930
+Node: Getline214109
+Node: Plain Getline216337
+Node: Getline/Variable218426
+Node: Getline/File219567
+Node: Getline/Variable/File220889
+Ref: Getline/Variable/File-Footnote-1222488
+Node: Getline/Pipe222575
+Node: Getline/Variable/Pipe225135
+Node: Getline/Coprocess226242
+Node: Getline/Variable/Coprocess227485
+Node: Getline Notes228199
+Node: Getline Summary230141
+Ref: table-getline-variants230484
+Node: Command line directories231340
+Node: Printing231965
+Node: Print233596
+Node: Print Examples234933
+Node: Output Separators237717
+Node: OFMT239477
+Node: Printf240835
+Node: Basic Printf241741
+Node: Control Letters243280
+Node: Format Modifiers247092
+Node: Printf Examples253101
+Node: Redirection255816
+Node: Special Files262800
+Node: Special FD263333
+Ref: Special FD-Footnote-1266957
+Node: Special Network267031
+Node: Special Caveats267881
+Node: Close Files And Pipes268677
+Ref: Close Files And Pipes-Footnote-1275700
+Ref: Close Files And Pipes-Footnote-2275848
+Node: Expressions275998
+Node: Values277067
+Node: Constants277743
+Node: Scalar Constants278423
+Ref: Scalar Constants-Footnote-1279282
+Node: Nondecimal-numbers279464
+Node: Regexp Constants282523
+Node: Using Constant Regexps282998
+Node: Variables286053
+Node: Using Variables286708
+Node: Assignment Options288432
+Node: Conversion290304
+Ref: table-locale-affects295680
+Ref: Conversion-Footnote-1296304
+Node: All Operators296413
+Node: Arithmetic Ops297043
+Node: Concatenation299548
+Ref: Concatenation-Footnote-1302341
+Node: Assignment Ops302461
+Ref: table-assign-ops307449
+Node: Increment Ops308857
+Node: Truth Values and Conditions312327
+Node: Truth Values313410
+Node: Typing and Comparison314459
+Node: Variable Typing315248
+Ref: Variable Typing-Footnote-1319145
+Node: Comparison Operators319267
+Ref: table-relational-ops319677
+Node: POSIX String Comparison323226
+Ref: POSIX String Comparison-Footnote-1324182
+Node: Boolean Ops324320
+Ref: Boolean Ops-Footnote-1328398
+Node: Conditional Exp328489
+Node: Function Calls330221
+Node: Precedence333815
+Node: Patterns and Actions337468
+Node: Pattern Overview338522
+Node: Regexp Patterns340188
+Node: Expression Patterns340731
+Node: Ranges344305
+Node: BEGIN/END347271
+Node: Using BEGIN/END348033
+Ref: Using BEGIN/END-Footnote-1350764
+Node: I/O And BEGIN/END350870
+Node: BEGINFILE/ENDFILE353152
+Node: Empty355983
+Node: Using Shell Variables356299
+Node: Action Overview358584
+Node: Statements360941
+Node: If Statement362795
+Node: While Statement364294
+Node: Do Statement366338
+Node: For Statement367494
+Node: Switch Statement370646
+Node: Break Statement372743
+Node: Continue Statement374733
+Node: Next Statement376520
+Node: Nextfile Statement378910
+Node: Exit Statement381386
+Node: Built-in Variables383802
+Node: User-modified384897
+Ref: User-modified-Footnote-1392923
+Node: Auto-set392985
+Ref: Auto-set-Footnote-1403716
+Node: ARGC and ARGV403921
+Node: Arrays407772
+Node: Array Basics409343
+Node: Array Intro410054
+Node: Reference to Elements414372
+Node: Assigning Elements416642
+Node: Array Example417133
+Node: Scanning an Array418865
+Node: Delete424132
+Ref: Delete-Footnote-1426567
+Node: Numeric Array Subscripts426624
+Node: Uninitialized Subscripts428807
+Node: Multi-dimensional430435
+Node: Multi-scanning433526
+Node: Array Sorting435110
+Ref: Array Sorting-Footnote-1438204
+Node: Arrays of Arrays438398
+Node: Functions442971
+Node: Built-in443793
+Node: Calling Built-in444871
+Node: Numeric Functions446859
+Ref: Numeric Functions-Footnote-1450624
+Ref: Numeric Functions-Footnote-2450981
+Ref: Numeric Functions-Footnote-3451029
+Node: String Functions451298
+Ref: String Functions-Footnote-1473800
+Ref: String Functions-Footnote-2473929
+Ref: String Functions-Footnote-3474177
+Node: Gory Details474264
+Ref: table-sub-escapes475943
+Ref: table-posix-sub477257
+Ref: table-gensub-escapes478170
+Node: I/O Functions479341
+Ref: I/O Functions-Footnote-1485996
+Node: Time Functions486143
+Ref: Time Functions-Footnote-1497038
+Ref: Time Functions-Footnote-2497106
+Ref: Time Functions-Footnote-3497264
+Ref: Time Functions-Footnote-4497375
+Ref: Time Functions-Footnote-5497487
+Ref: Time Functions-Footnote-6497714
+Node: Bitwise Functions497980
+Ref: table-bitwise-ops498538
+Ref: Bitwise Functions-Footnote-1502698
+Node: Type Functions502882
+Node: I18N Functions503352
+Node: User-defined504979
+Node: Definition Syntax505783
+Ref: Definition Syntax-Footnote-1510693
+Node: Function Example510762
+Node: Function Caveats513356
+Node: Calling A Function513777
+Node: Variable Scope514892
+Node: Pass By Value/Reference516867
+Node: Return Statement520307
+Node: Dynamic Typing523288
+Node: Indirect Calls524023
+Node: Internationalization533708
+Node: I18N and L10N535134
+Node: Explaining gettext535820
+Ref: Explaining gettext-Footnote-1540886
+Ref: Explaining gettext-Footnote-2541070
+Node: Programmer i18n541235
+Node: Translator i18n545435
+Node: String Extraction546228
+Ref: String Extraction-Footnote-1547189
+Node: Printf Ordering547275
+Ref: Printf Ordering-Footnote-1550059
+Node: I18N Portability550123
+Ref: I18N Portability-Footnote-1552572
+Node: I18N Example552635
+Ref: I18N Example-Footnote-1555270
+Node: Gawk I18N555342
+Node: Advanced Features555959
+Node: Nondecimal Data557278
+Node: Two-way I/O558859
+Ref: Two-way I/O-Footnote-1564293
+Node: TCP/IP Networking564363
+Node: Profiling567207
+Node: Library Functions574681
+Ref: Library Functions-Footnote-1577786
+Node: Library Names577957
+Ref: Library Names-Footnote-1581428
+Ref: Library Names-Footnote-2581648
+Node: General Functions581734
+Node: Nextfile Function582797
+Node: Strtonum Function587178
+Node: Assert Function590134
+Node: Round Function593460
+Node: Cliff Random Function595003
+Node: Ordinal Functions596019
+Ref: Ordinal Functions-Footnote-1599089
+Ref: Ordinal Functions-Footnote-2599341
+Node: Join Function599550
+Ref: Join Function-Footnote-1601321
+Node: Gettimeofday Function601521
+Node: Data File Management605236
+Node: Filetrans Function605868
+Node: Rewind Function610104
+Node: File Checking611557
+Node: Empty Files612651
+Node: Ignoring Assigns614881
+Node: Getopt Function616434
+Ref: Getopt Function-Footnote-1627738
+Node: Passwd Functions627941
+Ref: Passwd Functions-Footnote-1636916
+Node: Group Functions637004
+Node: Walking Arrays645088
+Node: Sample Programs646657
+Node: Running Examples647322
+Node: Clones648050
+Node: Cut Program649274
+Node: Egrep Program659123
+Ref: Egrep Program-Footnote-1666894
+Node: Id Program667004
+Node: Split Program670620
+Ref: Split Program-Footnote-1674139
+Node: Tee Program674267
+Node: Uniq Program677070
+Node: Wc Program684493
+Ref: Wc Program-Footnote-1688757
+Node: Miscellaneous Programs688957
+Node: Dupword Program690145
+Node: Alarm Program692176
+Node: Translate Program696933
+Ref: Translate Program-Footnote-1701312
+Ref: Translate Program-Footnote-2701540
+Node: Labels Program701674
+Ref: Labels Program-Footnote-1705045
+Node: Word Sorting705129
+Node: History Sorting709012
+Node: Extract Program710850
+Ref: Extract Program-Footnote-1718331
+Node: Simple Sed718459
+Node: Igawk Program721521
+Ref: Igawk Program-Footnote-1736553
+Ref: Igawk Program-Footnote-2736754
+Node: Anagram Program736892
+Node: Signature Program739990
+Node: Debugger741093
+Node: Debugging742004
+Node: Debugging Concepts742318
+Node: Debugging Terms744174
+Node: Awk Debugging746719
+Node: Sample dgawk session747611
+Node: dgawk invocation748103
+Node: Finding The Bug749285
+Node: List of Debugger Commands755770
+Node: Breakpoint Control757081
+Node: Dgawk Execution Control760557
+Node: Viewing And Changing Data763908
+Node: Dgawk Stack767217
+Node: Dgawk Info768677
+Node: Miscellaneous Dgawk Commands772625
+Node: Readline Support778056
+Node: Dgawk Limitations778883
+Node: Language History781022
+Node: V7/SVR3.1782454
+Node: SVR4784749
+Node: POSIX786191
+Node: BTL787189
+Node: POSIX/GNU787923
+Node: Common Extensions793109
+Node: Contributors794210
+Node: Installation798245
+Node: Gawk Distribution799139
+Node: Getting799623
+Node: Extracting800449
+Node: Distribution contents802140
+Node: Unix Installation807158
+Node: Quick Installation807775
+Node: Additional Configuration Options809737
+Node: Configuration Philosophy811214
+Node: Non-Unix Installation813556
+Node: PC Installation814014
+Node: PC Binary Installation815313
+Node: PC Compiling817161
+Node: PC Testing820105
+Node: PC Using821281
+Node: Cygwin825466
+Node: MSYS826463
+Node: VMS Installation826977
+Node: VMS Compilation827583
+Ref: VMS Compilation-Footnote-1828590
+Node: VMS Installation Details828648
+Node: VMS Running830283
+Node: VMS Old Gawk831890
+Node: Bugs832364
+Node: Other Versions836229
+Node: Notes841508
+Node: Compatibility Mode842200
+Node: Additions842983
+Node: Accessing The Source843795
+Node: Adding Code845218
+Node: New Ports850766
+Node: Dynamic Extensions854879
+Node: Internals856255
+Node: Plugin License865371
+Node: Sample Library866005
+Node: Internal File Description866691
+Node: Internal File Ops870398
+Ref: Internal File Ops-Footnote-1875166
+Node: Using Internal File Ops875314
+Node: Future Extensions877691
+Node: Basic Concepts880195
+Node: Basic High Level880952
+Ref: Basic High Level-Footnote-1884987
+Node: Basic Data Typing885172
+Node: Floating Point Issues889697
+Node: String Conversion Precision890780
+Ref: String Conversion Precision-Footnote-1892474
+Node: Unexpected Results892583
+Node: POSIX Floating Point Problems894409
+Ref: POSIX Floating Point Problems-Footnote-1898111
+Node: Glossary898149
+Node: Copying922292
+Node: GNU Free Documentation License959849
+Node: Index984986

End Tag Table
diff --git a/doc/gawk.texi b/doc/gawk.texi
index 7c63476f..0b410fc1 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -5138,7 +5138,7 @@ will give you much better performance when reading records. Otherwise,
@command{gawk} has to make several function calls, @emph{per input
character}, to find the record terminator.
-According to POSIX, string conmparison is also affected by locales
+According to POSIX, string comparison is also affected by locales
(similar to regular expressions). The details are presented in
@ref{POSIX String Comparison}.
@@ -12756,17 +12756,30 @@ The parent process ID of the current process.
@item PROCINFO["sorted_in"]
If this element exists in @code{PROCINFO}, its value controls the
order in which array indices will be processed by
-@samp{for(i in arr) @dots{}} loops.
-A value of @code{"ascending index string"}, which may be shortened to
-@code{"ascending index"} or just @code{"ascending"}, will result in either
-case sensitive or case insensitive ascending order depending upon
-the value of @code{IGNORECASE}.
-A value of @code{"descending index string"}, which may be shortened in
-a similar manner, will result in the opposite order.
-The value @code{"unsorted"} is also recognized, yielding the default
-result of arbitrary order. Any other value will be ignored, and
-warned about (at the time of first @samp{for(in in arr) @dots{}}
-execution) when lint checking is enabled.
+@samp{for (index in array) @dots{}} loops.
+The value should contain one to three words; separate pairs of words
+by a single space.
+One word controls sort direction, ``ascending'' or ``descending;''
+another controls the sort key, ``index'' or ``value;'' and the remaining
+one, which is only valid for sorting by index, is comparison mode,
+``string'' or ``number.'' When two or three words are present, they may
+be specified in any order, so @samp{ascending index string} and
+@samp{string ascending index} are equivalent. Also, each word may
+be truncated, so @samp{asc index str} and @samp{a i s} are also
+equivalent. Note that a separating space is required even when the
+words have been shortened down to one letter each.
+
+You can omit direction and/or key type and/or comparison mode. Provided
+that at least one is present, missing parts of a sort specification
+default to @samp{ascending}, @samp{index}, and (for indices only) @samp{string},
+respectively.
+An empty string, @code{""}, is the same as @samp{unsorted} and will cause
+@samp{for (index in array) @dots{}} to process the indices in
+arbitrary order. Another thing to note is that the array sorting
+takes place at the time @samp{for (@dots{} in @dots{})} is about to
+start executing, so changing the value of @code{PROCINFO["sorted_in"]}
+during loop execution does not have any effect on the order in which any
+remaining array elements get processed.
@xref{Scanning an Array}, for more information.
@item PROCINFO["strftime"]
@@ -13439,14 +13452,43 @@ strange results. It is best to avoid such things.
As an extension, @command{gawk} makes it possible for you to
loop over the elements of an array in order, based on the value of
@code{PROCINFO["sorted_in"]} (@pxref{Auto-set}).
-At present two sorting options are available: @code{"ascending
-index string"} and @code{"descending index string"}. They can be
-shortened by omitting @samp{string} or @samp{index string}. The value
-@code{"unsorted"} can be used as an explicit ``no-op'' and yields the same
-result as when @code{PROCINFO["sorted_in"]} has no value at all. If the
-index strings contain letters, the value of @code{IGNORECASE} affects
-the order of the result. This extension is disabled in POSIX mode,
-since the @code{PROCINFO} array is not special in that case. For example:
+Several sorting options are available:
+
+@table @code
+@item "ascending index string"
+Order by indices compared as strings, the most basic sort.
+(Internally, array indices are always strings, so with @code{a[2*5] = 1}
+the index is actually @code{"10"} rather than numeric 10.)
+
+@item "ascending index number"
+Order by indices but force them to be treated as numbers in the process.
+Any index with non-numeric value will end up positioned as if it were 0.
+
+@item "ascending value"
+Order by element values rather than by indices. Comparisons are done
+as numeric when both values being compared are numeric, or done as
+strings when either or both aren't numeric. Sub-arrays, if present,
+come out last.
+
+@item "descending index string"
+Reverse order from the most basic sort.
+
+@item "descending index number"
+Numeric indices ordered from high to low.
+
+@item "descending value"
+Element values ordered from high to low. Sub-arrays, if present,
+come out first.
+
+@item "unsorted"
+Array elements are processed in arbitrary order, the normal @command{awk}
+behavior.
+@end table
+
+Portions of the sort specification string may be truncated or omitted.
+The default is @samp{ascending} for direction, @samp{index} for sort key type,
+and (when sorting by index only) @samp{string} for comparison mode.
+For example:
@example
$ @kbd{gawk 'BEGIN @{}
@@ -13458,7 +13500,7 @@ $ @kbd{gawk 'BEGIN @{}
@print{} 4 4
@print{} 3 3
$ @kbd{gawk 'BEGIN @{}
-> @kbd{ PROCINFO["sorted_in"] = "ascending index"}
+> @kbd{ PROCINFO["sorted_in"] = "asc index"}
> @kbd{ a[4] = 4}
> @kbd{ a[3] = 3}
> @kbd{ for (i in a)}
@@ -13476,6 +13518,26 @@ sorted array traversal is not the default.
@c maintainers believe that only the people who wish to use a
@c feature should have to pay for it.
+When sorting an array by element values, if a value happens to be
+a sub-array then it is considered to be greater than any string or
+numeric value, regardless of what the sub-array itself contains,
+and all sub-arrays are treated as being equal to each other. Their
+order relative to each other is determined by their index strings.
+
+Sorting by array element values (for values other than sub-arrays)
+always uses basic @command{awk} comparison mode: if both values
+happen to be numbers then they're compared as numbers, otherwise
+they're compared as strings.
+
+When string comparisons are made during a sort, either for element
+values where one or both aren't numbers or for element indices
+handled as strings, the value of @code{IGNORECASE} controls whether
+the comparisons treat corresponding upper and lower case letters as
+equivalent or distinct.
+
+This sorting extension is disabled in POSIX mode,
+since the @code{PROCINFO} array is not special in that case.
+
@node Delete
@section The @code{delete} Statement
@cindex @code{delete} statement
diff --git a/eval.c b/eval.c
index 5b3b2f45..a9928735 100644
--- a/eval.c
+++ b/eval.c
@@ -1055,254 +1055,6 @@ update_FNR()
}
-typedef int (*qsort_compfunc)(const void *,const void *);
-
-static qsort_compfunc sorted_in(void);
-static int sort_ignrcase(const char *, size_t,const char *,size_t);
-static int sort_up_index_str(const void *, const void *);
-static int sort_down_index_str(const void *, const void *);
-static int sort_up_index_ignrcase(const void *, const void *);
-static int sort_down_index_ignrcase(const void *, const void *);
-
-/* comp_func --- array index comparison function for qsort, used in debug.c */
-
-int
-comp_func(const void *p1, const void *p2)
-{
- return sort_up_index_str(p1, p2);
-}
-
-/* sort_ignorecase --- case insensitive string comparison from cmp_nodes() */
-
-static int
-sort_ignorecase(const char *s1, size_t l1, const char *s2, size_t l2)
-{
- size_t l;
- int ret;
-
- l = (l1 < l2) ? l1 : l2;
-#ifdef MBS_SUPPORT
- if (gawk_mb_cur_max > 1) {
- ret = strncasecmpmbs((const unsigned char *) s1,
- (const unsigned char *) s2, l);
- } else
-#endif
- for (ret = 0; l-- > 0 && ret == 0; s1++, s2++)
- ret = casetable[*(unsigned char *) s1]
- - casetable[*(unsigned char *) s2];
- if (ret == 0 && l1 != l2)
- ret = (l1 < l2) ? -1 : 1;
- return ret;
-}
-
-/*
- * sort_up_index_str --- qsort comparison function; ascending index strings;
- * index strings are distinct within an array, so no comparisons will yield
- * equal and warrant disambiguation
- */
-
-static int
-sort_up_index_str(const void *p1, const void *p2)
-{
- const NODE *t1, *t2;
- const char *str1, *str2;
- size_t len1, len2;
- int ret;
-
- /* Array indexes are strings and distinct, never equal */
- t1 = *((const NODE *const *) p1);
- t2 = *((const NODE *const *) p2);
-
- str1 = t1->ahname_str;
- len1 = t1->ahname_len;
- str2 = t2->ahname_str;
- len2 = t2->ahname_len;
-
- ret = memcmp(str1, str2, (len1 < len2) ? len1 : len2);
- /*
- * if they compared equal but their lengths differ, the
- * shorter one is a prefix of the longer and compares as less
- */
- if (ret == 0 && len1 != len2)
- ret = (len1 < len2) ? -1 : 1;
-
- /* indices are unique within an array, so result should never be 0 */
- assert(ret != 0);
- return ret;
-}
-
-/* sort_down_index_str --- descending index strings */
-
-static int
-sort_down_index_str(const void *p1, const void *p2)
-{
- /*
- * Negation versus transposed arguments: when all keys are
- * distinct, as with array indices here, either method will
- * transform an ascending sort into a descending one. But if
- * there are equal keys--such as when IGNORECASE is honored--
- * that get disambiguated into a determisitc order, negation
- * will reverse those but transposed arguments would retain
- * their relative order within the rest of the reversed sort.
- */
- return -sort_up_index_str(p1, p2);
-}
-
-/*
- * sort_up_index_ignrcase --- ascending index string, case insensitive;
- * case insensitive comparison can cause distinct index strings to compare
- * equal, so disambiguation in necessary
- */
-
-static int
-sort_up_index_ignrcase(const void *p1, const void *p2)
-{
- const NODE *t1, *t2;
- int ret;
-
- t1 = *((const NODE *const *) p1);
- t2 = *((const NODE *const *) p2);
-
- ret = sort_ignorecase(t1->ahname_str, t1->ahname_len,
- t2->ahname_str, t2->ahname_len);
-
- /*
- * if case insensitive result is "they're the same",
- * use case sensitive comparison to force distinct order
- */
- if (ret == 0)
- ret = sort_up_index_str(p1, p2);
- return ret;
-}
-
-/* sort_down_index_ignrcase --- descending index strings, case insensitive */
-
-static int
-sort_down_index_ignrcase(const void *p1, const void *p2)
-{
- return -sort_up_index_ignrcase(p1, p2);
-}
-
-struct sort_option {
- qsort_compfunc func;
- const char *keydescr;
-};
-
-/*
- * sorted_in --- fetch and parse value of PROCINFO["sorted_in"] to decide
- * whether to sort the traversal of ``for (index in array) {}'', and
- * if so, what ordering to generate; returns a qsort comparison function
- */
-
-static qsort_compfunc
-sorted_in(void)
-{
- static struct sort_option sorts[] = {
- /* explicit no-op */
- { (qsort_compfunc) NULL, "unsorted" },
- /* also matches "ascending index" and "ascending" */
- { sort_up_index_str, "ascending index string" },
- /* also matches "descending index" and "descending" */
- { sort_down_index_str, "descending index string" },
- /*
- * to come
- */
- /* also matches "ascending value"; "ascending" won't get here */
- /* { sort_up_value_str, "ascending value string" },
- * { sort_down_value_str, "descending value string" },
- * { sort_up_index_num, "ascending index number" },
- * { sort_down_index_num, "descending index number" },
- * { sort_up_value_num, "ascending value number" },
- * { sort_down_value_num, "descending value number" },
- * and possibly
- * { sort_as_inserted, "insertion-order" },
- */
- };
- static NODE *sorted_str = NULL;
- static short warned_extension = FALSE, warned_unrecognized = FALSE;
-
- NODE *r;
- const char *s, *descr;
- size_t i, k, l;
- short is_number;
- qsort_compfunc sort_func;
-
- /* if there's no PROCINFO[], there can be no ["sorted_in"], so no sorting */
- if (PROCINFO_node == NULL)
- return (qsort_compfunc) NULL;
-
- if (sorted_str == NULL) /* do this once */
- sorted_str = make_string("sorted_in", 9);
-
- r = (NODE *) NULL;
- if (in_array(PROCINFO_node, sorted_str))
- r = *assoc_lookup(PROCINFO_node, sorted_str, TRUE);
- /* if there's no PROCINFO["sorted_in"], there's no sorting */
- if (!r)
- return (qsort_compfunc) 0;
-
- /* we're going to make use of PROCINFO["sorted_in"] */
- if (do_lint && ! warned_extension) {
- warned_extension = TRUE;
- lintwarn(_("sorted array traversal is a gawk extension"));
- }
-
- /* default result is no sorting */
- sort_func = (qsort_compfunc) NULL;
-
- /* undocumented synonyms: 0 is "unsorted", 1 is "ascending index" */
- if (r->flags & MAYBE_NUM)
- (void) force_number(r);
- is_number = ((r->flags & NUMBER) != 0);
- if (is_number) {
- if (r->numbr == 1)
- sort_func = sort_up_index_str;
- if (r->numbr == 0 || r->numbr == 1)
- goto got_func;
- /*
- * using PROCINFO["sorted_in"] as a number is not a general
- * index into sorts[]; entries beyond [1] may get reordered
- */
- }
-
- r = force_string(r);
- s = r->stptr;
- l = r->stlen;
- while (l > 0 && *s == ' ')
- ++s, --l;
-
- /* treat empty string the same as 0, a synonym for no sorting */
- if (l == 0)
- goto got_func; /* sort_func is still 0 */
-
- /* scan the list of available sorting orders */
- for (i = 0; i < (sizeof sorts / sizeof *sorts); ++i) {
- descr = sorts[i].keydescr;
- if (((k = strlen(descr)) == l || (k > l && descr[l] == ' '))
- && !strncasecmp(s, descr, l)) {
- sort_func = sorts[i].func;
- goto got_func;
- }
- }
-
- /* we didn't match any key description; sort_func is still 0 */
- if ((do_lint || is_number) && ! warned_unrecognized) {
- warned_unrecognized = TRUE;
- lintwarn(_("`PROCINFO[\"sorted_in\"]' value is not recognized"));
- }
-
-got_func:
- if (IGNORECASE) {
- if (sort_func == sort_up_index_str)
- sort_func = sort_up_index_ignrcase;
- if (sort_func == sort_down_index_str)
- sort_func = sort_down_index_ignrcase;
- }
-
- return sort_func;
-}
-
-
NODE *frame_ptr; /* current frame */
STACK_ITEM *stack_ptr = NULL;
@@ -2444,9 +2196,19 @@ post:
}
sort_compare = sorted_in();
- if (sort_compare)
+ if (sort_compare) {
+ /* ordering might be "* index number"
+ in which case we want more than just
+ a simple array of element nodes */
+ sort_maybe_numeric_index(sort_compare, list,
+ num_elems, TRUE);
+ /* sort the array of element pointers */
qsort(list, num_elems, sizeof(NODE *),
sort_compare); /* shazzam! */
+ /* clean up by-index as-number */
+ sort_maybe_numeric_index(sort_compare, list,
+ num_elems, FALSE);
+ }
list[num_elems] = array; /* actual array for use in
* lint warning in Op_arrayfor_incr
*/