diff options
-rw-r--r-- | ChangeLog | 11 | ||||
-rw-r--r-- | array.c | 83 | ||||
-rw-r--r-- | test/ChangeLog | 6 | ||||
-rw-r--r-- | test/arraysort.awk | 84 | ||||
-rw-r--r-- | test/arraysort.ok | 53 |
5 files changed, 204 insertions, 33 deletions
@@ -1,3 +1,14 @@ +Thu Apr 7 21:38:08 2011 Arnold D. Robbins <arnold@skeeve.com> + + * array.c (merge): Use sort_cmp_nodes for asort/asorti. + See test/arraysort.awk test 1. + +Thu Apr 7 10:48:21 2011 Pat Rankin <rankin@patechdata.com> + + * array.c (sort_cmp_nodes): New routine. Unlike cmp_nodes, numbers + are less than strings instead of being formatted and then compared. + (sort_up_value): Use it. + Sun Apr 3 22:18:26 2011 Arnold D. Robbins <arnold@skeeve.com> * README, FUTURE: Minor edits. @@ -61,6 +61,7 @@ struct sort_num { AWKNUM sort_numbr; NODE *sort_index; }; static qsort_compfunc sort_selection(NODE *, const char *, int); static size_t sort_match(const char *, size_t, const char *); static int sort_ignorecase(const char *, size_t, const char *, size_t); +static int sort_cmp_nodes(NODE *, NODE *); /* qsort callback routines */ static int sort_up_index_string(const void *, const void *); @@ -1035,11 +1036,13 @@ merge(NODE *left, NODE *right) NODE *ans, *cur; /* - * The use of cmp_nodes() here means that IGNORECASE influences the - * comparison. This is OK, but it may be surprising. This comment - * serves to remind us that we know about this and that it's OK. + * Use sort_cmp_nodes(): see the comment there as to why. That + * function will call cmp_nodes() on strings, which means that + * IGNORECASE influences the comparison. This is OK, but it may + * be surprising. This comment serves to remind us that we + * know about this and that it's OK. */ - if (cmp_nodes(left->ahvalue, right->ahvalue) <= 0) { + if (sort_cmp_nodes(left->ahvalue, right->ahvalue) <= 0) { ans = cur = left; left = left->ahnext; } else { @@ -1300,6 +1303,56 @@ comp_func(const void *p1, const void *p2) return sort_up_index_string(p1, p2); } +/* + * sort_cmp_nodes --- compare two nodes. Unlike cmp_nodes(), we don't + * mix numeric and string comparisons. Numbers compare as less than + * strings, which in turn compare as less than sub-arrays. All + * sub-arrays compare equal to each other, regardless of their contents. + */ + +static int +sort_cmp_nodes(NODE *n1, NODE *n2) +{ + int ret; + + if (n1->type == Node_var_array) { + /* return 0 if n2 is a sub-array too, else return 1 */ + ret = (n2->type != Node_var_array); + } else if (n2->type == Node_var_array) { + ret = -1; /* n1 (scalar) < n2 (sub-array) */ + } else { + /* + * Both scalars; we can't settle for a regular + * MAYBE_NUM comparison because that can cause + * problems when strings fall between numbers + * whose value makes them be ordered differently + * when handled as strings than as numbers. + * For example, { 10, 5, "3D" } yields a cycle: + * 5 < 10, "10" < "3D", "3D" < "5", so would + * produce different sort results depending upon + * the order in which comparisons between pairs + * of values happen to be performed. We force + * numbers to be less than strings in order to + * avoid that: 5 < 10, 10 < "3D", 5 < "3D". + */ + /* first resolve any undecided types */ + if (n1->flags & MAYBE_NUM) + (void) force_number(n1); + if (n2->flags & MAYBE_NUM) + (void) force_number(n2); + /* + * If both types are the same, use cmp_nodes(); + * otherwise, force the numeric value to be less + * than the string one. + */ + if ((n1->flags & NUMBER) == (n2->flags & NUMBER)) + ret = cmp_nodes(n1, n2); + else + ret = (n1->flags & NUMBER) ? -1 : 1; + } + return ret; +} + /* sort_ignorecase --- case insensitive string comparison from cmp_nodes() */ static int @@ -1446,14 +1499,7 @@ sort_down_index_number(const void *p1, const void *p2) return -sort_up_index_number(p1, p2); } -/* - * sort_up_value --- qsort comparison function; ascending values; - * standard awk comparison rules apply, so might be by number or might - * be by string depending upon the values involved; if a value happens - * to be a sub-array, it will compare greater than any scalar (last in - * ascending order) and equal with any other sub-array (disambiguated - * by index string, the same as with equal scalar values) - */ +/* sort_up_value --- qsort comparison function; ascending values */ static int sort_up_value(const void *p1, const void *p2) @@ -1471,17 +1517,8 @@ sort_up_value(const void *p1, const void *p2) /* and we want to compare the element values they refer to */ val1 = idx1->hvalue; val2 = idx2->hvalue; - if (val1->type == Node_var_array) { - if (val2->type == Node_var_array) - ret = 0; /* both sub-arrays, hence tie */ - else - ret = 1; /* val1 (sub-array) > val2 (scalar) */ - } else { - if (val2->type == Node_var_array) - ret = -1; /* val1 (scalar) < val2 (sub-array) */ - else - ret = cmp_nodes(val1, val2); /* both scalars */ - } + + ret = sort_cmp_nodes(val1, val2); /* disambiguate ties to force a deterministic order */ if (ret == 0) diff --git a/test/ChangeLog b/test/ChangeLog index ee4c679d..2f5a461b 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,6 +1,10 @@ +Thu Apr 7 21:44:06 2011 Arnold D. Robbins <arnold@skeeve.com> + + * arraysort.awk, arraysort.ok: Added more test cases. + Fri Apr 1 11:56:54 2011 Arnold D. Robbins <arnold@skeeve.com> - * arraysort.awk, arraysort.ok: New files from John Haque, + * arraysort.awk, arraysort.ok: New files from John Haque, edited somewhat. * Makefile.am (arraysort): New test. diff --git a/test/arraysort.awk b/test/arraysort.awk index 28535528..912d5880 100644 --- a/test/arraysort.awk +++ b/test/arraysort.awk @@ -1,11 +1,81 @@ -# Date: Thu, 31 Mar 2011 12:29:09 -0600 # From: j.eh@mchsi.com +# March and April 2010 -BEGIN { a[100]=a[1]=a["x"]=a["y"]=1; PROCINFO["sorted_in"]="num"; -for (i in a) print i, a[i] } +BEGIN { + print "--- test1 ---" + a[1] = b[3] = 5 + a[2] = b[2] = "3D" + a[3] = b[1] = 10 + asort(a) + asort(b) + for (i = 1; i <= 3; ++i) + printf(" %2s %-2s\n", (a[i] ""), (b[i] "")) -BEGIN { a[100]=a[1]=a["x"]=1; PROCINFO["sorted_in"]="num"; -for (i in a) print i, a[i] } + delete a + delete b +} -BEGIN { a[0]=a[100]=a[1]=a["x"]=1; PROCINFO["sorted_in"]="num"; -for (i in a) print i, a[i] } +BEGIN { + print "--- test2 ---" + a[100] = a[1] = a["x"] = a["y"] = 1 + PROCINFO["sorted_in"] = "num" + for (i in a) + print i, a[i] + delete a +} + +BEGIN { + print "--- test3 ---" + a[100] = a[1] = a["x"] = 1 + PROCINFO["sorted_in"] = "num" + for (i in a) + print i, a[i] + delete a +} + +BEGIN { + print "--- test4 ---" + a[0] = a[100] = a[1] = a["x"] = 1 + PROCINFO["sorted_in"] = "num" + for (i in a) + print i, a[i] + delete a +} + +BEGIN { + print "--- test5 ---" + a[""] = a["y"] = a[0] = 1 + PROCINFO["sorted_in"] = "num" + for (i in a) + print i, a[i] + delete a +} + +BEGIN { + print "--- test6 ---" + a[2] = a[1] = a[4] = a["3 "] = 1 + PROCINFO["sorted_in"] = "num" + for (i in a) + print "\""i"\"" + delete a +} + + +BEGIN { + print "--- test7 ---" + n = split(" 4 \n 3\n3D\nD3\n3\n0\n2\n4\n1\n5", a, "\n") + for (i = 1; i <= n; i++) + b[a[i]] = a[i] + print "--asc ind str--" + PROCINFO["sorted_in"] = "asc ind str" + for (i in b) + print "|"i"|"b[i] + print "--asc ind num--" + PROCINFO["sorted_in"] = "asc ind num" + for (i in b) + print "|"i"|"b[i] + print "--asc val num--" + PROCINFO["sorted_in"] = "asc val num" + for (i in b) + print "|"i"|"b[i] +} diff --git a/test/arraysort.ok b/test/arraysort.ok index 2306e8a9..bf48ecda 100644 --- a/test/arraysort.ok +++ b/test/arraysort.ok @@ -1,13 +1,62 @@ +--- test1 --- + 5 5 + 10 10 + 3D 3D +--- test2 --- x 1 y 1 1 1 100 1 +--- test3 --- x 1 -y 1 1 1 100 1 +--- test4 --- 0 1 x 1 -y 1 1 1 100 1 +--- test5 --- + 1 +0 1 +y 1 +--- test6 --- +"1" +"2" +"3 " +"4" +--- test7 --- +--asc ind str-- +| 3| 3 +| 4 | 4 +|0|0 +|1|1 +|2|2 +|3|3 +|3D|3D +|4|4 +|5|5 +|D3|D3 +--asc ind num-- +|0|0 +|D3|D3 +|1|1 +|2|2 +| 3| 3 +|3|3 +|3D|3D +| 4 | 4 +|4|4 +|5|5 +--asc val num-- +gawk: arraysort.awk:79: warning: `PROCINFO["sorted_in"]': sorting by value can't be forced to use numeric comparison +|4|4 +|5|5 +|D3|D3 +|3D|3D +| 3| 3 +|0|0 +|1|1 +|2|2 +|3|3 +| 4 | 4 |