aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArnold D. Robbins <arnold@skeeve.com>2011-04-07 21:44:53 +0300
committerArnold D. Robbins <arnold@skeeve.com>2011-04-07 21:44:53 +0300
commitc0583c31b8d47bd55e9340e7434cf9ccf7336f6d (patch)
tree6d54ecc869334cafeaadaca562a54f1bd2b4d717
parent00068b77275315c8c16e32c39af376dfcf5a2374 (diff)
downloadegawk-c0583c31b8d47bd55e9340e7434cf9ccf7336f6d.tar.gz
egawk-c0583c31b8d47bd55e9340e7434cf9ccf7336f6d.tar.bz2
egawk-c0583c31b8d47bd55e9340e7434cf9ccf7336f6d.zip
More improvements to array sorting.
-rw-r--r--ChangeLog11
-rw-r--r--array.c83
-rw-r--r--test/ChangeLog6
-rw-r--r--test/arraysort.awk84
-rw-r--r--test/arraysort.ok53
5 files changed, 204 insertions, 33 deletions
diff --git a/ChangeLog b/ChangeLog
index a307a160..01176a75 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+Thu Apr 7 21:38:08 2011 Arnold D. Robbins <arnold@skeeve.com>
+
+ * array.c (merge): Use sort_cmp_nodes for asort/asorti.
+ See test/arraysort.awk test 1.
+
+Thu Apr 7 10:48:21 2011 Pat Rankin <rankin@patechdata.com>
+
+ * array.c (sort_cmp_nodes): New routine. Unlike cmp_nodes, numbers
+ are less than strings instead of being formatted and then compared.
+ (sort_up_value): Use it.
+
Sun Apr 3 22:18:26 2011 Arnold D. Robbins <arnold@skeeve.com>
* README, FUTURE: Minor edits.
diff --git a/array.c b/array.c
index 6817a375..5d1c5884 100644
--- a/array.c
+++ b/array.c
@@ -61,6 +61,7 @@ struct sort_num { AWKNUM sort_numbr; NODE *sort_index; };
static qsort_compfunc sort_selection(NODE *, const char *, int);
static size_t sort_match(const char *, size_t, const char *);
static int sort_ignorecase(const char *, size_t, const char *, size_t);
+static int sort_cmp_nodes(NODE *, NODE *);
/* qsort callback routines */
static int sort_up_index_string(const void *, const void *);
@@ -1035,11 +1036,13 @@ merge(NODE *left, NODE *right)
NODE *ans, *cur;
/*
- * The use of cmp_nodes() here means that IGNORECASE influences the
- * comparison. This is OK, but it may be surprising. This comment
- * serves to remind us that we know about this and that it's OK.
+ * Use sort_cmp_nodes(): see the comment there as to why. That
+ * function will call cmp_nodes() on strings, which means that
+ * IGNORECASE influences the comparison. This is OK, but it may
+ * be surprising. This comment serves to remind us that we
+ * know about this and that it's OK.
*/
- if (cmp_nodes(left->ahvalue, right->ahvalue) <= 0) {
+ if (sort_cmp_nodes(left->ahvalue, right->ahvalue) <= 0) {
ans = cur = left;
left = left->ahnext;
} else {
@@ -1300,6 +1303,56 @@ comp_func(const void *p1, const void *p2)
return sort_up_index_string(p1, p2);
}
+/*
+ * sort_cmp_nodes --- compare two nodes. Unlike cmp_nodes(), we don't
+ * mix numeric and string comparisons. Numbers compare as less than
+ * strings, which in turn compare as less than sub-arrays. All
+ * sub-arrays compare equal to each other, regardless of their contents.
+ */
+
+static int
+sort_cmp_nodes(NODE *n1, NODE *n2)
+{
+ int ret;
+
+ if (n1->type == Node_var_array) {
+ /* return 0 if n2 is a sub-array too, else return 1 */
+ ret = (n2->type != Node_var_array);
+ } else if (n2->type == Node_var_array) {
+ ret = -1; /* n1 (scalar) < n2 (sub-array) */
+ } else {
+ /*
+ * Both scalars; we can't settle for a regular
+ * MAYBE_NUM comparison because that can cause
+ * problems when strings fall between numbers
+ * whose value makes them be ordered differently
+ * when handled as strings than as numbers.
+ * For example, { 10, 5, "3D" } yields a cycle:
+ * 5 < 10, "10" < "3D", "3D" < "5", so would
+ * produce different sort results depending upon
+ * the order in which comparisons between pairs
+ * of values happen to be performed. We force
+ * numbers to be less than strings in order to
+ * avoid that: 5 < 10, 10 < "3D", 5 < "3D".
+ */
+ /* first resolve any undecided types */
+ if (n1->flags & MAYBE_NUM)
+ (void) force_number(n1);
+ if (n2->flags & MAYBE_NUM)
+ (void) force_number(n2);
+ /*
+ * If both types are the same, use cmp_nodes();
+ * otherwise, force the numeric value to be less
+ * than the string one.
+ */
+ if ((n1->flags & NUMBER) == (n2->flags & NUMBER))
+ ret = cmp_nodes(n1, n2);
+ else
+ ret = (n1->flags & NUMBER) ? -1 : 1;
+ }
+ return ret;
+}
+
/* sort_ignorecase --- case insensitive string comparison from cmp_nodes() */
static int
@@ -1446,14 +1499,7 @@ sort_down_index_number(const void *p1, const void *p2)
return -sort_up_index_number(p1, p2);
}
-/*
- * sort_up_value --- qsort comparison function; ascending values;
- * standard awk comparison rules apply, so might be by number or might
- * be by string depending upon the values involved; if a value happens
- * to be a sub-array, it will compare greater than any scalar (last in
- * ascending order) and equal with any other sub-array (disambiguated
- * by index string, the same as with equal scalar values)
- */
+/* sort_up_value --- qsort comparison function; ascending values */
static int
sort_up_value(const void *p1, const void *p2)
@@ -1471,17 +1517,8 @@ sort_up_value(const void *p1, const void *p2)
/* and we want to compare the element values they refer to */
val1 = idx1->hvalue;
val2 = idx2->hvalue;
- if (val1->type == Node_var_array) {
- if (val2->type == Node_var_array)
- ret = 0; /* both sub-arrays, hence tie */
- else
- ret = 1; /* val1 (sub-array) > val2 (scalar) */
- } else {
- if (val2->type == Node_var_array)
- ret = -1; /* val1 (scalar) < val2 (sub-array) */
- else
- ret = cmp_nodes(val1, val2); /* both scalars */
- }
+
+ ret = sort_cmp_nodes(val1, val2);
/* disambiguate ties to force a deterministic order */
if (ret == 0)
diff --git a/test/ChangeLog b/test/ChangeLog
index ee4c679d..2f5a461b 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,6 +1,10 @@
+Thu Apr 7 21:44:06 2011 Arnold D. Robbins <arnold@skeeve.com>
+
+ * arraysort.awk, arraysort.ok: Added more test cases.
+
Fri Apr 1 11:56:54 2011 Arnold D. Robbins <arnold@skeeve.com>
- * arraysort.awk, arraysort.ok: New files from John Haque,
+ * arraysort.awk, arraysort.ok: New files from John Haque,
edited somewhat.
* Makefile.am (arraysort): New test.
diff --git a/test/arraysort.awk b/test/arraysort.awk
index 28535528..912d5880 100644
--- a/test/arraysort.awk
+++ b/test/arraysort.awk
@@ -1,11 +1,81 @@
-# Date: Thu, 31 Mar 2011 12:29:09 -0600
# From: j.eh@mchsi.com
+# March and April 2010
-BEGIN { a[100]=a[1]=a["x"]=a["y"]=1; PROCINFO["sorted_in"]="num";
-for (i in a) print i, a[i] }
+BEGIN {
+ print "--- test1 ---"
+ a[1] = b[3] = 5
+ a[2] = b[2] = "3D"
+ a[3] = b[1] = 10
+ asort(a)
+ asort(b)
+ for (i = 1; i <= 3; ++i)
+ printf(" %2s %-2s\n", (a[i] ""), (b[i] ""))
-BEGIN { a[100]=a[1]=a["x"]=1; PROCINFO["sorted_in"]="num";
-for (i in a) print i, a[i] }
+ delete a
+ delete b
+}
-BEGIN { a[0]=a[100]=a[1]=a["x"]=1; PROCINFO["sorted_in"]="num";
-for (i in a) print i, a[i] }
+BEGIN {
+ print "--- test2 ---"
+ a[100] = a[1] = a["x"] = a["y"] = 1
+ PROCINFO["sorted_in"] = "num"
+ for (i in a)
+ print i, a[i]
+ delete a
+}
+
+BEGIN {
+ print "--- test3 ---"
+ a[100] = a[1] = a["x"] = 1
+ PROCINFO["sorted_in"] = "num"
+ for (i in a)
+ print i, a[i]
+ delete a
+}
+
+BEGIN {
+ print "--- test4 ---"
+ a[0] = a[100] = a[1] = a["x"] = 1
+ PROCINFO["sorted_in"] = "num"
+ for (i in a)
+ print i, a[i]
+ delete a
+}
+
+BEGIN {
+ print "--- test5 ---"
+ a[""] = a["y"] = a[0] = 1
+ PROCINFO["sorted_in"] = "num"
+ for (i in a)
+ print i, a[i]
+ delete a
+}
+
+BEGIN {
+ print "--- test6 ---"
+ a[2] = a[1] = a[4] = a["3 "] = 1
+ PROCINFO["sorted_in"] = "num"
+ for (i in a)
+ print "\""i"\""
+ delete a
+}
+
+
+BEGIN {
+ print "--- test7 ---"
+ n = split(" 4 \n 3\n3D\nD3\n3\n0\n2\n4\n1\n5", a, "\n")
+ for (i = 1; i <= n; i++)
+ b[a[i]] = a[i]
+ print "--asc ind str--"
+ PROCINFO["sorted_in"] = "asc ind str"
+ for (i in b)
+ print "|"i"|"b[i]
+ print "--asc ind num--"
+ PROCINFO["sorted_in"] = "asc ind num"
+ for (i in b)
+ print "|"i"|"b[i]
+ print "--asc val num--"
+ PROCINFO["sorted_in"] = "asc val num"
+ for (i in b)
+ print "|"i"|"b[i]
+}
diff --git a/test/arraysort.ok b/test/arraysort.ok
index 2306e8a9..bf48ecda 100644
--- a/test/arraysort.ok
+++ b/test/arraysort.ok
@@ -1,13 +1,62 @@
+--- test1 ---
+ 5 5
+ 10 10
+ 3D 3D
+--- test2 ---
x 1
y 1
1 1
100 1
+--- test3 ---
x 1
-y 1
1 1
100 1
+--- test4 ---
0 1
x 1
-y 1
1 1
100 1
+--- test5 ---
+ 1
+0 1
+y 1
+--- test6 ---
+"1"
+"2"
+"3 "
+"4"
+--- test7 ---
+--asc ind str--
+| 3| 3
+| 4 | 4
+|0|0
+|1|1
+|2|2
+|3|3
+|3D|3D
+|4|4
+|5|5
+|D3|D3
+--asc ind num--
+|0|0
+|D3|D3
+|1|1
+|2|2
+| 3| 3
+|3|3
+|3D|3D
+| 4 | 4
+|4|4
+|5|5
+--asc val num--
+gawk: arraysort.awk:79: warning: `PROCINFO["sorted_in"]': sorting by value can't be forced to use numeric comparison
+|4|4
+|5|5
+|D3|D3
+|3D|3D
+| 3| 3
+|0|0
+|1|1
+|2|2
+|3|3
+| 4 | 4