summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2023-05-02 19:29:42 -0700
committerKaz Kylheku <kaz@kylheku.com>2023-05-02 19:29:42 -0700
commit005b8909d995b699130ab97269cabab2bcf33a75 (patch)
treee3acda4fb0d5c361f3d2efb4dd245a8bcfa291c5
parentf6cb6a21745822874789a33e150ef7ddbbf58979 (diff)
downloadtxr-005b8909d995b699130ab97269cabab2bcf33a75.tar.gz
txr-005b8909d995b699130ab97269cabab2bcf33a75.tar.bz2
txr-005b8909d995b699130ab97269cabab2bcf33a75.zip
sort: support stable sorting via ssort and snsort.
For array-like objecgts, these objects use an array-based merge sort, using an auxiliary array equal in size to the original array. To provide the auxiliary array, a new kind of very simple vector-like object is introduced into the gc module: protected array. This looks like a raw dynamic C array of val type, returned as a val *. Under the hood, there is a heap object there, which makes the array traversable by the garbage collector. The whole point of this exercise is to make the new mergesort function safe even if the caller-supplied functions misbehave in such a way that the auxiliary array holds the only references to heap objects. * gc.c (struct prot_array): New struct, (prot_array_cls): New static variable. (gc_late_init): Register COBJ class, retaining in prot_array_cls. (prot_array_mark, prot_array_free): New static functions. (prot_array_ops): New static structure. (prot_array_alloc, prot_array_free): New functions. * gc.h (prot_array_alloc, prot_array_free): Declared. * lib.c (mergesort, ssort_vec): New static function. (snsort, ssort): New functions. * lib.h (snsort, ssort): Declared. * tests/010/sort.tl: Cover ssort. * txr.1: Documented. * stdlib/doc-syms.tl: Updated.
-rw-r--r--eval.c2
-rw-r--r--gc.c58
-rw-r--r--gc.h3
-rw-r--r--lib.c105
-rw-r--r--lib.h2
-rw-r--r--stdlib/doc-syms.tl6
-rw-r--r--tests/010/sort.tl22
-rw-r--r--txr.122
8 files changed, 211 insertions, 9 deletions
diff --git a/eval.c b/eval.c
index 68ea1865..ca9fe25d 100644
--- a/eval.c
+++ b/eval.c
@@ -7509,6 +7509,8 @@ void eval_init(void)
reg_fun(intern(lit("merge"), user_package), func_n4o(merge_wrap, 2));
reg_fun(intern(lit("nsort"), user_package), func_n3o(nsort, 1));
reg_fun(intern(lit("sort"), user_package), func_n3o(sort, 1));
+ reg_fun(intern(lit("snsort"), user_package), func_n3o(snsort, 1));
+ reg_fun(intern(lit("ssort"), user_package), func_n3o(ssort, 1));
reg_fun(intern(lit("nshuffle"), user_package), func_n2o(nshuffle, 1));
reg_fun(intern(lit("shuffle"), user_package), func_n2o(shuffle, 1));
reg_fun(intern(lit("find"), user_package), func_n4o(find, 2));
diff --git a/gc.c b/gc.c
index 25b4c071..2c61c914 100644
--- a/gc.c
+++ b/gc.c
@@ -136,6 +136,13 @@ int full_gc;
val break_obj;
#endif
+struct prot_array {
+ cnum size;
+ val *arr;
+};
+
+struct cobj_class *prot_array_cls;
+
val prot1(val *loc)
{
assert (gc_prot_top < prot_stack_limit);
@@ -1132,6 +1139,8 @@ void gc_late_init(void)
func_n1(gc_call_finalizers));
reg_fun(intern(lit("set-stack-limit"), user_package), func_n1(set_stack_limit));
reg_fun(intern(lit("get-stack-limit"), user_package), func_n0(get_stack_limit));
+
+ prot_array_cls = cobj_register(intern(lit("gc-prot-array"), system_package));
}
/*
@@ -1247,3 +1256,52 @@ void gc_stack_overflow(void)
{
uw_throwf(stack_overflow_s, lit("computation exceeded stack limit"), nao);
}
+
+static void prot_array_mark(val obj)
+{
+ struct prot_array *pa = coerce(struct prot_array *, obj->co.handle);
+ cnum i;
+
+ if (pa->arr)
+ for (i = 0; i < pa->size; i++)
+ gc_mark(pa->arr[i]);
+}
+
+static void prot_array_free(val obj)
+{
+ struct prot_array *pa = coerce(struct prot_array *, obj->co.handle);
+
+ if (pa->arr) {
+ free(pa->arr - 1);
+ pa->arr = 0;
+ }
+}
+
+static struct cobj_ops prot_array_ops = cobj_ops_init(eq,
+ cobj_print_op,
+ prot_array_free,
+ prot_array_mark,
+ cobj_eq_hash_op);
+
+val *gc_prot_array_alloc(cnum size, val self)
+{
+ struct prot_array *pa = convert(struct prot_array *,
+ chk_malloc(sizeof *pa));
+
+ if (size >= INT_PTR_MAX)
+ uw_throwf(error_s, lit("~s: array too large"), self, nao);
+
+ pa->size = size;
+ pa->arr = convert(val *,
+ chk_calloc(sizeof *pa->arr, (size + 1))) + 1;
+
+ pa->arr[-1] = cobj(convert(mem_t *, pa), prot_array_cls, &prot_array_ops);
+
+ return pa->arr;
+}
+
+void gc_prot_array_free(val *arr)
+{
+ if (arr)
+ prot_array_free(arr[-1]);
+}
diff --git a/gc.h b/gc.h
index ea09cfad..58d057db 100644
--- a/gc.h
+++ b/gc.h
@@ -86,3 +86,6 @@ INLINE void gc_stack_check(void)
if (&v < gc_stack_limit)
gc_stack_overflow();
}
+
+val *gc_prot_array_alloc(cnum size, val self);
+void gc_prot_array_free(val *);
diff --git a/lib.c b/lib.c
index c721a01f..bea09ea3 100644
--- a/lib.c
+++ b/lib.c
@@ -10874,6 +10874,60 @@ static void sort_vec(val vec, val lessfun, val keyfun, val self)
quicksort(vec, lessfun, keyfun, 0, len);
}
+static void mergesort(val vec, val lessfun, val keyfun, cnum from, cnum to,
+ val *aux)
+{
+ switch (to - from) {
+ case 0:
+ case 1:
+ break;
+ case 2:
+ if (funcall2(lessfun,
+ funcall1(keyfun, ref(vec, num_fast(from + 1))),
+ funcall1(keyfun, ref(vec, num_fast(from)))))
+ swap(vec, num_fast(from), num_fast(from + 1));
+ break;
+ default:
+ {
+ cnum mid = from + (to - from) / 2;
+ cnum i, j, k;
+
+ mergesort(vec, lessfun, keyfun, from, mid, aux);
+ mergesort(vec, lessfun, keyfun, mid, to, aux);
+
+ for (i = from, j = mid, k = 0; i < mid && j < to; )
+ {
+ if (funcall2(lessfun,
+ funcall1(keyfun, ref(vec, num_fast(i))),
+ funcall1(keyfun, ref(vec, num_fast(j)))))
+ {
+ aux[k++] = ref(vec, num_fast(i++));
+ } else {
+ aux[k++] = ref(vec, num_fast(j++));
+ }
+ }
+
+ while (i < mid)
+ aux[k++] = ref(vec, num_fast(i++));
+ while (j < to)
+ aux[k++] = ref(vec, num_fast(j++));
+
+ for (i = from, k = 0; i < to; i++, k++)
+ refset(vec, num_fast(i), aux[k]);
+ }
+ break;
+ }
+}
+
+static void ssort_vec(val vec, val lessfun, val keyfun, val self)
+{
+ cnum len = c_fixnum(length(vec), self);
+ val *aux = gc_prot_array_alloc(len, self);
+ mergesort(vec, lessfun, keyfun, 0, len, aux);
+ gc_prot_array_free(aux);
+}
+
+
val nsort(val seq, val lessfun, val keyfun)
{
val self = lit("nsort");
@@ -10925,6 +10979,57 @@ val sort(val seq, val lessfun, val keyfun)
abort();
}
+val snsort(val seq, val lessfun, val keyfun)
+{
+ val self = lit("snsort");
+ seq_info_t si = seq_info(seq);
+
+ keyfun = default_arg(keyfun, identity_f);
+ lessfun = default_arg(lessfun, less_f);
+
+ switch (si.kind) {
+ case SEQ_NIL:
+ return nil;
+ case SEQ_VECLIKE:
+ case SEQ_HASHLIKE:
+ ssort_vec(seq, lessfun, keyfun, self);
+ return seq;
+ case SEQ_LISTLIKE:
+ return sort_list(seq, lessfun, keyfun);
+ case SEQ_TREELIKE:
+ case SEQ_NOTSEQ:
+ unsup_obj(self, seq);
+ }
+
+ abort();
+}
+
+val ssort(val seq, val lessfun, val keyfun)
+{
+ val self = lit("ssort");
+ seq_info_t si = seq_info(seq);
+
+ keyfun = default_arg(keyfun, identity_f);
+ lessfun = default_arg(lessfun, less_f);
+
+ switch (si.kind) {
+ case SEQ_NIL:
+ return nil;
+ case SEQ_VECLIKE:
+ case SEQ_HASHLIKE:
+ seq = copy(seq);
+ ssort_vec(seq, lessfun, keyfun, self);
+ return seq;
+ case SEQ_LISTLIKE:
+ return sort_list(copy_list(seq), lessfun, keyfun);
+ case SEQ_TREELIKE:
+ case SEQ_NOTSEQ:
+ unsup_obj(self, seq);
+ }
+
+ abort();
+}
+
val nshuffle(val seq, val randstate)
{
seq_info_t si = seq_info(seq);
diff --git a/lib.h b/lib.h
index f7e6f992..9e71b5f9 100644
--- a/lib.h
+++ b/lib.h
@@ -1319,6 +1319,8 @@ val interpose(val sep, val seq);
val merge(val list1, val list2, val lessfun, val keyfun);
val nsort(val seq, val lessfun, val keyfun);
val sort(val seq, val lessfun, val keyfun);
+val snsort(val seq, val lessfun, val keyfun);
+val ssort(val seq, val lessfun, val keyfun);
val nshuffle(val seq, val randstate);
val shuffle(val seq, val randstate);
val multi_sort(val lists, val funcs, val key_funcs);
diff --git a/stdlib/doc-syms.tl b/stdlib/doc-syms.tl
index 029cecbd..f3e6e9b7 100644
--- a/stdlib/doc-syms.tl
+++ b/stdlib/doc-syms.tl
@@ -1346,7 +1346,7 @@
("nreverse" "N-03D8471B")
("nrot" "N-025DB962")
("nshuffle" "N-01F12561")
- ("nsort" "N-01FE5176")
+ ("nsort" "N-03923640")
("nth" "N-0039F3FB")
("nthcdr" "N-03D71D22")
("nthlast" "N-02FC66FA")
@@ -1810,6 +1810,7 @@
("slots" "N-00E90177")
("slotset" "N-02657437")
("sme" "N-008C6621")
+ ("snsort" "N-03923640")
("so-acceptconn" "N-02FFF4E8")
("so-broadcast" "N-02FFF4E8")
("so-debug" "N-02FFF4E8")
@@ -1851,7 +1852,7 @@
("some" "D-0041")
("some-false" "N-016BDF48")
("some-true" "N-016BDF48")
- ("sort" "N-01FE5176")
+ ("sort" "N-03923640")
("sort-group" "N-01E65DDC")
("source-loc" "N-0370CD69")
("source-loc-str" "N-0370CD69")
@@ -1868,6 +1869,7 @@
("sqrt" "D-0025")
("square" "D-0031")
("ssize-t" "N-01153D9E")
+ ("ssort" "N-03923640")
("sspl" "N-0296195B")
("sssucc" "N-038E636C")
("ssucc" "N-038E636C")
diff --git a/tests/010/sort.tl b/tests/010/sort.tl
index 1fd48531..1fcc0c2d 100644
--- a/tests/010/sort.tl
+++ b/tests/010/sort.tl
@@ -21,3 +21,25 @@
(sort slist) list
(sort list (fun greater)) (reverse list)
(sort slist (fun greater)) (reverse list)))
+
+(test (ssort ()) nil)
+
+(let* ((list (conses '(1 2 3 4 5 6 7 8)))
+ (sp (uniq [mapcar ssort (perm list (len list))])))
+ (mvtest (len sp) 1
+ (car sp) list))
+
+(test (ssort #()) #())
+
+(let* ((vec (conses #(1 2 3 4 5 6 7 8)))
+ (sp (uniq [mapcar ssort (perm vec (len vec))])))
+ (mvtest (len sp) 1
+ (car sp) vec))
+
+(let* ((list (range* 0 1000))
+ (slist (shuffle list)))
+ (mvtest
+ (ssort list) list
+ (ssort slist) list
+ (ssort list (fun greater)) (reverse list)
+ (ssort slist (fun greater)) (reverse list)))
diff --git a/txr.1 b/txr.1
index 6927bd13..606e7f8d 100644
--- a/txr.1
+++ b/txr.1
@@ -37716,10 +37716,12 @@ is a transformed list of rows which is reconstituted into a list of columns.
;; (op remove-if (ap eql @3 20))
.brev
-.coNP Functions @ sort and @ nsort
+.coNP Functions @, sort @, nsort @ ssort and @ snsort
.synb
.mets (sort < sequence >> [ lessfun <> [ keyfun ]])
.mets (nsort < sequence >> [ lessfun <> [ keyfun ]])
+.mets (ssort < sequence >> [ lessfun <> [ keyfun ]])
+.mets (snsort < sequence >> [ lessfun <> [ keyfun ]])
.syne
.desc
The
@@ -37784,19 +37786,25 @@ For strings and vectors,
is not stable.
The
-.code sort
+.code ssort
and
.code nsort
-functions can be applied to hashes. It produces meaningful behavior
-for a hash table which contains
+functions have the same argument syntax and semantics as, respectively,
+.code sort
+and
+.codn nsort .
+These functions provide a stable sort for all sequences, not only
+lists, at the cost of temporarily allocating memory.
+
+All of these functions can be applied to hashes. They produce meaningful
+behavior for a hash table which contains
.I N
keys which are the integers from 0 to
.IR "N - 1" .
Such as hash is treated as if it were a vector. The values are sorted
and reassigned to sorted order to the integer keys.
-The behavior of
-.code sort
-is not specified for hashes whose contents do not conform to this convention.
+The behavior is not specified for hashes whose contents do not conform to this
+convention.
Note:
.code nsort