diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2023-05-02 19:29:42 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2023-05-02 19:29:42 -0700 |
commit | 005b8909d995b699130ab97269cabab2bcf33a75 (patch) | |
tree | e3acda4fb0d5c361f3d2efb4dd245a8bcfa291c5 | |
parent | f6cb6a21745822874789a33e150ef7ddbbf58979 (diff) | |
download | txr-005b8909d995b699130ab97269cabab2bcf33a75.tar.gz txr-005b8909d995b699130ab97269cabab2bcf33a75.tar.bz2 txr-005b8909d995b699130ab97269cabab2bcf33a75.zip |
sort: support stable sorting via ssort and snsort.
For array-like objecgts, these objects use an
array-based merge sort, using an auxiliary array
equal in size to the original array.
To provide the auxiliary array, a new kind of very simple
vector-like object is introduced into the gc module: protected
array. This looks like a raw dynamic C array of val type,
returned as a val *. Under the hood, there is a heap object
there, which makes the array traversable by the garbage
collector.
The whole point of this exercise is to make the new mergesort
function safe even if the caller-supplied functions misbehave
in such a way that the auxiliary array holds the only
references to heap objects.
* gc.c (struct prot_array): New struct,
(prot_array_cls): New static variable.
(gc_late_init): Register COBJ class, retaining in
prot_array_cls.
(prot_array_mark, prot_array_free): New static functions.
(prot_array_ops): New static structure.
(prot_array_alloc, prot_array_free): New functions.
* gc.h (prot_array_alloc, prot_array_free): Declared.
* lib.c (mergesort, ssort_vec): New static function.
(snsort, ssort): New functions.
* lib.h (snsort, ssort): Declared.
* tests/010/sort.tl: Cover ssort.
* txr.1: Documented.
* stdlib/doc-syms.tl: Updated.
-rw-r--r-- | eval.c | 2 | ||||
-rw-r--r-- | gc.c | 58 | ||||
-rw-r--r-- | gc.h | 3 | ||||
-rw-r--r-- | lib.c | 105 | ||||
-rw-r--r-- | lib.h | 2 | ||||
-rw-r--r-- | stdlib/doc-syms.tl | 6 | ||||
-rw-r--r-- | tests/010/sort.tl | 22 | ||||
-rw-r--r-- | txr.1 | 22 |
8 files changed, 211 insertions, 9 deletions
@@ -7509,6 +7509,8 @@ void eval_init(void) reg_fun(intern(lit("merge"), user_package), func_n4o(merge_wrap, 2)); reg_fun(intern(lit("nsort"), user_package), func_n3o(nsort, 1)); reg_fun(intern(lit("sort"), user_package), func_n3o(sort, 1)); + reg_fun(intern(lit("snsort"), user_package), func_n3o(snsort, 1)); + reg_fun(intern(lit("ssort"), user_package), func_n3o(ssort, 1)); reg_fun(intern(lit("nshuffle"), user_package), func_n2o(nshuffle, 1)); reg_fun(intern(lit("shuffle"), user_package), func_n2o(shuffle, 1)); reg_fun(intern(lit("find"), user_package), func_n4o(find, 2)); @@ -136,6 +136,13 @@ int full_gc; val break_obj; #endif +struct prot_array { + cnum size; + val *arr; +}; + +struct cobj_class *prot_array_cls; + val prot1(val *loc) { assert (gc_prot_top < prot_stack_limit); @@ -1132,6 +1139,8 @@ void gc_late_init(void) func_n1(gc_call_finalizers)); reg_fun(intern(lit("set-stack-limit"), user_package), func_n1(set_stack_limit)); reg_fun(intern(lit("get-stack-limit"), user_package), func_n0(get_stack_limit)); + + prot_array_cls = cobj_register(intern(lit("gc-prot-array"), system_package)); } /* @@ -1247,3 +1256,52 @@ void gc_stack_overflow(void) { uw_throwf(stack_overflow_s, lit("computation exceeded stack limit"), nao); } + +static void prot_array_mark(val obj) +{ + struct prot_array *pa = coerce(struct prot_array *, obj->co.handle); + cnum i; + + if (pa->arr) + for (i = 0; i < pa->size; i++) + gc_mark(pa->arr[i]); +} + +static void prot_array_free(val obj) +{ + struct prot_array *pa = coerce(struct prot_array *, obj->co.handle); + + if (pa->arr) { + free(pa->arr - 1); + pa->arr = 0; + } +} + +static struct cobj_ops prot_array_ops = cobj_ops_init(eq, + cobj_print_op, + prot_array_free, + prot_array_mark, + cobj_eq_hash_op); + +val *gc_prot_array_alloc(cnum size, val self) +{ + struct prot_array *pa = convert(struct prot_array *, + chk_malloc(sizeof *pa)); + + if (size >= INT_PTR_MAX) + uw_throwf(error_s, lit("~s: array too large"), self, nao); + + pa->size = size; + pa->arr = convert(val *, + chk_calloc(sizeof *pa->arr, (size + 1))) + 1; + + pa->arr[-1] = cobj(convert(mem_t *, pa), prot_array_cls, &prot_array_ops); + + return pa->arr; +} + +void gc_prot_array_free(val *arr) +{ + if (arr) + prot_array_free(arr[-1]); +} @@ -86,3 +86,6 @@ INLINE void gc_stack_check(void) if (&v < gc_stack_limit) gc_stack_overflow(); } + +val *gc_prot_array_alloc(cnum size, val self); +void gc_prot_array_free(val *); @@ -10874,6 +10874,60 @@ static void sort_vec(val vec, val lessfun, val keyfun, val self) quicksort(vec, lessfun, keyfun, 0, len); } +static void mergesort(val vec, val lessfun, val keyfun, cnum from, cnum to, + val *aux) +{ + switch (to - from) { + case 0: + case 1: + break; + case 2: + if (funcall2(lessfun, + funcall1(keyfun, ref(vec, num_fast(from + 1))), + funcall1(keyfun, ref(vec, num_fast(from))))) + swap(vec, num_fast(from), num_fast(from + 1)); + break; + default: + { + cnum mid = from + (to - from) / 2; + cnum i, j, k; + + mergesort(vec, lessfun, keyfun, from, mid, aux); + mergesort(vec, lessfun, keyfun, mid, to, aux); + + for (i = from, j = mid, k = 0; i < mid && j < to; ) + { + if (funcall2(lessfun, + funcall1(keyfun, ref(vec, num_fast(i))), + funcall1(keyfun, ref(vec, num_fast(j))))) + { + aux[k++] = ref(vec, num_fast(i++)); + } else { + aux[k++] = ref(vec, num_fast(j++)); + } + } + + while (i < mid) + aux[k++] = ref(vec, num_fast(i++)); + while (j < to) + aux[k++] = ref(vec, num_fast(j++)); + + for (i = from, k = 0; i < to; i++, k++) + refset(vec, num_fast(i), aux[k]); + } + break; + } +} + +static void ssort_vec(val vec, val lessfun, val keyfun, val self) +{ + cnum len = c_fixnum(length(vec), self); + val *aux = gc_prot_array_alloc(len, self); + mergesort(vec, lessfun, keyfun, 0, len, aux); + gc_prot_array_free(aux); +} + + val nsort(val seq, val lessfun, val keyfun) { val self = lit("nsort"); @@ -10925,6 +10979,57 @@ val sort(val seq, val lessfun, val keyfun) abort(); } +val snsort(val seq, val lessfun, val keyfun) +{ + val self = lit("snsort"); + seq_info_t si = seq_info(seq); + + keyfun = default_arg(keyfun, identity_f); + lessfun = default_arg(lessfun, less_f); + + switch (si.kind) { + case SEQ_NIL: + return nil; + case SEQ_VECLIKE: + case SEQ_HASHLIKE: + ssort_vec(seq, lessfun, keyfun, self); + return seq; + case SEQ_LISTLIKE: + return sort_list(seq, lessfun, keyfun); + case SEQ_TREELIKE: + case SEQ_NOTSEQ: + unsup_obj(self, seq); + } + + abort(); +} + +val ssort(val seq, val lessfun, val keyfun) +{ + val self = lit("ssort"); + seq_info_t si = seq_info(seq); + + keyfun = default_arg(keyfun, identity_f); + lessfun = default_arg(lessfun, less_f); + + switch (si.kind) { + case SEQ_NIL: + return nil; + case SEQ_VECLIKE: + case SEQ_HASHLIKE: + seq = copy(seq); + ssort_vec(seq, lessfun, keyfun, self); + return seq; + case SEQ_LISTLIKE: + return sort_list(copy_list(seq), lessfun, keyfun); + case SEQ_TREELIKE: + case SEQ_NOTSEQ: + unsup_obj(self, seq); + } + + abort(); +} + val nshuffle(val seq, val randstate) { seq_info_t si = seq_info(seq); @@ -1319,6 +1319,8 @@ val interpose(val sep, val seq); val merge(val list1, val list2, val lessfun, val keyfun); val nsort(val seq, val lessfun, val keyfun); val sort(val seq, val lessfun, val keyfun); +val snsort(val seq, val lessfun, val keyfun); +val ssort(val seq, val lessfun, val keyfun); val nshuffle(val seq, val randstate); val shuffle(val seq, val randstate); val multi_sort(val lists, val funcs, val key_funcs); diff --git a/stdlib/doc-syms.tl b/stdlib/doc-syms.tl index 029cecbd..f3e6e9b7 100644 --- a/stdlib/doc-syms.tl +++ b/stdlib/doc-syms.tl @@ -1346,7 +1346,7 @@ ("nreverse" "N-03D8471B") ("nrot" "N-025DB962") ("nshuffle" "N-01F12561") - ("nsort" "N-01FE5176") + ("nsort" "N-03923640") ("nth" "N-0039F3FB") ("nthcdr" "N-03D71D22") ("nthlast" "N-02FC66FA") @@ -1810,6 +1810,7 @@ ("slots" "N-00E90177") ("slotset" "N-02657437") ("sme" "N-008C6621") + ("snsort" "N-03923640") ("so-acceptconn" "N-02FFF4E8") ("so-broadcast" "N-02FFF4E8") ("so-debug" "N-02FFF4E8") @@ -1851,7 +1852,7 @@ ("some" "D-0041") ("some-false" "N-016BDF48") ("some-true" "N-016BDF48") - ("sort" "N-01FE5176") + ("sort" "N-03923640") ("sort-group" "N-01E65DDC") ("source-loc" "N-0370CD69") ("source-loc-str" "N-0370CD69") @@ -1868,6 +1869,7 @@ ("sqrt" "D-0025") ("square" "D-0031") ("ssize-t" "N-01153D9E") + ("ssort" "N-03923640") ("sspl" "N-0296195B") ("sssucc" "N-038E636C") ("ssucc" "N-038E636C") diff --git a/tests/010/sort.tl b/tests/010/sort.tl index 1fd48531..1fcc0c2d 100644 --- a/tests/010/sort.tl +++ b/tests/010/sort.tl @@ -21,3 +21,25 @@ (sort slist) list (sort list (fun greater)) (reverse list) (sort slist (fun greater)) (reverse list))) + +(test (ssort ()) nil) + +(let* ((list (conses '(1 2 3 4 5 6 7 8))) + (sp (uniq [mapcar ssort (perm list (len list))]))) + (mvtest (len sp) 1 + (car sp) list)) + +(test (ssort #()) #()) + +(let* ((vec (conses #(1 2 3 4 5 6 7 8))) + (sp (uniq [mapcar ssort (perm vec (len vec))]))) + (mvtest (len sp) 1 + (car sp) vec)) + +(let* ((list (range* 0 1000)) + (slist (shuffle list))) + (mvtest + (ssort list) list + (ssort slist) list + (ssort list (fun greater)) (reverse list) + (ssort slist (fun greater)) (reverse list))) @@ -37716,10 +37716,12 @@ is a transformed list of rows which is reconstituted into a list of columns. ;; (op remove-if (ap eql @3 20)) .brev -.coNP Functions @ sort and @ nsort +.coNP Functions @, sort @, nsort @ ssort and @ snsort .synb .mets (sort < sequence >> [ lessfun <> [ keyfun ]]) .mets (nsort < sequence >> [ lessfun <> [ keyfun ]]) +.mets (ssort < sequence >> [ lessfun <> [ keyfun ]]) +.mets (snsort < sequence >> [ lessfun <> [ keyfun ]]) .syne .desc The @@ -37784,19 +37786,25 @@ For strings and vectors, is not stable. The -.code sort +.code ssort and .code nsort -functions can be applied to hashes. It produces meaningful behavior -for a hash table which contains +functions have the same argument syntax and semantics as, respectively, +.code sort +and +.codn nsort . +These functions provide a stable sort for all sequences, not only +lists, at the cost of temporarily allocating memory. + +All of these functions can be applied to hashes. They produce meaningful +behavior for a hash table which contains .I N keys which are the integers from 0 to .IR "N - 1" . Such as hash is treated as if it were a vector. The values are sorted and reassigned to sorted order to the integer keys. -The behavior of -.code sort -is not specified for hashes whose contents do not conform to this convention. +The behavior is not specified for hashes whose contents do not conform to this +convention. Note: .code nsort |