diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2014-10-22 20:48:45 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2014-10-22 20:48:45 -0700 |
commit | 2fbc83085f4a73b228c4441294e66485c319639a (patch) | |
tree | 11ea46eb4fcda7515a198d46c136fe735aaa04a8 | |
parent | fbcb476d1d4cc015fa6afbf9135f5b92c1c1e299 (diff) | |
download | txr-2fbc83085f4a73b228c4441294e66485c319639a.tar.gz txr-2fbc83085f4a73b228c4441294e66485c319639a.tar.bz2 txr-2fbc83085f4a73b228c4441294e66485c319639a.zip |
Ensure that a hash reorganization doesn't take place
during a traversal, which could cause items to be visited
twice or skipped.
* gc.c (full_gc): Changed from static to exter (full_gc): Changed to
internal linkage.
* gc.h (full_gc): Declared.
* hash.c (struct hash): New member, usecount.
(struct hash_iter): New member, next.
(reachable_iters): New static variable.
(hash_mark): Reset the usecount of every reachable hash table.
(hash_iter_mark): Add every reachable iterator to reachable_iters
list.
(make_hash, make_similar_hash, copy_hash): Initialize usecount.
(gethash_c): Do not call hash_grow if usecount is nonzero.
(hash_begin): Initialize next field of hash_iter to null.
Increment use count on hash table.
(hash_next): When traversal ends, release use count, and
null out the hash table reference.
(do_weak_tables): New static function.
(do_iters): New static function.
(hash_process_weak): Weak hash processing logic moved
to do_weak_tables. Also calls do_iters.
* txr.1: Describe hash table traversal, and the assurances
and caveats about inserting and deleting items.
-rw-r--r-- | ChangeLog | 31 | ||||
-rw-r--r-- | gc.c | 2 | ||||
-rw-r--r-- | gc.h | 1 | ||||
-rw-r--r-- | hash.c | 62 | ||||
-rw-r--r-- | txr.1 | 33 |
5 files changed, 122 insertions, 7 deletions
@@ -1,5 +1,36 @@ | |||
1 | 2014-10-21 Kaz Kylheku <kaz@kylheku.com> | 1 | 2014-10-21 Kaz Kylheku <kaz@kylheku.com> |
2 | 2 | ||
3 | Ensure that a hash reorganization doesn't take place | ||
4 | during a traversal, which could cause items to be visited | ||
5 | twice or skipped. | ||
6 | |||
7 | * gc.c (full_gc): Changed from static to exter (full_gc): Changed to | ||
8 | internal linkage. | ||
9 | |||
10 | * gc.h (full_gc): Declared. | ||
11 | |||
12 | * hash.c (struct hash): New member, usecount. | ||
13 | (struct hash_iter): New member, next. | ||
14 | (reachable_iters): New static variable. | ||
15 | (hash_mark): Reset the usecount of every reachable hash table. | ||
16 | (hash_iter_mark): Add every reachable iterator to reachable_iters | ||
17 | list. | ||
18 | (make_hash, make_similar_hash, copy_hash): Initialize usecount. | ||
19 | (gethash_c): Do not call hash_grow if usecount is nonzero. | ||
20 | (hash_begin): Initialize next field of hash_iter to null. | ||
21 | Increment use count on hash table. | ||
22 | (hash_next): When traversal ends, release use count, and | ||
23 | null out the hash table reference. | ||
24 | (do_weak_tables): New static function. | ||
25 | (do_iters): New static function. | ||
26 | (hash_process_weak): Weak hash processing logic moved | ||
27 | to do_weak_tables. Also calls do_iters. | ||
28 | |||
29 | * txr.1: Describe hash table traversal, and the assurances | ||
30 | and caveats about inserting and deleting items. | ||
31 | |||
32 | 2014-10-21 Kaz Kylheku <kaz@kylheku.com> | ||
33 | |||
3 | * eval.c (interp_fun): If the function doesn't have | 34 | * eval.c (interp_fun): If the function doesn't have |
4 | specials, then don't bother saving and restoring dynamic | 35 | specials, then don't bother saving and restoring dynamic |
5 | env around the argument binding and evaluation. | 36 | env around the argument binding and evaluation. |
@@ -90,7 +90,7 @@ static val mutobj[MUTOBJ_VEC_SIZE]; | |||
90 | static int mutobj_idx; | 90 | static int mutobj_idx; |
91 | static val freshobj[FRESHOBJ_VEC_SIZE]; | 91 | static val freshobj[FRESHOBJ_VEC_SIZE]; |
92 | static int freshobj_idx; | 92 | static int freshobj_idx; |
93 | static int full_gc; | 93 | int full_gc; |
94 | #endif | 94 | #endif |
95 | 95 | ||
96 | #if EXTRA_DEBUGGING | 96 | #if EXTRA_DEBUGGING |
@@ -39,6 +39,7 @@ int gc_is_reachable(val); | |||
39 | val gc_set(loc, val); | 39 | val gc_set(loc, val); |
40 | val gc_push(val, loc); | 40 | val gc_push(val, loc); |
41 | val gc_mutated(val); | 41 | val gc_mutated(val); |
42 | extern int full_gc; | ||
42 | #endif | 43 | #endif |
43 | 44 | ||
44 | void unmark(void); | 45 | void unmark(void); |
@@ -54,6 +54,7 @@ struct hash { | |||
54 | cnum modulus; | 54 | cnum modulus; |
55 | cnum count; | 55 | cnum count; |
56 | val userdata; | 56 | val userdata; |
57 | int usecount; | ||
57 | cnum (*hash_fun)(val); | 58 | cnum (*hash_fun)(val); |
58 | val (*equal_fun)(val, val); | 59 | val (*equal_fun)(val, val); |
59 | val (*assoc_fun)(val key, val list); | 60 | val (*assoc_fun)(val key, val list); |
@@ -61,6 +62,7 @@ struct hash { | |||
61 | }; | 62 | }; |
62 | 63 | ||
63 | struct hash_iter { | 64 | struct hash_iter { |
65 | struct hash_iter *next; | ||
64 | val hash; | 66 | val hash; |
65 | cnum chain; | 67 | cnum chain; |
66 | val cons; | 68 | val cons; |
@@ -69,9 +71,10 @@ struct hash_iter { | |||
69 | val weak_keys_k, weak_vals_k, equal_based_k; | 71 | val weak_keys_k, weak_vals_k, equal_based_k; |
70 | 72 | ||
71 | /* | 73 | /* |
72 | * Dynamic list built up during gc. | 74 | * Dynamic lists built up during gc. |
73 | */ | 75 | */ |
74 | static struct hash *reachable_weak_hashes; | 76 | static struct hash *reachable_weak_hashes; |
77 | static struct hash_iter *reachable_iters; | ||
75 | 78 | ||
76 | /* C99 inline instantiations. */ | 79 | /* C99 inline instantiations. */ |
77 | #if __STDC_VERSION__ >= 199901L | 80 | #if __STDC_VERSION__ >= 199901L |
@@ -376,6 +379,10 @@ static void hash_mark(val hash) | |||
376 | 379 | ||
377 | gc_mark(h->userdata); | 380 | gc_mark(h->userdata); |
378 | 381 | ||
382 | /* Use counts will be re-calculated by a scan of the | ||
383 | hash iterators which are still reachable. */ | ||
384 | h->usecount = 0; | ||
385 | |||
379 | switch (h->flags) { | 386 | switch (h->flags) { |
380 | case hash_weak_none: | 387 | case hash_weak_none: |
381 | /* If the hash is not weak, we can simply mark the table | 388 | /* If the hash is not weak, we can simply mark the table |
@@ -474,6 +481,7 @@ val make_hash(val weak_keys, val weak_vals, val equal_based) | |||
474 | h->table = table; | 481 | h->table = table; |
475 | h->userdata = nil; | 482 | h->userdata = nil; |
476 | 483 | ||
484 | h->usecount = 0; | ||
477 | h->hash_fun = equal_based ? equal_hash : eql_hash; | 485 | h->hash_fun = equal_based ? equal_hash : eql_hash; |
478 | h->equal_fun = equal_based ? equal : eql; | 486 | h->equal_fun = equal_based ? equal : eql; |
479 | h->assoc_fun = equal_based ? assoc : assql; | 487 | h->assoc_fun = equal_based ? assoc : assql; |
@@ -497,6 +505,7 @@ val make_similar_hash(val existing) | |||
497 | h->userdata = ex->userdata; | 505 | h->userdata = ex->userdata; |
498 | 506 | ||
499 | h->flags = ex->flags; | 507 | h->flags = ex->flags; |
508 | h->usecount = 0; | ||
500 | h->hash_fun = ex->hash_fun; | 509 | h->hash_fun = ex->hash_fun; |
501 | h->equal_fun = ex->equal_fun; | 510 | h->equal_fun = ex->equal_fun; |
502 | h->assoc_fun = ex->assoc_fun; | 511 | h->assoc_fun = ex->assoc_fun; |
@@ -519,6 +528,7 @@ val copy_hash(val existing) | |||
519 | h->userdata = ex->userdata; | 528 | h->userdata = ex->userdata; |
520 | 529 | ||
521 | h->flags = ex->flags; | 530 | h->flags = ex->flags; |
531 | h->usecount = 0; | ||
522 | h->hash_fun = ex->hash_fun; | 532 | h->hash_fun = ex->hash_fun; |
523 | h->assoc_fun = ex->assoc_fun; | 533 | h->assoc_fun = ex->assoc_fun; |
524 | h->acons_new_c_fun = ex->acons_new_c_fun; | 534 | h->acons_new_c_fun = ex->acons_new_c_fun; |
@@ -535,7 +545,7 @@ val gethash_c(val hash, val key, loc new_p) | |||
535 | loc pchain = vecref_l(h->table, num_fast(h->hash_fun(key) % h->modulus)); | 545 | loc pchain = vecref_l(h->table, num_fast(h->hash_fun(key) % h->modulus)); |
536 | val old = deref(pchain); | 546 | val old = deref(pchain); |
537 | val cell = h->acons_new_c_fun(key, new_p, pchain); | 547 | val cell = h->acons_new_c_fun(key, new_p, pchain); |
538 | if (old != deref(pchain) && ++h->count > 2 * h->modulus) | 548 | if (old != deref(pchain) && ++h->count > 2 * h->modulus && h->usecount == 0) |
539 | hash_grow(h, hash); | 549 | hash_grow(h, hash); |
540 | return cell; | 550 | return cell; |
541 | } | 551 | } |
@@ -638,8 +648,11 @@ val hashp(val obj) | |||
638 | static void hash_iter_mark(val hash_iter) | 648 | static void hash_iter_mark(val hash_iter) |
639 | { | 649 | { |
640 | struct hash_iter *hi = coerce(struct hash_iter *, hash_iter->co.handle); | 650 | struct hash_iter *hi = coerce(struct hash_iter *, hash_iter->co.handle); |
641 | gc_mark(hi->hash); | 651 | if (hi->hash) |
652 | gc_mark(hi->hash); | ||
642 | gc_mark(hi->cons); | 653 | gc_mark(hi->cons); |
654 | hi->next = reachable_iters; | ||
655 | reachable_iters = hi; | ||
643 | } | 656 | } |
644 | 657 | ||
645 | static struct cobj_ops hash_iter_ops = { | 658 | static struct cobj_ops hash_iter_ops = { |
@@ -654,14 +667,16 @@ val hash_begin(val hash) | |||
654 | { | 667 | { |
655 | val hi_obj; | 668 | val hi_obj; |
656 | struct hash_iter *hi; | 669 | struct hash_iter *hi; |
657 | class_check (hash, hash_s); | 670 | struct hash *h = (struct hash *) hash->co.handle; |
658 | 671 | ||
659 | hi = coerce(struct hash_iter *, chk_malloc(sizeof *hi)); | 672 | hi = coerce(struct hash_iter *, chk_malloc(sizeof *hi)); |
673 | hi->next = 0; | ||
660 | hi->hash = nil; | 674 | hi->hash = nil; |
661 | hi->chain = -1; | 675 | hi->chain = -1; |
662 | hi->cons = nil; | 676 | hi->cons = nil; |
663 | hi_obj = cobj(coerce(mem_t *, hi), hash_iter_s, &hash_iter_ops); | 677 | hi_obj = cobj(coerce(mem_t *, hi), hash_iter_s, &hash_iter_ops); |
664 | hi->hash = hash; | 678 | hi->hash = hash; |
679 | h->usecount++; | ||
665 | return hi_obj; | 680 | return hi_obj; |
666 | } | 681 | } |
667 | 682 | ||
@@ -673,8 +688,11 @@ val hash_next(val iter) | |||
673 | if (hi->cons) | 688 | if (hi->cons) |
674 | hi->cons = cdr(hi->cons); | 689 | hi->cons = cdr(hi->cons); |
675 | while (nilp(hi->cons)) { | 690 | while (nilp(hi->cons)) { |
676 | if (++hi->chain >= h->modulus) | 691 | if (++hi->chain >= h->modulus) { |
692 | hi->hash = nil; | ||
693 | h->usecount--; | ||
677 | return nil; | 694 | return nil; |
695 | } | ||
678 | set(mkloc(hi->cons, iter), vecref(h->table, num_fast(hi->chain))); | 696 | set(mkloc(hi->cons, iter), vecref(h->table, num_fast(hi->chain))); |
679 | } | 697 | } |
680 | return car(hi->cons); | 698 | return car(hi->cons); |
@@ -704,7 +722,7 @@ val hash_equal(val obj) | |||
704 | * that were visited during the marking phase, maintained in the list | 722 | * that were visited during the marking phase, maintained in the list |
705 | * reachable_weak_hashes. | 723 | * reachable_weak_hashes. |
706 | */ | 724 | */ |
707 | void hash_process_weak(void) | 725 | static void do_weak_tables(void) |
708 | { | 726 | { |
709 | struct hash *h; | 727 | struct hash *h; |
710 | cnum i; | 728 | cnum i; |
@@ -805,6 +823,38 @@ void hash_process_weak(void) | |||
805 | reachable_weak_hashes = 0; | 823 | reachable_weak_hashes = 0; |
806 | } | 824 | } |
807 | 825 | ||
826 | static void do_iters(void) | ||
827 | { | ||
828 | struct hash_iter *hi; | ||
829 | |||
830 | for (hi = reachable_iters; hi != 0; hi = hi->next) { | ||
831 | val hash = hi->hash; | ||
832 | |||
833 | if (!hash) | ||
834 | continue; | ||
835 | |||
836 | #if CONFIG_GEN_GC | ||
837 | /* If the hash is a tenured object, we do not touch it. | ||
838 | It wasn't marked and so its usecount wasn't reset to zero. */ | ||
839 | if (!full_gc && hash->t.gen > 0) | ||
840 | continue; | ||
841 | #endif | ||
842 | |||
843 | { | ||
844 | struct hash *h = coerce(struct hash *, hash->co.handle); | ||
845 | h->usecount++; | ||
846 | } | ||
847 | } | ||
848 | |||
849 | reachable_iters = 0; | ||
850 | } | ||
851 | |||
852 | void hash_process_weak(void) | ||
853 | { | ||
854 | do_weak_tables(); | ||
855 | do_iters(); | ||
856 | } | ||
857 | |||
808 | val hashv(val args) | 858 | val hashv(val args) |
809 | { | 859 | { |
810 | val wkeys = memq(weak_keys_k, args); | 860 | val wkeys = memq(weak_keys_k, args); |
@@ -19455,6 +19455,36 @@ function instead. | |||
19455 | In addition to storing key-value pairs, a hash table can have a piece of | 19455 | In addition to storing key-value pairs, a hash table can have a piece of |
19456 | information associated with it, called the user data. | 19456 | information associated with it, called the user data. |
19457 | 19457 | ||
19458 | A hash table can be traversed to visit all of the keys and data. The order of | ||
19459 | traversal bears no relation to the order of insertion, or to any properties of | ||
19460 | the key type. | ||
19461 | |||
19462 | During an open traversal, new keys can be inserted into a hash table or deleted | ||
19463 | from it while a a traversal is in progress. Insertion of a new key during | ||
19464 | traversal will not cause any existing key to be visited twice or to be skipped; | ||
19465 | however, it is not specified whether the new key will be traversed. Similarly, | ||
19466 | if a key is deleted during traversal, and that key has not yet been visited, it | ||
19467 | is not specified whether it will be visited during the remainder of the | ||
19468 | traversal. | ||
19469 | |||
19470 | An open traversal of a hash table is performed by the | ||
19471 | .code maphash | ||
19472 | function and the | ||
19473 | .code dohash | ||
19474 | operator. The traversal is open because code supplied by the program | ||
19475 | is evaluated for each entry. | ||
19476 | |||
19477 | The functions | ||
19478 | .codn hash-keys , | ||
19479 | .codn hash-values , | ||
19480 | .codn hash-pairs , | ||
19481 | and | ||
19482 | .code hash-alist also perform an open traversal, because they return | ||
19483 | lazy lists. The traversal isn't complete until the returned lazy list | ||
19484 | is fully instantiated. In the meanwhile, the | ||
19485 | \*(TX program can mutate the hash table from which the lazy list | ||
19486 | is being generated. | ||
19487 | |||
19458 | .coNP Function @ hash-construct | 19488 | .coNP Function @ hash-construct |
19459 | .synb | 19489 | .synb |
19460 | .mets (hash-construct < hash-args << key-val-pairs ) | 19490 | .mets (hash-construct < hash-args << key-val-pairs ) |
@@ -19888,6 +19918,9 @@ then the corresponding entries from | |||
19888 | each list pairwise correspond to the pairs in | 19918 | each list pairwise correspond to the pairs in |
19889 | .metn hash . | 19919 | .metn hash . |
19890 | 19920 | ||
19921 | The list returned by each of these functions is lazy, and hence constitutes | ||
19922 | an open traversal of the hash table. | ||
19923 | |||
19891 | .coNP Operator @ dohash | 19924 | .coNP Operator @ dohash |
19892 | .synb | 19925 | .synb |
19893 | .mets (dohash ( < key-var < value-var < hash-form <> [ result-form ]) | 19926 | .mets (dohash ( < key-var < value-var < hash-form <> [ result-form ]) |