From 2fbc83085f4a73b228c4441294e66485c319639a Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Wed, 22 Oct 2014 20:48:45 -0700 Subject: Ensure that a hash reorganization doesn't take place during a traversal, which could cause items to be visited twice or skipped. * gc.c (full_gc): Changed from static to exter (full_gc): Changed to internal linkage. * gc.h (full_gc): Declared. * hash.c (struct hash): New member, usecount. (struct hash_iter): New member, next. (reachable_iters): New static variable. (hash_mark): Reset the usecount of every reachable hash table. (hash_iter_mark): Add every reachable iterator to reachable_iters list. (make_hash, make_similar_hash, copy_hash): Initialize usecount. (gethash_c): Do not call hash_grow if usecount is nonzero. (hash_begin): Initialize next field of hash_iter to null. Increment use count on hash table. (hash_next): When traversal ends, release use count, and null out the hash table reference. (do_weak_tables): New static function. (do_iters): New static function. (hash_process_weak): Weak hash processing logic moved to do_weak_tables. Also calls do_iters. * txr.1: Describe hash table traversal, and the assurances and caveats about inserting and deleting items. --- ChangeLog | 31 +++++++++++++++++++++++++++++++ gc.c | 2 +- gc.h | 1 + hash.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ txr.1 | 33 +++++++++++++++++++++++++++++++++ 5 files changed, 122 insertions(+), 7 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9cad9c43..9c113392 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,34 @@ +2014-10-21 Kaz Kylheku + + Ensure that a hash reorganization doesn't take place + during a traversal, which could cause items to be visited + twice or skipped. + + * gc.c (full_gc): Changed from static to exter (full_gc): Changed to + internal linkage. + + * gc.h (full_gc): Declared. + + * hash.c (struct hash): New member, usecount. + (struct hash_iter): New member, next. + (reachable_iters): New static variable. + (hash_mark): Reset the usecount of every reachable hash table. + (hash_iter_mark): Add every reachable iterator to reachable_iters + list. + (make_hash, make_similar_hash, copy_hash): Initialize usecount. + (gethash_c): Do not call hash_grow if usecount is nonzero. + (hash_begin): Initialize next field of hash_iter to null. + Increment use count on hash table. + (hash_next): When traversal ends, release use count, and + null out the hash table reference. + (do_weak_tables): New static function. + (do_iters): New static function. + (hash_process_weak): Weak hash processing logic moved + to do_weak_tables. Also calls do_iters. + + * txr.1: Describe hash table traversal, and the assurances + and caveats about inserting and deleting items. + 2014-10-21 Kaz Kylheku * eval.c (interp_fun): If the function doesn't have diff --git a/gc.c b/gc.c index d3dd1f3c..0f7fc785 100644 --- a/gc.c +++ b/gc.c @@ -90,7 +90,7 @@ static val mutobj[MUTOBJ_VEC_SIZE]; static int mutobj_idx; static val freshobj[FRESHOBJ_VEC_SIZE]; static int freshobj_idx; -static int full_gc; +int full_gc; #endif #if EXTRA_DEBUGGING diff --git a/gc.h b/gc.h index 51452243..c00d857b 100644 --- a/gc.h +++ b/gc.h @@ -39,6 +39,7 @@ int gc_is_reachable(val); val gc_set(loc, val); val gc_push(val, loc); val gc_mutated(val); +extern int full_gc; #endif void unmark(void); diff --git a/hash.c b/hash.c index a591afbf..0a1c5117 100644 --- a/hash.c +++ b/hash.c @@ -54,6 +54,7 @@ struct hash { cnum modulus; cnum count; val userdata; + int usecount; cnum (*hash_fun)(val); val (*equal_fun)(val, val); val (*assoc_fun)(val key, val list); @@ -61,6 +62,7 @@ struct hash { }; struct hash_iter { + struct hash_iter *next; val hash; cnum chain; val cons; @@ -69,9 +71,10 @@ struct hash_iter { val weak_keys_k, weak_vals_k, equal_based_k; /* - * Dynamic list built up during gc. + * Dynamic lists built up during gc. */ static struct hash *reachable_weak_hashes; +static struct hash_iter *reachable_iters; /* C99 inline instantiations. */ #if __STDC_VERSION__ >= 199901L @@ -376,6 +379,10 @@ static void hash_mark(val hash) gc_mark(h->userdata); + /* Use counts will be re-calculated by a scan of the + hash iterators which are still reachable. */ + h->usecount = 0; + switch (h->flags) { case hash_weak_none: /* If the hash is not weak, we can simply mark the table @@ -474,6 +481,7 @@ val make_hash(val weak_keys, val weak_vals, val equal_based) h->table = table; h->userdata = nil; + h->usecount = 0; h->hash_fun = equal_based ? equal_hash : eql_hash; h->equal_fun = equal_based ? equal : eql; h->assoc_fun = equal_based ? assoc : assql; @@ -497,6 +505,7 @@ val make_similar_hash(val existing) h->userdata = ex->userdata; h->flags = ex->flags; + h->usecount = 0; h->hash_fun = ex->hash_fun; h->equal_fun = ex->equal_fun; h->assoc_fun = ex->assoc_fun; @@ -519,6 +528,7 @@ val copy_hash(val existing) h->userdata = ex->userdata; h->flags = ex->flags; + h->usecount = 0; h->hash_fun = ex->hash_fun; h->assoc_fun = ex->assoc_fun; h->acons_new_c_fun = ex->acons_new_c_fun; @@ -535,7 +545,7 @@ val gethash_c(val hash, val key, loc new_p) loc pchain = vecref_l(h->table, num_fast(h->hash_fun(key) % h->modulus)); val old = deref(pchain); val cell = h->acons_new_c_fun(key, new_p, pchain); - if (old != deref(pchain) && ++h->count > 2 * h->modulus) + if (old != deref(pchain) && ++h->count > 2 * h->modulus && h->usecount == 0) hash_grow(h, hash); return cell; } @@ -638,8 +648,11 @@ val hashp(val obj) static void hash_iter_mark(val hash_iter) { struct hash_iter *hi = coerce(struct hash_iter *, hash_iter->co.handle); - gc_mark(hi->hash); + if (hi->hash) + gc_mark(hi->hash); gc_mark(hi->cons); + hi->next = reachable_iters; + reachable_iters = hi; } static struct cobj_ops hash_iter_ops = { @@ -654,14 +667,16 @@ val hash_begin(val hash) { val hi_obj; struct hash_iter *hi; - class_check (hash, hash_s); + struct hash *h = (struct hash *) hash->co.handle; hi = coerce(struct hash_iter *, chk_malloc(sizeof *hi)); + hi->next = 0; hi->hash = nil; hi->chain = -1; hi->cons = nil; hi_obj = cobj(coerce(mem_t *, hi), hash_iter_s, &hash_iter_ops); hi->hash = hash; + h->usecount++; return hi_obj; } @@ -673,8 +688,11 @@ val hash_next(val iter) if (hi->cons) hi->cons = cdr(hi->cons); while (nilp(hi->cons)) { - if (++hi->chain >= h->modulus) + if (++hi->chain >= h->modulus) { + hi->hash = nil; + h->usecount--; return nil; + } set(mkloc(hi->cons, iter), vecref(h->table, num_fast(hi->chain))); } return car(hi->cons); @@ -704,7 +722,7 @@ val hash_equal(val obj) * that were visited during the marking phase, maintained in the list * reachable_weak_hashes. */ -void hash_process_weak(void) +static void do_weak_tables(void) { struct hash *h; cnum i; @@ -805,6 +823,38 @@ void hash_process_weak(void) reachable_weak_hashes = 0; } +static void do_iters(void) +{ + struct hash_iter *hi; + + for (hi = reachable_iters; hi != 0; hi = hi->next) { + val hash = hi->hash; + + if (!hash) + continue; + +#if CONFIG_GEN_GC + /* If the hash is a tenured object, we do not touch it. + It wasn't marked and so its usecount wasn't reset to zero. */ + if (!full_gc && hash->t.gen > 0) + continue; +#endif + + { + struct hash *h = coerce(struct hash *, hash->co.handle); + h->usecount++; + } + } + + reachable_iters = 0; +} + +void hash_process_weak(void) +{ + do_weak_tables(); + do_iters(); +} + val hashv(val args) { val wkeys = memq(weak_keys_k, args); diff --git a/txr.1 b/txr.1 index 512f2b82..78df5c9e 100644 --- a/txr.1 +++ b/txr.1 @@ -19455,6 +19455,36 @@ function instead. In addition to storing key-value pairs, a hash table can have a piece of information associated with it, called the user data. +A hash table can be traversed to visit all of the keys and data. The order of +traversal bears no relation to the order of insertion, or to any properties of +the key type. + +During an open traversal, new keys can be inserted into a hash table or deleted +from it while a a traversal is in progress. Insertion of a new key during +traversal will not cause any existing key to be visited twice or to be skipped; +however, it is not specified whether the new key will be traversed. Similarly, +if a key is deleted during traversal, and that key has not yet been visited, it +is not specified whether it will be visited during the remainder of the +traversal. + +An open traversal of a hash table is performed by the +.code maphash +function and the +.code dohash +operator. The traversal is open because code supplied by the program +is evaluated for each entry. + +The functions +.codn hash-keys , +.codn hash-values , +.codn hash-pairs , +and +.code hash-alist also perform an open traversal, because they return +lazy lists. The traversal isn't complete until the returned lazy list +is fully instantiated. In the meanwhile, the +\*(TX program can mutate the hash table from which the lazy list +is being generated. + .coNP Function @ hash-construct .synb .mets (hash-construct < hash-args << key-val-pairs ) @@ -19888,6 +19918,9 @@ then the corresponding entries from each list pairwise correspond to the pairs in .metn hash . +The list returned by each of these functions is lazy, and hence constitutes +an open traversal of the hash table. + .coNP Operator @ dohash .synb .mets (dohash ( < key-var < value-var < hash-form <> [ result-form ]) -- cgit v1.2.3