diff options
Diffstat (limited to 'array.c')
-rw-r--r-- | array.c | 231 |
1 files changed, 210 insertions, 21 deletions
@@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc. + * Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Progamming Language. @@ -23,9 +23,24 @@ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +/* + * Tree walks (``for (iggy in foo)'') and array deletions use expensive + * linear searching. So what we do is start out with small arrays and + * grow them as needed, so that our arrays are hopefully small enough, + * most of the time, that they're pretty full and we're not looking at + * wasted space. + * + * The decision is made to grow the array if the average chain length is + * ``too big''. This is defined as the total number of entries in the table + * divided by the size of the array being greater than some constant. + */ + +#define AVG_CHAIN_MAX 10 /* don't want to linear search more than this */ + #include "awk.h" static NODE *assoc_find P((NODE *symbol, NODE *subs, int hash1)); +static void grow_table P((NODE *symbol)); NODE * concat_exp(tree) @@ -84,7 +99,7 @@ NODE *symbol; if (symbol->var_array == 0) return; - for (i = 0; i < HASHSIZE; i++) { + for (i = 0; i < symbol->array_size; i++) { for (bucket = symbol->var_array[i]; bucket; bucket = next) { next = bucket->ahnext; unref(bucket->ahname); @@ -93,17 +108,25 @@ NODE *symbol; } symbol->var_array[i] = 0; } + free(symbol->var_array); + symbol->var_array = NULL; + symbol->array_size = symbol->table_size = 0; } /* * calculate the hash function of the string in subs */ unsigned int -hash(s, len) -register char *s; +hash(s, len, hsize) +register const char *s; register size_t len; +unsigned long hsize; { - register unsigned long h = 0, g; + register unsigned long h = 0; + +#ifdef this_is_really_slow + + register unsigned long g; while (len--) { h = (h << 4) + *s++; @@ -113,10 +136,84 @@ register size_t len; h = h ^ g; } } - if (h < HASHSIZE) - return h; - else - return h%HASHSIZE; + +#else /* this_is_really_slow */ +/* + * This is INCREDIBLY ugly, but fast. We break the string up into 8 byte + * units. On the first time through the loop we get the "leftover bytes" + * (strlen % 8). On every other iteration, we perform 8 HASHC's so we handle + * all 8 bytes. Essentially, this saves us 7 cmp & branch instructions. If + * this routine is heavily used enough, it's worth the ugly coding. + * + * OZ's original sdbm hash, copied from Margo Seltzers db package. + * + */ + +/* Even more speed: */ +/* #define HASHC h = *s++ + 65599 * h */ +/* Because 65599 = pow(2,6) + pow(2,16) - 1 we multiply by shifts */ +#define HASHC htmp = (h << 6); \ + h = *s++ + htmp + (htmp << 10) - h + + unsigned long htmp; + + h = 0; + +#if defined(VAXC) +/* + * [This was an implementation of "Duff's Device", but it has been + * redone, separating the switch for extra iterations from the loop. + * This is necessary because the DEC VAX-C compiler is STOOPID.] + */ + switch (len & (8 - 1)) { + case 7: HASHC; + case 6: HASHC; + case 5: HASHC; + case 4: HASHC; + case 3: HASHC; + case 2: HASHC; + case 1: HASHC; + default: break; + } + + if (len > (8 - 1)) { + register size_t loop = len >> 3; + do { + HASHC; + HASHC; + HASHC; + HASHC; + HASHC; + HASHC; + HASHC; + HASHC; + } while (--loop); + } +#else /* !VAXC */ + /* "Duff's Device" for those who can handle it */ + if (len > 0) { + register size_t loop = (len + 8 - 1) >> 3; + + switch (len & (8 - 1)) { + case 0: + do { /* All fall throughs */ + HASHC; + case 7: HASHC; + case 6: HASHC; + case 5: HASHC; + case 4: HASHC; + case 3: HASHC; + case 2: HASHC; + case 1: HASHC; + } while (--loop); + } + } +#endif /* !VAXC */ +#endif /* this_is_really_slow - not */ + + if (h >= hsize) + h %= hsize; + return h; } /* @@ -158,7 +255,7 @@ NODE *symbol, *subs; if (symbol->var_array == 0) return 0; subs = concat_exp(subs); /* concat_exp returns a string node */ - hash1 = hash(subs->stptr, subs->stlen); + hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size); if (assoc_find(symbol, subs, hash1) == NULL) { free_temp(subs); return 0; @@ -183,17 +280,16 @@ NODE *symbol, *subs; register NODE *bucket; (void) force_string(subs); - hash1 = hash(subs->stptr, subs->stlen); - - if (symbol->var_array == 0) { /* this table really should grow - * dynamically */ - size_t size; - size = sizeof(NODE *) * HASHSIZE; - emalloc(symbol->var_array, NODE **, size, "assoc_lookup"); - memset((char *)symbol->var_array, 0, size); + if (symbol->var_array == 0) { symbol->type = Node_var_array; + symbol->array_size = symbol->table_size = 0; /* sanity */ + grow_table(symbol); + hash1 = hash(subs->stptr, subs->stlen, + (unsigned long) symbol->array_size); } else { + hash1 = hash(subs->stptr, subs->stlen, + (unsigned long) symbol->array_size); bucket = assoc_find(symbol, subs, hash1); if (bucket != NULL) { free_temp(subs); @@ -205,6 +301,17 @@ NODE *symbol, *subs; if (do_lint && subs->stlen == 0) warning("subscript of array `%s' is null string", symbol->vname); + + /* first see if we would need to grow the array, before installing */ + symbol->table_size++; + if ((symbol->flags & ARRAYMAXED) == 0 + && symbol->table_size/symbol->array_size > AVG_CHAIN_MAX) { + grow_table(symbol); + /* have to recompute hash value for new size */ + hash1 = hash(subs->stptr, subs->stlen, + (unsigned long) symbol->array_size); + } + getnode(bucket); bucket->type = Node_ahash; if (subs->flags & TEMP) @@ -240,7 +347,7 @@ NODE *symbol, *tree; if (symbol->var_array == 0) return; subs = concat_exp(tree); /* concat_exp returns string node */ - hash1 = hash(subs->stptr, subs->stlen); + hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size); last = NULL; for (bucket = symbol->var_array[hash1]; bucket; last = bucket, bucket = bucket->ahnext) @@ -256,6 +363,14 @@ NODE *symbol, *tree; unref(bucket->ahname); unref(bucket->ahvalue); freenode(bucket); + symbol->table_size--; + if (symbol->table_size <= 0) { + memset(symbol->var_array, '\0', + sizeof(NODE *) * symbol->array_size); + symbol->table_size = symbol->array_size = 0; + free(symbol->var_array); + symbol->var_array = NULL; + } } void @@ -263,12 +378,12 @@ assoc_scan(symbol, lookat) NODE *symbol; struct search *lookat; { - if (!symbol->var_array) { + if (symbol->var_array == NULL) { lookat->retval = NULL; return; } lookat->arr_ptr = symbol->var_array; - lookat->arr_end = lookat->arr_ptr + HASHSIZE; /* added */ + lookat->arr_end = lookat->arr_ptr + symbol->array_size; lookat->bucket = symbol->var_array[0]; assoc_next(lookat); } @@ -291,3 +406,77 @@ struct search *lookat; } return; } + +/* grow_table --- grow a hash table */ + +static void +grow_table(symbol) +NODE *symbol; +{ + NODE **old, **new, *chain, *next; + int i, j; + unsigned long hash1; + unsigned long oldsize, newsize; + /* + * This is an array of primes. We grow the table by an order of + * magnitude each time (not just doubling) so that growing is a + * rare operation. We expect, on average, that it won't happen + * more than twice. The final size is also chosen to be small + * enough so that MS-DOG mallocs can handle it. When things are + * very large (> 8K), we just double more or less, instead of + * just jumping from 8K to 64K. + */ + static long sizes[] = { 13, 127, 1021, 8191, 16381, 32749, 65497 }; + + /* find next biggest hash size */ + oldsize = symbol->array_size; + newsize = 0; + for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) { + if (oldsize < sizes[i]) { + newsize = sizes[i]; + break; + } + } + + if (newsize == oldsize) { /* table already at max (!) */ + symbol->flags |= ARRAYMAXED; + return; + } + + /* allocate new table */ + emalloc(new, NODE **, newsize * sizeof(NODE *), "grow_table"); + memset(new, '\0', newsize * sizeof(NODE *)); + + /* brand new hash table, set things up and return */ + if (symbol->var_array == NULL) { + symbol->table_size = 0; + goto done; + } + + /* old hash table there, move stuff to new, free old */ + old = symbol->var_array; + for (i = 0; i < oldsize; i++) { + if (old[i] == NULL) + continue; + + for (chain = old[i]; chain != NULL; chain = next) { + next = chain->ahnext; + hash1 = hash(chain->ahname->stptr, + chain->ahname->stlen, newsize); + + /* remove from old list, add to new */ + chain->ahnext = new[hash1]; + new[hash1] = chain; + + } + } + free(old); + +done: + /* + * note that symbol->table_size does not change if an old array, + * and is explicitly set to 0 if a new one. + */ + symbol->var_array = new; + symbol->array_size = newsize; +} |