From 50268dcf6484af095fb7485758de1b44e9375a51 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Tue, 3 Aug 2021 16:10:29 +1200 Subject: [PATCH v2] Use densehash.h hashtables in SMgr The hash table lookups done in SMgr can quite often be a bottleneck during crash recovery. Traditionally these use dynahash. Here we swap dynahash out and use densehash instead. This improves lookup performance. --- src/backend/storage/smgr/smgr.c | 82 +- src/include/lib/densehash.h | 1436 +++++++++++++++++++++++++++++++ 2 files changed, 1496 insertions(+), 22 deletions(-) create mode 100644 src/include/lib/densehash.h diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 0fcef4994b..3fa9c21c4b 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -18,6 +18,7 @@ #include "postgres.h" #include "access/xlogutils.h" +#include "common/hashfn.h" #include "lib/ilist.h" #include "storage/bufmgr.h" #include "storage/ipc.h" @@ -25,6 +26,25 @@ #include "storage/smgr.h" #include "utils/hsearch.h" #include "utils/inval.h" +#include "utils/memutils.h" + +static inline uint32 relfilenodebackend_hash(RelFileNodeBackend *rnode); + +#define DH_PREFIX smgrtable +#define DH_ELEMENT_TYPE SMgrRelationData +#define DH_KEY_TYPE RelFileNodeBackend +#define DH_KEY smgr_rnode +#define DH_HASH_KEY(tb, key) relfilenodebackend_hash(&key) +#define DH_EQUAL(tb, a, b) (memcmp(&a, &b, sizeof(RelFileNodeBackend)) == 0) +#define DH_SCOPE static inline +#define DH_STORE_HASH +#define DH_GET_HASH(tb, a) a->hash +#define DH_ALLOCATE(b) MemoryContextAlloc(TopMemoryContext, (b)) +#define DH_ALLOCATE_ZERO(b) MemoryContextAllocZero(TopMemoryContext, (b)) +#define DH_FREE(p) pfree(p) +#define DH_DEFINE +#define DH_DECLARE +#include "lib/densehash.h" /* @@ -91,13 +111,43 @@ static const int NSmgr = lengthof(smgrsw); * Each backend has a hashtable that stores all extant SMgrRelation objects. * In addition, "unowned" SMgrRelation objects are chained together in a list. */ -static HTAB *SMgrRelationHash = NULL; +static smgrtable_hash *SMgrRelationHash = NULL; static dlist_head unowned_relns; /* local function prototypes */ static void smgrshutdown(int code, Datum arg); +/* + * relfilenodebackend_hash + * Custom rolled hash function for simplehash table. + * + * smgropen() is often a bottleneck in CPU bound workloads during crash + * recovery. We make use of this custom hash function rather than using + * hash_bytes as it gives us a little bit more performance. + * + * XXX What if sizeof(Oid) is not 4? 
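+ * (Currently Oid is a 4-byte unsigned integer on all supported platforms,
+ * so the uint32 casts below are lossless; this would need revisiting if
+ * that ever changed.)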
+ */ +static inline uint32 +relfilenodebackend_hash(RelFileNodeBackend *rnode) +{ + uint32 hashkey; + + hashkey = murmurhash32((uint32) rnode->node.spcNode); + + /* rotate hashkey left 1 bit at each step */ + hashkey = pg_rotate_right32(hashkey, 31); + hashkey ^= murmurhash32((uint32) rnode->node.dbNode); + + hashkey = pg_rotate_right32(hashkey, 31); + hashkey ^= murmurhash32((uint32) rnode->node.relNode); + + hashkey = pg_rotate_right32(hashkey, 31); + hashkey ^= murmurhash32((uint32) rnode->backend); + + return hashkey; +} + /* * smgrinit(), smgrshutdown() -- Initialize or shut down storage @@ -149,29 +199,22 @@ smgropen(RelFileNode rnode, BackendId backend) SMgrRelation reln; bool found; - if (SMgrRelationHash == NULL) + if (unlikely(SMgrRelationHash == NULL)) { /* First time through: initialize the hash table */ - HASHCTL ctl; - - ctl.keysize = sizeof(RelFileNodeBackend); - ctl.entrysize = sizeof(SMgrRelationData); - SMgrRelationHash = hash_create("smgr relation table", 400, - &ctl, HASH_ELEM | HASH_BLOBS); + SMgrRelationHash = smgrtable_create(400); dlist_init(&unowned_relns); } /* Look up or create an entry */ brnode.node = rnode; brnode.backend = backend; - reln = (SMgrRelation) hash_search(SMgrRelationHash, - (void *) &brnode, - HASH_ENTER, &found); + reln = smgrtable_insert(SMgrRelationHash, brnode, &found); /* Initialize it if not present before */ if (!found) { - /* hash_search already filled in the lookup key */ + /* smgrtable_insert already filled in the lookup key */ reln->smgr_owner = NULL; reln->smgr_targblock = InvalidBlockNumber; for (int i = 0; i <= MAX_FORKNUM; ++i) @@ -266,9 +309,7 @@ smgrclose(SMgrRelation reln) if (!owner) dlist_delete(&reln->node); - if (hash_search(SMgrRelationHash, - (void *) &(reln->smgr_rnode), - HASH_REMOVE, NULL) == NULL) + if (!smgrtable_delete(SMgrRelationHash, reln->smgr_rnode)) elog(ERROR, "SMgrRelation hashtable corrupted"); /* @@ -285,16 +326,16 @@ smgrclose(SMgrRelation reln) void smgrcloseall(void) { - HASH_SEQ_STATUS status; + smgrtable_iterator iterator; SMgrRelation reln; /* Nothing to do if hashtable not set up */ if (SMgrRelationHash == NULL) return; - hash_seq_init(&status, SMgrRelationHash); + smgrtable_start_iterate(SMgrRelationHash, &iterator); - while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL) + while ((reln = smgrtable_iterate(SMgrRelationHash, &iterator)) != NULL) smgrclose(reln); } @@ -314,10 +355,7 @@ smgrclosenode(RelFileNodeBackend rnode) /* Nothing to do if hashtable not set up */ if (SMgrRelationHash == NULL) return; - - reln = (SMgrRelation) hash_search(SMgrRelationHash, - (void *) &rnode, - HASH_FIND, NULL); + reln = smgrtable_lookup(SMgrRelationHash, rnode); if (reln != NULL) smgrclose(reln); } diff --git a/src/include/lib/densehash.h b/src/include/lib/densehash.h new file mode 100644 index 0000000000..26fab94479 --- /dev/null +++ b/src/include/lib/densehash.h @@ -0,0 +1,1436 @@ +/* + * densehash.h + * + * A hashtable implementation which can be included into .c files to + * provide a fast hash table implementation specific to the given type. + * + * DH_ELEMENT_TYPE defines the data type that the hashtable stores. These + * are allocated DH_ITEMS_PER_SEGMENT at a time and stored inside a + * DH_SEGMENT. Each DH_SEGMENT is allocated on demand only when there are + * no free slots to store another DH_ELEMENT_TYPE in an existing segment. 
+ * After items are removed from the hash table, the next inserted item's + * data will be stored in the earliest free item in the earliest segment + * with a free slot. This helps keep the actual data compact, or "dense" + * even when the bucket array has become large. + * + * The bucket array is an array of DH_BUCKET and is dynamically allocated + * and may grow as more items are added to the table. The DH_BUCKET type + * is very narrow and stores just 2 uint32 values. One of these is the + * hash value and the other is the index into the segments which are used + * to directly look up the stored DH_ELEMENT_TYPE type. + * + * During inserts, hash table collisions are dealt with using linear + * probing, this means that instead of doing something like chaining with a + * linked list, we use the first free bucket which comes after the optimal + * bucket. This is much more CPU cache efficient than traversing a linked + * list. When we're unable to use the most optimal bucket, we may also + * move the contents of subsequent buckets around so that we keep items as + * close to their most optimal position as possible. This prevents + * excessively long linear probes during lookups. + * + * During hash table deletes, we must attempt to move the contents of + * buckets that are not in their optimal position up to either their + * optimal position, or as close as we can get to it. During lookups, this + * means that we can stop searching for a non-existing item as soon as we + * find an empty bucket. + * + * Empty buckets are denoted by their 'index' field being set to + * DH_UNUSED_BUCKET_INDEX. This is done rather than adding a special field + * so that we can keep the DH_BUCKET type as narrow as possible. + * Conveniently sizeof(DH_BUCKET) is 8, which allows 8 of these to fit on a + * single 64-byte cache line. It's important to keep this type as narrow as + * possible so that we can perform hash lookups by hitting as few + * cache lines as possible. + * + * The implementation here is similar to simplehash.h but has the following + * benefits: + * + * - Pointers to elements are stable and are not moved around like they are + * in simplehash.h + * - Sequential scans of the hash table remain very fast even when the + * table is sparsely populated. + * - Both simplehash.h and densehash.h may move items around during inserts + * and deletes. If DH_ELEMENT_TYPE is large, since simplehash.h stores + * the data in the hash bucket, these operations may become expensive in + * simplehash.h. In densehash.h these remain fairly cheap as the bucket + * is always 8 bytes wide due to the hash entry being stored in the + * DH_SEGMENT. + * + * If none of the above points are important for the given use case then, + * please consider using simplehash.h instead. 
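+ *
+ * To generate a hash table and associated functions, the includer must
+ * #define a number of macros before including this file.  As a rough,
+ * hypothetical sketch (the names "foo", "FooEntry", "FooKey" and
+ * "hash_foo_key" are placeholders; see the smgr.c changes in this patch
+ * for a real example):
+ *
+ *	  #define DH_PREFIX foo
+ *	  #define DH_ELEMENT_TYPE FooEntry
+ *	  #define DH_KEY_TYPE FooKey
+ *	  #define DH_KEY key
+ *	  #define DH_HASH_KEY(tb, key) hash_foo_key(&key)
+ *	  #define DH_EQUAL(tb, a, b) (memcmp(&a, &b, sizeof(FooKey)) == 0)
+ *	  #define DH_SCOPE static inline
+ *	  #define DH_ALLOCATE(b) palloc(b)
+ *	  #define DH_ALLOCATE_ZERO(b) palloc0(b)
+ *	  #define DH_FREE(p) pfree(p)
+ *	  #define DH_DECLARE
+ *	  #define DH_DEFINE
+ *	  #include "lib/densehash.h"
+ *
+ * This emits the types foo_hash, foo_bucket, foo_segment and foo_iterator
+ * along with the functions foo_create(), foo_destroy(), foo_reset(),
+ * foo_grow(), foo_insert(), foo_insert_hash(), foo_lookup(),
+ * foo_lookup_hash(), foo_delete(), foo_start_iterate() and foo_iterate().
+ * DH_ELEMENT_TYPE must contain a field named by DH_KEY of type
+ * DH_KEY_TYPE.  Optionally, DH_ITEMS_PER_SEGMENT, DH_FILLFACTOR, the
+ * DH_GROW_* settings and DH_HAVE_PRIVATE_DATA may be defined to override
+ * the defaults below.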
+ * + * + * Portions Copyright (c) 2021, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/include/lib/densehash.h + * + */ + +#include "port/pg_bitutils.h" + +/* helpers */ +#define DH_MAKE_PREFIX(a) CppConcat(a,_) +#define DH_MAKE_NAME(name) DH_MAKE_NAME_(DH_MAKE_PREFIX(DH_PREFIX),name) +#define DH_MAKE_NAME_(a,b) CppConcat(a,b) + +/* type declarations */ +#define DH_TYPE DH_MAKE_NAME(hash) +#define DH_BUCKET DH_MAKE_NAME(bucket) +#define DH_SEGMENT DH_MAKE_NAME(segment) +#define DH_ITERATOR DH_MAKE_NAME(iterator) + +/* function declarations */ +#define DH_CREATE DH_MAKE_NAME(create) +#define DH_DESTROY DH_MAKE_NAME(destroy) +#define DH_RESET DH_MAKE_NAME(reset) +#define DH_INSERT DH_MAKE_NAME(insert) +#define DH_INSERT_HASH DH_MAKE_NAME(insert_hash) +#define DH_DELETE DH_MAKE_NAME(delete) +#define DH_LOOKUP DH_MAKE_NAME(lookup) +#define DH_LOOKUP_HASH DH_MAKE_NAME(lookup_hash) +#define DH_GROW DH_MAKE_NAME(grow) +#define DH_START_ITERATE DH_MAKE_NAME(start_iterate) +#define DH_ITERATE DH_MAKE_NAME(iterate) + +/* internal helper functions (no externally visible prototypes) */ +#define DH_NEXT_ONEBIT DH_MAKE_NAME(next_onebit) +#define DH_NEXT_ZEROBIT DH_MAKE_NAME(next_zerobit) +#define DH_INDEX_TO_ELEMENT DH_MAKE_NAME(index_to_element) +#define DH_MARK_SEGMENT_ITEM_USED DH_MAKE_NAME(mark_segment_item_used) +#define DH_MARK_SEGMENT_ITEM_UNUSED DH_MAKE_NAME(mark_segment_item_unused) +#define DH_GET_NEXT_UNUSED_ENTRY DH_MAKE_NAME(get_next_unused_entry) +#define DH_REMOVE_ENTRY DH_MAKE_NAME(remove_entry) +#define DH_SET_BUCKET_IN_USE DH_MAKE_NAME(set_bucket_in_use) +#define DH_SET_BUCKET_EMPTY DH_MAKE_NAME(set_bucket_empty) +#define DH_IS_BUCKET_IN_USE DH_MAKE_NAME(is_bucket_in_use) +#define DH_COMPUTE_PARAMETERS DH_MAKE_NAME(compute_parameters) +#define DH_NEXT DH_MAKE_NAME(next) +#define DH_PREV DH_MAKE_NAME(prev) +#define DH_DISTANCE_FROM_OPTIMAL DH_MAKE_NAME(distance) +#define DH_INITIAL_BUCKET DH_MAKE_NAME(initial_bucket) +#define DH_INSERT_HASH_INTERNAL DH_MAKE_NAME(insert_hash_internal) +#define DH_LOOKUP_HASH_INTERNAL DH_MAKE_NAME(lookup_hash_internal) + +/* + * When allocating memory to store instances of DH_ELEMENT_TYPE, how many + * should we allocate at once? This must be a power of 2 and at least + * DH_BITS_PER_WORD. + */ +#ifndef DH_ITEMS_PER_SEGMENT +#define DH_ITEMS_PER_SEGMENT 256 +#endif + +/* A special index to set DH_BUCKET->index to when it's not in use */ +#define DH_UNUSED_BUCKET_INDEX PG_UINT32_MAX + +/* + * Macros for translating a bucket's index into the segment index and another + * to determine the item number within the segment. + */ +#define DH_INDEX_SEGMENT(i) (i) / DH_ITEMS_PER_SEGMENT +#define DH_INDEX_ITEM(i) (i) % DH_ITEMS_PER_SEGMENT + + /* + * How many elements do we need in the bitmap array to store a bit for each + * of DH_ITEMS_PER_SEGMENT. Keep the word size native to the processor. 
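+ * For example, with the default DH_ITEMS_PER_SEGMENT of 256 and 64-bit
+ * bitmap words this works out to 4 words per segment.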
+ */ +#if SIZEOF_VOID_P >= 8 + +#define DH_BITS_PER_WORD 64 +#define DH_BITMAP_WORD uint64 +#define DH_RIGHTMOST_ONE_POS(x) pg_rightmost_one_pos64(x) + +#else + +#define DH_BITS_PER_WORD 32 +#define DH_BITMAP_WORD uint32 +#define DH_RIGHTMOST_ONE_POS(x) pg_rightmost_one_pos32(x) + +#endif + +/* Sanity check on DH_ITEMS_PER_SEGMENT setting */ +#if DH_ITEMS_PER_SEGMENT < DH_BITS_PER_WORD +#error "DH_ITEMS_PER_SEGMENT must be >= than DH_BITS_PER_WORD" +#endif + +/* Ensure DH_ITEMS_PER_SEGMENT is a power of 2 */ +#if DH_ITEMS_PER_SEGMENT & (DH_ITEMS_PER_SEGMENT - 1) != 0 +#error "DH_ITEMS_PER_SEGMENT must be a power of 2" +#endif + +#define DH_BITMAP_WORDS (DH_ITEMS_PER_SEGMENT / DH_BITS_PER_WORD) +#define DH_WORDNUM(x) ((x) / DH_BITS_PER_WORD) +#define DH_BITNUM(x) ((x) % DH_BITS_PER_WORD) + +/* generate forward declarations necessary to use the hash table */ +#ifdef DH_DECLARE + +typedef struct DH_BUCKET +{ + uint32 hashvalue; /* Hash value for this bucket */ + uint32 index; /* Index to the actual data */ +} DH_BUCKET; + +typedef struct DH_SEGMENT +{ + uint32 nitems; /* Number of items stored */ + DH_BITMAP_WORD used_items[DH_BITMAP_WORDS]; /* A 1-bit for each used item + * in the items array */ + DH_ELEMENT_TYPE items[DH_ITEMS_PER_SEGMENT]; /* the actual data */ +} DH_SEGMENT; + +/* type definitions */ + +/* + * DH_TYPE + * Hash table metadata type + */ +typedef struct DH_TYPE +{ + /* + * Size of bucket array. Note that the maximum number of elements is + * lower (DH_MAX_FILLFACTOR) + */ + uint32 size; + + /* mask for bucket and size calculations, based on size */ + uint32 sizemask; + + /* the number of elements stored */ + uint32 members; + + /* boundary after which to grow hashtable */ + uint32 grow_threshold; + + /* how many elements are there in the segments array */ + uint32 nsegments; + + /* the number of elements in the used_segments array */ + uint32 used_segment_words; + + /* + * The first segment we should search in for an empty slot. This will be + * the first segment that DH_GET_NEXT_UNUSED_ENTRY will search in when + * looking for an unused entry. We'll increase the value of this when we + * fill a segment and we'll lower it down when we delete an item from a + * segment lower than this value. + */ + uint32 first_free_segment; + + /* dynamically allocated array of hash buckets */ + DH_BUCKET *buckets; + + /* an array of segment pointers to store data */ + DH_SEGMENT **segments; + + /* + * A bitmap of non-empty segments. A 1-bit denotes that the corresponding + * segment is non-empty. + */ + DH_BITMAP_WORD *used_segments; + +#ifdef DH_HAVE_PRIVATE_DATA + /* user defined data, useful for callbacks */ + void *private_data; +#endif +} DH_TYPE; + +/* + * DH_ITERATOR + * Used when looping over the contents of the hash table. + */ +typedef struct DH_ITERATOR +{ + int32 cursegidx; /* current segment. -1 means not started */ + int32 curitemidx; /* current item within cursegidx, -1 means not + * started */ + uint32 found_members; /* number of items visitied so far in the loop */ + uint32 total_members; /* number of items that existed at the start + * iteration. 
*/ +} DH_ITERATOR; + +/* externally visible function prototypes */ + +#ifdef DH_HAVE_PRIVATE_DATA +/* _hash _create(uint32 nbuckets, void *private_data) */ +DH_SCOPE DH_TYPE *DH_CREATE(uint32 nbuckets, void *private_data); +#else +/* _hash _create(uint32 nbuckets) */ +DH_SCOPE DH_TYPE *DH_CREATE(uint32 nbuckets); +#endif + +/* void _destroy(_hash *tb) */ +DH_SCOPE void DH_DESTROY(DH_TYPE * tb); + +/* void _reset(_hash *tb) */ +DH_SCOPE void DH_RESET(DH_TYPE * tb); + +/* void _grow(_hash *tb) */ +DH_SCOPE void DH_GROW(DH_TYPE * tb, uint32 newsize); + +/* *_insert(_hash *tb, key, bool *found) */ +DH_SCOPE DH_ELEMENT_TYPE *DH_INSERT(DH_TYPE * tb, DH_KEY_TYPE key, + bool *found); + +/* + * *_insert_hash(_hash *tb, key, uint32 hash, + * bool *found) + */ +DH_SCOPE DH_ELEMENT_TYPE *DH_INSERT_HASH(DH_TYPE * tb, DH_KEY_TYPE key, + uint32 hash, bool *found); + +/* *_lookup(_hash *tb, key) */ +DH_SCOPE DH_ELEMENT_TYPE *DH_LOOKUP(DH_TYPE * tb, DH_KEY_TYPE key); + +/* *_lookup_hash(_hash *tb, key, uint32 hash) */ +DH_SCOPE DH_ELEMENT_TYPE *DH_LOOKUP_HASH(DH_TYPE * tb, DH_KEY_TYPE key, + uint32 hash); + +/* bool _delete(_hash *tb, key) */ +DH_SCOPE bool DH_DELETE(DH_TYPE * tb, DH_KEY_TYPE key); + +/* void _start_iterate(_hash *tb, _iterator *iter) */ +DH_SCOPE void DH_START_ITERATE(DH_TYPE * tb, DH_ITERATOR * iter); + +/* *_iterate(_hash *tb, _iterator *iter) */ +DH_SCOPE DH_ELEMENT_TYPE *DH_ITERATE(DH_TYPE * tb, DH_ITERATOR * iter); + +#endif /* DH_DECLARE */ + +/* generate implementation of the hash table */ +#ifdef DH_DEFINE + +/* + * The maximum size for the hash table. This must be a power of 2. We cannot + * make this PG_UINT32_MAX + 1 because we use DH_UNUSED_BUCKET_INDEX denote an + * empty bucket. Doing so would mean we could accidentally set a used + * bucket's index to DH_UNUSED_BUCKET_INDEX. + */ +#define DH_MAX_SIZE ((uint32) PG_INT32_MAX + 1) + +/* normal fillfactor, unless already close to maximum */ +#ifndef DH_FILLFACTOR +#define DH_FILLFACTOR (0.9) +#endif +/* increase fillfactor if we otherwise would error out */ +#define DH_MAX_FILLFACTOR (0.98) +/* grow if actual and optimal location bigger than */ +#ifndef DH_GROW_MAX_DIB +#define DH_GROW_MAX_DIB 25 +#endif +/* + * Grow if more than this number of buckets needs to be moved when inserting. + */ +#ifndef DH_GROW_MAX_MOVE +#define DH_GROW_MAX_MOVE 150 +#endif +#ifndef DH_GROW_MIN_FILLFACTOR +/* but do not grow due to DH_GROW_MAX_* if below */ +#define DH_GROW_MIN_FILLFACTOR 0.1 +#endif + +/* + * Wrap the following definitions in include guards, to avoid multiple + * definition errors if this header is included more than once. The rest of + * the file deliberately has no include guards, because it can be included + * with different parameters to define functions and types with non-colliding + * names. + */ +#ifndef DENSEHASH_H +#define DENSEHASH_H + +#ifdef FRONTEND +#define gh_error(...) pg_log_error(__VA_ARGS__) +#define gh_log(...) pg_log_info(__VA_ARGS__) +#else +#define gh_error(...) elog(ERROR, __VA_ARGS__) +#define gh_log(...) elog(LOG, __VA_ARGS__) +#endif + +#endif /* DENSEHASH_H */ + +/* + * Gets the position of the first 1-bit which comes after 'prevbit' in the + * 'words' array. 'nwords' is the size of the 'words' array. 
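+ *
+ * Returns -1 if no such bit exists.  Pass -1 as 'prevbit' to start the
+ * search from the first bit of the array.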
+ */ +static inline int32 +DH_NEXT_ONEBIT(DH_BITMAP_WORD * words, uint32 nwords, int32 prevbit) +{ + uint32 wordnum; + + prevbit++; + + wordnum = DH_WORDNUM(prevbit); + if (wordnum < nwords) + { + DH_BITMAP_WORD mask = (~(DH_BITMAP_WORD) 0) << DH_BITNUM(prevbit); + DH_BITMAP_WORD word = words[wordnum] & mask; + + if (word != 0) + return wordnum * DH_BITS_PER_WORD + DH_RIGHTMOST_ONE_POS(word); + + for (++wordnum; wordnum < nwords; wordnum++) + { + word = words[wordnum]; + + if (word != 0) + { + int32 result = wordnum * DH_BITS_PER_WORD; + + result += DH_RIGHTMOST_ONE_POS(word); + return result; + } + } + } + return -1; +} + +/* + * Gets the position of the first 0-bit which comes after 'prevbit' in the + * 'words' array. 'nwords' is the size of the 'words' array. + * + * This is similar to DH_NEXT_ONEBIT but flips the bits before operating on + * each DH_BITMAP_WORD. + */ +static inline int32 +DH_NEXT_ZEROBIT(DH_BITMAP_WORD * words, uint32 nwords, int32 prevbit) +{ + uint32 wordnum; + + prevbit++; + + wordnum = DH_WORDNUM(prevbit); + if (wordnum < nwords) + { + DH_BITMAP_WORD mask = (~(DH_BITMAP_WORD) 0) << DH_BITNUM(prevbit); + DH_BITMAP_WORD word = ~(words[wordnum] & mask); /* flip bits */ + + if (word != 0) + return wordnum * DH_BITS_PER_WORD + DH_RIGHTMOST_ONE_POS(word); + + for (++wordnum; wordnum < nwords; wordnum++) + { + word = ~words[wordnum]; /* flip bits */ + + if (word != 0) + { + int32 result = wordnum * DH_BITS_PER_WORD; + + result += DH_RIGHTMOST_ONE_POS(word); + return result; + } + } + } + return -1; +} + +/* + * Finds the hash table entry for a given DH_BUCKET's 'index'. + */ +static inline DH_ELEMENT_TYPE * +DH_INDEX_TO_ELEMENT(DH_TYPE * tb, uint32 index) +{ + DH_SEGMENT *seg; + uint32 segidx; + uint32 item; + + segidx = DH_INDEX_SEGMENT(index); + item = DH_INDEX_ITEM(index); + + Assert(segidx < tb->nsegments); + + seg = tb->segments[segidx]; + + Assert(seg != NULL); + + /* ensure this segment is marked as used */ + Assert(seg->used_items[DH_WORDNUM(item)] & (((DH_BITMAP_WORD) 1) << DH_BITNUM(item))); + + return &seg->items[item]; +} + +static inline void +DH_MARK_SEGMENT_ITEM_USED(DH_TYPE * tb, DH_SEGMENT * seg, uint32 segidx, + uint32 segitem) +{ + uint32 word = DH_WORDNUM(segitem); + uint32 bit = DH_BITNUM(segitem); + + /* ensure this item is not marked as used */ + Assert((seg->used_items[word] & (((DH_BITMAP_WORD) 1) << bit)) == 0); + + /* switch on the used bit */ + seg->used_items[word] |= (((DH_BITMAP_WORD) 1) << bit); + + /* if the segment was previously empty then mark it as used */ + if (seg->nitems == 0) + { + word = DH_WORDNUM(segidx); + bit = DH_BITNUM(segidx); + + /* switch on the used bit for this segment */ + tb->used_segments[word] |= (((DH_BITMAP_WORD) 1) << bit); + } + seg->nitems++; +} + +static inline void +DH_MARK_SEGMENT_ITEM_UNUSED(DH_TYPE * tb, DH_SEGMENT * seg, uint32 segidx, + uint32 segitem) +{ + uint32 word = DH_WORDNUM(segitem); + uint32 bit = DH_BITNUM(segitem); + + /* ensure this item is marked as used */ + Assert((seg->used_items[word] & (((DH_BITMAP_WORD) 1) << bit)) != 0); + + /* switch off the used bit */ + seg->used_items[word] &= ~(((DH_BITMAP_WORD) 1) << bit); + + /* when removing the last item mark the segment as unused */ + if (seg->nitems == 1) + { + word = DH_WORDNUM(segidx); + bit = DH_BITNUM(segidx); + + /* switch off the used bit for this segment */ + tb->used_segments[word] &= ~(((DH_BITMAP_WORD) 1) << bit); + } + + seg->nitems--; +} + +/* + * Returns the first unused entry from the first non-full segment and set + * *index 
to the index of the returned entry. + */ +static inline DH_ELEMENT_TYPE * +DH_GET_NEXT_UNUSED_ENTRY(DH_TYPE * tb, uint32 *index) +{ + DH_SEGMENT *seg; + uint32 segidx = tb->first_free_segment; + uint32 itemidx; + + seg = tb->segments[segidx]; + + /* find the first segment with an unused item */ + while (seg != NULL && seg->nitems == DH_ITEMS_PER_SEGMENT) + seg = tb->segments[++segidx]; + + tb->first_free_segment = segidx; + + /* allocate the segment if it's not already */ + if (seg == NULL) + { + seg = DH_ALLOCATE(sizeof(DH_SEGMENT)); + tb->segments[segidx] = seg; + + seg->nitems = 0; + memset(seg->used_items, 0, sizeof(seg->used_items)); + /* no need to zero the items array */ + + /* use the first slot in this segment */ + itemidx = 0; + } + else + { + /* find the first unused item in this segment */ + itemidx = DH_NEXT_ZEROBIT(seg->used_items, DH_BITMAP_WORDS, -1); + Assert(itemidx >= 0); + } + + /* this is a good spot to ensure nitems matches the bits in used_items */ + Assert(seg->nitems == pg_popcount((const char *) seg->used_items, DH_ITEMS_PER_SEGMENT / 8)); + + DH_MARK_SEGMENT_ITEM_USED(tb, seg, segidx, itemidx); + + *index = segidx * DH_ITEMS_PER_SEGMENT + itemidx; + return &seg->items[itemidx]; + +} + +/* + * Remove the entry denoted by 'index' from its segment. + */ +static inline void +DH_REMOVE_ENTRY(DH_TYPE * tb, uint32 index) +{ + DH_SEGMENT *seg; + uint32 segidx = DH_INDEX_SEGMENT(index); + uint32 item = DH_INDEX_ITEM(index); + + Assert(segidx < tb->nsegments); + seg = tb->segments[segidx]; + Assert(seg != NULL); + + DH_MARK_SEGMENT_ITEM_UNUSED(tb, seg, segidx, item); + + /* + * Lower the first free segment index to point to this segment so that the + * next insert will store in this segment. If it's already set to a lower + * segment number then don't adjust as we want to consume slots from the + * earliest segment first. + */ + if (tb->first_free_segment > segidx) + tb->first_free_segment = segidx; +} + +/* + * Set 'bucket' as in use by 'index'. + */ +static inline void +DH_SET_BUCKET_IN_USE(DH_BUCKET * bucket, uint32 index) +{ + bucket->index = index; +} + +/* + * Mark 'bucket' as unused. + */ +static inline void +DH_SET_BUCKET_EMPTY(DH_BUCKET * bucket) +{ + bucket->index = DH_UNUSED_BUCKET_INDEX; +} + +/* + * Return true if 'bucket' is in use. + */ +static inline bool +DH_IS_BUCKET_IN_USE(DH_BUCKET * bucket) +{ + return bucket->index != DH_UNUSED_BUCKET_INDEX; +} + + /* + * Compute sizing parameters for hashtable. Called when creating and growing + * the hashtable. + */ +static inline void +DH_COMPUTE_PARAMETERS(DH_TYPE * tb, uint32 newsize) +{ + uint32 size; + + /* + * Ensure the bucket array size has not exceeded DH_MAX_SIZE or wrapped + * back to zero. + */ + if (newsize == 0 || newsize > DH_MAX_SIZE) + gh_error("hash table too large"); + + /* + * Ensure we don't build a table that can't store an entire single segment + * worth of data. + */ + size = Max(newsize, DH_ITEMS_PER_SEGMENT); + + /* round up size to the next power of 2 */ + size = pg_nextpower2_32(size); + + /* now set size */ + tb->size = size; + tb->sizemask = tb->size - 1; + + /* calculate how many segments we'll need to store 'size' items */ + tb->nsegments = pg_nextpower2_32(size / DH_ITEMS_PER_SEGMENT); + + /* + * Calculate the number of bitmap words needed to store a bit for each + * segment. + */ + tb->used_segment_words = (tb->nsegments + DH_BITS_PER_WORD - 1) / DH_BITS_PER_WORD; + + /* + * Compute the next threshold at which we need to grow the hash table + * again. 
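+	 *
+	 * For example, with the default DH_FILLFACTOR of 0.9 the grow threshold
+	 * for a table of 1024 buckets works out to 921 entries.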
+ */ + if (tb->size == DH_MAX_SIZE) + tb->grow_threshold = (uint32) (((double) tb->size) * DH_MAX_FILLFACTOR); + else + tb->grow_threshold = (uint32) (((double) tb->size) * DH_FILLFACTOR); +} + +/* return the optimal bucket for the hash */ +static inline uint32 +DH_INITIAL_BUCKET(DH_TYPE * tb, uint32 hash) +{ + return hash & tb->sizemask; +} + +/* return the next bucket after the current, handling wraparound */ +static inline uint32 +DH_NEXT(DH_TYPE * tb, uint32 curelem, uint32 startelem) +{ + curelem = (curelem + 1) & tb->sizemask; + + Assert(curelem != startelem); + + return curelem; +} + +/* return the bucket before the current, handling wraparound */ +static inline uint32 +DH_PREV(DH_TYPE * tb, uint32 curelem, uint32 startelem) +{ + curelem = (curelem - 1) & tb->sizemask; + + Assert(curelem != startelem); + + return curelem; +} + +/* return the distance between a bucket and its optimal position */ +static inline uint32 +DH_DISTANCE_FROM_OPTIMAL(DH_TYPE * tb, uint32 optimal, uint32 bucket) +{ + if (optimal <= bucket) + return bucket - optimal; + else + return (tb->size + bucket) - optimal; +} + +/* + * Create a hash table with 'nbuckets' buckets. + */ +DH_SCOPE DH_TYPE * +#ifdef DH_HAVE_PRIVATE_DATA +DH_CREATE(uint32 nbuckets, void *private_data) +#else +DH_CREATE(uint32 nbuckets) +#endif +{ + DH_TYPE *tb; + uint32 size; + uint32 i; + + tb = DH_ALLOCATE_ZERO(sizeof(DH_TYPE)); + +#ifdef DH_HAVE_PRIVATE_DATA + tb->private_data = private_data; +#endif + + /* increase nelements by fillfactor, want to store nelements elements */ + size = (uint32) Min((double) DH_MAX_SIZE, ((double) nbuckets) / DH_FILLFACTOR); + + DH_COMPUTE_PARAMETERS(tb, size); + + tb->buckets = DH_ALLOCATE(sizeof(DH_BUCKET) * tb->size); + + /* ensure all the buckets are set to empty */ + for (i = 0; i < tb->size; i++) + DH_SET_BUCKET_EMPTY(&tb->buckets[i]); + + tb->segments = DH_ALLOCATE_ZERO(sizeof(DH_SEGMENT *) * tb->nsegments); + tb->used_segments = DH_ALLOCATE_ZERO(sizeof(DH_BITMAP_WORD) * tb->used_segment_words); + return tb; +} + +/* destroy a previously created hash table */ +DH_SCOPE void +DH_DESTROY(DH_TYPE * tb) +{ + DH_FREE(tb->buckets); + + /* Free each segment one by one */ + for (uint32 n = 0; n < tb->nsegments; n++) + { + if (tb->segments[n] != NULL) + DH_FREE(tb->segments[n]); + } + + DH_FREE(tb->segments); + DH_FREE(tb->used_segments); + + pfree(tb); +} + +/* reset the contents of a previously created hash table */ +DH_SCOPE void +DH_RESET(DH_TYPE * tb) +{ + int32 i = -1; + uint32 x; + + /* reset each used segment one by one */ + while ((i = DH_NEXT_ONEBIT(tb->used_segments, tb->used_segment_words, + i)) >= 0) + { + DH_SEGMENT *seg = tb->segments[i]; + + Assert(seg != NULL); + + seg->nitems = 0; + memset(seg->used_items, 0, sizeof(seg->used_items)); + } + + /* empty every bucket */ + for (x = 0; x < tb->size; x++) + DH_SET_BUCKET_EMPTY(&tb->buckets[x]); + + /* zero the used segment bits */ + memset(tb->used_segments, 0, sizeof(DH_BITMAP_WORD) * tb->used_segment_words); + + /* and mark the table as having zero members */ + tb->members = 0; + + /* ensure we start putting any new items in the first segment */ + tb->first_free_segment = 0; +} + +/* + * Grow a hash table to at least 'newsize' buckets. + * + * Usually this will automatically be called by insertions/deletions, when + * necessary. But resizing to the exact input size can be advantageous + * performance-wise, when known at some point. 
+ */ +DH_SCOPE void +DH_GROW(DH_TYPE * tb, uint32 newsize) +{ + uint32 oldsize = tb->size; + uint32 oldnsegments = tb->nsegments; + uint32 oldusedsegmentwords = tb->used_segment_words; + DH_BUCKET *oldbuckets = tb->buckets; + DH_SEGMENT **oldsegments = tb->segments; + DH_BITMAP_WORD *oldusedsegments = tb->used_segments; + DH_BUCKET *newbuckets; + uint32 i; + uint32 startelem = 0; + uint32 copyelem; + + Assert(oldsize == pg_nextpower2_32(oldsize)); + + /* compute parameters for new table */ + DH_COMPUTE_PARAMETERS(tb, newsize); + + tb->buckets = DH_ALLOCATE(sizeof(DH_ELEMENT_TYPE) * tb->size); + + /* Ensure all the buckets are set to empty */ + for (i = 0; i < tb->size; i++) + DH_SET_BUCKET_EMPTY(&tb->buckets[i]); + + newbuckets = tb->buckets; + + /* + * Copy buckets from the old buckets to newbuckets. We theoretically could + * use DH_INSERT here, to avoid code duplication, but that's more general + * than we need. We neither want tb->members increased, nor do we need to + * do deal with deleted elements, nor do we need to compare keys. So a + * special-cased implementation is a lot faster. Resizing can be time + * consuming and frequent, that's worthwhile to optimize. + * + * To be able to simply move buckets over, we have to start not at the + * first bucket (i.e oldbuckets[0]), but find the first bucket that's + * either empty or is occupied by an entry at its optimal position. Such a + * bucket has to exist in any table with a load factor under 1, as not all + * buckets are occupied, i.e. there always has to be an empty bucket. By + * starting at such a bucket we can move the entries to the larger table, + * without having to deal with conflicts. + */ + + /* search for the first element in the hash that's not wrapped around */ + for (i = 0; i < oldsize; i++) + { + DH_BUCKET *oldbucket = &oldbuckets[i]; + uint32 hash; + uint32 optimal; + + if (!DH_IS_BUCKET_IN_USE(oldbucket)) + { + startelem = i; + break; + } + + hash = oldbucket->hashvalue; + optimal = DH_INITIAL_BUCKET(tb, hash); + + if (optimal == i) + { + startelem = i; + break; + } + } + + /* and copy all elements in the old table */ + copyelem = startelem; + for (i = 0; i < oldsize; i++) + { + DH_BUCKET *oldbucket = &oldbuckets[copyelem]; + + if (DH_IS_BUCKET_IN_USE(oldbucket)) + { + uint32 hash; + uint32 startelem; + uint32 curelem; + DH_BUCKET *newbucket; + + hash = oldbucket->hashvalue; + startelem = DH_INITIAL_BUCKET(tb, hash); + curelem = startelem; + + /* find empty element to put data into */ + for (;;) + { + newbucket = &newbuckets[curelem]; + + if (!DH_IS_BUCKET_IN_USE(newbucket)) + break; + + curelem = DH_NEXT(tb, curelem, startelem); + } + + /* copy entry to new slot */ + memcpy(newbucket, oldbucket, sizeof(DH_BUCKET)); + } + + /* can't use DH_NEXT here, would use new size */ + copyelem++; + if (copyelem >= oldsize) + copyelem = 0; + } + + DH_FREE(oldbuckets); + + /* + * Enlarge the segment array so we can store enough segments for the new + * hash table capacity. + */ + tb->segments = DH_ALLOCATE(sizeof(DH_SEGMENT *) * tb->nsegments); + memcpy(tb->segments, oldsegments, sizeof(DH_SEGMENT *) * oldnsegments); + /* zero the newly extended part of the array */ + memset(&tb->segments[oldnsegments], 0, sizeof(DH_SEGMENT *) * + (tb->nsegments - oldnsegments)); + DH_FREE(oldsegments); + + /* + * The majority of tables will only ever need one bitmap word to store + * used segments, so we only bother to reallocate the used_segments array + * if the number of bitmap words has actually changed. 
+ */ + if (tb->used_segment_words != oldusedsegmentwords) + { + tb->used_segments = DH_ALLOCATE(sizeof(DH_BITMAP_WORD) * + tb->used_segment_words); + memcpy(tb->used_segments, oldusedsegments, sizeof(DH_BITMAP_WORD) * + oldusedsegmentwords); + memset(&tb->used_segments[oldusedsegmentwords], 0, + sizeof(DH_BITMAP_WORD) * (tb->used_segment_words - + oldusedsegmentwords)); + + DH_FREE(oldusedsegments); + } +} + +/* + * This is a separate static inline function, so it can be reliably be inlined + * into its wrapper functions even if DH_SCOPE is extern. + */ +static inline DH_ELEMENT_TYPE * +DH_INSERT_HASH_INTERNAL(DH_TYPE * tb, DH_KEY_TYPE key, uint32 hash, bool *found) +{ + uint32 startelem; + uint32 curelem; + DH_BUCKET *buckets; + uint32 insertdist; + +restart: + insertdist = 0; + + /* + * To avoid doing the grow check inside the loop, we do the grow check + * regardless of if the key is present. This also lets us avoid having to + * re-find our position in the hashtable after resizing. + * + * Note that this also reached when resizing the table due to + * DH_GROW_MAX_DIB / DH_GROW_MAX_MOVE. + */ + if (unlikely(tb->members >= tb->grow_threshold)) + { + /* this may wrap back to 0 when we're already at DH_MAX_SIZE */ + DH_GROW(tb, tb->size * 2); + } + + /* perform the insert starting the bucket search at optimal location */ + buckets = tb->buckets; + startelem = DH_INITIAL_BUCKET(tb, hash); + curelem = startelem; + for (;;) + { + DH_BUCKET *bucket = &buckets[curelem]; + DH_ELEMENT_TYPE *entry; + uint32 curdist; + uint32 curhash; + uint32 curoptimal; + + /* any empty bucket can directly be used */ + if (!DH_IS_BUCKET_IN_USE(bucket)) + { + uint32 index; + + /* and add the new entry */ + tb->members++; + + entry = DH_GET_NEXT_UNUSED_ENTRY(tb, &index); + entry->DH_KEY = key; + bucket->hashvalue = hash; + DH_SET_BUCKET_IN_USE(bucket, index); + *found = false; + return entry; + } + + curhash = bucket->hashvalue; + + if (curhash == hash) + { + /* + * The hash value matches so we just need to ensure the key + * matches too. To do that, we need to lookup the entry in the + * segments using the index stored in the bucket. + */ + entry = DH_INDEX_TO_ELEMENT(tb, bucket->index); + + /* if we find a match, we're done */ + if (DH_EQUAL(tb, key, entry->DH_KEY)) + { + Assert(DH_IS_BUCKET_IN_USE(bucket)); + *found = true; + return entry; + } + } + + /* + * For non-empty, non-matching buckets we have to decide whether to + * skip over or move the colliding entry. When the colliding + * element's distance to its optimal position is smaller than the + * to-be-inserted entry's, we shift the colliding entry (and its + * followers) one bucket closer to their optimal position. + */ + curoptimal = DH_INITIAL_BUCKET(tb, curhash); + curdist = DH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem); + + if (insertdist > curdist) + { + DH_ELEMENT_TYPE *entry; + DH_BUCKET *lastbucket = bucket; + uint32 emptyelem = curelem; + uint32 moveelem; + int32 emptydist = 0; + uint32 index; + + /* find next empty bucket */ + for (;;) + { + DH_BUCKET *emptybucket; + + emptyelem = DH_NEXT(tb, emptyelem, startelem); + emptybucket = &buckets[emptyelem]; + + if (!DH_IS_BUCKET_IN_USE(emptybucket)) + { + lastbucket = emptybucket; + break; + } + + /* + * To avoid negative consequences from overly imbalanced + * hashtables, grow the hashtable if collisions would require + * us to move a lot of entries. The most likely cause of such + * imbalance is filling a (currently) small table, from a + * currently big one, in hashtable order. 
Don't grow if the + * hashtable would be too empty, to prevent quick space + * explosion for some weird edge cases. + */ + if (unlikely(++emptydist > DH_GROW_MAX_MOVE) && + ((double) tb->members / tb->size) >= DH_GROW_MIN_FILLFACTOR) + { + tb->grow_threshold = 0; + goto restart; + } + } + + /* shift forward, starting at last occupied element */ + + /* + * TODO: This could be optimized to be one memcpy in many cases, + * excepting wrapping around at the end of ->data. Hasn't shown up + * in profiles so far though. + */ + moveelem = emptyelem; + while (moveelem != curelem) + { + DH_BUCKET *movebucket; + + moveelem = DH_PREV(tb, moveelem, startelem); + movebucket = &buckets[moveelem]; + + memcpy(lastbucket, movebucket, sizeof(DH_BUCKET)); + lastbucket = movebucket; + } + + /* and add the new entry */ + tb->members++; + + entry = DH_GET_NEXT_UNUSED_ENTRY(tb, &index); + entry->DH_KEY = key; + bucket->hashvalue = hash; + DH_SET_BUCKET_IN_USE(bucket, index); + *found = false; + return entry; + } + + curelem = DH_NEXT(tb, curelem, startelem); + insertdist++; + + /* + * To avoid negative consequences from overly imbalanced hashtables, + * grow the hashtable if collisions lead to large runs. The most + * likely cause of such imbalance is filling a (currently) small + * table, from a currently big one, in hashtable order. Don't grow if + * the hashtable would be too empty, to prevent quick space explosion + * for some weird edge cases. + */ + if (unlikely(insertdist > DH_GROW_MAX_DIB) && + ((double) tb->members / tb->size) >= DH_GROW_MIN_FILLFACTOR) + { + tb->grow_threshold = 0; + goto restart; + } + } +} + +/* + * Insert the key into the hashtable, set *found to true if the key already + * exists, false otherwise. Returns the hashtable entry in either case. + */ +DH_SCOPE DH_ELEMENT_TYPE * +DH_INSERT(DH_TYPE * tb, DH_KEY_TYPE key, bool *found) +{ + uint32 hash = DH_HASH_KEY(tb, key); + + return DH_INSERT_HASH_INTERNAL(tb, key, hash, found); +} + +/* + * Insert the key into the hashtable using an already-calculated hash. Set + * *found to true if the key already exists, false otherwise. Returns the + * hashtable entry in either case. + */ +DH_SCOPE DH_ELEMENT_TYPE * +DH_INSERT_HASH(DH_TYPE * tb, DH_KEY_TYPE key, uint32 hash, bool *found) +{ + return DH_INSERT_HASH_INTERNAL(tb, key, hash, found); +} + +/* + * This is a separate static inline function, so it can be reliably be inlined + * into its wrapper functions even if DH_SCOPE is extern. + */ +static inline DH_ELEMENT_TYPE * +DH_LOOKUP_HASH_INTERNAL(DH_TYPE * tb, DH_KEY_TYPE key, uint32 hash) +{ + const uint32 startelem = DH_INITIAL_BUCKET(tb, hash); + uint32 curelem = startelem; + + for (;;) + { + DH_BUCKET *bucket = &tb->buckets[curelem]; + + if (!DH_IS_BUCKET_IN_USE(bucket)) + return NULL; + + if (bucket->hashvalue == hash) + { + DH_ELEMENT_TYPE *entry; + + /* + * The hash value matches so we just need to ensure the key + * matches too. To do that, we need to lookup the entry in the + * segments using the index stored in the bucket. + */ + entry = DH_INDEX_TO_ELEMENT(tb, bucket->index); + + /* if we find a match, we're done */ + if (DH_EQUAL(tb, key, entry->DH_KEY)) + return entry; + } + + /* + * TODO: we could stop search based on distance. If the current + * buckets's distance-from-optimal is smaller than what we've skipped + * already, the entry doesn't exist. + */ + + curelem = DH_NEXT(tb, curelem, startelem); + } +} + +/* + * Lookup an entry in the hash table. Returns NULL if key not present. 
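+ *
+ * The returned pointer is stable: it remains valid until the entry is
+ * deleted or the table is reset or destroyed, even if other entries are
+ * inserted or deleted in the meantime.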
+ */ +DH_SCOPE DH_ELEMENT_TYPE * +DH_LOOKUP(DH_TYPE * tb, DH_KEY_TYPE key) +{ + uint32 hash = DH_HASH_KEY(tb, key); + + return DH_LOOKUP_HASH_INTERNAL(tb, key, hash); +} + +/* + * Lookup an entry in the hash table using an already-calculated hash. + * + * Returns NULL if key not present. + */ +DH_SCOPE DH_ELEMENT_TYPE * +DH_LOOKUP_HASH(DH_TYPE * tb, DH_KEY_TYPE key, uint32 hash) +{ + return DH_LOOKUP_HASH_INTERNAL(tb, key, hash); +} + +/* + * Delete an entry from hash table by key. Returns whether to-be-deleted key + * was present. + */ +DH_SCOPE bool +DH_DELETE(DH_TYPE * tb, DH_KEY_TYPE key) +{ + uint32 hash = DH_HASH_KEY(tb, key); + uint32 startelem = DH_INITIAL_BUCKET(tb, hash); + uint32 curelem = startelem; + + for (;;) + { + DH_BUCKET *bucket = &tb->buckets[curelem]; + + if (!DH_IS_BUCKET_IN_USE(bucket)) + return false; + + if (bucket->hashvalue == hash) + { + DH_ELEMENT_TYPE *entry; + + entry = DH_INDEX_TO_ELEMENT(tb, bucket->index); + + if (DH_EQUAL(tb, key, entry->DH_KEY)) + { + DH_BUCKET *lastbucket = bucket; + + /* mark the entry as unused */ + DH_REMOVE_ENTRY(tb, bucket->index); + /* and mark the bucket unused */ + DH_SET_BUCKET_EMPTY(bucket); + + tb->members--; + + /* + * Backward shift following buckets till either an empty + * bucket or a bucket at its optimal position is encountered. + * + * While that sounds expensive, the average chain length is + * short, and deletions would otherwise require tombstones. + */ + for (;;) + { + DH_BUCKET *curbucket; + uint32 curhash; + uint32 curoptimal; + + curelem = DH_NEXT(tb, curelem, startelem); + curbucket = &tb->buckets[curelem]; + + if (!DH_IS_BUCKET_IN_USE(curbucket)) + break; + + curhash = curbucket->hashvalue; + curoptimal = DH_INITIAL_BUCKET(tb, curhash); + + /* current is at optimal position, done */ + if (curoptimal == curelem) + { + DH_SET_BUCKET_EMPTY(lastbucket); + break; + } + + /* shift */ + memcpy(lastbucket, curbucket, sizeof(DH_BUCKET)); + DH_SET_BUCKET_EMPTY(curbucket); + + lastbucket = curbucket; + } + + return true; + } + } + /* TODO: return false; if the distance is too big */ + + curelem = DH_NEXT(tb, curelem, startelem); + } +} + +/* + * Initialize iterator. + */ +DH_SCOPE void +DH_START_ITERATE(DH_TYPE * tb, DH_ITERATOR * iter) +{ + iter->cursegidx = -1; + iter->curitemidx = -1; + iter->found_members = 0; + iter->total_members = tb->members; +} + +/* + * Iterate over all entries in the hashtable. Return the next occupied entry, + * or NULL if there are no more entries. + * + * During iteration the only current entry in the hash table and any entry + * which was previously visited in the loop may be deleted. Deletion of items + * not yet visited is prohibited as are insertions of new entries. + */ +DH_SCOPE DH_ELEMENT_TYPE * +DH_ITERATE(DH_TYPE * tb, DH_ITERATOR * iter) +{ + /* + * Bail if we've already visited all members. This check allows us to + * exit quickly in cases where the table is large but it only contains a + * small number of records. This also means that inserts into the table + * are not possible during iteration. If that is done then we may not + * visit all items in the table. Rather than ever removing this check to + * allow table insertions during iteration, we should add another iterator + * where insertions are safe. + */ + if (iter->found_members == iter->total_members) + return NULL; + + for (;;) + { + DH_SEGMENT *seg; + + /* need a new segment? 
*/ + if (iter->curitemidx == -1) + { + iter->cursegidx = DH_NEXT_ONEBIT(tb->used_segments, + tb->used_segment_words, + iter->cursegidx); + + /* no more segments with items? We're done */ + if (iter->cursegidx == -1) + return NULL; + } + + seg = tb->segments[iter->cursegidx]; + + /* if the segment has items then it certainly shouldn't be NULL */ + Assert(seg != NULL); + + /* + * Advance to the next used item in this segment. For full segments + * we bypass the bitmap and just skip to the next item, otherwise we + * consult the bitmap to find the next used item. + */ + if (seg->nitems == DH_ITEMS_PER_SEGMENT) + { + if (iter->curitemidx == DH_ITEMS_PER_SEGMENT - 1) + iter->curitemidx = -1; + else + { + iter->curitemidx++; + iter->found_members++; + return &seg->items[iter->curitemidx]; + } + } + else + { + iter->curitemidx = DH_NEXT_ONEBIT(seg->used_items, + DH_BITMAP_WORDS, + iter->curitemidx); + + if (iter->curitemidx >= 0) + { + iter->found_members++; + return &seg->items[iter->curitemidx]; + } + } + + /* + * DH_NEXT_ONEBIT returns -1 when there are no more bits. We just + * loop again to fetch the next segment. + */ + } +} + +#endif /* DH_DEFINE */ + +/* undefine external parameters, so next hash table can be defined */ +#undef DH_PREFIX +#undef DH_KEY_TYPE +#undef DH_KEY +#undef DH_ELEMENT_TYPE +#undef DH_HASH_KEY +#undef DH_SCOPE +#undef DH_DECLARE +#undef DH_DEFINE +#undef DH_EQUAL +#undef DH_ALLOCATE +#undef DH_ALLOCATE_ZERO +#undef DH_FREE + +/* undefine locally declared macros */ +#undef DH_MAKE_PREFIX +#undef DH_MAKE_NAME +#undef DH_MAKE_NAME_ +#undef DH_ITEMS_PER_SEGMENT +#undef DH_UNUSED_BUCKET_INDEX +#undef DH_INDEX_SEGMENT +#undef DH_INDEX_ITEM +#undef DH_BITS_PER_WORD +#undef DH_BITMAP_WORD +#undef DH_RIGHTMOST_ONE_POS +#undef DH_BITMAP_WORDS +#undef DH_WORDNUM +#undef DH_BITNUM +#undef DH_RAW_ALLOCATOR +#undef DH_MAX_SIZE +#undef DH_FILLFACTOR +#undef DH_MAX_FILLFACTOR +#undef DH_GROW_MAX_DIB +#undef DH_GROW_MAX_MOVE +#undef DH_GROW_MIN_FILLFACTOR + +/* types */ +#undef DH_TYPE +#undef DH_BUCKET +#undef DH_SEGMENT +#undef DH_ITERATOR + +/* external function names */ +#undef DH_CREATE +#undef DH_DESTROY +#undef DH_RESET +#undef DH_INSERT +#undef DH_INSERT_HASH +#undef DH_DELETE +#undef DH_LOOKUP +#undef DH_LOOKUP_HASH +#undef DH_GROW +#undef DH_START_ITERATE +#undef DH_ITERATE + +/* internal function names */ +#undef DH_NEXT_ONEBIT +#undef DH_NEXT_ZEROBIT +#undef DH_INDEX_TO_ELEMENT +#undef DH_MARK_SEGMENT_ITEM_USED +#undef DH_MARK_SEGMENT_ITEM_UNUSED +#undef DH_GET_NEXT_UNUSED_ENTRY +#undef DH_REMOVE_ENTRY +#undef DH_SET_BUCKET_IN_USE +#undef DH_SET_BUCKET_EMPTY +#undef DH_IS_BUCKET_IN_USE +#undef DH_COMPUTE_PARAMETERS +#undef DH_NEXT +#undef DH_PREV +#undef DH_DISTANCE_FROM_OPTIMAL +#undef DH_INITIAL_BUCKET +#undef DH_INSERT_HASH_INTERNAL +#undef DH_LOOKUP_HASH_INTERNAL -- 2.30.2