From d737c4033a92f070c3c3ad791fdef2c8e133394f Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Sat, 22 Mar 2025 12:20:44 +0100 Subject: [PATCH v20250324 2/6] review --- src/backend/storage/ipc/shmem.c | 4 +- src/backend/utils/hash/dynahash.c | 112 ++++++++++++++++++++---------- src/include/utils/hsearch.h | 4 +- 3 files changed, 81 insertions(+), 39 deletions(-) diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c index d8aed0bfaaf..51ad68cc796 100644 --- a/src/backend/storage/ipc/shmem.c +++ b/src/backend/storage/ipc/shmem.c @@ -345,9 +345,11 @@ ShmemInitHash(const char *name, /* table string name for shmem index */ infoP->alloc = ShmemAllocNoError; hash_flags |= HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE; + /* review: I don't see a reason to rename this to _init_ */ + /* look it up in the shmem index */ location = ShmemInitStruct(name, - hash_get_init_size(infoP, hash_flags, init_size, 0), + hash_get_shared_size(infoP, hash_flags, init_size, 0), &found); /* diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index ba785f1d5f2..9792377d567 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -260,6 +260,30 @@ static long hash_accesses, hash_expansions; #endif +/* review: shouldn't this have some MAXALIGNs? */ +#define HASH_ELEMENTS_OFFSET(hctl, nsegs) \ + (sizeof(HASHHDR) + \ + ((hctl)->dsize * sizeof(HASHSEGMENT)) + \ + ((hctl)->ssize * (nsegs) * sizeof(HASHBUCKET))) + +#define HASH_ELEMENTS(hashp, nsegs) \ + ((char *) (hashp)->hctl + HASH_ELEMENTS_OFFSET((hashp)->hctl, nsegs)) + +#define HASH_SEGMENT_OFFSET(hctl, idx) \ + (sizeof(HASHHDR) + \ + ((hctl)->dsize * sizeof(HASHSEGMENT)) + \ + ((hctl)->ssize * (idx) * sizeof(HASHBUCKET))) + +#define HASH_SEGMENT_PTR(hashp, idx) \ + (HASHSEGMENT) ((char *) (hashp)->hctl + HASH_SEGMENT_OFFSET((hashp)->hctl, (idx))) + +#define HASH_SEGMENT_SIZE(hashp) ((hashp)->ssize * sizeof(HASHBUCKET)) + +#define HASH_DIRECTORY(hashp) (HASHSEGMENT *) (((char *) (hashp)->hctl) + sizeof(HASHHDR)) + +#define HASH_ELEMENT_NEXT(hctl, num, ptr) \ + ((char *) (ptr) + ((num) * (MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN((hctl)->entrysize)))) + /* * Private function prototypes */ @@ -281,9 +305,16 @@ static void register_seq_scan(HTAB *hashp); static void deregister_seq_scan(HTAB *hashp); static bool has_seq_scans(HTAB *hashp); -static int compute_buckets_and_segs(long nelem, int *nbuckets, - long num_partitions, long ssize); -static void element_add(HTAB *hashp, HASHELEMENT *firstElement, int freelist_idx, int nelem); +/* + * review: it's a bit weird to have a function that calculates two values, + * but returns one through a pointer and another as a regular retval. see + * ExecChooseHashTableSize for a better approach. + */ +static void compute_buckets_and_segs(long nelem, long num_partitions, + long ssize, /* segment size */ + int *nbuckets, int *nsegments); +static void element_add(HTAB *hashp, HASHELEMENT *firstElement, + int freelist_idx, int nelem); /* * memory allocation support @@ -511,7 +542,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) hashp->isshared = false; } - /* Choose number of entries to allocate at a time */ + /* Choose the number of entries to allocate at a time. */ nelem_batch = choose_nelem_alloc(info->entrysize); /* @@ -521,7 +552,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) */ if (!hashp->hctl) { - Size size = hash_get_init_size(info, flags, nelem, nelem_batch); + Size size = hash_get_shared_size(info, flags, nelem, nelem_batch); hashp->hctl = (HASHHDR *) hashp->alloc(size); if (!hashp->hctl) @@ -572,6 +603,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) hctl->keysize = info->keysize; hctl->entrysize = info->entrysize; + /* remember how many elements to allocate at once */ hctl->nelem_alloc = nelem_batch; /* make local copies of heavily-used constant fields */ @@ -598,7 +630,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) freelist_partitions, nelem_alloc, nelem_alloc_first; - void *curr_offset = NULL; + void *ptr = NULL; /* XXX it's not offset, clearly */ int nsegs; int nbuckets; @@ -612,13 +644,18 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) freelist_partitions = 1; /* - * If table is shared, calculate the offset at which to find the the - * first partition of elements + * For shared hash tables, we need to determine the offset for the + * first partition of elements. We have to skip space for the header, + * segments and buckets. + * + * XXX can we rely on this matching the calculation in hash_get_shared_size? + * or could/should we add some asserts? Can we have at least some sanity checks + * on nbuckets/nsegs? */ + compute_buckets_and_segs(nelem, hctl->num_partitions, hctl->ssize, + &nbuckets, &nsegs); - nsegs = compute_buckets_and_segs(nelem, &nbuckets, hctl->num_partitions, hctl->ssize); - - curr_offset = (((char *) hashp->hctl) + sizeof(HASHHDR) + (hctl->dsize * sizeof(HASHSEGMENT)) + (sizeof(HASHBUCKET) * hctl->ssize * nsegs)); + ptr = HASH_ELEMENTS(hashp, nsegs); nelem_alloc = nelem / freelist_partitions; if (nelem_alloc <= 0) @@ -637,16 +674,15 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) for (i = 0; i < freelist_partitions; i++) { int temp = (i == 0) ? nelem_alloc_first : nelem_alloc; - HASHELEMENT *firstElement; - Size elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctl->entrysize); /* - * Memory is allocated as part of initial allocation in - * ShmemInitHash + * Memory is allocated as part of allocation in ShmemInitHash, we + * just need to split that allocation per-batch freelists. + * + * review: why do we invert the argument order? */ - firstElement = (HASHELEMENT *) curr_offset; - curr_offset = (((char *) curr_offset) + (temp * elementSize)); - element_add(hashp, firstElement, i, temp); + element_add(hashp, (HASHELEMENT *) ptr, i, temp); + ptr = HASH_ELEMENT_NEXT(hctl, temp, ptr); } } @@ -734,7 +770,8 @@ init_htab(HTAB *hashp, long nelem) for (i = 0; i < NUM_FREELISTS; i++) SpinLockInit(&(hctl->freeList[i].mutex)); - nsegs = compute_buckets_and_segs(nelem, &nbuckets, hctl->num_partitions, hctl->ssize); + compute_buckets_and_segs(nelem, hctl->num_partitions, hctl->ssize, + &nbuckets, &nsegs); hctl->max_bucket = hctl->low_mask = nbuckets - 1; hctl->high_mask = (nbuckets << 1) - 1; @@ -755,22 +792,19 @@ init_htab(HTAB *hashp, long nelem) if (!(hashp->dir)) { CurrentDynaHashCxt = hashp->hcxt; - hashp->dir = (HASHSEGMENT *) (((char *) hashp->hctl) + sizeof(HASHHDR)); + hashp->dir = HASH_DIRECTORY(hashp); } /* Allocate initial segments */ i = 0; for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++) { - *segp = (HASHBUCKET *) (((char *) hashp->hctl) - + sizeof(HASHHDR) - + (hashp->hctl->dsize * sizeof(HASHSEGMENT)) - + (i * sizeof(HASHBUCKET) * hashp->ssize)); - MemSet(*segp, 0, sizeof(HASHBUCKET) * hashp->ssize); - - i = i + 1; + *segp = HASH_SEGMENT_PTR(hashp, i++); + MemSet(*segp, 0, HASH_SEGMENT_SIZE(hashp)); } + Assert(i == nsegs); + #ifdef HASH_DEBUG fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%ld\n%s%u\n%s%x\n%s%x\n%s%ld\n", "TABLE POINTER ", hashp, @@ -862,9 +896,14 @@ hash_select_dirsize(long num_entries) * Compute the required initial memory allocation for a shared-memory * hashtable with the given parameters. We need space for the HASHHDR * and for the (non expansible) directory. + * + * review: why rename this to "init" + * review: also, why adding the "const"? + * review: the comment should really explain the arguments, e.g. what + * is nelem_alloc for is not obvious? */ Size -hash_get_init_size(const HASHCTL *info, int flags, long init_size, int nelem_alloc) +hash_get_shared_size(const HASHCTL *info, int flags, long init_size, int nelem_alloc) { int nbuckets; int nsegs; @@ -914,7 +953,8 @@ hash_get_init_size(const HASHCTL *info, int flags, long init_size, int nelem_all else ssize = DEF_SEGSIZE; - nsegs = compute_buckets_and_segs(init_size, &nbuckets, num_partitions, ssize); + compute_buckets_and_segs(init_size, num_partitions, ssize, + &nbuckets, &nsegs); if (!element_alloc) init_size = 0; @@ -1791,6 +1831,7 @@ element_alloc(HTAB *hashp, int nelem) return firstElement; } +/* review: comment needed, how is the work divided between element_alloc and this? */ static void element_add(HTAB *hashp, HASHELEMENT *firstElement, int freelist_idx, int nelem) { @@ -2034,11 +2075,11 @@ AtEOSubXact_HashTables(bool isCommit, int nestDepth) } } -static int -compute_buckets_and_segs(long nelem, int *nbuckets, long num_partitions, long ssize) +/* review: comment needed */ +static void +compute_buckets_and_segs(long nelem, long num_partitions, long ssize, + int *nbuckets, int *nsegments) { - int nsegs; - /* * Allocate space for the next greater power of two number of buckets, * assuming a desired maximum load factor of 1. @@ -2058,7 +2099,6 @@ compute_buckets_and_segs(long nelem, int *nbuckets, long num_partitions, long ss /* * Figure number of directory segments needed, round up to a power of 2 */ - nsegs = ((*nbuckets) - 1) / ssize + 1; - nsegs = next_pow2_int(nsegs); - return nsegs; + *nsegments = ((*nbuckets) - 1) / ssize + 1; + *nsegments = next_pow2_int(*nsegments); } diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h index 5e513c8116a..e78ab38b4e8 100644 --- a/src/include/utils/hsearch.h +++ b/src/include/utils/hsearch.h @@ -151,8 +151,8 @@ extern void hash_seq_term(HASH_SEQ_STATUS *status); extern void hash_freeze(HTAB *hashp); extern Size hash_estimate_size(long num_entries, Size entrysize); extern long hash_select_dirsize(long num_entries); -extern Size hash_get_init_size(const HASHCTL *info, int flags, - long init_size, int nelem_alloc); +extern Size hash_get_shared_size(const HASHCTL *info, int flags, + long init_size, int nelem_alloc); extern void AtEOXact_HashTables(bool isCommit); extern void AtEOSubXact_HashTables(bool isCommit, int nestDepth); -- 2.49.0