From b7ee120e05d48ebacb078e00ffefc5f98052d214 Mon Sep 17 00:00:00 2001
From: John Naylor
Date: Sat, 9 Dec 2023 18:06:05 +0700
Subject: [PATCH v7 13/13] PoC: Get rid of strlen() calls when using HASH_STRINGS

Add cstring_hash, which uses the chunked incremental
interface of fasthash. That way, we don't need to know
the length of the key upfront.

Open questions:
- Is performance better?
- Since we have the total length when we reach the end,
  should we try to use it in the finalization stage?
- Do we need to keep string_hash around?
---
 src/backend/utils/hash/dynahash.c | 49 +++++++++++++++++++++++++++----
 1 file changed, 44 insertions(+), 5 deletions(-)

diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index 012d4a0b1f..ba74126e73 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -98,6 +98,7 @@
 
 #include "access/xact.h"
 #include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
 #include "port/pg_bitutils.h"
 #include "storage/shmem.h"
 #include "storage/spin.h"
@@ -307,6 +308,44 @@ string_compare(const char *key1, const char *key2, Size keysize)
 	return strncmp(key1, key2, keysize - 1);
 }
 
+/*
+ * cstring_hash: hash function for keys that are NUL-terminated strings.
+ * The key is consumed in chunks, so its length need not be known upfront.
+ *
+ * NOTE: this is the default hash function if none is specified.
+ */
+static uint32
+cstring_hash(const void *key, Size keysize)
+{
+	fasthash_state hs;
+	Size		s_len = 0;
+	const unsigned char *k = (const unsigned char *) key;
+
+	fasthash_init(&hs, 0, 0, false);
+
+	/*
+	 * If the string exceeds keysize-1 bytes, we want to hash only that many,
+	 * because when it is copied into the hash table it will be truncated at
+	 * that length (and string_compare ignores anything past it, too).
+	 */
+	while (*k && s_len < keysize - 1)
+	{
+		int			chunk_len;
+
+		for (chunk_len = 0;
+			 chunk_len < FH_SIZEOF_ACCUM && k[chunk_len] != '\0' && s_len < keysize - 1;
+			 chunk_len++)
+		{
+			s_len++;
+		}
+
+		fasthash_accum(&hs, k, chunk_len);
+		k += chunk_len;
+	}
+
+	return fasthash_final32(&hs);
+}
+
 
 /************************** CREATE ROUTINES **********************/
 
@@ -419,7 +458,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
 	{
 		/*
 		 * string_hash used to be considered the default hash method, and in a
-		 * non-assert build it effectively still is.  But we now consider it
+		 * non-assert build it effectively still was until version 17.  Since version 14 we consider it
 		 * an assertion error to not say HASH_STRINGS explicitly.  To help
 		 * catch mistaken usage of HASH_STRINGS, we also insist on a
 		 * reasonably long string length: if the keysize is only 4 or 8 bytes,
@@ -428,12 +467,12 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
 		Assert(flags & HASH_STRINGS);
 		Assert(info->keysize > 8);
 
-		hashp->hash = string_hash;
+		hashp->hash = cstring_hash;
 	}
 
 	/*
 	 * If you don't specify a match function, it defaults to string_compare if
-	 * you used string_hash, and to memcmp otherwise.
+	 * you used cstring_hash or string_hash, and to memcmp otherwise.
 	 *
 	 * Note: explicitly specifying string_hash is deprecated, because this
 	 * might not work for callers in loadable modules on some platforms due to
@@ -442,7 +481,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
 	 */
 	if (flags & HASH_COMPARE)
 		hashp->match = info->match;
-	else if (hashp->hash == string_hash)
+	else if (hashp->hash == cstring_hash || hashp->hash == string_hash)
 		hashp->match = (HashCompareFunc) string_compare;
 	else
 		hashp->match = memcmp;
@@ -452,7 +491,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
 	 */
 	if (flags & HASH_KEYCOPY)
 		hashp->keycopy = info->keycopy;
-	else if (hashp->hash == string_hash)
+	else if (hashp->hash == cstring_hash || hashp->hash == string_hash)
 	{
 		/*
 		 * The signature of keycopy is meant for memcpy(), which returns
-- 
2.43.0