From f5ab683d61724e9766d43e58c6f3177a30f708d0 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Sun, 10 Dec 2023 12:11:37 +0700 Subject: [PATCH v8 2/5] Rewrite fasthash functions using a homegrown incremental interface The incremental interface will be useful for cases where we don't know the length up front, such as NUL-terminated strings. First, we need to validate that this interface can give the same answer as the original functions when we do know the length. A future commit will add a temporary assert for testing in CI. --- src/include/common/hashfn_unstable.h | 161 +++++++++++++++++++++++++-- 1 file changed, 153 insertions(+), 8 deletions(-) diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h index a5bf965fa2..fbae7a5522 100644 --- a/src/include/common/hashfn_unstable.h +++ b/src/include/common/hashfn_unstable.h @@ -1,3 +1,25 @@ +/* +Building blocks for creating fast inlineable hash functions. The +unstable designation is in contrast to hashfn.h, which cannot break +compatibility because hashes can be written to disk and so must have +the same hashes between versions. + + * + * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group + * + * src/include/common/hashfn_unstable.h + */ + +#ifndef HASHFN_UNSTABLE_H +#define HASHFN_UNSTABLE_H + +/* + * fasthash is a modification of code taken from + * https://code.google.com/archive/p/fast-hash/source/default/source + * under the terms of the MIT license. The original copyright + * notice follows: + */ + /* The MIT License Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com) @@ -23,16 +45,130 @@ SOFTWARE. 
*/ -#include "fasthash.h" +typedef struct fasthash_state +{ + uint64 accum; +#define FH_SIZEOF_ACCUM sizeof(uint64) + uint64 hash; +} fasthash_state; + +static inline uint64 +fasthash_mix(uint64 h) +{ + h ^= h >> 23; + h *= 0x2127599bf4325c37ULL; + h ^= h >> 47; + return h; +} + +static inline void +fasthash_combine(fasthash_state* hs) +{ + hs->hash ^= fasthash_mix(hs->accum); + hs->hash *= 0x880355f21e6d1965ULL; + + /* reset hash state for next input */ + hs->accum = 0; +} + +static inline void +fasthash_init(fasthash_state *hs, int len, uint64 seed) +{ + memset(hs, 0, sizeof(fasthash_state)); + + // since we don't know the length for a nul-terminated string + // handle some other way -- maybe we can accum the length in + // the state and fold it in during the finalizer (cf. xxHash3) + hs->hash = seed ^ (len * 0x880355f21e6d1965ULL); +} + +static inline void +fasthash_accum(fasthash_state *hs, const unsigned char *k, int len) +{ + Assert(hs->accum == 0); + Assert(len <= FH_SIZEOF_ACCUM); + + switch (len) + { + case 8: memcpy(&hs->accum, k, 8); + break; + case 7: hs->accum |= (uint64) k[6] << 48; + /* FALLTHROUGH */ + case 6: hs->accum |= (uint64) k[5] << 40; + /* FALLTHROUGH */ + case 5: hs->accum |= (uint64) k[4] << 32; + /* FALLTHROUGH */ + case 4: hs->accum |= (uint64) k[3] << 24; + /* FALLTHROUGH */ + case 3: hs->accum |= (uint64) k[2] << 16; + /* FALLTHROUGH */ + case 2: hs->accum |= (uint64) k[1] << 8; + /* FALLTHROUGH */ + case 1: hs->accum |= (uint64) k[0]; + break; + case 0: + return; + } + + fasthash_combine(hs); +} + + +static inline uint64 +fasthash_final64(fasthash_state *hs) +{ + return fasthash_mix(hs->hash); +} + +static inline uint32 +fasthash_final32(fasthash_state *hs) +{ + // the following trick converts the 64-bit hashcode to Fermat + // residue, which shall retain information from both the higher + // and lower parts of hashcode. 
+ uint64 h = fasthash_final64(hs); + return h - (h >> 32); +} + +static inline uint64 +fasthash64(const unsigned char * k, int len, uint64 seed) +{ + fasthash_state hs; + + fasthash_init(&hs, len, seed); + + while (len >= FH_SIZEOF_ACCUM) + { + fasthash_accum(&hs, k, FH_SIZEOF_ACCUM); + k += FH_SIZEOF_ACCUM; + len -= FH_SIZEOF_ACCUM; + } + + fasthash_accum(&hs, k, len); + return fasthash_final64(&hs); +} + +static inline uint64 +fasthash32(const unsigned char * k, int len, uint64 seed) +{ + uint64 h = fasthash64(k, len, seed); + return h - (h >> 32); +} + + +// XXX NOT FOR COMMIT // Compression function for Merkle-Damgard construction. // This function is generated using the framework provided. -#define mix(h) ({ \ - (h) ^= (h) >> 23; \ - (h) *= 0x2127599bf4325c37ULL; \ - (h) ^= (h) >> 47; }) +static inline uint64_t mix(uint64_t h) { + h ^= h >> 23; + h *= 0x2127599bf4325c37ULL; + h ^= h >> 47; + return h; +} -uint64_t fasthash64(const void *buf, size_t len, uint64_t seed) +static inline +uint64_t fasthash64_orig(const void *buf, size_t len, uint64_t seed) { const uint64_t m = 0x880355f21e6d1965ULL; const uint64_t *pos = (const uint64_t *)buf; @@ -52,11 +188,17 @@ uint64_t fasthash64(const void *buf, size_t len, uint64_t seed) switch (len & 7) { case 7: v ^= (uint64_t)pos2[6] << 48; + /* FALLTHROUGH */ case 6: v ^= (uint64_t)pos2[5] << 40; + /* FALLTHROUGH */ case 5: v ^= (uint64_t)pos2[4] << 32; + /* FALLTHROUGH */ case 4: v ^= (uint64_t)pos2[3] << 24; + /* FALLTHROUGH */ case 3: v ^= (uint64_t)pos2[2] << 16; + /* FALLTHROUGH */ case 2: v ^= (uint64_t)pos2[1] << 8; + /* FALLTHROUGH */ case 1: v ^= (uint64_t)pos2[0]; h ^= mix(v); h *= m; @@ -65,11 +207,14 @@ uint64_t fasthash64(const void *buf, size_t len, uint64_t seed) return mix(h); } -uint32_t fasthash32(const void *buf, size_t len, uint32_t seed) +static inline +uint32_t fasthash32_orig(const void *buf, size_t len, uint32_t seed) { // the following trick converts the 64-bit hashcode to Fermat // residue, which 
shall retain information from both the higher // and lower parts of hashcode. - uint64_t h = fasthash64(buf, len, seed); + uint64_t h = fasthash64_orig(buf, len, seed); return h - (h >> 32); } + +#endif /* HASHFN_UNSTABLE_H */ -- 2.43.0