From ec51e2e7b60e305020792d2608f677d263094a8f Mon Sep 17 00:00:00 2001 From: John Naylor Date: Sat, 9 Dec 2023 12:39:45 +0700 Subject: [PATCH v6 02/13] Rewrite fasthash functions using a homegrown incremental interface This serves as a model for correct use of the interface. --- src/include/common/hashfn_unstable.h | 154 +++++++++++++++++++++++++-- 1 file changed, 147 insertions(+), 7 deletions(-) diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h index 76ed27c0a0..e5288ae723 100644 --- a/src/include/common/hashfn_unstable.h +++ b/src/include/common/hashfn_unstable.h @@ -1,3 +1,25 @@ +/* + * Building blocks for creating fast inlineable hash functions. The + * unstable designation is in contrast to hashfn.h, which cannot break + * compatibility because hashes can be written to disk and so must have + * the same hashes between versions. + * + * + * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group + * + * src/include/common/hashfn_unstable.h + */ + +#ifndef HASHFN_UNSTABLE_H +#define HASHFN_UNSTABLE_H + +/* + * fasthash is a modification of code taken from + * https://code.google.com/archive/p/fast-hash/source/default/source + * under the terms of the MIT license. The original copyright + * notice follows: + */ + /* The MIT License Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com) 
*/ -#include "fasthash.h" +typedef struct fasthash_state +{ + uint64 accum; +#define FH_SIZEOF_ACCUM sizeof(uint64) + uint64 hash; +} fasthash_state; + +static inline uint64 +fasthash_mix(uint64 h) +{ + h ^= h >> 23; + h *= 0x2127599bf4325c37ULL; + h ^= h >> 47; + return h; +} + +static inline void +fasthash_combine(fasthash_state* hs) +{ + hs->hash ^= fasthash_mix(hs->accum); + hs->hash *= 0x880355f21e6d1965ULL; + + /* reset hash state for next input */ + hs->accum = 0; +} + +static inline void +fasthash_init(fasthash_state *hs, int len, uint64 seed) +{ + memset(hs, 0, sizeof(fasthash_state)); + + // since we don't know the length for a nul-terminated string + // handle some other way -- maybe we can accum the length in + // the state and fold it in during the finalizer (cf. xxHash3) + hs->hash = seed ^ (len * 0x880355f21e6d1965ULL); +} + +static inline void +fasthash_accum(fasthash_state *hs, const unsigned char *k, int len) +{ + Assert(hs->accum == 0); + Assert(len <= FH_SIZEOF_ACCUM); + + switch (len) + { + case 8: memcpy(&hs->accum, k, 8); + break; + case 7: hs->accum |= (uint64) k[6] << 48; + /* FALLTHROUGH */ + case 6: hs->accum |= (uint64) k[5] << 40; + /* FALLTHROUGH */ + case 5: hs->accum |= (uint64) k[4] << 32; + /* FALLTHROUGH */ + case 4: hs->accum |= (uint64) k[3] << 24; + /* FALLTHROUGH */ + case 3: hs->accum |= (uint64) k[2] << 16; + /* FALLTHROUGH */ + case 2: hs->accum |= (uint64) k[1] << 8; + /* FALLTHROUGH */ + case 1: hs->accum |= (uint64) k[0]; + break; + case 0: + return; + } + + fasthash_combine(hs); +} + + +static inline uint64 +fasthash_final64(fasthash_state *hs) +{ + return fasthash_mix(hs->hash); +} + +static inline uint32 +fasthash_final32(fasthash_state *hs) +{ + // the following trick converts the 64-bit hashcode to Fermat + // residue, which shall retain information from both the higher + // and lower parts of hashcode. 
+ uint64 h = fasthash_final64(hs); + return h - (h >> 32); +} + +static inline uint64 +fasthash64(const unsigned char * k, int len, uint64 seed) +{ + fasthash_state hs; + + fasthash_init(&hs, len, seed); + + while (len >= FH_SIZEOF_ACCUM) + { + fasthash_accum(&hs, k, FH_SIZEOF_ACCUM); + k += FH_SIZEOF_ACCUM; + len -= FH_SIZEOF_ACCUM; + } + + fasthash_accum(&hs, k, len); + return fasthash_final64(&hs); +} + +static inline uint64 +fasthash32(const unsigned char * k, int len, uint64 seed) +{ + uint64 h = fasthash64(k, len, seed); + return h - (h >> 32); +} + + +// XXX NOT FOR COMMIT // Compression function for Merkle-Damgard construction. // This function is generated using the framework provided. @@ -34,9 +167,8 @@ static inline uint64_t mix(uint64_t h) { return h; } -// security: if the system allows empty keys (len=3) the seed is exposed, the reverse of mix. -// objsize: 0-1fd: 509 -uint64_t fasthash64(const void *buf, size_t len, uint64_t seed) +static inline +uint64_t fasthash64_orig(const void *buf, size_t len, uint64_t seed) { const uint64_t m = 0x880355f21e6d1965ULL; const uint64_t *pos = (const uint64_t *)buf; @@ -56,11 +188,17 @@ uint64_t fasthash64(const void *buf, size_t len, uint64_t seed) switch (len & 7) { case 7: v ^= (uint64_t)pos2[6] << 48; + /* FALLTHROUGH */ case 6: v ^= (uint64_t)pos2[5] << 40; + /* FALLTHROUGH */ case 5: v ^= (uint64_t)pos2[4] << 32; + /* FALLTHROUGH */ case 4: v ^= (uint64_t)pos2[3] << 24; + /* FALLTHROUGH */ case 3: v ^= (uint64_t)pos2[2] << 16; + /* FALLTHROUGH */ case 2: v ^= (uint64_t)pos2[1] << 8; + /* FALLTHROUGH */ case 1: v ^= (uint64_t)pos2[0]; h ^= mix(v); h *= m; @@ -69,12 +207,14 @@ uint64_t fasthash64(const void *buf, size_t len, uint64_t seed) return mix(h); } -// objsize: 0-236: 566 -uint32_t fasthash32(const void *buf, size_t len, uint32_t seed) +static inline +uint32_t fasthash32_orig(const void *buf, size_t len, uint32_t seed) { // the following trick converts the 64-bit hashcode to Fermat // residue, which 
shall retain information from both the higher // and lower parts of hashcode. - uint64_t h = fasthash64(buf, len, seed); + uint64_t h = fasthash64_orig(buf, len, seed); return h - (h >> 32); } + +#endif /* HASHFN_UNSTABLE_H */ -- 2.43.0