From f5ab683d61724e9766d43e58c6f3177a30f708d0 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Sun, 10 Dec 2023 12:11:37 +0700 Subject: [PATCH v8 2/5] Rewrite fasthash functions using a homegrown incremental interface The incremental interface will be useful for cases where we don't know the length up front, such as NUL-terminated strings. First, we need to validate that this interface can give the same answer as the original functions when we do know the length. A future commit will add a temporary assert for testing in CI. --- src/include/common/hashfn_unstable.h | 161 +++++++++++++++++++++++++-- 1 file changed, 153 insertions(+), 8 deletions(-) diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h index a5bf965fa2..fbae7a5522 100644 --- a/src/include/common/hashfn_unstable.h +++ b/src/include/common/hashfn_unstable.h @@ -1,3 +1,25 @@ +/* +Building blocks for creating fast inlineable hash functions. The +unstable designation is in contrast to hashfn.h, which cannot break +compatibility because hashes can be written to disk and so must have +the same hashes between versions. + + * + * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group + * + * src/include/common/hashfn_unstable.h + */ + +#ifndef HASHFN_UNSTABLE_H +#define HASHFN_UNSTABLE_H + +/* + * fasthash is a modification of code taken from + * https://code.google.com/archive/p/fast-hash/source/default/source + * under the terms of the MIT license. The original copyright + * notice follows: + */ + /* The MIT License Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com) @@ -23,16 +45,130 @@ SOFTWARE. 
*/ -#include "fasthash.h" +typedef struct fasthash_state +{ + uint64 accum; +#define FH_SIZEOF_ACCUM sizeof(uint64) + uint64 hash; +} fasthash_state; + +static inline uint64 +fasthash_mix(uint64 h) +{ + h ^= h >> 23; + h *= 0x2127599bf4325c37ULL; + h ^= h >> 47; + return h; +} + +static inline void +fasthash_combine(fasthash_state* hs) +{ + hs->hash ^= fasthash_mix(hs->accum); + hs->hash *= 0x880355f21e6d1965ULL; + + /* reset hash state for next input */ + hs->accum = 0; +} + +static inline void +fasthash_init(fasthash_state *hs, int len, uint64 seed) +{ + memset(hs, 0, sizeof(fasthash_state)); + + // since we don't know the length for a nul-terminated string + // handle some other way -- maybe we can accum the length in + // the state and fold it in during the finalizer (cf. xxHash3) + hs->hash = seed ^ (len * 0x880355f21e6d1965ULL); +} + +static inline void +fasthash_accum(fasthash_state *hs, const unsigned char *k, int len) +{ + Assert(hs->accum == 0); + Assert(len <= FH_SIZEOF_ACCUM); + + switch (len) + { + case 8: memcpy(&hs->accum, k, 8); + break; + case 7: hs->accum |= (uint64) k[6] << 48; + /* FALLTHROUGH */ + case 6: hs->accum |= (uint64) k[5] << 40; + /* FALLTHROUGH */ + case 5: hs->accum |= (uint64) k[4] << 32; + /* FALLTHROUGH */ + case 4: hs->accum |= (uint64) k[3] << 24; + /* FALLTHROUGH */ + case 3: hs->accum |= (uint64) k[2] << 16; + /* FALLTHROUGH */ + case 2: hs->accum |= (uint64) k[1] << 8; + /* FALLTHROUGH */ + case 1: hs->accum |= (uint64) k[0]; + break; + case 0: + return; + } + + fasthash_combine(hs); +} + + +static inline uint64 +fasthash_final64(fasthash_state *hs) +{ + return fasthash_mix(hs->hash); +} + +static inline uint32 +fasthash_final32(fasthash_state *hs) +{ + // the following trick converts the 64-bit hashcode to Fermat + // residue, which shall retain information from both the higher + // and lower parts of hashcode. 
+ uint64 h = fasthash_final64(hs); + return h - (h >> 32); +} + +static inline uint64 +fasthash64(const unsigned char * k, int len, uint64 seed) +{ + fasthash_state hs; + + fasthash_init(&hs, len, seed); + + while (len >= FH_SIZEOF_ACCUM) + { + fasthash_accum(&hs, k, FH_SIZEOF_ACCUM); + k += FH_SIZEOF_ACCUM; + len -= FH_SIZEOF_ACCUM; + } + + fasthash_accum(&hs, k, len); + return fasthash_final64(&hs); +} + +static inline uint64 +fasthash32(const unsigned char * k, int len, uint64 seed) +{ + uint64 h = fasthash64(k, len, seed); + return h - (h >> 32); +} + + +// XXX NOT FOR COMMIT // Compression function for Merkle-Damgard construction. // This function is generated using the framework provided. -#define mix(h) ({ \ - (h) ^= (h) >> 23; \ - (h) *= 0x2127599bf4325c37ULL; \ - (h) ^= (h) >> 47; }) +static inline uint64_t mix(uint64_t h) { + h ^= h >> 23; + h *= 0x2127599bf4325c37ULL; + h ^= h >> 47; + return h; +} -uint64_t fasthash64(const void *buf, size_t len, uint64_t seed) +static inline +uint64_t fasthash64_orig(const void *buf, size_t len, uint64_t seed) { const uint64_t m = 0x880355f21e6d1965ULL; const uint64_t *pos = (const uint64_t *)buf; @@ -52,11 +188,17 @@ uint64_t fasthash64(const void *buf, size_t len, uint64_t seed) switch (len & 7) { case 7: v ^= (uint64_t)pos2[6] << 48; + /* FALLTHROUGH */ case 6: v ^= (uint64_t)pos2[5] << 40; + /* FALLTHROUGH */ case 5: v ^= (uint64_t)pos2[4] << 32; + /* FALLTHROUGH */ case 4: v ^= (uint64_t)pos2[3] << 24; + /* FALLTHROUGH */ case 3: v ^= (uint64_t)pos2[2] << 16; + /* FALLTHROUGH */ case 2: v ^= (uint64_t)pos2[1] << 8; + /* FALLTHROUGH */ case 1: v ^= (uint64_t)pos2[0]; h ^= mix(v); h *= m; @@ -65,11 +207,14 @@ uint64_t fasthash64(const void *buf, size_t len, uint64_t seed) return mix(h); } -uint32_t fasthash32(const void *buf, size_t len, uint32_t seed) +static inline +uint32_t fasthash32_orig(const void *buf, size_t len, uint32_t seed) { // the following trick converts the 64-bit hashcode to Fermat // residue, which 
shall retain information from both the higher // and lower parts of hashcode. - uint64_t h = fasthash64(buf, len, seed); + uint64_t h = fasthash64_orig(buf, len, seed); return h - (h >> 32); } + +#endif /* HASHFN_UNSTABLE_H */ -- 2.43.0