From ce1180d557cbdf8cff33842ea2f1a22ba6676725 Mon Sep 17 00:00:00 2001
From: Nathan Bossart
Date: Tue, 2 Apr 2024 15:54:49 -0500
Subject: [PATCH v21 1/3] inline pg_popcount for small numbers of bytes

---
 src/include/port/pg_bitutils.h | 34 ++++++++++++++++++++++++++++++++--
 src/port/pg_bitutils.c | 10 +++++-----
 2 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h
index 53e5239717..1f487a4bc3 100644
--- a/src/include/port/pg_bitutils.h
+++ b/src/include/port/pg_bitutils.h
@@ -302,16 +302,46 @@ pg_ceil_log2_64(uint64 num)
 /* Attempt to use the POPCNT instruction, but perform a runtime check first */
 extern PGDLLIMPORT int (*pg_popcount32) (uint32 word);
 extern PGDLLIMPORT int (*pg_popcount64) (uint64 word);
-extern PGDLLIMPORT uint64 (*pg_popcount) (const char *buf, int bytes);
+extern PGDLLIMPORT uint64 (*pg_popcount_optimized) (const char *buf, int bytes);
 
 #else
 /* Use a portable implementation -- no need for a function pointer. */
 extern int	pg_popcount32(uint32 word);
 extern int	pg_popcount64(uint64 word);
-extern uint64 pg_popcount(const char *buf, int bytes);
+extern uint64 pg_popcount_optimized(const char *buf, int bytes);
 
 #endif							/* TRY_POPCNT_FAST */
 
+/*
+ * Returns the number of 1-bits in the "bytes" bytes starting at buf.
+ *
+ * Inputs shorter than 8 bytes are counted right here with a byte-at-a-time
+ * loop over the pg_number_of_ones lookup table, because the function call
+ * overhead of the optimized versions isn't worth taking for so little work.
+ * Anything longer is handed to pg_popcount_optimized(), where the call
+ * overhead is accepted because the optimized versions are likely faster.
+ */
+static inline uint64
+pg_popcount(const char *buf, int bytes)
+{
+	/*
+	 * We use 8 bytes as the threshold because that's where we'll first use
+	 * special instructions on 64-bit systems. A threshold of 4 bytes might
+	 * make more sense on 32-bit systems, but it seems unlikely to make a
+	 * tremendous difference.
+	 */
+	if (bytes < 8)
+	{
+		uint64		popcnt = 0;
+
+		while (bytes--)
+			popcnt += pg_number_of_ones[(unsigned char) *buf++];
+		return popcnt;
+	}
+
+	return pg_popcount_optimized(buf, bytes);
+}
+
 /*
  * Rotate the bits of "word" to the right/left by n bits.
  */
diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c
index 28312f3dd9..4720f8e419 100644
--- a/src/port/pg_bitutils.c
+++ b/src/port/pg_bitutils.c
@@ -118,7 +118,7 @@ static uint64 pg_popcount_fast(const char *buf, int bytes);
 
 int			(*pg_popcount32) (uint32 word) = pg_popcount32_choose;
 int			(*pg_popcount64) (uint64 word) = pg_popcount64_choose;
-uint64		(*pg_popcount) (const char *buf, int bytes) = pg_popcount_choose;
+uint64		(*pg_popcount_optimized) (const char *buf, int bytes) = pg_popcount_choose;
 #endif							/* TRY_POPCNT_FAST */
 
 #ifdef TRY_POPCNT_FAST
@@ -155,13 +155,13 @@ choose_popcount_functions(void)
 	{
 		pg_popcount32 = pg_popcount32_fast;
 		pg_popcount64 = pg_popcount64_fast;
-		pg_popcount = pg_popcount_fast;
+		pg_popcount_optimized = pg_popcount_fast;
 	}
 	else
 	{
 		pg_popcount32 = pg_popcount32_slow;
 		pg_popcount64 = pg_popcount64_slow;
-		pg_popcount = pg_popcount_slow;
+		pg_popcount_optimized = pg_popcount_slow;
 	}
 }
 
@@ -183,7 +183,7 @@ static uint64
 pg_popcount_choose(const char *buf, int bytes)
 {
 	choose_popcount_functions();
-	return pg_popcount(buf, bytes);
+	return pg_popcount_optimized(buf, bytes);
 }
 
 /*
@@ -391,7 +391,7 @@ pg_popcount64(uint64 word)
  * Returns the number of 1-bits in buf
  */
 uint64
-pg_popcount(const char *buf, int bytes)
+pg_popcount_optimized(const char *buf, int bytes)
 {
 	return pg_popcount_slow(buf, bytes);
 }
-- 
2.25.1