From e5e4cac323b913e9fcdbd17d6b07316a21f7ff5c Mon Sep 17 00:00:00 2001 From: Paul Amonson Date: Tue, 19 Mar 2024 13:37:31 -0700 Subject: [PATCH 2/3] [Refactor] Separated slow, fast, and choose functionality into files. Signed-off-by: Paul Amonson --- contrib/intarray/_intbig_gist.c | 2 +- contrib/ltree/_ltree_gist.c | 2 +- contrib/pageinspect/heapfuncs.c | 4 +- contrib/pg_trgm/trgm_gist.c | 2 +- contrib/pg_walinspect/pg_walinspect.c | 2 +- src/backend/lib/bloomfilter.c | 2 +- src/backend/postmaster/syslogger.c | 2 +- src/backend/utils/adt/tsgistidx.c | 2 +- src/backend/utils/adt/varbit.c | 2 +- src/backend/utils/adt/varlena.c | 2 +- src/port/Makefile | 2 + src/port/meson.build | 2 + src/port/pg_bitutils.c | 171 +------------------------- src/port/pg_popcount_x86_64_accel.c | 101 +++++++++++++++ src/port/pg_popcount_x86_64_choose.c | 98 +++++++++++++++ 15 files changed, 215 insertions(+), 181 deletions(-) create mode 100644 src/port/pg_popcount_x86_64_accel.c create mode 100644 src/port/pg_popcount_x86_64_choose.c diff --git a/contrib/intarray/_intbig_gist.c b/contrib/intarray/_intbig_gist.c index 9699fbf3b4..a12ea7ed9b 100644 --- a/contrib/intarray/_intbig_gist.c +++ b/contrib/intarray/_intbig_gist.c @@ -210,7 +210,7 @@ g_intbig_compress(PG_FUNCTION_ARGS) static int32 sizebitvec(BITVECP sign, int siglen) { - return pg_popcount(sign, siglen); + return PG_POPCOUNT(sign, siglen); } static int diff --git a/contrib/ltree/_ltree_gist.c b/contrib/ltree/_ltree_gist.c index e89a39a5b5..bd66ec2e65 100644 --- a/contrib/ltree/_ltree_gist.c +++ b/contrib/ltree/_ltree_gist.c @@ -180,7 +180,7 @@ _ltree_union(PG_FUNCTION_ARGS) static int32 sizebitvec(BITVECP sign, int siglen) { - return pg_popcount((const char *) sign, siglen); + return PG_POPCOUNT((const char *) sign, siglen); } static int diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c index 3faeabc711..089842962f 100644 --- a/contrib/pageinspect/heapfuncs.c +++ b/contrib/pageinspect/heapfuncs.c @@ 
-527,8 +527,8 @@ heap_tuple_infomask_flags(PG_FUNCTION_ARGS) if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); - bitcnt = pg_popcount((const char *) &t_infomask, sizeof(uint16)) + - pg_popcount((const char *) &t_infomask2, sizeof(uint16)); + bitcnt = PG_POPCOUNT((const char *) &t_infomask, sizeof(uint16)) + + PG_POPCOUNT((const char *) &t_infomask2, sizeof(uint16)); /* If no flags, return a set of empty arrays */ if (bitcnt <= 0) diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c index 9ef2e38560..850316196f 100644 --- a/contrib/pg_trgm/trgm_gist.c +++ b/contrib/pg_trgm/trgm_gist.c @@ -648,7 +648,7 @@ gtrgm_same(PG_FUNCTION_ARGS) static int32 sizebitvec(BITVECP sign, int siglen) { - return pg_popcount(sign, siglen); + return PG_POPCOUNT(sign, siglen); } static int diff --git a/contrib/pg_walinspect/pg_walinspect.c b/contrib/pg_walinspect/pg_walinspect.c index ee2918726d..93a7b4842a 100644 --- a/contrib/pg_walinspect/pg_walinspect.c +++ b/contrib/pg_walinspect/pg_walinspect.c @@ -303,7 +303,7 @@ GetWALBlockInfo(FunctionCallInfo fcinfo, XLogReaderState *record, block_fpi_len = blk->bimg_len; /* Construct and save block_fpi_info */ - bitcnt = pg_popcount((const char *) &blk->bimg_info, + bitcnt = PG_POPCOUNT((const char *) &blk->bimg_info, sizeof(uint8)); flags = (Datum *) palloc0(sizeof(Datum) * bitcnt); if ((blk->bimg_info & BKPIMAGE_HAS_HOLE) != 0) diff --git a/src/backend/lib/bloomfilter.c b/src/backend/lib/bloomfilter.c index 360d21ca45..c01b069c01 100644 --- a/src/backend/lib/bloomfilter.c +++ b/src/backend/lib/bloomfilter.c @@ -187,7 +187,7 @@ double bloom_prop_bits_set(bloom_filter *filter) { int bitset_bytes = filter->m / BITS_PER_BYTE; - uint64 bits_set = pg_popcount((char *) filter->bitset, bitset_bytes); + uint64 bits_set = PG_POPCOUNT((char *) filter->bitset, bitset_bytes); return bits_set / (double) filter->m; } diff --git a/src/backend/postmaster/syslogger.c 
b/src/backend/postmaster/syslogger.c index 08efe74cc9..85c57b3154 100644 --- a/src/backend/postmaster/syslogger.c +++ b/src/backend/postmaster/syslogger.c @@ -898,7 +898,7 @@ process_pipe_input(char *logbuffer, int *bytes_in_logbuffer) if (p.nuls[0] == '\0' && p.nuls[1] == '\0' && p.len > 0 && p.len <= PIPE_MAX_PAYLOAD && p.pid != 0 && - pg_popcount((char *) &dest_flags, 1) == 1) + PG_POPCOUNT((char *) &dest_flags, 1) == 1) { List *buffer_list; ListCell *cell; diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c index 5698ee5502..d7a76faf31 100644 --- a/src/backend/utils/adt/tsgistidx.c +++ b/src/backend/utils/adt/tsgistidx.c @@ -489,7 +489,7 @@ gtsvector_same(PG_FUNCTION_ARGS) static int32 sizebitvec(BITVECP sign, int siglen) { - return pg_popcount(sign, siglen); + return PG_POPCOUNT(sign, siglen); } static int diff --git a/src/backend/utils/adt/varbit.c b/src/backend/utils/adt/varbit.c index 8fcf3fb731..3f287cd54d 100644 --- a/src/backend/utils/adt/varbit.c +++ b/src/backend/utils/adt/varbit.c @@ -1212,7 +1212,7 @@ bit_bit_count(PG_FUNCTION_ARGS) { VarBit *arg = PG_GETARG_VARBIT_P(0); - PG_RETURN_INT64(pg_popcount((char *) VARBITS(arg), VARBITBYTES(arg))); + PG_RETURN_INT64(PG_POPCOUNT((char *) VARBITS(arg), VARBITBYTES(arg))); } /* diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 8d28dd42ce..809e6a59ab 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -3151,7 +3151,7 @@ bytea_bit_count(PG_FUNCTION_ARGS) { bytea *t1 = PG_GETARG_BYTEA_PP(0); - PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1))); + PG_RETURN_INT64(PG_POPCOUNT(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1))); } /* diff --git a/src/port/Makefile b/src/port/Makefile index dcc8737e68..1499985dfc 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -44,6 +44,8 @@ OBJS = \ noblock.o \ path.o \ pg_bitutils.o \ + pg_popcount_x86_64_choose.o \ + pg_popcount_x86_64_accel.o \ pg_strong_random.o 
\ pgcheckdir.o \ pgmkdirp.o \ diff --git a/src/port/meson.build b/src/port/meson.build index 92b593e6ef..cf6e9fa06c 100644 --- a/src/port/meson.build +++ b/src/port/meson.build @@ -7,6 +7,8 @@ pgport_sources = [ 'noblock.c', 'path.c', 'pg_bitutils.c', + 'pg_popcount_x86_64_choose.c', + 'pg_popcount_x86_64_accel.c', 'pg_strong_random.c', 'pgcheckdir.c', 'pgmkdirp.c', diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c index e629969035..f08820b35b 100644 --- a/src/port/pg_bitutils.c +++ b/src/port/pg_bitutils.c @@ -11,14 +11,6 @@ *------------------------------------------------------------------------- */ #include "c.h" - -#ifdef HAVE__GET_CPUID -#include <cpuid.h> -#endif -#ifdef HAVE__CPUID -#include <intrin.h> -#endif - #include "port/pg_bitutils.h" @@ -103,167 +95,6 @@ const uint8 pg_number_of_ones[256] = { 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 }; -// static inline int pg_popcount32_slow(uint32 word); -// static inline int pg_popcount64_slow(uint64 word); -// static uint64 pg_popcount_slow(const char *buf, int bytes); - -#ifdef TRY_POPCNT_FAST -static bool pg_popcount_available(void); -static int pg_popcount32_choose(uint32 word); -static int pg_popcount64_choose(uint64 word); -static uint64 pg_popcount_choose(const char *buf, int bytes); -static inline int pg_popcount32_fast(uint32 word); -static inline int pg_popcount64_fast(uint64 word); -static uint64 pg_popcount_fast(const char *buf, int bytes); - -int (*pg_popcount32) (uint32 word) = pg_popcount32_choose; -int (*pg_popcount64) (uint64 word) = pg_popcount64_choose; -uint64 (*pg_popcount) (const char *buf, int bytes) = pg_popcount_choose; - -/* - * Return true if CPUID indicates that the POPCNT instruction is available. 
- */ -static bool -pg_popcount_available(void) -{ - unsigned int exx[4] = {0, 0, 0, 0}; - -#if defined(HAVE__GET_CPUID) - __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUID) - __cpuid(exx, 1); -#else -#error cpuid instruction not available -#endif - - return (exx[2] & (1 << 23)) != 0; /* POPCNT */ -} - -/* - * These functions get called on the first call to pg_popcount32 etc. - * They detect whether we can use the asm implementations, and replace - * the function pointers so that subsequent calls are routed directly to - * the chosen implementation. - */ -static inline void set_function_pointers() -{ - if (pg_popcount_available()) - { - pg_popcount32 = pg_popcount32_fast; - pg_popcount64 = pg_popcount64_fast; - pg_popcount = pg_popcount_fast; - } - else - { - pg_popcount32 = pg_popcount32_slow; - pg_popcount64 = pg_popcount64_slow; - pg_popcount = pg_popcount_slow; - } -} - -static int -pg_popcount32_choose(uint32 word) -{ - set_function_pointers(); - return pg_popcount32(word); -} - -static int -pg_popcount64_choose(uint64 word) -{ - set_function_pointers(); - return pg_popcount64(word); -} - -static uint64 -pg_popcount_choose(const char *buf, int bytes) -{ - set_function_pointers(); - return pg_popcount(buf, bytes); -} - -/* - * pg_popcount32_fast - * Return the number of 1 bits set in word - */ -static inline int -pg_popcount32_fast(uint32 word) -{ -#ifdef _MSC_VER - return __popcnt(word); -#else - uint32 res; - -__asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc"); - return (int) res; -#endif -} - -/* - * pg_popcount64_fast - * Return the number of 1 bits set in word - */ -static inline int -pg_popcount64_fast(uint64 word) -{ -#ifdef _MSC_VER - return __popcnt64(word); -#else - uint64 res; - -__asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc"); - return (int) res; -#endif -} - -/* - * pg_popcount_fast - * Returns the number of 1-bits in buf - */ -static inline uint64 -pg_popcount_fast(const char *buf, int 
bytes) -{ - uint64 popcnt = 0; - -#if SIZEOF_VOID_P >= 8 - /* Process in 64-bit chunks if the buffer is aligned. */ - if (buf == (const char *) TYPEALIGN(8, buf)) - { - const uint64 *words = (const uint64 *) buf; - - while (bytes >= 8) - { - popcnt += PG_POPCOUNT64(*words++); - bytes -= 8; - } - - buf = (const char *) words; - } -#else - /* Process in 32-bit chunks if the buffer is aligned. */ - if (buf == (const char *) TYPEALIGN(4, buf)) - { - const uint32 *words = (const uint32 *) buf; - - while (bytes >= 4) - { - popcnt += PG_POPCOUNT32(*words++); - bytes -= 4; - } - - buf = (const char *) words; - } -#endif - - /* Process any remaining bytes */ - while (bytes--) - popcnt += pg_number_of_ones[(unsigned char) *buf++]; - - return popcnt; -} - -#endif /* TRY_POPCNT_FAST */ - /* * pg_popcount32_slow @@ -319,7 +150,7 @@ pg_popcount64_slow(uint64 word) * pg_popcount_slow * Returns the number of 1-bits in buf */ -uint64 +inline uint64 pg_popcount_slow(const char *buf, int bytes) { uint64 popcnt = 0; diff --git a/src/port/pg_popcount_x86_64_accel.c b/src/port/pg_popcount_x86_64_accel.c new file mode 100644 index 0000000000..d63e8aa30f --- /dev/null +++ b/src/port/pg_popcount_x86_64_accel.c @@ -0,0 +1,101 @@ +/*------------------------------------------------------------------------- + * + * pg_popcount_x86_64_accel.c + * Miscellaneous functions for bit-wise operations. 
+ * + * Copyright (c) 2024, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/port/pg_popcount_x86_64_accel.c + * + *------------------------------------------------------------------------- + */ +#include "c.h" +#include "port/pg_bitutils.h" + +#ifdef TRY_POPCNT_FAST +int pg_popcount32_fast(uint32 word); +int pg_popcount64_fast(uint64 word); +uint64 pg_popcount_fast(const char *buf, int bytes); + +/* + * pg_popcount32_fast + * Return the number of 1 bits set in word + */ +int +pg_popcount32_fast(uint32 word) +{ +#ifdef _MSC_VER + return __popcnt(word); +#else + uint32 res; + +__asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc"); + return (int) res; +#endif +} + +/* + * pg_popcount64_fast + * Return the number of 1 bits set in word + */ +int +pg_popcount64_fast(uint64 word) +{ +#ifdef _MSC_VER + return __popcnt64(word); +#else + uint64 res; + +__asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc"); + return (int) res; +#endif +} + +/* + * pg_popcount_fast + * Returns the number of 1-bits in buf + */ +uint64 +pg_popcount_fast(const char *buf, int bytes) +{ + uint64 popcnt = 0; + +#if SIZEOF_VOID_P >= 8 + /* Process in 64-bit chunks if the buffer is aligned. */ + if (buf == (const char *) TYPEALIGN(8, buf)) + { + const uint64 *words = (const uint64 *) buf; + + while (bytes >= 8) + { + popcnt += PG_POPCOUNT64(*words++); + bytes -= 8; + } + + buf = (const char *) words; + } +#else + /* Process in 32-bit chunks if the buffer is aligned. 
*/ + if (buf == (const char *) TYPEALIGN(4, buf)) + { + const uint32 *words = (const uint32 *) buf; + + while (bytes >= 4) + { + popcnt += PG_POPCOUNT32(*words++); + bytes -= 4; + } + + buf = (const char *) words; + } +#endif + + /* Process any remaining bytes */ + while (bytes--) + popcnt += pg_number_of_ones[(unsigned char) *buf++]; + + return popcnt; +} + +#endif /* TRY_POPCNT_FAST */ diff --git a/src/port/pg_popcount_x86_64_choose.c b/src/port/pg_popcount_x86_64_choose.c new file mode 100644 index 0000000000..1a0022a0b3 --- /dev/null +++ b/src/port/pg_popcount_x86_64_choose.c @@ -0,0 +1,98 @@ +/*------------------------------------------------------------------------- + * + * pg_popcount_x86_64_choose.c + * Miscellaneous functions for bit-wise operations. + * + * Copyright (c) 2024, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/port/pg_popcount_x86_64_choose.c + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#ifdef HAVE__GET_CPUID +#include <cpuid.h> +#endif +#ifdef HAVE__CPUID +#include <intrin.h> +#endif + +#include "port/pg_bitutils.h" + +#ifdef TRY_POPCNT_FAST +int pg_popcount32_fast(uint32 word); +int pg_popcount64_fast(uint64 word); +uint64 pg_popcount_fast(const char *buf, int bytes); + +static int pg_popcount32_choose(uint32 word); +static int pg_popcount64_choose(uint64 word); +static uint64 pg_popcount_choose(const char *buf, int bytes); + +int (*pg_popcount32) (uint32 word) = pg_popcount32_choose; +int (*pg_popcount64) (uint64 word) = pg_popcount64_choose; +uint64 (*pg_popcount) (const char *buf, int bytes) = pg_popcount_choose; + +/* + * Return true if CPUID indicates that the POPCNT instruction is available. 
+ */ +static bool +pg_popcount_available(void) +{ + unsigned int exx[4] = {0, 0, 0, 0}; + +#if defined(HAVE__GET_CPUID) + __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); +#elif defined(HAVE__CPUID) + __cpuid(exx, 1); +#else +#error cpuid instruction not available +#endif + + return (exx[2] & (1 << 23)) != 0; /* POPCNT */ +} + +/* + * These functions get called on the first call to pg_popcount32 etc. + * They detect whether we can use the asm implementations, and replace + * the function pointers so that subsequent calls are routed directly to + * the chosen implementation. + */ +static inline void set_function_pointers() +{ + if (pg_popcount_available()) + { + pg_popcount32 = pg_popcount32_fast; + pg_popcount64 = pg_popcount64_fast; + pg_popcount = pg_popcount_fast; + } + else + { + pg_popcount32 = pg_popcount32_slow; + pg_popcount64 = pg_popcount64_slow; + pg_popcount = pg_popcount_slow; + } +} + +static int +pg_popcount32_choose(uint32 word) +{ + set_function_pointers(); + return pg_popcount32(word); +} + +static int +pg_popcount64_choose(uint64 word) +{ + set_function_pointers(); + return pg_popcount64(word); +} + +static uint64 +pg_popcount_choose(const char *buf, int bytes) +{ + set_function_pointers(); + return pg_popcount(buf, bytes); +} +#endif /* TRY_POPCNT_FAST */ -- 2.34.1