diff --git a/configure b/configure index ec75318..2e79190 100755 --- a/configure +++ b/configure @@ -11937,6 +11937,39 @@ $as_echo "#define LOCALE_T_IN_XLOCALE 1" >>confdefs.h fi +# Check for __builtin_popcount +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcount" >&5 +$as_echo_n "checking for __builtin_popcount... " >&6; } +if ${pgac_cv_have_builtin_popcount+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +return __builtin_popcount(42) + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + pgac_cv_have_builtin_popcount="yes" +else + pgac_cv_have_builtin_popcount="no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_have_builtin_popcount" >&5 +$as_echo "$pgac_cv_have_builtin_popcount" >&6; } +if test "$pgac_cv_have_builtin_popcount" = "yes" ; then + +$as_echo "#define HAVE_BUILTIN_POPCOUNT 1" >>confdefs.h + +fi + ac_fn_c_check_type "$LINENO" "struct cmsgcred" "ac_cv_type_struct_cmsgcred" "#include #include #ifdef HAVE_SYS_UCRED_H diff --git a/configure.in b/configure.in index bc925d0..4d50f6c 100644 --- a/configure.in +++ b/configure.in @@ -1371,6 +1371,15 @@ AC_TYPE_LONG_LONG_INT PGAC_TYPE_LOCALE_T +# Check for __builtin_popcount +AC_CACHE_CHECK([for __builtin_popcount], [pgac_cv_have_builtin_popcount], + [AC_LINK_IFELSE([AC_LANG_PROGRAM([], [[return __builtin_popcount(42)]])], + [pgac_cv_have_builtin_popcount="yes"], + [pgac_cv_have_builtin_popcount="no"])]) +if test "$pgac_cv_have_builtin_popcount" = "yes" ; then + AC_DEFINE(HAVE_BUILTIN_POPCOUNT, 1, [Define to 1 if you have __builtin_popcount.]) +fi + AC_CHECK_TYPES([struct cmsgcred], [], [], [#include #include diff --git a/contrib/intarray/_intbig_gist.c b/contrib/intarray/_intbig_gist.c index 6dae7c91..7d6127d 100644 --- a/contrib/intarray/_intbig_gist.c +++ b/contrib/intarray/_intbig_gist.c @@ -5,6 +5,7 @@ #include "access/gist.h" #include "access/stratnum.h" +#include "common/bitutils.h" #include "_int.h" @@ -19,27 +20,6 @@ PG_FUNCTION_INFO_V1(g_intbig_penalty); PG_FUNCTION_INFO_V1(g_intbig_picksplit); PG_FUNCTION_INFO_V1(g_intbig_union); PG_FUNCTION_INFO_V1(g_intbig_same); - -/* Number of one-bits in an unsigned byte */ -static const uint8 number_of_ones[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 -}; - PG_FUNCTION_INFO_V1(_intbig_in); PG_FUNCTION_INFO_V1(_intbig_out); @@ -207,12 +187,7 @@ g_intbig_compress(PG_FUNCTION_ARGS) static int32 sizebitvec(BITVECP sign) { - int32 size = 0, - i; - - LOOPBYTE - size += number_of_ones[(unsigned char) sign[i]]; - return size; + return popcount(&sign[0], SIGLEN * sizeof(sign[0])); } static int @@ -225,7 +200,7 @@ hemdistsign(BITVECP a, BITVECP b) LOOPBYTE { diff = (unsigned char) (a[i] ^ b[i]); - dist += number_of_ones[diff]; + dist += popcount_uint8(diff); } return dist; } diff --git a/contrib/ltree/_ltree_gist.c b/contrib/ltree/_ltree_gist.c index a387f5b..be433d0 100644 --- a/contrib/ltree/_ltree_gist.c +++ b/contrib/ltree/_ltree_gist.c @@ -9,6 +9,7 @@ #include "access/gist.h" #include "access/stratnum.h" +#include "common/bitutils.h" #include "crc32.h" #include "ltree.h" @@ -22,27 +23,6 @@ PG_FUNCTION_INFO_V1(_ltree_consistent); #define GETENTRY(vec,pos) ((ltree_gist *) DatumGetPointer((vec)->vector[(pos)].key)) #define NEXTVAL(x) ( (ltree*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) ) - -/* Number of one-bits in an unsigned byte */ -static const uint8 number_of_ones[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 -}; - #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) ) @@ -209,12 +189,7 @@ _ltree_union(PG_FUNCTION_ARGS) static int32 sizebitvec(BITVECP sign) { - int32 size = 0, - i; - - ALOOPBYTE - size += number_of_ones[(unsigned char) sign[i]]; - return size; + return popcount(&sign[0], SIGLEN * sizeof(sign[0])); } static int @@ -227,7 +202,7 @@ hemdistsign(BITVECP a, BITVECP b) ALOOPBYTE { diff = (unsigned char) (a[i] ^ b[i]); - dist += number_of_ones[diff]; + dist += popcount_uint8(diff); } return dist; } diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c index 2181c26..fba0f15 100644 --- a/contrib/pg_trgm/trgm_gist.c +++ b/contrib/pg_trgm/trgm_gist.c @@ -6,6 +6,7 @@ #include "trgm.h" #include "access/stratnum.h" +#include "common/bitutils.h" #include "fmgr.h" @@ -39,27 +40,6 @@ PG_FUNCTION_INFO_V1(gtrgm_same); PG_FUNCTION_INFO_V1(gtrgm_penalty); PG_FUNCTION_INFO_V1(gtrgm_picksplit); -/* Number of one-bits in an unsigned byte */ -static const uint8 number_of_ones[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 -}; - - Datum gtrgm_in(PG_FUNCTION_ARGS) { @@ -629,12 +609,7 @@ gtrgm_same(PG_FUNCTION_ARGS) static int32 sizebitvec(BITVECP sign) { - int32 size = 0, - i; - - LOOPBYTE - size += number_of_ones[(unsigned char) sign[i]]; - return size; + return popcount(&sign[0], SIGLEN * sizeof(sign[0])); } static int @@ -647,7 +622,7 @@ hemdistsign(BITVECP a, BITVECP b) LOOPBYTE { diff = (unsigned char) (a[i] ^ b[i]); - dist += number_of_ones[diff]; + dist += popcount_uint8(diff); } return dist; } diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index eaab4be..50b98c9 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -88,6 +88,7 @@ #include "access/heapam_xlog.h" #include "access/visibilitymap.h" #include "access/xlog.h" +#include "common/bitutils.h" #include "miscadmin.h" #include "storage/bufmgr.h" #include "storage/lmgr.h" @@ -112,43 +113,9 @@ #define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE) #define HEAPBLK_TO_MAPBIT(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK) -/* tables for fast counting of set bits for visible and frozen */ -static const uint8 number_of_ones_for_visible[256] = { - 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4, - 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4, - 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, - 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4 -}; -static const uint8 number_of_ones_for_frozen[256] = { - 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, - 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, - 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4, - 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, - 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4, - 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4 -}; +/* Masks for bit counting. */ +#define VISIBLE_MASK64 0x5555555555555555 /* The lower bit of each bit pair */ +#define FROZEN_MASK64 0xaaaaaaaaaaaaaaaa /* The upper bit of each bit pair */ /* prototypes for internal routines */ static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend); @@ -409,7 +376,7 @@ visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_fro for (mapBlock = 0;; mapBlock++) { Buffer mapBuffer; - unsigned char *map; + uint64 *map; int i; /* @@ -426,13 +393,15 @@ visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_fro * immediately stale anyway if anyone is concurrently setting or * clearing bits, and we only really need an approximate value. */ - map = (unsigned char *) PageGetContents(BufferGetPage(mapBuffer)); + map = (uint64 *) PageGetContents(BufferGetPage(mapBuffer)); - for (i = 0; i < MAPSIZE; i++) + StaticAssertStmt(MAPSIZE % sizeof(uint64) == 0, + "unsupported MAPSIZE"); + for (i = 0; i < MAPSIZE / sizeof(uint64); i++) { - *all_visible += number_of_ones_for_visible[map[i]]; + *all_visible += popcount_uint64(map[i] & VISIBLE_MASK64); if (all_frozen) - *all_frozen += number_of_ones_for_frozen[map[i]]; + *all_frozen += popcount_uint64(map[i] & FROZEN_MASK64); } ReleaseBuffer(mapBuffer); diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index f281cc5..cd9a82a 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -21,7 +21,9 @@ #include "postgres.h" #include "access/hash.h" +#include "common/bitutils.h" +#include #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD) #define BITNUM(x) ((x) % BITS_PER_BITMAPWORD) @@ -50,58 +52,18 @@ #define HAS_MULTIPLE_ONES(x) ((bitmapword) RIGHTMOST_ONE(x) != (x)) - /* - * Lookup tables to avoid need for bit-by-bit groveling - * - * rightmost_one_pos[x] gives the bit number (0-7) of the rightmost one bit - * in a nonzero byte value x. The entry for x=0 is never used. - * - * number_of_ones[x] gives the number of one-bits (0-8) in a byte value x. - * - * We could make these tables larger and reduce the number of iterations - * in the functions that use them, but bytewise shifts and masks are - * especially fast on many machines, so working a byte at a time seems best. + * Find the rightmost (least significant) bit set in a bitmapword. The least + * significant bit is called 0 (unlike the ffs function which call that bit + * position 1). Undefined if there are no bits set. */ - -static const uint8 rightmost_one_pos[256] = { - 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 -}; - -static const uint8 number_of_ones[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 -}; - +static inline int +rightmost_one_pos(bitmapword word) +{ + StaticAssertStmt(sizeof(bitmapword) <= sizeof(int), + "unsupported bitmapword size"); + return ffs((int) word) - 1; +} /* * bms_copy - make a palloc'd copy of a bitmapset @@ -511,12 +473,7 @@ bms_singleton_member(const Bitmapset *a) if (result >= 0 || HAS_MULTIPLE_ONES(w)) elog(ERROR, "bitmapset has multiple members"); result = wordnum * BITS_PER_BITMAPWORD; - while ((w & 255) == 0) - { - w >>= 8; - result += 8; - } - result += rightmost_one_pos[w & 255]; + result += rightmost_one_pos(w); } } if (result < 0) @@ -554,12 +511,7 @@ bms_get_singleton_member(const Bitmapset *a, int *member) if (result >= 0 || HAS_MULTIPLE_ONES(w)) return false; result = wordnum * BITS_PER_BITMAPWORD; - while ((w & 255) == 0) - { - w >>= 8; - result += 8; - } - result += rightmost_one_pos[w & 255]; + result += rightmost_one_pos(w); } } if (result < 0) @@ -574,25 +526,10 @@ bms_get_singleton_member(const Bitmapset *a, int *member) int bms_num_members(const Bitmapset *a) { - int result = 0; - int nwords; - int wordnum; - if (a == NULL) return 0; - nwords = a->nwords; - for (wordnum = 0; wordnum < nwords; wordnum++) - { - bitmapword w = a->words[wordnum]; - - /* we assume here that bitmapword is an unsigned type */ - while (w != 0) - { - result += number_of_ones[w & 255]; - w >>= 8; - } - } - return result; + return popcount(&a->words[0], + a->nwords * sizeof(a->words[0])); } /* @@ -872,12 +809,7 @@ bms_first_member(Bitmapset *a) a->words[wordnum] &= ~w; result = wordnum * BITS_PER_BITMAPWORD; - while ((w & 255) == 0) - { - w >>= 8; - result += 8; - } - result += rightmost_one_pos[w & 255]; + result += rightmost_one_pos(w); return result; } } @@ -927,12 +859,7 @@ bms_next_member(const Bitmapset *a, int prevbit) int result; result = wordnum * BITS_PER_BITMAPWORD; - while ((w & 255) == 0) - { - w >>= 8; - result += 8; - } - result += rightmost_one_pos[w & 255]; + result += rightmost_one_pos(w); return result; } diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c index cdd5d43..79f8466 100644 --- a/src/backend/utils/adt/tsgistidx.c +++ b/src/backend/utils/adt/tsgistidx.c @@ -16,6 +16,7 @@ #include "access/gist.h" #include "access/tuptoaster.h" +#include "common/bitutils.h" #include "tsearch/ts_utils.h" #include "utils/pg_crc.h" @@ -69,26 +70,6 @@ typedef struct #define GETARR(x) ( (int32*)( (char*)(x)+GTHDRSIZE ) ) #define ARRNELEM(x) ( ( VARSIZE(x) - GTHDRSIZE )/sizeof(int32) ) -/* Number of one-bits in an unsigned byte */ -static const uint8 number_of_ones[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 -}; - static int32 sizebitvec(BITVECP sign); Datum @@ -499,12 +480,7 @@ gtsvector_same(PG_FUNCTION_ARGS) static int32 sizebitvec(BITVECP sign) { - int32 size = 0, - i; - - LOOPBYTE - size += number_of_ones[(unsigned char) sign[i]]; - return size; + return popcount(&sign[0], SIGLEN * sizeof(sign[0])); } static int @@ -517,7 +493,7 @@ hemdistsign(BITVECP a, BITVECP b) LOOPBYTE { diff = (unsigned char) (a[i] ^ b[i]); - dist += number_of_ones[diff]; + dist += popcount_uint8(diff); } return dist; } diff --git a/src/common/Makefile b/src/common/Makefile index 72b7369..42ec487 100644 --- a/src/common/Makefile +++ b/src/common/Makefile @@ -36,7 +36,7 @@ override CPPFLAGS += -DVAL_LDFLAGS_EX="\"$(LDFLAGS_EX)\"" override CPPFLAGS += -DVAL_LDFLAGS_SL="\"$(LDFLAGS_SL)\"" override CPPFLAGS += -DVAL_LIBS="\"$(LIBS)\"" -OBJS_COMMON = config_info.o controldata_utils.o exec.o keywords.o \ +OBJS_COMMON = bitutils.o config_info.o controldata_utils.o exec.o keywords.o \ pg_lzcompress.o pgfnames.o psprintf.o relpath.o rmtree.o \ string.o username.o wait_error.o diff --git a/src/common/bitutils.c b/src/common/bitutils.c new file mode 100644 index 0000000..9da062f --- /dev/null +++ b/src/common/bitutils.c @@ -0,0 +1,105 @@ +/* + * bitutils.c + * bit manipulation support + * + * Copyright (c) 2016, PostgreSQL Global Development Group + * + * src/common/bitutils.c + */ + +#include "postgres.h" + +#include "common/bitutils.h" + +/* The lookup table used for 8 bit popcounts. */ +const uint8 popcount_uint8_table[256] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 +}; + +#if !defined(USE_POPCOUNT_TABLE) && !defined(HAVE_BUILTIN_POPCOUNT) + +/* + * Wider versions, as functions rather than macros to avoid multiple + * evaluation hazards. + */ + +int +popcount_uint32(uint32 v) +{ + return + popcount_uint8(v & 0xff) + + popcount_uint8((v >> 8) & 0xff) + + popcount_uint8((v >> 16) & 0xff) + + popcount_uint8(v >> 24); +} + +int +popcount_uint64(uint64 v) +{ + /* + * Maybe some of the other codings from Hacker's Delight, Knuth or + * Beautiful Code would be better? + */ + return + popcount_uint8(v & 0xff) + + popcount_uint8((v >> 8) & 0xff) + + popcount_uint8((v >> 16) & 0xff) + + popcount_uint8((v >> 24) & 0xff) + + popcount_uint8((v >> 32) & 0xff) + + popcount_uint8((v >> 40) & 0xff) + + popcount_uint8((v >> 48) & 0xff) + + popcount_uint8(v >> 56); +} + +#endif + +/* + * Count the number of bits set in a region of memory. Do so using the widest + * size possible, so that builds that use hardware instructions rather than + * looking up each byte can get the maximum benefit. 'data' must be correctly + * aligned for a uint64. + */ +Size +popcount(const void *data, Size size) +{ + Size result = 0; + const uint8 *end = ((uint8 *) data) + size; + const uint64 *p64; + const uint32 *p32; + const uint8 *p8; + + /* + * TODO: Not tested, but 32 bit grain might actually be better than 64: + * http://stackoverflow.com/questions/25078285/replacing-a-32-bit-loop-count-variable-with-64-bit-introduces-crazy-performance/ + */ + + /* Process as many whole uint64s as we can */ + p64 = (uint64 *) data; + while ((uint8 *) (p64 + 1) <= end) + result += popcount_uint64(*p64++); + /* See if there is a trailing whole uint32 we can process */ + p32 = (uint32 *) p64; + if ((uint8 *) (p32 + 1) <= end) + result += popcount_uint32(*p32++); + /* Mop up any remaining bytes */ + p8 = (uint8 *) p32; + while (p8 < end) + result += popcount_uint8(*p8++); + + return result; +} diff --git a/src/include/common/bitutils.h b/src/include/common/bitutils.h new file mode 100644 index 0000000..492fbc9 --- /dev/null +++ b/src/include/common/bitutils.h @@ -0,0 +1,34 @@ +/* + * bitutils.h + * bit manipulation support + * + * Copyright (c) 2016, PostgreSQL Global Development Group + * + * src/include/common/bitutils.h + */ +#ifndef BITUTILS_H +#define BITUTILS_H + +/* Use a private lookup table for the per-byte population count. */ +extern const uint8 popcount_uint8_table[256]; +#define popcount_uint8(v) popcount_uint8_table[(v)] + +#if !defined(POPCOUNT_USE_TABLE) && defined(HAVE_BUILTIN_POPCOUNT) + +/* Use GCC built-ins for wider sizes */ +#define popcount_uint32(v) \ + __builtin_popcount(v) +#define popcount_uint64(v) \ + __builtin_popcountll(v) + +#else + +/* Use functions for the wider sizes to avoid multiple evaluation hazards. */ +int popcount_uint32(uint32 v); +int popcount_uint64(uint64 v); + +#endif + +extern Size popcount(const void *data, Size size); + +#endif /* BITUTILS_H */ diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index b621ff2..f66231a 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -90,6 +90,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_ATOMIC_H +/* Define to 1 if you have __builtin_popcount. */ +#undef HAVE_BUILTIN_POPCOUNT + /* Define to 1 if you have the `cbrt' function. */ #undef HAVE_CBRT