From 474771cf8c3d90cc0356cb22f38c8d46d7ab0742 Mon Sep 17 00:00:00 2001 From: Amit Khandekar Date: Wed, 27 Jan 2021 22:34:06 +0800 Subject: [PATCH 1/2] Speed up xor'ing of two gist index signatures for tsvectors In hemdistsign(), rather than applying the xor operator to individual char values, apply it to 64-bit chunks. And since the chunks are 64-bit, use pg_popcount64() on each of the chunks. I have checked that the two bitvector pointer arguments of hemdistsign() are not always 64-bit aligned. So process the leading and trailing bytes one byte at a time, leaving the aligned middle 64-bit chunks for pg_popcount64(). This results in a speed-up in GiST index creation for tsvectors. With the default siglen (124), the speed-up is 12-20%. With siglen=700, it is 30-50%. So with longer signature lengths, we get a higher percentage speed-up. Similar results are seen for other types using GiST indexes, such as intarray, hstore, and ltree, which are available in contrib. With smaller siglens such as 10 or 20, there is a slowdown of 1-7% if we use this optimization. This is probably because of the extra function call for pg_xorcount(), and might also be due to the extra logic in pg_xorcount(). So for siglen of 32 bytes or less, keep the existing method using byte-by-byte traversal. 
--- contrib/hstore/hstore_gist.c | 14 +++------ contrib/intarray/_intbig_gist.c | 15 ++-------- contrib/ltree/_ltree_gist.c | 15 ++-------- src/backend/utils/adt/tsgistidx.c | 15 ++-------- src/include/port/pg_bitutils.h | 20 +++++++++++++ src/port/pg_bitutils.c | 47 +++++++++++++++++++++++++++++++ 6 files changed, 80 insertions(+), 46 deletions(-) diff --git a/contrib/hstore/hstore_gist.c b/contrib/hstore/hstore_gist.c index 102c9cea72..ba1395dea3 100644 --- a/contrib/hstore/hstore_gist.c +++ b/contrib/hstore/hstore_gist.c @@ -8,6 +8,7 @@ #include "access/stratnum.h" #include "catalog/pg_type.h" #include "hstore.h" +#include "port/pg_bitutils.h" #include "utils/pg_crc.h" /* gist_hstore_ops opclass options */ @@ -256,18 +257,11 @@ sizebitvec(BITVECP sign, int siglen) return size; } -static int +static inline int hemdistsign(BITVECP a, BITVECP b, int siglen) { - int i, - dist = 0; - - LOOPBIT(siglen) - { - if (GETBIT(a, i) != GETBIT(b, i)) - dist++; - } - return dist; + return pg_xorcount((const unsigned char *) a, (const unsigned char *) b, + siglen); } static int diff --git a/contrib/intarray/_intbig_gist.c b/contrib/intarray/_intbig_gist.c index 18ecd8cda6..c3bad635c3 100644 --- a/contrib/intarray/_intbig_gist.c +++ b/contrib/intarray/_intbig_gist.c @@ -211,20 +211,11 @@ sizebitvec(BITVECP sign, int siglen) return pg_popcount(sign, siglen); } -static int +static inline int hemdistsign(BITVECP a, BITVECP b, int siglen) { - int i, - diff, - dist = 0; - - LOOPBYTE(siglen) - { - diff = (unsigned char) (a[i] ^ b[i]); - /* Using the popcount functions here isn't likely to win */ - dist += pg_number_of_ones[diff]; - } - return dist; + return pg_xorcount((const unsigned char *) a, (const unsigned char *) b, + siglen); } static int diff --git a/contrib/ltree/_ltree_gist.c b/contrib/ltree/_ltree_gist.c index 72516c3b6b..ead08b00ed 100644 --- a/contrib/ltree/_ltree_gist.c +++ b/contrib/ltree/_ltree_gist.c @@ -180,20 +180,11 @@ sizebitvec(BITVECP sign, int siglen) return 
pg_popcount((const char *) sign, siglen); } -static int +static inline int hemdistsign(BITVECP a, BITVECP b, int siglen) { - int i, - diff, - dist = 0; - - ALOOPBYTE(siglen) - { - diff = (unsigned char) (a[i] ^ b[i]); - /* Using the popcount functions here isn't likely to win */ - dist += pg_number_of_ones[diff]; - } - return dist; + return pg_xorcount((const unsigned char *) a, (const unsigned char *) b, + siglen); } static int diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c index c09eefdda2..9d822da241 100644 --- a/src/backend/utils/adt/tsgistidx.c +++ b/src/backend/utils/adt/tsgistidx.c @@ -486,20 +486,11 @@ sizebitvec(BITVECP sign, int siglen) return pg_popcount(sign, siglen); } -static int +static inline int hemdistsign(BITVECP a, BITVECP b, int siglen) { - int i, - diff, - dist = 0; - - LOOPBYTE(siglen) - { - diff = (unsigned char) (a[i] ^ b[i]); - /* Using the popcount functions here isn't likely to win */ - dist += pg_number_of_ones[diff]; - } - return dist; + return pg_xorcount((const unsigned char *) a, (const unsigned char *) b, + siglen); } static int diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h index f9b77ec278..55ef9a8e9e 100644 --- a/src/include/port/pg_bitutils.h +++ b/src/include/port/pg_bitutils.h @@ -214,6 +214,26 @@ extern int (*pg_popcount64) (uint64 word); /* Count the number of one-bits in a byte array */ extern uint64 pg_popcount(const char *buf, int bytes); +/* Count the number of 1-bits in the result of xor operation */ +extern uint64 pg_xorcount_long(const unsigned char *a, const unsigned char *b, + int bytes); +static inline uint64 pg_xorcount(const unsigned char *a, const unsigned char *b, + int bytes) +{ + /* For smaller lengths, do simple byte-by-byte traversal */ + if (bytes <= 32) + { + int i; + uint64 popcnt = 0; + + for (i = 0; i < bytes; i++) + popcnt += pg_number_of_ones[a[i] ^ b[i]]; + return popcnt; + } + else + return pg_xorcount_long(a, b, bytes); +} + /* * 
Rotate the bits of "word" to the right by n bits. */ diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c index 2252021854..3a3bbc4262 100644 --- a/src/port/pg_bitutils.c +++ b/src/port/pg_bitutils.c @@ -319,3 +319,50 @@ pg_popcount(const char *buf, int bytes) return popcnt; } + +/* + * pg_xorcount_long + * Count the number of 1-bits in the xor of the two "bytes"-long byte arrays a and b. + */ +uint64 +pg_xorcount_long(const unsigned char *a, const unsigned char *b, int bytes) +{ + uint64 popcnt = 0; + int i = 0; + +#if SIZEOF_VOID_P >= 8 + const unsigned char *a_aligned = (const unsigned char *) TYPEALIGN(8, a); + const unsigned char *b_aligned = (const unsigned char *) TYPEALIGN(8, b); + + /* + * We can process 64-bit chunks only if both are misaligned by the same + * number of bytes. + */ + if (b_aligned - b == a_aligned - a) + { + int unaligned_bytes = a_aligned - a; + uint64 *aint64 = (uint64*) a_aligned; + uint64 *bint64 = (uint64*) b_aligned; + int nelem; + + /* Process leading bytes up to where the aligned bytes start */ + unaligned_bytes = Min(unaligned_bytes, bytes); + for (i = 0; i < unaligned_bytes; i++) + popcnt += pg_number_of_ones[a[i] ^ b[i]]; + + /* Process the aligned middle part in 64-bit chunks using pg_popcount64() */ + nelem = (bytes - unaligned_bytes)/sizeof(uint64); + for (i = 0; i < nelem; i++) + popcnt += pg_popcount64(aint64[i] ^ bint64[i]); + + /* Position i for the trailing bytes */ + i = unaligned_bytes + nelem*sizeof(uint64); + } +#endif + + /* Process trailing bytes (or all bytes, if the 64-bit path was not taken) */ + for (; i < bytes; i++) + popcnt += pg_number_of_ones[a[i] ^ b[i]]; + + return popcnt; +} -- 2.17.1