From f96ae8bf6913796c5d724ff9da25bbc79927ff1c Mon Sep 17 00:00:00 2001 From: John Naylor Date: Fri, 28 Feb 2025 18:27:40 +0700 Subject: [PATCH v11 4/4] Use runtime check even when we have SSE 4.2 at compile time This allows us to use PCLMUL for longer inputs. Short inputs are inlined to avoid the indirection through a function pointer. --- configure | 2 +- configure.ac | 2 +- src/include/port/pg_crc32c.h | 15 +++++++++++---- src/port/meson.build | 1 + src/port/pg_crc32c_sse42_choose.c | 2 ++ 5 files changed, 16 insertions(+), 6 deletions(-) diff --git a/configure b/configure index 93fddd69981..91c0ffc8272 100755 --- a/configure +++ b/configure @@ -17684,7 +17684,7 @@ if test x"$USE_SSE42_CRC32C" = x"1"; then $as_echo "#define USE_SSE42_CRC32C 1" >>confdefs.h - PG_CRC32C_OBJS="pg_crc32c_sse42.o" + PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sse42_choose.o" { $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2" >&5 $as_echo "SSE 4.2" >&6; } else diff --git a/configure.ac b/configure.ac index b6d02f5ecc7..a85bdbd4ff6 100644 --- a/configure.ac +++ b/configure.ac @@ -2151,7 +2151,7 @@ fi AC_MSG_CHECKING([which CRC-32C implementation to use]) if test x"$USE_SSE42_CRC32C" = x"1"; then AC_DEFINE(USE_SSE42_CRC32C, 1, [Define to 1 use Intel SSE 4.2 CRC instructions.]) - PG_CRC32C_OBJS="pg_crc32c_sse42.o" + PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sse42_choose.o" AC_MSG_RESULT(SSE 4.2) else if test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h index fe0e1b6b275..26b676dddc9 100644 --- a/src/include/port/pg_crc32c.h +++ b/src/include/port/pg_crc32c.h @@ -55,22 +55,29 @@ typedef uint32 pg_crc32c; ((crc) = pg_comp_crc32c_dispatch((crc), (data), (len))) #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) +extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); +extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len); extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); +#ifdef USE_PCLMUL_WITH_RUNTIME_CHECK +extern pg_crc32c pg_comp_crc32c_pclmul(pg_crc32c crc, const void *data, size_t len); +#endif static inline pg_crc32c pg_comp_crc32c_dispatch(pg_crc32c crc, const void *data, size_t len) { - if (__builtin_constant_p(len) && len < 64) + if (len < 64) { /* - * For small constant inputs, inline the computation. This allows the - * compiler to unroll loops. + * For small inputs, inline the computation to avoid the runtime + * check. This also allows the compiler to unroll loops for constant + * input. */ return pg_comp_crc32c_sse42_inline(crc, data, len); } else - return pg_comp_crc32c_sse42(crc, data, len); + /* For larger inputs, use a runtime check for PCLMUL instructions. */ + return pg_comp_crc32c(crc, data, len); } #elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) diff --git a/src/port/meson.build b/src/port/meson.build index 7fcfa728d43..8d70a4d510e 100644 --- a/src/port/meson.build +++ b/src/port/meson.build @@ -83,6 +83,7 @@ replace_funcs_pos = [ # x86/x64 ['pg_crc32c_sse42', 'USE_SSE42_CRC32C'], ['pg_crc32c_sse42', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'], + ['pg_crc32c_sse42_choose', 'USE_SSE42_CRC32C'], ['pg_crc32c_sse42_choose', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'], ['pg_crc32c_sb8', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'], diff --git a/src/port/pg_crc32c_sse42_choose.c b/src/port/pg_crc32c_sse42_choose.c index abea0f90eb3..89a48c76894 100644 --- a/src/port/pg_crc32c_sse42_choose.c +++ b/src/port/pg_crc32c_sse42_choose.c @@ -55,8 +55,10 @@ pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) pg_comp_crc32c = pg_comp_crc32c_pclmul; #endif } +#ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK else pg_comp_crc32c = pg_comp_crc32c_sb8; +#endif return pg_comp_crc32c(crc, data, len); } -- 2.48.1