From f9ae422bec79b965e58382a20da14621c10d2c19 Mon Sep 17 00:00:00 2001 From: Andrew Kim Date: Wed, 5 Nov 2025 00:00:20 -0800 Subject: [PATCH 1/2] Move platform-specific checksum code from header to source Key architectural changes: checksum_impl.h (simplified for external programs): - Contains only basic FNV-1a implementation - No platform-specific code (AVX2, CPUID, intrinsics) - External programs get portable standalone implementation - Uses static functions (no symbol conflicts with checksum.c) checksum.c (full implementation for PostgreSQL): - Includes checksum_impl.h for common definitions - Contains all platform-specific code (AVX2, CPUID detection) - Implements runtime dispatch based on CPU features - Provides public interfaces wrapping the basic implementation Architecture benefits: - Follows PostgreSQL principle: platform code belongs in .c files - No code duplication (checksum.c includes checksum_impl.h) - Clean separation: external programs vs internal optimization - Maintains backward compatibility for external tools Trade-offs: - External programs use basic implementation only (no AVX2) - PostgreSQL internal code gets full optimization --- config/c-compiler.m4 | 26 ++++ configure | 52 ++++++++ configure.ac | 9 ++ meson.build | 30 +++++ src/include/pg_config.h.in | 3 + src/include/port/checksum.h | 6 + src/include/port/checksum_impl.h | 13 +- src/port/checksum.c | 200 ++++++++++++++++++++++++++++++- 8 files changed, 331 insertions(+), 8 deletions(-) diff --git a/config/c-compiler.m4 b/config/c-compiler.m4 index 236a59e8536..40927d56e6a 100644 --- a/config/c-compiler.m4 +++ b/config/c-compiler.m4 @@ -581,6 +581,32 @@ fi undefine([Ac_cachevar])dnl ])# PGAC_SSE42_CRC32_INTRINSICS +# PGAC_AVX2_SUPPORT +# --------------------------- +# Check if the compiler supports AVX2 target attribute. +# This is used for optimized checksum calculations with runtime detection. +# +# If AVX2 target attribute is supported, sets pgac_avx2_support. +AC_DEFUN([PGAC_AVX2_SUPPORT], +[define([Ac_cachevar], [AS_TR_SH([pgac_cv_avx2_support])])dnl +AC_CACHE_CHECK([for AVX2 target attribute support], [Ac_cachevar], +[AC_COMPILE_IFELSE([AC_LANG_PROGRAM([#include + #if defined(__has_attribute) && __has_attribute (target) + __attribute__((target("avx2"))) + static int avx2_test(void) + { + return 0; + } + #endif], + [return avx2_test();])], + [Ac_cachevar=yes], + [Ac_cachevar=no])]) +if test x"$Ac_cachevar" = x"yes"; then + pgac_avx2_support=yes +fi +undefine([Ac_cachevar])dnl +])# PGAC_AVX2_SUPPORT + # PGAC_AVX512_PCLMUL_INTRINSICS # --------------------------- # Check if the compiler supports AVX-512 carryless multiplication diff --git a/configure b/configure index f7c24c8f576..08a04619284 100755 --- a/configure +++ b/configure @@ -17552,6 +17552,58 @@ $as_echo "#define HAVE_XSAVE_INTRINSICS 1" >>confdefs.h fi +# Check for AVX2 target and intrinsic support +# +if test x"$host_cpu" = x"x86_64"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX2 support" >&5 +$as_echo_n "checking for AVX2 support... " >&6; } +if ${pgac_cv_avx2_support+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + #include + #if defined(__has_attribute) && __has_attribute (target) + __attribute__((target("avx2"))) + #endif + static int avx2_test(void) + { + const char buf[sizeof(__m256i)]; + __m256i accum = _mm256_loadu_si256((const __m256i *) buf); + accum = _mm256_add_epi32(accum, accum); + int result = _mm256_extract_epi32(accum, 0); + return (int) result; + } +int +main () +{ +return avx2_test(); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + pgac_cv_avx2_support=yes +else + pgac_cv_avx2_support=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx2_support" >&5 +$as_echo "$pgac_cv_avx2_support" >&6; } +if test x"$pgac_cv_avx2_support" = x"yes"; then + pgac_avx2_support=yes +fi + + if test x"$pgac_avx2_support" = x"yes"; then + +$as_echo "#define USE_AVX2_WITH_RUNTIME_CHECK 1" >>confdefs.h + + fi +fi + # Check for AVX-512 popcount intrinsics # if test x"$host_cpu" = x"x86_64"; then diff --git a/configure.ac b/configure.ac index 6c802deaacb..00a701db1f2 100644 --- a/configure.ac +++ b/configure.ac @@ -2077,6 +2077,15 @@ else fi fi +# Check for AVX2 target and intrinsic support +# +if test x"$host_cpu" = x"x86_64"; then + PGAC_AVX2_SUPPORT() + if test x"$pgac_avx2_support" = x"yes"; then + AC_DEFINE(USE_AVX2_WITH_RUNTIME_CHECK, 1, [Define to 1 to use AVX2 instructions with a runtime check.]) + fi +fi + # Check for XSAVE intrinsics # PGAC_XSAVE_INTRINSICS() diff --git a/meson.build b/meson.build index 0f61ff6a700..55b26814efc 100644 --- a/meson.build +++ b/meson.build @@ -2293,6 +2293,36 @@ int main(void) endif +############################################################### +# Check for the availability of AVX2 support +############################################################### + +if host_cpu == 'x86_64' + + prog = ''' +#include +#include +#if defined(__has_attribute) && __has_attribute (target) +__attribute__((target("avx2"))) +#endif +static int avx2_test(void) +{ + return 0; +} + +int main(void) +{ + return avx2_test(); +} +''' + + if cc.links(prog, name: 'AVX2 support', args: test_c_args) + cdata.set('USE_AVX2_WITH_RUNTIME_CHECK', 1) + endif + +endif + + ############################################################### # Check for the availability of AVX-512 popcount intrinsics. ############################################################### diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index f52f14cc566..66556985a63 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -678,6 +678,9 @@ /* Define to 1 to use AVX-512 CRC algorithms with a runtime check. */ #undef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK +/* Define to 1 to use AVX2 instructions with a runtime check. */ +#undef USE_AVX2_WITH_RUNTIME_CHECK + /* Define to 1 to use AVX-512 popcount instructions with a runtime check. */ #undef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK diff --git a/src/include/port/checksum.h b/src/include/port/checksum.h index c2faed83ede..531c94404f8 100644 --- a/src/include/port/checksum.h +++ b/src/include/port/checksum.h @@ -21,4 +21,10 @@ */ extern uint16 pg_checksum_page(char *page, BlockNumber blkno); +/* + * Choose the best available checksum implementation and compute checksum + * for a single block. + */ +extern uint32 pg_checksum_block_choose(const char *data); + #endif /* CHECKSUM_H */ diff --git a/src/include/port/checksum_impl.h b/src/include/port/checksum_impl.h index 00cb0549f24..7993da92325 100644 --- a/src/include/port/checksum_impl.h +++ b/src/include/port/checksum_impl.h @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------- * * checksum_impl.h - * Checksum implementation for data pages. + * Checksum implementation for data pages. * * This file exists for the benefit of external programs that may wish to * check Postgres page checksums. They can #include this to get the code @@ -173,6 +173,8 @@ pg_checksum_block(const PGChecksummablePage *page) return result; } +#ifndef PG_CHECKSUM_INTERNAL + /* * Compute the checksum for a Postgres page. * @@ -182,6 +184,9 @@ pg_checksum_block(const PGChecksummablePage *page) * The checksum includes the block number (to detect the case where a page is * somehow moved to a different location), the page header (excluding the * checksum itself), and the page data. + * + * This function is only defined when included by external programs. + * PostgreSQL internal code uses the optimized version in checksum.c. */ uint16 pg_checksum_page(char *page, BlockNumber blkno) @@ -196,8 +201,8 @@ pg_checksum_page(char *page, BlockNumber blkno) /* * Save pd_checksum and temporarily set it to zero, so that the checksum * calculation isn't affected by the old checksum stored on the page. - * Restore it after, because actually updating the checksum is NOT part of - * the API of this function. + * Restore it after, because actually updating the checksum is NOT part + * of the API of this function. */ save_checksum = cpage->phdr.pd_checksum; cpage->phdr.pd_checksum = 0; @@ -213,3 +218,5 @@ pg_checksum_page(char *page, BlockNumber blkno) */ return (uint16) ((checksum % 65535) + 1); } + +#endif /* !PG_CHECKSUM_INTERNAL */ diff --git a/src/port/checksum.c b/src/port/checksum.c index de61a46231d..1f684d80db9 100644 --- a/src/port/checksum.c +++ b/src/port/checksum.c @@ -1,7 +1,11 @@ /*------------------------------------------------------------------------- * * checksum.c - * Checksum implementation for data pages. + * Checksum implementation for data pages. + * + * This file provides the platform-specific optimizations (AVX2, CPUID) + * and runtime dispatch logic. It includes checksum_impl.h for the + * basic implementation and common definitions. * * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -11,12 +15,198 @@ * *------------------------------------------------------------------------- */ + #include "postgres.h" #include "port/checksum.h" +#include "storage/bufpage.h" + +/* Prevent duplicate pg_checksum_page definition from checksum_impl.h */ +#define PG_CHECKSUM_INTERNAL + +/* Include the basic implementation and common definitions */ +#include "port/checksum_impl.h" + +#ifdef USE_AVX2_WITH_RUNTIME_CHECK +#include +#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT) +#include +#endif +#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX) +#include +#endif +#endif + +#ifdef USE_AVX2_WITH_RUNTIME_CHECK + +/* + * Does CPUID say there's support for XSAVE instructions? + */ +static inline bool +xsave_available(void) +{ + unsigned int exx[4] = {0, 0, 0, 0}; + +#if defined(HAVE__GET_CPUID) + __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); +#elif defined(HAVE__CPUID) + __cpuid(exx, 1); +#else +#error cpuid instruction not available +#endif + return (exx[2] & (1 << 27)) != 0; /* osxsave */ +} + /* - * The actual code is in port/checksum_impl.h. This is done so that - * external programs can incorporate the checksum code by #include'ing - * that file from the exported Postgres headers. (Compare our CRC code.) + * Does XGETBV say the YMM registers are enabled? + * + * NB: Caller is responsible for verifying that xsave_available() returns true + * before calling this. */ -#include "port/checksum_impl.h" /* IWYU pragma: keep */ +#ifdef HAVE_XSAVE_INTRINSICS +pg_attribute_target("xsave") +#endif +static inline bool +ymm_regs_available(void) +{ +#ifdef HAVE_XSAVE_INTRINSICS + return (_xgetbv(0) & 0x06) == 0x06; +#else + return false; +#endif +} + +/* + * Check for AVX2 support using CPUID detection + */ +static inline bool +avx2_available(void) +{ + unsigned int exx[4] = {0, 0, 0, 0}; + + if (!xsave_available() || !ymm_regs_available()) + return false; + +#if defined(HAVE__GET_CPUID_COUNT) + __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); +#elif defined(HAVE__CPUIDEX) + __cpuidex(exx, 7, 0); +#else +#error cpuid instruction not available +#endif + return (exx[1] & (1 << 5)) != 0; /* avx2 */ +} + +/* + * AVX2-optimized block checksum algorithm. + * Same algorithm as default, but compiled with AVX2 target for auto-vectorization. + */ +pg_attribute_target("avx2") +static uint32 +pg_checksum_block_avx2(const PGChecksummablePage *page) +{ + uint32 sums[N_SUMS]; + uint32 result = 0; + uint32 i, + j; + + /* ensure that the size is compatible with the algorithm */ + Assert(sizeof(PGChecksummablePage) == BLCKSZ); + + /* initialize partial checksums to their corresponding offsets */ + memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets)); + + /* main checksum calculation */ + for (i = 0; i < (uint32) (BLCKSZ / (sizeof(uint32) * N_SUMS)); i++) + for (j = 0; j < N_SUMS; j++) + CHECKSUM_COMP(sums[j], page->data[i][j]); + + /* finally add in two rounds of zeroes for additional mixing */ + for (i = 0; i < 2; i++) + for (j = 0; j < N_SUMS; j++) + CHECKSUM_COMP(sums[j], 0); + + /* xor fold partial checksums together */ + for (i = 0; i < N_SUMS; i++) + result ^= sums[i]; + + return result; +} +#endif /* USE_AVX2_WITH_RUNTIME_CHECK */ + +/* Function pointer for the checksum implementation */ +static uint32 (*pg_checksum_block_impl) (const PGChecksummablePage *page) = NULL; + +/* + * Initialize the checksum implementation based on available CPU features. + */ +static void +pg_checksum_init(void) +{ + if (pg_checksum_block_impl != NULL) + return; /* already initialized */ + +#ifdef USE_AVX2_WITH_RUNTIME_CHECK + if (avx2_available()) + pg_checksum_block_impl = pg_checksum_block_avx2; + else +#endif + pg_checksum_block_impl = pg_checksum_block; +} + +/* + * Choose the best available checksum implementation. + * This follows the naming pattern of pg_crc32c_choose. + */ +uint32 +pg_checksum_block_choose(const char *data) +{ + const PGChecksummablePage *page = (const PGChecksummablePage *) data; + + /* Initialize the implementation if not done already */ + if (pg_checksum_block_impl == NULL) + pg_checksum_init(); + + return pg_checksum_block_impl(page); +} + +/* + * Compute the checksum for a Postgres page. + * + * The page must be adequately aligned (at least on a 4-byte boundary). + * Beware also that the checksum field of the page is transiently zeroed. + * + * The checksum includes the block number (to detect the case where a page is + * somehow moved to a different location), the page header (excluding the + * checksum itself), and the page data. + */ +uint16 +pg_checksum_page(char *page, BlockNumber blkno) +{ + PGChecksummablePage *cpage = (PGChecksummablePage *) page; + uint16 save_checksum; + uint32 checksum; + + /* We only calculate the checksum for properly-initialized pages */ + Assert(!PageIsNew((Page) page)); + + /* + * Save pd_checksum and temporarily set it to zero, so that the checksum + * calculation isn't affected by the old checksum stored on the page. + * Restore it after, because actually updating the checksum is NOT part + * of the API of this function. + */ + save_checksum = cpage->phdr.pd_checksum; + cpage->phdr.pd_checksum = 0; + checksum = pg_checksum_block_choose((const char *) cpage); + cpage->phdr.pd_checksum = save_checksum; + + /* Mix in the block number to detect transposed pages */ + checksum ^= blkno; + + /* + * Reduce to a uint16 (to fit in the pd_checksum field) with an offset of + * one. That avoids checksums of zero, which seems like a good idea. + */ + return (uint16) ((checksum % 65535) + 1); +} -- 2.43.0