From 46a7801b1985a81bb8bc35fcfb2cbb74e6ea5545 Mon Sep 17 00:00:00 2001 From: Jakub Wartak Date: Fri, 21 Feb 2025 10:19:35 +0100 Subject: [PATCH v19 1/8] Add support for basic NUMA awareness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add basic NUMA awareness routines, using a minimal src/port/pg_numa.c portability wrapper and an optional build dependency, enabled by --with-libnuma configure option. For now this is Linux-only, other platforms may be supported later. A built-in SQL function pg_numa_available() allows checking NUMA support, i.e. that the server was built/linked with NUMA library. The libnuma library is not available on 32-bit builds (there's no shared object for i386), so we disable it in that case. The i386 is very memory limited anyway, even with PAE, so NUMA is mostly irrelevant. Author: Jakub Wartak Co-authored-by: Bertrand Drouvot Reviewed-by: Andres Freund Reviewed-by: Álvaro Herrera Reviewed-by: Tomas Vondra Discussion: https://postgr.es/m/CAKZiRmxh6KWo0aqRqvmcoaX2jUxZYb4kGp3N%3Dq1w%2BDiH-696Xw%40mail.gmail.com --- .cirrus.tasks.yml | 2 + configure | 187 ++++++++++++++++++++++++++++ configure.ac | 14 +++ doc/src/sgml/func.sgml | 13 ++ doc/src/sgml/installation.sgml | 21 ++++ meson.build | 23 ++++ meson_options.txt | 3 + src/Makefile.global.in | 6 +- src/backend/utils/misc/guc_tables.c | 2 +- src/include/catalog/pg_proc.dat | 4 + src/include/pg_config.h.in | 3 + src/include/port/pg_numa.h | 41 ++++++ src/include/storage/pg_shmem.h | 1 + src/makefiles/meson.build | 3 + src/port/Makefile | 1 + src/port/meson.build | 1 + src/port/pg_numa.c | 110 ++++++++++++++++ 17 files changed, 433 insertions(+), 2 deletions(-) create mode 100644 src/include/port/pg_numa.h create mode 100644 src/port/pg_numa.c diff --git a/.cirrus.tasks.yml b/.cirrus.tasks.yml index 86a1fa9bbdb..6f4f5c674a1 100644 --- a/.cirrus.tasks.yml +++ b/.cirrus.tasks.yml @@ -471,6 +471,7 @@ task: --enable-cassert --enable-injection-points 
--enable-debug \ --enable-tap-tests --enable-nls \ --with-segsize-blocks=6 \ + --with-libnuma \ --with-liburing \ \ ${LINUX_CONFIGURE_FEATURES} \ @@ -523,6 +524,7 @@ task: -Dllvm=disabled \ --pkg-config-path /usr/lib/i386-linux-gnu/pkgconfig/ \ -DPERL=perl5.36-i386-linux-gnu \ + -Dlibnuma=disabled \ build-32 EOF diff --git a/configure b/configure index 30d949c3c46..bc195975c2e 100755 --- a/configure +++ b/configure @@ -708,6 +708,9 @@ XML2_LIBS XML2_CFLAGS XML2_CONFIG with_libxml +LIBNUMA_LIBS +LIBNUMA_CFLAGS +with_libnuma LIBCURL_LIBS LIBCURL_CFLAGS with_libcurl @@ -872,6 +875,7 @@ with_liburing with_uuid with_ossp_uuid with_libcurl +with_libnuma with_libxml with_libxslt with_system_tzdata @@ -906,6 +910,8 @@ LIBURING_CFLAGS LIBURING_LIBS LIBCURL_CFLAGS LIBCURL_LIBS +LIBNUMA_CFLAGS +LIBNUMA_LIBS XML2_CONFIG XML2_CFLAGS XML2_LIBS @@ -1588,6 +1594,7 @@ Optional Packages: --with-uuid=LIB build contrib/uuid-ossp using LIB (bsd,e2fs,ossp) --with-ossp-uuid obsolete spelling of --with-uuid=ossp --with-libcurl build with libcurl support + --with-libnuma build with libnuma for NUMA awareness --with-libxml build with XML support --with-libxslt use XSLT support when building contrib/xml2 --with-system-tzdata=DIR @@ -1629,6 +1636,10 @@ Some influential environment variables: C compiler flags for LIBCURL, overriding pkg-config LIBCURL_LIBS linker flags for LIBCURL, overriding pkg-config + LIBNUMA_CFLAGS + C compiler flags for LIBNUMA, overriding pkg-config + LIBNUMA_LIBS + linker flags for LIBNUMA, overriding pkg-config XML2_CONFIG path to xml2-config utility XML2_CFLAGS C compiler flags for XML2, overriding pkg-config XML2_LIBS linker flags for XML2, overriding pkg-config @@ -9063,6 +9074,182 @@ $as_echo "$as_me: WARNING: *** OAuth support tests require --with-python to run" fi +# +# libnuma +# +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build with libnuma support" >&5 +$as_echo_n "checking whether to build with libnuma support... 
" >&6; } + + + +# Check whether --with-libnuma was given. +if test "${with_libnuma+set}" = set; then : + withval=$with_libnuma; + case $withval in + yes) + +$as_echo "#define USE_LIBNUMA 1" >>confdefs.h + + ;; + no) + : + ;; + *) + as_fn_error $? "no argument expected for --with-libnuma option" "$LINENO" 5 + ;; + esac + +else + with_libnuma=no + +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_libnuma" >&5 +$as_echo "$with_libnuma" >&6; } + + +if test "$with_libnuma" = yes ; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for numa_available in -lnuma" >&5 +$as_echo_n "checking for numa_available in -lnuma... " >&6; } +if ${ac_cv_lib_numa_numa_available+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lnuma $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char numa_available (); +int +main () +{ +return numa_available (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_numa_numa_available=yes +else + ac_cv_lib_numa_numa_available=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_numa_numa_available" >&5 +$as_echo "$ac_cv_lib_numa_numa_available" >&6; } +if test "x$ac_cv_lib_numa_numa_available" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBNUMA 1 +_ACEOF + + LIBS="-lnuma $LIBS" + +else + as_fn_error $? "library 'libnuma' is required for NUMA awareness" "$LINENO" 5 +fi + + +pkg_failed=no +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for numa" >&5 +$as_echo_n "checking for numa... 
" >&6; } + +if test -n "$LIBNUMA_CFLAGS"; then + pkg_cv_LIBNUMA_CFLAGS="$LIBNUMA_CFLAGS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"numa\""; } >&5 + ($PKG_CONFIG --exists --print-errors "numa") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_LIBNUMA_CFLAGS=`$PKG_CONFIG --cflags "numa" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi +if test -n "$LIBNUMA_LIBS"; then + pkg_cv_LIBNUMA_LIBS="$LIBNUMA_LIBS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"numa\""; } >&5 + ($PKG_CONFIG --exists --print-errors "numa") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_LIBNUMA_LIBS=`$PKG_CONFIG --libs "numa" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + LIBNUMA_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "numa" 2>&1` + else + LIBNUMA_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "numa" 2>&1` + fi + # Put the nasty error message in config.log where it belongs + echo "$LIBNUMA_PKG_ERRORS" >&5 + + as_fn_error $? "Package requirements (numa) were not met: + +$LIBNUMA_PKG_ERRORS + +Consider adjusting the PKG_CONFIG_PATH environment variable if you +installed software in a non-standard prefix. 
+ +Alternatively, you may set the environment variables LIBNUMA_CFLAGS +and LIBNUMA_LIBS to avoid the need to call pkg-config. +See the pkg-config man page for more details." "$LINENO" 5 +elif test $pkg_failed = untried; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it +is in your PATH or set the PKG_CONFIG environment variable to the full +path to pkg-config. + +Alternatively, you may set the environment variables LIBNUMA_CFLAGS +and LIBNUMA_LIBS to avoid the need to call pkg-config. +See the pkg-config man page for more details. + +To get pkg-config, see <http://pkg-config.freedesktop.org/>. +See \`config.log' for more details" "$LINENO" 5; } +else + LIBNUMA_CFLAGS=$pkg_cv_LIBNUMA_CFLAGS + LIBNUMA_LIBS=$pkg_cv_LIBNUMA_LIBS + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + +fi +fi + # # XML # diff --git a/configure.ac b/configure.ac index 25cdfcf65af..064dfee5ad0 100644 --- a/configure.ac +++ b/configure.ac @@ -1053,6 +1053,20 @@ if test "$with_libcurl" = yes ; then fi +# +# libnuma +# +AC_MSG_CHECKING([whether to build with libnuma support]) +PGAC_ARG_BOOL(with, libnuma, no, [build with libnuma for NUMA awareness], + [AC_DEFINE([USE_LIBNUMA], 1, [Define to build with NUMA awareness support. 
(--with-libnuma)])]) +AC_MSG_RESULT([$with_libnuma]) +AC_SUBST(with_libnuma) + +if test "$with_libnuma" = yes ; then + AC_CHECK_LIB(numa, numa_available, [], [AC_MSG_ERROR([library 'libnuma' is required for NUMA awareness])]) + PKG_CHECK_MODULES(LIBNUMA, numa) +fi + # # XML # diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 5bf6656deca..1f98826d16d 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -25138,6 +25138,19 @@ SELECT * FROM pg_ls_dir('.') WITH ORDINALITY AS t(ls,n); + + + + pg_numa_available + + pg_numa_available () + boolean + + + Returns true if the server has been compiled with NUMA support. + + + diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml index cc28f041330..5f0486bb335 100644 --- a/doc/src/sgml/installation.sgml +++ b/doc/src/sgml/installation.sgml @@ -1156,6 +1156,16 @@ build-postgresql: + + + + + Build with libnuma support for basic NUMA support. + Only supported on platforms for which the libnuma library is implemented. + + + + @@ -2645,6 +2655,17 @@ ninja install + + + + + Build with libnuma support for basic NUMA support. + Only supported on platforms for which the libnuma library is implemented. + The default for this option is auto. 
+ + + + diff --git a/meson.build b/meson.build index b8da4966297..f509370ee42 100644 --- a/meson.build +++ b/meson.build @@ -943,6 +943,27 @@ else endif +############################################################### +# Library: libnuma +############################################################### + +libnumaopt = get_option('libnuma') +if not libnumaopt.disabled() + # via pkg-config + libnuma = dependency('numa', required: libnumaopt) + if not libnuma.found() + libnuma = cc.find_library('numa', required: libnumaopt) + endif + if not cc.has_header('numa.h', dependencies: libnuma, required: libnumaopt) + libnuma = not_found_dep + endif + if libnuma.found() + cdata.set('USE_LIBNUMA', 1) + endif +else + libnuma = not_found_dep +endif + ############################################################### # Library: liburing @@ -3225,6 +3246,7 @@ backend_both_deps += [ icu_i18n, ldap, libintl, + libnuma, liburing, libxml, lz4, @@ -3881,6 +3903,7 @@ if meson.version().version_compare('>=0.57') 'icu': icu, 'ldap': ldap, 'libcurl': libcurl, + 'libnuma': libnuma, 'liburing': liburing, 'libxml': libxml, 'libxslt': libxslt, diff --git a/meson_options.txt b/meson_options.txt index dd7126da3a7..8675e1b5d87 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -106,6 +106,9 @@ option('libcurl', type : 'feature', value: 'auto', option('libedit_preferred', type: 'boolean', value: false, description: 'Prefer BSD Libedit over GNU Readline') +option('libnuma', type: 'feature', value: 'auto', + description: 'NUMA awareness support') + option('liburing', type : 'feature', value: 'auto', description: 'io_uring support, for asynchronous I/O') diff --git a/src/Makefile.global.in b/src/Makefile.global.in index cce29a37ac5..8b61d1ed492 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -196,6 +196,7 @@ with_gssapi = @with_gssapi@ with_krb_srvnam = @with_krb_srvnam@ with_ldap = @with_ldap@ with_libcurl = @with_libcurl@ +with_libnuma = @with_libnuma@ with_liburing = 
@with_liburing@ with_libxml = @with_libxml@ with_libxslt = @with_libxslt@ @@ -223,6 +224,9 @@ krb_srvtab = @krb_srvtab@ ICU_CFLAGS = @ICU_CFLAGS@ ICU_LIBS = @ICU_LIBS@ +LIBNUMA_CFLAGS = @LIBNUMA_CFLAGS@ +LIBNUMA_LIBS = @LIBNUMA_LIBS@ + LIBURING_CFLAGS = @LIBURING_CFLAGS@ LIBURING_LIBS = @LIBURING_LIBS@ @@ -250,7 +254,7 @@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ PG_SYSROOT = @PG_SYSROOT@ -override CPPFLAGS := $(ICU_CFLAGS) $(LIBURING_CFLAGS) $(CPPFLAGS) +override CPPFLAGS := $(ICU_CFLAGS) $(LIBNUMA_CFLAGS) $(LIBURING_CFLAGS) $(CPPFLAGS) ifdef PGXS override CPPFLAGS := -I$(includedir_server) -I$(includedir_internal) $(CPPFLAGS) diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 4eaeca89f2c..ea8d796e7c4 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -566,7 +566,7 @@ static int ssl_renegotiation_limit; */ int huge_pages = HUGE_PAGES_TRY; int huge_page_size; -static int huge_pages_status = HUGE_PAGES_UNKNOWN; +int huge_pages_status = HUGE_PAGES_UNKNOWN; /* * These variables are all dummies that don't do anything, except in some diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 8b68b16d79d..d532b8c43b9 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -8506,6 +8506,10 @@ proargnames => '{name,off,size,allocated_size}', prosrc => 'pg_get_shmem_allocations' }, +{ oid => '9685', descr => 'Is NUMA compilation available?', + proname => 'pg_numa_available', provolatile => 'v', prorettype => 'bool', + proargtypes => '', prosrc => 'pg_numa_available' }, + # memory context of local backend { oid => '2282', descr => 'information about all memory contexts of local backend', diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 92f0616c400..e67f81da167 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -675,6 +675,9 @@ /* Define to 1 to build with libcurl support. 
(--with-libcurl) */ #undef USE_LIBCURL +/* Define to 1 to build with NUMA awareness support. (--with-libnuma) */ +#undef USE_LIBNUMA + /* Define to build with io_uring support. (--with-liburing) */ #undef USE_LIBURING diff --git a/src/include/port/pg_numa.h b/src/include/port/pg_numa.h new file mode 100644 index 00000000000..2fa0bc82a90 --- /dev/null +++ b/src/include/port/pg_numa.h @@ -0,0 +1,41 @@ +/*------------------------------------------------------------------------- + * + * pg_numa.h + * Basic NUMA portability routines + * + * + * Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/include/port/pg_numa.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_NUMA_H +#define PG_NUMA_H + +#include "fmgr.h" + +extern PGDLLIMPORT int pg_numa_init(void); +extern PGDLLIMPORT int pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status); +extern PGDLLIMPORT int pg_numa_get_max_node(void); +extern PGDLLIMPORT Size pg_numa_get_pagesize(void); +extern PGDLLIMPORT Datum pg_numa_available(PG_FUNCTION_ARGS); + +#ifdef USE_LIBNUMA + +/* + * This is required on Linux, before pg_numa_query_pages() as we + * need to page-fault before move_pages(2) syscall returns valid results. 
+ */ +#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \ + ro_volatile_var = *(volatile uint64 *) (ptr) /* volatile read: must not be optimized away */ + +#else + +#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \ + do {} while(0) + +#endif + +#endif /* PG_NUMA_H */ diff --git a/src/include/storage/pg_shmem.h b/src/include/storage/pg_shmem.h index b99ebc9e86f..5f7d4b83a60 100644 --- a/src/include/storage/pg_shmem.h +++ b/src/include/storage/pg_shmem.h @@ -45,6 +45,7 @@ typedef struct PGShmemHeader /* standard header for all Postgres shmem */ extern PGDLLIMPORT int shared_memory_type; extern PGDLLIMPORT int huge_pages; extern PGDLLIMPORT int huge_page_size; +extern PGDLLIMPORT int huge_pages_status; /* Possible values for huge_pages and huge_pages_status */ typedef enum diff --git a/src/makefiles/meson.build b/src/makefiles/meson.build index 46d8da070e8..55da678ec27 100644 --- a/src/makefiles/meson.build +++ b/src/makefiles/meson.build @@ -200,6 +200,8 @@ pgxs_empty = [ 'ICU_LIBS', + 'LIBNUMA_CFLAGS', 'LIBNUMA_LIBS', + 'LIBURING_CFLAGS', 'LIBURING_LIBS', ] @@ -232,6 +234,7 @@ pgxs_deps = { 'icu': icu, 'ldap': ldap, 'libcurl': libcurl, + 'libnuma': libnuma, 'liburing': liburing, 'libxml': libxml, 'libxslt': libxslt, diff --git a/src/port/Makefile b/src/port/Makefile index f11896440d5..4274949dfa4 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -45,6 +45,7 @@ OBJS = \ path.o \ pg_bitutils.o \ pg_localeconv_r.o \ + pg_numa.o \ pg_popcount_aarch64.o \ pg_popcount_avx512.o \ pg_strong_random.o \ diff --git a/src/port/meson.build b/src/port/meson.build index cf7f07644b9..3b26c68fda7 100644 --- a/src/port/meson.build +++ b/src/port/meson.build @@ -8,6 +8,7 @@ pgport_sources = [ 'path.c', 'pg_bitutils.c', 'pg_localeconv_r.c', + 'pg_numa.c', 'pg_popcount_aarch64.c', 'pg_popcount_avx512.c', 'pg_strong_random.c', diff --git a/src/port/pg_numa.c b/src/port/pg_numa.c new file mode 100644 index 00000000000..443cd85838a --- /dev/null +++ b/src/port/pg_numa.c @@ -0,0 +1,110 @@ 
+/*-------------------------------------------------------------------------
+ *
+ * pg_numa.c
+ * 		Basic NUMA portability routines
+ *
+ *
+ *	Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/port/pg_numa.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+#include <unistd.h>
+
+#ifdef WIN32
+#include <windows.h>
+#endif
+
+#include "fmgr.h"
+#include "miscadmin.h"
+#include "port/pg_numa.h"
+#include "storage/pg_shmem.h"
+
+/*
+ * At this point we provide support only for Linux thanks to libnuma, but in
+ * future support for other platforms e.g. Win32 or FreeBSD might be possible
+ * too. For Win32 NUMA APIs see
+ * https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support
+ */
+#ifdef USE_LIBNUMA
+
+#include <numa.h>
+#include <numaif.h>
+
+/* libnuma requires initialization as per numa(3) on Linux */
+int
+pg_numa_init(void)
+{
+	int			r = numa_available();
+
+	return r;
+}
+
+int
+pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
+{
+	return numa_move_pages(pid, count, pages, NULL, status, 0);
+}
+
+int
+pg_numa_get_max_node(void)
+{
+	return numa_max_node();
+}
+
+#else
+
+/* Stub wrappers for builds without libnuma */
+int
+pg_numa_init(void)
+{
+	/* We state that NUMA is not available */
+	return -1;
+}
+
+int
+pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
+{
+	return 0;
+}
+
+int
+pg_numa_get_max_node(void)
+{
+	return 0;
+}
+
+#endif
+
+Datum
+pg_numa_available(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_BOOL(pg_numa_init() != -1);
+}
+
+/* This should be used only after the server is started */
+Size
+pg_numa_get_pagesize(void)
+{
+	Size		os_page_size;
+#ifdef WIN32
+	SYSTEM_INFO sysinfo;
+	GetSystemInfo(&sysinfo);
+	os_page_size = sysinfo.dwPageSize;
+#else
+	os_page_size = sysconf(_SC_PAGESIZE);
+#endif
+
+	Assert(IsUnderPostmaster);
+	Assert(huge_pages_status != HUGE_PAGES_UNKNOWN);
+
+	if (huge_pages_status == HUGE_PAGES_ON)
+		GetHugePageSize(&os_page_size, NULL);
+
+	return os_page_size;
+}
-- 
2.49.0