From d091528cd990ea84980441bf82064d5b884c6786 Mon Sep 17 00:00:00 2001 From: Jakub Wartak Date: Fri, 21 Feb 2025 10:19:35 +0100 Subject: [PATCH v16 1/4] Add optional dependency on libnuma (Linux-only) for basic NUMA awareness routines and add minimal src/port/pg_numa.c portability wrapper. Other platforms can be added later. This also adds the function pg_numa_available(), which can be used to check if the server was linked with NUMA support. libnuma is unavailable on 32-bit builds due to the lack of an i386 shared object, so we disable it there (it would not make much sense on i386 anyway, as it is a very memory-limited platform even with PAE). Author: Jakub Wartak Co-authored-by: Bertrand Drouvot Reviewed-by: Andres Freund Discussion: https://postgr.es/m/CAKZiRmxh6KWo0aqRqvmcoaX2jUxZYb4kGp3N%3Dq1w%2BDiH-696Xw%40mail.gmail.com --- .cirrus.tasks.yml | 12 +- configure | 87 ++++++++++++++ configure.ac | 13 +++ doc/src/sgml/func.sgml | 13 +++ doc/src/sgml/installation.sgml | 21 ++++ meson.build | 23 ++++ meson_options.txt | 3 + src/Makefile.global.in | 1 + src/backend/utils/misc/guc_tables.c | 2 +- src/include/catalog/pg_proc.dat | 4 + src/include/pg_config.h.in | 3 + src/include/port/pg_numa.h | 46 ++++++++ src/include/storage/pg_shmem.h | 1 + src/makefiles/meson.build | 3 + src/port/Makefile | 1 + src/port/meson.build | 1 + src/port/pg_numa.c | 168 ++++++++++++++++++++++++++++ 17 files changed, 397 insertions(+), 5 deletions(-) create mode 100644 src/include/port/pg_numa.h create mode 100644 src/port/pg_numa.c diff --git a/.cirrus.tasks.yml b/.cirrus.tasks.yml index 86a1fa9bbdb..e6963c774aa 100644 --- a/.cirrus.tasks.yml +++ b/.cirrus.tasks.yml @@ -445,8 +445,10 @@ task: EOF setup_additional_packages_script: | - #apt-get update - #DEBIAN_FRONTEND=noninteractive apt-get -y install ... 
+ apt-get update + DEBIAN_FRONTEND=noninteractive apt-get -y install \ + libnuma1 \ + libnuma-dev matrix: # SPECIAL: @@ -471,6 +473,7 @@ task: --enable-cassert --enable-injection-points --enable-debug \ --enable-tap-tests --enable-nls \ --with-segsize-blocks=6 \ + --with-libnuma \ --with-liburing \ \ ${LINUX_CONFIGURE_FEATURES} \ @@ -523,6 +526,7 @@ task: -Dllvm=disabled \ --pkg-config-path /usr/lib/i386-linux-gnu/pkgconfig/ \ -DPERL=perl5.36-i386-linux-gnu \ + -Dlibnuma=disabled \ build-32 EOF @@ -839,8 +843,8 @@ task: folder: $CCACHE_DIR setup_additional_packages_script: | - #apt-get update - #DEBIAN_FRONTEND=noninteractive apt-get -y install ... + apt-get update + DEBIAN_FRONTEND=noninteractive apt-get -y install libnuma1 libnuma-dev ### # Test that code can be built with gcc/clang without warnings diff --git a/configure b/configure index 4dd67a5cc6e..81e43a38331 100755 --- a/configure +++ b/configure @@ -711,6 +711,7 @@ with_libxml LIBCURL_LIBS LIBCURL_CFLAGS with_libcurl +with_libnuma with_uuid LIBURING_LIBS LIBURING_CFLAGS @@ -872,6 +873,7 @@ with_liburing with_uuid with_ossp_uuid with_libcurl +with_libnuma with_libxml with_libxslt with_system_tzdata @@ -1588,6 +1590,7 @@ Optional Packages: --with-uuid=LIB build contrib/uuid-ossp using LIB (bsd,e2fs,ossp) --with-ossp-uuid obsolete spelling of --with-uuid=ossp --with-libcurl build with libcurl support + --with-libnuma build with libnuma support --with-libxml build with XML support --with-libxslt use XSLT support when building contrib/xml2 --with-system-tzdata=DIR @@ -9279,6 +9282,33 @@ fi +# +# NUMA +# + + + +# Check whether --with-libnuma was given. +if test "${with_libnuma+set}" = set; then : + withval=$with_libnuma; + case $withval in + yes) + +$as_echo "#define USE_LIBNUMA 1" >>confdefs.h + + ;; + no) + : + ;; + *) + as_fn_error $? 
"no argument expected for --with-libnuma option" "$LINENO" 5 + ;; + esac + +else + with_libnuma=no + +fi @@ -12517,6 +12547,63 @@ fi fi +if test "$with_libnuma" = yes ; then + + ac_fn_c_check_header_mongrel "$LINENO" "numa.h" "ac_cv_header_numa_h" "$ac_includes_default" +if test "x$ac_cv_header_numa_h" = xyes; then : + +else + as_fn_error $? "header file is required for --with-libnuma" "$LINENO" 5 +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for numa_available in -lnuma" >&5 +$as_echo_n "checking for numa_available in -lnuma... " >&6; } +if ${ac_cv_lib_numa_numa_available+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lnuma $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char numa_available (); +int +main () +{ +return numa_available (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_numa_numa_available=yes +else + ac_cv_lib_numa_numa_available=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_numa_numa_available" >&5 +$as_echo "$ac_cv_lib_numa_numa_available" >&6; } +if test "x$ac_cv_lib_numa_numa_available" = xyes; then : + + LIBS="-lnuma $LIBS" + +else + as_fn_error $? "library 'numa' does not provide numa_available" "$LINENO" 5 +fi + +fi + # XXX libcurl must link after libgssapi_krb5 on FreeBSD to avoid segfaults # during gss_acquire_cred(). This is possibly related to Curl's Heimdal # dependency on that platform? 
diff --git a/configure.ac b/configure.ac index 537e654e7b3..1879baf183a 100644 --- a/configure.ac +++ b/configure.ac @@ -1053,6 +1053,19 @@ if test "$with_libcurl" = yes ; then fi +# +# libnuma +# +AC_MSG_CHECKING([whether to build with libnuma support]) +PGAC_ARG_BOOL(with, libnuma, no, [use libnuma for NUMA awareness], + [AC_DEFINE([USE_LIBNUMA], 1, [Define to build with NUMA awareness support. (--with-libnuma)])]) +AC_MSG_RESULT([$with_libnuma]) +AC_SUBST(with_libnuma) + +if test "$with_libnuma" = yes ; then + AC_CHECK_LIB(numa, numa_available, [], [AC_MSG_ERROR([library 'libnuma' is required for NUMA awareness])]) +fi + # # XML # diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 5bf6656deca..1f98826d16d 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -25138,6 +25138,19 @@ SELECT * FROM pg_ls_dir('.') WITH ORDINALITY AS t(ls,n); + + + + pg_numa_available + + pg_numa_available () + boolean + + + Returns true if the server has been compiled with NUMA support and the NUMA API is available on the running system. + + + diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml index cc28f041330..5f0486bb335 100644 --- a/doc/src/sgml/installation.sgml +++ b/doc/src/sgml/installation.sgml @@ -1156,6 +1156,16 @@ build-postgresql: + + + + + Build with libnuma to enable basic NUMA support. + Only supported on platforms for which the libnuma library is implemented. + + + + @@ -2645,6 +2655,17 @@ ninja install + + + + + Build with libnuma to enable basic NUMA support. + Only supported on platforms for which the libnuma library is implemented. + The default for this option is auto. 
+ + + + diff --git a/meson.build b/meson.build index 187f1787a3c..52c4f3c1022 100644 --- a/meson.build +++ b/meson.build @@ -943,6 +943,27 @@ else endif +############################################################### +# Library: libnuma +############################################################### + +libnumaopt = get_option('libnuma') +if not libnumaopt.disabled() + # via pkg-config + libnuma = dependency('numa', required: libnumaopt) + if not libnuma.found() + libnuma = cc.find_library('numa', required: libnumaopt) + endif + if not cc.has_header('numa.h', dependencies: libnuma, required: libnumaopt) + libnuma = not_found_dep + endif + if libnuma.found() + cdata.set('USE_LIBNUMA', 1) + endif +else + libnuma = not_found_dep +endif + ############################################################### # Library: liburing @@ -3177,6 +3198,7 @@ backend_both_deps += [ icu_i18n, ldap, libintl, + libnuma, liburing, libxml, lz4, @@ -3833,6 +3855,7 @@ if meson.version().version_compare('>=0.57') 'icu': icu, 'ldap': ldap, 'libcurl': libcurl, + 'libnuma': libnuma, 'liburing': liburing, 'libxml': libxml, 'libxslt': libxslt, diff --git a/meson_options.txt b/meson_options.txt index dd7126da3a7..8675e1b5d87 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -106,6 +106,9 @@ option('libcurl', type : 'feature', value: 'auto', option('libedit_preferred', type: 'boolean', value: false, description: 'Prefer BSD Libedit over GNU Readline') +option('libnuma', type: 'feature', value: 'auto', + description: 'NUMA awareness support') + option('liburing', type : 'feature', value: 'auto', description: 'io_uring support, for asynchronous I/O') diff --git a/src/Makefile.global.in b/src/Makefile.global.in index cce29a37ac5..71479ad9018 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -196,6 +196,7 @@ with_gssapi = @with_gssapi@ with_krb_srvnam = @with_krb_srvnam@ with_ldap = @with_ldap@ with_libcurl = @with_libcurl@ +with_libnuma = @with_libnuma@ with_liburing = 
@with_liburing@ with_libxml = @with_libxml@ with_libxslt = @with_libxslt@ diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 989825d3a9c..a80616d4455 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -566,7 +566,7 @@ static int ssl_renegotiation_limit; */ int huge_pages = HUGE_PAGES_TRY; int huge_page_size; -static int huge_pages_status = HUGE_PAGES_UNKNOWN; +int huge_pages_status = HUGE_PAGES_UNKNOWN; /* * These variables are all dummies that don't do anything, except in some diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 8b68b16d79d..d532b8c43b9 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -8506,6 +8506,10 @@ proargnames => '{name,off,size,allocated_size}', prosrc => 'pg_get_shmem_allocations' }, +{ oid => '9685', descr => 'Is NUMA compilation available?', + proname => 'pg_numa_available', provolatile => 'v', prorettype => 'bool', + proargtypes => '', prosrc => 'pg_numa_available' }, + # memory context of local backend { oid => '2282', descr => 'information about all memory contexts of local backend', diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index c6f055b3905..424d42b14f8 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -675,6 +675,9 @@ /* Define to 1 to build with libcurl support. (--with-libcurl) */ #undef USE_LIBCURL +/* Define to 1 to build with NUMA awareness support. (--with-libnuma) */ +#undef USE_LIBNUMA + /* Define to build with io_uring support. 
(--with-liburing) */ #undef USE_LIBURING diff --git a/src/include/port/pg_numa.h b/src/include/port/pg_numa.h new file mode 100644 index 00000000000..986152e0942 --- /dev/null +++ b/src/include/port/pg_numa.h @@ -0,0 +1,46 @@ +/*------------------------------------------------------------------------- + * + * pg_numa.h + * Basic NUMA portability routines + * + * + * Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/include/port/pg_numa.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_NUMA_H +#define PG_NUMA_H + +#include "c.h" +#include "postgres.h" +#include "fmgr.h" + +extern PGDLLIMPORT int pg_numa_init(void); +extern PGDLLIMPORT int pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status); +extern PGDLLIMPORT int pg_numa_get_max_node(void); +extern PGDLLIMPORT Size pg_numa_get_pagesize(void); +extern PGDLLIMPORT Datum pg_numa_available(PG_FUNCTION_ARGS); + +#ifdef USE_LIBNUMA + +/* + * Read from ptr so the page gets faulted in; required on Linux before + * pg_numa_query_pages(), as move_pages(2) needs a page-fault to return valid results. + */ +#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \ + ro_volatile_var = *(volatile uint64 *) (ptr) + +extern void numa_warn(int num, char *fmt,...) 
pg_attribute_printf(2, 3); +extern void numa_error(char *where); + +#else + +#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \ + do {} while(0) + +#endif + +#endif /* PG_NUMA_H */ diff --git a/src/include/storage/pg_shmem.h b/src/include/storage/pg_shmem.h index b99ebc9e86f..5f7d4b83a60 100644 --- a/src/include/storage/pg_shmem.h +++ b/src/include/storage/pg_shmem.h @@ -45,6 +45,7 @@ typedef struct PGShmemHeader /* standard header for all Postgres shmem */ extern PGDLLIMPORT int shared_memory_type; extern PGDLLIMPORT int huge_pages; extern PGDLLIMPORT int huge_page_size; +extern PGDLLIMPORT int huge_pages_status; /* Possible values for huge_pages and huge_pages_status */ typedef enum diff --git a/src/makefiles/meson.build b/src/makefiles/meson.build index 46d8da070e8..55da678ec27 100644 --- a/src/makefiles/meson.build +++ b/src/makefiles/meson.build @@ -200,6 +200,8 @@ pgxs_empty = [ 'ICU_LIBS', + 'LIBNUMA_CFLAGS', 'LIBNUMA_LIBS', + 'LIBURING_CFLAGS', 'LIBURING_LIBS', ] @@ -232,6 +234,7 @@ pgxs_deps = { 'icu': icu, 'ldap': ldap, 'libcurl': libcurl, + 'libnuma': libnuma, 'liburing': liburing, 'libxml': libxml, 'libxslt': libxslt, diff --git a/src/port/Makefile b/src/port/Makefile index 7843d7b67cb..8c8e6b92910 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -45,6 +45,7 @@ OBJS = \ path.o \ pg_bitutils.o \ pg_localeconv_r.o \ + pg_numa.o \ pg_popcount_avx512.o \ pg_strong_random.o \ pgcheckdir.o \ diff --git a/src/port/meson.build b/src/port/meson.build index 653539ba5b3..1eb8e38d047 100644 --- a/src/port/meson.build +++ b/src/port/meson.build @@ -8,6 +8,7 @@ pgport_sources = [ 'path.c', 'pg_bitutils.c', 'pg_localeconv_r.c', + 'pg_numa.c', 'pg_popcount_avx512.c', 'pg_strong_random.c', 'pgcheckdir.c', diff --git a/src/port/pg_numa.c b/src/port/pg_numa.c new file mode 100644 index 00000000000..7d905ef31f5 --- /dev/null +++ b/src/port/pg_numa.c @@ -0,0 +1,168 @@ +/*------------------------------------------------------------------------- + * + * 
pg_numa.c + * Basic NUMA portability routines + * + * + * Copyright (c) 2025, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/port/pg_numa.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include <unistd.h> + +#ifdef WIN32 +#include <windows.h> +#endif + +#include "fmgr.h" +#include "port/pg_numa.h" +#include "storage/pg_shmem.h" + +/* + * At this point we provide support only for Linux thanks to libnuma, but in + * future support for other platforms e.g. Win32 or FreeBSD might be possible + * too. For Win32 NUMA APIs see + * https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support + */ +#ifdef USE_LIBNUMA + +#include <numa.h> +#include <numaif.h> + +/* libnuma requires initialization as per numa(3) on Linux; -1 means NUMA is unavailable */ +int +pg_numa_init(void) +{ + int r = numa_available(); + + return r; +} + +int +pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status) +{ + return numa_move_pages(pid, count, pages, NULL, status, 0); +} + +int +pg_numa_get_max_node(void) +{ + return numa_max_node(); +} + +Size +pg_numa_get_pagesize(void) +{ + Size os_page_size = sysconf(_SC_PAGESIZE); + + if (huge_pages_status == HUGE_PAGES_ON) + GetHugePageSize(&os_page_size, NULL); + + return os_page_size; +} + +#ifndef FRONTEND +/* + * XXX: not really tested as there is no way to trigger this in our + * current usage of libnuma. + * + * The libnuma built-in code can be seen here: + * https://github.com/numactl/numactl/blob/master/libnuma.c + * + */ +void +numa_warn(int num, char *fmt,...) 
+{ + va_list ap; + int olde = errno; + int needed; + StringInfoData msg; + + initStringInfo(&msg); + + /* Retry until the whole message fits, enlarging the buffer as requested */ + for (;;) + { + va_start(ap, fmt); + needed = appendStringInfoVA(&msg, fmt, ap); + va_end(ap); + if (needed == 0) + break; + enlargeStringInfo(&msg, needed); + } + + ereport(WARNING, + (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), + errmsg_internal("libnuma: WARNING: %s", msg.data))); + + pfree(msg.data); + + errno = olde; +} + +void +numa_error(char *where) +{ + int olde = errno; + + /* + * XXX: for now we issue just WARNING, but long-term that might depend on + * numa_set_strict() here. + */ + elog(WARNING, "libnuma: ERROR: %s", where); + errno = olde; +} +#endif /* FRONTEND */ + +#else + +/* Empty wrappers */ +int +pg_numa_init(void) +{ + /* We state that NUMA is not available */ + return -1; +} + +int +pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status) +{ + return 0; +} + +int +pg_numa_get_max_node(void) +{ + return 0; +} + +Size +pg_numa_get_pagesize(void) +{ +#ifndef WIN32 + Size os_page_size = sysconf(_SC_PAGESIZE); +#else + Size os_page_size; + SYSTEM_INFO sysinfo; + + GetSystemInfo(&sysinfo); + os_page_size = sysinfo.dwPageSize; +#endif + if (huge_pages_status == HUGE_PAGES_ON) + GetHugePageSize(&os_page_size, NULL); + return os_page_size; +} + +#endif + +Datum +pg_numa_available(PG_FUNCTION_ARGS) +{ + PG_RETURN_BOOL(pg_numa_init() != -1); +} -- 2.39.5