From 7cac8c70d916a63d54fbfbb7d6bb8e9c753e71a1 Mon Sep 17 00:00:00 2001 From: Ronan Dunklau Date: Mon, 20 Dec 2021 13:29:48 +0100 Subject: [PATCH v1 1/2] Add the possibility of tuning malloc options. We provide a malloc-implementation-dependent version of a MallocAdjustSettings function, which can be used to tune different malloc options. For now, only a version for glibc's malloc is provided. This implementation tunes M_MMAP_THRESHOLD and M_TRIM_THRESHOLD according to the work_mem value and a new GUC, glibc_malloc_max_trim_threshold. If set to -1 (the default) this new tuning is disabled. If set to something else, we set M_TRIM_THRESHOLD to that value (capped to twice work_mem) and M_MMAP_THRESHOLD to half that value (capped to work_mem or 32MB). --- src/backend/utils/init/postinit.c | 9 +++ src/backend/utils/misc/guc.c | 37 ++++++++- src/backend/utils/mmgr/Makefile | 1 + src/backend/utils/mmgr/malloc_tuning.c | 106 +++++++++++++++++++++++++ src/include/utils/memutils.h | 10 +++ 5 files changed, 162 insertions(+), 1 deletion(-) create mode 100644 src/backend/utils/mmgr/malloc_tuning.c diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 7292e51f7d..ca170c430e 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -661,6 +661,12 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, InitCatalogCache(); InitPlanCache(); + /* Adjust malloc options if needed. + * This is done here because the implementation can vary depending on the + * type of backend. + */ + MallocAdjustSettings(); + /* Initialize portal manager */ EnablePortalManager(); @@ -1054,6 +1060,9 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, /* Initialize this backend's session state. 
*/ InitializeSession(); + /* Tune malloc options according to what we've read */ + /* MallocTuneHook(); */ + /* report this backend in the PgBackendStatus array */ if (!bootstrap) pgstat_bestart(); diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index f9504d3aec..7c0e124529 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -235,6 +235,11 @@ static bool check_recovery_target_lsn(char **newval, void **extra, GucSource sou static void assign_recovery_target_lsn(const char *newval, void *extra); static bool check_primary_slot_name(char **newval, void **extra, GucSource source); static bool check_default_with_oids(bool *newval, void **extra, GucSource source); +static void assign_work_mem(int newval, void* extra); +static void assign_glibc_trim_threshold(int newval, void* extra); + +static void check_reserved_prefixes(const char *varName); +static List *reserved_class_prefix = NIL; /* Private functions in guc-file.l that need to be called from guc.c */ static ConfigVariable *ProcessConfigFileInternal(GucContext context, @@ -2439,7 +2444,7 @@ static struct config_int ConfigureNamesInt[] = }, &work_mem, 4096, 64, MAX_KILOBYTES, - NULL, NULL, NULL + NULL, &assign_work_mem, NULL }, { @@ -2465,6 +2470,21 @@ static struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + {"glibc_malloc_max_trim_threshold", PGC_USERSET, RESOURCES_MEM, + gettext_noop("Sets the maximum value for glibc's M_TRIM_THRESHOLD option."), + gettext_noop("This controls how much memory glibc's will not return to the " + "OS once freed. An idle backend can thus consume that much memory " + "even if not in used. The default (-1) value disable static tuning " + "and relies on the default dynamic adjustment"), + GUC_UNIT_KB + }, + &glibc_malloc_max_trim_threshold, + -1, -1, MAX_KILOBYTES, + NULL, &assign_glibc_trim_threshold, NULL + }, + + /* * We use the hopefully-safely-small value of 100kB as the compiled-in * default for max_stack_depth. 
InitializeGUCOptions will increase it if @@ -12570,6 +12590,7 @@ check_primary_slot_name(char **newval, void **extra, GucSource source) return true; } + static bool check_default_with_oids(bool *newval, void **extra, GucSource source) { @@ -12585,4 +12606,18 @@ check_default_with_oids(bool *newval, void **extra, GucSource source) return true; } +static void +assign_work_mem(int newval, void* extra) +{ + work_mem = newval; + MallocAdjustSettings(); +} + +static void +assign_glibc_trim_threshold(int newval, void* extra) +{ + glibc_malloc_max_trim_threshold = newval; + MallocAdjustSettings(); +} + #include "guc-file.c" diff --git a/src/backend/utils/mmgr/Makefile b/src/backend/utils/mmgr/Makefile index 3b4cfdbd52..ab75a71249 100644 --- a/src/backend/utils/mmgr/Makefile +++ b/src/backend/utils/mmgr/Makefile @@ -17,6 +17,7 @@ OBJS = \ dsa.o \ freepage.o \ generation.o \ + malloc_tuning.o \ mcxt.o \ memdebug.o \ portalmem.o \ diff --git a/src/backend/utils/mmgr/malloc_tuning.c b/src/backend/utils/mmgr/malloc_tuning.c new file mode 100644 index 0000000000..90526d8cc1 --- /dev/null +++ b/src/backend/utils/mmgr/malloc_tuning.c @@ -0,0 +1,106 @@ +#include "postgres.h" + +#include "utils/memutils.h" +#include "miscadmin.h" + + +/* + * Implementation specific GUCs. Those are defined even if we use another implementation, but will have + * no effect in that case. + */ + +int glibc_malloc_max_trim_threshold; + +/* + * Depending on the malloc implementation used, we may want to + * tune it. + * In this first version, the only tunable library is glibc's malloc + * implementation. + */ +/* GLIBC implementation */ +#if defined(__GLIBC__) +#include <malloc.h> /* mallopt(), malloc_trim() */ +#include <limits.h> /* INT_MAX */ +#include <bits/wordsize.h> /* __WORDSIZE */ + +int previous_mmap_threshold = -1; +int previous_trim_threshold = -1; + +/* For GLIBC's malloc, we want to avoid having too many mmap'd memory regions, + * and also to avoid repeatedly allocating / releasing memory from the system. 
+ * + * The default configuration of malloc adapts its M_MMAP_THRESHOLD and M_TRIM_THRESHOLD + * dynamically when previously mmaped blocks are freed. + * + * This isn't really sufficient for our use case, as we may end up with a trim + * threshold which repeatedly releases work_mem memory to the system. + * + * Instead of letting malloc dynamically tune itself, for values up to 32MB we + * ensure that work_mem will fit both below M_MMAP_THRESHOLD and + * M_TRIM_THRESHOLD. The rationale for this is that once a backend has allocated + * this much memory, it is likely to use it again. + * + * To keep up with malloc's default behaviour, we set M_TRIM_THRESHOLD to + * M_MMAP_THRESHOLD * 2 so that work_mem blocks can avoid being released too + * early. + * + * Newer versions of malloc got rid of the MALLOC_MMAP_THRESHOLD upper limit, + * but we still enforce the values it sets to avoid wasting too much memory if we have a huge + * work_mem which is used only once. + */ + +# if __WORDSIZE == 32 +# define MMAP_THRESHOLD_MAX (512 * 1024) +# else +# define MMAP_THRESHOLD_MAX (4 * 1024 * 1024 * sizeof(long)) +# endif + +void +MallocAdjustSettings(void) +{ + int uncapped_mmap_threshold, + mmap_threshold, + trim_threshold; + int64 base_target; /* 64-bit: work_mem * 1024 + 4096 can exceed a 32-bit long */ + /* If static malloc tuning is disabled, bail out; thresholds set by an earlier call are left as they are. */ + if (glibc_malloc_max_trim_threshold == -1) + return; + /* We don't want to adjust anything in the postmaster process, as that would + * disable dynamic adjustment for any child process. */ + if ((MyProcPid == PostmasterPid) || + ((MyBackendType != B_BACKEND) && + (MyBackendType != B_BG_WORKER))) + return; + base_target = Min((int64) work_mem * 1024, (int64) glibc_malloc_max_trim_threshold / 2 * 1024); + /* To account for overhead, add one more memory page to that. 
*/ + base_target += 4096; + uncapped_mmap_threshold = Min(INT_MAX, base_target); + /* Cap mmap_threshold to MMAP_THRESHOLD_MAX */ + mmap_threshold = Min(MMAP_THRESHOLD_MAX, uncapped_mmap_threshold); + /* Set trim threshold to two times the uncapped value */ + trim_threshold = Min(INT_MAX, (int64) uncapped_mmap_threshold * 2); + if (mmap_threshold != previous_mmap_threshold) + { + mallopt(M_MMAP_THRESHOLD, mmap_threshold); + previous_mmap_threshold = mmap_threshold; + } + + if (trim_threshold != previous_trim_threshold) + { + mallopt(M_TRIM_THRESHOLD, trim_threshold); + /* If we reduce the trim_threshold, ask malloc to actually trim it. + * This allows us to recover from a bigger work_mem set up once, then + * reset back to a smaller value. + */ + if (trim_threshold < previous_trim_threshold) + malloc_trim(trim_threshold); + previous_trim_threshold = trim_threshold; + } +} + +/* Default no-op implementation for other malloc providers. */ +#else +void MallocAdjustSettings(void) +{ +} +#endif diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h index ff872274d4..b958926969 100644 --- a/src/include/utils/memutils.h +++ b/src/include/utils/memutils.h @@ -89,6 +89,16 @@ extern void MemoryContextStatsDetail(MemoryContext context, int max_children, extern void MemoryContextAllowInCriticalSection(MemoryContext context, bool allow); +/* Interface to tune underlying malloc implementation in mcxt.c. + * The implementation can only rely on GUCs for now, but it could be profitable + * to collect statistics about individual palloc / pfree cycles to determine the + * optimum size of certain values. + */ +extern void MallocAdjustSettings(void); + +/* Malloc-implementation specific GUCs */ +extern PGDLLIMPORT int glibc_malloc_max_trim_threshold; + #ifdef MEMORY_CONTEXT_CHECKING extern void MemoryContextCheck(MemoryContext context); #endif -- 2.34.1