From c518f1226f8961fdef88600a6d388674e184cff7 Mon Sep 17 00:00:00 2001 From: Daniil Davidov Date: Fri, 16 May 2025 11:58:40 +0700 Subject: [PATCH v2 1/2] Parallel index autovacuum with bgworkers --- src/backend/access/common/reloptions.c | 11 ++++ src/backend/commands/vacuum.c | 55 +++++++++++++++++++ src/backend/commands/vacuumparallel.c | 46 ++++++++++------ src/backend/postmaster/autovacuum.c | 9 +++ src/backend/postmaster/bgworker.c | 33 ++++++++++- src/backend/utils/init/globals.c | 1 + src/backend/utils/misc/guc_tables.c | 12 ++++ src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/miscadmin.h | 1 + src/include/utils/guc_hooks.h | 2 + src/include/utils/rel.h | 10 ++++ 11 files changed, 162 insertions(+), 19 deletions(-) diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 46c1dce222d..ccf59208783 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -166,6 +166,15 @@ static relopt_bool boolRelOpts[] = }, true }, + { + { + "parallel_idx_autovac_enabled", + "Allows autovacuum to process indexes of this table in parallel mode", + RELOPT_KIND_HEAP, + ShareUpdateExclusiveLock + }, + false + }, /* list terminator */ {{NULL}} }; @@ -1863,6 +1872,8 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind) {"fillfactor", RELOPT_TYPE_INT, offsetof(StdRdOptions, fillfactor)}, {"autovacuum_enabled", RELOPT_TYPE_BOOL, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, enabled)}, + {"parallel_idx_autovac_enabled", RELOPT_TYPE_BOOL, + offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, parallel_idx_autovac_enabled)}, {"autovacuum_vacuum_threshold", RELOPT_TYPE_INT, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_threshold)}, {"autovacuum_vacuum_max_threshold", RELOPT_TYPE_INT, diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 33a33bf6b1c..f7667f14147 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -57,9 +57,21 @@ #include "utils/guc.h" #include "utils/guc_hooks.h" #include "utils/memutils.h" +#include "utils/rel.h" #include "utils/snapmgr.h" #include "utils/syscache.h" +/* + * Minimum number of dead tuples required for the table's indexes to be + * processed in parallel during autovacuum. + */ +#define AV_PARALLEL_DEADTUP_THRESHOLD 1024 + +/* + * How many indexes should process each parallel worker during autovacuum. + */ +#define NUM_INDEXES_PER_PARALLEL_WORKER 30 + /* * Minimum interval for cost-based vacuum delay reports from a parallel worker. * This aims to avoid sending too many messages and waking up the leader too @@ -2234,6 +2246,49 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, else toast_relid = InvalidOid; + /* + * If we are running autovacuum - decide whether we need to process indexes + * of table with given oid in parallel. + */ + if (AmAutoVacuumWorkerProcess() && + params->index_cleanup != VACOPTVALUE_DISABLED && + RelationAllowsParallelIdxAutovac(rel)) + { + PgStat_StatTabEntry *tabentry; + + /* fetch the pgstat table entry */ + tabentry = pgstat_fetch_stat_tabentry_ext(rel->rd_rel->relisshared, + rel->rd_id); + if (tabentry && tabentry->dead_tuples >= AV_PARALLEL_DEADTUP_THRESHOLD) + { + List *indexes = RelationGetIndexList(rel); + int num_indexes = list_length(indexes); + + list_free(indexes); + + if (av_reserved_workers_num > 0) + { + /* + * We request at least one parallel worker, if user set + * 'parallel_idx_autovac_enabled' option. The total number of + * additional parallel workers depends on how many indexes the + * table has. For now we assume that each parallel worker should + * process NUM_INDEXES_PER_PARALLEL_WORKER indexes. + */ + params->nworkers = + Min((num_indexes / NUM_INDEXES_PER_PARALLEL_WORKER) + 1, + av_reserved_workers_num); + } + else + ereport(WARNING, + (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED), + errmsg("Cannot launch any supportive workers for parallel index cleanup of rel %s", + RelationGetRelationName(rel)), + errhint("You might need to set parameter \"av_reserved_workers_num\" to a value > 0"))); + + } + } + /* * Switch to the table owner's userid, so that any index functions are run * as that user. Also lock down security-restricted operations and diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c index 2b9d548cdeb..e2b3e5b343c 100644 --- a/src/backend/commands/vacuumparallel.c +++ b/src/backend/commands/vacuumparallel.c @@ -1,15 +1,15 @@ /*------------------------------------------------------------------------- * * vacuumparallel.c - * Support routines for parallel vacuum execution. + * Support routines for parallel [auto]vacuum execution. * * This file contains routines that are intended to support setting up, using, * and tearing down a ParallelVacuumState. * - * In a parallel vacuum, we perform both index bulk deletion and index cleanup - * with parallel worker processes. Individual indexes are processed by one - * vacuum process. ParallelVacuumState contains shared information as well as - * the memory space for storing dead items allocated in the DSA area. We + * In a parallel [auto]vacuum, we perform both index bulk deletion and index + * cleanup with parallel worker processes. Individual indexes are processed by + * one vacuum process. ParallelVacuumState contains shared information as well + * as the memory space for storing dead items allocated in the DSA area. We * launch parallel worker processes at the start of parallel index * bulk-deletion and index cleanup and once all indexes are processed, the * parallel worker processes exit. Each time we process indexes in parallel, @@ -34,6 +34,7 @@ #include "executor/instrument.h" #include "optimizer/paths.h" #include "pgstat.h" +#include "postmaster/autovacuum.h" #include "storage/bufmgr.h" #include "tcop/tcopprot.h" #include "utils/lsyscache.h" @@ -157,7 +158,8 @@ typedef struct PVIndStats } PVIndStats; /* - * Struct for maintaining a parallel vacuum state. typedef appears in vacuum.h. + * Struct for maintaining a parallel [auto]vacuum state. typedef appears in + * vacuum.h. */ struct ParallelVacuumState { @@ -371,10 +373,18 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, shared->relid = RelationGetRelid(rel); shared->elevel = elevel; shared->queryid = pgstat_get_my_query_id(); - shared->maintenance_work_mem_worker = - (nindexes_mwm > 0) ? - maintenance_work_mem / Min(parallel_workers, nindexes_mwm) : - maintenance_work_mem; + + if (AmAutoVacuumWorkerProcess()) + shared->maintenance_work_mem_worker = + (nindexes_mwm > 0) ? + autovacuum_work_mem / Min(parallel_workers, nindexes_mwm) : + autovacuum_work_mem; + else + shared->maintenance_work_mem_worker = + (nindexes_mwm > 0) ? + maintenance_work_mem / Min(parallel_workers, nindexes_mwm) : + maintenance_work_mem; + shared->dead_items_info.max_bytes = vac_work_mem * (size_t) 1024; /* Prepare DSA space for dead items */ @@ -558,7 +568,9 @@ parallel_vacuum_compute_workers(Relation *indrels, int nindexes, int nrequested, * We don't allow performing parallel operation in standalone backend or * when parallelism is disabled. */ - if (!IsUnderPostmaster || max_parallel_maintenance_workers == 0) + if (!IsUnderPostmaster || + (av_reserved_workers_num == 0 && AmAutoVacuumWorkerProcess()) || + (max_parallel_maintenance_workers == 0 && !AmAutoVacuumWorkerProcess())) return 0; /* @@ -597,15 +609,17 @@ parallel_vacuum_compute_workers(Relation *indrels, int nindexes, int nrequested, parallel_workers = (nrequested > 0) ? Min(nrequested, nindexes_parallel) : nindexes_parallel; - /* Cap by max_parallel_maintenance_workers */ - parallel_workers = Min(parallel_workers, max_parallel_maintenance_workers); + /* Cap by GUC variable */ + parallel_workers = AmAutoVacuumWorkerProcess() ? + Min(parallel_workers, av_reserved_workers_num) : + Min(parallel_workers, max_parallel_maintenance_workers); return parallel_workers; } /* * Perform index vacuum or index cleanup with parallel workers. This function - * must be used by the parallel vacuum leader process. + * must be used by the parallel [auto]vacuum leader process. */ static void parallel_vacuum_process_all_indexes(ParallelVacuumState *pvs, int num_index_scans, @@ -982,8 +996,8 @@ parallel_vacuum_index_is_parallel_safe(Relation indrel, int num_index_scans, /* * Perform work within a launched parallel process. * - * Since parallel vacuum workers perform only index vacuum or index cleanup, - * we don't need to report progress information. + * Since parallel [auto]vacuum workers perform only index vacuum or index + * cleanup, we don't need to report progress information. */ void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc) diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 4d4a1a3197e..e7e340c4e7c 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -3406,6 +3406,15 @@ check_autovacuum_work_mem(int *newval, void **extra, GucSource source) return true; } +bool +check_autovacuum_reserved_workers_num(int *newval, void **extra, + GucSource source) +{ + if (*newval > (max_worker_processes - 8)) + return false; + return true; +} + /* * Returns whether there is a free autovacuum worker slot available. */ diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c index 116ddf7b835..cb86db99da9 100644 --- a/src/backend/postmaster/bgworker.c +++ b/src/backend/postmaster/bgworker.c @@ -1046,6 +1046,8 @@ RegisterDynamicBackgroundWorker(BackgroundWorker *worker, BackgroundWorkerHandle **handle) { int slotno; + int from; + int upto; bool success = false; bool parallel; uint64 generation = 0; @@ -1088,10 +1090,23 @@ RegisterDynamicBackgroundWorker(BackgroundWorker *worker, return false; } + /* + * Determine range of workers in pool, that we can use (last + * 'av_reserved_workers_num' is reserved for autovacuum workers). + */ + + from = AmAutoVacuumWorkerProcess() ? + BackgroundWorkerData->total_slots - av_reserved_workers_num : + 0; + + upto = AmAutoVacuumWorkerProcess() ? + BackgroundWorkerData->total_slots : + BackgroundWorkerData->total_slots - av_reserved_workers_num; + /* * Look for an unused slot. If we find one, grab it. */ - for (slotno = 0; slotno < BackgroundWorkerData->total_slots; ++slotno) + for (slotno = from; slotno < upto; ++slotno) { BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno]; @@ -1159,7 +1174,13 @@ GetBackgroundWorkerPid(BackgroundWorkerHandle *handle, pid_t *pidp) BackgroundWorkerSlot *slot; pid_t pid; - Assert(handle->slot < max_worker_processes); + /* Only autovacuum can use last 'av_reserved_workers_num' workers in pool. */ + if (!AmAutoVacuumWorkerProcess()) + Assert(handle->slot < max_worker_processes - av_reserved_workers_num); + else + Assert(handle->slot < max_worker_processes && + handle->slot >= max_worker_processes - av_reserved_workers_num); + slot = &BackgroundWorkerData->slot[handle->slot]; /* @@ -1298,7 +1319,13 @@ TerminateBackgroundWorker(BackgroundWorkerHandle *handle) BackgroundWorkerSlot *slot; bool signal_postmaster = false; - Assert(handle->slot < max_worker_processes); + /* Only autovacuum can use last 'av_reserved_workers_num' workers in pool. */ + if (!AmAutoVacuumWorkerProcess()) + Assert(handle->slot < max_worker_processes - av_reserved_workers_num); + else + Assert(handle->slot < max_worker_processes && + handle->slot >= max_worker_processes - av_reserved_workers_num); + slot = &BackgroundWorkerData->slot[handle->slot]; /* Set terminate flag in shared memory, unless slot has been reused. */ diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index 92b0446b80c..cff13ef6bd7 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -144,6 +144,7 @@ int NBuffers = 16384; int MaxConnections = 100; int max_worker_processes = 8; int max_parallel_workers = 8; +int av_reserved_workers_num = 0; int MaxBackends = 0; /* GUC parameters for vacuum */ diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 2f8cbd86759..90b4e9570cf 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -3604,6 +3604,18 @@ struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + {"autovacuum_reserved_workers_num", PGC_USERSET, RESOURCES_WORKER_PROCESSES, + gettext_noop("Number of worker processes, reserved for participation in parallel index processing during autovacuum."), + gettext_noop("This parameter is depending on \"max_worker_processes\" (not on \"autovacuum_max_workers\"). " + "*Only* autovacuum workers can use these additional processes. " + "Also, these processes are taken into account in \"max_parallel_workers\"."), + }, + &av_reserved_workers_num, + 0, 0, MAX_BACKENDS, + check_autovacuum_reserved_workers_num, NULL, NULL + }, + { {"max_parallel_maintenance_workers", PGC_USERSET, RESOURCES_WORKER_PROCESSES, gettext_noop("Sets the maximum number of parallel processes per maintenance operation."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 34826d01380..2e38bada2b0 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -223,6 +223,7 @@ #max_parallel_maintenance_workers = 2 # limited by max_parallel_workers #max_parallel_workers = 8 # number of max_worker_processes that # can be used in parallel operations +#autovacuum_reserved_workers_num = 0 # disabled by default and limited by max_parallel_workers #parallel_leader_participation = on diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 1e59a7f910f..992c6b63226 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -177,6 +177,7 @@ extern PGDLLIMPORT int NBuffers; extern PGDLLIMPORT int MaxBackends; extern PGDLLIMPORT int MaxConnections; extern PGDLLIMPORT int max_worker_processes; +extern PGDLLIMPORT int av_reserved_workers_num; extern PGDLLIMPORT int max_parallel_workers; extern PGDLLIMPORT int commit_timestamp_buffers; diff --git a/src/include/utils/guc_hooks.h b/src/include/utils/guc_hooks.h index 799fa7ace68..9913c6e4681 100644 --- a/src/include/utils/guc_hooks.h +++ b/src/include/utils/guc_hooks.h @@ -31,6 +31,8 @@ extern void assign_application_name(const char *newval, void *extra); extern const char *show_archive_command(void); extern bool check_autovacuum_work_mem(int *newval, void **extra, GucSource source); +bool check_autovacuum_reserved_workers_num(int *newval, void **extra, + GucSource source); extern bool check_vacuum_buffer_usage_limit(int *newval, void **extra, GucSource source); extern bool check_backtrace_functions(char **newval, void **extra, diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index b552359915f..55aa5c45be1 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -311,6 +311,7 @@ typedef struct ForeignKeyCacheInfo typedef struct AutoVacOpts { bool enabled; + bool parallel_idx_autovac_enabled; int vacuum_threshold; int vacuum_max_threshold; int vacuum_ins_threshold; @@ -409,6 +410,15 @@ typedef struct StdRdOptions ((relation)->rd_options ? \ ((StdRdOptions *) (relation)->rd_options)->parallel_workers : (defaultpw)) +/* + * RelationAllowsParallelIdxAutovac + * Returns whether the relation's indexes can be processed in parallel + * during autovacuum. Note multiple eval of argument! + */ +#define RelationAllowsParallelIdxAutovac(relation) \ + ((relation)->rd_options ? \ + ((StdRdOptions *) (relation)->rd_options)->autovacuum.parallel_idx_autovac_enabled : false) + /* ViewOptions->check_option values */ typedef enum ViewOptCheckOption { -- 2.43.0