From e41d3f45fcd6f639b768c22139006ad11422575f Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Thu, 24 Nov 2022 18:20:36 -0800 Subject: [PATCH v17 1/3] Add eager and lazy freezing strategies to VACUUM. Eager freezing strategy avoids large build-ups of all-visible pages. It makes VACUUM trigger page-level freezing whenever doing so will enable the page to become all-frozen in the visibility map. This is useful for tables that experience continual growth, particularly strict append-only tables such as pgbench's history table. Eager freezing significantly improves performance stability by spreading out the cost of freezing over time, rather than doing most freezing during aggressive VACUUMs. It complements the insert autovacuum mechanism added by commit b07642db. VACUUM determines its freezing strategy based on the value of the new vacuum_freeze_strategy_threshold GUC (or reloption) with logged tables; tables that exceed the size threshold use the eager freezing strategy. Unlogged tables and temp tables will always use eager freezing strategy, since there is essentially no downside. Our policy for non-permanent relations results in no extra WAL writes, and no extra dirtying of pages (freezing doesn't need to be WAL-logged here, plus eager freezing can only affect pages that already need to have PD_ALL_VISIBLE set). VACUUM uses lazy freezing strategy for logged tables that fall under the GUC size threshold. Page-level freezing triggers based on the criteria established in commit 1de58df4, which added basic page-level freezing. Note that even lazy freezing strategy will trigger freezing whenever a page happens to have required that an FPI be written during pruning. Eager freezing is strictly more aggressive than lazy freezing. Settings like vacuum_freeze_min_age still get applied in just the same way in every VACUUM, independent of the strategy in use. 
The only mechanical difference between eager and lazy freezing strategies is that only the former applies its own additional criteria to trigger freezing pages. The vacuum_freeze_strategy_threshold default is 4096 megabytes (4 GiB). This relatively low default setting prioritizes performance stability. It will be reviewed at the end of the Postgres 16 beta period. Author: Peter Geoghegan Reviewed-By: Jeff Davis Reviewed-By: Andres Freund Reviewed-By: Matthias van de Meent Discussion: https://postgr.es/m/CAH2-WzkFok_6EAHuK39GaW4FjEFQsY=3J0AAd6FXk93u-Xq3Fg@mail.gmail.com --- src/include/commands/vacuum.h | 12 +++++ src/include/utils/rel.h | 1 + src/backend/access/common/reloptions.c | 12 +++++ src/backend/access/heap/heapam.c | 1 + src/backend/access/heap/vacuumlazy.c | 43 +++++++++++++++- src/backend/commands/vacuum.c | 25 +++++++++- src/backend/postmaster/autovacuum.c | 10 ++++ src/backend/utils/misc/guc_tables.c | 14 ++++++ src/backend/utils/misc/postgresql.conf.sample | 1 + doc/src/sgml/config.sgml | 19 ++++++- doc/src/sgml/maintenance.sgml | 50 +++++++++++++++---- doc/src/sgml/ref/create_table.sgml | 14 ++++++ 12 files changed, 190 insertions(+), 12 deletions(-) diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 689dbb770..50cc6fce5 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -191,6 +191,9 @@ typedef struct VacAttrStats #define VACOPT_SKIP_DATABASE_STATS 0x100 /* skip vac_update_datfrozenxid() */ #define VACOPT_ONLY_DATABASE_STATS 0x200 /* only vac_update_datfrozenxid() */ +/* Absolute maximum of VacuumParams->freeze_strategy_threshold is 512TB */ +#define MAX_VACUUM_THRESHOLD 536870912 + /* * Values used by index_cleanup and truncate params. 
* @@ -222,6 +225,9 @@ typedef struct VacuumParams * use default */ int multixact_freeze_table_age; /* multixact age at which to scan * whole table */ + int freeze_strategy_threshold; /* threshold to use eager + * freezing, in megabytes, + * -1 to use default */ bool is_wraparound; /* force a for-wraparound vacuum */ int log_min_duration; /* minimum execution threshold in ms at * which autovacuum is logged, -1 to use @@ -274,6 +280,11 @@ struct VacuumCutoffs */ TransactionId FreezeLimit; MultiXactId MultiXactCutoff; + + /* + * Threshold that triggers VACUUM's eager freezing strategy + */ + BlockNumber freeze_strategy_threshold_pages; }; /* @@ -297,6 +308,7 @@ extern PGDLLIMPORT int vacuum_freeze_min_age; extern PGDLLIMPORT int vacuum_freeze_table_age; extern PGDLLIMPORT int vacuum_multixact_freeze_min_age; extern PGDLLIMPORT int vacuum_multixact_freeze_table_age; +extern PGDLLIMPORT int vacuum_freeze_strategy_threshold; extern PGDLLIMPORT int vacuum_failsafe_age; extern PGDLLIMPORT int vacuum_multixact_failsafe_age; diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index af9785038..39c7ccf0c 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -314,6 +314,7 @@ typedef struct AutoVacOpts int multixact_freeze_min_age; int multixact_freeze_max_age; int multixact_freeze_table_age; + int freeze_strategy_threshold; int log_min_duration; float8 vacuum_cost_delay; float8 vacuum_scale_factor; diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 14c23101a..54ac90ff1 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -27,6 +27,7 @@ #include "catalog/pg_type.h" #include "commands/defrem.h" #include "commands/tablespace.h" +#include "commands/vacuum.h" #include "commands/view.h" #include "nodes/makefuncs.h" #include "postmaster/postmaster.h" @@ -312,6 +313,15 @@ static relopt_int intRelOpts[] = ShareUpdateExclusiveLock }, -1, 0, 2000000000 }, + { + { + 
"autovacuum_freeze_strategy_threshold", + "Table size at which VACUUM freezes using eager strategy, in megabytes.", + RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, + ShareUpdateExclusiveLock + }, + -1, 0, MAX_VACUUM_THRESHOLD + }, { { "log_autovacuum_min_duration", @@ -1863,6 +1873,8 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind) offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_max_age)}, {"autovacuum_multixact_freeze_table_age", RELOPT_TYPE_INT, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_table_age)}, + {"autovacuum_freeze_strategy_threshold", RELOPT_TYPE_INT, + offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_strategy_threshold)}, {"log_autovacuum_min_duration", RELOPT_TYPE_INT, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, log_min_duration)}, {"toast_tuple_target", RELOPT_TYPE_INT, diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 388df94a4..152f6c2d6 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -7056,6 +7056,7 @@ heap_freeze_tuple(HeapTupleHeader tuple, cutoffs.OldestMxact = MultiXactCutoff; cutoffs.FreezeLimit = FreezeLimit; cutoffs.MultiXactCutoff = MultiXactCutoff; + cutoffs.freeze_strategy_threshold_pages = 0; pagefrz.freeze_required = true; pagefrz.FreezePageRelfrozenXid = FreezeLimit; diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 8f14cf85f..03ea36624 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -153,6 +153,8 @@ typedef struct LVRelState bool aggressive; /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */ bool skipwithvm; + /* Eagerly freeze all tuples on pages about to be set all-visible? */ + bool eager_freeze_strategy; /* Wraparound failsafe has been triggered? */ bool failsafe_active; /* Consider index vacuuming bypass optimization? 
*/ @@ -243,6 +245,7 @@ typedef struct LVSavedErrInfo /* non-export function prototypes */ static void lazy_scan_heap(LVRelState *vacrel); +static void lazy_scan_strategy(LVRelState *vacrel); static BlockNumber lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer, BlockNumber next_block, bool *next_unskippable_allvis, @@ -472,6 +475,10 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, vacrel->skipwithvm = skipwithvm; + /* + * Now determine VACUUM's freezing strategy. + */ + lazy_scan_strategy(vacrel); if (verbose) { if (vacrel->aggressive) @@ -1267,6 +1274,38 @@ lazy_scan_heap(LVRelState *vacrel) lazy_cleanup_all_indexes(vacrel); } +/* + * lazy_scan_strategy() -- Determine freezing strategy. + * + * Our lazy freezing strategy is useful when putting off the work of freezing + * totally avoids freezing that turns out to have been wasted effort later on. + * Our eager freezing strategy is useful with larger tables that experience + * continual growth, where freezing pages proactively is needed just to avoid + * falling behind on freezing (eagerness is also likely to be cheaper in the + * short/medium term for such tables, but the long term picture matters most). + */ +static void +lazy_scan_strategy(LVRelState *vacrel) +{ + BlockNumber rel_pages = vacrel->rel_pages; + + /* + * Decide freezing strategy. + * + * The eager freezing strategy is used whenever rel_pages exceeds a + * threshold controlled by the freeze_strategy_threshold GUC/reloption. + * + * Also freeze eagerly with an unlogged or temp table, where the total + * cost of freezing pages is mostly just the cycles needed to prepare a + * set of freeze plans. Executing the freeze plans adds very little cost. + * Dirtying extra pages isn't a concern, either; VACUUM will definitely + * set PD_ALL_VISIBLE on affected pages, regardless of freezing strategy. 
+ */ + vacrel->eager_freeze_strategy = + (rel_pages > vacrel->cutoffs.freeze_strategy_threshold_pages || + !RelationIsPermanent(vacrel->rel)); +} + /* * lazy_scan_skip() -- set up range of skippable blocks using visibility map. * @@ -1795,10 +1834,12 @@ retry: * one XID/MXID from before FreezeLimit/MultiXactCutoff is present. Also * freeze when pruning generated an FPI, if doing so means that we set the * page all-frozen afterwards (might not happen until final heap pass). + * When ongoing VACUUM opted to use the eager freezing strategy, we freeze + * any page that will thereby become all-frozen in the visibility map. */ if (pagefrz.freeze_required || tuples_frozen == 0 || (prunestate->all_visible && prunestate->all_frozen && - fpi_before != pgWalUsage.wal_fpi)) + (fpi_before != pgWalUsage.wal_fpi || vacrel->eager_freeze_strategy))) { /* * We're freezing the page. Our final NewRelfrozenXid doesn't need to diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 7b1a4b127..62bb87846 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -68,6 +68,7 @@ int vacuum_freeze_min_age; int vacuum_freeze_table_age; int vacuum_multixact_freeze_min_age; int vacuum_multixact_freeze_table_age; +int vacuum_freeze_strategy_threshold; int vacuum_failsafe_age; int vacuum_multixact_failsafe_age; @@ -264,6 +265,7 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel) params.freeze_table_age = 0; params.multixact_freeze_min_age = 0; params.multixact_freeze_table_age = 0; + params.freeze_strategy_threshold = 0; } else { @@ -271,6 +273,7 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel) params.freeze_table_age = -1; params.multixact_freeze_min_age = -1; params.multixact_freeze_table_age = -1; + params.freeze_strategy_threshold = -1; } /* user-invoked vacuum is never "for wraparound" */ @@ -962,7 +965,9 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams *params, multixact_freeze_min_age, 
freeze_table_age, multixact_freeze_table_age, - effective_multixact_freeze_max_age; + effective_multixact_freeze_max_age, + freeze_strategy_threshold; + uint64 threshold_strategy_pages; TransactionId nextXID, safeOldestXmin, aggressiveXIDCutoff; @@ -975,6 +980,7 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams *params, multixact_freeze_min_age = params->multixact_freeze_min_age; freeze_table_age = params->freeze_table_age; multixact_freeze_table_age = params->multixact_freeze_table_age; + freeze_strategy_threshold = params->freeze_strategy_threshold; /* Set pg_class fields in cutoffs */ cutoffs->relfrozenxid = rel->rd_rel->relfrozenxid; @@ -1089,6 +1095,23 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams *params, if (MultiXactIdPrecedes(cutoffs->OldestMxact, cutoffs->MultiXactCutoff)) cutoffs->MultiXactCutoff = cutoffs->OldestMxact; + /* + * Determine the freeze_strategy_threshold to use: as specified by the + * caller, or vacuum_freeze_strategy_threshold + */ + if (freeze_strategy_threshold < 0) + freeze_strategy_threshold = vacuum_freeze_strategy_threshold; + Assert(freeze_strategy_threshold >= 0); + + /* + * Convert MB-based GUC to page-based value used within vacuumlazy.c, + * while being careful to avoid overflow + */ + threshold_strategy_pages = + (uint64) freeze_strategy_threshold * 1024 * 1024 / BLCKSZ; + threshold_strategy_pages = Min(threshold_strategy_pages, MaxBlockNumber); + cutoffs->freeze_strategy_threshold_pages = threshold_strategy_pages; + /* * Finally, figure out if caller needs to do an aggressive VACUUM or not. 
* diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index f5ea381c5..ecddde3a1 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -151,6 +151,7 @@ static int default_freeze_min_age; static int default_freeze_table_age; static int default_multixact_freeze_min_age; static int default_multixact_freeze_table_age; +static int default_freeze_strategy_threshold; /* Memory context for long-lived data */ static MemoryContext AutovacMemCxt; @@ -2010,6 +2011,7 @@ do_autovacuum(void) default_freeze_table_age = 0; default_multixact_freeze_min_age = 0; default_multixact_freeze_table_age = 0; + default_freeze_strategy_threshold = 0; } else { @@ -2017,6 +2019,7 @@ do_autovacuum(void) default_freeze_table_age = vacuum_freeze_table_age; default_multixact_freeze_min_age = vacuum_multixact_freeze_min_age; default_multixact_freeze_table_age = vacuum_multixact_freeze_table_age; + default_freeze_strategy_threshold = vacuum_freeze_strategy_threshold; } ReleaseSysCache(tuple); @@ -2801,6 +2804,7 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, int freeze_table_age; int multixact_freeze_min_age; int multixact_freeze_table_age; + int freeze_strategy_threshold; int vac_cost_limit; double vac_cost_delay; int log_min_duration; @@ -2850,6 +2854,11 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, ? avopts->multixact_freeze_table_age : default_multixact_freeze_table_age; + freeze_strategy_threshold = (avopts && + avopts->freeze_strategy_threshold >= 0) + ? 
avopts->freeze_strategy_threshold + : default_freeze_strategy_threshold; + tab = palloc(sizeof(autovac_table)); tab->at_relid = relid; tab->at_sharedrel = classForm->relisshared; @@ -2877,6 +2886,7 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, tab->at_params.freeze_table_age = freeze_table_age; tab->at_params.multixact_freeze_min_age = multixact_freeze_min_age; tab->at_params.multixact_freeze_table_age = multixact_freeze_table_age; + tab->at_params.freeze_strategy_threshold = freeze_strategy_threshold; tab->at_params.is_wraparound = wraparound; tab->at_params.log_min_duration = log_min_duration; tab->at_vacuum_cost_limit = vac_cost_limit; diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 4ac808ed2..7a78d98d3 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -2535,6 +2535,20 @@ struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + {"vacuum_freeze_strategy_threshold", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Table size at which VACUUM freezes using eager strategy, in megabytes."), + gettext_noop("This is applied by comparing it to the size of a table's main fork at " + "the beginning of each VACUUM. Eager freezing strategy is used when size " + "exceeds the threshold or when table is a temporary or unlogged table. 
" + "Otherwise lazy freezing strategy is used."), + GUC_UNIT_MB + }, + &vacuum_freeze_strategy_threshold, + 4096, 0, MAX_VACUUM_THRESHOLD, + NULL, NULL, NULL + }, + { {"vacuum_defer_cleanup_age", PGC_SIGHUP, REPLICATION_PRIMARY, gettext_noop("Number of transactions by which VACUUM and HOT cleanup should be deferred, if any."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index d06074b86..fda695e75 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -700,6 +700,7 @@ #vacuum_multixact_freeze_table_age = 150000000 #vacuum_multixact_freeze_min_age = 5000000 #vacuum_multixact_failsafe_age = 1600000000 +#vacuum_freeze_strategy_threshold = 4GB #bytea_output = 'hex' # hex, escape #xmlbinary = 'base64' #xmloption = 'content' diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index f985afc00..39480c653 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -9225,6 +9225,21 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; + + vacuum_freeze_strategy_threshold (integer) + + vacuum_freeze_strategy_threshold configuration parameter + + + + + Specifies the cutoff size (in megabytes) that VACUUM + should use to decide whether to apply its eager freezing strategy. + The default is 4096 megabytes (equivalent to 4GB). + + + + vacuum_freeze_table_age (integer) @@ -9260,7 +9275,9 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; Specifies the cutoff age (in transactions) that VACUUM should use to decide whether to - trigger freezing of pages that have an older XID. + trigger freezing of pages that have an older XID. When VACUUM + uses its eager freezing strategy, freezing a page can also be + triggered when the page contains only all-visible tuples. The default is 50 million transactions. 
Although users can set this value anywhere from zero to one billion, VACUUM will silently limit the effective value to half diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml index 759ea5ac9..8d762bad2 100644 --- a/doc/src/sgml/maintenance.sgml +++ b/doc/src/sgml/maintenance.sgml @@ -478,15 +478,47 @@ - - controls how old an XID value has to be before rows bearing that XID will be - frozen. Increasing this setting may avoid unnecessary work if the - rows that would otherwise be frozen will soon be modified again, - but decreasing this setting increases - the number of transactions that can elapse before the table must be - vacuumed again. + vacuum_freeze_strategy_threshold controls + VACUUM's freezing strategy. The + eager freezing strategy freezes all tuples + on a page when they are considered visible to all current + transactions. The goal is to freeze pages in batch earlier to + spread out the overhead of freezing over time, improving system + level performance stability. The lazy freezing + strategy determines whether each page is to be frozen + largely on the basis of the age of the oldest extant XID on the + page. The goal is to avoid wholly unnecessary freezing. + Increasing vacuum_freeze_strategy_threshold + may avoid unnecessary work if the pages that would otherwise be + frozen will soon be modified again, but increasing this setting + also increases the risk of an eventual VACUUM that + must perform an excessive amount of catch up + freezing. + + vacuum_freeze_min_age controls how old an + XID value has to be before pages with rows bearing that XID are + frozen. This setting is an additional trigger criterion for + freezing a page's tuples, used by both freezing strategies. + Unlogged and temporary relations always use the eager freezing strategy. There is + also an optimization that makes VACUUM trigger + freezing pages whenever a full page image is logged (see ), which aims to avoid another full + page image for the same page later on. + + + + + In PostgreSQL versions before 16, all + freezing was triggered by + vacuum_freeze_min_age.
Newer versions trigger + freezing with the goal of finding the most opportune time to + freeze, spreading out the cost over multiple + VACUUM operations. + + + VACUUM uses the visibility map to determine which pages of a table must be scanned. Normally, it @@ -837,8 +869,8 @@ vacuum insert threshold = vacuum base insert threshold + vacuum insert scale fac For tables which receive INSERT operations but no or almost no UPDATE/DELETE operations, it may be beneficial to lower the table's - as this may allow - tuples to be frozen by earlier vacuums. The number of obsolete tuples and + autovacuum_freeze_strategy_threshold + to allow freezing to take place proactively. The number of obsolete tuples and the number of inserted tuples are obtained from the cumulative statistics system; it is a semi-accurate count updated by each UPDATE, DELETE and INSERT operation. (It is diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index a03dee4af..f97cc7084 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -1682,6 +1682,20 @@ WITH ( MODULUS numeric_literal, REM + + autovacuum_freeze_strategy_threshold, toast.autovacuum_freeze_strategy_threshold (integer) + + autovacuum_freeze_strategy_threshold storage parameter + + + + + Per-table value for vacuum_freeze_strategy_threshold + parameter. + + + + autovacuum_freeze_min_age, toast.autovacuum_freeze_min_age (integer) -- 2.39.0