From 1e6d198796e5aac482eac1f9b16a5b12f550bfcd Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Thu, 25 Mar 2021 13:54:32 -0700 Subject: [PATCH v7 4/4] Skip index vacuuming in some cases. Skip index vacuuming in two cases: The case where there are so few dead tuples that index vacuuming seems unnecessary, and the case where the relfrozenxid of the table being vacuumed is dangerously far in the past. This commit add new GUC parameters vacuum_skip_index_age and vacuum_multixact_skip_index_age that specify age at which VACUUM should skip index cleanup to hurry finishing in order to advance relfrozenxid/relminmxid. After each index vacuuming (in non-parallel vacuum case), we check if the table's relfrozenxid/relminmxid are too old comparing those new GUC parameters. If so, we skip further index vacuuming within the vacuum operation. This behavior is intended to deal with the risk of XID wraparound, the default values are much higher, 1.8 billion. Although users can set those parameters, VACUUM will silently adjust the effective value more than 105% of autovacuum_freeze_max_age/autovacuum_multixact_freeze_max_age, so that only anti-wraparound autovacuuma and aggressive scan have a change to skip index vacuuming. --- src/include/commands/vacuum.h | 4 + src/backend/access/heap/vacuumlazy.c | 244 ++++++++++++++++-- src/backend/commands/vacuum.c | 61 +++++ src/backend/utils/misc/guc.c | 25 +- src/backend/utils/misc/postgresql.conf.sample | 2 + doc/src/sgml/config.sgml | 51 ++++ doc/src/sgml/maintenance.sgml | 10 +- 7 files changed, 377 insertions(+), 20 deletions(-) diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index d029da5ac0..d3d44d9bac 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -235,6 +235,8 @@ extern int vacuum_freeze_min_age; extern int vacuum_freeze_table_age; extern int vacuum_multixact_freeze_min_age; extern int vacuum_multixact_freeze_table_age; +extern int vacuum_skip_index_age; +extern int vacuum_multixact_skip_index_age; /* Variables for cost-based parallel vacuum */ extern pg_atomic_uint32 *VacuumSharedCostBalance; @@ -270,6 +272,8 @@ extern void vacuum_set_xid_limits(Relation rel, TransactionId *xidFullScanLimit, MultiXactId *multiXactCutoff, MultiXactId *mxactFullScanLimit); +extern bool vacuum_xid_limit_emergency(TransactionId relfrozenxid, + MultiXactId relminmxid); extern void vac_update_datfrozenxid(void); extern void vacuum_delay_point(void); extern bool vacuum_is_relation_owner(Oid relid, Form_pg_class reltuple, diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 74ec751466..eb99238297 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -131,6 +131,12 @@ */ #define PREFETCH_SIZE ((BlockNumber) 32) +/* + * The threshold of the percentage of heap blocks having LP_DEAD line pointer + * above which index vacuuming goes ahead. + */ +#define SKIP_VACUUM_PAGES_RATIO 0.01 + /* * DSM keys for parallel vacuum. Unlike other parallel execution code, since * we don't need to worry about DSM keys conflicting with plan_node_id we can @@ -403,9 +409,11 @@ static void lazy_prune_page_items(LVRelState *vacrel, Buffer buf, LVTempCounters *scancounts, LVPagePruneState *pageprunestate, LVPageVisMapState *pagevmstate); -static void lazy_vacuum_all_pruned_items(LVRelState *vacrel); +static void lazy_vacuum_all_pruned_items(LVRelState *vacrel, + BlockNumber has_dead_items_pages, + bool onecall); static void lazy_vacuum_heap(LVRelState *vacrel); -static void lazy_vacuum_all_indexes(LVRelState *vacrel); +static bool lazy_vacuum_all_indexes(LVRelState *vacrel); static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat, double reltuples, @@ -860,7 +868,8 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive) next_fsm_block_to_vacuum; PGRUsage ru0; Buffer vmbuffer = InvalidBuffer; - bool skipping_blocks; + bool skipping_blocks, + have_vacuumed_indexes = false; xl_heap_freeze_tuple *frozen; StringInfoData buf; const int initprog_index[] = { @@ -874,7 +883,8 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive) /* Counters of # blocks in onerel: */ BlockNumber empty_pages, - vacuumed_pages; + vacuumed_pages, + has_dead_items_pages; pg_rusage_init(&ru0); @@ -889,7 +899,7 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive) vacrel->relnamespace, vacrel->relname))); - empty_pages = vacuumed_pages = 0; + empty_pages = vacuumed_pages = has_dead_items_pages = 0; /* Initialize counters */ scancounts.num_tuples = 0; @@ -1126,8 +1136,16 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive) vmbuffer = InvalidBuffer; } + /* + * Definitely won't be skipping index vacuuming due to finding + * very few dead items during this VACUUM operation -- that's only + * something that lazy_vacuum_all_pruned_items() is willing to do + * when it is only called once during the entire VACUUM operation. + */ + have_vacuumed_indexes = true; + /* Remove the collected garbage tuples from table and indexes */ - lazy_vacuum_all_pruned_items(vacrel); + lazy_vacuum_all_pruned_items(vacrel, has_dead_items_pages, false); /* * Vacuum the Free Space Map to make newly-freed space visible on @@ -1265,6 +1283,17 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive) lazy_prune_page_items(vacrel, buf, vistest, frozen, &scancounts, &pageprunestate, &pagevmstate); + /* + * Remember the number of pages having at least one LP_DEAD line + * pointer. This could be from this VACUUM, a previous VACUUM, or + * even opportunistic pruning. Note that this is exactly the same + * thing as having items that are stored in dead_tuples space, because + * lazy_prune_page_items() doesn't count anything other than LP_DEAD + * items as dead (as of PostgreSQL 14). + */ + if (pageprunestate.has_dead_items) + has_dead_items_pages++; + /* * Step 7 for block: Set up details for saving free space in FSM at * end of loop. (Also performs extra single pass strategy steps in @@ -1287,7 +1316,8 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive) * Note: It's not in fact 100% certain that we really will call * lazy_vacuum_heap() -- lazy_vacuum_all_pruned_items() might opt * to skip index vacuuming (and so must skip heap vacuuming). - * This is deemed okay because it only happens in emergencies. + * This is deemed okay because it only happens in emergencies, or + * when there is very little free space anyway. */ } else @@ -1399,7 +1429,8 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive) /* If any tuples need to be deleted, perform final vacuum cycle */ Assert(vacrel->nindexes > 0 || dead_tuples->num_tuples == 0); if (dead_tuples->num_tuples > 0) - lazy_vacuum_all_pruned_items(vacrel); + lazy_vacuum_all_pruned_items(vacrel, has_dead_items_pages, + !have_vacuumed_indexes); /* * Vacuum the remainder of the Free Space Map. We must do this whether or @@ -2059,10 +2090,16 @@ retry: /* * Remove the collected garbage tuples from the table and its indexes. + * + * We may be able to skip index vacuuming (we may even be required to do so by + * reloption) */ static void -lazy_vacuum_all_pruned_items(LVRelState *vacrel) +lazy_vacuum_all_pruned_items(LVRelState *vacrel, + BlockNumber has_dead_items_pages, bool onecall) { + bool applyskipoptimization; + /* Should not end up here with no indexes */ Assert(vacrel->nindexes > 0); Assert(!IsParallelWorker()); @@ -2070,19 +2107,159 @@ lazy_vacuum_all_pruned_items(LVRelState *vacrel) if (!vacrel->do_index_vacuuming) { /* - * Just ignore second or subsequent calls in when INDEX_CLEANUP off - * was specified + * Just ignore second or subsequent calls in emergency cases. This + * includes when INDEX_CLEANUP off was specified. */ Assert(!vacrel->do_index_cleanup); vacrel->dead_tuples->num_tuples = 0; return; } - /* Okay, we're going to do index vacuuming */ - lazy_vacuum_all_indexes(vacrel); + /* + * Consider applying the optimization where we skip index vacuuming to + * save work in indexes that is likely to have little upside. This is + * expected to help in the extreme (though still common) case where + * autovacuum generally only triggers VACUUMs against the table because of + * the need to freeze tuples and/or the need to set visibility map bits. + * The overall effect is that cases where the table is slightly less than + * 100% append-only (where there are some dead tuples, but very few) tend + * to behave almost as if they really were 100% append-only. + * + * Our approach is to skip index vacuuming when there are very few heap + * pages with dead items. Even then, it must be the first and last call + * here for the VACUUM (we never apply the optimization when we're low on + * space for TIDs). This threshold allows us to not give too much weight + * to items that are concentrated in relatively few heap pages. These are + * usually due to correlated non-HOT UPDATEs. + * + * It's important that we avoid putting off a VACUUM that eventually + * dirties index pages more often than would happen if we didn't skip. + * It's also important to avoid allowing relatively many heap pages that + * can never have their visibility map bit set to stay that way + * indefinitely. + * + * In general the criteria that we apply here must not create distinct new + * problems for the logic that schedules autovacuum workers. For example, + * we cannot allow autovacuum_vacuum_insert_scale_factor-driven autovacuum + * workers to do little or no useful work due to misapplication of this + * optimization. While the optimization is expressly designed to avoid + * work that has non-zero value to the system, the value of that work + * should be close to zero. There should be a natural asymmetry between + * the costs and the benefits of skipping. + */ + applyskipoptimization = false; + if (onecall) + { + BlockNumber threshold; - /* Remove tuples from heap */ - lazy_vacuum_heap(vacrel); + Assert(vacrel->num_index_scans == 0); + Assert(vacrel->do_index_vacuuming); + Assert(vacrel->do_index_cleanup); + + threshold = (double) vacrel->rel_pages * SKIP_VACUUM_PAGES_RATIO; + + applyskipoptimization = (has_dead_items_pages < threshold); + } + + if (applyskipoptimization) + { + /* + * skipped index vacuuming due to optimization. Make log report that + * lazy_vacuum_heap would've made. + * + * Don't report tups_vacuumed here because it will be zero here in + * common case where there are no newly pruned LP_DEAD items for this + * VACUUM. This is roughly consistent with lazy_vacuum_heap(), and + * the similar "nindexes == 0" specific ereport() at the end of + * lazy_scan_heap(). + */ + ereport(elevel, + (errmsg("\"%s\": opted to not totally remove %d pruned items in %u pages", + vacrel->relname, vacrel->dead_tuples->num_tuples, + has_dead_items_pages))); + + /* + * Skip index vacuuming, but don't skip index cleanup. + * + * It wouldn't make sense to not do cleanup just because this + * optimization was applied. (As a general rule, the case where there + * are _almost_ zero dead items when vacuuming a large table should + * not behave very differently from the case where there are precisely + * zero dead items.) + */ + vacrel->do_index_vacuuming = false; + } + else if (lazy_vacuum_all_indexes(vacrel)) + { + /* + * We successfully completed a round of index vacuuming. Do related + * heap vacuuming now. + * + * There will be no calls to vacuum_xid_limit_emergency() to check + * for issues with the age of the table's relfrozenxid unless and + * until there is another call here -- heap vacuuming doesn't do that. + * This should be okay, because the cost of a round of heap vacuuming + * is much more linear. Also, it has costs that are unaffected by the + * number of indexes total. + */ + lazy_vacuum_heap(vacrel); + } + else + { + /* + * Emergency case: We attempted index vacuuming, didn't finish + * another round of index vacuuming (or one that reliably deleted + * tuples from all of the table's indexes, at least). This happens + * when the table's relfrozenxid is too far in the past. + * + * From this point on the VACUUM operation will do no further index + * vacuuming or heap vacuuming. It will do any remaining pruning that + * is required, plus other heap-related and relation-level maintenance + * tasks. But that's it. We also disable a cost delay when a delay + * is in effect. + * + * Note that we deliberately don't vary our behavior based on factors + * like whether or not the ongoing VACUUM is aggressive. If it's not + * aggressive we probably won't be able to advance relfrozenxid during + * this VACUUM. If we can't, then an anti-wraparound VACUUM should + * take place immediately after we finish up. We should be able to + * skip all index vacuuming for the later anti-wraparound VACUUM. + * + * This is very much like the "CLEANUP_INDEX = off" case, except we + * determine that index vacuuming will be skipped dynamically. + * Another difference is that we don't warn the user in the + * INDEX_CLEANUP off case, and we don't presume to stop applying a + * cost delay. + */ + Assert(vacrel->do_index_vacuuming); + Assert(vacrel->do_index_cleanup); + + vacrel->do_index_vacuuming = false; + vacrel->do_index_cleanup = false; + ereport(WARNING, + (errmsg("abandoned index vacuuming of table \"%s.%s.%s\" as a fail safe after %d index scans", + get_database_name(MyDatabaseId), + vacrel->relname, + vacrel->relname, + vacrel->num_index_scans), + errdetail("table's relfrozenxid or relminmxid is too far in the past"), + errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n" + "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs."))); + + /* Stop applying cost limits from this point on */ + VacuumCostActive = false; + VacuumCostBalance = 0; + VacuumPageHit = 0; + VacuumPageMiss = 0; + VacuumPageDirty = 0; + + /* + * TODO: + * + * Call lazy_space_free() and arrange to stop even recording TIDs + * (i.e. make lazy_record_dead_tuple() into a no-op) + */ + } /* * Forget the now-vacuumed tuples -- just press on @@ -2099,14 +2276,30 @@ lazy_vacuum_all_pruned_items(LVRelState *vacrel) * rely on conflicts from heap pruning instead (i.e. a heap_page_prune() call * that took place earlier, usually though not always during the ongoing * VACUUM operation). + * + * Returns true in the common case when all indexes were successfully + * vacuumed. Returns false in rare cases where we determined that the ongoing + * VACUUM operation is at risk of taking too long to finish, leading to + * wraparound failure. */ -static void +static bool lazy_vacuum_all_indexes(LVRelState *vacrel) { + bool allindexes = true; + Assert(vacrel->nindexes > 0); + Assert(vacrel->do_index_vacuuming); + Assert(vacrel->do_index_cleanup); Assert(TransactionIdIsNormal(vacrel->relfrozenxid)); Assert(MultiXactIdIsValid(vacrel->relminmxid)); + /* Precheck for XID wraparound emergencies */ + if (vacuum_xid_limit_emergency(vacrel->relfrozenxid, vacrel->relminmxid)) + { + /* Wraparound emergency -- don't even start an index scan */ + return false; + } + /* Report that we are now vacuuming indexes */ pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_VACUUM_INDEX); @@ -2121,18 +2314,35 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat, vacrel->old_live_tuples, vacrel); + + if (vacuum_xid_limit_emergency(vacrel->relfrozenxid, + vacrel->relminmxid)) + { + /* Wraparound emergency -- end current index scan */ + allindexes = false; + break; + } } } else { + /* Note: parallel VACUUM only gets the precheck */ + allindexes = true; + /* Outsource everything to parallel variant */ do_parallel_lazy_vacuum_all_indexes(vacrel); } - /* Increase and report the number of index scans */ + /* + * Increase and report the number of index scans. Note that we include + * the case where we started a round index scanning that we weren't able + * to finish. + */ vacrel->num_index_scans++; pgstat_progress_update_param(PROGRESS_VACUUM_NUM_INDEX_VACUUMS, vacrel->num_index_scans); + + return allindexes; } /* diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index c064352e23..063113cd38 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -62,6 +62,8 @@ int vacuum_freeze_min_age; int vacuum_freeze_table_age; int vacuum_multixact_freeze_min_age; int vacuum_multixact_freeze_table_age; +int vacuum_skip_index_age; +int vacuum_multixact_skip_index_age; /* A few variables that don't seem worth passing around as parameters */ @@ -1134,6 +1136,65 @@ vacuum_set_xid_limits(Relation rel, } } +/* + * vacuum_xid_limit_emergency() -- Handle wraparound emergencies + * + * Input parameters are the target relation's relfrozenxid and relminmxid. + */ +bool +vacuum_xid_limit_emergency(TransactionId relfrozenxid, MultiXactId relminmxid) +{ + TransactionId xid_skip_limit; + MultiXactId multi_skip_limit; + int skip_index_vacuum; + + Assert(TransactionIdIsNormal(relfrozenxid)); + Assert(MultiXactIdIsValid(relminmxid)); + + /* + * Determine the index skipping age to use. In any case not less than + * autovacuum_freeze_max_age * 1.05, so that VACUUM always does an + * aggressive scan. + */ + skip_index_vacuum = Max(vacuum_skip_index_age, autovacuum_freeze_max_age * 1.05); + + xid_skip_limit = ReadNextTransactionId() - skip_index_vacuum; + if (!TransactionIdIsNormal(xid_skip_limit)) + xid_skip_limit = FirstNormalTransactionId; + + if (TransactionIdIsNormal(relfrozenxid) && + TransactionIdPrecedes(relfrozenxid, xid_skip_limit)) + { + /* The table's relfrozenxid is too old */ + return true; + } + + /* + * Similar to above, determine the index skipping age to use for multixact. + * In any case not less than autovacuum_multixact_freeze_max_age * 1.05. + */ + skip_index_vacuum = Max(vacuum_multixact_skip_index_age, + autovacuum_multixact_freeze_max_age * 1.05); + + /* + * Compute the multixact age for which freezing is urgent. This is + * normally autovacuum_multixact_freeze_max_age, but may be less if we are + * short of multixact member space. + */ + multi_skip_limit = ReadNextMultiXactId() - skip_index_vacuum; + if (multi_skip_limit < FirstMultiXactId) + multi_skip_limit = FirstMultiXactId; + + if (MultiXactIdIsValid(relminmxid) && + MultiXactIdPrecedes(relminmxid, multi_skip_limit)) + { + /* The table's relminmxid is too old */ + return true; + } + + return false; +} + /* * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples * diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 0c5dc4d3e8..24fb736a72 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -2622,6 +2622,26 @@ static struct config_int ConfigureNamesInt[] = 0, 0, 1000000, /* see ComputeXidHorizons */ NULL, NULL, NULL }, + { + {"vacuum_skip_index_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Age at which VACUUM should skip index vacuuming."), + NULL + }, + &vacuum_skip_index_age, + /* This upper-limit can be 1.05 of autovacuum_freeze_max_age */ + 1800000000, 0, 2100000000, + NULL, NULL, NULL + }, + { + {"vacuum_multixact_skip_index_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Multixact age at which VACUUM should skip index vacuuming."), + NULL + }, + &vacuum_multixact_skip_index_age, + /* This upper-limit can be 1.05 of autovacuum_multixact_freeze_max_age */ + 1800000000, 0, 2100000000, + NULL, NULL, NULL + }, /* * See also CheckRequiredParameterValues() if this parameter changes @@ -3222,7 +3242,10 @@ static struct config_int ConfigureNamesInt[] = NULL }, &autovacuum_freeze_max_age, - /* see pg_resetwal if you change the upper-limit value */ + /* + * see pg_resetwal and vacuum_skip_index_age if you change the + * upper-limit value. + */ 200000000, 100000, 2000000000, NULL, NULL, NULL }, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index b234a6bfe6..7d6564e17f 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -673,6 +673,8 @@ #vacuum_freeze_table_age = 150000000 #vacuum_multixact_freeze_min_age = 5000000 #vacuum_multixact_freeze_table_age = 150000000 +#vacuum_skip_index_age = 1800000000 +#vacuum_multixact_skip_index_age = 1800000000 #bytea_output = 'hex' # hex, escape #xmlbinary = 'base64' #xmloption = 'content' diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index ddc6d789d8..9a21e4a402 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8528,6 +8528,31 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; + + vacuum_skip_index_age (integer) + + vacuum_skip_index_age configuration parameter + + + + + VACUUM skips index cleanup if the table's + pg_class.relfrozenxid field has reached + the age specified by this setting. A VACUUM with skipping + index cleanup hurries finishing VACUUM to advance + pg_class.relfrozenxid + as quickly as possible. This is an equivalent behavior to setting + OFF to INDEX_CLEANUP option except that + this parameters skips index cleanup even in the middle of vacuum operation. + The default is 1.8 billion transactions. Although users can set this value + anywhere from zero to 2.1 billion, VACUUM will silently + adjust the effective value more than 105% of + , so that only anti-wraparound + autovacuums and aggressive scans have a chance to skip index cleanup. + + + + vacuum_multixact_freeze_table_age (integer) @@ -8574,6 +8599,32 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; + + vacuum_multixact_skip_index_age (integer) + + vacuum_multixact_skip_index_age configuration parameter + + + + + VACUUM skips index cleanup if the table's + pg_class.relminmxid field has reached + the age specified by this setting. A VACUUM with skipping + index cleanup hurries finishing VACUUM to advance + pg_class.relminmxid + as quickly as possible. This is an equivalent behavior to setting + OFF to INDEX_CLEANUP option except that + this parameters skips index cleanup even in the middle of vacuum operation. + The default is 1.8 billion multixacts. Although users can set this value + anywhere from zero to 2.1 billion, VACUUM will silently + adjust the effective value more than 105% of + , so that only + anti-wraparound autovacuums and aggressive scans have a chance to skip + index cleanup. + + + + bytea_output (enum) diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml index 4d8ad754f8..4d3674c1b4 100644 --- a/doc/src/sgml/maintenance.sgml +++ b/doc/src/sgml/maintenance.sgml @@ -607,8 +607,14 @@ SELECT datname, age(datfrozenxid) FROM pg_database; If for some reason autovacuum fails to clear old XIDs from a table, the - system will begin to emit warning messages like this when the database's - oldest XIDs reach forty million transactions from the wraparound point: + system will begin to skip index cleanup to hurry finishing vacuum + operation. controls when + VACUUM and autovacuum do that. + + + + The system emits warning messages like this when the database's + oldest XIDs reach forty million transactions from the wraparound point: WARNING: database "mydb" must be vacuumed within 39985967 transactions -- 2.27.0