From 862d5dfa65ff6dedf417db47e6f6c43e1e3dc0ce Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Mon, 5 Sep 2022 17:46:34 -0700 Subject: [PATCH v11 4/4] Finish removing aggressive mode VACUUM. The concept of aggressive/scan_all VACUUM dates back to the introduction of the visibility map in Postgres 8.4. Although pre-visibility-map VACUUM was far less efficient (especially after 9.6's commit fd31cd26), its naive approach had one notable advantage: users only had to think about a single kind of lazy vacuum (the only kind that existed). Break the final remaining dependency on aggressive mode: replace the rules governing when VACUUM will wait for a cleanup lock with a new set of rules more attuned to the needs of the table. With that last dependency gone, there is no need for aggressive mode, so get rid of it. Users once again only have to think about one kind of lazy vacuum. In general, all of the behaviors associated with aggressive mode prior to Postgres 16 have been retained; they just get applied selectively, on a more dynamic timeline. For example, the aforementioned change to VACUUM's cleanup lock behavior retains the general idea of sometimes waiting for a cleanup lock to make sure that older XIDs get frozen, so that relfrozenxid can be advanced by a sufficient amount. All that really changes is the information driving VACUUM's decision on waiting. We use new, dedicated cutoffs, rather than applying the FreezeLimit and MultiXactCutoff used when deciding whether we should trigger freezing on the basis of XID/MXID age. These minimum fallback cutoffs (which are called MinXid and MinMulti) are typically far older than the standard FreezeLimit/MultiXactCutoff cutoffs. VACUUM doesn't need an aggressive mode to decide on whether to wait for a cleanup lock anymore; it can decide everything at the level of individual heap pages. 
It is okay to aggressively punt on waiting for a cleanup lock like this because VACUUM now directly understands the importance of never falling too far behind on the work of freezing physical heap pages at the level of the whole table, following recent work to add VM scanning strategies. It is generally safer for VACUUM to press on with freezing other heap pages from the table instead. Even if relfrozenxid can only be advanced by relatively few XIDs as a consequence, VACUUM should have more than ample opportunity to catch up next time, since there is bound to be no more than a small number of problematic unfrozen pages left behind. VACUUM now tends to consistently advance relfrozenxid (at least by some small amount) all the time in larger tables, so all that has to happen for relfrozenxid to fully catch up is for a few remaining unfrozen pages to get frozen. Since relfrozenxid is now considered to be no more than a lagging indicator of freezing, and since relfrozenxid isn't used to trigger freezing in the way that it once was, time is on our side. Also teach VACUUM to wait for a short while for cleanup locks when doing so has a decent chance of preserving its ability to advance relfrozenxid up to FreezeLimit (and/or to advance relminmxid up to MultiXactCutoff). As a result, VACUUM typically manages to advance relfrozenxid by just as much as it would have had it promised to advance it up to FreezeLimit (i.e. had it made the traditional aggressive VACUUM guarantee), even when vacuuming a table that happens to have relatively many cleanup lock conflicts affecting pages with older XIDs/MXIDs. VACUUM thereby avoids missing out on advancing relfrozenxid up to the traditional target amount when it really can be avoided fairly easily, without promising to do so (VACUUM only promises to advance up to MinXid/MinMulti). XXX We also need to avoid the special auto-cancellation behavior for antiwraparound autovacuums to make this truly safe. 
See also, related patch for this: https://commitfest.postgresql.org/41/4027/ Author: Peter Geoghegan Reviewed-By: Jeff Davis Reviewed-By: Andres Freund Discussion: https://postgr.es/m/CAH2-WzkU42GzrsHhL2BiC1QMhaVGmVdb5HR0_qczz0Gu2aSn=A@mail.gmail.com --- src/include/commands/vacuum.h | 9 +- src/backend/access/heap/vacuumlazy.c | 221 +++--- src/backend/commands/vacuum.c | 42 +- src/backend/utils/activity/pgstat_relation.c | 4 +- doc/src/sgml/config.sgml | 10 +- doc/src/sgml/logicaldecoding.sgml | 2 +- doc/src/sgml/maintenance.sgml | 721 ++++++++---------- doc/src/sgml/ref/create_table.sgml | 2 +- doc/src/sgml/ref/prepare_transaction.sgml | 2 +- doc/src/sgml/ref/vacuum.sgml | 10 +- doc/src/sgml/ref/vacuumdb.sgml | 4 +- doc/src/sgml/xact.sgml | 4 +- .../expected/vacuum-no-cleanup-lock.out | 24 +- .../specs/vacuum-no-cleanup-lock.spec | 30 +- 14 files changed, 555 insertions(+), 530 deletions(-) diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 43e367bcb..b75b813f8 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -276,6 +276,13 @@ struct VacuumCutoffs TransactionId FreezeLimit; MultiXactId MultiXactCutoff; + /* + * Earliest permissible NewRelfrozenXid/NewRelminMxid values that can be + * set in pg_class at the end of VACUUM. 
+ */ + TransactionId MinXid; + MultiXactId MinMulti; + /* * Threshold cutoff point (expressed in # of physical heap rel blocks in * rel's main fork) that triggers VACUUM's eager freezing strategy @@ -348,7 +355,7 @@ extern void vac_update_relstats(Relation relation, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact); -extern bool vacuum_get_cutoffs(Relation rel, const VacuumParams *params, +extern void vacuum_get_cutoffs(Relation rel, const VacuumParams *params, struct VacuumCutoffs *cutoffs); extern bool vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs); extern void vac_update_datfrozenxid(void); diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 69062d016..732c3d73c 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -157,8 +157,6 @@ typedef struct LVRelState BufferAccessStrategy bstrategy; ParallelVacuumState *pvs; - /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */ - bool aggressive; /* Eagerly freeze all tuples on pages about to be set all-visible? */ bool eager_freeze_strategy; /* Wraparound failsafe has been triggered? */ @@ -262,7 +260,8 @@ static void lazy_scan_prune(LVRelState *vacrel, Buffer buf, LVPagePruneState *prunestate); static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, - bool *hastup, bool *recordfreespace); + bool *hastup, bool *recordfreespace, + Size *freespace); static void lazy_vacuum(LVRelState *vacrel); static bool lazy_vacuum_all_indexes(LVRelState *vacrel); static void lazy_vacuum_heap_rel(LVRelState *vacrel); @@ -459,7 +458,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, * future we might want to teach lazy_scan_prune to recompute vistest from * time to time, to increase the number of dead tuples it can prune away.) 
*/ - vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs); + vacuum_get_cutoffs(rel, params, &vacrel->cutoffs); vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel); vacrel->vistest = GlobalVisTestFor(rel); /* Initialize state used to track oldest extant XID/MXID */ @@ -539,17 +538,14 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, /* * Prepare to update rel's pg_class entry. * - * Aggressive VACUUMs must always be able to advance relfrozenxid to a - * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff. - * Non-aggressive VACUUMs may advance them by any amount, or not at all. + * VACUUM can only advance relfrozenxid to a value >= MinXid, and + * relminmxid to a value >= MinMulti. */ Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin || - TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit : - vacrel->cutoffs.relfrozenxid, + TransactionIdPrecedesOrEquals(vacrel->cutoffs.MinXid, vacrel->NewRelfrozenXid)); Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact || - MultiXactIdPrecedesOrEquals(vacrel->aggressive ? 
vacrel->cutoffs.MultiXactCutoff : - vacrel->cutoffs.relminmxid, + MultiXactIdPrecedesOrEquals(vacrel->cutoffs.MinMulti, vacrel->NewRelminMxid)); if (vacrel->vmstrat == VMSNAP_SCAN_LAZY) { @@ -557,7 +553,6 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, * Must keep original relfrozenxid/relminmxid when lazy_scan_strategy * decided to skip all-visible pages containing unfrozen XIDs/MXIDs */ - Assert(!vacrel->aggressive); vacrel->NewRelfrozenXid = InvalidTransactionId; vacrel->NewRelminMxid = InvalidMultiXactId; } @@ -626,33 +621,14 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur); memset(&walusage, 0, sizeof(WalUsage)); WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage); - initStringInfo(&buf); + if (verbose) - { - Assert(!params->is_wraparound); msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n"); - } else if (params->is_wraparound) - { - /* - * While it's possible for a VACUUM to be both is_wraparound - * and !aggressive, that's just a corner-case -- is_wraparound - * implies aggressive. Produce distinct output for the corner - * case all the same, just in case. 
- */ - if (vacrel->aggressive) - msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n"); - else - msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n"); - } + msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n"); else - { - if (vacrel->aggressive) - msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n"); - else - msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n"); - } + msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n"); appendStringInfo(&buf, msgfmt, get_database_name(MyDatabaseId), vacrel->relnamespace, @@ -948,6 +924,7 @@ lazy_scan_heap(LVRelState *vacrel) { bool hastup, recordfreespace; + Size freespace; LockBuffer(buf, BUFFER_LOCK_SHARE); @@ -961,10 +938,8 @@ lazy_scan_heap(LVRelState *vacrel) /* Collect LP_DEAD items in dead_items array, count tuples */ if (lazy_scan_noprune(vacrel, buf, blkno, page, &hastup, - &recordfreespace)) + &recordfreespace, &freespace)) { - Size freespace = 0; - /* * Processed page successfully (without cleanup lock) -- just * need to perform rel truncation and FSM steps, much like the @@ -973,21 +948,14 @@ lazy_scan_heap(LVRelState *vacrel) */ if (hastup) vacrel->nonempty_pages = blkno + 1; - if (recordfreespace) - freespace = PageGetHeapFreeSpace(page); - UnlockReleaseBuffer(buf); if (recordfreespace) RecordPageWithFreeSpace(vacrel->rel, blkno, freespace); + + /* lock and pin released by lazy_scan_noprune */ continue; } - /* - * lazy_scan_noprune could not do all required processing. Wait - * for a cleanup lock, and call lazy_scan_prune in the usual way. 
- */ - Assert(vacrel->aggressive); - LockBuffer(buf, BUFFER_LOCK_UNLOCK); - LockBufferForCleanup(buf); + /* cleanup lock acquired by lazy_scan_noprune */ } /* Check for new or empty pages before lazy_scan_prune call */ @@ -1433,8 +1401,6 @@ lazy_scan_strategy(LVRelState *vacrel, bool force_scan_all) if (force_scan_all) vacrel->vmstrat = VMSNAP_SCAN_ALL; - Assert(!vacrel->aggressive || vacrel->vmstrat != VMSNAP_SCAN_LAZY); - /* Inform vmsnap infrastructure of our chosen strategy */ visibilitymap_snap_strategy(vacrel->vmsnap, vacrel->vmstrat); @@ -2011,17 +1977,32 @@ retry: * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't * performed here, it's quite possible that an earlier opportunistic pruning * operation left LP_DEAD items behind. We'll at least collect any such items - * in the dead_items array for removal from indexes. + * in the dead_items array for removal from indexes (assuming caller's page + * can be processed successfully here). * - * For aggressive VACUUM callers, we may return false to indicate that a full - * cleanup lock is required for processing by lazy_scan_prune. This is only - * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from - * one or more tuples on the page. We always return true for non-aggressive - * callers. + * We return true to indicate that processing succeeded, in which case we'll + * have dropped the lock and pin on buf/page. Else returns false, indicating + * that page must be processed by lazy_scan_prune in the usual way after all. + * Acquires a cleanup lock on buf/page for caller before returning false. + * + * We go to considerable trouble to get a cleanup lock on any page that has + * XIDs/MXIDs that need to be frozen in order for VACUUM to be able to set + * relfrozenxid/relminmxid to values >= FreezeLimit/MultiXactCutoff cutoffs. + * But we don't strictly guarantee it; we only guarantee that final values + * will be >= MinXid/MinMulti cutoffs in the worst case. 
+ * + * We prefer to "under promise and over deliver" like this because a strong + * guarantee has the potential to make a bad situation even worse. VACUUM + * should avoid waiting for a cleanup lock for an indefinitely long time until + * it has already exhausted every available alternative. It's quite possible + * (and perhaps even likely) that the problem will go away on its own. But + * even when it doesn't, our approach at least makes it likely that the first + * VACUUM that encounters the issue will catch up on whatever freezing may + * still be required for every other page in the target rel. * * See lazy_scan_prune for an explanation of hastup return flag. * recordfreespace flag instructs caller on whether or not it should do - * generic FSM processing for page. + * generic FSM processing for page, using *freespace value set here. */ static bool lazy_scan_noprune(LVRelState *vacrel, @@ -2029,7 +2010,8 @@ lazy_scan_noprune(LVRelState *vacrel, BlockNumber blkno, Page page, bool *hastup, - bool *recordfreespace) + bool *recordfreespace, + Size *freespace) { OffsetNumber offnum, maxoff; @@ -2037,6 +2019,7 @@ lazy_scan_noprune(LVRelState *vacrel, live_tuples, recently_dead_tuples, missed_dead_tuples; + bool should_freeze = false; HeapTupleHeader tupleheader; TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid; MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid; @@ -2046,6 +2029,7 @@ lazy_scan_noprune(LVRelState *vacrel, *hastup = false; /* for now */ *recordfreespace = false; /* for now */ + *freespace = PageGetHeapFreeSpace(page); lpdead_items = 0; live_tuples = 0; @@ -2087,34 +2071,7 @@ lazy_scan_noprune(LVRelState *vacrel, if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs, &NoFreezePageRelfrozenXid, &NoFreezePageRelminMxid)) - { - /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */ - if (vacrel->aggressive) - { - /* - * Aggressive VACUUMs must always be able to advance rel's - * relfrozenxid to a value >= FreezeLimit 
(and be able to - * advance rel's relminmxid to a value >= MultiXactCutoff). - * The ongoing aggressive VACUUM won't be able to do that - * unless it can freeze an XID (or MXID) from this tuple now. - * - * The only safe option is to have caller perform processing - * of this page using lazy_scan_prune. Caller might have to - * wait a while for a cleanup lock, but it can't be helped. - */ - vacrel->offnum = InvalidOffsetNumber; - return false; - } - - /* - * Non-aggressive VACUUMs are under no obligation to advance - * relfrozenxid (even by one XID). We can be much laxer here. - * - * Currently we always just accept an older final relfrozenxid - * and/or relminmxid value. We never make caller wait or work a - * little harder, even when it likely makes sense to do so. - */ - } + should_freeze = true; ItemPointerSet(&(tuple.t_self), blkno, offnum); tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); @@ -2163,10 +2120,98 @@ lazy_scan_noprune(LVRelState *vacrel, vacrel->offnum = InvalidOffsetNumber; /* - * By here we know for sure that caller can put off freezing and pruning - * this particular page until the next VACUUM. Remember its details now. - * (lazy_scan_prune expects a clean slate, so we have to do this last.) + * Release lock (but not pin) on page now. Then consider if we should + * back out of accepting reduced processing for this page. + * + * Our caller's initial inability to get a cleanup lock will often turn + * out to have been nothing more than a momentary blip, and it would be a + * shame if relfrozenxid/relminmxid values < FreezeLimit/MultiXactCutoff + * were used without good reason. For example, the checkpointer might + * have been writing out this page a moment ago, in which case its buffer + * pin might have already been released by now. + * + * It's also possible that the conflicting buffer pin will continue to + * block cleanup lock acquisition on the buffer for an extended period. 
+ * For example, it isn't uncommon for heap_lock_tuple to sleep while + * holding a buffer pin, in which case a conflicting pin could easily be + * held for much longer than VACUUM can reasonably be expected to wait. + * There are also truly pathological cases to worry about. For example, + * the case where buggy application code holds open a cursor forever. */ + LockBuffer(buf, BUFFER_LOCK_UNLOCK); + if (should_freeze) + { + /* + * If page has tuple with a dangerously old XID/MXID (an XID < MinXid, + * or an MXID < MinMulti), then we wait for however long it takes to + * get a cleanup lock. + * + * Check for that first (get it out of the way). + */ + if (TransactionIdPrecedes(NoFreezePageRelfrozenXid, + vacrel->cutoffs.MinXid) || + MultiXactIdPrecedes(NoFreezePageRelminMxid, + vacrel->cutoffs.MinMulti)) + { + /* + * MinXid/MinMulti are considered to be only barely adequate final + * values, so we only expect to end up here when previous VACUUMs + * put off processing by lazy_scan_prune in the hope that it would + * never come to this. That hasn't worked out, so we must wait. + */ + LockBufferForCleanup(buf); + return false; + } + + /* + * Page has tuple with XID < FreezeLimit, or MXID < MultiXactCutoff, + * but they're not so old that we're _strictly_ obligated to freeze. + * + * We are willing to go to the trouble of waiting for a cleanup lock + * for a short while for such a page -- just not indefinitely long. + * This avoids squandering opportunities to advance relfrozenxid or + * relminmxid by the target amount during any one VACUUM, which is + * particularly important with larger tables that only get vacuumed + * when autovacuum.c is concerned about table age. It would not be + * okay if the number of autovacuums such a table ended up requiring + * noticeably exceeded the expected autovacuum_freeze_max_age cadence. + * + * We are willing to wait and try again a total of 3 times. If that + * doesn't work then we just give up. 
We only wait here when it is + * actually expected to preserve current NewRelfrozenXid/NewRelminMxid + * tracker values, and when trackers will actually be used to update + * pg_class later on. This also tends to limit the impact of waiting + * for VACUUMs that experience relatively many cleanup lock conflicts. + */ + if (vacrel->vmstrat != VMSNAP_SCAN_LAZY && + (TransactionIdPrecedes(NoFreezePageRelfrozenXid, + vacrel->NewRelfrozenXid) || + MultiXactIdPrecedes(NoFreezePageRelminMxid, + vacrel->NewRelminMxid))) + { + /* wait 10ms, then 20ms, then 30ms, then give up */ + for (int i = 1; i <= 3; i++) + { + CHECK_FOR_INTERRUPTS(); + + pg_usleep(1000L * 10L * i); + if (ConditionalLockBufferForCleanup(buf)) + { + /* Go process page in lazy_scan_prune after all */ + return false; + } + } + } + + /* Accept reduced processing for this page after all */ + } + + /* + * By here we know for sure that caller will put off freezing and pruning + * this particular page until the next VACUUM. Remember its details now. + * Also drop the buffer pin that we held onto during cleanup lock steps. + */ + ReleaseBuffer(buf); vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid; vacrel->NewRelminMxid = NoFreezePageRelminMxid; diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 5085d9407..f4429e320 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -916,13 +916,8 @@ get_all_vacuum_rels(int options) * The target relation and VACUUM parameters are our inputs. * * Output parameters are the cutoffs that VACUUM caller should use. - * - * Return value indicates if vacuumlazy.c caller should make its VACUUM - * operation aggressive. An aggressive VACUUM must advance relfrozenxid up to - * FreezeLimit (at a minimum), and relminmxid up to MultiXactCutoff (at a - * minimum). 
*/ -bool +void vacuum_get_cutoffs(Relation rel, const VacuumParams *params, struct VacuumCutoffs *cutoffs) { @@ -1092,6 +1087,39 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams *params, multixact_freeze_table_age > effective_multixact_freeze_max_age) multixact_freeze_table_age = effective_multixact_freeze_max_age; + /* + * Determine the cutoffs used by VACUUM to decide on whether to wait for a + * cleanup lock on a page (that it can't cleanup lock right away). These + * are the MinXid and MinMulti cutoffs. They are related to the cutoffs + * for freezing (FreezeLimit and MultiXactCutoff), and are only applied on + * pages that we cannot freeze right away. See vacuumlazy.c for details. + * + * VACUUM can ratchet back NewRelfrozenXid and/or NewRelminMxid instead of + * waiting indefinitely for a cleanup lock in almost all cases. The high + * level goal is to create as many opportunities as possible to freeze + * (across many successive VACUUM operations), while avoiding waiting for + * a cleanup lock whenever possible. Any time spent waiting is time spent + * not freezing other eligible pages, which is typically a bad trade-off. + * + * As a consequence of all this, MinXid and MinMulti also act as limits on + * the oldest acceptable values that can ever be set in pg_class by VACUUM + * (though this is only relevant when they have already attained XID/MXID + * ages that approach freeze_table_age and/or multixact_freeze_table_age). 
+ */ + cutoffs->MinXid = nextXID - (freeze_table_age * 0.95); + if (!TransactionIdIsNormal(cutoffs->MinXid)) + cutoffs->MinXid = FirstNormalTransactionId; + /* MinXid must always be <= FreezeLimit */ + if (TransactionIdPrecedes(cutoffs->FreezeLimit, cutoffs->MinXid)) + cutoffs->MinXid = cutoffs->FreezeLimit; + + cutoffs->MinMulti = nextMXID - (multixact_freeze_table_age * 0.95); + if (cutoffs->MinMulti < FirstMultiXactId) + cutoffs->MinMulti = FirstMultiXactId; + /* MinMulti must always be <= MultiXactCutoff */ + if (MultiXactIdPrecedes(cutoffs->MultiXactCutoff, cutoffs->MinMulti)) + cutoffs->MinMulti = cutoffs->MultiXactCutoff; + /* * Finally, set tableagefrac for VACUUM. This can come from either XID or * XMID table age (whichever is greater currently). @@ -1109,8 +1137,6 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams *params, */ if (params->is_wraparound) cutoffs->tableagefrac = 1.0; - - return (cutoffs->tableagefrac >= 1.0); } /* diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index f9788c30a..0c80896cc 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -235,8 +235,8 @@ pgstat_report_vacuum(Oid tableoid, bool shared, tabentry->dead_tuples = deadtuples; /* - * It is quite possible that a non-aggressive VACUUM ended up skipping - * various pages, however, we'll zero the insert counter here regardless. + * It is quite possible that VACUUM will skip all-visible pages for a + * smaller table, however, we'll zero the insert counter here regardless. * It's currently used only to track when we need to perform an "insert" * autovacuum, which are mainly intended to freeze newly inserted tuples. 
* Zeroing this may just mean we'll not try to vacuum the table again diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 596c44060..fd9b2b619 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8256,7 +8256,7 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; Note that even when this parameter is disabled, the system will launch autovacuum processes if necessary to prevent transaction ID wraparound. See for more information. + linkend="vacuum-xid-space"/> for more information. @@ -8445,7 +8445,7 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; This parameter can only be set at server start, but the setting can be reduced for individual tables by changing table storage parameters. - For more information see . + For more information see . @@ -9195,7 +9195,7 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; billion, VACUUM will silently limit the effective value to . For more - information see . + information see . @@ -9228,7 +9228,7 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; the value of , so that there is not an unreasonably short time between forced autovacuums. For more information see . + linkend="vacuum-xid-space"/>. @@ -9284,7 +9284,7 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; billion, VACUUM will silently limit the effective value to . For more - information see . + information see . diff --git a/doc/src/sgml/logicaldecoding.sgml b/doc/src/sgml/logicaldecoding.sgml index 38ee69dcc..380da3c1e 100644 --- a/doc/src/sgml/logicaldecoding.sgml +++ b/doc/src/sgml/logicaldecoding.sgml @@ -324,7 +324,7 @@ postgres=# select * from pg_logical_slot_get_changes('regression_slot', NULL, NU because neither required WAL nor required rows from the system catalogs can be removed by VACUUM as long as they are required by a replication slot. In extreme cases this could cause the database to shut down to prevent - transaction ID wraparound (see ). 
+ transaction ID wraparound (see ). So if a slot is no longer required it should be dropped. diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml index 759ea5ac9..ed54a2988 100644 --- a/doc/src/sgml/maintenance.sgml +++ b/doc/src/sgml/maintenance.sgml @@ -400,202 +400,73 @@ - - Preventing Transaction ID Wraparound Failures + + Freezing tuples - - transaction ID - wraparound - + + VACUUM freezes a page's tuples (by processing + the tuple header fields described in ) as a way of avoiding long-term + dependencies on transaction status metadata referenced therein. + Heap pages that only contain frozen tuples are suitable for + long-term storage. Larger databases often consist mostly of cold + data that is modified very infrequently, plus a relatively small + amount of hot data that is updated far more frequently. + VACUUM applies a variety of techniques that + allow it to concentrate most of its efforts on hot data. + + + + Managing the 32-bit Transaction ID address space wraparound of transaction IDs - - PostgreSQL's - MVCC transaction semantics - depend on being able to compare transaction ID (XID) - numbers: a row version with an insertion XID greater than the current - transaction's XID is in the future and should not be visible - to the current transaction. But since transaction IDs have limited size - (32 bits) a cluster that runs for a long time (more - than 4 billion transactions) would suffer transaction ID - wraparound: the XID counter wraps around to zero, and all of a sudden - transactions that were in the past appear to be in the future — which - means their output become invisible. In short, catastrophic data loss. - (Actually the data is still there, but that's cold comfort if you cannot - get at it.) To avoid this, it is necessary to vacuum every table - in every database at least once every two billion transactions. 
- - - - The reason that periodic vacuuming solves the problem is that - VACUUM will mark rows as frozen, indicating that - they were inserted by a transaction that committed sufficiently far in - the past that the effects of the inserting transaction are certain to be - visible to all current and future transactions. - Normal XIDs are - compared using modulo-232 arithmetic. This means - that for every normal XID, there are two billion XIDs that are - older and two billion that are newer; another - way to say it is that the normal XID space is circular with no - endpoint. Therefore, once a row version has been created with a particular - normal XID, the row version will appear to be in the past for - the next two billion transactions, no matter which normal XID we are - talking about. If the row version still exists after more than two billion - transactions, it will suddenly appear to be in the future. To - prevent this, PostgreSQL reserves a special XID, - FrozenTransactionId, which does not follow the normal XID - comparison rules and is always considered older - than every normal XID. - Frozen row versions are treated as if the inserting XID were - FrozenTransactionId, so that they will appear to be - in the past to all normal transactions regardless of wraparound - issues, and so such row versions will be valid until deleted, no matter - how long that is. - - - - In PostgreSQL versions before 9.4, freezing was - implemented by actually replacing a row's insertion XID - with FrozenTransactionId, which was visible in the - row's xmin system column. Newer versions just set a flag - bit, preserving the row's original xmin for possible - forensic use. However, rows with xmin equal - to FrozenTransactionId (2) may still be found - in databases pg_upgrade'd from pre-9.4 versions. 
+ PostgreSQL's MVCC transaction semantics depend on + being able to compare transaction ID (XID) + numbers: a row version with an insertion XID greater than the + current transaction's XID is in the future and + should not be visible to the current transaction. But since the + on-disk representation of transaction IDs is only 32 bits wide, the + system is incapable of representing + distances between any two XIDs that exceed + about 2 billion transaction IDs. + - Also, system catalogs may contain rows with xmin equal - to BootstrapTransactionId (1), indicating that they were - inserted during the first phase of initdb. - Like FrozenTransactionId, this special XID is treated as - older than every normal XID. + One of the purposes of periodic vacuuming is to manage the + transaction ID address space. VACUUM will + mark rows as frozen, indicating that they + were inserted by a transaction that committed sufficiently far in + the past that the effects of the inserting transaction are + certain to be visible to all current and future transactions. + There is, in effect, an infinite distance between a frozen + transaction ID and any unfrozen transaction ID. This allows the + on-disk representation of transaction IDs to recycle the 32-bit + address space efficiently. - - - - controls how old an XID value has to be before rows bearing that XID will be - frozen. Increasing this setting may avoid unnecessary work if the - rows that would otherwise be frozen will soon be modified again, - but decreasing this setting increases - the number of transactions that can elapse before the table must be - vacuumed again. - - - - VACUUM uses the visibility map - to determine which pages of a table must be scanned. Normally, it - will skip pages that don't have any dead row versions even if those pages - might still have row versions with old XID values. Therefore, normal - VACUUMs won't always freeze every old row version in the table. 
- When that happens, VACUUM will eventually need to perform an - aggressive vacuum, which will freeze all eligible unfrozen - XID and MXID values, including those from all-visible but not all-frozen pages. - In practice most tables require periodic aggressive vacuuming. - - controls when VACUUM does that: all-visible but not all-frozen - pages are scanned if the number of transactions that have passed since the - last such scan is greater than vacuum_freeze_table_age minus - vacuum_freeze_min_age. Setting - vacuum_freeze_table_age to 0 forces VACUUM to - always use its aggressive strategy. - - - - The maximum time that a table can go unvacuumed is two billion - transactions minus the vacuum_freeze_min_age value at - the time of the last aggressive vacuum. If it were to go - unvacuumed for longer than - that, data loss could result. To ensure that this does not happen, - autovacuum is invoked on any table that might contain unfrozen rows with - XIDs older than the age specified by the configuration parameter . (This will happen even if - autovacuum is disabled.) - - - - This implies that if a table is not otherwise vacuumed, - autovacuum will be invoked on it approximately once every - autovacuum_freeze_max_age minus - vacuum_freeze_min_age transactions. - For tables that are regularly vacuumed for space reclamation purposes, - this is of little importance. However, for static tables - (including tables that receive inserts, but no updates or deletes), - there is no need to vacuum for space reclamation, so it can - be useful to try to maximize the interval between forced autovacuums - on very large static tables. Obviously one can do this either by - increasing autovacuum_freeze_max_age or decreasing - vacuum_freeze_min_age. - - - - The effective maximum for vacuum_freeze_table_age is 0.95 * - autovacuum_freeze_max_age; a setting higher than that will be - capped to the maximum. 
A value higher than - autovacuum_freeze_max_age wouldn't make sense because an - anti-wraparound autovacuum would be triggered at that point anyway, and - the 0.95 multiplier leaves some breathing room to run a manual - VACUUM before that happens. As a rule of thumb, - vacuum_freeze_table_age should be set to a value somewhat - below autovacuum_freeze_max_age, leaving enough gap so that - a regularly scheduled VACUUM or an autovacuum triggered by - normal delete and update activity is run in that window. Setting it too - close could lead to anti-wraparound autovacuums, even though the table - was recently vacuumed to reclaim space, whereas lower values lead to more - frequent aggressive vacuuming. - - - - The sole disadvantage of increasing autovacuum_freeze_max_age - (and vacuum_freeze_table_age along with it) is that - the pg_xact and pg_commit_ts - subdirectories of the database cluster will take more space, because it - must store the commit status and (if track_commit_timestamp is - enabled) timestamp of all transactions back to - the autovacuum_freeze_max_age horizon. The commit status uses - two bits per transaction, so if - autovacuum_freeze_max_age is set to its maximum allowed value - of two billion, pg_xact can be expected to grow to about half - a gigabyte and pg_commit_ts to about 20GB. If this - is trivial compared to your total database size, - setting autovacuum_freeze_max_age to its maximum allowed value - is recommended. Otherwise, set it depending on what you are willing to - allow for pg_xact and pg_commit_ts storage. - (The default, 200 million transactions, translates to about 50MB - of pg_xact storage and about 2GB of pg_commit_ts - storage.) - - - - One disadvantage of decreasing vacuum_freeze_min_age is that - it might cause VACUUM to do useless work: freezing a row - version is a waste of time if the row is modified - soon thereafter (causing it to acquire a new XID). 
So the setting should - be large enough that rows are not frozen until they are unlikely to change - any more. - - - - To track the age of the oldest unfrozen XIDs in a database, - VACUUM stores XID - statistics in the system tables pg_class and - pg_database. In particular, - the relfrozenxid column of a table's - pg_class row contains the oldest remaining unfrozen - XID at the end of the most recent VACUUM that successfully - advanced relfrozenxid (typically the most recent - aggressive VACUUM). Similarly, the - datfrozenxid column of a database's - pg_database row is a lower bound on the unfrozen XIDs - appearing in that database — it is just the minimum of the - per-table relfrozenxid values within the database. - A convenient way to - examine this information is to execute queries such as: + + To track the age of the oldest unfrozen XIDs in a database, + VACUUM stores XID statistics in the system + tables pg_class and + pg_database. In particular, the + relfrozenxid column of a table's + pg_class row contains the oldest + remaining unfrozen XID at the end of the most recent + VACUUM. All rows inserted by transactions + older than this cutoff XID are guaranteed to have been frozen. + Similarly, the datfrozenxid column of + a database's pg_database row is a lower + bound on the unfrozen XIDs appearing in that database — it + is just the minimum of the per-table + relfrozenxid values within the + database. A convenient way to examine this information is to + execute queries such as: SELECT c.oid::regclass as table_name, @@ -607,83 +478,13 @@ WHERE c.relkind IN ('r', 'm'); SELECT datname, age(datfrozenxid) FROM pg_database; - The age column measures the number of transactions from the - cutoff XID to the current transaction's XID. - - - - - When the VACUUM command's VERBOSE - parameter is specified, VACUUM prints various - statistics about the table. This includes information about how - relfrozenxid and - relminmxid advanced. 
The same details appear - in the server log when autovacuum logging (controlled by ) reports on a - VACUUM operation executed by autovacuum. + The age column measures the number of transactions from the + cutoff XID to the current transaction's XID. - - - - VACUUM normally only scans pages that have been modified - since the last vacuum, but relfrozenxid can only be - advanced when every page of the table - that might contain unfrozen XIDs is scanned. This happens when - relfrozenxid is more than - vacuum_freeze_table_age transactions old, when - VACUUM's FREEZE option is used, or when all - pages that are not already all-frozen happen to - require vacuuming to remove dead row versions. When VACUUM - scans every page in the table that is not already all-frozen, it should - set age(relfrozenxid) to a value just a little more than the - vacuum_freeze_min_age setting - that was used (more by the number of transactions started since the - VACUUM started). VACUUM - will set relfrozenxid to the oldest XID - that remains in the table, so it's possible that the final value - will be much more recent than strictly required. - If no relfrozenxid-advancing - VACUUM is issued on the table until - autovacuum_freeze_max_age is reached, an autovacuum will soon - be forced for the table. - - - - If for some reason autovacuum fails to clear old XIDs from a table, the - system will begin to emit warning messages like this when the database's - oldest XIDs reach forty million transactions from the wraparound point: - - -WARNING: database "mydb" must be vacuumed within 39985967 transactions -HINT: To avoid a database shutdown, execute a database-wide VACUUM in that database. - - - (A manual VACUUM should fix the problem, as suggested by the - hint; but note that the VACUUM must be performed by a - superuser, else it will fail to process system catalogs and thus not - be able to advance the database's datfrozenxid.) 
- If these warnings are - ignored, the system will shut down and refuse to start any new - transactions once there are fewer than three million transactions left - until wraparound: - - -ERROR: database is not accepting commands to avoid wraparound data loss in database "mydb" -HINT: Stop the postmaster and vacuum that database in single-user mode. - - - The three-million-transaction safety margin exists to let the - administrator recover without data loss, by manually executing the - required VACUUM commands. However, since the system will not - execute commands once it has gone into the safety shutdown mode, - the only way to do this is to stop the server and start the server in single-user - mode to execute VACUUM. The shutdown mode is not enforced - in single-user mode. See the reference - page for details about using single-user mode. - + - Multixacts and Wraparound + Managing the 32-bit MultiXactId address space MultiXactId @@ -704,47 +505,109 @@ HINT: Stop the postmaster and vacuum that database in single-user mode. particular multixact ID is stored separately in the pg_multixact subdirectory, and only the multixact ID appears in the xmax field in the tuple header. - Like transaction IDs, multixact IDs are implemented as a - 32-bit counter and corresponding storage, all of which requires - careful aging management, storage cleanup, and wraparound handling. - There is a separate storage area which holds the list of members in - each multixact, which also uses a 32-bit counter and which must also - be managed. + Like transaction IDs, multixact IDs are implemented as a 32-bit + counter and corresponding storage. - Whenever VACUUM scans any part of a table, it will replace - any multixact ID it encounters which is older than - - by a different value, which can be the zero value, a single - transaction ID, or a newer multixact ID. For each table, - pg_class.relminmxid stores the oldest - possible multixact ID still appearing in any tuple of that table. 
- If this value is older than - , an aggressive - vacuum is forced. As discussed in the previous section, an aggressive - vacuum means that only those pages which are known to be all-frozen will - be skipped. mxid_age() can be used on - pg_class.relminmxid to find its age. + A separate relminmxid field can be + advanced any time relfrozenxid is + advanced. VACUUM manages the MultiXactId + address space by implementing rules that are analogous to the + approach taken with Transaction IDs. Many of the XID-based + settings that influence VACUUM's behavior have + direct MultiXactId analogs. A convenient way to examine + information about the MultiXactId address space is to execute + queries such as: + + +SELECT c.oid::regclass as table_name, + mxid_age(c.relminmxid) +FROM pg_class c +WHERE c.relkind IN ('r', 'm'); + +SELECT datname, mxid_age(datminmxid) FROM pg_database; + + + + + Lazy and eager freezing strategies + + When VACUUM is configured to freeze more + aggressively it will typically set the table's + relfrozenxid and + relminmxid fields to relatively recent + values. However, there can be significant variation among tables + with varying workload characteristics. There can even be + variation in how relfrozenxid + advancement takes place over time for the same table, across + successive VACUUM operations. Sometimes + VACUUM will be able to advance + relfrozenxid and + relminmxid by relatively many + XIDs/MXIDs despite performing relatively little freezing work. On + the other hand VACUUM can sometimes freeze many + individual pages while only advancing + relfrozenxid by as few as one or two + XIDs (this is typically seen following bulk loading). - - Aggressive VACUUMs, regardless of what causes - them, are guaranteed to be able to advance - the table's relminmxid. - Eventually, as all tables in all databases are scanned and their - oldest multixact values are advanced, on-disk storage for older - multixacts can be removed. 
- + + + When the VACUUM command's VERBOSE + parameter is specified, VACUUM prints various + statistics about the table. This includes information about how + relfrozenxid and + relminmxid advanced, as well as + information about how many pages were newly frozen. The same + details appear in the server log when autovacuum logging + (controlled by ) + reports on a VACUUM operation executed by + autovacuum. + + - As a safety device, an aggressive vacuum scan will - occur for any table whose multixact-age is greater than . Also, if the - storage occupied by multixacts members exceeds 2GB, aggressive vacuum - scans will occur more often for all tables, starting with those that - have the oldest multixact-age. Both of these kinds of aggressive - scans will occur even if autovacuum is nominally disabled. + As a general rule, the design of VACUUM + prioritizes stable and predictable performance characteristics + over time, while still leaving some scope for freezing lazily when + a lazy strategy is likely to avoid unnecessary work altogether. Tables + whose heap relation on-disk size is less than at the start of + VACUUM will have page freezing triggered based + on lazy criteria. Freezing will only take place + when one or more XIDs attain an age greater than , or when one or more MXIDs + attain an age greater than . + + + Tables that are larger than will have + VACUUM trigger freezing for any and all pages + that are eligible to be frozen under the lazy criteria, as well as + pages that VACUUM considers all visible. + This is the eager freezing strategy. The design makes the soft + assumption that larger tables will tend to consist of pages that + will only need to be processed by VACUUM once. + The overhead of freezing each page is expected to be slightly + higher in the short term, but much lower in the long term, at + least on average. Eager freezing also limits the accumulation of + unfrozen pages, which tends to improve performance + stability over time.
+ + + Occasionally, VACUUM is required to advance + relfrozenxid and/or + relminmxid up to a specific value + to ensure the system always has a healthy amount of usable + transaction ID address space. This usually only occurs when + VACUUM must be run by autovacuum specifically + for the purpose of advancing relfrozenxid, + when no VACUUM has been triggered for some + time. In practice most individual tables will consistently have + somewhat recent values through routine vacuuming to clean up old + row versions. @@ -802,117 +665,197 @@ HINT: Stop the postmaster and vacuum that database in single-user mode. limits. - - Tables whose relfrozenxid value is more than - transactions old are always - vacuumed (this also applies to those tables whose freeze max age has - been modified via storage parameters; see below). Otherwise, if the - number of tuples obsoleted since the last - VACUUM exceeds the vacuum threshold, the - table is vacuumed. The vacuum threshold is defined as: + + Triggering thresholds + + Tables whose relfrozenxid value is + more than + transactions old are always vacuumed (this also applies to those + tables whose freeze max age has been modified via storage + parameters; see below). Otherwise, if the number of tuples + obsoleted since the last VACUUM exceeds the + vacuum threshold, the table is vacuumed. The + vacuum threshold is defined as: vacuum threshold = vacuum base threshold + vacuum scale factor * number of tuples - where the vacuum base threshold is - , - the vacuum scale factor is - , + where the vacuum base threshold is , the vacuum scale + factor is , and the number of tuples is pg_class.reltuples. 
- + - - The table is also vacuumed if the number of tuples inserted since the last - vacuum has exceeded the defined insert threshold, which is defined as: + + The table is also vacuumed if the number of tuples inserted since + the last vacuum has exceeded the defined insert threshold, which + is defined as: vacuum insert threshold = vacuum base insert threshold + vacuum insert scale factor * number of tuples - where the vacuum insert base threshold is - , - and vacuum insert scale factor is - . - Such vacuums may allow portions of the table to be marked as - all visible and also allow tuples to be frozen, which - can reduce the work required in subsequent vacuums. - For tables which receive INSERT operations but no or - almost no UPDATE/DELETE operations, - it may be beneficial to lower the table's - as this may allow - tuples to be frozen by earlier vacuums. The number of obsolete tuples and - the number of inserted tuples are obtained from the cumulative statistics system; - it is a semi-accurate count updated by each UPDATE, - DELETE and INSERT operation. (It is - only semi-accurate because some information might be lost under heavy - load.) If the relfrozenxid value of the table - is more than vacuum_freeze_table_age transactions old, - an aggressive vacuum is performed to freeze old tuples and advance - relfrozenxid; otherwise, only pages that have been modified - since the last vacuum are scanned. - + where the vacuum insert base threshold + is , and + vacuum insert scale factor is . Such + vacuums may allow portions of the table to be marked as + all visible and also allow tuples to be + frozen. The number of obsolete tuples and the number of inserted + tuples are obtained from the cumulative statistics system; it is + a semi-accurate count updated by each UPDATE, + DELETE and INSERT + operation. (It is only semi-accurate because some information + might be lost under heavy load.) 
+ - - For analyze, a similar condition is used: the threshold, defined as: + + For analyze, a similar condition is used: the threshold, defined as: analyze threshold = analyze base threshold + analyze scale factor * number of tuples - is compared to the total number of tuples inserted, updated, or deleted - since the last ANALYZE. - - - - Partitioned tables are not processed by autovacuum. Statistics - should be collected by running a manual ANALYZE when it is - first populated, and again whenever the distribution of data in its - partitions changes significantly. - - - - Temporary tables cannot be accessed by autovacuum. Therefore, - appropriate vacuum and analyze operations should be performed via - session SQL commands. - - - - The default thresholds and scale factors are taken from - postgresql.conf, but it is possible to override them - (and many other autovacuum control parameters) on a per-table basis; see - for more information. - If a setting has been changed via a table's storage parameters, that value - is used when processing that table; otherwise the global settings are - used. See for more details on - the global settings. - - - - When multiple workers are running, the autovacuum cost delay parameters - (see ) are - balanced among all the running workers, so that the - total I/O impact on the system is the same regardless of the number - of workers actually running. However, any workers processing tables whose - per-table autovacuum_vacuum_cost_delay or - autovacuum_vacuum_cost_limit storage parameters have been set - are not considered in the balancing algorithm. - - - - Autovacuum workers generally don't block other commands. If a process - attempts to acquire a lock that conflicts with the - SHARE UPDATE EXCLUSIVE lock held by autovacuum, lock - acquisition will interrupt the autovacuum. For conflicting lock modes, - see . 
However, if the autovacuum - is running to prevent transaction ID wraparound (i.e., the autovacuum query - name in the pg_stat_activity view ends with - (to prevent wraparound)), the autovacuum is not - automatically interrupted. - - - - - Regularly running commands that acquire locks conflicting with a - SHARE UPDATE EXCLUSIVE lock (e.g., ANALYZE) can - effectively prevent autovacuums from ever completing. + is compared to the total number of tuples inserted, updated, or + deleted since the last ANALYZE. - + + + + + Anti-wraparound autovacuum + + + wraparound + of transaction IDs + + + + wraparound + of multixact IDs + + + + If no relfrozenxid-advancing + VACUUM is issued on the table before + autovacuum_freeze_max_age is reached, an + anti-wraparound autovacuum will soon be launched against the + table. This reliably advances + relfrozenxid when there is no other + reason for VACUUM to run, or when a smaller + table had VACUUM operations that lazily opted + not to advance relfrozenxid. + + + + An anti-wraparound autovacuum will also be triggered for any + table whose multixact-age is greater than . However, + if the storage occupied by multixacts members exceeds 2GB, + anti-wraparound vacuum might occur more often than this. + + + + If for some reason autovacuum fails to clear old XIDs from a table, the + system will begin to emit warning messages like this when the database's + oldest XIDs reach forty million transactions from the wraparound point: + + +WARNING: database "mydb" must be vacuumed within 39985967 transactions +HINT: To avoid a database shutdown, execute a database-wide VACUUM in that database. + + + (A manual VACUUM should fix the problem, as suggested by the + hint; but note that the VACUUM must be performed by a + superuser, else it will fail to process system catalogs and thus not + be able to advance the database's datfrozenxid.) 
+ If these warnings are + ignored, the system will shut down and refuse to start any new + transactions once there are fewer than three million transactions left + until wraparound: + + +ERROR: database is not accepting commands to avoid wraparound data loss in database "mydb" +HINT: Stop the postmaster and vacuum that database in single-user mode. + + + The three-million-transaction safety margin exists to let the + administrator recover by manually executing the required + VACUUM commands. It is usually sufficient to + allow autovacuum to finish against the table with the oldest + relfrozenxid and/or + relminmxid value. The wraparound + failsafe mechanism controlled by and will typically + trigger before warning messages are first emitted. This happens + dynamically, in any anti-wraparound autovacuum worker that is + tasked with advancing very old table ages. It will also happen + during manual VACUUM operations. + + + + The shutdown mode is not enforced in single-user mode, which can + be useful in some disaster recovery scenarios. See the reference page for details about using + single-user mode. + + + + + Limitations + + + Partitioned tables are not processed by autovacuum. Statistics + should be collected by running a manual ANALYZE when it is + first populated, and again whenever the distribution of data in its + partitions changes significantly. + + + + Temporary tables cannot be accessed by autovacuum. Therefore, + appropriate vacuum and analyze operations should be performed via + session SQL commands. + + + + The default thresholds and scale factors are taken from + postgresql.conf, but it is possible to override them + (and many other autovacuum control parameters) on a per-table basis; see + for more information. + If a setting has been changed via a table's storage parameters, that value + is used when processing that table; otherwise the global settings are + used. See for more details on + the global settings.
+ + + + When multiple workers are running, the autovacuum cost delay parameters + (see ) are + balanced among all the running workers, so that the + total I/O impact on the system is the same regardless of the number + of workers actually running. However, any workers processing tables whose + per-table autovacuum_vacuum_cost_delay or + autovacuum_vacuum_cost_limit storage parameters have been set + are not considered in the balancing algorithm. + + + + Autovacuum workers generally don't block other commands. If a process + attempts to acquire a lock that conflicts with the + SHARE UPDATE EXCLUSIVE lock held by autovacuum, lock + acquisition will interrupt the autovacuum. For conflicting lock modes, + see . However, if the autovacuum + is running to prevent transaction ID wraparound (i.e., the autovacuum query + name in the pg_stat_activity view ends with + (to prevent wraparound)), the autovacuum is not + automatically interrupted. + + + + + Regularly running commands that acquire locks conflicting with a + SHARE UPDATE EXCLUSIVE lock (e.g., ANALYZE) can + effectively prevent autovacuums from ever completing. + + + diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index eabbf9e65..859175718 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -1503,7 +1503,7 @@ WITH ( MODULUS numeric_literal, REM and/or ANALYZE operations on this table following the rules discussed in . If false, this table will not be autovacuumed, except to prevent - transaction ID wraparound. See for + transaction ID wraparound. See for more about wraparound prevention. 
Note that the autovacuum daemon does not run at all (except to prevent transaction ID wraparound) if the diff --git a/doc/src/sgml/ref/prepare_transaction.sgml b/doc/src/sgml/ref/prepare_transaction.sgml index f4f6118ac..1817ed1e3 100644 --- a/doc/src/sgml/ref/prepare_transaction.sgml +++ b/doc/src/sgml/ref/prepare_transaction.sgml @@ -128,7 +128,7 @@ PREPARE TRANSACTION transaction_id This will interfere with the ability of VACUUM to reclaim storage, and in extreme cases could cause the database to shut down to prevent transaction ID wraparound (see ). Keep in mind also that the transaction + linkend="vacuum-xid-space"/>). Keep in mind also that the transaction continues to hold whatever locks it held. The intended usage of the feature is that a prepared transaction will normally be committed or rolled back as soon as an external transaction manager has verified that diff --git a/doc/src/sgml/ref/vacuum.sgml b/doc/src/sgml/ref/vacuum.sgml index c137debb1..d4237ec5d 100644 --- a/doc/src/sgml/ref/vacuum.sgml +++ b/doc/src/sgml/ref/vacuum.sgml @@ -156,9 +156,11 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] [ ANALYZE ] [ visibility map. Pages where - all tuples are known to be frozen can always be skipped, and those - where all tuples are known to be visible to all transactions may be - skipped except when performing an aggressive vacuum. + all tuples are known to be frozen are always skipped. Pages + where all tuples are known to be visible to all transactions are + skipped whenever VACUUM determines that + advancing relfrozenxid and + relminmxid is unnecessary. This option disables all page-skipping behavior, and is intended to be used only when the contents of the visibility map are suspect, which should happen only if there is a hardware or software @@ -213,7 +215,7 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] [ ANALYZE ] [ ). However, the + (see ).
However, the wraparound failsafe mechanism controlled by will generally trigger automatically to avoid transaction ID wraparound failure, and diff --git a/doc/src/sgml/ref/vacuumdb.sgml b/doc/src/sgml/ref/vacuumdb.sgml index 841aced3b..48942c58f 100644 --- a/doc/src/sgml/ref/vacuumdb.sgml +++ b/doc/src/sgml/ref/vacuumdb.sgml @@ -180,7 +180,7 @@ PostgreSQL documentation - Aggressively freeze tuples. + Eagerly freeze tuples. @@ -259,7 +259,7 @@ PostgreSQL documentation transaction ID age of at least xid_age. This setting is useful for prioritizing tables to process to prevent transaction - ID wraparound (see ). + ID wraparound (see ). For the purposes of this option, the transaction ID age of a relation diff --git a/doc/src/sgml/xact.sgml b/doc/src/sgml/xact.sgml index b467660ee..c4146539f 100644 --- a/doc/src/sgml/xact.sgml +++ b/doc/src/sgml/xact.sgml @@ -49,8 +49,8 @@ The internal transaction ID type xid is 32 bits wide - and wraps around every - 4 billion transactions. A 32-bit epoch is incremented during each + and wraps around every + 4 billion transactions. A 32-bit epoch is incremented during each wraparound. There is also a 64-bit type xid8 which includes this epoch and therefore does not wrap around during the life of an installation; it can be converted to xid by casting.
diff --git a/src/test/isolation/expected/vacuum-no-cleanup-lock.out b/src/test/isolation/expected/vacuum-no-cleanup-lock.out index f7bc93e8f..076fe07ab 100644 --- a/src/test/isolation/expected/vacuum-no-cleanup-lock.out +++ b/src/test/isolation/expected/vacuum-no-cleanup-lock.out @@ -1,6 +1,6 @@ Parsed test spec with 4 sessions -starting permutation: vacuumer_pg_class_stats dml_insert vacuumer_nonaggressive_vacuum vacuumer_pg_class_stats +starting permutation: vacuumer_pg_class_stats dml_insert vacuumer_vacuum_noprune vacuumer_pg_class_stats step vacuumer_pg_class_stats: SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass; @@ -12,7 +12,7 @@ relpages|reltuples step dml_insert: INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl; -step vacuumer_nonaggressive_vacuum: +step vacuumer_vacuum_noprune: VACUUM smalltbl; step vacuumer_pg_class_stats: @@ -24,7 +24,7 @@ relpages|reltuples (1 row) -starting permutation: vacuumer_pg_class_stats dml_insert pinholder_cursor vacuumer_nonaggressive_vacuum vacuumer_pg_class_stats pinholder_commit +starting permutation: vacuumer_pg_class_stats dml_insert pinholder_cursor vacuumer_vacuum_noprune vacuumer_pg_class_stats pinholder_commit step vacuumer_pg_class_stats: SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass; @@ -46,7 +46,7 @@ dummy 1 (1 row) -step vacuumer_nonaggressive_vacuum: +step vacuumer_vacuum_noprune: VACUUM smalltbl; step vacuumer_pg_class_stats: @@ -61,7 +61,7 @@ step pinholder_commit: COMMIT; -starting permutation: vacuumer_pg_class_stats pinholder_cursor dml_insert dml_delete dml_insert vacuumer_nonaggressive_vacuum vacuumer_pg_class_stats pinholder_commit +starting permutation: vacuumer_pg_class_stats pinholder_cursor dml_insert dml_delete dml_insert vacuumer_vacuum_noprune vacuumer_pg_class_stats pinholder_commit step vacuumer_pg_class_stats: SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass; @@ -89,7 +89,7 @@ step dml_delete: step dml_insert: 
INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl; -step vacuumer_nonaggressive_vacuum: +step vacuumer_vacuum_noprune: VACUUM smalltbl; step vacuumer_pg_class_stats: @@ -104,7 +104,7 @@ step pinholder_commit: COMMIT; -starting permutation: vacuumer_pg_class_stats dml_insert dml_delete pinholder_cursor dml_insert vacuumer_nonaggressive_vacuum vacuumer_pg_class_stats pinholder_commit +starting permutation: vacuumer_pg_class_stats dml_insert dml_delete pinholder_cursor dml_insert vacuumer_vacuum_noprune vacuumer_pg_class_stats pinholder_commit step vacuumer_pg_class_stats: SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass; @@ -132,7 +132,7 @@ dummy step dml_insert: INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl; -step vacuumer_nonaggressive_vacuum: +step vacuumer_vacuum_noprune: VACUUM smalltbl; step vacuumer_pg_class_stats: @@ -147,7 +147,7 @@ step pinholder_commit: COMMIT; -starting permutation: dml_begin dml_other_begin dml_key_share dml_other_key_share vacuumer_nonaggressive_vacuum pinholder_cursor dml_other_update dml_commit dml_other_commit vacuumer_nonaggressive_vacuum pinholder_commit vacuumer_nonaggressive_vacuum +starting permutation: dml_begin dml_other_begin dml_key_share dml_other_key_share vacuumer_vacuum_noprune pinholder_cursor dml_other_update dml_commit dml_other_commit vacuumer_vacuum_noprune pinholder_commit vacuumer_vacuum_noprune step dml_begin: BEGIN; step dml_other_begin: BEGIN; step dml_key_share: SELECT id FROM smalltbl WHERE id = 3 FOR KEY SHARE; @@ -162,7 +162,7 @@ id 3 (1 row) -step vacuumer_nonaggressive_vacuum: +step vacuumer_vacuum_noprune: VACUUM smalltbl; step pinholder_cursor: @@ -178,12 +178,12 @@ dummy step dml_other_update: UPDATE smalltbl SET t = 'u' WHERE id = 3; step dml_commit: COMMIT; step dml_other_commit: COMMIT; -step vacuumer_nonaggressive_vacuum: +step vacuumer_vacuum_noprune: VACUUM smalltbl; step pinholder_commit: COMMIT; -step vacuumer_nonaggressive_vacuum: +step 
vacuumer_vacuum_noprune: VACUUM smalltbl; diff --git a/src/test/isolation/specs/vacuum-no-cleanup-lock.spec b/src/test/isolation/specs/vacuum-no-cleanup-lock.spec index 05fd280f6..927410258 100644 --- a/src/test/isolation/specs/vacuum-no-cleanup-lock.spec +++ b/src/test/isolation/specs/vacuum-no-cleanup-lock.spec @@ -55,15 +55,18 @@ step dml_other_key_share { SELECT id FROM smalltbl WHERE id = 3 FOR KEY SHARE; step dml_other_update { UPDATE smalltbl SET t = 'u' WHERE id = 3; } step dml_other_commit { COMMIT; } -# This session runs non-aggressive VACUUM, but with maximally aggressive -# cutoffs for tuple freezing (e.g., FreezeLimit == OldestXmin): +# This session runs VACUUM with maximally aggressive cutoffs for tuple +# freezing (e.g., FreezeLimit == OldestXmin), without ever being +# prepared to wait for a cleanup lock (we'll never wait on a cleanup +# lock because the separate MinXid cutoff for waiting will still be +# well before FreezeLimit, given our default autovacuum_freeze_max_age). session vacuumer setup { SET vacuum_freeze_min_age = 0; SET vacuum_multixact_freeze_min_age = 0; } -step vacuumer_nonaggressive_vacuum +step vacuumer_vacuum_noprune { VACUUM smalltbl; } @@ -75,15 +78,14 @@ step vacuumer_pg_class_stats # Test VACUUM's reltuples counting mechanism. # # Final pg_class.reltuples should never be affected by VACUUM's inability to -# get a cleanup lock on any page, except to the extent that any cleanup lock -# contention changes the number of tuples that remain ("missed dead" tuples -# are counted in reltuples, much like "recently dead" tuples). +# get a cleanup lock on any page. Note that "missed dead" tuples are counted +# in reltuples, much like "recently dead" tuples. 
# Easy case: permutation vacuumer_pg_class_stats # Start with 20 tuples dml_insert - vacuumer_nonaggressive_vacuum + vacuumer_vacuum_noprune vacuumer_pg_class_stats # End with 21 tuples # Harder case -- count 21 tuples at the end (like last time), but with cleanup @@ -92,7 +94,7 @@ permutation vacuumer_pg_class_stats # Start with 20 tuples dml_insert pinholder_cursor - vacuumer_nonaggressive_vacuum + vacuumer_vacuum_noprune vacuumer_pg_class_stats # End with 21 tuples pinholder_commit # order doesn't matter @@ -103,7 +105,7 @@ permutation dml_insert dml_delete dml_insert - vacuumer_nonaggressive_vacuum + vacuumer_vacuum_noprune # reltuples is 21 here again -- "recently dead" tuple won't be included in # count here: vacuumer_pg_class_stats @@ -116,7 +118,7 @@ permutation dml_delete pinholder_cursor dml_insert - vacuumer_nonaggressive_vacuum + vacuumer_vacuum_noprune # reltuples is 21 here again -- "missed dead" tuple ("recently dead" when # concurrent activity held back VACUUM's OldestXmin) won't be included in # count here: @@ -128,7 +130,7 @@ permutation # This provides test coverage for code paths that are only hit when we need to # freeze, but inability to acquire a cleanup lock on a heap page makes # freezing some XIDs/MXIDs < FreezeLimit/MultiXactCutoff impossible (without -# waiting for a cleanup lock, which non-aggressive VACUUM is unwilling to do). +# waiting for a cleanup lock, which won't ever happen here). 
permutation dml_begin dml_other_begin @@ -136,15 +138,15 @@ permutation dml_other_key_share # Will get cleanup lock, can't advance relminmxid yet: # (though will usually advance relfrozenxid by ~2 XIDs) - vacuumer_nonaggressive_vacuum + vacuumer_vacuum_noprune pinholder_cursor dml_other_update dml_commit dml_other_commit # Can't cleanup lock, so still can't advance relminmxid here: # (relfrozenxid held back by XIDs in MultiXact too) - vacuumer_nonaggressive_vacuum + vacuumer_vacuum_noprune pinholder_commit # Pin was dropped, so will advance relminmxid, at long last: # (ditto for relfrozenxid advancement) - vacuumer_nonaggressive_vacuum + vacuumer_vacuum_noprune -- 2.38.1