From 6c8cb32e074e7de2414b067fcf4011acb4cca121 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Mon, 22 Nov 2021 10:02:30 -0800 Subject: [PATCH v8 1/3] Loosen coupling between relfrozenxid and tuple freezing. The pg_class.relfrozenxid invariant for heap relations is as follows: relfrozenxid must be less than or equal to the oldest extant XID in the table, and must never wraparound (it must be advanced by VACUUM before wraparound, or in extreme cases the system must be forced to stop allocating new XIDs). Before now, VACUUM always set relfrozenxid to whatever value it happened to use when determining which tuples to freeze (the VACUUM operation's FreezeLimit cutoff). But there was no inherent reason why the oldest extant XID in the table should be anywhere near as old as that. Furthermore, even if it really was almost as old as FreezeLimit, that tells us much more about the mechanism that VACUUM used to determine which tuples to freeze than anything else. Depending on the details of the table and workload, it may have been possible to safely advance relfrozenxid by many more XIDs, at a relatively small cost in freezing (possibly no extra cost at all) -- but VACUUM rigidly coupled freezing with advancing relfrozenxid, missing all this. Teach VACUUM to track the newest possible safe final relfrozenxid dynamically (and to track a new value for relminmxid). In the extreme though common case where all tuples are already frozen, or became frozen (or were removed by pruning), the final relfrozenxid value will be exactly equal to the OldestXmin value used by the same VACUUM operation. A later patch will overhaul the strategy that VACUUM uses for freezing so that relfrozenxid will tend to get set to a value that's relatively close to OldestXmin in almost all cases. Final relfrozenxid values still follow the same rules as before. They must still be >= FreezeLimit in an aggressive VACUUM. 
Non-aggressive VACUUMs can set relfrozenxid to any value that's greater than the preexisting relfrozenxid, which could be either much earlier or much later than FreezeLimit. Much depends on workload characteristics. In practice there is significant natural variation that we can take advantage of. Credit for the general idea of using the oldest extant XID to set pg_class.relfrozenxid at the end of VACUUM goes to Andres Freund. Author: Peter Geoghegan Discussion: https://postgr.es/m/CAH2-WzkymFbz6D_vL+jmqSn_5q1wsFvFrE+37yLgL_Rkfd6Gzg@mail.gmail.com --- src/include/access/heapam.h | 4 +- src/include/access/heapam_xlog.h | 4 +- src/include/commands/vacuum.h | 1 + src/backend/access/heap/heapam.c | 186 ++++++++++++++++++++------- src/backend/access/heap/vacuumlazy.c | 85 ++++++++---- src/backend/commands/cluster.c | 5 +- src/backend/commands/vacuum.c | 34 +++-- 7 files changed, 238 insertions(+), 81 deletions(-) diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 0ad87730e..d35402f9f 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -168,7 +168,9 @@ extern bool heap_freeze_tuple(HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, TransactionId cutoff_multi); extern bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, - MultiXactId cutoff_multi, Buffer buf); + MultiXactId cutoff_multi, + TransactionId *NewRelfrozenxid, + MultiXactId *NewRelminmxid, Buffer buf); extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple); extern void simple_heap_insert(Relation relation, HeapTuple tup); diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h index 5c47fdcec..ae55c90f7 100644 --- a/src/include/access/heapam_xlog.h +++ b/src/include/access/heapam_xlog.h @@ -410,7 +410,9 @@ extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid, TransactionId cutoff_multi, xl_heap_freeze_tuple *frz, - 
bool *totally_frozen); + bool *totally_frozen, + TransactionId *NewRelfrozenxid, + MultiXactId *NewRelminmxid); extern void heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *xlrec_tp); extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer, diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index d64f6268f..ead88edda 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -291,6 +291,7 @@ extern bool vacuum_set_xid_limits(Relation rel, int multixact_freeze_min_age, int multixact_freeze_table_age, TransactionId *oldestXmin, + MultiXactId *oldestMxact, TransactionId *freezeLimit, MultiXactId *multiXactCutoff); extern bool vacuum_xid_failsafe_check(TransactionId relfrozenxid, diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 98230aac4..d85a817ff 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -6087,12 +6087,24 @@ heap_inplace_update(Relation relation, HeapTuple tuple) * FRM_RETURN_IS_MULTI * The return value is a new MultiXactId to set as new Xmax. * (caller must obtain proper infomask bits using GetMultiXactIdHintBits) + * + * "NewRelfrozenxid" is an output value; it's used to maintain target new + * relfrozenxid for the relation. It can be ignored unless "flags" contains + * either FRM_NOOP or FRM_RETURN_IS_MULTI, because we only handle multiXacts + * here. This follows the general convention: only track XIDs that will still + * be in the table after the ongoing VACUUM finishes. Note that it's up to + * caller to maintain this when the Xid return value is itself an Xid. + * + * Note that we cannot depend on xmin to maintain NewRelfrozenxid. We need to + * push maintenance of NewRelfrozenxid down this far, since in general xmin + * might have been frozen by an earlier VACUUM operation, in which case our + * caller will not have factored-in xmin when maintaining NewRelfrozenxid. 
*/ static TransactionId FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, MultiXactId cutoff_multi, - uint16 *flags) + uint16 *flags, TransactionId *NewRelfrozenxid) { TransactionId xid = InvalidTransactionId; int i; @@ -6104,6 +6116,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, bool has_lockers; TransactionId update_xid; bool update_committed; + TransactionId tempNewRelfrozenxid; *flags = 0; @@ -6198,13 +6211,13 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, /* is there anything older than the cutoff? */ need_replace = false; + tempNewRelfrozenxid = *NewRelfrozenxid; for (i = 0; i < nmembers; i++) { if (TransactionIdPrecedes(members[i].xid, cutoff_xid)) - { need_replace = true; - break; - } + if (TransactionIdPrecedes(members[i].xid, tempNewRelfrozenxid)) + tempNewRelfrozenxid = members[i].xid; } /* @@ -6213,6 +6226,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, */ if (!need_replace) { + *NewRelfrozenxid = tempNewRelfrozenxid; *flags |= FRM_NOOP; pfree(members); return InvalidTransactionId; @@ -6222,6 +6236,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * If the multi needs to be updated, figure out which members do we need * to keep. */ + tempNewRelfrozenxid = *NewRelfrozenxid; nnewmembers = 0; newmembers = palloc(sizeof(MultiXactMember) * nmembers); has_lockers = false; @@ -6303,7 +6318,11 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * list.) 
*/ if (TransactionIdIsValid(update_xid)) + { newmembers[nnewmembers++] = members[i]; + if (TransactionIdPrecedes(members[i].xid, tempNewRelfrozenxid)) + tempNewRelfrozenxid = members[i].xid; + } } else { @@ -6313,6 +6332,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, { /* running locker cannot possibly be older than the cutoff */ Assert(!TransactionIdPrecedes(members[i].xid, cutoff_xid)); + Assert(!TransactionIdPrecedes(members[i].xid, *NewRelfrozenxid)); newmembers[nnewmembers++] = members[i]; has_lockers = true; } @@ -6341,6 +6361,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, if (update_committed) *flags |= FRM_MARK_COMMITTED; xid = update_xid; + /* Caller manages NewRelfrozenxid directly when we return an XID */ } else { @@ -6350,6 +6371,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, */ xid = MultiXactIdCreateFromMembers(nnewmembers, newmembers); *flags |= FRM_RETURN_IS_MULTI; + *NewRelfrozenxid = tempNewRelfrozenxid; } pfree(newmembers); @@ -6368,6 +6390,13 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * will be totally frozen after these operations are performed and false if * more freezing will eventually be required. * + * Also maintains *NewRelfrozenxid and *NewRelminmxid, which are the current + * target relfrozenxid and relminmxid for the relation. Assumption is that + * caller will actually go on to freeze as indicated by our *frz output, so + * any (xmin, xmax, xvac) XIDs that we indicate need to be frozen won't need + * to be counted here. Values are valid lower bounds at the point that the + * ongoing VACUUM finishes. + * * Caller is responsible for setting the offset field, if appropriate. 
* * It is assumed that the caller has checked the tuple with @@ -6392,7 +6421,9 @@ bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, TransactionId cutoff_multi, - xl_heap_freeze_tuple *frz, bool *totally_frozen_p) + xl_heap_freeze_tuple *frz, bool *totally_frozen_p, + TransactionId *NewRelfrozenxid, + MultiXactId *NewRelminmxid) { bool changed = false; bool xmax_already_frozen = false; @@ -6436,6 +6467,11 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, frz->t_infomask |= HEAP_XMIN_FROZEN; changed = true; } + else if (TransactionIdPrecedes(xid, *NewRelfrozenxid)) + { + /* won't be frozen, but older than current NewRelfrozenxid */ + *NewRelfrozenxid = xid; + } } /* @@ -6453,10 +6489,11 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, { TransactionId newxmax; uint16 flags; + TransactionId temp = *NewRelfrozenxid; newxmax = FreezeMultiXactId(xid, tuple->t_infomask, relfrozenxid, relminmxid, - cutoff_xid, cutoff_multi, &flags); + cutoff_xid, cutoff_multi, &flags, &temp); freeze_xmax = (flags & FRM_INVALIDATE_XMAX); @@ -6474,6 +6511,24 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, if (flags & FRM_MARK_COMMITTED) frz->t_infomask |= HEAP_XMAX_COMMITTED; changed = true; + + if (TransactionIdPrecedes(newxmax, *NewRelfrozenxid)) + { + /* New xmax is an XID older than new NewRelfrozenxid */ + *NewRelfrozenxid = newxmax; + } + } + else if (flags & FRM_NOOP) + { + /* + * Changing nothing, so might have to ratchet back NewRelminmxid, + * NewRelfrozenxid, or both together + */ + if (MultiXactIdIsValid(xid) && + MultiXactIdPrecedes(xid, *NewRelminmxid)) + *NewRelminmxid = xid; + if (TransactionIdPrecedes(temp, *NewRelfrozenxid)) + *NewRelfrozenxid = temp; } else if (flags & FRM_RETURN_IS_MULTI) { @@ -6495,6 +6550,13 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, frz->xmax = newxmax; changed = true; + + /* + * New multixact might have remaining XID older than + * NewRelfrozenxid + 
*/ + if (TransactionIdPrecedes(temp, *NewRelfrozenxid)) + *NewRelfrozenxid = temp; } } else if (TransactionIdIsNormal(xid)) @@ -6522,7 +6584,14 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, freeze_xmax = true; } else + { freeze_xmax = false; + if (TransactionIdPrecedes(xid, *NewRelfrozenxid)) + { + /* won't be frozen, but older than current NewRelfrozenxid */ + *NewRelfrozenxid = xid; + } + } } else if ((tuple->t_infomask & HEAP_XMAX_INVALID) || !TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple))) @@ -6569,6 +6638,9 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, * was removed in PostgreSQL 9.0. Note that if we were to respect * cutoff_xid here, we'd need to make surely to clear totally_frozen * when we skipped freezing on that basis. + * + * Since we always freeze here, NewRelfrozenxid doesn't need to be + * maintained. */ if (TransactionIdIsNormal(xid)) { @@ -6646,11 +6718,14 @@ heap_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple frz; bool do_freeze; bool tuple_totally_frozen; + TransactionId NewRelfrozenxid = FirstNormalTransactionId; + MultiXactId NewRelminmxid = FirstMultiXactId; do_freeze = heap_prepare_freeze_tuple(tuple, relfrozenxid, relminmxid, cutoff_xid, cutoff_multi, - &frz, &tuple_totally_frozen); + &frz, &tuple_totally_frozen, + &NewRelfrozenxid, &NewRelminmxid); /* * Note that because this is not a WAL-logged operation, we don't need to @@ -7080,6 +7155,15 @@ heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple) * Check to see whether any of the XID fields of a tuple (xmin, xmax, xvac) * are older than the specified cutoff XID or MultiXactId. If so, return true. * + * Also maintains *NewRelfrozenxid and *NewRelminmxid, which are the current + * target relfrozenxid and relminmxid for the relation. Assumption is that + * caller will never freeze any of the XIDs from the tuple, even when we say + * that they should. 
If caller opts to go with our recommendation to freeze, + * then it must account for the fact that it shouldn't trust how we've set + * NewRelfrozenxid/NewRelminmxid. (In practice aggressive VACUUMs always take + * our recommendation because they must, and non-aggressive VACUUMs always opt + * to not freeze, preferring to ratchet back NewRelfrozenxid instead). + * * It doesn't matter whether the tuple is alive or dead, we are checking * to see if a tuple needs to be removed or frozen to avoid wraparound. * @@ -7088,74 +7172,86 @@ heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple) */ bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, - MultiXactId cutoff_multi, Buffer buf) + MultiXactId cutoff_multi, + TransactionId *NewRelfrozenxid, + MultiXactId *NewRelminmxid, Buffer buf) { TransactionId xid; + bool needs_freeze = false; xid = HeapTupleHeaderGetXmin(tuple); - if (TransactionIdIsNormal(xid) && - TransactionIdPrecedes(xid, cutoff_xid)) - return true; + if (TransactionIdIsNormal(xid)) + { + if (TransactionIdPrecedes(xid, *NewRelfrozenxid)) + *NewRelfrozenxid = xid; + if (TransactionIdPrecedes(xid, cutoff_xid)) + needs_freeze = true; + } /* * The considerations for multixacts are complicated; look at * heap_prepare_freeze_tuple for justifications. This routine had better * be in sync with that one! + * + * (Actually, we maintain NewRelminmxid differently here, because we + * assume that XIDs that should be frozen according to cutoff_xid won't + * be, whereas heap_prepare_freeze_tuple makes the opposite assumption.) 
*/ if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) { MultiXactId multi; + MultiXactMember *members; + int nmembers; multi = HeapTupleHeaderGetRawXmax(tuple); - if (!MultiXactIdIsValid(multi)) - { - /* no xmax set, ignore */ - ; - } - else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask)) + if (MultiXactIdIsValid(multi) && + MultiXactIdPrecedes(multi, *NewRelminmxid)) + *NewRelminmxid = multi; + + if (HEAP_LOCKED_UPGRADED(tuple->t_infomask)) return true; else if (MultiXactIdPrecedes(multi, cutoff_multi)) - return true; - else + needs_freeze = true; + + /* need to check whether any member of the mxact is too old */ + nmembers = GetMultiXactIdMembers(multi, &members, false, + HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)); + + for (int i = 0; i < nmembers; i++) { - MultiXactMember *members; - int nmembers; - int i; - - /* need to check whether any member of the mxact is too old */ - - nmembers = GetMultiXactIdMembers(multi, &members, false, - HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)); - - for (i = 0; i < nmembers; i++) - { - if (TransactionIdPrecedes(members[i].xid, cutoff_xid)) - { - pfree(members); - return true; - } - } - if (nmembers > 0) - pfree(members); + if (TransactionIdPrecedes(members[i].xid, cutoff_xid)) + needs_freeze = true; + if (TransactionIdPrecedes(members[i].xid, *NewRelfrozenxid)) + *NewRelfrozenxid = members[i].xid; } + if (nmembers > 0) + pfree(members); } else { xid = HeapTupleHeaderGetRawXmax(tuple); - if (TransactionIdIsNormal(xid) && - TransactionIdPrecedes(xid, cutoff_xid)) - return true; + if (TransactionIdIsNormal(xid)) + { + if (TransactionIdPrecedes(xid, *NewRelfrozenxid)) + *NewRelfrozenxid = xid; + if (TransactionIdPrecedes(xid, cutoff_xid)) + needs_freeze = true; + } } if (tuple->t_infomask & HEAP_MOVED) { xid = HeapTupleHeaderGetXvac(tuple); - if (TransactionIdIsNormal(xid) && - TransactionIdPrecedes(xid, cutoff_xid)) - return true; + if (TransactionIdIsNormal(xid)) + { + if (TransactionIdPrecedes(xid, *NewRelfrozenxid)) + *NewRelfrozenxid = xid; + if
(TransactionIdPrecedes(xid, cutoff_xid)) + needs_freeze = true; + } } - return false; + return needs_freeze; } /* diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index d57055674..d481a300b 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -172,8 +172,10 @@ typedef struct LVRelState /* VACUUM operation's cutoff for freezing XIDs and MultiXactIds */ TransactionId FreezeLimit; MultiXactId MultiXactCutoff; - /* Are FreezeLimit/MultiXactCutoff still valid? */ - bool freeze_cutoffs_valid; + + /* Track new pg_class.relfrozenxid/pg_class.relminmxid values */ + TransactionId NewRelfrozenxid; + MultiXactId NewRelminmxid; /* Error reporting state */ char *relnamespace; @@ -330,6 +332,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, PgStat_Counter startreadtime = 0; PgStat_Counter startwritetime = 0; TransactionId OldestXmin; + MultiXactId OldestMxact; TransactionId FreezeLimit; MultiXactId MultiXactCutoff; @@ -365,8 +368,8 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, params->freeze_table_age, params->multixact_freeze_min_age, params->multixact_freeze_table_age, - &OldestXmin, &FreezeLimit, - &MultiXactCutoff); + &OldestXmin, &OldestMxact, + &FreezeLimit, &MultiXactCutoff); skipwithvm = true; if (params->options & VACOPT_DISABLE_PAGE_SKIPPING) @@ -473,8 +476,10 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, vacrel->OldestXmin = OldestXmin; vacrel->FreezeLimit = FreezeLimit; vacrel->MultiXactCutoff = MultiXactCutoff; - /* Track if cutoffs became invalid (possible in !aggressive case only) */ - vacrel->freeze_cutoffs_valid = true; + + /* Initialize values used to advance relfrozenxid/relminmxid at the end */ + vacrel->NewRelfrozenxid = OldestXmin; + vacrel->NewRelminmxid = OldestMxact; /* * Call lazy_scan_heap to perform all required heap pruning, index @@ -527,16 +532,18 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, * Aggressive VACUUM must reliably advance 
relfrozenxid (and relminmxid). * We are able to advance relfrozenxid in a non-aggressive VACUUM too, * provided we didn't skip any all-visible (not all-frozen) pages using - the visibility map, and assuming that we didn't fail to get a cleanup - lock that made it unsafe with respect to FreezeLimit (or perhaps our - MultiXactCutoff) established for VACUUM operation. + the visibility map. A non-aggressive VACUUM might only be able to + advance relfrozenxid to an XID from before FreezeLimit (or a relminmxid + from before MultiXactCutoff) when it wasn't possible to freeze some + tuples due to our inability to acquire a cleanup lock, but the effect + is usually insignificant -- NewRelfrozenxid value still has a decent + chance of being much more recent than the existing relfrozenxid. * * NB: We must use orig_rel_pages, not vacrel->rel_pages, since we want * the rel_pages used by lazy_scan_heap, which won't match when we * happened to truncate the relation afterwards. */ - if (vacrel->scanned_pages + vacrel->frozenskipped_pages < orig_rel_pages || - !vacrel->freeze_cutoffs_valid) + if (vacrel->scanned_pages + vacrel->frozenskipped_pages < orig_rel_pages) { /* Cannot advance relfrozenxid/relminmxid */ Assert(!aggressive); @@ -548,11 +555,23 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, } else { + /* + * Aggressive case is strictly required to advance relfrozenxid, at + * least up to FreezeLimit (same applies with relminmxid and its + * cutoff, MultiXactCutoff). Assert that we got this right now.
+ */ Assert(vacrel->scanned_pages + vacrel->frozenskipped_pages == orig_rel_pages); + Assert(!aggressive || + TransactionIdPrecedesOrEquals(FreezeLimit, + vacrel->NewRelfrozenxid)); + Assert(!aggressive || + MultiXactIdPrecedesOrEquals(MultiXactCutoff, + vacrel->NewRelminmxid)); + vac_update_relstats(rel, new_rel_pages, new_live_tuples, new_rel_allvisible, vacrel->nindexes > 0, - FreezeLimit, MultiXactCutoff, + vacrel->NewRelfrozenxid, vacrel->NewRelminmxid, &frozenxid_updated, &minmulti_updated, false); } @@ -657,17 +676,17 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, OldestXmin, diff); if (frozenxid_updated) { - diff = (int32) (FreezeLimit - vacrel->relfrozenxid); + diff = (int32) (vacrel->NewRelfrozenxid - vacrel->relfrozenxid); appendStringInfo(&buf, _("new relfrozenxid: %u, which is %d xids ahead of previous value\n"), - FreezeLimit, diff); + vacrel->NewRelfrozenxid, diff); } if (minmulti_updated) { - diff = (int32) (MultiXactCutoff - vacrel->relminmxid); + diff = (int32) (vacrel->NewRelminmxid - vacrel->relminmxid); appendStringInfo(&buf, _("new relminmxid: %u, which is %d mxids ahead of previous value\n"), - MultiXactCutoff, diff); + vacrel->NewRelminmxid, diff); } if (orig_rel_pages > 0) { @@ -1579,6 +1598,8 @@ lazy_scan_prune(LVRelState *vacrel, int nfrozen; OffsetNumber deadoffsets[MaxHeapTuplesPerPage]; xl_heap_freeze_tuple frozen[MaxHeapTuplesPerPage]; + TransactionId NewRelfrozenxid; + MultiXactId NewRelminmxid; Assert(BufferGetBlockNumber(buf) == blkno); @@ -1587,6 +1608,8 @@ lazy_scan_prune(LVRelState *vacrel, retry: /* Initialize (or reset) page-level counters */ + NewRelfrozenxid = vacrel->NewRelfrozenxid; + NewRelminmxid = vacrel->NewRelminmxid; tuples_deleted = 0; lpdead_items = 0; recently_dead_tuples = 0; @@ -1796,7 +1819,9 @@ retry: vacrel->FreezeLimit, vacrel->MultiXactCutoff, &frozen[nfrozen], - &tuple_totally_frozen)) + &tuple_totally_frozen, + &NewRelfrozenxid, + &NewRelminmxid)) { /* Will execute freeze below */ 
frozen[nfrozen++].offset = offnum; @@ -1810,13 +1835,16 @@ retry: prunestate->all_frozen = false; } + vacrel->offnum = InvalidOffsetNumber; + /* * We have now divided every item on the page into either an LP_DEAD item * that will need to be vacuumed in indexes later, or a LP_NORMAL tuple * that remains and needs to be considered for freezing now (LP_UNUSED and * LP_REDIRECT items also remain, but are of no further interest to us). */ - vacrel->offnum = InvalidOffsetNumber; + vacrel->NewRelfrozenxid = NewRelfrozenxid; + vacrel->NewRelminmxid = NewRelminmxid; /* * Consider the need to freeze any items with tuple storage from the page @@ -1969,6 +1997,8 @@ lazy_scan_noprune(LVRelState *vacrel, missed_dead_tuples; HeapTupleHeader tupleheader; OffsetNumber deadoffsets[MaxHeapTuplesPerPage]; + TransactionId NewRelfrozenxid = vacrel->NewRelfrozenxid; + MultiXactId NewRelminmxid = vacrel->NewRelminmxid; Assert(BufferGetBlockNumber(buf) == blkno); @@ -2015,7 +2045,8 @@ lazy_scan_noprune(LVRelState *vacrel, tupleheader = (HeapTupleHeader) PageGetItem(page, itemid); if (heap_tuple_needs_freeze(tupleheader, vacrel->FreezeLimit, - vacrel->MultiXactCutoff, buf)) + vacrel->MultiXactCutoff, + &NewRelfrozenxid, &NewRelminmxid, buf)) { if (vacrel->aggressive) { @@ -2025,10 +2056,12 @@ lazy_scan_noprune(LVRelState *vacrel, } /* - * Current non-aggressive VACUUM operation definitely won't be - * able to advance relfrozenxid or relminmxid + * A non-aggressive VACUUM doesn't have to wait on a cleanup lock + * to ensure that it advances relfrozenxid to a sufficiently + * recent XID that happens to be present on this page. It can + * just accept an older New/final relfrozenxid instead. There is + * a decent chance that the problem will go away naturally. 
*/ - vacrel->freeze_cutoffs_valid = false; } num_tuples++; @@ -2078,6 +2111,14 @@ lazy_scan_noprune(LVRelState *vacrel, vacrel->offnum = InvalidOffsetNumber; + /* + * We have committed to not freezing the tuples on this page (always + * happens with a non-aggressive VACUUM), so make sure that the target + * relfrozenxid/relminmxid values reflect the XIDs/MXIDs we encountered + */ + vacrel->NewRelfrozenxid = NewRelfrozenxid; + vacrel->NewRelminmxid = NewRelminmxid; + /* * Now save details of the LP_DEAD items from the page in vacrel (though * only when VACUUM uses two-pass strategy) diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 02a7e94bf..a7e988298 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -767,6 +767,7 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, TupleDesc oldTupDesc PG_USED_FOR_ASSERTS_ONLY; TupleDesc newTupDesc PG_USED_FOR_ASSERTS_ONLY; TransactionId OldestXmin; + MultiXactId oldestMxact; TransactionId FreezeXid; MultiXactId MultiXactCutoff; bool use_sort; @@ -856,8 +857,8 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, * Since we're going to rewrite the whole table anyway, there's no reason * not to be aggressive about this. */ - vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0, - &OldestXmin, &FreezeXid, &MultiXactCutoff); + vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0, &OldestXmin, &oldestMxact, + &FreezeXid, &MultiXactCutoff); /* * FreezeXid will become the table's new relfrozenxid, and that mustn't go diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index b6767a5ff..d71ff21b1 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -945,14 +945,26 @@ get_all_vacuum_rels(int options) * The output parameters are: * - oldestXmin is the Xid below which tuples deleted by any xact (that * committed) should be considered DEAD, not just RECENTLY_DEAD. 
- * - freezeLimit is the Xid below which all Xids are replaced by - * FrozenTransactionId during vacuum. - * - multiXactCutoff is the value below which all MultiXactIds are removed - * from Xmax. + * - oldestMxact is the Mxid below which MultiXacts are definitely not + * seen as visible by any running transaction. + * - freezeLimit is the Xid below which all Xids are definitely replaced by + * FrozenTransactionId during aggressive vacuums. + * - multiXactCutoff is the value below which all MultiXactIds are definitely + * removed from Xmax during aggressive vacuums. * * Return value indicates if vacuumlazy.c caller should make its VACUUM * operation aggressive. An aggressive VACUUM must advance relfrozenxid up to - * FreezeLimit, and relminmxid up to multiXactCutoff. + * FreezeLimit (at a minimum), and relminmxid up to multiXactCutoff (at a + * minimum). + * + * oldestXmin and oldestMxact can be thought of as the most recent values that + * can ever be passed to vac_update_relstats() as frozenxid and minmulti + * arguments. These exact values can be used when no newer XIDs or MultiXacts + * remain in the heap relation (e.g., with an empty table). It's typical for + * vacuumlazy.c caller to notice that older XIDs/Multixacts remain in the + * table, which will force it to use the oldest extant values when it calls + * vac_update_relstats(). Ideally these values won't be very far behind the + * "optimal" oldestXmin and oldestMxact values we provide. 
*/ bool vacuum_set_xid_limits(Relation rel, @@ -961,6 +973,7 @@ vacuum_set_xid_limits(Relation rel, int multixact_freeze_min_age, int multixact_freeze_table_age, TransactionId *oldestXmin, + MultiXactId *oldestMxact, TransactionId *freezeLimit, MultiXactId *multiXactCutoff) { @@ -969,7 +982,6 @@ vacuum_set_xid_limits(Relation rel, int effective_multixact_freeze_max_age; TransactionId limit; TransactionId safeLimit; - MultiXactId oldestMxact; MultiXactId mxactLimit; MultiXactId safeMxactLimit; int freezetable; @@ -1065,9 +1077,11 @@ vacuum_set_xid_limits(Relation rel, effective_multixact_freeze_max_age / 2); Assert(mxid_freezemin >= 0); + /* Remember for caller */ + *oldestMxact = GetOldestMultiXactId(); + /* compute the cutoff multi, being careful to generate a valid value */ - oldestMxact = GetOldestMultiXactId(); - mxactLimit = oldestMxact - mxid_freezemin; + mxactLimit = *oldestMxact - mxid_freezemin; if (mxactLimit < FirstMultiXactId) mxactLimit = FirstMultiXactId; @@ -1082,8 +1096,8 @@ vacuum_set_xid_limits(Relation rel, (errmsg("oldest multixact is far in the past"), errhint("Close open transactions with multixacts soon to avoid wraparound problems."))); /* Use the safe limit, unless an older mxact is still running */ - if (MultiXactIdPrecedes(oldestMxact, safeMxactLimit)) - mxactLimit = oldestMxact; + if (MultiXactIdPrecedes(*oldestMxact, safeMxactLimit)) + mxactLimit = *oldestMxact; else mxactLimit = safeMxactLimit; } -- 2.30.2