From 7b8f1f4e2b9b8c3067f9c4750005d3f31d2256ef Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Wed, 11 Jan 2023 18:06:51 -0800 Subject: [PATCH v4] Tighten up VACUUM's approach to setting VM bits. One of the calls to visibilitymap_set() during VACUUM's initial heap pass could theoretically set a page's all-frozen bit without also setting the page's all-visible bit, leaving the visibility map in an inconsistent state. The problem was the tacit assumption that the lazy_scan_heap local variable all_visible_according_to_vm could not become stale without that being detected afterwards, via the prunestate all_visible/all_frozen fields being set false by lazy_scan_prune. However, prunestate only indicates whether the page is now eligible to become all-visible, which doesn't reliably indicate anything about the state of the page immediately before it was scanned -- including in cases where all_visible_according_to_vm happened to be set from earlier on. In other words, lazy_scan_heap failed to account for the fact that a page that was set all-visible in the VM in the recent past may not still be all-visible in the VM by now, even when it's eligible to become all-visible now. In practice there doesn't seem to be a concrete scenario in which this is possible. It was almost possible in scenarios involving concurrent HOT updates from transactions that abort, but (unlike pruning) freezing can never remove an XID that's > VACUUM's OldestXmin, even for an XID that is known to have aborted, which was protective here. Tighten up the way that visibilitymap_set() is called: request that both the all-visible and all-frozen bits get set whenever the all-frozen bit is set, regardless of what we think we know about the present state of the all-visible bit. Also make sure that the page level PD_ALL_VISIBLE flag is set in the same code path. These issues have been around since commit a892234f83, which added the all-frozen bit to the VM fork. In passing, stop reading the existing state of the VM when setting the VM in VACUUM's final heap pass. The existing state of the visibility map is irrelevant, since pages with LP_DEAD items are not all-visible (or all-frozen) by definition, and we already know that affected pages must have had at least one LP_DEAD item before we set it LP_UNUSED. Author: Peter Geoghegan Reviewed-By: Andres Freund Discussion: https://postgr.es/m/CAH2-WznuNGSzF8v6OsgjaC5aYsb3cZ6HW6MLm30X0d65cmSH6A@mail.gmail.com --- src/backend/access/heap/vacuumlazy.c | 83 ++++++++++++++++--------- src/backend/access/heap/visibilitymap.c | 9 ++- 2 files changed, 60 insertions(+), 32 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 369451516..81698d0d3 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -260,7 +260,7 @@ static void lazy_vacuum(LVRelState *vacrel); static bool lazy_vacuum_all_indexes(LVRelState *vacrel); static void lazy_vacuum_heap_rel(LVRelState *vacrel); static int lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, - Buffer buffer, int index, Buffer *vmbuffer); + Buffer buffer, int index, Buffer vmbuffer); static bool lazy_check_wraparound_failsafe(LVRelState *vacrel); static void lazy_cleanup_all_indexes(LVRelState *vacrel); static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel, @@ -1040,7 +1040,7 @@ lazy_scan_heap(LVRelState *vacrel) { Size freespace; - lazy_vacuum_heap_page(vacrel, blkno, buf, 0, &vmbuffer); + lazy_vacuum_heap_page(vacrel, blkno, buf, 0, vmbuffer); /* Forget the LP_DEAD items that we just vacuumed */ dead_items->num_items = 0; @@ -1092,7 +1092,10 @@ lazy_scan_heap(LVRelState *vacrel) uint8 flags = VISIBILITYMAP_ALL_VISIBLE; if (prunestate.all_frozen) + { + Assert(!TransactionIdIsValid(prunestate.visibility_cutoff_xid)); flags |= VISIBILITYMAP_ALL_FROZEN; + } /* * It should never be the case that the visibility map page is set @@ -1120,8 +1123,8 @@ lazy_scan_heap(LVRelState *vacrel) * got cleared after lazy_scan_skip() was called, so we must recheck * with buffer lock before concluding that the VM is corrupt. */ - else if (all_visible_according_to_vm && !PageIsAllVisible(page) - && VM_ALL_VISIBLE(vacrel->rel, blkno, &vmbuffer)) + else if (all_visible_according_to_vm && !PageIsAllVisible(page) && + visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0) { elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u", vacrel->relname, blkno); @@ -1164,12 +1167,27 @@ lazy_scan_heap(LVRelState *vacrel) !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer)) { /* - * We can pass InvalidTransactionId as the cutoff XID here, - * because setting the all-frozen bit doesn't cause recovery - * conflicts. + * Avoid relying on all_visible_according_to_vm as a proxy for the + * page-level PD_ALL_VISIBLE bit being set, since it might have + * become stale -- even when all_visible is set in prunestate */ + if (!PageIsAllVisible(page)) + { + PageSetAllVisible(page); + MarkBufferDirty(buf); + } + + /* + * Set the page all-frozen (and all-visible) in the VM. + * + * We can pass InvalidTransactionId as our visibility_cutoff_xid, + * since a snapshotConflictHorizon sufficient to make everything + * safe for REDO was logged when the page's tuples were frozen. + */ + Assert(!TransactionIdIsValid(prunestate.visibility_cutoff_xid)); visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr, vmbuffer, InvalidTransactionId, + VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN); } @@ -1311,7 +1329,11 @@ lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer, BlockNumber next_block, /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */ if (!vacrel->skipwithvm) + { + /* Caller shouldn't rely on all_visible_according_to_vm */ + *next_unskippable_allvis = false; break; + } /* * Aggressive VACUUM caller can't skip pages just because they are @@ -1818,7 +1840,11 @@ retry: * cutoff by stepping back from OldestXmin. */ if (prunestate->all_visible && prunestate->all_frozen) + { + /* Using same cutoff when setting VM is now unnecessary */ snapshotConflictHorizon = prunestate->visibility_cutoff_xid; + prunestate->visibility_cutoff_xid = InvalidTransactionId; + } else { /* Avoids false conflicts when hot_standby_feedback in use */ @@ -2417,10 +2443,18 @@ lazy_vacuum_heap_rel(LVRelState *vacrel) blkno = ItemPointerGetBlockNumber(&vacrel->dead_items->items[index]); vacrel->blkno = blkno; + + /* + * Pin the visibility map page in case we need to mark the page + * all-visible. In most cases this will be very cheap, because we'll + * already have the correct page pinned anyway. + */ + visibilitymap_pin(vacrel->rel, blkno, &vmbuffer); + buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL, vacrel->bstrategy); LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); - index = lazy_vacuum_heap_page(vacrel, blkno, buf, index, &vmbuffer); + index = lazy_vacuum_heap_page(vacrel, blkno, buf, index, vmbuffer); /* Now that we've vacuumed the page, record its available space */ page = BufferGetPage(buf); @@ -2457,7 +2491,8 @@ lazy_vacuum_heap_rel(LVRelState *vacrel) * vacrel->dead_items array. * * Caller must have an exclusive buffer lock on the buffer (though a full - * cleanup lock is also acceptable). + * cleanup lock is also acceptable). vmbuffer must be valid and already have + * a pin on blkno's visibility map page. * * index is an offset into the vacrel->dead_items array for the first listed * LP_DEAD item on the page. The return value is the first index immediately @@ -2465,7 +2500,7 @@ lazy_vacuum_heap_rel(LVRelState *vacrel) */ static int lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, - int index, Buffer *vmbuffer) + int index, Buffer vmbuffer) { VacDeadItems *dead_items = vacrel->dead_items; Page page = BufferGetPage(buffer); @@ -2546,31 +2581,21 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, * dirty, exclusively locked, and, if needed, a full page image has been * emitted. */ + Assert(!PageIsAllVisible(page)); if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid, &all_frozen)) - PageSetAllVisible(page); - - /* - * All the changes to the heap page have been done. If the all-visible - * flag is now set, also set the VM all-visible bit (and, if possible, the - * all-frozen bit) unless this has already been done previously. - */ - if (PageIsAllVisible(page)) { - uint8 flags = 0; - uint8 vm_status = visibilitymap_get_status(vacrel->rel, - blkno, vmbuffer); + uint8 flags = VISIBILITYMAP_ALL_VISIBLE; - /* Set the VM all-frozen bit to flag, if needed */ - if ((vm_status & VISIBILITYMAP_ALL_VISIBLE) == 0) - flags |= VISIBILITYMAP_ALL_VISIBLE; - if ((vm_status & VISIBILITYMAP_ALL_FROZEN) == 0 && all_frozen) + if (all_frozen) + { + Assert(!TransactionIdIsValid(visibility_cutoff_xid)); flags |= VISIBILITYMAP_ALL_FROZEN; + } - Assert(BufferIsValid(*vmbuffer)); - if (flags != 0) - visibilitymap_set(vacrel->rel, blkno, buffer, InvalidXLogRecPtr, - *vmbuffer, visibility_cutoff_xid, flags); + PageSetAllVisible(page); + visibilitymap_set(vacrel->rel, blkno, buffer, InvalidXLogRecPtr, + vmbuffer, visibility_cutoff_xid, flags); } /* Revert to the previous phase information for error traceback */ diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index 1d1ca423a..74ff01bb1 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -146,7 +146,9 @@ visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags char *map; bool cleared = false; + /* Must never clear all_visible bit while leaving all_frozen bit set */ Assert(flags & VISIBILITYMAP_VALID_BITS); + Assert(flags != VISIBILITYMAP_ALL_VISIBLE); #ifdef TRACE_VISIBILITYMAP elog(DEBUG1, "vm_clear %s %d", RelationGetRelationName(rel), heapBlk); @@ -256,8 +258,11 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, #endif Assert(InRecovery || XLogRecPtrIsInvalid(recptr)); - Assert(InRecovery || BufferIsValid(heapBuf)); + Assert(InRecovery || PageIsAllVisible((Page) BufferGetPage(heapBuf))); + + /* Must never set all_frozen bit without also setting all_visible bit */ Assert(flags & VISIBILITYMAP_VALID_BITS); + Assert(flags != VISIBILITYMAP_ALL_FROZEN); /* Check that we have the right heap page pinned, if present */ if (BufferIsValid(heapBuf) && BufferGetBlockNumber(heapBuf) != heapBlk) @@ -299,8 +304,6 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, { Page heapPage = BufferGetPage(heapBuf); - /* caller is expected to set PD_ALL_VISIBLE first */ - Assert(PageIsAllVisible(heapPage)); PageSetLSN(heapPage, recptr); } } -- 2.39.0