From 00bb5187dba75601c8d3d37db9ce8b56e377a742 Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Sat, 8 Mar 2025 01:15:08 +0100 Subject: [PATCH v12 3/5] SP-GIST: Fix visibility issues in IOS Previously, SP-GIST IOS could buffer tuples from pages while VACUUM came along and cleaned up an ALL_DEAD tuple, marking the tuple's page ALL_VISIBLE again and making IOS mistakenly believe the tuple is indeed visible. With this patch, pins now conflict with SP-GIST vacuum, and we now do preliminary visibility checks to be used by IOS so that the IOS infrastructure knows to recheck the heap page even if that page is now ALL_VISIBLE. Idea from Heikki Linnakangas --- src/backend/access/spgist/spgscan.c | 182 ++++++++++++++++++++++++-- src/backend/access/spgist/spgvacuum.c | 2 +- src/include/access/spgist_private.h | 9 +- 3 files changed, 179 insertions(+), 14 deletions(-) diff --git a/src/backend/access/spgist/spgscan.c b/src/backend/access/spgist/spgscan.c index 25893050c58..42784fdc4ca 100644 --- a/src/backend/access/spgist/spgscan.c +++ b/src/backend/access/spgist/spgscan.c @@ -30,7 +30,8 @@ typedef void (*storeRes_func) (SpGistScanOpaque so, ItemPointer heapPtr, Datum leafValue, bool isNull, SpGistLeafTuple leafTuple, bool recheck, - bool recheckDistances, double *distances); + bool recheckDistances, double *distances, + TMVC_Result visrecheck); /* * Pairing heap comparison function for the SpGistSearchItem queue. @@ -142,6 +143,7 @@ spgAddStartItem(SpGistScanOpaque so, bool isnull) startEntry->traversalValue = NULL; startEntry->recheck = false; startEntry->recheckDistances = false; + startEntry->visrecheck = TMVC_Unchecked; spgAddSearchItemToQueue(so, startEntry); } @@ -386,6 +388,19 @@ spgrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, if (scankey && scan->numberOfKeys > 0) memcpy(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData)); + /* prepare index-only scan requirements */ + so->nReorderThisPage = 0; + if (scan->xs_want_itup) + { + if (so->visrecheck == NULL) + so->visrecheck = palloc(MaxIndexTuplesPerPage); + + if (scan->numberOfOrderBys > 0 && so->items == NULL) + { + so->items = palloc_array(SpGistSearchItem *, MaxIndexTuplesPerPage); + } + } + /* initialize order-by data if needed */ if (orderbys && scan->numberOfOrderBys > 0) { @@ -453,6 +468,9 @@ spgendscan(IndexScanDesc scan) pfree(scan->xs_orderbynulls); } + if (BufferIsValid(so->vmbuf)) + ReleaseBuffer(so->vmbuf); + pfree(so); } @@ -502,6 +520,7 @@ spgNewHeapItem(SpGistScanOpaque so, int level, SpGistLeafTuple leafTuple, item->isLeaf = true; item->recheck = recheck; item->recheckDistances = recheckDistances; + item->visrecheck = TMVC_Unchecked; return item; } @@ -584,6 +603,14 @@ spgLeafTest(SpGistScanOpaque so, SpGistSearchItem *item, isnull, distances); + if (so->want_itup) + { + Assert(PointerIsValid(so->items)); + + so->items[so->nReorderThisPage] = heapItem; + so->nReorderThisPage++; + } + spgAddSearchItemToQueue(so, heapItem); MemoryContextSwitchTo(oldCxt); @@ -593,7 +620,7 @@ spgLeafTest(SpGistScanOpaque so, SpGistSearchItem *item, /* non-ordered scan, so report the item right away */ Assert(!recheckDistances); storeRes(so, &leafTuple->heapPtr, leafValue, isnull, - leafTuple, recheck, false, NULL); + leafTuple, recheck, false, NULL, TMVC_Unchecked); *reportedSome = true; } } @@ -806,6 +833,88 @@ spgTestLeafTuple(SpGistScanOpaque so, return SGLT_GET_NEXTOFFSET(leafTuple); } +/* + * Pupulate so->visrecheck based on tuples which are cached for a currently + * pinned page. + */ +static void +spgPopulateUnorderedVischecks(IndexScanDesc scan, SpGistScanOpaqueData *so) +{ + TM_IndexVisibilityCheckOp op; + + Assert(scan->numberOfOrderBys == 0); + + if (so->nPtrs == 0) + return; + + op.checkntids = so->nPtrs; + op.checktids = palloc_array(TM_VisCheck, so->nPtrs); + op.vmbuf = &so->vmbuf; + + for (int i = 0; i < op.checkntids; i++) + { + Assert(ItemPointerIsValid(&so->heapPtrs[i])); + + PopulateTMVischeck(&op.checktids[i], &so->heapPtrs[i], i); + } + + table_index_vischeck_tuples(scan->heapRelation, &op); + + for (int i = 0; i < op.checkntids; i++) + { + TM_VisCheck *check = &op.checktids[i]; + + Assert(check->tidblkno == ItemPointerGetBlockNumberNoCheck(&so->heapPtrs[check->idxoffnum])); + Assert(check->tidoffset == ItemPointerGetOffsetNumberNoCheck(&so->heapPtrs[check->idxoffnum])); + Assert(check->idxoffnum < op.checkntids); + + so->visrecheck[check->idxoffnum] = check->vischeckresult; + } + + pfree(op.checktids); +} + +/* pupulate so->visrecheck based on current cached tuples */ +static void +spgPopulateOrderedVisChecks(IndexScanDesc scan, SpGistScanOpaqueData *so) +{ + TM_IndexVisibilityCheckOp op; + + if (so->nReorderThisPage == 0) + return; + + Assert(so->nReorderThisPage > 0); + Assert(scan->numberOfOrderBys > 0); + Assert(PointerIsValid(so->items)); + + op.checkntids = so->nReorderThisPage; + op.checktids = palloc_array(TM_VisCheck, so->nReorderThisPage); + op.vmbuf = &so->vmbuf; + + for (int i = 0; i < op.checkntids; i++) + { + PopulateTMVischeck(&op.checktids[i], &so->items[i]->heapPtr, i); + Assert(ItemPointerIsValid(&so->items[i]->heapPtr)); + Assert(so->items[i]->isLeaf); + } + + table_index_vischeck_tuples(scan->heapRelation, &op); + + for (int i = 0; i < op.checkntids; i++) + { + TM_VisCheck *check = &op.checktids[i]; + + Assert(check->idxoffnum < op.checkntids); + Assert(check->tidblkno == ItemPointerGetBlockNumberNoCheck(&so->items[check->idxoffnum]->heapPtr)); + Assert(check->tidoffset == ItemPointerGetOffsetNumberNoCheck(&so->items[check->idxoffnum]->heapPtr)); + + so->items[check->idxoffnum]->visrecheck = check->vischeckresult; + } + + pfree(op.checktids); + so->nReorderThisPage = 0; +} + /* * Walk the tree and report all tuples passing the scan quals to the storeRes * subroutine. @@ -814,8 +923,8 @@ spgTestLeafTuple(SpGistScanOpaque so, * next page boundary once we have reported at least one tuple. */ static void -spgWalk(Relation index, SpGistScanOpaque so, bool scanWholeIndex, - storeRes_func storeRes) +spgWalk(IndexScanDesc scan, Relation index, SpGistScanOpaque so, + bool scanWholeIndex, storeRes_func storeRes) { Buffer buffer = InvalidBuffer; bool reportedSome = false; @@ -835,9 +944,23 @@ redirect: { /* We store heap items in the queue only in case of ordered search */ Assert(so->numberOfNonNullOrderBys > 0); + + /* + * If an item we found on a page is retrieved immediately after + * processing that page, we won't yet have released the page pin, + * and thus won't yet have processed the visibility data of the + * page's (now) ordered tuples. + * Do that now, so that all tuples on the page we're about to + * unpin were checked for visibility before we returned any. + */ + if (so->want_itup && so->nReorderThisPage) + spgPopulateOrderedVisChecks(scan, so); + + Assert(!so->want_itup || item->visrecheck != TMVC_Unchecked); storeRes(so, &item->heapPtr, item->value, item->isNull, item->leafTuple, item->recheck, - item->recheckDistances, item->distances); + item->recheckDistances, item->distances, + item->visrecheck); reportedSome = true; } else @@ -854,7 +977,15 @@ redirect: } else if (blkno != BufferGetBlockNumber(buffer)) { - UnlockReleaseBuffer(buffer); + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + + Assert(so->numberOfOrderBys >= 0); + if (so->numberOfOrderBys == 0) + spgPopulateUnorderedVischecks(scan, so); + else + spgPopulateOrderedVisChecks(scan, so); + + ReleaseBuffer(buffer); buffer = ReadBuffer(index, blkno); LockBuffer(buffer, BUFFER_LOCK_SHARE); } @@ -922,16 +1053,36 @@ redirect: } if (buffer != InvalidBuffer) - UnlockReleaseBuffer(buffer); -} + { + /* Unlock the buffer for concurrent accesses except VACUUM */ + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + /* + * If we're in an index-only scan, pre-check visibility of the tuples, + * so we can drop the pin without causing visibility bugs. + */ + if (so->want_itup) + { + Assert(scan->numberOfOrderBys >= 0); + + if (scan->numberOfOrderBys == 0) + spgPopulateUnorderedVischecks(scan, so); + else + spgPopulateOrderedVisChecks(scan, so); + } + + /* Release the page */ + ReleaseBuffer(buffer); + } +} /* storeRes subroutine for getbitmap case */ static void storeBitmap(SpGistScanOpaque so, ItemPointer heapPtr, Datum leafValue, bool isnull, SpGistLeafTuple leafTuple, bool recheck, - bool recheckDistances, double *distances) + bool recheckDistances, double *distances, + TMVC_Result visres) { Assert(!recheckDistances && !distances); tbm_add_tuples(so->tbm, heapPtr, 1, recheck); @@ -949,7 +1100,7 @@ spggetbitmap(IndexScanDesc scan, TIDBitmap *tbm) so->tbm = tbm; so->ntids = 0; - spgWalk(scan->indexRelation, so, true, storeBitmap); + spgWalk(scan, scan->indexRelation, so, true, storeBitmap); return so->ntids; } @@ -959,12 +1110,15 @@ static void storeGettuple(SpGistScanOpaque so, ItemPointer heapPtr, Datum leafValue, bool isnull, SpGistLeafTuple leafTuple, bool recheck, - bool recheckDistances, double *nonNullDistances) + bool recheckDistances, double *nonNullDistances, + TMVC_Result visres) { Assert(so->nPtrs < MaxIndexTuplesPerPage); so->heapPtrs[so->nPtrs] = *heapPtr; so->recheck[so->nPtrs] = recheck; so->recheckDistances[so->nPtrs] = recheckDistances; + if (so->want_itup) + so->visrecheck[so->nPtrs] = visres; if (so->numberOfOrderBys > 0) { @@ -1041,6 +1195,10 @@ spggettuple(IndexScanDesc scan, ScanDirection dir) scan->xs_heaptid = so->heapPtrs[so->iPtr]; scan->xs_recheck = so->recheck[so->iPtr]; scan->xs_hitup = so->reconTups[so->iPtr]; + if (so->want_itup) + scan->xs_visrecheck = so->visrecheck[so->iPtr]; + + Assert(!scan->xs_want_itup || scan->xs_visrecheck != TMVC_Unchecked); if (so->numberOfOrderBys > 0) index_store_float8_orderby_distances(scan, so->orderByTypes, @@ -1070,7 +1228,7 @@ spggettuple(IndexScanDesc scan, ScanDirection dir) } so->iPtr = so->nPtrs = 0; - spgWalk(scan->indexRelation, so, false, storeGettuple); + spgWalk(scan, scan->indexRelation, so, false, storeGettuple); if (so->nPtrs == 0) break; /* must have completed scan */ diff --git a/src/backend/access/spgist/spgvacuum.c b/src/backend/access/spgist/spgvacuum.c index 81171f35451..04836a29304 100644 --- a/src/backend/access/spgist/spgvacuum.c +++ b/src/backend/access/spgist/spgvacuum.c @@ -625,7 +625,7 @@ spgvacuumpage(spgBulkDeleteState *bds, Buffer buffer) BlockNumber blkno = BufferGetBlockNumber(buffer); Page page; - LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + LockBufferForCleanup(buffer); page = (Page) BufferGetPage(buffer); if (PageIsNew(page)) diff --git a/src/include/access/spgist_private.h b/src/include/access/spgist_private.h index cb43a278f46..63e970468c7 100644 --- a/src/include/access/spgist_private.h +++ b/src/include/access/spgist_private.h @@ -21,6 +21,7 @@ #include "storage/buf.h" #include "utils/geo_decls.h" #include "utils/relcache.h" +#include "tableam.h" typedef struct SpGistOptions @@ -175,7 +176,7 @@ typedef struct SpGistSearchItem bool isLeaf; /* SearchItem is heap item */ bool recheck; /* qual recheck is needed */ bool recheckDistances; /* distance recheck is needed */ - + uint8 visrecheck; /* IOS: TMVC_Result of contained heap tuple */ /* array with numberOfOrderBys entries */ double distances[FLEXIBLE_ARRAY_MEMBER]; } SpGistSearchItem; @@ -223,6 +224,7 @@ typedef struct SpGistScanOpaqueData /* These fields are only used in amgettuple scans: */ bool want_itup; /* are we reconstructing tuples? */ + Buffer vmbuf; /* IOS: used for table_index_vischeck_tuples */ TupleDesc reconTupDesc; /* if so, descriptor for reconstructed tuples */ int nPtrs; /* number of TIDs found on current page */ int iPtr; /* index for scanning through same */ @@ -235,6 +237,11 @@ typedef struct SpGistScanOpaqueData /* distances (for recheck) */ IndexOrderByDistance *distances[MaxIndexTuplesPerPage]; + /* support for IOS */ + int nReorderThisPage; + uint8 *visrecheck; /* IOS vis check results, counted by nPtrs */ + SpGistSearchItem **items; /* counted by nReorderThisPage */ + /* * Note: using MaxIndexTuplesPerPage above is a bit hokey since * SpGistLeafTuples aren't exactly IndexTuples; however, they are larger, -- 2.48.1