From 346eb3fb59ba39dd36d88bc5239ec14d2435412b Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Sat, 20 Dec 2025 00:39:50 +0100 Subject: [PATCH v14 4/8] IOS: Support tableAM-powered prechecked visibility statuses Previously, we assumed VM_ALL_VISIBLE(...) is universal across all AMs. Now, we use the new table_index_vischeck_tuples API, and allow index AMs to do this check as well. This removes the need for index AMs to hold on to pinned pages in index-only scans, as long as they do the visibility checks on the tuples they pull from the pages whilst they still have a pin on the page. selfuncs.c's get_actual_variable_endpoint() is similarly updated to use this new visibility checking infrastructure. Future commits will implement this new infrastructure in gist, sp-gist, and nbtree. --- src/backend/access/index/indexam.c | 6 ++ src/backend/executor/nodeIndexonlyscan.c | 80 +++++++++++++++--------- src/backend/utils/adt/selfuncs.c | 74 +++++++++++++--------- src/include/access/relscan.h | 2 + 4 files changed, 104 insertions(+), 58 deletions(-) diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 0492d92d23b..c9817e6004c 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -638,6 +638,12 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction) /* XXX: we should assert that a snapshot is pushed or registered */ Assert(TransactionIdIsValid(RecentXmin)); + /* + * Reset xs_visrecheck, so we don't confuse the next tuple's visibility + * state with that of the previous. + */ + scan->xs_visrecheck = TMVC_Unchecked; + /* * The AM's amgettuple proc finds the next index entry matching the scan * keys, and puts the TID into scan->xs_heaptid. It should also set diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index 6bea42f128f..e8811f9a3c1 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -121,6 +121,7 @@ IndexOnlyNext(IndexOnlyScanState *node) while ((tid = index_getnext_tid(scandesc, direction)) != NULL) { bool tuple_from_heap = false; + TMVC_Result vischeck = scandesc->xs_visrecheck; CHECK_FOR_INTERRUPTS(); @@ -128,6 +129,9 @@ IndexOnlyNext(IndexOnlyScanState *node) * We can skip the heap fetch if the TID references a heap page on * which all tuples are known visible to everybody. In any case, * we'll use the index tuple not the heap tuple as the data source. + * The index may have already pre-checked the visibility of the tuple + * for us and stored the result in xs_visrecheck. In those cases, we + * can skip checking the visibility status. * * Note on Memory Ordering Effects: visibilitymap_get_status does not * lock the visibility map buffer, and therefore the result we read @@ -157,37 +161,57 @@ IndexOnlyNext(IndexOnlyScanState *node) * * It's worth going through this complexity to avoid needing to lock * the VM buffer, which could cause significant contention. + * + * The index doing these checks for us doesn't materially change these + * considerations. */ - if (!VM_ALL_VISIBLE(scandesc->heapRelation, - ItemPointerGetBlockNumber(tid), - &node->ioss_VMBuffer)) - { - /* - * Rats, we have to visit the heap to check visibility. - */ - InstrCountTuples2(node, 1); - if (!index_fetch_heap(scandesc, node->ioss_TableSlot)) - continue; /* no visible tuple, try next index entry */ + if (vischeck == TMVC_Unchecked) + vischeck = table_index_vischeck_tuple(scandesc->heapRelation, + &node->ioss_VMBuffer, + tid); - ExecClearTuple(node->ioss_TableSlot); - - /* - * Only MVCC snapshots are supported here, so there should be no - * need to keep following the HOT chain once a visible entry has - * been found. If we did want to allow that, we'd need to keep - * more state to remember not to call index_getnext_tid next time. - */ - if (scandesc->xs_heap_continue) - elog(ERROR, "non-MVCC snapshots are not supported in index-only scans"); + Assert(vischeck != TMVC_Unchecked); - /* - * Note: at this point we are holding a pin on the heap page, as - * recorded in scandesc->xs_cbuf. We could release that pin now, - * but it's not clear whether it's a win to do so. The next index - * entry might require a visit to the same heap page. - */ - - tuple_from_heap = true; + switch (vischeck) + { + case TMVC_Unchecked: + elog(ERROR, "Failed to check visibility for tuple"); + break; + case TMVC_Visible: + /* no further checks required here */ + break; + case TMVC_MaybeVisible: + { + /* + * Rats, we have to visit the heap to check visibility. + */ + InstrCountTuples2(node, 1); + if (!index_fetch_heap(scandesc, node->ioss_TableSlot)) + continue; /* no visible tuple, try next index entry */ + + ExecClearTuple(node->ioss_TableSlot); + + /* + * Only MVCC snapshots are supported here, so there should be + * no need to keep following the HOT chain once a visible + * entry has been found. If we did want to allow that, we'd + * need to keep more state to remember not to call + * index_getnext_tid next time. + */ + if (scandesc->xs_heap_continue) + elog(ERROR, "non-MVCC snapshots are not supported in index-only scans"); + + /* + * Note: at this point we are holding a pin on the heap page, + * as recorded in scandesc->xs_cbuf. We could release that + * pin now, but it's not clear whether it's a win to do so. + * The next index entry might require a visit to the same heap + * page. + */ + + tuple_from_heap = true; + break; + } } /* diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index c760b19db55..f01adcd6f71 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -7109,44 +7109,58 @@ get_actual_variable_endpoint(Relation heapRel, while ((tid = index_getnext_tid(index_scan, indexscandir)) != NULL) { BlockNumber block = ItemPointerGetBlockNumber(tid); + TMVC_Result visres = index_scan->xs_visrecheck; - if (!VM_ALL_VISIBLE(heapRel, - block, - &vmbuffer)) + if (visres == TMVC_Unchecked) + visres = table_index_vischeck_tuple(heapRel, &vmbuffer, tid); + + Assert(visres != TMVC_Unchecked); + + switch (visres) { - /* Rats, we have to visit the heap to check visibility */ - if (!index_fetch_heap(index_scan, tableslot)) + case TMVC_Unchecked: + elog(ERROR, "Failed to check visibility for tuple"); + case TMVC_Visible: + /* no further checks required here */ + break; + case TMVC_MaybeVisible: { - /* - * No visible tuple for this index entry, so we need to - * advance to the next entry. Before doing so, count heap - * page fetches and give up if we've done too many. - * - * We don't charge a page fetch if this is the same heap page - * as the previous tuple. This is on the conservative side, - * since other recently-accessed pages are probably still in - * buffers too; but it's good enough for this heuristic. - */ + /* Rats, we have to visit the heap to check visibility */ + if (!index_fetch_heap(index_scan, tableslot)) + { + /* + * No visible tuple for this index entry, so we need to + * advance to the next entry. Before doing so, count heap + * page fetches and give up if we've done too many. + * + * We don't charge a page fetch if this is the same heap + * page as the previous tuple. This is on the + * conservative side, since other recently-accessed pages + * are probably still in buffers too; but it's good enough + * for this heuristic. + */ #define VISITED_PAGES_LIMIT 100 - if (block != last_heap_block) - { - last_heap_block = block; - n_visited_heap_pages++; - if (n_visited_heap_pages > VISITED_PAGES_LIMIT) - break; - } + if (block != last_heap_block) + { + last_heap_block = block; + n_visited_heap_pages++; + if (n_visited_heap_pages > VISITED_PAGES_LIMIT) + break; + } - continue; /* no visible tuple, try next index entry */ - } + continue; /* no visible tuple, try next index entry */ + } - /* We don't actually need the heap tuple for anything */ - ExecClearTuple(tableslot); + /* We don't actually need the heap tuple for anything */ + ExecClearTuple(tableslot); - /* - * We don't care whether there's more than one visible tuple in - * the HOT chain; if any are visible, that's good enough. - */ + /* + * We don't care whether there's more than one visible tuple in + * the HOT chain; if any are visible, that's good enough. + */ + break; + } } /* diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index 78989a959d4..76cf8d02795 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -178,6 +178,8 @@ typedef struct IndexScanDescData bool xs_recheck; /* T means scan keys must be rechecked */ + int xs_visrecheck; /* TM_VisCheckResult from tableam.h */ + /* * When fetching with an ordering operator, the values of the ORDER BY * expressions of the last returned tuple, according to the index. If -- 2.50.1 (Apple Git-155)