From 7272b43ec90a2879952befd41b776ff0949c5da0 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Tue, 11 Jun 2024 08:51:05 -0400 Subject: [PATCH v21 11/20] Make new bitmap table scan and bitmap heap scan descriptors The number of fields from the TableScanDescData required for bitmap table scans is few. Create a new BitmapTableScanDesc with only those members needed for bitmap table scans and remove the bitmap table scan-only fields from the generic TableScanDescData. Also add a BitmapHeapScanDesc structure for the heap AM implementation of bitmap table scans. Add table AM callbacks for starting, restarting, and ending these scans. While reducing the size of the scan descriptor (and the amount of code executed in the table_begin/end/rescan() functions) was a benefit, creating a new BitmapTableScanDesc and associated table AM functions to begin/end/rescan was actually motivated by work to remove all heap-specific code from the generic bitmap table scan code path. Pushing this code down into the heap AM layer would have required adding quite a few bitmap-specific parameters to heap_begin/end/rescan() --- src/backend/access/heap/heapam.c | 110 ++++++++++++++++++---- src/backend/access/heap/heapam_handler.c | 12 ++- src/backend/executor/nodeBitmapHeapscan.c | 29 +++--- src/include/access/heapam.h | 39 ++++++-- src/include/access/relscan.h | 20 +++- src/include/access/tableam.h | 83 +++++++++++----- src/include/nodes/execnodes.h | 2 +- src/tools/pgindent/typedefs.list | 2 + 8 files changed, 225 insertions(+), 72 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 82bb9cb33b6..0d8239d2f15 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -1061,8 +1061,6 @@ heap_beginscan(Relation relation, Snapshot snapshot, scan->rs_base.rs_flags = flags; scan->rs_base.rs_parallel = parallel_scan; scan->rs_strategy = NULL; /* set in initscan */ - scan->rs_vmbuffer = InvalidBuffer; - 
scan->rs_empty_tuples_pending = 0; /* * Disable page-at-a-time mode if it's not a MVCC-safe snapshot. @@ -1178,19 +1176,6 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params, if (BufferIsValid(scan->rs_cbuf)) ReleaseBuffer(scan->rs_cbuf); - if (BufferIsValid(scan->rs_vmbuffer)) - { - ReleaseBuffer(scan->rs_vmbuffer); - scan->rs_vmbuffer = InvalidBuffer; - } - - /* - * Reset rs_empty_tuples_pending, a field only used by bitmap heap scan, - * to avoid incorrectly emitting NULL-filled tuples from a previous scan - * on rescan. - */ - scan->rs_empty_tuples_pending = 0; - /* * The read stream is reset on rescan. This must be done before * initscan(), as some state referred to by read_stream_reset() is reset @@ -1218,9 +1203,6 @@ heap_endscan(TableScanDesc sscan) if (BufferIsValid(scan->rs_cbuf)) ReleaseBuffer(scan->rs_cbuf); - if (BufferIsValid(scan->rs_vmbuffer)) - ReleaseBuffer(scan->rs_vmbuffer); - /* * Must free the read stream before freeing the BufferAccessStrategy. */ @@ -1247,6 +1229,98 @@ heap_endscan(TableScanDesc sscan) pfree(scan); } +BitmapTableScanDesc * +heap_beginscan_bm(Relation relation, Snapshot snapshot, uint32 flags) +{ + BitmapHeapScanDesc *scan; + + /* + * increment relation ref count while scanning relation + * + * This is just to make really sure the relcache entry won't go away while + * the scan has a pointer to it. Caller should be holding the rel open + * anyway, so this is redundant in all normal scenarios... 
+ */ + RelationIncrementReferenceCount(relation); + scan = (BitmapHeapScanDesc *) palloc(sizeof(BitmapHeapScanDesc)); + + scan->rs_base.rs_rd = relation; + scan->rs_base.rs_snapshot = snapshot; + scan->rs_base.rs_flags = flags; + + Assert(snapshot && IsMVCCSnapshot(snapshot)); + + /* we only need to set this up once */ + scan->rs_ctup.t_tableOid = RelationGetRelid(relation); + + scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_base.rs_rd); + + scan->rs_ctup.t_data = NULL; + ItemPointerSetInvalid(&scan->rs_ctup.t_self); + scan->rs_cbuf = InvalidBuffer; + scan->rs_cblock = InvalidBlockNumber; + + scan->rs_cindex = 0; + scan->rs_ntuples = 0; + + scan->rs_vmbuffer = InvalidBuffer; + scan->rs_empty_tuples_pending = 0; + + return (BitmapTableScanDesc *) scan; +} + +void +heap_rescan_bm(BitmapTableScanDesc *sscan) +{ + BitmapHeapScanDesc *scan = (BitmapHeapScanDesc *) sscan; + + if (BufferIsValid(scan->rs_cbuf)) + { + ReleaseBuffer(scan->rs_cbuf); + scan->rs_cbuf = InvalidBuffer; + } + + if (BufferIsValid(scan->rs_vmbuffer)) + { + ReleaseBuffer(scan->rs_vmbuffer); + scan->rs_vmbuffer = InvalidBuffer; + } + + scan->rs_cblock = InvalidBlockNumber; + + /* + * Reset rs_empty_tuples_pending, a field only used by bitmap heap scan, + * to avoid incorrectly emitting NULL-filled tuples from a previous scan + * on rescan. 
+ */ + scan->rs_empty_tuples_pending = 0; + + scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_base.rs_rd); + + scan->rs_ctup.t_data = NULL; + ItemPointerSetInvalid(&scan->rs_ctup.t_self); +} + +void +heap_endscan_bm(BitmapTableScanDesc *sscan) +{ + BitmapHeapScanDesc *scan = (BitmapHeapScanDesc *) sscan; + + if (BufferIsValid(scan->rs_cbuf)) + ReleaseBuffer(scan->rs_cbuf); + + if (BufferIsValid(scan->rs_vmbuffer)) + ReleaseBuffer(scan->rs_vmbuffer); + + /* + * decrement relation reference count and free scan descriptor storage + */ + RelationDecrementReferenceCount(scan->rs_base.rs_rd); + + pfree(scan); +} + + HeapTuple heap_getnext(TableScanDesc sscan, ScanDirection direction) { diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index c3788918777..82f216952b1 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -2119,11 +2119,11 @@ heapam_estimate_rel_size(Relation rel, int32 *attr_widths, */ static bool -heapam_scan_bitmap_next_block(TableScanDesc scan, +heapam_scan_bitmap_next_block(BitmapTableScanDesc *scan, BlockNumber *blockno, bool *recheck, long *lossy_pages, long *exact_pages) { - HeapScanDesc hscan = (HeapScanDesc) scan; + BitmapHeapScanDesc *hscan = (BitmapHeapScanDesc *) scan; BlockNumber block; Buffer buffer; Snapshot snapshot; @@ -2283,10 +2283,10 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, } static bool -heapam_scan_bitmap_next_tuple(TableScanDesc scan, +heapam_scan_bitmap_next_tuple(BitmapTableScanDesc *scan, TupleTableSlot *slot) { - HeapScanDesc hscan = (HeapScanDesc) scan; + BitmapHeapScanDesc *hscan = (BitmapHeapScanDesc *) scan; OffsetNumber targoffset; Page page; ItemId lp; @@ -2629,6 +2629,10 @@ static const TableAmRoutine heapam_methods = { .scan_rescan = heap_rescan, .scan_getnextslot = heap_getnextslot, + .scan_begin_bm = heap_beginscan_bm, + .scan_rescan_bm = heap_rescan_bm, + .scan_end_bm = heap_endscan_bm, + .scan_set_tidrange 
= heap_set_tidrange, .scan_getnextslot_tidrange = heap_getnextslot_tidrange, diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index 3c9e7f00b56..84c720bdc76 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -54,7 +54,7 @@ static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *ps static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node); static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node); static inline void BitmapPrefetch(BitmapHeapScanState *node, - TableScanDesc scan); + BitmapTableScanDesc *scan); static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate); @@ -68,7 +68,7 @@ static TupleTableSlot * BitmapHeapNext(BitmapHeapScanState *node) { ExprContext *econtext; - TableScanDesc scan; + BitmapTableScanDesc *scan; TupleTableSlot *slot; ParallelBitmapHeapState *pstate = node->pstate; dsa_area *dsa = node->ss.ps.state->es_query_dsa; @@ -158,13 +158,17 @@ BitmapHeapNext(BitmapHeapScanState *node) scan = table_beginscan_bm(node->ss.ss_currentRelation, node->ss.ps.state->es_snapshot, - 0, - NULL, - need_tuples); - + dsa, + need_tuples, node->prefetch_maximum); node->scandesc = scan; node->scan_in_progress = true; } + else + { + /* rescan to release any page pin */ + tbm_end_iterate(&scan->tbmiterator); + table_rescan_bm(scan, dsa, node->prefetch_maximum); + } tbm_begin_iterate(&scan->tbmiterator, node->tbm, dsa, pstate ? 
@@ -401,7 +405,7 @@ BitmapAdjustPrefetchTarget(BitmapHeapScanState *node) * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target */ static inline void -BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan) +BitmapPrefetch(BitmapHeapScanState *node, BitmapTableScanDesc *scan) { #ifdef USE_PREFETCH ParallelBitmapHeapState *pstate = node->pstate; @@ -539,10 +543,6 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node) { PlanState *outerPlan = outerPlanState(node); - /* rescan to release any page pin */ - if (node->scandesc) - table_rescan(node->scandesc, NULL); - /* release bitmaps and buffers if any */ tbm_end_iterate(&node->prefetch_iterator); if (node->tbm) @@ -576,7 +576,7 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node) void ExecEndBitmapHeapScan(BitmapHeapScanState *node) { - TableScanDesc scanDesc; + BitmapTableScanDesc *scanDesc; /* * extract information from the node @@ -593,7 +593,10 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node) * close heap scan */ if (scanDesc) - table_endscan(scanDesc); + { + tbm_end_iterate(&scanDesc->tbmiterator); + table_endscan_bm(scanDesc); + } /* * release bitmaps and buffers if any diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 9e9aec88a62..3cac199dd36 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -91,22 +91,39 @@ typedef struct HeapScanDescData */ ParallelBlockTableScanWorkerData *rs_parallelworkerdata; + /* these fields only used in page-at-a-time mode and for bitmap scans */ + int rs_cindex; /* current tuple's index in vistuples */ + int rs_ntuples; /* number of visible tuples on page */ + OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]; /* their offsets */ +} HeapScanDescData; +typedef struct HeapScanDescData *HeapScanDesc; + +typedef struct BitmapHeapScanDesc +{ + BitmapTableScanDesc rs_base; + + BlockNumber rs_nblocks; /* total number of blocks in rel */ + + int rs_cindex; /* current tuple's index in vistuples */ + int 
rs_ntuples; /* number of visible tuples on page */ + OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]; /* their offsets */ + + Buffer rs_cbuf; /* current buffer in scan, if any */ + /* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */ + HeapTupleData rs_ctup; /* current tuple in scan, if any */ + + BlockNumber rs_cblock; /* current block # in scan, if any */ + /* * These fields are only used for bitmap scans for the "skip fetch" * optimization. Bitmap scans needing no fields from the heap may skip * fetching an all visible block, instead using the number of tuples per * block reported by the bitmap to determine how many NULL-filled tuples - * to return. + * to return. They are common to parallel and serial BitmapHeapScans */ Buffer rs_vmbuffer; int rs_empty_tuples_pending; - - /* these fields only used in page-at-a-time mode and for bitmap scans */ - int rs_cindex; /* current tuple's index in vistuples */ - int rs_ntuples; /* number of visible tuples on page */ - OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]; /* their offsets */ -} HeapScanDescData; -typedef struct HeapScanDescData *HeapScanDesc; +} BitmapHeapScanDesc; /* * Descriptor for fetches from heap via an index. 
@@ -296,6 +313,12 @@ extern void heap_prepare_pagescan(TableScanDesc sscan); extern void heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode); extern void heap_endscan(TableScanDesc sscan); + +extern BitmapTableScanDesc *heap_beginscan_bm(Relation relation, + Snapshot snapshot, uint32 flags); +extern void heap_rescan_bm(BitmapTableScanDesc *sscan); +extern void heap_endscan_bm(BitmapTableScanDesc *sscan); + extern HeapTuple heap_getnext(TableScanDesc sscan, ScanDirection direction); extern bool heap_getnextslot(TableScanDesc sscan, ScanDirection direction, struct TupleTableSlot *slot); diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index b4511677b1d..036ef29e7d5 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -37,9 +37,6 @@ typedef struct TableScanDescData int rs_nkeys; /* number of scan keys */ struct ScanKeyData *rs_key; /* array of scan key descriptors */ - /* Iterators for Bitmap Table Scans */ - TBMIterator tbmiterator; - /* Range of ItemPointers for table_scan_getnextslot_tidrange() to scan. */ ItemPointerData rs_mintid; ItemPointerData rs_maxtid; @@ -55,6 +52,23 @@ typedef struct TableScanDescData } TableScanDescData; typedef struct TableScanDescData *TableScanDesc; +typedef struct BitmapTableScanDesc +{ + Relation rs_rd; /* heap relation descriptor */ + struct SnapshotData *rs_snapshot; /* snapshot to see */ + + /* + * Members common to Parallel and Serial BitmapTableScans + */ + TBMIterator tbmiterator; + + /* + * Information about type and behaviour of the scan, a bitmask of members + * of the ScanOptions enum (see tableam.h). + */ + uint32 rs_flags; +} BitmapTableScanDesc; + /* * Shared state for parallel table scan. 
* diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 7034b79c2de..5110fa2b443 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -345,6 +345,24 @@ typedef struct TableAmRoutine bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode); + /* + * Functions to begin, restart, and end a scan of the underlying table of + * a bitmap table scan. + * + * `rel`, `flags`, and `snapshot` serve the same purposes as in the + * standard relation scan_[begin|rescan|end] functions documented above. + */ + BitmapTableScanDesc *(*scan_begin_bm) (Relation rel, + Snapshot snapshot, + uint32 flags); + + void (*scan_rescan_bm) (BitmapTableScanDesc *scan); + + /* + * Release resources and deallocate scan. + */ + void (*scan_end_bm) (BitmapTableScanDesc *scan); + /* * Return next tuple from `scan`, store in slot. */ @@ -815,7 +833,7 @@ typedef struct TableAmRoutine * Optional callback, but either both scan_bitmap_next_block and * scan_bitmap_next_tuple need to exist, or neither. */ - bool (*scan_bitmap_next_block) (TableScanDesc scan, + bool (*scan_bitmap_next_block) (BitmapTableScanDesc *scan, BlockNumber *blockno, bool *recheck, long *lossy_pages, long *exact_pages); @@ -826,7 +844,7 @@ typedef struct TableAmRoutine * Optional callback, but either both scan_bitmap_next_block and * scan_bitmap_next_tuple need to exist, or neither. */ - bool (*scan_bitmap_next_tuple) (TableScanDesc scan, + bool (*scan_bitmap_next_tuple) (BitmapTableScanDesc *scan, TupleTableSlot *slot); /* @@ -851,8 +869,8 @@ typedef struct TableAmRoutine * is obviously OK. * * Currently it is required to implement this interface, as there's no - * alternative way (contrary e.g. to bitmap scans) to implement sample - * scans. If infeasible to implement, the AM may raise an error. + * alternative way to implement sample scans. If infeasible to implement, + * the AM may raise an error. 
*/ bool (*scan_sample_next_block) (TableScanDesc scan, struct SampleScanState *scanstate); @@ -945,29 +963,50 @@ table_beginscan_strat(Relation rel, Snapshot snapshot, } /* - * table_beginscan_bm is an alternative entry point for setting up a - * TableScanDesc for a bitmap heap scan. Although that scan technology is - * really quite unlike a standard seqscan, there is just enough commonality to - * make it worth using the same data structure. + * table_beginscan_bm is an entry point for setting up a BitmapTableScanDesc + * for a bitmap table scan. */ -static inline TableScanDesc +static inline BitmapTableScanDesc * table_beginscan_bm(Relation rel, Snapshot snapshot, - int nkeys, struct ScanKeyData *key, bool need_tuple) + dsa_area *dsa, + bool need_tuple, + int prefetch_maximum) { uint32 flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE; if (need_tuple) flags |= SO_NEED_TUPLES; - return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags); + return rel->rd_tableam->scan_begin_bm(rel, snapshot, flags); } +/* + * Restart a bitmap table scan. + */ +static inline void +table_rescan_bm(BitmapTableScanDesc *scan, + dsa_area *dsa, + int prefetch_maximum) +{ + scan->rs_rd->rd_tableam->scan_rescan_bm(scan); +} + +/* + * End a bitmap table scan. + */ +static inline void +table_endscan_bm(BitmapTableScanDesc *scan) +{ + scan->rs_rd->rd_tableam->scan_end_bm(scan); +} + + /* * table_beginscan_sampling is an alternative entry point for setting up a - * TableScanDesc for a TABLESAMPLE scan. As with bitmap scans, it's worth - * using the same data structure although the behavior is rather different. - * In addition to the options offered by table_beginscan_strat, this call - * also allows control of whether page-mode visibility checking is used. + * TableScanDesc for a TABLESAMPLE scan. It's worth using the same data + * structure although the behavior is rather different. 
In addition to the + * options offered by table_beginscan_strat, this call also allows control of + * whether page-mode visibility checking is used. */ static inline TableScanDesc table_beginscan_sampling(Relation rel, Snapshot snapshot, @@ -989,7 +1028,7 @@ table_beginscan_sampling(Relation rel, Snapshot snapshot, /* * table_beginscan_tid is an alternative entry point for setting up a - * TableScanDesc for a Tid scan. As with bitmap scans, it's worth using + * TableScanDesc for a Tid scan. As with sample scans, it's worth using * the same data structure although the behavior is rather different. */ static inline TableScanDesc @@ -1002,7 +1041,7 @@ table_beginscan_tid(Relation rel, Snapshot snapshot) /* * table_beginscan_analyze is an alternative entry point for setting up a - * TableScanDesc for an ANALYZE scan. As with bitmap scans, it's worth using + * TableScanDesc for an ANALYZE scan. As with sample scans, it's worth using * the same data structure although the behavior is rather different. */ static inline TableScanDesc @@ -1019,9 +1058,6 @@ table_beginscan_analyze(Relation rel) static inline void table_endscan(TableScanDesc scan) { - if (scan->rs_flags & SO_TYPE_BITMAPSCAN) - tbm_end_iterate(&scan->tbmiterator); - scan->rs_rd->rd_tableam->scan_end(scan); } @@ -1032,9 +1068,6 @@ static inline void table_rescan(TableScanDesc scan, struct ScanKeyData *key) { - if (scan->rs_flags & SO_TYPE_BITMAPSCAN) - tbm_end_iterate(&scan->tbmiterator); - scan->rs_rd->rd_tableam->scan_rescan(scan, key, false, false, false, false); } @@ -1974,7 +2007,7 @@ table_relation_estimate_size(Relation rel, int32 *attr_widths, * used after verifying the presence (at plan time or such). */ static inline bool -table_scan_bitmap_next_block(TableScanDesc scan, +table_scan_bitmap_next_block(BitmapTableScanDesc *scan, BlockNumber *blockno, bool *recheck, long *lossy_pages, long *exact_pages) @@ -2001,7 +2034,7 @@ table_scan_bitmap_next_block(TableScanDesc scan, * returned false. 
*/ static inline bool -table_scan_bitmap_next_tuple(TableScanDesc scan, +table_scan_bitmap_next_tuple(BitmapTableScanDesc *scan, TupleTableSlot *slot) { /* diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 5a9a187771a..0f34bac4baf 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1814,7 +1814,7 @@ typedef struct ParallelBitmapHeapState typedef struct BitmapHeapScanState { ScanState ss; /* its first field is NodeTag */ - struct TableScanDescData *scandesc; + struct BitmapTableScanDesc *scandesc; ExprState *bitmapqualorig; TIDBitmap *tbm; Buffer pvmbuffer; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 910fbdd880c..49941d62251 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -261,12 +261,14 @@ BitmapAndPath BitmapAndState BitmapHeapPath BitmapHeapScan +BitmapHeapScanDesc BitmapHeapScanState BitmapIndexScan BitmapIndexScanState BitmapOr BitmapOrPath BitmapOrState +BitmapTableScanDesc Bitmapset Block BlockId -- 2.34.1