From 749b78bddecac6f5d57d3ef94be89c65841fd162 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Mon, 28 Jul 2025 17:58:58 +1200 Subject: [PATCH v9 2/3] fixup! v7 parallel TID range scan patch --- src/backend/access/heap/heapam.c | 28 ++++++++++---------- src/backend/access/table/tableam.c | 16 +++++------- src/backend/executor/execParallel.c | 1 + src/backend/executor/nodeTidrangescan.c | 13 ++++------ src/backend/optimizer/path/costsize.c | 14 +++++----- src/backend/optimizer/path/tidpath.c | 10 +++++--- src/include/nodes/execnodes.h | 2 +- src/test/regress/expected/tidrangescan.out | 30 +++++++++++----------- src/test/regress/sql/tidrangescan.sql | 30 +++++++++++----------- 9 files changed, 71 insertions(+), 73 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 5105a2c8ad..d0e650de57 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -490,6 +490,21 @@ heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlk scan->rs_startblock = startBlk; scan->rs_numblocks = numBlks; + + /* set the limits in the ParallelBlockTableScanDesc, when present */ + + /* + * XXX no lock is being taken here. What guarantees are there that there + * isn't some worker using the old limits when the new limits are imposed? + */ + if (scan->rs_base.rs_parallel != NULL) + { + ParallelBlockTableScanDesc bpscan; + + bpscan = (ParallelBlockTableScanDesc) scan->rs_base.rs_parallel; + bpscan->phs_startblock = startBlk; + bpscan->phs_numblock = numBlks; + } } /* @@ -1478,19 +1493,6 @@ heap_set_tidrange(TableScanDesc sscan, ItemPointer mintid, /* Set the start block and number of blocks to scan */ heap_setscanlimits(sscan, startBlk, numBlks); - /* - * If parallel mode is used, store startBlk and numBlks in parallel - * scan descriptor as well. - */ - if (scan->rs_base.rs_parallel != NULL) - { - ParallelBlockTableScanDesc bpscan = NULL; - - bpscan = (ParallelBlockTableScanDesc) scan->rs_base.rs_parallel; - bpscan->phs_startblock = startBlk; - bpscan->phs_numblock = numBlks; - } - /* Finally, set the TID range in sscan */ ItemPointerCopy(&lowestItem, &sscan->st.tidrange.rs_mintid); ItemPointerCopy(&highestItem, &sscan->st.tidrange.rs_maxtid); diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index 5a76cec81e..8036654c77 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -197,6 +197,9 @@ table_beginscan_parallel_tidrange(Relation relation, ParallelTableScanDesc pscan Assert(RelFileLocatorEquals(relation->rd_locator, pscan->phs_locator)); + /* disable syncscan in parallel tid range scan. */ + pscan->phs_syncscan = false; + if (!pscan->phs_snapshot_any) { /* Snapshot was serialized -- restore it */ @@ -607,18 +610,11 @@ table_block_parallelscan_nextpage(Relation rel, } /* - * In a parallel TID range scan, 'pbscan->phs_numblock' is non-zero if an - * upper TID range limit is specified, or InvalidBlockNumber if no limit - * is given. This value may be less than or equal to 'pbscan->phs_nblocks' - * , which is the total number of blocks in the relation. - * - * The scan can terminate early once 'nallocated' reaches - * 'pbscan->phs_numblock', even if the full relation has remaining blocks - * to scan. This ensures that parallel workers only scan the subset of - * blocks that fall within the TID range. + * Check if we've allocated every block in the relation, or if we've + * reached the limit imposed by pbscan->phs_numblock (if set). */ if (nallocated >= pbscan->phs_nblocks) - page = InvalidBlockNumber; /* all blocks have been allocated */ + page = InvalidBlockNumber; /* all blocks have been allocated */ else if (pbscan->phs_numblock != InvalidBlockNumber && nallocated >= pbscan->phs_numblock) page = InvalidBlockNumber; /* upper scan limit reached */ diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index a58f7eafc9..7b1eb2e82c 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -1036,6 +1036,7 @@ ExecParallelReInitializeDSM(PlanState *planstate, case T_MemoizeState: /* these nodes have DSM state, but no reinitialization is required */ break; + default: break; } diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c index 06a1037d51..afa47b01de 100644 --- a/src/backend/executor/nodeTidrangescan.c +++ b/src/backend/executor/nodeTidrangescan.c @@ -418,13 +418,13 @@ ExecInitTidRangeScan(TidRangeScan *node, EState *estate, int eflags) * ---------------------------------------------------------------- */ void -ExecTidRangeScanEstimate(TidRangeScanState *node, - ParallelContext *pcxt) +ExecTidRangeScanEstimate(TidRangeScanState *node, ParallelContext *pcxt) { EState *estate = node->ss.ps.state; - node->trss_pscanlen = table_parallelscan_estimate(node->ss.ss_currentRelation, - estate->es_snapshot); + node->trss_pscanlen = + table_parallelscan_estimate(node->ss.ss_currentRelation, + estate->es_snapshot); shm_toc_estimate_chunk(&pcxt->estimator, node->trss_pscanlen); shm_toc_estimate_keys(&pcxt->estimator, 1); } @@ -436,8 +436,7 @@ ExecTidRangeScanEstimate(TidRangeScanState *node, * ---------------------------------------------------------------- */ void -ExecTidRangeScanInitializeDSM(TidRangeScanState *node, - ParallelContext *pcxt) +ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt) { EState *estate = node->ss.ps.state; ParallelTableScanDesc pscan; @@ -446,8 +445,6 @@ ExecTidRangeScanInitializeDSM(TidRangeScanState *node, table_parallelscan_initialize(node->ss.ss_currentRelation, pscan, estate->es_snapshot); - /* disable syncscan in parallel tid range scan. */ - pscan->phs_syncscan = false; shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan); node->ss.ss_currentScanDesc = table_beginscan_parallel_tidrange(node->ss.ss_currentRelation, pscan); diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index fdb58d094f..eab1b18d30 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1366,9 +1366,9 @@ cost_tidrangescan(Path *path, PlannerInfo *root, { Selectivity selectivity; double pages; - Cost startup_cost = 0; - Cost cpu_run_cost = 0; - Cost disk_run_cost = 0; + Cost startup_cost; + Cost cpu_run_cost; + Cost disk_run_cost; QualCost qpqual_cost; Cost cpu_per_tuple; QualCost tid_qual_cost; @@ -1414,7 +1414,7 @@ cost_tidrangescan(Path *path, PlannerInfo *root, &spc_seq_page_cost); /* disk costs; 1 random page and the remainder as seq pages */ - disk_run_cost += spc_random_page_cost + spc_seq_page_cost * nseqpages; + disk_run_cost = spc_random_page_cost + spc_seq_page_cost * nseqpages; /* Add scanning CPU costs */ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); @@ -1422,14 +1422,14 @@ cost_tidrangescan(Path *path, PlannerInfo *root, /* * XXX currently we assume TID quals are a subset of qpquals at this * point; they will be removed (if possible) when we create the plan, so - * we subtract their cost from the total qpqual cost. (If the TID quals + * we subtract their cost from the total qpqual cost. (If the TID quals * can't be removed, this is a mistake and we're going to underestimate * the CPU cost a bit.) */ - startup_cost += qpqual_cost.startup + tid_qual_cost.per_tuple; + startup_cost = qpqual_cost.startup + tid_qual_cost.per_tuple; cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple - tid_qual_cost.per_tuple; - cpu_run_cost += cpu_per_tuple * ntuples; + cpu_run_cost = cpu_per_tuple * ntuples; /* tlist eval costs are paid per output row, not per tuple scanned */ startup_cost += path->pathtarget->cost.startup; diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c index 9c78eedcf5..e48c85833e 100644 --- a/src/backend/optimizer/path/tidpath.c +++ b/src/backend/optimizer/path/tidpath.c @@ -564,11 +564,13 @@ create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel) parallel_workers = compute_parallel_worker(rel, rel->pages, -1, max_parallel_workers_per_gather); + if (parallel_workers > 0) - { - add_partial_path(rel, (Path *) create_tidrangescan_path(root, rel, tidrangequals, - required_outer, parallel_workers)); - } + add_partial_path(rel, (Path *) create_tidrangescan_path(root, + rel, + tidrangequals, + required_outer, + parallel_workers)); } } diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 958c78f66c..4947b6cca0 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1929,7 +1929,7 @@ typedef struct TidScanState * trss_mintid the lowest TID in the scan range * trss_maxtid the highest TID in the scan range * trss_inScan is a scan currently in progress? - * trss_pscanlen size of parallel TID range scan descriptor + * trss_pscanlen size of parallel heap scan descriptor * ---------------- */ typedef struct TidRangeScanState diff --git a/src/test/regress/expected/tidrangescan.out b/src/test/regress/expected/tidrangescan.out index 32cd2bd9f4..3c5fc9e102 100644 --- a/src/test/regress/expected/tidrangescan.out +++ b/src/test/regress/expected/tidrangescan.out @@ -298,13 +298,13 @@ FETCH LAST c; COMMIT; DROP TABLE tidrangescan; -- tests for parallel tidrangescans -SET parallel_setup_cost=0; -SET parallel_tuple_cost=0; -SET min_parallel_table_scan_size=0; -SET max_parallel_workers_per_gather=4; -CREATE TABLE parallel_tidrangescan(id integer, data text) WITH (fillfactor=10); +SET parallel_setup_cost TO 0; +SET parallel_tuple_cost TO 0; +SET min_parallel_table_scan_size TO 0; +SET max_parallel_workers_per_gather TO 4; +CREATE TABLE parallel_tidrangescan(id integer, data text) WITH (fillfactor = 10); -- insert enough tuples such that each page gets 5 tuples with fillfactor = 10 -INSERT INTO parallel_tidrangescan SELECT i,repeat('x', 100) FROM generate_series(1,200) AS s(i); +INSERT INTO parallel_tidrangescan SELECT i, repeat('x', 100) FROM generate_series(1,200) AS s(i); -- ensure there are 40 pages for parallel test SELECT min(ctid), max(ctid) FROM parallel_tidrangescan; min | max @@ -313,8 +313,8 @@ SELECT min(ctid), max(ctid) FROM parallel_tidrangescan; (1 row) -- parallel range scans with upper bound -EXPLAIN (costs off) -SELECT count(*) FROM parallel_tidrangescan WHERE ctid<'(30,1)'; +EXPLAIN (COSTS OFF) +SELECT count(*) FROM parallel_tidrangescan WHERE ctid < '(30,1)'; QUERY PLAN -------------------------------------------------------------------- Finalize Aggregate @@ -325,15 +325,15 @@ SELECT count(*) FROM parallel_tidrangescan WHERE ctid<'(30,1)'; TID Cond: (ctid < '(30,1)'::tid) (6 rows) -SELECT count(*) FROM parallel_tidrangescan WHERE ctid<'(30,1)'; +SELECT count(*) FROM parallel_tidrangescan WHERE ctid < '(30,1)'; count ------- 150 (1 row) -- parallel range scans with lower bound -EXPLAIN (costs off) -SELECT count(*) FROM parallel_tidrangescan WHERE ctid>'(10,0)'; +EXPLAIN (COSTS OFF) +SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)'; QUERY PLAN -------------------------------------------------------------------- Finalize Aggregate @@ -344,15 +344,15 @@ SELECT count(*) FROM parallel_tidrangescan WHERE ctid>'(10,0)'; TID Cond: (ctid > '(10,0)'::tid) (6 rows) -SELECT count(*) FROM parallel_tidrangescan WHERE ctid>'(10,0)'; +SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)'; count ------- 150 (1 row) -- parallel range scans with both bounds -EXPLAIN (costs off) -SELECT count(*) FROM parallel_tidrangescan WHERE ctid>'(10,0)' AND ctid<'(30,1)'; +EXPLAIN (COSTS OFF) +SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)' AND ctid < '(30,1)'; QUERY PLAN ----------------------------------------------------------------------------------- Finalize Aggregate @@ -363,7 +363,7 @@ SELECT count(*) FROM parallel_tidrangescan WHERE ctid>'(10,0)' AND ctid<'(30,1)' TID Cond: ((ctid > '(10,0)'::tid) AND (ctid < '(30,1)'::tid)) (6 rows) -SELECT count(*) FROM parallel_tidrangescan WHERE ctid>'(10,0)' AND ctid<'(30,1)'; +SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)' AND ctid < '(30,1)'; count ------- 100 diff --git a/src/test/regress/sql/tidrangescan.sql b/src/test/regress/sql/tidrangescan.sql index 1d18b8a61d..0f1e43c6d0 100644 --- a/src/test/regress/sql/tidrangescan.sql +++ b/src/test/regress/sql/tidrangescan.sql @@ -99,33 +99,33 @@ COMMIT; DROP TABLE tidrangescan; -- tests for parallel tidrangescans -SET parallel_setup_cost=0; -SET parallel_tuple_cost=0; -SET min_parallel_table_scan_size=0; -SET max_parallel_workers_per_gather=4; +SET parallel_setup_cost TO 0; +SET parallel_tuple_cost TO 0; +SET min_parallel_table_scan_size TO 0; +SET max_parallel_workers_per_gather TO 4; -CREATE TABLE parallel_tidrangescan(id integer, data text) WITH (fillfactor=10); +CREATE TABLE parallel_tidrangescan(id integer, data text) WITH (fillfactor = 10); -- insert enough tuples such that each page gets 5 tuples with fillfactor = 10 -INSERT INTO parallel_tidrangescan SELECT i,repeat('x', 100) FROM generate_series(1,200) AS s(i); +INSERT INTO parallel_tidrangescan SELECT i, repeat('x', 100) FROM generate_series(1,200) AS s(i); -- ensure there are 40 pages for parallel test SELECT min(ctid), max(ctid) FROM parallel_tidrangescan; -- parallel range scans with upper bound -EXPLAIN (costs off) -SELECT count(*) FROM parallel_tidrangescan WHERE ctid<'(30,1)'; -SELECT count(*) FROM parallel_tidrangescan WHERE ctid<'(30,1)'; +EXPLAIN (COSTS OFF) +SELECT count(*) FROM parallel_tidrangescan WHERE ctid < '(30,1)'; +SELECT count(*) FROM parallel_tidrangescan WHERE ctid < '(30,1)'; -- parallel range scans with lower bound -EXPLAIN (costs off) -SELECT count(*) FROM parallel_tidrangescan WHERE ctid>'(10,0)'; -SELECT count(*) FROM parallel_tidrangescan WHERE ctid>'(10,0)'; +EXPLAIN (COSTS OFF) +SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)'; +SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)'; -- parallel range scans with both bounds -EXPLAIN (costs off) -SELECT count(*) FROM parallel_tidrangescan WHERE ctid>'(10,0)' AND ctid<'(30,1)'; -SELECT count(*) FROM parallel_tidrangescan WHERE ctid>'(10,0)' AND ctid<'(30,1)'; +EXPLAIN (COSTS OFF) +SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)' AND ctid < '(30,1)'; +SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)' AND ctid < '(30,1)'; -- parallel rescans EXPLAIN (COSTS OFF) -- 2.17.1