From 5530d65ed6df1b0edc86c8176e7d10f8f10ea6f8 Mon Sep 17 00:00:00 2001 From: jcoleman Date: Mon, 30 Nov 2020 11:36:35 -0500 Subject: [PATCH v3 2/2] Subqueries with LIMIT can be parallel safe when executed per outer tuple The code that determined whether or not a rel should be considered for parallel query excluded subqueries with LIMIT/OFFSET. That's correct in the general case: as the existing comment notes, results would only be consistent across workers if we could guarantee ordering (and the comment claims it's not worth the effort to check for that). However, there's a simpler case to special-case than known-unique or consistently ordered results: when we're already going to execute the subquery within the context of each outer tuple, whether we do that repeated execution within a single process or multiple processes isn't going to affect the guarantees we offer about consistency of results. --- src/backend/optimizer/path/allpaths.c | 7 +++++- src/test/regress/expected/select_parallel.out | 22 +++++++++---------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 8fc28007f5..6a8756430d 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -684,11 +684,16 @@ set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, * inconsistent results at the top-level. (In some cases, where * the result is ordered, we could relax this restriction. But it * doesn't currently seem worth expending extra effort to do so.) + * We can carve out an exception, however, for cases in which the + * subquery with a limit is already going to be executed in the + * context of a single outer tuple. In that case we execute the + * subquery more than once anyway, and so we already cannot + * guarantee deterministic row order whether parallel or not.
*/ { Query *subquery = castNode(Query, rte->subquery); - if (limit_needed(subquery)) + if (bms_is_empty(rel->lateral_relids) && limit_needed(subquery)) return; } break; diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out index ada786d434..6030dafde5 100644 --- a/src/test/regress/expected/select_parallel.out +++ b/src/test/regress/expected/select_parallel.out @@ -1097,25 +1097,23 @@ explain (costs off) select t.unique1 from tenk1 t join lateral (select t.unique1 from tenk1 offset 0) l on true; QUERY PLAN --------------------------------------------------------------------- - Nested Loop - -> Gather - Workers Planned: 4 + Gather + Workers Planned: 4 + -> Nested Loop -> Parallel Index Only Scan using tenk1_unique1 on tenk1 t - -> Gather - Workers Planned: 4 - -> Parallel Index Only Scan using tenk1_hundred on tenk1 -(7 rows) + -> Index Only Scan using tenk1_hundred on tenk1 +(5 rows) explain (costs off) select t.unique1 from tenk1 t join lateral (select t.unique1 from tenk1 limit 1) l on true; QUERY PLAN --------------------------------------------------------------------- - Nested Loop - -> Gather - Workers Planned: 4 + Gather + Workers Planned: 4 + -> Nested Loop -> Parallel Index Only Scan using tenk1_unique1 on tenk1 t - -> Limit - -> Seq Scan on tenk1 + -> Limit + -> Seq Scan on tenk1 (6 rows) explain (costs off) select t.unique1 from tenk1 t -- 2.32.1 (Apple Git-133)