diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index ccc07ba9f0..332f05875f 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -2209,7 +2209,7 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root,
 
 /*
  * add_child_rel_equivalences
- *	  Search for EC members that reference the parent_rel, and
+ *	  Search for EC members that reference the root parent of child_rel, and
  *	  add transformed members referencing the child_rel.
  *
  * Note that this function won't be called at all unless we have at least some
@@ -2217,6 +2217,14 @@ match_eclasses_to_foreign_key_col(PlannerInfo *root,
  *
  * parent_rel and child_rel could be derived from appinfo, but since the
  * caller has already computed them, we might as well just pass them in.
+ *
+ * The passed-in AppendRelInfo is not used at all if child_rel is not a
+ * direct child of parent_rel, because it would contain the mapping from the
+ * direct parent, whereas we need to translate from the root parent's EC
+ * expressions.  Still, having the caller pass it in the common cases that
+ * don't involve multi-level inheritance is great for performance, because
+ * adjust_appendrel_attrs_multilevel(), etc. have to look up AppendRelInfos
+ * from child relids, and that overhead can quickly add up.
  */
 void
 add_child_rel_equivalences(PlannerInfo *root,
@@ -2334,6 +2342,147 @@ add_child_rel_equivalences(PlannerInfo *root,
 		}
 	}
 }
 
+/*
+ * add_child_join_rel_equivalences
+ *	  Like add_child_rel_equivalences(), but for joinrels
+ *
+ * Here we find the ECs relevant to the top parent joinrel and add
+ * transformed member expressions that refer to the given child joinrel.
+ *
+ * nappinfos/appinfos are used only when parent_joinrel's reloptkind is
+ * RELOPT_JOINREL; otherwise expressions are translated from the top parent
+ * with adjust_appendrel_attrs_multilevel().  The ECs to examine are
+ * obtained by calling get_common_eclass_indexes() on the relids of
+ * child_inner_rel and child_outer_rel.
+ */
+void
+add_child_join_rel_equivalences(PlannerInfo *root,
+								int nappinfos, AppendRelInfo **appinfos,
+								RelOptInfo *parent_joinrel,
+								RelOptInfo *child_joinrel,
+								RelOptInfo *child_outer_rel,
+								RelOptInfo *child_inner_rel)
+{
+	Relids		top_parent_relids = child_joinrel->top_parent_relids;
+	Bitmapset  *matching_ecs;
+	MemoryContext oldcontext;
+	int			i;
+
+	Assert(IS_JOIN_REL(child_joinrel) && IS_JOIN_REL(parent_joinrel));
+
+	matching_ecs = get_common_eclass_indexes(root,
+											 child_inner_rel->relids,
+											 child_outer_rel->relids);
+
+	/*
+	 * Child joinrels are built during join search, which under GEQO runs in
+	 * a short-lived memory context.  The members added below are linked into
+	 * long-lived EquivalenceClasses, so they must be allocated in the main
+	 * planner context or the ECs would be left with dangling pointers.
+	 */
+	oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+	i = -1;
+	while ((i = bms_next_member(matching_ecs, i)) >= 0)
+	{
+		EquivalenceClass *cur_ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
+		int			num_members;
+
+		/*
+		 * If this EC contains a volatile expression, then generating child
+		 * EMs would be downright dangerous, so skip it.  We rely on a
+		 * volatile EC having only one EM.
+		 */
+		if (cur_ec->ec_has_volatile)
+			continue;
+
+		/* Sanity check */
+		Assert(bms_overlap(top_parent_relids, cur_ec->ec_relids));
+
+		/*
+		 * We don't use foreach() here because there's no point in scanning
+		 * newly-added child members, so we can stop after the last
+		 * pre-existing EC member.
+		 */
+		num_members = list_length(cur_ec->ec_members);
+		for (int pos = 0; pos < num_members; pos++)
+		{
+			EquivalenceMember *cur_em = (EquivalenceMember *) list_nth(cur_ec->ec_members, pos);
+
+			if (cur_em->em_is_const)
+				continue;		/* ignore consts here */
+
+			/*
+			 * We consider only original EC members here, not
+			 * already-transformed child members.
+			 */
+			if (cur_em->em_is_child)
+				continue;
+
+			/*
+			 * Does this member reference child's topmost parent rel?  Don't
+			 * bother with expressions that reference a single base
+			 * appendrel, because they would already have been transformed.
+			 */
+			if (bms_membership(cur_em->em_relids) == BMS_MULTIPLE &&
+				bms_overlap(cur_em->em_relids, top_parent_relids))
+			{
+				/* Yes, generate transformed child version */
+				Expr	   *child_expr;
+				Relids		new_relids;
+				Relids		new_nullable_relids;
+
+				if (parent_joinrel->reloptkind == RELOPT_JOINREL)
+				{
+					/* Simple single-level transformation */
+					child_expr = (Expr *)
+						adjust_appendrel_attrs(root,
+											   (Node *) cur_em->em_expr,
+											   nappinfos, appinfos);
+				}
+				else
+				{
+					Assert(parent_joinrel->reloptkind == RELOPT_OTHER_JOINREL);
+					/* Must do multi-level transformation */
+					child_expr = (Expr *)
+						adjust_appendrel_attrs_multilevel(root,
+														  (Node *) cur_em->em_expr,
+														  child_joinrel->relids,
+														  top_parent_relids);
+				}
+
+				/*
+				 * Transform em_relids to match.  Note we do *not* do
+				 * pull_varnos(child_expr) here, as for example the
+				 * transformation might have substituted a constant, but we
+				 * don't want the child member to be marked as constant.
+				 */
+				new_relids = bms_difference(cur_em->em_relids,
+											top_parent_relids);
+				new_relids = bms_add_members(new_relids, child_joinrel->relids);
+
+				/*
+				 * For nullable_relids, we must selectively replace parent
+				 * nullable relids with child ones.
+				 */
+				new_nullable_relids = cur_em->em_nullable_relids;
+				if (bms_overlap(new_nullable_relids, top_parent_relids))
+					new_nullable_relids =
+						adjust_child_relids_multilevel(root,
+													   new_nullable_relids,
+													   child_joinrel->relids,
+													   top_parent_relids);
+
+				(void) add_eq_member(cur_ec, child_expr,
+									 new_relids, new_nullable_relids,
+									 true, cur_em->em_datatype);
+			}
+		}
+	}
+
+	MemoryContextSwitchTo(oldcontext);
+}
+
 
 /*
  * generate_implied_equalities_for_column
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 85415381fb..f0e8e5ed3a 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -854,7 +854,6 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
 														(Node *) parent_joinrel->joininfo,
 														nappinfos,
 														appinfos);
-	pfree(appinfos);
 
 	/*
 	 * Lateral relids referred in child join will be same as that referred in
@@ -869,6 +868,14 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
 	 */
 	joinrel->has_eclass_joins = parent_joinrel->has_eclass_joins;
 
+	if (joinrel->has_eclass_joins || has_useful_pathkeys(root, parent_joinrel))
+		add_child_join_rel_equivalences(root,
+										nappinfos, appinfos,
+										parent_joinrel, joinrel,
+										outer_rel, inner_rel);
+
+	pfree(appinfos);
+
 	/* Is the join between partitions itself partitioned?  */
 	build_joinrel_partition_info(joinrel, outer_rel, inner_rel, restrictlist,
 								 jointype);
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 7345137d1d..55c95664e7 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -153,6 +153,12 @@ extern void add_child_rel_equivalences(PlannerInfo *root,
 									   AppendRelInfo *appinfo,
 									   RelOptInfo *parent_rel,
 									   RelOptInfo *child_rel);
+extern void add_child_join_rel_equivalences(PlannerInfo *root,
+											int nappinfos, AppendRelInfo **appinfos,
+											RelOptInfo *parent_joinrel,
+											RelOptInfo *child_joinrel,
+											RelOptInfo *child_outer_rel,
+											RelOptInfo *child_inner_rel);
 extern List *generate_implied_equalities_for_column(PlannerInfo *root,
 													RelOptInfo *rel,
 													ec_matches_callback_type callback,
diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out
index c448d85dec..003c8d1c24 100644
--- a/src/test/regress/expected/equivclass.out
+++ b/src/test/regress/expected/equivclass.out
@@ -439,3 +439,67 @@ explain (costs off)
    Filter: ((unique1 = unique1) OR (unique2 = unique2))
 (2 rows)
 
+-- Check that child merge join for a FULL OUTER join works correctly
+SET enable_partitionwise_join TO on;
+SET enable_partitionwise_aggregate TO on;
+CREATE TABLE child_joins_ecs_testtab1 (a int);
+INSERT INTO child_joins_ecs_testtab1 SELECT generate_series(1, 100);
+CREATE TABLE child_joins_ecs_testtab2 (a int, b int) PARTITION BY RANGE (a);
+CREATE TABLE child_joins_ecs_testtab2_p1 PARTITION OF child_joins_ecs_testtab2 FOR VALUES FROM (1) TO (10001);
+CREATE TABLE child_joins_ecs_testtab2_p2 PARTITION OF child_joins_ecs_testtab2 FOR VALUES FROM (10001) TO (20001);
+CREATE TABLE child_joins_ecs_testtab2_p3 PARTITION OF child_joins_ecs_testtab2 FOR VALUES FROM (20001) TO (30001);
+INSERT INTO child_joins_ecs_testtab2 SELECT a, a % 100 + 1 FROM generate_series(1, 30000) a;
+ANALYZE child_joins_ecs_testtab1, child_joins_ecs_testtab2;
+-- this forces plan to be a specific shape
+SET work_mem TO '0.1MB';
+SET max_parallel_workers_per_gather TO 0;
+EXPLAIN (COSTS OFF)
+SELECT child_joins_ecs_testtab1.*
+  FROM (SELECT a, b
+          FROM child_joins_ecs_testtab2 t1 FULL JOIN child_joins_ecs_testtab2 t2 USING(a, b)
+         WHERE a >= 1 AND a < 200000
+         GROUP BY 1, 2) AS data
+  JOIN child_joins_ecs_testtab1 ON (child_joins_ecs_testtab1.a = data.b);
+                                                    QUERY PLAN                                                     
+-------------------------------------------------------------------------------------------------------------------
+ Nested Loop
+   Join Filter: ((COALESCE(t1.b, t2.b)) = child_joins_ecs_testtab1.a)
+   ->  Group
+         Group Key: (COALESCE(t1.a, t2.a)), (COALESCE(t1.b, t2.b))
+         ->  Merge Append
+               Sort Key: (COALESCE(t1.a, t2.a)), (COALESCE(t1.b, t2.b))
+               ->  Group
+                     Group Key: (COALESCE(t1.a, t2.a)), (COALESCE(t1.b, t2.b))
+                     ->  Sort
+                           Sort Key: (COALESCE(t1.a, t2.a)), (COALESCE(t1.b, t2.b))
+                           ->  Hash Full Join
+                                 Hash Cond: ((t1.a = t2.a) AND (t1.b = t2.b))
+                                 Filter: ((COALESCE(t1.a, t2.a) >= 1) AND (COALESCE(t1.a, t2.a) < 200000))
+                                 ->  Seq Scan on child_joins_ecs_testtab2_p1 t1
+                                 ->  Hash
+                                       ->  Seq Scan on child_joins_ecs_testtab2_p1 t2
+               ->  Group
+                     Group Key: (COALESCE(t1_1.a, t2_1.a)), (COALESCE(t1_1.b, t2_1.b))
+                     ->  Sort
+                           Sort Key: (COALESCE(t1_1.a, t2_1.a)), (COALESCE(t1_1.b, t2_1.b))
+                           ->  Hash Full Join
+                                 Hash Cond: ((t1_1.a = t2_1.a) AND (t1_1.b = t2_1.b))
+                                 Filter: ((COALESCE(t1_1.a, t2_1.a) >= 1) AND (COALESCE(t1_1.a, t2_1.a) < 200000))
+                                 ->  Seq Scan on child_joins_ecs_testtab2_p2 t1_1
+                                 ->  Hash
+                                       ->  Seq Scan on child_joins_ecs_testtab2_p2 t2_1
+               ->  Group
+                     Group Key: (COALESCE(t1_2.a, t2_2.a)), (COALESCE(t1_2.b, t2_2.b))
+                     ->  Sort
+                           Sort Key: (COALESCE(t1_2.a, t2_2.a)), (COALESCE(t1_2.b, t2_2.b))
+                           ->  Hash Full Join
+                                 Hash Cond: ((t1_2.a = t2_2.a) AND (t1_2.b = t2_2.b))
+                                 Filter: ((COALESCE(t1_2.a, t2_2.a) >= 1) AND (COALESCE(t1_2.a, t2_2.a) < 200000))
+                                 ->  Seq Scan on child_joins_ecs_testtab2_p3 t1_2
+                                 ->  Hash
+                                       ->  Seq Scan on child_joins_ecs_testtab2_p3 t2_2
+   ->  Materialize
+         ->  Seq Scan on child_joins_ecs_testtab1
+(38 rows)
+
+DROP TABLE child_joins_ecs_testtab1, child_joins_ecs_testtab2;
diff --git a/src/test/regress/sql/equivclass.sql b/src/test/regress/sql/equivclass.sql
index 85aa65de39..bd792dfc3c 100644
--- a/src/test/regress/sql/equivclass.sql
+++ b/src/test/regress/sql/equivclass.sql
@@ -262,3 +262,26 @@ explain (costs off)
 -- this could be converted, but isn't at present
 explain (costs off)
   select * from tenk1 where unique1 = unique1 or unique2 = unique2;
+
+-- Check that child merge join for a FULL OUTER join works correctly
+SET enable_partitionwise_join TO on;
+SET enable_partitionwise_aggregate TO on;
+CREATE TABLE child_joins_ecs_testtab1 (a int);
+INSERT INTO child_joins_ecs_testtab1 SELECT generate_series(1, 100);
+CREATE TABLE child_joins_ecs_testtab2 (a int, b int) PARTITION BY RANGE (a);
+CREATE TABLE child_joins_ecs_testtab2_p1 PARTITION OF child_joins_ecs_testtab2 FOR VALUES FROM (1) TO (10001);
+CREATE TABLE child_joins_ecs_testtab2_p2 PARTITION OF child_joins_ecs_testtab2 FOR VALUES FROM (10001) TO (20001);
+CREATE TABLE child_joins_ecs_testtab2_p3 PARTITION OF child_joins_ecs_testtab2 FOR VALUES FROM (20001) TO (30001);
+INSERT INTO child_joins_ecs_testtab2 SELECT a, a % 100 + 1 FROM generate_series(1, 30000) a;
+ANALYZE child_joins_ecs_testtab1, child_joins_ecs_testtab2;
+-- this forces plan to be a specific shape
+SET work_mem TO '0.1MB';
+SET max_parallel_workers_per_gather TO 0;
+EXPLAIN (COSTS OFF)
+SELECT child_joins_ecs_testtab1.*
+  FROM (SELECT a, b
+          FROM child_joins_ecs_testtab2 t1 FULL JOIN child_joins_ecs_testtab2 t2 USING(a, b)
+         WHERE a >= 1 AND a < 200000
+         GROUP BY 1, 2) AS data
+  JOIN child_joins_ecs_testtab1 ON (child_joins_ecs_testtab1.a = data.b);
+DROP TABLE child_joins_ecs_testtab1, child_joins_ecs_testtab2;