From f27115d044b5f6babb722b45d7809954f2c76bbf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=B8=80=E6=8C=83?=
Date: Thu, 7 May 2020 08:36:29 +0800
Subject: [PATCH v11 6/6] Join removal at run-time with UniqueKey.

We add another join removal step during build_join_rel that uses the
benefits of UniqueKey, alongside remove_useless_join. However, this new
strategy can't be a replacement for the current one, since we only know
two relations at that time, so it can't handle something like

    SELECT a.* FROM a LEFT JOIN (b LEFT JOIN c ON b.c_id = c.id) ON (a.b_id = b.id);

However, it can handle more cases than the current strategy, such as

    SELECT t1.a FROM m3 t1 LEFT JOIN (SELECT m1.a FROM m1, m2 WHERE m1.b = m2.a) t2 ON (t1.a = t2.a);

As for the implementation, it is very much a PoC version. The main idea is
that if join_canbe_removed returns true, we still need a joinrel, but one
that carries the information of the outerrel only. However, I had to do
quite a lot of work to get there.

1. If the innerrel can be removed, we don't need to build a pathlist for
   the joinrel; we just reuse the pathlist from the outerrel. However, there
   are many places that assert rel->pathlist[*]->parent == rel, so I copy
   the paths and change their parent to the joinrel.

2. While creating a plan for a path on an RTE_RELATION, the code needs to
   find the relation Oid through path->parent->relid, so we have to use
   outerrel->relid to overwrite joinrel->relid, which was 0 before.

3. Along almost the same code paths as item 2, there is usually an assert
   that best_path->parent->rtekind == RTE_RELATION. Now such a path may
   appear in a joinrel, so I use outerrel->rtekind to overwrite
   joinrel->rtekind.

4. I guess there are some dependencies between path->pathtarget and
   rel->reltarget. Since we reuse the pathlist of the outerrel, I use
   outerrel->reltarget as well. If the join can be removed, I guess
   list_length(outerrel->reltarget->exprs) >=
   list_length(joinrel->reltarget->exprs), so we can rely on the
   ProjectionPath to reduce the tlist.
---
 src/backend/optimizer/path/joinrels.c |   8 +-
 src/backend/optimizer/util/relnode.c  | 214 +++++++++++++++++++++++++-
 src/include/optimizer/pathnode.h      |   4 +-
 src/test/regress/expected/join.out    |  39 ++++
 src/test/regress/sql/join.sql         |  26 +++
 5 files changed, 284 insertions(+), 7 deletions(-)

diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index b9163ee8ff..7e393c09f9 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -691,6 +691,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 	SpecialJoinInfo sjinfo_data;
 	RelOptInfo *joinrel;
 	List	   *restrictlist;
+	bool		innerrel_removed = false;
 
 	/* We should never try to join two overlapping sets of rels. */
 	Assert(!bms_overlap(rel1->relids, rel2->relids));
@@ -744,7 +745,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 	 * goes with this particular joining.
 	 */
 	joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo,
-							 &restrictlist);
+							 &restrictlist, &innerrel_removed);
 
 	/*
 	 * If we've already proven this join is empty, we needn't consider any
@@ -756,9 +757,10 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 		return joinrel;
 	}
 
 	/* Add paths to the join relation. */
-	populate_joinrel_with_paths(root, rel1, rel2, joinrel, sjinfo,
-								restrictlist);
+	if (!innerrel_removed)
+		populate_joinrel_with_paths(root, rel1, rel2, joinrel, sjinfo,
+									restrictlist);
 
 	bms_free(joinrelids);
 
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index a203e6f1ff..f488d87b1c 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -22,6 +22,7 @@
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/inherit.h"
+#include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/placeholder.h"
@@ -73,6 +74,11 @@ static void build_child_join_reltarget(PlannerInfo *root,
 									   int nappinfos,
 									   AppendRelInfo **appinfos);
 
+static bool join_canbe_removed(PlannerInfo *root,
+							   SpecialJoinInfo *sjinfo,
+							   RelOptInfo *joinrel,
+							   RelOptInfo *innerrel,
+							   List *restrictlist);
 
 /*
  * setup_simple_rel_arrays
@@ -579,7 +585,8 @@ build_join_rel(PlannerInfo *root,
 			   RelOptInfo *outer_rel,
 			   RelOptInfo *inner_rel,
 			   SpecialJoinInfo *sjinfo,
-			   List **restrictlist_ptr)
+			   List **restrictlist_ptr,
+			   bool *innerrel_removed)
 {
 	RelOptInfo *joinrel;
 	List	   *restrictlist;
@@ -718,6 +725,64 @@ build_join_rel(PlannerInfo *root,
 	 */
 	joinrel->has_eclass_joins = has_relevant_eclass_joinclause(root, joinrel);
 
+	if (join_canbe_removed(root, sjinfo,
+						   joinrel, inner_rel,
+						   restrictlist))
+	{
+		ListCell   *lc;
+
+		joinrel->rows = outer_rel->rows;
+		joinrel->consider_startup = outer_rel->consider_startup;
+		joinrel->consider_param_startup = outer_rel->consider_param_startup;
+		joinrel->consider_parallel = outer_rel->consider_parallel;
+
+		/* Rely on the projection path to reduce the tlist. */
+		joinrel->reltarget = outer_rel->reltarget;
+
+		joinrel->direct_lateral_relids = outer_rel->direct_lateral_relids;
+		joinrel->lateral_relids = outer_rel->lateral_relids;
+
+		joinrel->unique_for_rels = outer_rel->unique_for_rels;
+		joinrel->non_unique_for_rels = outer_rel->non_unique_for_rels;
+		joinrel->baserestrictinfo = outer_rel->baserestrictinfo;
+		joinrel->baserestrictcost = outer_rel->baserestrictcost;
+		joinrel->baserestrict_min_security = outer_rel->baserestrict_min_security;
+		joinrel->uniquekeys = outer_rel->uniquekeys;
+		joinrel->consider_partitionwise_join = outer_rel->consider_partitionwise_join;
+		joinrel->top_parent_relids = outer_rel->top_parent_relids;
+
+		/* Some scan paths find the base relation to scan through the parent's
+		 * relid field, so the joinrel has to carry the outer rel's relid.
+		 */
+		joinrel->relid = outer_rel->relid;
+
+		/* Likewise, plan creation asserts on the parent's rtekind, so it has to
+		 * be the same as the outer rel's as well.
+		 */
+		joinrel->rtekind = outer_rel->rtekind;
+
+		/* Copy the outer rel's paths so that path->parent can point to joinrel. */
+		foreach(lc, outer_rel->pathlist)
+		{
+			Size		sz = size_of_path(lfirst(lc));
+			Path	   *path = palloc(sz);
+			memcpy(path, lfirst(lc), sz);
+			path->parent = joinrel;
+			add_path(joinrel, path);
+		}
+
+		foreach(lc, outer_rel->partial_pathlist)
+		{
+			Size		sz = size_of_path(lfirst(lc));
+			Path	   *path = palloc(sz);
+			memcpy(path, lfirst(lc), sz);
+			path->parent = joinrel;
+			add_partial_path(joinrel, path);
+		}
+		*innerrel_removed = true;
+	}
+	else
+	{
 	/* Store the partition information. */
 	build_joinrel_partition_info(joinrel, outer_rel, inner_rel, restrictlist,
 								 sjinfo->jointype);
@@ -746,7 +811,7 @@ build_join_rel(PlannerInfo *root,
 		is_parallel_safe(root, (Node *) restrictlist) &&
 		is_parallel_safe(root, (Node *) joinrel->reltarget->exprs))
 		joinrel->consider_parallel = true;
-
+	}
 	/* Add the joinrel to the PlannerInfo. */
 	add_join_rel(root, joinrel);
 
@@ -759,7 +824,7 @@ build_join_rel(PlannerInfo *root,
 	if (root->join_rel_level)
 	{
 		Assert(root->join_cur_level > 0);
-		Assert(root->join_cur_level <= bms_num_members(joinrel->relids));
+		/* XXX disabled by this patch: Assert(root->join_cur_level <= bms_num_members(joinrel->relids)); */
 		root->join_rel_level[root->join_cur_level] =
 			lappend(root->join_rel_level[root->join_cur_level], joinrel);
 	}
@@ -2027,3 +2092,146 @@ build_child_join_reltarget(PlannerInfo *root,
 	childrel->reltarget->cost.per_tuple = parentrel->reltarget->cost.per_tuple;
 	childrel->reltarget->width = parentrel->reltarget->width;
 }
+
+/*
+ * join_canbe_removed
+ *	  Decide whether the inner side of this join can be dropped, so that the
+ *	  joinrel can simply reuse the outer rel's paths.
+ *
+ * Returns true only when all of the following hold:
+ *	- the join is a LEFT JOIN;
+ *	- the inner rel has at least one UniqueKey;
+ *	- the joinrel's target list references no Var of the inner rel;
+ *	- every clause in restrictlist is a mergejoinable join clause; and
+ *	- relation_has_uniquekeys_for() accepts the inner-side expressions
+ *	  of those clauses.
+ */
+static bool
+join_canbe_removed(PlannerInfo *root,
+				   SpecialJoinInfo *sjinfo,
+				   RelOptInfo *joinrel,
+				   RelOptInfo *innerrel,
+				   List *restrictlist)
+{
+	Bitmapset  *target_varnos;
+	List	   *exprs = NIL;
+	ListCell   *lc;
+
+	if (sjinfo->jointype != JOIN_LEFT)
+		return false;
+
+	if (innerrel->uniquekeys == NIL)
+		return false;
+
+	/* The inner rel must not supply any column of the join's output. */
+	target_varnos = pull_varnos((Node *) joinrel->reltarget->exprs);
+	if (bms_overlap(target_varnos, innerrel->relids))
+		return false;
+
+	foreach(lc, restrictlist)
+	{
+		RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
+
+		/*
+		 * A clause that is not a mergejoinable join clause (for example
+		 * "t1 LEFT JOIN t2 ON t1.a = 10") has to be kept, so the join
+		 * cannot be removed.
+		 */
+		if (!rinfo->can_join || rinfo->mergeopfamilies == NIL)
+			return false;
+
+		/* Collect the inner-side expression of the clause. */
+		if (bms_is_subset(rinfo->left_relids, innerrel->relids))
+			exprs = lappend(exprs, get_leftop(rinfo->clause));
+		else if (bms_is_subset(rinfo->right_relids, innerrel->relids))
+			exprs = lappend(exprs, get_rightop(rinfo->clause));
+		else
+			Assert(false);
+	}
+
+	return relation_has_uniquekeys_for(root, innerrel, exprs, true);
+}
+
+/*
+ * size_of_path
+ *	  Return sizeof() of the struct that corresponds to the path's node tag,
+ *	  so that callers can make a flat copy of the node.
+ *
+ * Raises an ERROR for node tags that are not handled here.
+ */
+size_t
+size_of_path(Path *path)
+{
+	switch (path->type)
+	{
+		case T_Path:
+			return sizeof(Path);
+		case T_IndexPath:
+			return sizeof(IndexPath);
+		case T_BitmapHeapPath:
+			return sizeof(BitmapHeapPath);
+		case T_TidPath:
+			return sizeof(TidPath);
+		case T_SubqueryScanPath:
+			return sizeof(SubqueryScanPath);
+		case T_ForeignPath:
+			return sizeof(ForeignPath);
+		case T_CustomPath:
+			return sizeof(CustomPath);
+		case T_NestPath:
+			return sizeof(NestPath);
+		case T_MergePath:
+			return sizeof(MergePath);
+		case T_HashPath:
+			return sizeof(HashPath);
+		case T_AppendPath:
+			return sizeof(AppendPath);
+		case T_MergeAppendPath:
+			return sizeof(MergeAppendPath);
+		case T_GroupResultPath:
+			return sizeof(GroupResultPath);
+		case T_MaterialPath:
+			return sizeof(MaterialPath);
+		case T_UniquePath:
+			return sizeof(UniquePath);
+		case T_GatherPath:
+			return sizeof(GatherPath);
+		case T_GatherMergePath:
+			return sizeof(GatherMergePath);
+		case T_ProjectionPath:
+			return sizeof(ProjectionPath);
+		case T_ProjectSetPath:
+			return sizeof(ProjectSetPath);
+		case T_SortPath:
+			return sizeof(SortPath);
+		case T_IncrementalSortPath:
+			return sizeof(IncrementalSortPath);
+		case T_GroupPath:
+			return sizeof(GroupPath);
+		case T_UpperUniquePath:
+			return sizeof(UpperUniquePath);
+		case T_AggPath:
+			return sizeof(AggPath);
+		case T_GroupingSetsPath:
+			return sizeof(GroupingSetsPath);
+		case T_MinMaxAggPath:
+			return sizeof(MinMaxAggPath);
+		case T_WindowAggPath:
+			return sizeof(WindowAggPath);
+		case T_SetOpPath:
+			return sizeof(SetOpPath);
+		case T_RecursiveUnionPath:
+			return sizeof(RecursiveUnionPath);
+		case T_LockRowsPath:
+			return sizeof(LockRowsPath);
+		case T_ModifyTablePath:
+			return sizeof(ModifyTablePath);
+		case T_LimitPath:
+			return sizeof(LimitPath);
+		default:
+			elog(ERROR, "unrecognized path type: %d",
+				 (int) path->type);
+			break;
+	}
+	return 0;
+}
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 715a24ad29..254961b2b4 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -294,7 +294,8 @@ extern RelOptInfo *build_join_rel(PlannerInfo *root,
 								  RelOptInfo *outer_rel,
 								  RelOptInfo *inner_rel,
 								  SpecialJoinInfo *sjinfo,
-								  List **restrictlist_ptr);
+								  List **restrictlist_ptr,
+								  bool *innerrel_removed);
 extern Relids min_join_parameterization(PlannerInfo *root,
 										Relids joinrelids,
 										RelOptInfo *outer_rel,
@@ -321,4 +322,5 @@ extern RelOptInfo *build_child_join_rel(PlannerInfo *root,
 										RelOptInfo *parent_joinrel, List *restrictlist,
 										SpecialJoinInfo *sjinfo, JoinType jointype);
 
+extern size_t size_of_path(Path *path);
 #endif							/* PATHNODE_H */
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 8378936eda..b59c9a73ae 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -6283,3 +6283,42 @@ where exists (select 1 from j3
 (13 rows)
 
 drop table j3;
+create table m1 (a int primary key, b int, c int);
+create table m2 (a int primary key, b int, c int);
+create table m3 (a int primary key, b int, c int);
+explain (verbose, costs off)
+select t1.a
+from m3 t1
+left join (select m1.a from m1, m2 where m1.b = m2.a) t2
+on (t1.a = t2.a);
+        QUERY PLAN        
+--------------------------
+ Seq Scan on public.m3 t1
+   Output: t1.a
+(2 rows)
+
+explain (verbose, costs off)
+select m1.*
+from m1 left join m2
+on (m1.a = m2.a)
+and m1.b in (select b from m3);
+         QUERY PLAN         
+----------------------------
+ Seq Scan on public.m1
+   Output: m1.a, m1.b, m1.c
+(2 rows)
+
+explain (verbose, costs off)
+select m1.*
+from m1 left join m2
+on m1.b = m2.a
+and m2.b in (select b from m3);
+         QUERY PLAN         
+----------------------------
+ Seq Scan on public.m1
+   Output: m1.a, m1.b, m1.c
+(2 rows)
+
+drop table m1;
+drop table m2;
+drop table m3;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 3312542411..317354547d 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2169,3 +2169,29 @@ where exists (select 1 from j3
       and t1.unique1 < 1;
 
 drop table j3;
+
+create table m1 (a int primary key, b int, c int);
+create table m2 (a int primary key, b int, c int);
+create table m3 (a int primary key, b int, c int);
+
+explain (verbose, costs off)
+select t1.a
+from m3 t1
+left join (select m1.a from m1, m2 where m1.b = m2.a) t2
+on (t1.a = t2.a);
+
+explain (verbose, costs off)
+select m1.*
+from m1 left join m2
+on (m1.a = m2.a)
+and m1.b in (select b from m3);
+
+explain (verbose, costs off)
+select m1.*
+from m1 left join m2
+on m1.b = m2.a
+and m2.b in (select b from m3);
+
+drop table m1;
+drop table m2;
+drop table m3;
-- 
2.21.0