diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 4fbbde1..5504091 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -3923,10 +3923,12 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, /* * We support pushing down INNER, LEFT, RIGHT and FULL OUTER joins. * Constructing queries representing SEMI and ANTI joins is hard, hence - * not considered right now. + * not considered right now. SEMI_LEFT joins are ok here, since they're + * merely an optimization of LEFT joins. */ if (jointype != JOIN_INNER && jointype != JOIN_LEFT && - jointype != JOIN_RIGHT && jointype != JOIN_FULL) + jointype != JOIN_RIGHT && jointype != JOIN_FULL && + jointype != JOIN_SEMI_LEFT) return false; /* @@ -4059,6 +4061,7 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, break; case JOIN_LEFT: + case JOIN_SEMI_LEFT: fpinfo->joinclauses = list_concat(fpinfo->joinclauses, list_copy(fpinfo_i->remote_conds)); fpinfo->remote_conds = list_concat(fpinfo->remote_conds, diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 09c2304..7c114ca 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -1121,6 +1121,9 @@ ExplainNode(PlanState *planstate, List *ancestors, case JOIN_LEFT: jointype = "Left"; break; + case JOIN_SEMI_LEFT: + jointype = "Semi Left"; + break; case JOIN_FULL: jointype = "Full"; break; diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 369e666..1436a5b 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -306,10 +306,12 @@ ExecHashJoin(HashJoinState *node) } /* - * In a semijoin, we'll consider returning the first - * match, but after that we're done with this outer tuple. + * In a semi or semi left join, we'll consider returning + * the first match, but after that we're done with this + * outer tuple. 
*/ - if (node->js.jointype == JOIN_SEMI) + if (node->js.jointype == JOIN_SEMI || + node->js.jointype == JOIN_SEMI_LEFT) node->hj_JoinState = HJ_NEED_NEW_OUTER; if (otherqual == NIL || @@ -502,6 +504,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) case JOIN_SEMI: break; case JOIN_LEFT: + case JOIN_SEMI_LEFT: case JOIN_ANTI: hjstate->hj_NullInnerTupleSlot = ExecInitNullTupleSlot(estate, diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c index 6db09b8..f61a8da 100644 --- a/src/backend/executor/nodeMergejoin.c +++ b/src/backend/executor/nodeMergejoin.c @@ -840,10 +840,12 @@ ExecMergeJoin(MergeJoinState *node) } /* - * In a semijoin, we'll consider returning the first - * match, but after that we're done with this outer tuple. + * In a semi or semi left join, we'll consider returning + * the first match, but after that we're done with this + * outer tuple. */ - if (node->js.jointype == JOIN_SEMI) + if (node->js.jointype == JOIN_SEMI || + node->js.jointype == JOIN_SEMI_LEFT) node->mj_JoinState = EXEC_MJ_NEXTOUTER; qualResult = (otherqual == NIL || @@ -1559,6 +1561,7 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags) mergestate->mj_FillInner = false; break; case JOIN_LEFT: + case JOIN_SEMI_LEFT: case JOIN_ANTI: mergestate->mj_FillOuter = true; mergestate->mj_FillInner = false; diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c index 555fa09..ebcf0f2 100644 --- a/src/backend/executor/nodeNestloop.c +++ b/src/backend/executor/nodeNestloop.c @@ -182,6 +182,7 @@ ExecNestLoop(NestLoopState *node) if (!node->nl_MatchedOuter && (node->js.jointype == JOIN_LEFT || + node->js.jointype == JOIN_SEMI_LEFT || node->js.jointype == JOIN_ANTI)) { /* @@ -247,10 +248,11 @@ ExecNestLoop(NestLoopState *node) } /* - * In a semijoin, we'll consider returning the first match, but - * after that we're done with this outer tuple. 
+ * In a semi or semi left join, we'll consider returning the first + * match, but after that we're done with this outer tuple. */ - if (node->js.jointype == JOIN_SEMI) + if (node->js.jointype == JOIN_SEMI || + node->js.jointype == JOIN_SEMI_LEFT) node->nl_NeedNewOuter = true; if (otherqual == NIL || ExecQual(otherqual, econtext, false)) @@ -358,6 +360,7 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags) case JOIN_SEMI: break; case JOIN_LEFT: + case JOIN_SEMI_LEFT: case JOIN_ANTI: nlstate->nl_NullInnerTupleSlot = ExecInitNullTupleSlot(estate, diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index f4e4a91..005d290 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -2067,6 +2067,7 @@ _copySpecialJoinInfo(const SpecialJoinInfo *from) COPY_SCALAR_FIELD(jointype); COPY_SCALAR_FIELD(lhs_strict); COPY_SCALAR_FIELD(delay_upper_joins); + COPY_SCALAR_FIELD(is_unique_join); COPY_SCALAR_FIELD(semi_can_btree); COPY_SCALAR_FIELD(semi_can_hash); COPY_NODE_FIELD(semi_operators); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 854c062..4a87efd 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -839,6 +839,7 @@ _equalSpecialJoinInfo(const SpecialJoinInfo *a, const SpecialJoinInfo *b) COMPARE_SCALAR_FIELD(jointype); COMPARE_SCALAR_FIELD(lhs_strict); COMPARE_SCALAR_FIELD(delay_upper_joins); + COMPARE_SCALAR_FIELD(is_unique_join); COMPARE_SCALAR_FIELD(semi_can_btree); COMPARE_SCALAR_FIELD(semi_can_hash); COMPARE_NODE_FIELD(semi_operators); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index bfd12ac..f34567b 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2268,6 +2268,7 @@ _outSpecialJoinInfo(StringInfo str, const SpecialJoinInfo *node) WRITE_ENUM_FIELD(jointype, JoinType); WRITE_BOOL_FIELD(lhs_strict); WRITE_BOOL_FIELD(delay_upper_joins); + WRITE_BOOL_FIELD(is_unique_join); 
WRITE_BOOL_FIELD(semi_can_btree); WRITE_BOOL_FIELD(semi_can_hash); WRITE_NODE_FIELD(semi_operators); diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index b395751..8656629 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1893,8 +1893,8 @@ cost_group(Path *path, PlannerInfo *root, * estimate and getting a tight lower bound. We choose to not examine the * join quals here, since that's by far the most expensive part of the * calculations. The end result is that CPU-cost considerations must be - * left for the second phase; and for SEMI/ANTI joins, we must also postpone - * incorporation of the inner path's run cost. + * left for the second phase; and for SEMI, SEMI_LEFT and ANTI joins, we must + * also postpone incorporation of the inner path's run cost. * * 'workspace' is to be filled with startup_cost, total_cost, and perhaps * other data to be used by final_cost_nestloop @@ -1902,7 +1902,7 @@ cost_group(Path *path, PlannerInfo *root, * 'outer_path' is the outer input to the join * 'inner_path' is the inner input to the join * 'sjinfo' is extra info about the join for selectivity estimation - * 'semifactors' contains valid data if jointype is SEMI or ANTI + * 'semifactors' contains valid data if jointype is SEMI, SEMI_LEFT or ANTI */ void initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, @@ -1940,10 +1940,12 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, inner_run_cost = inner_path->total_cost - inner_path->startup_cost; inner_rescan_run_cost = inner_rescan_total_cost - inner_rescan_start_cost; - if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + if (jointype == JOIN_SEMI || + jointype == JOIN_SEMI_LEFT || + jointype == JOIN_ANTI) { /* - * SEMI or ANTI join: executor will stop after first match. + * SEMI, SEMI_LEFT or ANTI join: executor will stop after first match. 
* * Getting decent estimates requires inspection of the join quals, * which we choose to postpone to final_cost_nestloop. @@ -1977,7 +1979,8 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, * 'path' is already filled in except for the rows and cost fields * 'workspace' is the result from initial_cost_nestloop * 'sjinfo' is extra info about the join for selectivity estimation - * 'semifactors' contains valid data if path->jointype is SEMI or ANTI + * 'semifactors' contains valid data if path->jointype is SEMI, SEMI_LEFT or + * ANTI */ void final_cost_nestloop(PlannerInfo *root, NestPath *path, @@ -2017,10 +2020,12 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, /* cost of inner-relation source data (we already dealt with outer rel) */ - if (path->jointype == JOIN_SEMI || path->jointype == JOIN_ANTI) + if (path->jointype == JOIN_SEMI || + path->jointype == JOIN_SEMI_LEFT || + path->jointype == JOIN_ANTI) { /* - * SEMI or ANTI join: executor will stop after first match. + * SEMI, SEMI_LEFT or ANTI join: executor will stop after first match. 
*/ Cost inner_run_cost = workspace->inner_run_cost; Cost inner_rescan_run_cost = workspace->inner_rescan_run_cost; @@ -2250,6 +2255,7 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace, innerendsel = cache->leftendsel; } if (jointype == JOIN_LEFT || + jointype == JOIN_SEMI_LEFT || jointype == JOIN_ANTI) { outerstartsel = 0.0; @@ -2773,7 +2779,8 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, * num_batches * 'workspace' is the result from initial_cost_hashjoin * 'sjinfo' is extra info about the join for selectivity estimation - * 'semifactors' contains valid data if path->jointype is SEMI or ANTI + * 'semifactors' contains valid data if path->jointype is SEMI, SEMI_LEFT or + * ANTI */ void final_cost_hashjoin(PlannerInfo *root, HashPath *path, @@ -2896,13 +2903,15 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, /* CPU costs */ - if (path->jpath.jointype == JOIN_SEMI || path->jpath.jointype == JOIN_ANTI) + if (path->jpath.jointype == JOIN_SEMI || + path->jpath.jointype == JOIN_SEMI_LEFT || + path->jpath.jointype == JOIN_ANTI) { double outer_matched_rows; Selectivity inner_scan_frac; /* - * SEMI or ANTI join: executor will stop after first match. + * SEMI, SEMI_LEFT or ANTI join: executor will stop after first match. 
* * For an outer-rel row that has at least one match, we can expect the * bucket scan to stop after a fraction 1/(match_count+1) of the @@ -2937,10 +2946,10 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, clamp_row_est(inner_path_rows / virtualbuckets) * 0.05; /* Get # of tuples that will pass the basic join */ - if (path->jpath.jointype == JOIN_SEMI) - hashjointuples = outer_matched_rows; - else + if (path->jpath.jointype == JOIN_ANTI) hashjointuples = outer_path_rows - outer_matched_rows; + else + hashjointuples = outer_matched_rows; } else { @@ -3469,11 +3478,11 @@ get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel, /* * compute_semi_anti_join_factors - * Estimate how much of the inner input a SEMI or ANTI join + * Estimate how much of the inner input a SEMI, SEMI_LEFT or ANTI join * can be expected to scan. * - * In a hash or nestloop SEMI/ANTI join, the executor will stop scanning - * inner rows as soon as it finds a match to the current outer row. + * In a hash or nestloop SEMI/SEMI_LEFT/ANTI join, the executor will stop + * scanning inner rows as soon as it finds a match to the current outer row. * We should therefore adjust some of the cost components for this effect. * This function computes some estimates needed for these adjustments. 
* These estimates will be the same regardless of the particular paths used @@ -3483,7 +3492,7 @@ get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel, * Input parameters: * outerrel: outer relation under consideration * innerrel: inner relation under consideration - * jointype: must be JOIN_SEMI or JOIN_ANTI + * jointype: must be JOIN_SEMI, JOIN_SEMI_LEFT or JOIN_ANTI * sjinfo: SpecialJoinInfo relevant to this join * restrictlist: join quals * Output parameters: @@ -3506,7 +3515,9 @@ compute_semi_anti_join_factors(PlannerInfo *root, ListCell *l; /* Should only be called in these cases */ - Assert(jointype == JOIN_SEMI || jointype == JOIN_ANTI); + Assert(jointype == JOIN_SEMI || + jointype == JOIN_SEMI_LEFT || + jointype == JOIN_ANTI); /* * In an ANTI join, we must ignore clauses that are "pushed down", since @@ -3530,7 +3541,8 @@ compute_semi_anti_join_factors(PlannerInfo *root, joinquals = restrictlist; /* - * Get the JOIN_SEMI or JOIN_ANTI selectivity of the join clauses. + * Get the JOIN_SEMI, JOIN_SEMI_LEFT or JOIN_ANTI selectivity of the join + * clauses. */ jselec = clauselist_selectivity(root, joinquals, @@ -3969,6 +3981,10 @@ calc_joinrel_size_estimate(PlannerInfo *root, * * For JOIN_SEMI and JOIN_ANTI, the selectivity is defined as the fraction * of LHS rows that have matches, and we apply that straightforwardly. + * + * For JOIN_SEMI_LEFT, the selectivity is defined as the fraction of the + * LHS rows that have matches, although unlike JOIN_SEMI we must consider + * NULL RHS rows, and take the higher estimate of the two. 
*/ switch (jointype) { @@ -3993,6 +4009,12 @@ calc_joinrel_size_estimate(PlannerInfo *root, nrows = outer_rows * jselec; /* pselec not used */ break; + case JOIN_SEMI_LEFT: + nrows = outer_rows * jselec; + if (nrows < outer_rows) + nrows = outer_rows; + nrows *= pselec; + break; case JOIN_ANTI: nrows = outer_rows * (1.0 - jselec); nrows *= pselec; diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index f3aced3..cfdda7b 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -19,6 +19,7 @@ #include "executor/executor.h" #include "foreign/fdwapi.h" #include "optimizer/cost.h" +#include "optimizer/planmain.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" @@ -28,6 +29,16 @@ set_join_pathlist_hook_type set_join_pathlist_hook = NULL; #define PATH_PARAM_BY_REL(path, rel) \ ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids)) +static bool is_innerrel_unique_for(PlannerInfo *root, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist); +static JoinType get_optimal_jointype(PlannerInfo *root, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + SpecialJoinInfo *sjinfo, + List *restrictlist); static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra); @@ -50,7 +61,176 @@ static List *select_mergejoin_clauses(PlannerInfo *root, List *restrictlist, JoinType jointype, bool *mergejoin_allowed); +static inline bool clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel, + RelOptInfo *innerrel); +/* + * is_innerrel_unique_for + * Determine if this innerrel can, at most, return a single tuple for each + * outer tuple, based on the 'restrictlist'. 
+ */ +static bool +is_innerrel_unique_for(PlannerInfo *root, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist) +{ + List *clause_list = NIL; + ListCell *lc; + + if (restrictlist == NIL || + !rel_supports_distinctness(root, innerrel)) + return false; + + /* + * Build a private list holding only the mergejoinable clauses of the form + * "outer op inner" or "inner op outer". rel_is_distinct_for() trusts each + * clause's outer_is_left flag, so a clause clause_sides_match_join() + * rejects must never reach it. Using a private list also means anything + * rel_is_distinct_for() appends cannot leak into the caller's + * restrictlist. + */ + foreach(lc, restrictlist) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + /* ignore clauses that are not mergejoinable */ + if (rinfo->mergeopfamilies == NIL) + continue; + + /* ignore clauses whose sides don't match this outer/inner pair */ + if (!clause_sides_match_join(rinfo, outerrel, innerrel)) + continue; + + clause_list = lappend(clause_list, rinfo); + } + + /* Let rel_is_distinct_for() do the hard work */ + return rel_is_distinct_for(root, innerrel, clause_list); +} + +/* + * get_optimal_jointype + * We may be able to optimize some joins by converting the JoinType to one + * which the executor is able to run more efficiently. Here we look for + * such cases and if we find a better choice, then we'll return it, + * otherwise we'll return the original JoinType. + */ +static JoinType +get_optimal_jointype(PlannerInfo *root, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + SpecialJoinInfo *sjinfo, + List *restrictlist) +{ + int innerrelid; + + /* + * LEFT JOINs in which we have proved the inner side to be unique can be + * converted into JOIN_SEMI_LEFT. + */ + if (jointype == JOIN_LEFT) + { + if (sjinfo->is_unique_join) + return JOIN_SEMI_LEFT; + else + return JOIN_LEFT; + } + + if (!bms_get_singleton_member(innerrel->relids, &innerrelid)) + return jointype; + + /* + * Any INNER JOINs which can be proven to return at most one inner tuple + * for each outer tuple can be converted into a JOIN_SEMI.
+ */ + if (jointype == JOIN_INNER) + { + MemoryContext old_context; + ListCell *lc; + + /* can't optimize jointype with an empty restrictlist */ + if (restrictlist == NIL) + return jointype; + + /* + * First let's query the unique and non-unique caches to see if we've + * managed to prove that innerrel is unique for some subset of this + * outerrel. We don't need an exact match, since any extra outerrels + * beyond those previously cached can't make the innerrel any less + * unique. + */ + foreach(lc, root->unique_rels[innerrelid]) + { + Bitmapset *unique_rels = (Bitmapset *) lfirst(lc); + + if (bms_is_subset(unique_rels, outerrel->relids)) + { + /* ensure is_innerrel_unique_for() agrees */ + Assert(is_innerrel_unique_for(root, outerrel, innerrel, + restrictlist)); + + return JOIN_SEMI; /* Success! */ + } + } + + /* + * We may have previously determined that this outerrel, or some + * superset thereof, cannot prove this innerrel to be unique. + */ + foreach(lc, root->non_unique_rels[innerrelid]) + { + Bitmapset *unique_rels = (Bitmapset *) lfirst(lc); + + if (bms_is_subset(outerrel->relids, unique_rels)) + { + /* ensure is_innerrel_unique_for() agrees */ + Assert(!is_innerrel_unique_for(root, outerrel, innerrel, + restrictlist)); + + return jointype; + } + } + + /* + * We may be getting called from the geqo and might not be working in + * the standard planner's memory context, so let's ensure we are. + */ + old_context = MemoryContextSwitchTo(root->planner_cxt); + + if (is_innerrel_unique_for(root, outerrel, innerrel, restrictlist)) + { + /* + * XXX Should we attempt to get the minimum set of outerrels which + * proved this innerrel to be unique? If we did this then we might + * be able to make a few more cases unique that we otherwise + * couldn't. However, the standard join search always starts with + * fewer rels anyway, so this may not matter, although perhaps we + * should be more aggressive to make this work better with the + * geqo?
+ */ + + /* cache a copy: outerrel->relids may not live in planner_cxt (geqo) */ + root->unique_rels[innerrelid] = + lappend(root->unique_rels[innerrelid], bms_copy(outerrel->relids)); + + jointype = JOIN_SEMI; /* Success! */ + } + else + { + /* + * None of outerrel helped prove innerrel unique, so we can safely reject + * this rel, or a subset of it, in future checks. Cache a copy of relids. + */ + root->non_unique_rels[innerrelid] = + lappend(root->non_unique_rels[innerrelid], bms_copy(outerrel->relids)); + } + + MemoryContextSwitchTo(old_context); + } + return jointype; +} /* * add_paths_to_joinrel @@ -88,6 +268,13 @@ add_paths_to_joinrel(PlannerInfo *root, bool mergejoin_allowed = true; ListCell *lc; + /* + * There may be a more optimal JoinType to use. Check for such cases + * first. + */ + jointype = get_optimal_jointype(root, outerrel, innerrel, jointype, sjinfo, + restrictlist); + extra.restrictlist = restrictlist; extra.mergeclause_list = NIL; extra.sjinfo = sjinfo; @@ -109,10 +296,12 @@ add_paths_to_joinrel(PlannerInfo *root, &mergejoin_allowed); /* - * If it's SEMI or ANTI join, compute correction factors for cost - * estimation. These will be the same for all paths. + * If it's SEMI, SEMI_LEFT or ANTI join, compute correction factors for + * cost estimation. These will be the same for all paths. */ - if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + if (jointype == JOIN_SEMI || + jointype == JOIN_SEMI_LEFT || + jointype == JOIN_ANTI) compute_semi_anti_join_factors(root, outerrel, innerrel, jointype, sjinfo, restrictlist, &extra.semifactors); @@ -827,16 +1016,17 @@ match_unsorted_outer(PlannerInfo *root, ListCell *lc1; /* - * Nestloop only supports inner, left, semi, and anti joins. Also, if we - * are doing a right or full mergejoin, we must use *all* the mergeclauses - * as join clauses, else we will not have a valid plan. (Although these - * two flags are currently inverses, keep them separate for clarity and - * possible future changes.) + * Nestloop only supports inner, left, semi_left, semi, and anti joins.
+ * Also, if we are doing a right or full mergejoin, we must use *all* the + * mergeclauses as join clauses, else we will not have a valid plan. + * (Although these two flags are currently inverses, keep them separate + * for clarity and possible future changes.) */ switch (jointype) { case JOIN_INNER: case JOIN_LEFT: + case JOIN_SEMI_LEFT: case JOIN_SEMI: case JOIN_ANTI: nestjoinOK = true; diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 01d4fea..b91cb79 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -490,10 +490,12 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, /* * The proposed join could still be legal, but only if we're * allowed to associate it into the RHS of this SJ. That means - * this SJ must be a LEFT join (not SEMI or ANTI, and certainly - * not FULL) and the proposed join must not overlap the LHS. + * this SJ must be a LEFT or SEMI_LEFT join (not SEMI or ANTI, and + * certainly not FULL) and the proposed join must not overlap the + * LHS. */ - if (sjinfo->jointype != JOIN_LEFT || + if ((sjinfo->jointype != JOIN_LEFT && + sjinfo->jointype != JOIN_SEMI_LEFT) || bms_overlap(joinrelids, sjinfo->min_lefthand)) return false; /* invalid join path */ @@ -508,8 +510,8 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, } /* - * Fail if violated any SJ's RHS and didn't match to a LEFT SJ: the - * proposed join can't associate into an SJ's RHS. + * Fail if violated any SJ's RHS and didn't match to a LEFT or SEMI_LEFT + * SJ: the proposed join can't associate into an SJ's RHS. 
* * Also, fail if the proposed join's predicate isn't strict; we're * essentially checking to see if we can apply outer-join identity 3, and @@ -518,7 +520,8 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, */ if (must_be_leftjoin && (match_sjinfo == NULL || - match_sjinfo->jointype != JOIN_LEFT || + (match_sjinfo->jointype != JOIN_LEFT && + match_sjinfo->jointype != JOIN_SEMI_LEFT) || !match_sjinfo->lhs_strict)) return false; /* invalid join path */ diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index f7f5714..7446eb5 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -34,11 +34,37 @@ /* local functions */ static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); +static bool specialjoin_is_unique_join(PlannerInfo *root, + SpecialJoinInfo *sjinfo); static void remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids); static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); static Oid distinct_col_search(int colno, List *colnos, List *opids); +/* + * mark_unique_joins + * Analyze joins in order to determine if their inner side is unique based + * on the join condition. + */ +void +mark_unique_joins(PlannerInfo *root, List *joinlist) +{ + ListCell *lc; + + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); + + /* + * Currently we're only interested in LEFT JOINs that have not already + * been marked as unique by a previous call. 
+ */ + if (sjinfo->jointype == JOIN_LEFT && + !sjinfo->is_unique_join && + specialjoin_is_unique_join(root, sjinfo)) + sjinfo->is_unique_join = true; + } +} /* * remove_useless_joins @@ -92,6 +118,12 @@ restart: root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo); /* + * We may now be able to mark some joins as unique which we could not + * do before + */ + mark_unique_joins(root, joinlist); + + /* * Restart the scan. This is necessary to ensure we find all * removable joins independently of ordering of the join_info_list * (note that removal of attr_needed bits may make a join appear @@ -152,17 +184,17 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) { int innerrelid; RelOptInfo *innerrel; - Query *subquery = NULL; Relids joinrelids; - List *clause_list = NIL; - ListCell *l; int attroff; + ListCell *l; /* - * Must be a non-delaying left join to a single baserel, else we aren't - * going to be able to do anything with it. + * Join must not duplicate its outer side and must be a non-delaying left + * join to a single baserel, else we aren't going to be able to do + * anything with it. */ - if (sjinfo->jointype != JOIN_LEFT || + if (!sjinfo->is_unique_join || + sjinfo->jointype != JOIN_LEFT || sjinfo->delay_upper_joins) return false; @@ -171,38 +203,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) innerrel = find_base_rel(root, innerrelid); - if (innerrel->reloptkind != RELOPT_BASEREL) - return false; - - /* - * Before we go to the effort of checking whether any innerrel variables - * are needed above the join, make a quick check to eliminate cases in - * which we will surely be unable to prove uniqueness of the innerrel. - */ - if (innerrel->rtekind == RTE_RELATION) - { - /* - * For a plain-relation innerrel, we only know how to prove uniqueness - * by reference to unique indexes. If there are no indexes then - * there's certainly no unique indexes so there's no point in going - * further. 
- */ - if (innerrel->indexlist == NIL) - return false; - } - else if (innerrel->rtekind == RTE_SUBQUERY) - { - subquery = root->simple_rte_array[innerrelid]->subquery; - - /* - * If the subquery has no qualities that support distinctness proofs - * then there's no point in going further. - */ - if (!query_supports_distinctness(subquery)) - return false; - } - else - return false; /* unsupported rtekind */ + /* Must be true as is_unique_join can only be set to true for base rels */ + Assert(innerrel->reloptkind == RELOPT_BASEREL); /* Compute the relid set for the join we are considering */ joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); @@ -213,7 +215,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) * * Note that this test only detects use of inner-rel attributes in higher * join conditions and the target list. There might be such attributes in - * pushed-down conditions at this join, too. We check that case below. + * pushed-down conditions at this join, too, but in this case the join + * would not have been marked as unique. * * As a micro-optimization, it seems better to start with max_attr and * count down rather than starting with min_attr and counting up, on the @@ -254,6 +257,44 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) return false; /* it does reference innerrel */ } + return true; +} + +/* + * specialjoin_is_unique_join + * True if it can be proved that this special join can only ever match at + * most one inner row for any single outer row. False is returned if + * there's insufficient evidence to prove the join is unique. 
+ */ +static bool +specialjoin_is_unique_join(PlannerInfo *root, SpecialJoinInfo *sjinfo) +{ + int innerrelid; + RelOptInfo *innerrel; + Relids joinrelids; + ListCell *l; + List *clause_list = NIL; + + /* if there's more than one relation involved, then punt */ + if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid)) + return false; + + innerrel = find_base_rel(root, innerrelid); + + if (innerrel->reloptkind != RELOPT_BASEREL) + return false; + + /* + * Before we go to the effort of pulling out the join condition's columns, + * make a quick check to eliminate cases in which we will surely be unable + * to prove uniqueness of the innerrel. + */ + if (!rel_supports_distinctness(root, innerrel)) + return false; + + /* Compute the relid set for the join we are considering */ + joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + /* * Search for mergejoinable clauses that constrain the inner rel against * either the outer rel or a pseudoconstant. If an operator is @@ -275,10 +316,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) !bms_equal(restrictinfo->required_relids, joinrelids)) { /* - * If such a clause actually references the inner rel then join - * removal has to be disallowed. We have to check this despite - * the previous attr_needed checks because of the possibility of - * pushed-down clauses referencing the rel. + * If such a clause actually references the inner rel then we + * can't mark the join as unique. */ if (bms_is_member(innerrelid, restrictinfo->clause_relids)) return false; @@ -301,71 +340,9 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) clause_list = lappend(clause_list, restrictinfo); } - /* - * relation_has_unique_index_for automatically adds any usable restriction - * clauses for the innerrel, so we needn't do that here. (XXX we are not - * considering restriction clauses for subqueries; is that worth doing?) 
- */ - - if (innerrel->rtekind == RTE_RELATION) - { - /* Now examine the indexes to see if we have a matching unique index */ - if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL)) - return true; - } - else /* innerrel->rtekind == RTE_SUBQUERY */ - { - List *colnos = NIL; - List *opids = NIL; - - /* - * Build the argument lists for query_is_distinct_for: a list of - * output column numbers that the query needs to be distinct over, and - * a list of equality operators that the output columns need to be - * distinct according to. - */ - foreach(l, clause_list) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - Oid op; - Var *var; - - /* - * Get the equality operator we need uniqueness according to. - * (This might be a cross-type operator and thus not exactly the - * same operator the subquery would consider; that's all right - * since query_is_distinct_for can resolve such cases.) The - * mergejoinability test above should have selected only OpExprs. - */ - Assert(IsA(rinfo->clause, OpExpr)); - op = ((OpExpr *) rinfo->clause)->opno; - - /* clause_sides_match_join identified the inner side for us */ - if (rinfo->outer_is_left) - var = (Var *) get_rightop(rinfo->clause); - else - var = (Var *) get_leftop(rinfo->clause); - - /* - * If inner side isn't a Var referencing a subquery output column, - * this clause doesn't help us. - */ - if (!var || !IsA(var, Var) || - var->varno != innerrelid || var->varlevelsup != 0) - continue; - - colnos = lappend_int(colnos, var->varattno); - opids = lappend_oid(opids, op); - } - - if (query_is_distinct_for(subquery, colnos, opids)) - return true; - } + if (rel_is_distinct_for(root, innerrel, clause_list)) + return true; - /* - * Some day it would be nice to check for other methods of establishing - * distinctness. 
- */ return false; } @@ -560,6 +537,127 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved) return result; } +/* + * rel_is_distinct_for + * Returns true if rel can be proved to be distinct over clause_list + * + * Note: We expect clause_list to be already processed to check if the + * RestrictInfos are in the form "outerrel_expr op innerrel_expr" or + * "innerrel_expr op outerrel_expr". + * + * Note: this function may add items to clause_list; callers should either + * make a copy of the list or trim it back to its original length after + * calling this function. + */ +bool +rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) +{ + int relid = rel->relid; + + /* + * relation_has_unique_index_for automatically adds any usable restriction + * clauses for the rel, so we needn't do that here. (XXX we are not + * considering restriction clauses for subqueries; is that worth doing?) + */ + if (rel->rtekind == RTE_RELATION) + { + /* Now examine the indexes to see if we have a matching unique index */ + if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL)) + return true; + } + else if (rel->rtekind == RTE_SUBQUERY) + { + List *colnos = NIL; + List *opids = NIL; + ListCell *l; + Query *subquery = root->simple_rte_array[relid]->subquery; + + /* + * Build the argument lists for query_is_distinct_for: a list of + * output column numbers that the query needs to be distinct over, and + * a list of equality operators that the output columns need to be + * distinct according to. + */ + foreach(l, clause_list) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + Oid op; + Var *var; + + if (!IsA(rinfo->clause, OpExpr)) + continue; + + /* + * Get the equality operator we need uniqueness according to. + * (This might be a cross-type operator and thus not exactly the + * same operator the subquery would consider; that's all right + * since query_is_distinct_for can resolve such cases.)
The + * IsA test above has already skipped anything that is not an OpExpr. + */ + op = ((OpExpr *) rinfo->clause)->opno; + + /* clause_sides_match_join identified the inner side for us */ + if (rinfo->outer_is_left) + var = (Var *) get_rightop(rinfo->clause); + else + var = (Var *) get_leftop(rinfo->clause); + + /* + * If inner side isn't a Var referencing a subquery output column, + * this clause doesn't help us. + */ + if (!var || !IsA(var, Var) || + var->varno != relid || var->varlevelsup != 0) + continue; + + colnos = lappend_int(colnos, var->varattno); + opids = lappend_oid(opids, op); + } + + if (query_is_distinct_for(subquery, colnos, opids)) + return true; + } + return false; /* can't prove rel to be distinct over + * clause_list */ +} + +/* + * rel_supports_distinctness + * Returns true if rel has some properties which can prove the relation + * to be unique over some set of columns. + * + * This is effectively a pre-checking function for rel_is_distinct_for(). + * It must return true if rel_is_distinct_for() could possibly return true. + */ +bool +rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel) +{ + if (rel->rtekind == RTE_RELATION) + { + /* + * For a plain relation, we only know how to prove uniqueness by + * reference to unique indexes. If there are no indexes then there + * are certainly no unique indexes, so nothing can prove uniqueness + * of the relation. + */ + if (rel->indexlist != NIL) + return true; + } + else if (rel->rtekind == RTE_SUBQUERY) + { + Query *subquery = root->simple_rte_array[rel->relid]->subquery; + + /* Check if the subquery has any qualities that support distinctness */ + if (query_supports_distinctness(subquery)) + return true; + } + + /* + * Some day it would be nice to check for other methods of establishing + * distinctness.
+ */ + return false; +} /* * query_supports_distinctness - could the query possibly be proven distinct diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 9999eea..e90f358 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -1130,6 +1130,7 @@ make_outerjoininfo(PlannerInfo *root, sjinfo->jointype = jointype; /* this always starts out false */ sjinfo->delay_upper_joins = false; + sjinfo->is_unique_join = false; compute_semijoin_info(sjinfo, clause); diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 88d7ea4..efaa655 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -185,6 +185,9 @@ query_planner(PlannerInfo *root, List *tlist, */ fix_placeholder_input_needed_levels(root); + /* Analyze joins to find out which ones have a unique inner side */ + mark_unique_joins(root, joinlist); + /* * Remove any useless outer joins. 
Ideally this would be done during * jointree preprocessing, but the necessary information isn't available diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index dd2b9ed..fe6f789 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -1617,6 +1617,7 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset) switch (join->jointype) { case JOIN_LEFT: + case JOIN_SEMI_LEFT: case JOIN_SEMI: case JOIN_ANTI: inner_itlist->has_non_vars = false; diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 6f24b03..f1aeb9b 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -71,6 +71,14 @@ setup_simple_rel_arrays(PlannerInfo *root) /* simple_rte_array is an array equivalent of the rtable list */ root->simple_rte_array = (RangeTblEntry **) palloc0(root->simple_rel_array_size * sizeof(RangeTblEntry *)); + + /* initialize the unique relation caches */ + root->unique_rels = (List **) + palloc0(root->simple_rel_array_size * sizeof(List *)); + + root->non_unique_rels = (List **) + palloc0(root->simple_rel_array_size * sizeof(List *)); + rti = 1; foreach(lc, root->parse->rtable) { diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index c9edd88..28c2db0 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -978,11 +978,11 @@ transformFromClauseItem(ParseState *pstate, Node *n, /* * Make the left-side RTEs available for LATERAL access within the * right side, by temporarily adding them to the pstate's namespace - * list. Per SQL:2008, if the join type is not INNER or LEFT then the - * left-side names must still be exposed, but it's an error to - * reference them. (Stupid design, but that's what it says.) Hence, - * we always push them into the namespace, but mark them as not - * lateral_ok if the jointype is wrong. + * list. 
Per SQL:2008, if the join type is not INNER, LEFT or + * SEMI_LEFT then the left-side names must still be exposed, but it's + * an error to reference them. (Stupid design, but that's what it + * says.) Hence, we always push them into the namespace, but mark + * them as not lateral_ok if the jointype is wrong. * * Notice that we don't require the merged namespace list to be * conflict-free. See the comments for scanNameSpaceForRefname(). @@ -990,7 +990,9 @@ transformFromClauseItem(ParseState *pstate, Node *n, * NB: this coding relies on the fact that list_concat is not * destructive to its second argument. */ - lateral_ok = (j->jointype == JOIN_INNER || j->jointype == JOIN_LEFT); + lateral_ok = (j->jointype == JOIN_INNER || + j->jointype == JOIN_LEFT || + j->jointype == JOIN_SEMI_LEFT); setNamespaceLateralState(l_namespace, true, lateral_ok); sv_namespace_length = list_length(pstate->p_namespace); diff --git a/src/backend/utils/adt/network_selfuncs.c b/src/backend/utils/adt/network_selfuncs.c index 2e39687..4250325 100644 --- a/src/backend/utils/adt/network_selfuncs.c +++ b/src/backend/utils/adt/network_selfuncs.c @@ -216,6 +216,7 @@ networkjoinsel(PG_FUNCTION_ARGS) { case JOIN_INNER: case JOIN_LEFT: + case JOIN_SEMI_LEFT: /* XXX belongs here, or with SEMI/ANTI? */ case JOIN_FULL: /* diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index a6555e9..68aedfa 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -2202,6 +2202,7 @@ eqjoinsel(PG_FUNCTION_ARGS) { case JOIN_INNER: case JOIN_LEFT: + case JOIN_SEMI_LEFT: /* XXX belongs here, or with SEMI/ANTI? 
*/ case JOIN_FULL: selec = eqjoinsel_inner(operator, &vardata1, &vardata2); break; diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 734df77..87695b1 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -638,6 +638,14 @@ typedef enum JoinType JOIN_ANTI, /* 1 copy of each LHS row that has no match */ /* + * The following join type is a variant of JOIN_LEFT for when the inner + * side of the join is known to be unique. This serves solely as an + * optimization to allow the executor to skip looking for another matching + * tuple in the inner side, when it's known that another cannot exist. + */ + JOIN_SEMI_LEFT, + + /* * These codes are used internally in the planner, but are not supported * by the executor (nor, indeed, by most of the planner). */ @@ -666,6 +674,7 @@ typedef enum JoinType #define IS_OUTER_JOIN(jointype) \ (((1 << (jointype)) & \ ((1 << JOIN_LEFT) | \ + (1 << JOIN_SEMI_LEFT) | \ (1 << JOIN_FULL) | \ (1 << JOIN_RIGHT) | \ (1 << JOIN_ANTI))) != 0) diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index d39c73b..85ee975 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -221,6 +221,18 @@ typedef struct PlannerInfo List **join_rel_level; /* lists of join-relation RelOptInfos */ int join_cur_level; /* index of list being extended */ + /* + * During the join search we attempt to optimize joins to try to prove + * their inner side to be unique based on the join condition. This is a + * rather expensive thing to do as it requires checking each relations + * unique indexes to see if the relation can, at most, return one tuple + * for each outer tuple. We use this cache during the join search to + * record lists of the sets of relations which both prove, and disprove + * the uniqueness properties for the relid indexed by these arrays. 
+ */ + List **unique_rels; /* cache for proven unique rels */ + List **non_unique_rels; /* cache for proven non-unique rels */ + List *init_plans; /* init SubPlans for query */ List *cte_plan_ids; /* per-CTE-item list of subplan IDs */ @@ -1729,6 +1741,7 @@ typedef struct SpecialJoinInfo JoinType jointype; /* always INNER, LEFT, FULL, SEMI, or ANTI */ bool lhs_strict; /* joinclause is strict for some LHS rel */ bool delay_upper_joins; /* can't commute with upper RHS */ + bool is_unique_join; /* matches a max of 1 row per outer join row */ /* Remaining fields are set only for JOIN_SEMI jointype: */ bool semi_can_btree; /* true if semi_operators are all btree */ bool semi_can_hash; /* true if semi_operators are all hash */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 1f96e27..3c0dcbd 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -98,7 +98,11 @@ extern RestrictInfo *build_implied_join_equality(Oid opno, /* * prototypes for plan/analyzejoins.c */ +extern void mark_unique_joins(PlannerInfo *root, List *joinlist); extern List *remove_useless_joins(PlannerInfo *root, List *joinlist); +extern bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, + List *clause_list); +extern bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel); extern bool query_supports_distinctness(Query *query); extern bool query_is_distinct_for(Query *query, List *colnos, List *opids); diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 3ff6691..952833f 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -880,29 +880,31 @@ explain (costs off) select a,c from t1 group by a,c,d; explain (costs off) select * from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z; - QUERY PLAN -------------------------------------------------------- - Group + QUERY PLAN 
+------------------------------------------------------ + HashAggregate Group Key: t1.a, t1.b, t2.x, t2.y - -> Merge Join - Merge Cond: ((t1.a = t2.x) AND (t1.b = t2.y)) - -> Index Scan using t1_pkey on t1 - -> Index Scan using t2_pkey on t2 -(6 rows) + -> Hash Semi Join + Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) + -> Seq Scan on t2 + -> Hash + -> Seq Scan on t1 +(7 rows) -- Test case where t1 can be optimized but not t2 explain (costs off) select t1.*,t2.x,t2.z from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z; - QUERY PLAN -------------------------------------------------------- + QUERY PLAN +------------------------------------------------------ HashAggregate Group Key: t1.a, t1.b, t2.x, t2.z - -> Merge Join - Merge Cond: ((t1.a = t2.x) AND (t1.b = t2.y)) - -> Index Scan using t1_pkey on t1 - -> Index Scan using t2_pkey on t2 -(6 rows) + -> Hash Semi Join + Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) + -> Seq Scan on t2 + -> Hash + -> Seq Scan on t1 +(7 rows) -- Cannot optimize when PK is deferrable explain (costs off) select * from t3 group by a,b,c; diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out index 0391b8e..aca998c 100644 --- a/src/test/regress/expected/equivclass.out +++ b/src/test/regress/expected/equivclass.out @@ -186,7 +186,7 @@ explain (costs off) select * from ec1, ec2 where ff = x1 and x1 = '42'::int8alias2; QUERY PLAN ----------------------------------------- - Nested Loop + Nested Loop Semi Join -> Seq Scan on ec2 Filter: (x1 = '42'::int8alias2) -> Index Scan using ec1_pkey on ec1 @@ -310,7 +310,7 @@ explain (costs off) -> Index Scan using ec1_expr3 on ec1 ec1_5 -> Index Scan using ec1_expr4 on ec1 ec1_6 -> Materialize - -> Merge Join + -> Merge Semi Join Merge Cond: ((((ec1_1.ff + 2) + 1)) = ec1.f1) -> Merge Append Sort Key: (((ec1_1.ff + 2) + 1)) @@ -365,7 +365,7 @@ explain (costs off) where ss1.x = ec1.f1 and ec1.ff = 42::int8; QUERY PLAN 
----------------------------------------------------- - Merge Join + Merge Semi Join Merge Cond: ((((ec1_1.ff + 2) + 1)) = ec1.f1) -> Merge Append Sort Key: (((ec1_1.ff + 2) + 1)) diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index cafbc5e..2e867fa 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2756,8 +2756,8 @@ from nt3 as nt3 where nt3.id = 1 and ss2.b3; QUERY PLAN ----------------------------------------------- - Nested Loop - -> Nested Loop + Nested Loop Semi Join + -> Nested Loop Semi Join -> Index Scan using nt3_pkey on nt3 Index Cond: (id = 1) -> Index Scan using nt2_pkey on nt2 @@ -4035,7 +4035,7 @@ explain (costs off) on (p.k = ss.k); QUERY PLAN --------------------------------- - Hash Left Join + Hash Semi Left Join Hash Cond: (p.k = c.k) -> Seq Scan on parent p -> Hash @@ -5260,3 +5260,238 @@ ERROR: invalid reference to FROM-clause entry for table "xx1" LINE 1: ...xx1 using lateral (select * from int4_tbl where f1 = x1) ss; ^ HINT: There is an entry for table "xx1", but it cannot be referenced from this part of the query. 
+-- +-- test planner's ability to change joins into their appropriate semi join +-- type +-- +create table j1 (id int primary key); +create table j2 (id int primary key); +create table j3 (id int); +insert into j1 values(1),(2),(3); +insert into j2 values(1),(2),(3); +insert into j3 values(1),(1); +analyze j1; +analyze j2; +analyze j3; +-- ensure join is changed to a semi join +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Semi Join + Output: j1.id, j2.id + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(9 rows) + +-- ensure join not changed when not an equi-join +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id > j2.id; + QUERY PLAN +----------------------------------- + Nested Loop + Output: j1.id, j2.id + Join Filter: (j1.id > j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(9 rows) + +-- don't change, as j3 has no unique index or pk on id +explain (verbose, costs off) +select * from j1 inner join j3 on j1.id = j3.id; + QUERY PLAN +----------------------------------- + Hash Semi Join + Output: j1.id, j3.id + Hash Cond: (j3.id = j1.id) + -> Seq Scan on public.j3 + Output: j3.id + -> Hash + Output: j1.id + -> Seq Scan on public.j1 + Output: j1.id +(9 rows) + +-- ensure left join is converted to left semi join +explain (verbose, costs off) +select * from j1 left join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Semi Left Join + Output: j1.id, j2.id + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(9 rows) + +-- ensure right join is converted too +explain (verbose, costs off) +select * from j1 right join j2 on j1.id = j2.id; + QUERY PLAN 
+----------------------------------- + Hash Semi Left Join + Output: j1.id, j2.id + Hash Cond: (j2.id = j1.id) + -> Seq Scan on public.j2 + Output: j2.id + -> Hash + Output: j1.id + -> Seq Scan on public.j1 + Output: j1.id +(9 rows) + +-- a clauseless (cross) join can't be converted +explain (verbose, costs off) +select * from j1 cross join j2; + QUERY PLAN +----------------------------------- + Nested Loop + Output: j1.id, j2.id + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(8 rows) + +-- ensure a natural join is converted to a semi join +explain (verbose, costs off) +select * from j1 natural join j2; + QUERY PLAN +----------------------------------- + Hash Semi Join + Output: j1.id + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(9 rows) + +-- ensure distinct clause allows the inner to become a semi join +explain (verbose, costs off) +select * from j1 +inner join (select distinct id from j3) j3 on j1.id = j3.id; + QUERY PLAN +----------------------------------------------- + Nested Loop Semi Join + Output: j1.id, j3.id + Join Filter: (j1.id = j3.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j3.id + -> Unique + Output: j3.id + -> Sort + Output: j3.id + Sort Key: j3.id + -> Seq Scan on public.j3 + Output: j3.id +(14 rows) + +-- ensure group by clause allows the inner to become a semi join +explain (verbose, costs off) +select * from j1 +inner join (select id from j3 group by id) j3 on j1.id = j3.id; + QUERY PLAN +----------------------------------------------- + Nested Loop Semi Join + Output: j1.id, j3.id + Join Filter: (j1.id = j3.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j3.id + -> Group + Output: j3.id + Group Key: j3.id + -> Sort + Output: j3.id + Sort Key: j3.id + -> Seq Scan on public.j3 + Output: j3.id +(15 rows) + +-- ensure a full 
join is not altered +explain (verbose, costs off) +select * from j1 full join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Full Join + Output: j1.id, j2.id + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(9 rows) + +drop table j1; +drop table j2; +drop table j3; +-- test a more complex permutations of join conversions +create table j1 (id1 int, id2 int, primary key(id1,id2)); +create table j2 (id1 int, id2 int, primary key(id1,id2)); +create table j3 (id1 int, id2 int, primary key(id1,id2)); +insert into j1 values(1,1),(2,2); +insert into j2 values(1,1); +insert into j3 values(1,1); +analyze j1; +analyze j2; +analyze j3; +-- ensure there's no join conversion when not all columns which are part of +-- the unique index are part of the join clause +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1; + QUERY PLAN +------------------------------------------ + Nested Loop + Output: j1.id1, j1.id2, j2.id1, j2.id2 + Join Filter: (j1.id1 = j2.id1) + -> Seq Scan on public.j2 + Output: j2.id1, j2.id2 + -> Seq Scan on public.j1 + Output: j1.id1, j1.id2 +(7 rows) + +-- ensure inner is converted to semi join when there's multiple columns in the +-- join condition +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2; + QUERY PLAN +---------------------------------------------------------- + Nested Loop Semi Join + Output: j1.id1, j1.id2, j2.id1, j2.id2 + Join Filter: ((j1.id1 = j2.id1) AND (j1.id2 = j2.id2)) + -> Seq Scan on public.j1 + Output: j1.id1, j1.id2 + -> Materialize + Output: j2.id1, j2.id2 + -> Seq Scan on public.j2 + Output: j2.id1, j2.id2 +(9 rows) + +drop table j1; +drop table j2; +drop table j3; diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out index 067aa8d..3f0c705 100644 --- 
a/src/test/regress/expected/rowsecurity.out +++ b/src/test/regress/expected/rowsecurity.out @@ -276,7 +276,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle); EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle); QUERY PLAN ---------------------------------------------------- - Nested Loop + Nested Loop Semi Join -> Subquery Scan on document Filter: f_leak(document.dtitle) -> Seq Scan on document document_1 diff --git a/src/test/regress/expected/select_views.out b/src/test/regress/expected/select_views.out index 7f57526..496eb62 100644 --- a/src/test/regress/expected/select_views.out +++ b/src/test/regress/expected/select_views.out @@ -1411,7 +1411,7 @@ NOTICE: f_leak => 9801-2345-6789-0123 EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_normal WHERE f_leak(cnum); QUERY PLAN --------------------------------------------------------- - Hash Join + Hash Semi Join Hash Cond: (r.cid = l.cid) -> Seq Scan on credit_card r Filter: f_leak(cnum) @@ -1432,7 +1432,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_secure WHERE f_leak(cnum); --------------------------------------------------------------- Subquery Scan on my_credit_card_secure Filter: f_leak(my_credit_card_secure.cnum) - -> Hash Join + -> Hash Semi Join Hash Cond: (r.cid = l.cid) -> Seq Scan on credit_card r -> Hash @@ -1466,7 +1466,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_usage_normal -> Materialize -> Subquery Scan on l Filter: f_leak(l.cnum) - -> Hash Join + -> Hash Semi Join Hash Cond: (r_1.cid = l_1.cid) -> Seq Scan on credit_card r_1 -> Hash @@ -1497,7 +1497,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_usage_secure -> Seq Scan on credit_usage r Filter: ((ymd >= '10-01-2011'::date) AND (ymd < '11-01-2011'::date)) -> Materialize - -> Hash Join + -> Hash Semi Join Hash Cond: (r_1.cid = l.cid) -> Seq Scan on credit_card r_1 -> Hash diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 3430f91..38cc39c 100644 
--- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1696,3 +1696,96 @@ update xx1 set x2 = f1 from xx1, lateral (select * from int4_tbl where f1 = x1) delete from xx1 using (select * from int4_tbl where f1 = x1) ss; delete from xx1 using (select * from int4_tbl where f1 = xx1.x1) ss; delete from xx1 using lateral (select * from int4_tbl where f1 = x1) ss; + +-- +-- test planner's ability to change joins into their appropriate semi join +-- type +-- + +create table j1 (id int primary key); +create table j2 (id int primary key); +create table j3 (id int); + +insert into j1 values(1),(2),(3); +insert into j2 values(1),(2),(3); +insert into j3 values(1),(1); + +analyze j1; +analyze j2; +analyze j3; + +-- ensure join is changed to a semi join +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id = j2.id; + +-- ensure join not changed when not an equi-join +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id > j2.id; + +-- don't change, as j3 has no unique index or pk on id +explain (verbose, costs off) +select * from j1 inner join j3 on j1.id = j3.id; + +-- ensure left join is converted to left semi join +explain (verbose, costs off) +select * from j1 left join j2 on j1.id = j2.id; + +-- ensure right join is converted too +explain (verbose, costs off) +select * from j1 right join j2 on j1.id = j2.id; + +-- a clauseless (cross) join can't be converted +explain (verbose, costs off) +select * from j1 cross join j2; + +-- ensure a natural join is converted to a semi join +explain (verbose, costs off) +select * from j1 natural join j2; + +-- ensure distinct clause allows the inner to become a semi join +explain (verbose, costs off) +select * from j1 +inner join (select distinct id from j3) j3 on j1.id = j3.id; + +-- ensure group by clause allows the inner to become a semi join +explain (verbose, costs off) +select * from j1 +inner join (select id from j3 group by id) j3 on j1.id = j3.id; + +-- ensure a full join is 
not altered +explain (verbose, costs off) +select * from j1 full join j2 on j1.id = j2.id; + +drop table j1; +drop table j2; +drop table j3; + +-- test a more complex permutations of join conversions + +create table j1 (id1 int, id2 int, primary key(id1,id2)); +create table j2 (id1 int, id2 int, primary key(id1,id2)); +create table j3 (id1 int, id2 int, primary key(id1,id2)); + +insert into j1 values(1,1),(2,2); +insert into j2 values(1,1); +insert into j3 values(1,1); + +analyze j1; +analyze j2; +analyze j3; + +-- ensure there's no join conversion when not all columns which are part of +-- the unique index are part of the join clause +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1; + +-- ensure inner is converted to semi join when there's multiple columns in the +-- join condition +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2; + +drop table j1; +drop table j2; +drop table j3;