diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index a951c55..ae69fc1 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -1151,9 +1151,16 @@ ExplainNode(PlanState *planstate, List *ancestors, appendStringInfo(es->str, " %s Join", jointype); else if (!IsA(plan, NestLoop)) appendStringInfoString(es->str, " Join"); + if (((Join *)plan)->inner_unique) + appendStringInfoString(es->str, "(inner unique)"); + } else + { ExplainPropertyText("Join Type", jointype, es); + ExplainPropertyText("Inner unique", + ((Join *)plan)->inner_unique?"true":"false", es); + } } break; case T_SetOp: diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 1d78cdf..f6cd8e1 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -306,10 +306,12 @@ ExecHashJoin(HashJoinState *node) } /* - * In a semijoin, we'll consider returning the first - * match, but after that we're done with this outer tuple. + * We'll consider returning the first match if the inner + * is unique, but after that we're done with this outer + * tuple. For the case of SEMI joins, we want to skip to + * the next outer row after having matched 1 inner row. 
*/ - if (node->js.jointype == JOIN_SEMI) + if (node->js.inner_unique) node->hj_JoinState = HJ_NEED_NEW_OUTER; if (otherqual == NIL || @@ -451,6 +453,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) hjstate = makeNode(HashJoinState); hjstate->js.ps.plan = (Plan *) node; hjstate->js.ps.state = estate; + hjstate->js.inner_unique = node->join.inner_unique; /* * Miscellaneous initialization @@ -498,8 +501,10 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags) /* set up null tuples for outer joins, if needed */ switch (node->join.jointype) { - case JOIN_INNER: case JOIN_SEMI: + hjstate->js.inner_unique = true; + /* fall through */ + case JOIN_INNER: break; case JOIN_LEFT: case JOIN_ANTI: diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c index 15742c5..28dfbbe 100644 --- a/src/backend/executor/nodeMergejoin.c +++ b/src/backend/executor/nodeMergejoin.c @@ -840,10 +840,12 @@ ExecMergeJoin(MergeJoinState *node) } /* - * In a semijoin, we'll consider returning the first - * match, but after that we're done with this outer tuple. + * We'll consider returning the first match if the inner + * is unique, but after that we're done with this outer + * tuple. For the case of SEMI joins, we want to skip to + * the next outer row after having matched 1 inner row. 
*/ - if (node->js.jointype == JOIN_SEMI) + if (node->js.inner_unique) node->mj_JoinState = EXEC_MJ_NEXTOUTER; qualResult = (otherqual == NIL || @@ -1486,6 +1488,8 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags) mergestate->js.ps.plan = (Plan *) node; mergestate->js.ps.state = estate; + mergestate->js.inner_unique = node->join.inner_unique; + /* * Miscellaneous initialization * @@ -1553,8 +1557,10 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags) switch (node->join.jointype) { - case JOIN_INNER: case JOIN_SEMI: + mergestate->js.inner_unique = true; + /* fall through */ + case JOIN_INNER: mergestate->mj_FillOuter = false; mergestate->mj_FillInner = false; break; diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c index e66bcda..8cf0e71 100644 --- a/src/backend/executor/nodeNestloop.c +++ b/src/backend/executor/nodeNestloop.c @@ -247,10 +247,12 @@ ExecNestLoop(NestLoopState *node) } /* - * In a semijoin, we'll consider returning the first match, but - * after that we're done with this outer tuple. + * We'll consider returning the first match if the inner is + * unique, but after that we're done with this outer tuple. + * For the case of SEMI joins, we want to skip to the next outer + * row after having matched 1 inner row. 
*/ - if (node->js.jointype == JOIN_SEMI) + if (node->js.inner_unique) node->nl_NeedNewOuter = true; if (otherqual == NIL || ExecQual(otherqual, econtext, false)) @@ -310,6 +312,8 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags) nlstate->js.ps.plan = (Plan *) node; nlstate->js.ps.state = estate; + nlstate->js.inner_unique = node->join.inner_unique; + /* * Miscellaneous initialization * @@ -354,8 +358,10 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags) switch (node->join.jointype) { - case JOIN_INNER: case JOIN_SEMI: + nlstate->js.inner_unique = true; + /* fall through */ + case JOIN_INNER: break; case JOIN_LEFT: case JOIN_ANTI: diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 029761e..4008328 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -1944,6 +1944,7 @@ _copySpecialJoinInfo(const SpecialJoinInfo *from) COPY_SCALAR_FIELD(jointype); COPY_SCALAR_FIELD(lhs_strict); COPY_SCALAR_FIELD(delay_upper_joins); + COPY_SCALAR_FIELD(is_unique_join); COPY_SCALAR_FIELD(semi_can_btree); COPY_SCALAR_FIELD(semi_can_hash); COPY_NODE_FIELD(semi_operators); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 190e50a..25885df 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -798,6 +798,7 @@ _equalSpecialJoinInfo(const SpecialJoinInfo *a, const SpecialJoinInfo *b) COMPARE_SCALAR_FIELD(jointype); COMPARE_SCALAR_FIELD(lhs_strict); COMPARE_SCALAR_FIELD(delay_upper_joins); + COMPARE_SCALAR_FIELD(is_unique_join); COMPARE_SCALAR_FIELD(semi_can_btree); COMPARE_SCALAR_FIELD(semi_can_hash); COMPARE_NODE_FIELD(semi_operators); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 385b289..69e7353 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1948,6 +1948,7 @@ _outSpecialJoinInfo(StringInfo str, const SpecialJoinInfo *node) WRITE_ENUM_FIELD(jointype, JoinType); 
WRITE_BOOL_FIELD(lhs_strict); WRITE_BOOL_FIELD(delay_upper_joins); + WRITE_BOOL_FIELD(is_unique_join); WRITE_BOOL_FIELD(semi_can_btree); WRITE_BOOL_FIELD(semi_can_hash); WRITE_NODE_FIELD(semi_operators); diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 1a0d358..30d5c80 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1746,7 +1746,9 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, inner_run_cost = inner_path->total_cost - inner_path->startup_cost; inner_rescan_run_cost = inner_rescan_total_cost - inner_rescan_start_cost; - if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + if (jointype == JOIN_SEMI || + jointype == JOIN_ANTI || + sjinfo->is_unique_join) { double outer_matched_rows; Selectivity inner_scan_frac; @@ -2658,7 +2660,9 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, /* CPU costs */ - if (path->jpath.jointype == JOIN_SEMI || path->jpath.jointype == JOIN_ANTI) + if (path->jpath.jointype == JOIN_SEMI || + path->jpath.jointype == JOIN_ANTI || + sjinfo->is_unique_join) { double outer_matched_rows; Selectivity inner_scan_frac; diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 1da953f..b0bc3f6 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -18,6 +18,7 @@ #include "executor/executor.h" #include "optimizer/cost.h" +#include "optimizer/planmain.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" @@ -49,7 +50,8 @@ static List *select_mergejoin_clauses(PlannerInfo *root, List *restrictlist, JoinType jointype, bool *mergejoin_allowed); - +static inline bool clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel, + RelOptInfo *innerrel); /* * add_paths_to_joinrel @@ -260,8 +262,74 @@ add_paths_to_joinrel(PlannerInfo *root, restrictlist, jointype, sjinfo, &semifactors, param_source_rels, extra_lateral_rels); + + if 
(restrictlist == NIL) + return; + + /* + * We can optimize inner loop execution for joins on which the inner rel + * is unique on the restrictlist. + */ + if (jointype == JOIN_INNER) + { + /* + * Remember the number of items that were in the restrictlist, as + * the call to relation_has_unique_index_for may add more items + * which we'll need to remove later. + */ + int org_len = list_length(restrictlist); + + /* + * rel_is_distinct_for requires restrict infos to have the + * correct clause direction info. + */ + foreach(lc, restrictlist) + { + clause_sides_match_join((RestrictInfo *)lfirst(lc), + outerrel, innerrel); + } + + sjinfo->is_unique_join = rel_is_distinct_for(root, innerrel, restrictlist); + + /* Remove any list items added by rel_is_distinct_for */ + list_truncate(restrictlist, org_len); + } + + /* + * Left joins were already checked for uniqueness in analyzejoins.c + */ + + if (sjinfo->is_unique_join) + { + /* + * OK, this join has a unique inner rel, so mark the paths just + * added to record that the inner side is unique. + */ + foreach(lc, joinrel->pathlist) + { + JoinPath *jp = (JoinPath *)lfirst(lc); + + /* + * This relies on add_paths_to_joinrel never being called + * with the same outer/inner rels for a different restrictlist. 
+ */ + switch (jp->jointype) + { + case JOIN_INNER: + case JOIN_LEFT: + if (jp->outerjoinpath->parent == outerrel && + jp->innerjoinpath->parent == innerrel) + jp->inner_unique = true; + break; + default: + break; + } + } + } + } + /* * try_nestloop_path * Consider a nestloop join path; if it appears useful, push it into diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index fe9fd57..a79c194 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -624,6 +624,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) /* we don't bother trying to make the remaining fields valid */ sjinfo->lhs_strict = false; sjinfo->delay_upper_joins = false; + sjinfo->is_unique_join = false; sjinfo->semi_can_btree = false; sjinfo->semi_can_hash = false; sjinfo->semi_operators = NIL; diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 11d3933..5d0cd2e 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -33,12 +33,28 @@ /* local functions */ static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); +static bool specialjoin_is_unique_join(PlannerInfo *root, + SpecialJoinInfo *sjinfo); static void remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids); static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); static Oid distinct_col_search(int colno, List *colnos, List *opids); +void +mark_unique_joins(PlannerInfo *root, List *joinlist) +{ + ListCell *lc; + + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); + + if (specialjoin_is_unique_join(root, sjinfo)) + sjinfo->is_unique_join = true; + } +} + /* * remove_useless_joins * Check for relations that don't actually need to be joined at all, @@ -91,6 +107,12 @@ restart: root->join_info_list = list_delete_ptr(root->join_info_list, 
sjinfo); /* + * We may now be able to mark some joins as unique which we could + * not do before + */ + mark_unique_joins(root, joinlist); + + /* * Restart the scan. This is necessary to ensure we find all * removable joins independently of ordering of the join_info_list * (note that removal of attr_needed bits may make a join appear @@ -151,17 +173,17 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) { int innerrelid; RelOptInfo *innerrel; - Query *subquery = NULL; Relids joinrelids; - List *clause_list = NIL; - ListCell *l; int attroff; + ListCell *l; /* - * Must be a non-delaying left join to a single baserel, else we aren't - * going to be able to do anything with it. + * Join must not duplicate its outer side and must be a non-delaying left + * join to a single baserel, else we aren't going to be able to do anything + * with it. */ - if (sjinfo->jointype != JOIN_LEFT || + if (!sjinfo->is_unique_join || + sjinfo->jointype != JOIN_LEFT || sjinfo->delay_upper_joins) return false; @@ -170,38 +192,7 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) innerrel = find_base_rel(root, innerrelid); - if (innerrel->reloptkind != RELOPT_BASEREL) - return false; - - /* - * Before we go to the effort of checking whether any innerrel variables - * are needed above the join, make a quick check to eliminate cases in - * which we will surely be unable to prove uniqueness of the innerrel. - */ - if (innerrel->rtekind == RTE_RELATION) - { - /* - * For a plain-relation innerrel, we only know how to prove uniqueness - * by reference to unique indexes. If there are no indexes then - * there's certainly no unique indexes so there's no point in going - * further. - */ - if (innerrel->indexlist == NIL) - return false; - } - else if (innerrel->rtekind == RTE_SUBQUERY) - { - subquery = root->simple_rte_array[innerrelid]->subquery; - - /* - * If the subquery has no qualities that support distinctness proofs - * then there's no point in going further. 
- */ - if (!query_supports_distinctness(subquery)) - return false; - } - else - return false; /* unsupported rtekind */ + Assert(innerrel->reloptkind == RELOPT_BASEREL); /* Compute the relid set for the join we are considering */ joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); @@ -212,7 +203,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) * * Note that this test only detects use of inner-rel attributes in higher * join conditions and the target list. There might be such attributes in - * pushed-down conditions at this join, too. We check that case below. + * pushed-down conditions at this join, too, but in this case the join + * would not have been marked as unique. * * As a micro-optimization, it seems better to start with max_attr and * count down rather than starting with min_attr and counting up, on the @@ -253,6 +245,49 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) return false; /* it does reference innerrel */ } + return true; +} + +/* + * specialjoin_is_unique_join + * True if it can be proved that this special join will never produce + * more than 1 row per outer row, otherwise returns false if there is + * insufficient evidence to prove the join is unique. 
+ */ +static bool +specialjoin_is_unique_join(PlannerInfo *root, SpecialJoinInfo *sjinfo) +{ + int innerrelid; + RelOptInfo *innerrel; + Query *subquery = NULL; + Relids joinrelids; + ListCell *l; + List *clause_list = NIL; + + /* check if we've already marked this join as unique on a previous call */ + if (sjinfo->is_unique_join) + return true; + + /* if there's more than 1 relation involved then punt */ + if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid)) + return false; + + innerrel = find_base_rel(root, innerrelid); + + if (innerrel->reloptkind != RELOPT_BASEREL) + return false; + + /* + * Before we go to the effort of pulling out the join condition's columns, + * make a quick check to eliminate cases in which we will surely be unable + * to prove uniqueness of the innerrel. + */ + if (!rel_supports_distinctness(root, innerrel)) + return false; + + /* Compute the relid set for the join we are considering */ + joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + /* * Search for mergejoinable clauses that constrain the inner rel against * either the outer rel or a pseudoconstant. If an operator is @@ -274,10 +309,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) !bms_equal(restrictinfo->required_relids, joinrelids)) { /* - * If such a clause actually references the inner rel then join - * removal has to be disallowed. We have to check this despite - * the previous attr_needed checks because of the possibility of - * pushed-down clauses referencing the rel. + * If such a clause actually references the inner rel then we can't + * mark the join as unique. */ if (bms_is_member(innerrelid, restrictinfo->clause_relids)) return false; @@ -300,71 +333,9 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) clause_list = lappend(clause_list, restrictinfo); } - /* - * relation_has_unique_index_for automatically adds any usable restriction - * clauses for the innerrel, so we needn't do that here. 
(XXX we are not - * considering restriction clauses for subqueries; is that worth doing?) - */ - - if (innerrel->rtekind == RTE_RELATION) - { - /* Now examine the indexes to see if we have a matching unique index */ - if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL)) - return true; - } - else /* innerrel->rtekind == RTE_SUBQUERY */ - { - List *colnos = NIL; - List *opids = NIL; - - /* - * Build the argument lists for query_is_distinct_for: a list of - * output column numbers that the query needs to be distinct over, and - * a list of equality operators that the output columns need to be - * distinct according to. - */ - foreach(l, clause_list) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - Oid op; - Var *var; - - /* - * Get the equality operator we need uniqueness according to. - * (This might be a cross-type operator and thus not exactly the - * same operator the subquery would consider; that's all right - * since query_is_distinct_for can resolve such cases.) The - * mergejoinability test above should have selected only OpExprs. - */ - Assert(IsA(rinfo->clause, OpExpr)); - op = ((OpExpr *) rinfo->clause)->opno; - - /* clause_sides_match_join identified the inner side for us */ - if (rinfo->outer_is_left) - var = (Var *) get_rightop(rinfo->clause); - else - var = (Var *) get_leftop(rinfo->clause); - - /* - * If inner side isn't a Var referencing a subquery output column, - * this clause doesn't help us. - */ - if (!var || !IsA(var, Var) || - var->varno != innerrelid || var->varlevelsup != 0) - continue; - - colnos = lappend_int(colnos, var->varattno); - opids = lappend_oid(opids, op); - } - - if (query_is_distinct_for(subquery, colnos, opids)) - return true; - } + if (rel_is_distinct_for(root, innerrel, clause_list)) + return true; - /* - * Some day it would be nice to check for other methods of establishing - * distinctness. 
- */ return false; } @@ -564,6 +535,125 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved) return result; } +/* + * rel_is_distinct_for + * Returns true if rel can be proved to be distinct over clause_list + * + * Note: We expect clause_list to be already processed to check if the + * RestrictInfos are in the form "outerrel_expr op innerrel_expr" or + * "innerrel_expr op outerrel_expr". + * + * Note: this function may add items to clause_list; callers should either + * make a copy of the list or trim it back to its original length after + * calling this function. + */ +bool +rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) +{ + int relid = rel->relid; + + /* + * relation_has_unique_index_for automatically adds any usable restriction + * clauses for the rel, so we needn't do that here. (XXX we are not + * considering restriction clauses for subqueries; is that worth doing?) + */ + if (rel->rtekind == RTE_RELATION) + { + /* Now examine the indexes to see if we have a matching unique index */ + if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL)) + return true; + } + else if (rel->rtekind == RTE_SUBQUERY) + { + List *colnos = NIL; + List *opids = NIL; + ListCell *l; + Query *subquery = root->simple_rte_array[relid]->subquery; + + /* + * Build the argument lists for query_is_distinct_for: a list of + * output column numbers that the query needs to be distinct over, and + * a list of equality operators that the output columns need to be + * distinct according to. + */ + foreach(l, clause_list) + { + RestrictInfo *rinfo = (RestrictInfo *)lfirst(l); + Oid op; + Var *var; + + if (!IsA(rinfo->clause, OpExpr)) + continue; + + /* + * Get the equality operator we need uniqueness according to. + * (This might be a cross-type operator and thus not exactly the + * same operator the subquery would consider; that's all right + * since query_is_distinct_for can resolve such cases.) 
The + * IsA() check above has already skipped any non-OpExpr clause. + */ + op = ((OpExpr *)rinfo->clause)->opno; + + /* clause_sides_match_join identified the inner side for us */ + if (rinfo->outer_is_left) + var = (Var *)get_rightop(rinfo->clause); + else + var = (Var *)get_leftop(rinfo->clause); + + /* + * If inner side isn't a Var referencing a subquery output column, + * this clause doesn't help us. + */ + if (!var || !IsA(var, Var) || + var->varno != relid || var->varlevelsup != 0) + continue; + + colnos = lappend_int(colnos, var->varattno); + opids = lappend_oid(opids, op); + } + + if (query_is_distinct_for(subquery, colnos, opids)) + return true; + } + return false; /* can't prove rel to be distinct over clause_list */ +} +/* + * rel_supports_distinctness + * Returns true if rel has some properties which could prove the relation + * to be unique over some set of columns. + * + * This is effectively a pre-checking function for rel_is_distinct_for(). + * It must return true if rel_is_distinct_for() could possibly return true. + */ +bool +rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel) +{ + if (rel->rtekind == RTE_RELATION) + { + /* + * For a plain relation, we only know how to prove uniqueness + * by reference to unique indexes. If there are no indexes then + * there are certainly no unique indexes, so nothing can prove + * uniqueness of the relation. + */ + if (rel->indexlist != NIL) + return true; + } + else if (rel->rtekind == RTE_SUBQUERY) + { + Query *subquery = root->simple_rte_array[rel->relid]->subquery; + + /* Check if the subquery has any qualities that support distinctness */ + if (query_supports_distinctness(subquery)) + return true; + } + + /* + * Some day it would be nice to check for other methods of establishing + * distinctness. 
+ */ + return false; +} /* * query_supports_distinctness - could the query possibly be proven distinct diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index cb69c03..ea08695 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -131,13 +131,12 @@ static BitmapAnd *make_bitmap_and(List *bitmapplans); static BitmapOr *make_bitmap_or(List *bitmapplans); static NestLoop *make_nestloop(List *tlist, List *joinclauses, List *otherclauses, List *nestParams, - Plan *lefttree, Plan *righttree, - JoinType jointype); + Plan *lefttree, Plan *righttree, JoinPath *jpath); static HashJoin *make_hashjoin(List *tlist, List *joinclauses, List *otherclauses, List *hashclauses, Plan *lefttree, Plan *righttree, - JoinType jointype); + JoinPath *jpath); static Hash *make_hash(Plan *lefttree, Oid skewTable, AttrNumber skewColumn, @@ -152,7 +151,7 @@ static MergeJoin *make_mergejoin(List *tlist, int *mergestrategies, bool *mergenullsfirst, Plan *lefttree, Plan *righttree, - JoinType jointype); + JoinPath *jpath); static Sort *make_sort(PlannerInfo *root, Plan *lefttree, int numCols, AttrNumber *sortColIdx, Oid *sortOperators, Oid *collations, bool *nullsFirst, @@ -2192,7 +2191,7 @@ create_nestloop_plan(PlannerInfo *root, nestParams, outer_plan, inner_plan, - best_path->jointype); + best_path); copy_path_costsize(&join_plan->join.plan, &best_path->path); @@ -2486,7 +2485,7 @@ create_mergejoin_plan(PlannerInfo *root, mergenullsfirst, outer_plan, inner_plan, - best_path->jpath.jointype); + &best_path->jpath); /* Costs of sort and material steps are included in path cost already */ copy_path_costsize(&join_plan->join.plan, &best_path->jpath.path); @@ -2612,7 +2611,7 @@ create_hashjoin_plan(PlannerInfo *root, hashclauses, outer_plan, (Plan *) hash_plan, - best_path->jpath.jointype); + &best_path->jpath); copy_path_costsize(&join_plan->join.plan, &best_path->jpath.path); @@ -3717,7 +3716,7 @@ 
make_nestloop(List *tlist, List *nestParams, Plan *lefttree, Plan *righttree, - JoinType jointype) + JoinPath *jpath) { NestLoop *node = makeNode(NestLoop); Plan *plan = &node->join.plan; @@ -3727,8 +3726,9 @@ make_nestloop(List *tlist, plan->qual = otherclauses; plan->lefttree = lefttree; plan->righttree = righttree; - node->join.jointype = jointype; + node->join.jointype = jpath->jointype; node->join.joinqual = joinclauses; + node->join.inner_unique = jpath->inner_unique; node->nestParams = nestParams; return node; @@ -3741,7 +3741,7 @@ make_hashjoin(List *tlist, List *hashclauses, Plan *lefttree, Plan *righttree, - JoinType jointype) + JoinPath *jpath) { HashJoin *node = makeNode(HashJoin); Plan *plan = &node->join.plan; @@ -3752,8 +3752,9 @@ make_hashjoin(List *tlist, plan->lefttree = lefttree; plan->righttree = righttree; node->hashclauses = hashclauses; - node->join.jointype = jointype; + node->join.jointype = jpath->jointype; node->join.joinqual = joinclauses; + node->join.inner_unique = jpath->inner_unique; return node; } @@ -3801,7 +3802,7 @@ make_mergejoin(List *tlist, bool *mergenullsfirst, Plan *lefttree, Plan *righttree, - JoinType jointype) + JoinPath *jpath) { MergeJoin *node = makeNode(MergeJoin); Plan *plan = &node->join.plan; @@ -3816,8 +3817,9 @@ make_mergejoin(List *tlist, node->mergeCollations = mergecollations; node->mergeStrategies = mergestrategies; node->mergeNullsFirst = mergenullsfirst; - node->join.jointype = jointype; + node->join.jointype = jpath->jointype; node->join.joinqual = joinclauses; + node->join.inner_unique = jpath->inner_unique; return node; } diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index a7655e4..8094880 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -1087,6 +1087,7 @@ make_outerjoininfo(PlannerInfo *root, sjinfo->jointype = jointype; /* this always starts out false */ sjinfo->delay_upper_joins = false; + 
sjinfo->is_unique_join = false; compute_semijoin_info(sjinfo, clause); diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 848df97..55310d8 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -174,6 +174,9 @@ query_planner(PlannerInfo *root, List *tlist, */ fix_placeholder_input_needed_levels(root); + /* Analyze joins to find out which ones have a unique inner side */ + mark_unique_joins(root, joinlist); + /* * Remove any useless outer joins. Ideally this would be done during * jointree preprocessing, but the necessary information isn't available diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 59b17f3..f86f806 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1562,6 +1562,7 @@ typedef struct JoinState PlanState ps; JoinType jointype; List *joinqual; /* JOIN quals (in addition to ps.qual) */ + bool inner_unique; } JoinState; /* ---------------- diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 21cbfa8..122f2f4 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -543,6 +543,7 @@ typedef struct Join Plan plan; JoinType jointype; List *joinqual; /* JOIN quals (in addition to plan.qual) */ + bool inner_unique; } Join; /* ---------------- diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 334cf51..776a269 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1030,6 +1030,7 @@ typedef struct JoinPath Path *outerjoinpath; /* path for the outer side of the join */ Path *innerjoinpath; /* path for the inner side of the join */ + bool inner_unique; List *joinrestrictinfo; /* RestrictInfos to apply to join */ @@ -1406,6 +1407,7 @@ typedef struct SpecialJoinInfo JoinType jointype; /* always INNER, LEFT, FULL, SEMI, or ANTI */ bool lhs_strict; /* joinclause is strict for some LHS rel */ bool delay_upper_joins; 
/* can't commute with upper RHS */ + bool is_unique_join; /* matches a max of 1 row per outer join row */ /* Remaining fields are set only for JOIN_SEMI jointype: */ bool semi_can_btree; /* true if semi_operators are all btree */ bool semi_can_hash; /* true if semi_operators are all hash */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index fa72918..7a85227 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -122,7 +122,11 @@ extern RestrictInfo *build_implied_join_equality(Oid opno, /* * prototypes for plan/analyzejoins.c */ +extern void mark_unique_joins(PlannerInfo *root, List *joinlist); extern List *remove_useless_joins(PlannerInfo *root, List *joinlist); +extern bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, + List *clause_list); +extern bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel); extern bool query_supports_distinctness(Query *query); extern bool query_is_distinct_for(Query *query, List *colnos, List *opids); diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out index dfae84e..ad1d673 100644 --- a/src/test/regress/expected/equivclass.out +++ b/src/test/regress/expected/equivclass.out @@ -186,7 +186,7 @@ explain (costs off) select * from ec1, ec2 where ff = x1 and x1 = '42'::int8alias2; QUERY PLAN ----------------------------------------- - Nested Loop + Nested Loop(inner unique) -> Seq Scan on ec2 Filter: (x1 = '42'::int8alias2) -> Index Scan using ec1_pkey on ec1 @@ -310,7 +310,7 @@ explain (costs off) -> Index Scan using ec1_expr3 on ec1 ec1_5 -> Index Scan using ec1_expr4 on ec1 ec1_6 -> Materialize - -> Merge Join + -> Merge Join(inner unique) Merge Cond: ((((ec1_1.ff + 2) + 1)) = ec1.f1) -> Merge Append Sort Key: (((ec1_1.ff + 2) + 1)) @@ -365,7 +365,7 @@ explain (costs off) where ss1.x = ec1.f1 and ec1.ff = 42::int8; QUERY PLAN ----------------------------------------------------- - Merge Join + Merge 
Join(inner unique) Merge Cond: ((((ec1_1.ff + 2) + 1)) = ec1.f1) -> Merge Append Sort Key: (((ec1_1.ff + 2) + 1)) diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out index 56e2c99..e3fe97f 100644 --- a/src/test/regress/expected/inherit.out +++ b/src/test/regress/expected/inherit.out @@ -34,119 +34,119 @@ INSERT INTO d(aa) VALUES('dddddddd'); SELECT relname, a.* FROM a, pg_class where a.tableoid = pg_class.oid; relname | aa ---------+---------- - a | aaa - a | aaaa - a | aaaaa - a | aaaaaa - a | aaaaaaa a | aaaaaaaa - b | bbb - b | bbbb - b | bbbbb - b | bbbbbb - b | bbbbbbb + a | aaaaaaa + a | aaaaaa + a | aaaaa + a | aaaa + a | aaa b | bbbbbbbb - c | ccc - c | cccc - c | ccccc - c | cccccc - c | ccccccc + b | bbbbbbb + b | bbbbbb + b | bbbbb + b | bbbb + b | bbb c | cccccccc - d | ddd - d | dddd - d | ddddd - d | dddddd - d | ddddddd + c | ccccccc + c | cccccc + c | ccccc + c | cccc + c | ccc d | dddddddd + d | ddddddd + d | dddddd + d | ddddd + d | dddd + d | ddd (24 rows) SELECT relname, b.* FROM b, pg_class where b.tableoid = pg_class.oid; relname | aa | bb ---------+----------+---- - b | bbb | - b | bbbb | - b | bbbbb | - b | bbbbbb | - b | bbbbbbb | b | bbbbbbbb | - d | ddd | - d | dddd | - d | ddddd | - d | dddddd | - d | ddddddd | + b | bbbbbbb | + b | bbbbbb | + b | bbbbb | + b | bbbb | + b | bbb | d | dddddddd | + d | ddddddd | + d | dddddd | + d | ddddd | + d | dddd | + d | ddd | (12 rows) SELECT relname, c.* FROM c, pg_class where c.tableoid = pg_class.oid; relname | aa | cc ---------+----------+---- - c | ccc | - c | cccc | - c | ccccc | - c | cccccc | - c | ccccccc | c | cccccccc | - d | ddd | - d | dddd | - d | ddddd | - d | dddddd | - d | ddddddd | + c | ccccccc | + c | cccccc | + c | ccccc | + c | cccc | + c | ccc | d | dddddddd | + d | ddddddd | + d | dddddd | + d | ddddd | + d | dddd | + d | ddd | (12 rows) SELECT relname, d.* FROM d, pg_class where d.tableoid = pg_class.oid; relname | aa | bb | cc | dd 
---------+----------+----+----+---- - d | ddd | | | - d | dddd | | | - d | ddddd | | | - d | dddddd | | | - d | ddddddd | | | d | dddddddd | | | + d | ddddddd | | | + d | dddddd | | | + d | ddddd | | | + d | dddd | | | + d | ddd | | | (6 rows) SELECT relname, a.* FROM ONLY a, pg_class where a.tableoid = pg_class.oid; relname | aa ---------+---------- - a | aaa - a | aaaa - a | aaaaa - a | aaaaaa - a | aaaaaaa a | aaaaaaaa + a | aaaaaaa + a | aaaaaa + a | aaaaa + a | aaaa + a | aaa (6 rows) SELECT relname, b.* FROM ONLY b, pg_class where b.tableoid = pg_class.oid; relname | aa | bb ---------+----------+---- - b | bbb | - b | bbbb | - b | bbbbb | - b | bbbbbb | - b | bbbbbbb | b | bbbbbbbb | + b | bbbbbbb | + b | bbbbbb | + b | bbbbb | + b | bbbb | + b | bbb | (6 rows) SELECT relname, c.* FROM ONLY c, pg_class where c.tableoid = pg_class.oid; relname | aa | cc ---------+----------+---- - c | ccc | - c | cccc | - c | ccccc | - c | cccccc | - c | ccccccc | c | cccccccc | + c | ccccccc | + c | cccccc | + c | ccccc | + c | cccc | + c | ccc | (6 rows) SELECT relname, d.* FROM ONLY d, pg_class where d.tableoid = pg_class.oid; relname | aa | bb | cc | dd ---------+----------+----+----+---- - d | ddd | | | - d | dddd | | | - d | ddddd | | | - d | dddddd | | | - d | ddddddd | | | d | dddddddd | | | + d | ddddddd | | | + d | dddddd | | | + d | ddddd | | | + d | dddd | | | + d | ddd | | | (6 rows) UPDATE a SET aa='zzzz' WHERE aa='aaaa'; @@ -157,143 +157,143 @@ UPDATE a SET aa='zzzzzz' WHERE aa LIKE 'aaa%'; SELECT relname, a.* FROM a, pg_class where a.tableoid = pg_class.oid; relname | aa ---------+---------- - a | zzzz - a | zzzzz a | zzzzzz a | zzzzzz a | zzzzzz a | zzzzzz - b | bbb - b | bbbb - b | bbbbb - b | bbbbbb - b | bbbbbbb + a | zzzzz + a | zzzz b | bbbbbbbb - c | ccc - c | cccc - c | ccccc - c | cccccc - c | ccccccc + b | bbbbbbb + b | bbbbbb + b | bbbbb + b | bbbb + b | bbb c | cccccccc - d | ddd - d | dddd - d | ddddd - d | dddddd - d | ddddddd + c | ccccccc + c | 
cccccc + c | ccccc + c | cccc + c | ccc d | dddddddd + d | ddddddd + d | dddddd + d | ddddd + d | dddd + d | ddd (24 rows) SELECT relname, b.* FROM b, pg_class where b.tableoid = pg_class.oid; relname | aa | bb ---------+----------+---- - b | bbb | - b | bbbb | - b | bbbbb | - b | bbbbbb | - b | bbbbbbb | b | bbbbbbbb | - d | ddd | - d | dddd | - d | ddddd | - d | dddddd | - d | ddddddd | + b | bbbbbbb | + b | bbbbbb | + b | bbbbb | + b | bbbb | + b | bbb | d | dddddddd | + d | ddddddd | + d | dddddd | + d | ddddd | + d | dddd | + d | ddd | (12 rows) SELECT relname, c.* FROM c, pg_class where c.tableoid = pg_class.oid; relname | aa | cc ---------+----------+---- - c | ccc | - c | cccc | - c | ccccc | - c | cccccc | - c | ccccccc | c | cccccccc | - d | ddd | - d | dddd | - d | ddddd | - d | dddddd | - d | ddddddd | + c | ccccccc | + c | cccccc | + c | ccccc | + c | cccc | + c | ccc | d | dddddddd | + d | ddddddd | + d | dddddd | + d | ddddd | + d | dddd | + d | ddd | (12 rows) SELECT relname, d.* FROM d, pg_class where d.tableoid = pg_class.oid; relname | aa | bb | cc | dd ---------+----------+----+----+---- - d | ddd | | | - d | dddd | | | - d | ddddd | | | - d | dddddd | | | - d | ddddddd | | | d | dddddddd | | | + d | ddddddd | | | + d | dddddd | | | + d | ddddd | | | + d | dddd | | | + d | ddd | | | (6 rows) SELECT relname, a.* FROM ONLY a, pg_class where a.tableoid = pg_class.oid; relname | aa ---------+-------- - a | zzzz - a | zzzzz a | zzzzzz a | zzzzzz a | zzzzzz a | zzzzzz + a | zzzzz + a | zzzz (6 rows) SELECT relname, b.* FROM ONLY b, pg_class where b.tableoid = pg_class.oid; relname | aa | bb ---------+----------+---- - b | bbb | - b | bbbb | - b | bbbbb | - b | bbbbbb | - b | bbbbbbb | b | bbbbbbbb | + b | bbbbbbb | + b | bbbbbb | + b | bbbbb | + b | bbbb | + b | bbb | (6 rows) SELECT relname, c.* FROM ONLY c, pg_class where c.tableoid = pg_class.oid; relname | aa | cc ---------+----------+---- - c | ccc | - c | cccc | - c | ccccc | - c | cccccc | - c 
| ccccccc | c | cccccccc | + c | ccccccc | + c | cccccc | + c | ccccc | + c | cccc | + c | ccc | (6 rows) SELECT relname, d.* FROM ONLY d, pg_class where d.tableoid = pg_class.oid; relname | aa | bb | cc | dd ---------+----------+----+----+---- - d | ddd | | | - d | dddd | | | - d | ddddd | | | - d | dddddd | | | - d | ddddddd | | | d | dddddddd | | | + d | ddddddd | | | + d | dddddd | | | + d | ddddd | | | + d | dddd | | | + d | ddd | | | (6 rows) UPDATE b SET aa='new'; SELECT relname, a.* FROM a, pg_class where a.tableoid = pg_class.oid; relname | aa ---------+---------- - a | zzzz - a | zzzzz a | zzzzzz a | zzzzzz a | zzzzzz a | zzzzzz + a | zzzzz + a | zzzz b | new b | new b | new b | new b | new b | new - c | ccc - c | cccc - c | ccccc - c | cccccc - c | ccccccc c | cccccccc + c | ccccccc + c | cccccc + c | ccccc + c | cccc + c | ccc d | new d | new d | new @@ -322,12 +322,12 @@ SELECT relname, b.* FROM b, pg_class where b.tableoid = pg_class.oid; SELECT relname, c.* FROM c, pg_class where c.tableoid = pg_class.oid; relname | aa | cc ---------+----------+---- - c | ccc | - c | cccc | - c | ccccc | - c | cccccc | - c | ccccccc | c | cccccccc | + c | ccccccc | + c | cccccc | + c | ccccc | + c | cccc | + c | ccc | d | new | d | new | d | new | @@ -350,12 +350,12 @@ SELECT relname, d.* FROM d, pg_class where d.tableoid = pg_class.oid; SELECT relname, a.* FROM ONLY a, pg_class where a.tableoid = pg_class.oid; relname | aa ---------+-------- - a | zzzz - a | zzzzz a | zzzzzz a | zzzzzz a | zzzzzz a | zzzzzz + a | zzzzz + a | zzzz (6 rows) SELECT relname, b.* FROM ONLY b, pg_class where b.tableoid = pg_class.oid; @@ -372,12 +372,12 @@ SELECT relname, b.* FROM ONLY b, pg_class where b.tableoid = pg_class.oid; SELECT relname, c.* FROM ONLY c, pg_class where c.tableoid = pg_class.oid; relname | aa | cc ---------+----------+---- - c | ccc | - c | cccc | - c | ccccc | - c | cccccc | - c | ccccccc | c | cccccccc | + c | ccccccc | + c | cccccc | + c | ccccc | + c | cccc | + 
c | ccc | (6 rows) SELECT relname, d.* FROM ONLY d, pg_class where d.tableoid = pg_class.oid; diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 57fc910..1bfb9a3 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2614,8 +2614,8 @@ from nt3 as nt3 where nt3.id = 1 and ss2.b3; QUERY PLAN ----------------------------------------------- - Nested Loop - -> Nested Loop + Nested Loop(inner unique) + -> Nested Loop(inner unique) -> Index Scan using nt3_pkey on nt3 Index Cond: (id = 1) -> Index Scan using nt2_pkey on nt2 @@ -3338,7 +3338,7 @@ explain (costs off) on (p.k = ss.k); QUERY PLAN --------------------------------- - Hash Left Join + Hash Left Join(inner unique) Hash Cond: (p.k = c.k) -> Seq Scan on parent p -> Hash @@ -4416,3 +4416,239 @@ ERROR: invalid reference to FROM-clause entry for table "xx1" LINE 1: ...xx1 using lateral (select * from int4_tbl where f1 = x1) ss; ^ HINT: There is an entry for table "xx1", but it cannot be referenced from this part of the query. +-- +-- test planner's ability to mark joins as unique. 
+-- +create table j1 (id int primary key); +create table j2 (id int primary key); +create table j3 (id int); +insert into j1 values(1),(2),(3); +insert into j2 values(1),(2),(3); +insert into j3 values(1),(1); +analyze j1; +analyze j2; +analyze j3; +-- Ensure join is marked as unique +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Join(inner unique) + Output: j1.id, j2.id + Hash Cond: (j2.id = j1.id) + -> Seq Scan on public.j2 + Output: j2.id + -> Hash + Output: j1.id + -> Seq Scan on public.j1 + Output: j1.id +(9 rows) + +-- Ensure join not marked as unique when not using = +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id > j2.id; + QUERY PLAN +----------------------------------- + Nested Loop + Output: j1.id, j2.id + Join Filter: (j1.id > j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(9 rows) + +-- j3 has no unique index or pk on id +explain (verbose, costs off) +select * from j1 inner join j3 on j1.id = j3.id; + QUERY PLAN +----------------------------------- + Hash Join + Output: j1.id, j3.id + Hash Cond: (j1.id = j3.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j3.id + -> Seq Scan on public.j3 + Output: j3.id +(9 rows) + +-- ensure left join is marked as unique +explain (verbose, costs off) +select * from j1 left join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Left Join(inner unique) + Output: j1.id, j2.id + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(9 rows) + +-- ensure right join is marked as unique +explain (verbose, costs off) +select * from j1 right join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Left Join(inner unique) + Output: j1.id, j2.id + Hash Cond: (j2.id = j1.id) + -> Seq 
Scan on public.j2 + Output: j2.id + -> Hash + Output: j1.id + -> Seq Scan on public.j1 + Output: j1.id +(9 rows) + +-- cross joins can't be proved unique +explain (verbose, costs off) +select * from j1 cross join j2; + QUERY PLAN +----------------------------------- + Nested Loop + Output: j1.id, j2.id + -> Seq Scan on public.j1 + Output: j1.id + -> Materialize + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(8 rows) + +-- ensure natural join is marked as unique +explain (verbose, costs off) +select * from j1 natural join j2; + QUERY PLAN +----------------------------------- + Hash Join(inner unique) + Output: j1.id + Hash Cond: (j2.id = j1.id) + -> Seq Scan on public.j2 + Output: j2.id + -> Hash + Output: j1.id + -> Seq Scan on public.j1 + Output: j1.id +(9 rows) + +-- ensure distinct clause uniquifies the join +explain (verbose, costs off) +select * from j1 +inner join (select distinct id from j3) j3 on j1.id = j3.id; + QUERY PLAN +----------------------------------- + Nested Loop(inner unique) + Output: j1.id, j3.id + Join Filter: (j1.id = j3.id) + -> HashAggregate + Output: j3.id + Group Key: j3.id + -> Seq Scan on public.j3 + Output: j3.id + -> Seq Scan on public.j1 + Output: j1.id +(10 rows) + +-- ensure group by clause uniquifies the join +explain (verbose, costs off) +select * from j1 +inner join (select id from j3 group by id) j3 on j1.id = j3.id; + QUERY PLAN +----------------------------------- + Nested Loop(inner unique) + Output: j1.id, j3.id + Join Filter: (j1.id = j3.id) + -> HashAggregate + Output: j3.id + Group Key: j3.id + -> Seq Scan on public.j3 + Output: j3.id + -> Seq Scan on public.j1 + Output: j1.id +(10 rows) + +-- a subquery with an empty FROM clause should be marked as unique. 
+explain (verbose, costs off) +select * from j1 +inner join (select 1 id offset 0) j3 on j1.id = j3.id; + QUERY PLAN +------------------------------ + Nested Loop(inner unique) + Output: j1.id, (1) + Join Filter: (j1.id = (1)) + -> Result + Output: 1 + -> Seq Scan on public.j1 + Output: j1.id +(7 rows) + +explain (verbose, costs off) +select * from j1 full join j2 on j1.id = j2.id; + QUERY PLAN +----------------------------------- + Hash Full Join + Output: j1.id, j2.id + Hash Cond: (j1.id = j2.id) + -> Seq Scan on public.j1 + Output: j1.id + -> Hash + Output: j2.id + -> Seq Scan on public.j2 + Output: j2.id +(9 rows) + +drop table j1; +drop table j2; +drop table j3; +-- test a more complex permutations of unique joins +create table j1 (id1 int, id2 int, primary key(id1,id2)); +create table j2 (id1 int, id2 int, primary key(id1,id2)); +create table j3 (id1 int, id2 int, primary key(id1,id2)); +insert into j1 values(1,1),(2,2); +insert into j2 values(1,1); +insert into j3 values(1,1); +analyze j1; +analyze j2; +analyze j3; +-- ensure no unique joins when not all columns which are part of +-- the unique index are part of the join clause. 
+explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1; + QUERY PLAN +------------------------------------------ + Nested Loop + Output: j1.id1, j1.id2, j2.id1, j2.id2 + Join Filter: (j1.id1 = j2.id1) + -> Seq Scan on public.j2 + Output: j2.id1, j2.id2 + -> Seq Scan on public.j1 + Output: j1.id1, j1.id2 +(7 rows) + +-- ensure unique joins work with multiple columns +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2; + QUERY PLAN +---------------------------------------------------------- + Nested Loop(inner unique) + Output: j1.id1, j1.id2, j2.id1, j2.id2 + Join Filter: ((j1.id1 = j2.id1) AND (j1.id2 = j2.id2)) + -> Seq Scan on public.j2 + Output: j2.id1, j2.id2 + -> Seq Scan on public.j1 + Output: j1.id1, j1.id2 +(7 rows) + +drop table j1; +drop table j2; +drop table j3; diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out index f41bef1..aaf585d 100644 --- a/src/test/regress/expected/rowsecurity.out +++ b/src/test/regress/expected/rowsecurity.out @@ -248,7 +248,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle); EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle); QUERY PLAN ---------------------------------------------------- - Nested Loop + Nested Loop(inner unique) -> Subquery Scan on document Filter: f_leak(document.dtitle) -> Seq Scan on document document_1 diff --git a/src/test/regress/expected/select_views.out b/src/test/regress/expected/select_views.out index 82d510d..01b8b45 100644 --- a/src/test/regress/expected/select_views.out +++ b/src/test/regress/expected/select_views.out @@ -1365,7 +1365,7 @@ NOTICE: f_leak => 9801-2345-6789-0123 EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_normal WHERE f_leak(cnum); QUERY PLAN --------------------------------------------------------- - Hash Join + Hash Join(inner unique) Hash Cond: (r.cid = l.cid) -> Seq Scan on credit_card r Filter: 
f_leak(cnum) @@ -1386,7 +1386,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_secure WHERE f_leak(cnum); --------------------------------------------------------------- Subquery Scan on my_credit_card_secure Filter: f_leak(my_credit_card_secure.cnum) - -> Hash Join + -> Hash Join(inner unique) Hash Cond: (r.cid = l.cid) -> Seq Scan on credit_card r -> Hash @@ -1420,7 +1420,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_usage_normal -> Materialize -> Subquery Scan on l Filter: f_leak(l.cnum) - -> Hash Join + -> Hash Join(inner unique) Hash Cond: (r_1.cid = l_1.cid) -> Seq Scan on credit_card r_1 -> Hash @@ -1451,7 +1451,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_usage_secure -> Seq Scan on credit_usage r Filter: ((ymd >= '10-01-2011'::date) AND (ymd < '11-01-2011'::date)) -> Materialize - -> Hash Join + -> Hash Join(inner unique) Hash Cond: (r_1.cid = l.cid) -> Seq Scan on credit_card r_1 -> Hash diff --git a/src/test/regress/expected/select_views_1.out b/src/test/regress/expected/select_views_1.out index ce22bfa..a37bde4 100644 --- a/src/test/regress/expected/select_views_1.out +++ b/src/test/regress/expected/select_views_1.out @@ -1365,7 +1365,7 @@ NOTICE: f_leak => 9801-2345-6789-0123 EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_normal WHERE f_leak(cnum); QUERY PLAN --------------------------------------------------------- - Hash Join + Hash Join(inner unique) Hash Cond: (r.cid = l.cid) -> Seq Scan on credit_card r Filter: f_leak(cnum) @@ -1386,7 +1386,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_secure WHERE f_leak(cnum); --------------------------------------------------------------- Subquery Scan on my_credit_card_secure Filter: f_leak(my_credit_card_secure.cnum) - -> Hash Join + -> Hash Join(inner unique) Hash Cond: (r.cid = l.cid) -> Seq Scan on credit_card r -> Hash @@ -1420,7 +1420,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_usage_normal -> Materialize -> Subquery Scan on l Filter: f_leak(l.cnum) - -> Hash 
Join + -> Hash Join(inner unique) Hash Cond: (r_1.cid = l_1.cid) -> Seq Scan on credit_card r_1 -> Hash @@ -1451,7 +1451,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_usage_secure -> Seq Scan on credit_usage r Filter: ((ymd >= '10-01-2011'::date) AND (ymd < '11-01-2011'::date)) -> Materialize - -> Hash Join + -> Hash Join(inner unique) Hash Cond: (r_1.cid = l.cid) -> Seq Scan on credit_card r_1 -> Hash diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 06a27ea..9d27d1e 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1307,3 +1307,98 @@ update xx1 set x2 = f1 from xx1, lateral (select * from int4_tbl where f1 = x1) delete from xx1 using (select * from int4_tbl where f1 = x1) ss; delete from xx1 using (select * from int4_tbl where f1 = xx1.x1) ss; delete from xx1 using lateral (select * from int4_tbl where f1 = x1) ss; + +-- +-- test planner's ability to mark joins as unique. +-- + +create table j1 (id int primary key); +create table j2 (id int primary key); +create table j3 (id int); + +insert into j1 values(1),(2),(3); +insert into j2 values(1),(2),(3); +insert into j3 values(1),(1); + +analyze j1; +analyze j2; +analyze j3; + +-- Ensure join is marked as unique +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id = j2.id; + +-- Ensure join not marked as unique when not using = +explain (verbose, costs off) +select * from j1 inner join j2 on j1.id > j2.id; + +-- j3 has no unique index or pk on id +explain (verbose, costs off) +select * from j1 inner join j3 on j1.id = j3.id; + +-- ensure left join is marked as unique +explain (verbose, costs off) +select * from j1 left join j2 on j1.id = j2.id; + +-- ensure right join is marked as unique +explain (verbose, costs off) +select * from j1 right join j2 on j1.id = j2.id; + +-- cross joins can't be proved unique +explain (verbose, costs off) +select * from j1 cross join j2; + +-- ensure natural join is marked as unique +explain 
(verbose, costs off) +select * from j1 natural join j2; + +-- ensure distinct clause uniquifies the join +explain (verbose, costs off) +select * from j1 +inner join (select distinct id from j3) j3 on j1.id = j3.id; + +-- ensure group by clause uniquifies the join +explain (verbose, costs off) +select * from j1 +inner join (select id from j3 group by id) j3 on j1.id = j3.id; + +-- a subquery with an empty FROM clause should be marked as unique. +explain (verbose, costs off) +select * from j1 +inner join (select 1 id offset 0) j3 on j1.id = j3.id; + +explain (verbose, costs off) +select * from j1 full join j2 on j1.id = j2.id; + +drop table j1; +drop table j2; +drop table j3; + +-- test a more complex permutations of unique joins + +create table j1 (id1 int, id2 int, primary key(id1,id2)); +create table j2 (id1 int, id2 int, primary key(id1,id2)); +create table j3 (id1 int, id2 int, primary key(id1,id2)); + +insert into j1 values(1,1),(2,2); +insert into j2 values(1,1); +insert into j3 values(1,1); + +analyze j1; +analyze j2; +analyze j3; + +-- ensure no unique joins when not all columns which are part of +-- the unique index are part of the join clause. +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1; + +-- ensure unique joins work with multiple columns +explain (verbose, costs off) +select * from j1 +inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2; + +drop table j1; +drop table j2; +drop table j3;