From 42e4031c9255e89428cc095782d08ad95b15a07f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=80=E6=8C=83?= Date: Sat, 28 Mar 2020 13:44:54 +0800 Subject: [PATCH v3] Maintain UniqueKey at each RelOptInfo, this information can be used to erasing distinct/group path if we are sure the result is unique already. And rel_is_unique_for is modified to use UniqueKey to detect more cases. --- .../postgres_fdw/expected/postgres_fdw.out | 34 +- src/backend/nodes/equalfuncs.c | 3 +- src/backend/nodes/list.c | 27 + src/backend/nodes/makefuncs.c | 15 + src/backend/optimizer/path/Makefile | 3 +- src/backend/optimizer/path/allpaths.c | 25 +- src/backend/optimizer/path/joinrels.c | 2 + src/backend/optimizer/path/uniquekeys.c | 1033 +++++++++++++++++ src/backend/optimizer/plan/analyzejoins.c | 171 +-- src/backend/optimizer/plan/initsplan.c | 9 + src/backend/optimizer/plan/planmain.c | 13 - src/backend/optimizer/plan/planner.c | 33 +- src/backend/optimizer/prep/prepunion.c | 2 + src/backend/optimizer/util/plancat.c | 8 + src/include/nodes/makefuncs.h | 2 + src/include/nodes/nodes.h | 1 + src/include/nodes/pathnodes.h | 23 + src/include/nodes/pg_list.h | 2 + src/include/optimizer/paths.h | 37 + src/test/regress/expected/aggregates.out | 83 +- src/test/regress/expected/join.out | 34 +- src/test/regress/expected/select_distinct.out | 395 +++++++ src/test/regress/sql/join.sql | 3 + src/test/regress/sql/select_distinct.sql | 125 ++ 24 files changed, 1825 insertions(+), 258 deletions(-) create mode 100644 src/backend/optimizer/path/uniquekeys.c diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 62c2697920..ba3947c9f9 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -2902,22 +2902,20 @@ select sum(c1%3), sum(distinct c1%3 order by c1%3) filter (where c1%3 < 2), c2 f -- Outer query is aggregation query explain (verbose, costs off) select distinct (select count(*) 
filter (where t2.c2 = 6 and t2.c1 < 10) from ft1 t1 where t1.c1 = 6) from ft2 t2 where t2.c2 % 6 = 0 order by 1; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------- - Unique + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------ + Sort Output: ((SubPlan 1)) - -> Sort - Output: ((SubPlan 1)) - Sort Key: ((SubPlan 1)) - -> Foreign Scan - Output: (SubPlan 1) - Relations: Aggregate on (public.ft2 t2) - Remote SQL: SELECT count(*) FILTER (WHERE ((c2 = 6) AND ("C 1" < 10))) FROM "S 1"."T 1" WHERE (((c2 % 6) = 0)) - SubPlan 1 - -> Foreign Scan on public.ft1 t1 - Output: (count(*) FILTER (WHERE ((t2.c2 = 6) AND (t2.c1 < 10)))) - Remote SQL: SELECT NULL FROM "S 1"."T 1" WHERE (("C 1" = 6)) -(13 rows) + Sort Key: ((SubPlan 1)) + -> Foreign Scan + Output: (SubPlan 1) + Relations: Aggregate on (public.ft2 t2) + Remote SQL: SELECT count(*) FILTER (WHERE ((c2 = 6) AND ("C 1" < 10))) FROM "S 1"."T 1" WHERE (((c2 % 6) = 0)) + SubPlan 1 + -> Foreign Scan on public.ft1 t1 + Output: (count(*) FILTER (WHERE ((t2.c2 = 6) AND (t2.c1 < 10)))) + Remote SQL: SELECT NULL FROM "S 1"."T 1" WHERE (("C 1" = 6)) +(11 rows) select distinct (select count(*) filter (where t2.c2 = 6 and t2.c1 < 10) from ft1 t1 where t1.c1 = 6) from ft2 t2 where t2.c2 % 6 = 0 order by 1; count @@ -3416,7 +3414,6 @@ select sum(q.a), count(q.b) from ft4 left join (select 13, avg(ft1.c1), sum(ft2. Output: sum(q.a), count(q.b) -> Nested Loop Left Join Output: q.a, q.b - Inner Unique: true Join Filter: ((ft4.c1)::numeric <= q.b) -> Foreign Scan on public.ft4 Output: ft4.c1, ft4.c2, ft4.c3 @@ -3429,7 +3426,7 @@ select sum(q.a), count(q.b) from ft4 left join (select 13, avg(ft1.c1), sum(ft2. 
Output: 13, (avg(ft1.c1)), NULL::bigint Relations: Aggregate on ((public.ft2) LEFT JOIN (public.ft1)) Remote SQL: SELECT 13, avg(r1."C 1"), NULL::bigint FROM ("S 1"."T 1" r2 LEFT JOIN "S 1"."T 1" r1 ON (((r1."C 1" = r2."C 1")))) -(17 rows) +(16 rows) select sum(q.a), count(q.b) from ft4 left join (select 13, avg(ft1.c1), sum(ft2.c1) from ft1 right join ft2 on (ft1.c1 = ft2.c1)) q(a, b, c) on (ft4.c1 <= q.b); sum | count @@ -4198,7 +4195,6 @@ explain (verbose, costs off) select * from ft3 f, loct3 l ------------------------------------------------------------- Hash Join Output: f.f1, f.f2, f.f3, l.f1, l.f2, l.f3 - Inner Unique: true Hash Cond: ((f.f3)::text = (l.f3)::text) -> Foreign Scan on public.ft3 f Output: f.f1, f.f2, f.f3 @@ -4208,7 +4204,7 @@ explain (verbose, costs off) select * from ft3 f, loct3 l -> Index Scan using loct3_f1_key on public.loct3 l Output: l.f1, l.f2, l.f3 Index Cond: (l.f1 = 'foo'::text) -(12 rows) +(11 rows) -- =================================================================== -- test writable foreign table stuff diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 88b912977e..63e92d94ef 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -162,8 +162,9 @@ _equalIntoClause(const IntoClause *a, const IntoClause *b) static bool _equalVar(const Var *a, const Var *b) { - COMPARE_SCALAR_FIELD(varno); + /* Compare varattno first since it has higher selectivity than varno */ COMPARE_SCALAR_FIELD(varattno); + COMPARE_SCALAR_FIELD(varno); COMPARE_SCALAR_FIELD(vartype); COMPARE_SCALAR_FIELD(vartypmod); COMPARE_SCALAR_FIELD(varcollid); diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c index bd0c58cd81..c7336b10f2 100644 --- a/src/backend/nodes/list.c +++ b/src/backend/nodes/list.c @@ -688,6 +688,33 @@ list_member_oid(const List *list, Oid datum) return false; } +/* + * Return true iff there is an equal member in target for every + * member in members + */ +bool 
+list_all_members_in(const List *members, const List *target) +{ + const ListCell *lc1, *lc2; + if (target == NIL && members != NIL) + return false; + foreach(lc1, members) + { + bool found = false; + foreach(lc2, target) + { + if (equal(lfirst(lc1), lfirst(lc2))) + { + found = true; + break; + } + } + if (!found) + return false; + } + return true; +} + /* * Delete the n'th cell (counting from 0) in list. * diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c index e8cdc90c31..7082c8b1b0 100644 --- a/src/backend/nodes/makefuncs.c +++ b/src/backend/nodes/makefuncs.c @@ -809,3 +809,18 @@ makeVacuumRelation(RangeVar *relation, Oid oid, List *va_cols) v->va_cols = va_cols; return v; } + + +/* + * makeUnqiueKey + */ +UniqueKey* +makeUniqueKey(List *exprs, List* positions, bool guarantee) +{ + UniqueKey * ukey = makeNode(UniqueKey); + Assert(list_length(exprs) == list_length(positions)); + ukey->exprs = exprs; + ukey->positions = positions; + ukey->guarantee = guarantee; + return ukey; +} diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile index 1e199ff66f..7b9820c25f 100644 --- a/src/backend/optimizer/path/Makefile +++ b/src/backend/optimizer/path/Makefile @@ -21,6 +21,7 @@ OBJS = \ joinpath.o \ joinrels.o \ pathkeys.o \ - tidpath.o + tidpath.o \ + uniquekeys.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 905bbe77d8..e7383979aa 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -39,6 +39,7 @@ #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/plancat.h" +#include "optimizer/planmain.h" #include "optimizer/planner.h" #include "optimizer/restrictinfo.h" #include "optimizer/tlist.h" @@ -222,14 +223,23 @@ make_one_rel(PlannerInfo *root, List *joinlist) set_base_rel_pathlists(root); /* - * Generate access paths for the entire join tree. 
+ * Remove any useless outer joins. Ideally this would be done during + * jointree preprocessing, but the necessary information isn't available + * until we've built baserel data structures, classified qual clauses + * and uniquekeys */ - rel = make_rel_from_joinlist(root, joinlist); + joinlist = remove_useless_joins(root, joinlist); /* - * The result should join all and only the query's base rels. + * Also, reduce any semijoins with unique inner rels to plain inner joins. + * Likewise, this can't be done until now for lack of needed info. */ - Assert(bms_equal(rel->relids, root->all_baserels)); + reduce_unique_semijoins(root); + + /* + * Generate access paths for the entire join tree. + */ + rel = make_rel_from_joinlist(root, joinlist); return rel; } @@ -786,6 +796,9 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) /* Consider TID scans */ create_tidscan_paths(root, rel); + + /* Set UniqueKeys for this relation */ + populate_baserel_uniquekeys(root, rel, rel->indexlist); } /* @@ -1276,6 +1289,8 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, /* Add paths to the append relation. */ add_paths_to_append_rel(root, rel, live_childrels); + if (IS_PARTITIONED_REL(rel)) + populate_partitionedrel_uniquekeys(root, rel, live_childrels); } @@ -2349,6 +2364,8 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, pathkeys, required_outer)); } + convert_subquery_uniquekeys(root, rel, sub_final_rel); + /* If outer rel allows parallelism, do same for partial paths. */ if (rel->consider_parallel && bms_is_empty(required_outer)) { diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index a21c295b99..c6799aa48c 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -920,6 +920,8 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, /* Apply partitionwise join technique, if possible. 
*/ try_partitionwise_join(root, rel1, rel2, joinrel, sjinfo, restrictlist); + + populate_joinrel_uniquekeys(root, joinrel, rel1, rel2, restrictlist, sjinfo->jointype); } diff --git a/src/backend/optimizer/path/uniquekeys.c b/src/backend/optimizer/path/uniquekeys.c new file mode 100644 index 0000000000..58fe16be55 --- /dev/null +++ b/src/backend/optimizer/path/uniquekeys.c @@ -0,0 +1,1033 @@ +/*------------------------------------------------------------------------- + * + * uniquekeys.c + * Utilities for matching and building unique keys + * + * Portions Copyright (c) 2020, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/optimizer/path/uniquekeys.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "rewrite/rewriteManip.h" + + +/* + * This struct is used to help populate_joinrel_uniquekeys, + * Set added_to_joinrel to true if a uniquekey has been added to joinrel. + * For a joinrel, if both sides have UniqueKey, then the combine of them + * must be unique for the joinrel as well, But we don't need to add it if + * either of them has been added to joinrel already. We use this struct to + * maintain such info. 
+ */ +typedef struct UniqueKeyContextData +{ + UniqueKey *uniquekey; + /* Set to true if the unique key has been added to joinrel->uniquekeys */ + bool added_to_joinrel; + /* If this uniquekey is still useful after join */ + bool useful; +} *UniqueKeyContext; + + +static List *gather_mergeable_baserestrictlist(RelOptInfo *rel); +static List *gather_mergeable_joinclauses(RelOptInfo *joinrel, + RelOptInfo *rel1, + RelOptInfo *rel2, + List *restirctlist, + JoinType jointype); +static bool match_index_to_baserestrictinfo(IndexOptInfo *unique_ind, + List *restrictlist); +static List *initililze_unqiuecontext_for_joinrel(RelOptInfo *joinrel, + RelOptInfo *inputrel); + +static bool innerrel_keeps_unique(PlannerInfo *root, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist, + bool reverse); +static bool clause_sides_match_join(RestrictInfo *rinfo, + Relids outerrelids, + Relids innerrelids); +static void add_uniquekey_from_index(RelOptInfo *rel, + IndexOptInfo *unique_index); +static void add_uniquekey_for_onerow(RelOptInfo *rel); + +/* Used for unique indexes checking for partitioned table */ +static bool index_constains_partkey(RelOptInfo *partrel, IndexOptInfo *ind); +static IndexOptInfo *simple_copy_indexinfo_to_parent(RelOptInfo *parentrel, + IndexOptInfo *from); +static bool simple_indexinfo_equal(IndexOptInfo *ind1, IndexOptInfo *ind2); +static void adjust_partition_unique_indexlist(RelOptInfo *parentrel, + RelOptInfo *childrel, + List **global_unique_index); +/* Helper function for groupres/distinctrel */ +static void add_uniquekey_from_sortgroups(PlannerInfo *root, + RelOptInfo *rel, + List *sortgroups); + +/* + * populate_baserel_uniquekeys + * Populate 'baserel' uniquekeys list by looking at the rel's unique index + * add baserestrictinfo + */ +void +populate_baserel_uniquekeys(PlannerInfo *root, + RelOptInfo *baserel, + List *indexlist) +{ + ListCell *lc; + List *restrictlist = gather_mergeable_baserestrictlist(baserel); + bool return_one_row 
= false; + List *matched_uk_indexes = NIL; + + Assert(baserel->rtekind == RTE_RELATION); + + if (root->parse->hasTargetSRFs) + return; + + foreach(lc, indexlist) + { + IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc); + if (!ind->unique || !ind->immediate || + (ind->indpred != NIL && !ind->predOK)) + continue; + + if (match_index_to_baserestrictinfo(ind, restrictlist)) + { + return_one_row = true; + break; + } + /* We can't guarantee if an expression returns a NULL value, so ignore it */ + if (ind->indexprs != NIL) + continue; + matched_uk_indexes = lappend(matched_uk_indexes, ind); + } + + if (return_one_row) + { + /* + * Since only 1 row returned, any column is unique + */ + add_uniquekey_for_onerow(baserel); + } + else + { + foreach(lc, matched_uk_indexes) + add_uniquekey_from_index(baserel, lfirst_node(IndexOptInfo, lc)); + } +} + + +/* + * populate_partitioned_rel_uniquekeys + * The unique index can be used for UniqueKey based on: + * 1). It must include partition keys + * 2). All the childrels must has the same indexes. + */ +void +populate_partitionedrel_uniquekeys(PlannerInfo *root, + RelOptInfo *rel, + List *childrels) +{ + ListCell *lc; + List *global_unique_indexlist = NIL; + RelOptInfo *childrel; + bool is_first = true; + + Assert(IS_PARTITIONED_REL(rel)); + + if (root->parse->hasTargetSRFs) + return; + + if (childrels == NIL) + return; + + childrel = linitial_node(RelOptInfo, childrels); + foreach(lc, childrel->indexlist) + { + IndexOptInfo *ind = lfirst(lc); + IndexOptInfo *global_ind; + if (!ind->unique || !ind->immediate || + (ind->indpred != NIL && !ind->predOK)) + continue; + + global_ind = simple_copy_indexinfo_to_parent(rel, ind); + /* + * If the unique index doesn't contain partkey, then it is unique + * on this partition only, so it is useless for us. 
+ */ + if (!index_constains_partkey(rel, global_ind)) + continue; + global_unique_indexlist = lappend(global_unique_indexlist, global_ind); + } + + /* Fast path */ + if (global_unique_indexlist == NIL) + return; + + foreach(lc, childrels) + { + RelOptInfo *child = lfirst(lc); + if (is_first) + { + is_first = false; + continue; + } + adjust_partition_unique_indexlist(rel, child, &global_unique_indexlist); + } + + /* Now we have the unique index list which as exactly same on all childrels, + * Set the UniqueIndex just like it is non-partition table + */ + populate_baserel_uniquekeys(root, rel, global_unique_indexlist); +} + + +/* + * populate_distinctrel_uniquekeys + */ +void +populate_distinctrel_uniquekeys(PlannerInfo *root, + RelOptInfo *inputrel, + RelOptInfo *distinctrel) +{ + /* The unique key before the distinct is still valid*/ + distinctrel->uniquekeys = list_copy(inputrel->uniquekeys); + add_uniquekey_from_sortgroups(root, distinctrel, root->parse->distinctClause); +} + +/* + * populate_grouprel_uniquekeys + */ +void +populate_grouprel_uniquekeys(PlannerInfo *root, + RelOptInfo *grouprel) +{ + Query *parse = root->parse; + if (parse->hasTargetSRFs) + return; + if (parse->groupingSets != NIL) + return; + if (parse->groupClause != NIL) + add_uniquekey_from_sortgroups(root, + grouprel, + root->parse->groupClause); + else + /* it has aggregation but without a group by, so must be one line return */ + add_uniquekey_for_onerow(grouprel); +} + +/* + * simple_copy_uniquekeys + * Using a function for the one-line code makes us easy to check where we simply + * copied the uniquiekeys. 
+ */ +void +simple_copy_uniquekeys(RelOptInfo *oldrel, + RelOptInfo *newrel) +{ + newrel->uniquekeys = oldrel->uniquekeys; +} + +/* + * populate_unionrel_uniquiekeys + */ +void +populate_unionrel_uniquiekeys(PlannerInfo *root, + RelOptInfo *unionrel) +{ + ListCell *lc; + List *exprs = NIL; + List *colnos = NIL; + int i = 1; + + Assert(unionrel->uniquekeys == NIL); + + if (root->parse->hasTargetSRFs) + return; + + foreach(lc, unionrel->reltarget->exprs) + { + exprs = lappend(exprs, lfirst(lc)); + colnos = lappend_int(colnos, i); + i++; + } + unionrel->uniquekeys = lappend(unionrel->uniquekeys, + makeUniqueKey(exprs, colnos, true)); +} + +/* + * populate_joinrel_uniquekeys + * + * populate uniquekeys for joinrel. We will check each relation to see if it's + * UniqueKey is still valid via innerrel_keeps_unique, if so, we add it to + * joinrel. The guarantee field will be changed on some outer join case. + + * For the uniquekey in either baserel which can't be unique after join, we still + * check if combination of unqiuekeys from both side is still useful for us, + * if yes, we add it to joinrel as well. We only set the guarantee field to true iff both + * uniquekeys have 'guarantee' equals true. 
+ * + */ +void +populate_joinrel_uniquekeys(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outerrel, RelOptInfo *innerrel, + List *restrictlist, JoinType jointype) +{ + ListCell *lc, *lc2; + List *clause_list = NIL; + List *outerrel_uniquekey_ctx; + List *innerrel_uniquekey_ctx; + + if (root->parse->hasTargetSRFs) + return; + + /* Care about the outerrel relation only for SEMI/ANTI join */ + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + { + foreach(lc, outerrel->uniquekeys) + { + UniqueKey *uniquekey = lfirst_node(UniqueKey, lc); + if (list_all_members_in(uniquekey->exprs, joinrel->reltarget->exprs)) + joinrel->uniquekeys = lappend(joinrel->uniquekeys, uniquekey); + } + return; + } + + /* Fast path */ + if (innerrel->uniquekeys == NIL || outerrel->uniquekeys == NIL) + return; + + outerrel_uniquekey_ctx = initililze_unqiuecontext_for_joinrel(joinrel, outerrel); + innerrel_uniquekey_ctx = initililze_unqiuecontext_for_joinrel(joinrel, innerrel); + + clause_list = gather_mergeable_joinclauses(joinrel, outerrel, innerrel, + restrictlist, jointype); + + if (innerrel_keeps_unique(root, outerrel, innerrel, clause_list, false)) + { + foreach(lc, innerrel_uniquekey_ctx) + { + UniqueKeyContext ctx = (UniqueKeyContext)lfirst(lc); + if (!list_all_members_in(ctx->uniquekey->exprs, joinrel->reltarget->exprs)) + { + /* The UniqueKey on baserel is not useful on the joinrel */ + ctx->useful = false; + continue; + } + + if ((jointype == JOIN_LEFT || jointype == JOIN_FULL) && ctx->uniquekey->guarantee) + { + /* We can't guarantee the uniqueness anymore due to the outer join can + * duplicate the null values on these columns, so we set guarantee to false. + * AND we don't set the ctx->added_to_joinrel on purpose since we still have + * chances have an guarantee uniquekey after we combine with the UniqueKey + * from another relation. 
+ */ + joinrel->uniquekeys = lappend(joinrel->uniquekeys, + makeUniqueKey(ctx->uniquekey->exprs, + ctx->uniquekey->positions, + false)); + + } + else + { + joinrel->uniquekeys = lappend(joinrel->uniquekeys, ctx->uniquekey); + ctx->added_to_joinrel = true; + } + } + } + + if (innerrel_keeps_unique(root, innerrel, outerrel, clause_list, true)) + { + foreach(lc, outerrel_uniquekey_ctx) + { + UniqueKeyContext ctx = (UniqueKeyContext)lfirst(lc); + if (!list_all_members_in(ctx->uniquekey->exprs, joinrel->reltarget->exprs)) + { + ctx->useful = false; + continue; + } + /* NULL values in outer rel can be duplicated under JOIN_FULL only */ + if (jointype == JOIN_FULL && ctx->uniquekey->guarantee) + { + joinrel->uniquekeys = lappend(joinrel->uniquekeys, + makeUniqueKey(ctx->uniquekey->exprs, + ctx->uniquekey->positions, + false)); + + } + else + { + joinrel->uniquekeys = lappend(joinrel->uniquekeys, ctx->uniquekey); + ctx->added_to_joinrel = true; + } + } + } + + /* The combination of the UniqueKey from both sides is unique as well, + * but no bother to add it if its subset has been added already + */ + foreach(lc, outerrel_uniquekey_ctx) + { + UniqueKeyContext context1 = (UniqueKeyContext) lfirst(lc); + if (context1->added_to_joinrel || !context1->useful) + continue; + foreach(lc2, innerrel_uniquekey_ctx) + { + UniqueKeyContext context2 = (UniqueKeyContext) lfirst(lc2); + List *exprs = NIL, *colnos = NIL; + if (context2->added_to_joinrel || !context2->useful) + continue; + exprs = list_copy(context1->uniquekey->exprs); + colnos = list_copy(context1->uniquekey->positions); + exprs = list_concat(exprs, context2->uniquekey->exprs); + colnos = list_concat(colnos, context2->uniquekey->positions); + /* We need both sides guarantee=true, we will say the uniqueness of the combination + * is guarantee. 
+ */ + joinrel->uniquekeys = lappend(joinrel->uniquekeys, makeUniqueKey(exprs, colnos, + context1->uniquekey->guarantee && + context2->uniquekey->guarantee)); + } + } +} + + +/* + * Used to avoid mutli scan of rel->reltarget->exprs, See populate_subquery_uniquekeys + */ +typedef struct SubqueryUniqueKeyData +{ + /* + * Only the Var reference to subquery's unique is unique as well, we can't + * guarantee others + */ + Var *var; + + /* The position of the var in the rel->reltarget */ + int pos; +} *SubqueryUniqueKeyContext; + +/* + * convert_subquery_uniquekeys + * + * currel is the RelOptInfo in current level, sub_final_rel is get from the fetch_upper_rel + * we need to convert the UnqiueKey from sub_final_rel to currel via the positions info in + * UniqueKey + */ +void convert_subquery_uniquekeys(PlannerInfo *root, + RelOptInfo *currel, + RelOptInfo *sub_final_rel) +{ + SubqueryUniqueKeyContext *ctx_array; + SubqueryUniqueKeyContext ctx; + Index max_colno_subq = 0; + ListCell *lc, *lc2; + int pos = 0; + + if (sub_final_rel->uniquekeys == NIL) + /* This should be a common case */ + return; + + /* + * Calculate max_colno in subquery. 
In fact we can check this with + * list_length(sub_final_rel->reltarget->exprs), However, reltarget + * is not set on UPPERREL_FINAL relation, so do it this way + */ + foreach(lc, sub_final_rel->uniquekeys) + { + UniqueKey * ukey = lfirst_node(UniqueKey, lc); + foreach(lc2, ukey->positions) + { + Index colno = lfirst_int(lc2); + if (max_colno_subq < colno) + max_colno_subq = colno; + } + } + + Assert(max_colno_subq > 0); + ctx_array = palloc0(sizeof(SubqueryUniqueKeyContext) * (max_colno_subq + 1)); + + /* + * Create an array for each expr in currel->reltarget->exprs, the array index + * is the colno in subquery, so that we can get the expr quickly given a colno_subq + */ + foreach(lc, currel->reltarget->exprs) + { + Var *var; + int colno_subq; + pos++; + if (!IsA(lfirst(lc), Var)) + continue; + + var = lfirst_node(Var, lc); + colno_subq = var->varattno; + if (colno_subq > max_colno_subq) + continue; + ctx_array[colno_subq] = palloc0(sizeof(struct SubqueryUniqueKeyData)); + ctx = ctx_array[colno_subq]; /* corresponding to subquery's uniquekey->positions[x] */ + ctx->pos = pos; /* the position in current targetlist, will be used to set UnqiueKey */ + ctx->var = var; + } + + /* Cover the UniqueKey from sub_final_rel to currel */ + foreach(lc, sub_final_rel->uniquekeys) + { + UniqueKey * ukey = lfirst_node(UniqueKey, lc); + bool uniquekey_useful = true; + List *exprs = NIL; + List *colnos = NIL; + foreach(lc2, ukey->positions) + { + Index sub_colno = lfirst_int(lc2); + ctx = ctx_array[sub_colno]; + if (ctx == NULL) + { + /* The column is not used outside */ + uniquekey_useful = false; + break; + } + exprs = lappend(exprs, ctx->var); + colnos = lappend_int(colnos, ctx->pos); + } + if (uniquekey_useful) + currel->uniquekeys = lappend(currel->uniquekeys, + makeUniqueKey(exprs, colnos, ukey->guarantee)); + } +} + + +/* + * innerrel_keeps_unique + * + * Check if Unqiue key of the innerrel is valid after join. 
innerrel's UniqueKey + * will be still valid if innerrel's uniquekey mergeop outrerel's uniquekey exists + * in clause_list. + * Note: the clause_list must be a list of mergeable restrictinfo already. + */ +static bool +innerrel_keeps_unique(PlannerInfo *root, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *clause_list, + bool reverse) +{ + ListCell *lc, *lc2, *lc3; + + if (outerrel->uniquekeys == NIL || innerrel->uniquekeys == NIL) + return false; + + foreach(lc, outerrel->uniquekeys) + { + List *outer_uq_exprs = lfirst_node(UniqueKey, lc)->exprs; + bool clauselist_matchs_all_exprs = true; + foreach(lc2, outer_uq_exprs) + { + Node *outer_uq_expr = lfirst(lc2); + bool find_uq_expr_in_clauselist = false; + foreach(lc3, clause_list) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc3); + Node *outer_expr; + if (reverse) + outer_expr = rinfo->outer_is_left ? get_rightop(rinfo->clause) : get_leftop(rinfo->clause); + else + outer_expr = rinfo->outer_is_left ? get_leftop(rinfo->clause) : get_rightop(rinfo->clause); + if (equal(outer_expr, outer_uq_expr)) + { + find_uq_expr_in_clauselist = true; + break; + } + } + if (!find_uq_expr_in_clauselist) + { + /* No need to check the next exprs in the current uniquekey */ + clauselist_matchs_all_exprs = false; + break; + } + } + + if (clauselist_matchs_all_exprs) + /* If the clauselist match any uk from outerrel, the innerrel will be unique + * based on the fact that innerrel->uniquekeys != NIL which is checked at the + * beginning + */ + return true; + } + return false; +} + + +/* + * relation_has_uniquekeys_for + * Returns true if we have proofs that 'rel' cannot return multiple rows with + * the same values in each of 'exprs'. Otherwise returns false. 
+ */ +bool +relation_has_uniquekeys_for(PlannerInfo *root, RelOptInfo *rel, List *exprs) +{ + ListCell *lc; + + foreach(lc, rel->uniquekeys) + { + UniqueKey *ukey = lfirst_node(UniqueKey, lc); + if (!ukey->guarantee) + continue; + if (list_all_members_in(ukey->exprs, exprs)) + return true; + } + return false; +} + + +/* + * Examine the rel's restriction clauses for usable var = const clauses + */ +static List* +gather_mergeable_baserestrictlist(RelOptInfo *rel) +{ + List *restrictlist = NIL; + ListCell *lc; + foreach(lc, rel->baserestrictinfo) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc); + + /* + * Note: can_join won't be set for a restriction clause, but + * mergeopfamilies will be if it has a mergejoinable operator and + * doesn't contain volatile functions. + */ + if (restrictinfo->mergeopfamilies == NIL) + continue; /* not mergejoinable */ + + /* + * The clause certainly doesn't refer to anything but the given rel. + * If either side is pseudoconstant then we can use it. 
+ */ + if (bms_is_empty(restrictinfo->left_relids)) + { + /* righthand side is inner */ + restrictinfo->outer_is_left = true; + } + else if (bms_is_empty(restrictinfo->right_relids)) + { + /* lefthand side is inner */ + restrictinfo->outer_is_left = false; + } + else + continue; + + /* OK, add to list */ + restrictlist = lappend(restrictlist, restrictinfo); + } + return restrictlist; +} + + +/* + * gather_mergeable_joinclauses + */ +static List* +gather_mergeable_joinclauses(RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist, + JoinType jointype) +{ + List *clause_list = NIL; + ListCell *lc; + foreach(lc, restrictlist) + { + RestrictInfo *restrictinfo = (RestrictInfo *)lfirst(lc); + if (IS_OUTER_JOIN(jointype) && + RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids)) + continue; + + /* Ignore if it's not a mergejoinable clause */ + if (!restrictinfo->can_join || + restrictinfo->mergeopfamilies == NIL) + continue; /* not mergejoinable */ + + /* + * Check if clause has the form "outer op inner" or "inner op outer", + * and if so mark which side is inner. 
+ */ + if (!clause_sides_match_join(restrictinfo, outerrel->relids, innerrel->relids)) + continue; /* no good for these input relations */ + + /* OK, add to list */ + clause_list = lappend(clause_list, restrictinfo); + } + return clause_list; +} + + +/* + * Return true if uk = Const in the restrictlist + */ +static bool +match_index_to_baserestrictinfo(IndexOptInfo *unique_ind, List *restrictlist) +{ + int c = 0; + + /* A fast path to avoid the 2 loop scan */ + if (list_length(restrictlist) < unique_ind->ncolumns) + return false; + + for(c = 0; c < unique_ind->ncolumns; c++) + { + ListCell *lc; + bool found_in_restrictinfo = false; + foreach(lc, restrictlist) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + Node *rexpr; + + /* + * The condition's equality operator must be a member of the + * index opfamily, else it is not asserting the right kind of + * equality behavior for this index. We check this first + * since it's probably cheaper than match_index_to_operand(). + */ + if (!list_member_oid(rinfo->mergeopfamilies, unique_ind->opfamily[c])) + continue; + + /* + * XXX at some point we may need to check collations here too. + * For the moment we assume all collations reduce to the same + * notion of equality. 
+ */ + + /* OK, see if the condition operand matches the index key */ + if (rinfo->outer_is_left) + rexpr = get_rightop(rinfo->clause); + else + rexpr = get_leftop(rinfo->clause); + + if (match_index_to_operand(rexpr, c, unique_ind)) + { + found_in_restrictinfo = true; + break; + } + } + if (!found_in_restrictinfo) + return false; + } + return true; +} + +/* + * add_uniquekey_from_index + * We only add the Index Vars whose expr exists in rel->reltarget + */ +static void +add_uniquekey_from_index(RelOptInfo *rel, IndexOptInfo *unique_index) +{ + int c; + List *exprs = NIL; + List *positions = NIL; + bool guarantee = true; + + /* Only add the index which exists in rel->reltarget */ + for(c = 0; c < unique_index->ncolumns; c++) + { + ListCell *lc; + bool find_in_exprs = false; + /* To keep the uniquekey short, We only add it if it exists in rel->reltrget->exprs */ + foreach(lc, rel->reltarget->exprs) + { + Var *var; + if (!IsA(lfirst(lc), Var)) + continue; + var = lfirst_node(Var, lc); + if (var->varattno < InvalidAttrNumber) + /* System column */ + continue; + /* Must check not null for unique index */ + if (!bms_is_member(var->varattno, rel->not_null_cols)) + guarantee = false; + + if (match_index_to_operand((Node *)lfirst(lc), c, unique_index)) + { + find_in_exprs = true; + exprs = lappend(exprs, lfirst(lc)); + positions = lappend_int(positions, c+1); + break; + } + } + if (!find_in_exprs) + return; + } + + if (exprs != NIL) + { + rel->uniquekeys = lappend(rel->uniquekeys, + makeUniqueKey(exprs, positions, guarantee)); + } +} + + +/* + * add_uniquekey_for_onerow + * If we are sure about the relation only returns one row, then all the columns + * are unique + */ +void +add_uniquekey_for_onerow(RelOptInfo *rel) +{ + int c = 1; + ListCell *lc; + foreach(lc, rel->reltarget->exprs) + { + /* Every columns in this relation is unique since only 1 row returned + * No bother to check it is a var or nullable, we can guarantee the uniqueness + */ + rel->uniquekeys = 
lappend(rel->uniquekeys, + makeUniqueKey(list_make1(lfirst(lc)), + list_make1_int(c), + true)); + c++; + } +} + +/* + * initililze_unqiuecontext_for_joinrel + * Return a List of UniqueKeyContext for an inputrel, we also filter out + * all the unqiuekeys which are not possible to use later + */ +static List * +initililze_unqiuecontext_for_joinrel(RelOptInfo *joinrel, RelOptInfo *inputrel) +{ + List *res = NIL; + ListCell *lc; + foreach(lc, inputrel->uniquekeys) + { + UniqueKeyContext context; + context = palloc(sizeof(struct UniqueKeyContextData)); + context->uniquekey = lfirst_node(UniqueKey, lc); + context->added_to_joinrel = false; + context->useful = true; + res = lappend(res, context); + } + return res; +} + +/* + * clause_sides_match_join + * Determine whether a join clause is of the right form to use in this join. + * + * We already know that the clause is a binary opclause referencing only the + * rels in the current join. The point here is to check whether it has the + * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr", + * rather than mixing outer and inner vars on either side. If it matches, + * we set the transient flag outer_is_left to identify which side is which. + */ +static bool +clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, + Relids innerrelids) +{ + if (bms_is_subset(rinfo->left_relids, outerrelids) && + bms_is_subset(rinfo->right_relids, innerrelids)) + { + /* lefthand side is outer */ + rinfo->outer_is_left = true; + return true; + } + else if (bms_is_subset(rinfo->left_relids, innerrelids) && + bms_is_subset(rinfo->right_relids, outerrelids)) + { + /* righthand side is outer */ + rinfo->outer_is_left = false; + return true; + } + return false; /* no good for these input relations */ +} + + +/* + * Partitoned table Unique Keys. + * The partition table unique key is maintained as: + * 1. The index must be unique as usual. + * 2. The index must contains partition key. + * 3. 
The index must exist on all the child rel. see simple_indexinfo_equal for + * how we compare it. + */ + +/* index_constains_partkey + * retrun true if the index contains the partiton key. + */ +static bool +index_constains_partkey(RelOptInfo *partrel, IndexOptInfo *ind) +{ + ListCell *lc; + int i; + Assert(IS_PARTITIONED_REL(partrel)); + + for(i = 0; i < partrel->part_scheme->partnatts; i++) + { + Node *part_expr = linitial(partrel->partexprs[i]); + bool found_in_index = false; + foreach(lc, ind->indextlist) + { + Expr *index_expr = lfirst_node(TargetEntry, lc)->expr; + if (equal(index_expr, part_expr)) + { + found_in_index = true; + break; + } + } + if (!found_in_index) + return false; + } + return true; +} + +/* + * simple_indexinfo_equal + * + * Used to check if the 2 index is same as each other. The index here + * is COPIED from childrel and did some tiny changes(see simple_copy_indexinfo_to_parent) + */ + +static bool +simple_indexinfo_equal(IndexOptInfo *ind1, IndexOptInfo *ind2) +{ + Size oid_cmp_len = sizeof(Oid) * ind1->ncolumns; + return ind1->ncolumns == ind2->ncolumns && + ind1->unique == ind2->unique && + memcmp(ind1->indexkeys, ind2->indexkeys, sizeof(int) * ind1->ncolumns) == 0 && + memcmp(ind1->opfamily, ind2->opfamily, oid_cmp_len) == 0 && + memcmp(ind1->opcintype, ind2->opcintype, oid_cmp_len) == 0 && + memcmp(ind1->sortopfamily, ind2->sortopfamily, oid_cmp_len) == 0 && + equal(ind1->indextlist, ind2->indextlist); +} + +/* + * Copy these macros from copyfuncs.c since I don't want make + * simple_copy_indexinfo_to_parent public since it is a so customized copy. 
+ */ + +#define COPY_POINTER_FIELD(fldname, sz) \ + do { \ + Size _size = (sz); \ + newnode->fldname = palloc(_size); \ + memcpy(newnode->fldname, from->fldname, _size); \ + } while (0) + +#define COPY_NODE_FIELD(fldname) \ + (newnode->fldname = copyObjectImpl(from->fldname)) + +#define COPY_SCALAR_FIELD(fldname) \ + (newnode->fldname = from->fldname) + + +/* + * simple_copy_indexinfo_to_parent + * Copy the IndexInfo from child index info to parent, which will be used to + * 1. Test if the same index exists in all the childrels. + * 2. if the parentrel->reltarget/basicrestrinct info matches this index. + * The copied and modified index is just used in this scope. + */ +static IndexOptInfo * +simple_copy_indexinfo_to_parent(RelOptInfo *parentrel, + IndexOptInfo *from) +{ + IndexOptInfo *newnode = makeNode(IndexOptInfo); + + COPY_SCALAR_FIELD(ncolumns); + COPY_SCALAR_FIELD(nkeycolumns); + COPY_SCALAR_FIELD(unique); + COPY_SCALAR_FIELD(immediate); + /* We just need to know if it is NIL or not */ + COPY_SCALAR_FIELD(indpred); + COPY_SCALAR_FIELD(predOK); + COPY_POINTER_FIELD(indexkeys, from->ncolumns * sizeof(int)); + COPY_POINTER_FIELD(indexcollations, from->ncolumns * sizeof(Oid)); + COPY_POINTER_FIELD(opfamily, from->ncolumns * sizeof(Oid)); + COPY_POINTER_FIELD(opcintype, from->ncolumns * sizeof(Oid)); + COPY_POINTER_FIELD(sortopfamily, from->ncolumns * sizeof(Oid)); + COPY_NODE_FIELD(indextlist); + + /* + * We have to change this to let the later index match (like pk = 1) + * rel->reltarget work + */ + ChangeVarNodes((Node*) newnode->indextlist, + from->rel->relid, + parentrel->relid, 0); + newnode->rel = parentrel; + return newnode; +} + +/* + * adjust_partition_unique_indexlist + * + * Check the current known global_unique_indexes to see if every index here + * all exists in the given childrel, if not, it will be removed from + * the list + */ +static void +adjust_partition_unique_indexlist(RelOptInfo *parentrel, + RelOptInfo *childrel, + List 
**global_unique_indexes) +{ + ListCell *lc, *lc2; + foreach(lc, *global_unique_indexes) + { + IndexOptInfo *g_ind = lfirst_node(IndexOptInfo, lc); + bool found_in_child = false; + + foreach(lc2, childrel->indexlist) + { + IndexOptInfo *p_ind = lfirst_node(IndexOptInfo, lc2); + IndexOptInfo *p_ind_copy; + if (!p_ind->unique || !p_ind->immediate || + (p_ind->indpred != NIL && !p_ind->predOK)) + continue; + p_ind_copy = simple_copy_indexinfo_to_parent(parentrel, p_ind); + if (simple_indexinfo_equal(p_ind_copy, g_ind)) + { + found_in_child = true; + break; + } + } + + if (!found_in_child) + /* There is no same index on other childrel, remove it */ + *global_unique_indexes = foreach_delete_current(*global_unique_indexes, lc); + } +} + +/* Helper function for groupres/distinctrel */ +static void +add_uniquekey_from_sortgroups(PlannerInfo *root, RelOptInfo *rel, List *sortgroups) +{ + Query *parse = root->parse; + ListCell *lc; + List *exprs = NIL, *colnos = NIL; + foreach(lc, sortgroups) + { + Index sortref = lfirst_node(SortGroupClause, lc)->tleSortGroupRef; + int c = 1; + foreach(lc, parse->targetList) + { + TargetEntry *tle = lfirst_node(TargetEntry, lc); + if (tle->ressortgroupref == sortref) + { + exprs = lappend(exprs, tle->expr); + colnos = lappend_int(colnos, c); + } + ++c; + } + } + rel->uniquekeys = lappend(rel->uniquekeys, makeUniqueKey(exprs, colnos, true)); +} diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index d0ff660284..b96c6290b7 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -37,7 +37,6 @@ static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); static void remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids); static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); -static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel); static bool rel_is_distinct_for(PlannerInfo 
*root, RelOptInfo *rel, List *clause_list); static Oid distinct_col_search(int colno, List *colnos, List *opids); @@ -178,14 +177,6 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) innerrel = find_base_rel(root, innerrelid); - /* - * Before we go to the effort of checking whether any innerrel variables - * are needed above the join, make a quick check to eliminate cases in - * which we will surely be unable to prove uniqueness of the innerrel. - */ - if (!rel_supports_distinctness(root, innerrel)) - return false; - /* Compute the relid set for the join we are considering */ joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); @@ -535,14 +526,6 @@ reduce_unique_semijoins(PlannerInfo *root) innerrel = find_base_rel(root, innerrelid); - /* - * Before we trouble to run generate_join_implied_equalities, make a - * quick check to eliminate cases in which we will surely be unable to - * prove uniqueness of the innerrel. - */ - if (!rel_supports_distinctness(root, innerrel)) - continue; - /* Compute the relid set for the join we are considering */ joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); @@ -570,54 +553,6 @@ reduce_unique_semijoins(PlannerInfo *root) } -/* - * rel_supports_distinctness - * Could the relation possibly be proven distinct on some set of columns? - * - * This is effectively a pre-checking function for rel_is_distinct_for(). - * It must return true if rel_is_distinct_for() could possibly return true - * with this rel, but it should not expend a lot of cycles. The idea is - * that callers can avoid doing possibly-expensive processing to compute - * rel_is_distinct_for()'s argument lists if the call could not possibly - * succeed. - */ -static bool -rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel) -{ - /* We only know about baserels ... 
*/ - if (rel->reloptkind != RELOPT_BASEREL) - return false; - if (rel->rtekind == RTE_RELATION) - { - /* - * For a plain relation, we only know how to prove uniqueness by - * reference to unique indexes. Make sure there's at least one - * suitable unique index. It must be immediately enforced, and if - * it's a partial index, it must match the query. (Keep these - * conditions in sync with relation_has_unique_index_for!) - */ - ListCell *lc; - - foreach(lc, rel->indexlist) - { - IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc); - - if (ind->unique && ind->immediate && - (ind->indpred == NIL || ind->predOK)) - return true; - } - } - else if (rel->rtekind == RTE_SUBQUERY) - { - Query *subquery = root->simple_rte_array[rel->relid]->subquery; - - /* Check if the subquery has any qualities that support distinctness */ - if (query_supports_distinctness(subquery)) - return true; - } - /* We have no proof rules for any other rtekinds. */ - return false; -} /* * rel_is_distinct_for @@ -640,83 +575,34 @@ rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel) static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) { - /* - * We could skip a couple of tests here if we assume all callers checked - * rel_supports_distinctness first, but it doesn't seem worth taking any - * risk for. - */ - if (rel->reloptkind != RELOPT_BASEREL) - return false; - if (rel->rtekind == RTE_RELATION) - { - /* - * Examine the indexes to see if we have a matching unique index. - * relation_has_unique_index_for automatically adds any usable - * restriction clauses for the rel, so we needn't do that here. 
- */ - if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL)) - return true; - } - else if (rel->rtekind == RTE_SUBQUERY) - { - Index relid = rel->relid; - Query *subquery = root->simple_rte_array[relid]->subquery; - List *colnos = NIL; - List *opids = NIL; - ListCell *l; - /* - * Build the argument lists for query_is_distinct_for: a list of - * output column numbers that the query needs to be distinct over, and - * a list of equality operators that the output columns need to be - * distinct according to. - * - * (XXX we are not considering restriction clauses attached to the - * subquery; is that worth doing?) - */ - foreach(l, clause_list) + ListCell *lc1, *lc2, *lc3; + foreach(lc1, rel->uniquekeys) + { + UniqueKey *uqk = lfirst_node(UniqueKey, lc1); + bool all_uqk_exprs_found = true; + foreach(lc2, uqk->exprs) { - RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); - Oid op; - Var *var; - - /* - * Get the equality operator we need uniqueness according to. - * (This might be a cross-type operator and thus not exactly the - * same operator the subquery would consider; that's all right - * since query_is_distinct_for can resolve such cases.) The - * caller's mergejoinability test should have selected only - * OpExprs. - */ - op = castNode(OpExpr, rinfo->clause)->opno; - - /* caller identified the inner side for us */ - if (rinfo->outer_is_left) - var = (Var *) get_rightop(rinfo->clause); - else - var = (Var *) get_leftop(rinfo->clause); - - /* - * We may ignore any RelabelType node above the operand. (There - * won't be more than one, since eval_const_expressions() has been - * applied already.) - */ - if (var && IsA(var, RelabelType)) - var = (Var *) ((RelabelType *) var)->arg; - - /* - * If inner side isn't a Var referencing a subquery output column, - * this clause doesn't help us. 
- */ - if (!var || !IsA(var, Var) || - var->varno != relid || var->varlevelsup != 0) - continue; - - colnos = lappend_int(colnos, var->varattno); - opids = lappend_oid(opids, op); + Node *uq_expr = lfirst(lc2); + bool find_uq_exprs_in_clause_list = false; + foreach(lc3, clause_list) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc3); + Node *clause_expr = rinfo->outer_is_left ? get_rightop(rinfo->clause): get_leftop(rinfo->clause) ; + if (equal(uq_expr, clause_expr)) + { + find_uq_exprs_in_clause_list = true; + break; + } + } + if (!find_uq_exprs_in_clause_list) + { + all_uqk_exprs_found = false; + break; + } } - - if (query_is_distinct_for(subquery, colnos, opids)) + if (all_uqk_exprs_found) + /* This UnqiueKey is what we want */ return true; } return false; @@ -976,13 +862,6 @@ innerrel_is_unique(PlannerInfo *root, if (restrictlist == NIL) return false; - /* - * Make a quick check to eliminate cases in which we will surely be unable - * to prove uniqueness of the innerrel. - */ - if (!rel_supports_distinctness(root, innerrel)) - return false; - /* * Query the cache to see if we've managed to prove that innerrel is * unique for any subset of this outerrel. 
We don't need an exact match, diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index e978b491f6..be78d061ae 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -830,6 +830,15 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, { Node *qual = (Node *) lfirst(l); + /* set the not null info now */ + ListCell *lc; + foreach(lc, find_nonnullable_vars(qual)) + { + Var *var = lfirst_node(Var, lc); + RelOptInfo *rel = root->simple_rel_array[var->varno]; + if (var->varattno > InvalidAttrNumber) + rel->not_null_cols = bms_add_member(rel->not_null_cols, var->varattno); + } distribute_qual_to_rels(root, qual, false, below_outer_join, JOIN_INNER, root->qual_security_level, diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 62dfc6d44a..6ad73cb57b 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -213,19 +213,6 @@ query_planner(PlannerInfo *root, */ fix_placeholder_input_needed_levels(root); - /* - * Remove any useless outer joins. Ideally this would be done during - * jointree preprocessing, but the necessary information isn't available - * until we've built baserel data structures and classified qual clauses. - */ - joinlist = remove_useless_joins(root, joinlist); - - /* - * Also, reduce any semijoins with unique inner rels to plain inner joins. - * Likewise, this can't be done until now for lack of needed info. - */ - reduce_unique_semijoins(root); - /* * Now distribute "placeholders" to base rels as needed. 
This has to be * done after join removal because removal could change whether a diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index b65abf6046..8043fc4382 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -2386,6 +2386,8 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, add_path(final_rel, path); } + simple_copy_uniquekeys(current_rel, final_rel); + /* * Generate partial paths for final_rel, too, if outer query levels might * be able to make use of them. @@ -3810,6 +3812,23 @@ create_grouping_paths(PlannerInfo *root, Query *parse = root->parse; RelOptInfo *grouped_rel; RelOptInfo *partially_grouped_rel; + List *required_unique_keys = NIL; + + if (root->parse->groupingSets == NIL) + { + required_unique_keys = get_sortgrouplist_exprs(parse->groupClause, + parse->targetList); + /* + * If the groupby clauses is unique already, groupping node is not necessary + * if there is no aggreation functions + */ + if (required_unique_keys != NIL && + !parse->hasAggs && + !parse->hasWindowFuncs && + parse->havingQual == NULL && + relation_has_uniquekeys_for(root, input_rel, required_unique_keys)) + return input_rel; + } /* * Create grouping relation to hold fully aggregated grouping and/or @@ -3898,6 +3917,8 @@ create_grouping_paths(PlannerInfo *root, } set_cheapest(grouped_rel); + + populate_grouprel_uniquekeys(root, grouped_rel); return grouped_rel; } @@ -4615,7 +4636,7 @@ create_window_paths(PlannerInfo *root, /* Now choose the best path(s) */ set_cheapest(window_rel); - + simple_copy_uniquekeys(input_rel, window_rel); return window_rel; } @@ -4734,6 +4755,12 @@ create_distinct_paths(PlannerInfo *root, bool allow_hash; Path *path; ListCell *lc; + List *required_unique_keys = get_sortgrouplist_exprs(parse->distinctClause, + parse->targetList); + + /* If we the result if unqiue already, we just return the input_rel directly */ + if (relation_has_uniquekeys_for(root, input_rel, 
required_unique_keys)) + return input_rel; /* For now, do all work in the (DISTINCT, NULL) upperrel */ distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL); @@ -4912,7 +4939,7 @@ create_distinct_paths(PlannerInfo *root, /* Now choose the best path(s) */ set_cheapest(distinct_rel); - + populate_distinctrel_uniquekeys(root, input_rel, distinct_rel); return distinct_rel; } @@ -5060,6 +5087,8 @@ create_ordered_paths(PlannerInfo *root, */ Assert(ordered_rel->pathlist != NIL); + simple_copy_uniquekeys(input_rel, ordered_rel); + return ordered_rel; } diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 951aed80e7..8aa4d24cb0 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -689,6 +689,8 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, /* Undo effects of possibly forcing tuple_fraction to 0 */ root->tuple_fraction = save_fraction; + /* Add the UniqueKeys */ + populate_unionrel_uniquiekeys(root, result_rel); return result_rel; } diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index d82fc5ab8b..34d30b181c 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -117,6 +117,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, Relation relation; bool hasindex; List *indexinfos = NIL; + int i; /* * We need not lock the relation since it was already locked, either by @@ -460,6 +461,13 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, if (inhparent && relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) set_relation_partition_info(root, rel, relation); + Assert(rel->not_null_cols == NULL); + for(i = 0; i < relation->rd_att->natts; i++) + { + if (relation->rd_att->attrs[i].attnotnull) + rel->not_null_cols = bms_add_member(rel->not_null_cols, i+1); + } + table_close(relation, NoLock); /* diff --git 
a/src/include/nodes/makefuncs.h b/src/include/nodes/makefuncs.h index 31d9aedeeb..d4de97016c 100644 --- a/src/include/nodes/makefuncs.h +++ b/src/include/nodes/makefuncs.h @@ -16,6 +16,7 @@ #include "nodes/execnodes.h" #include "nodes/parsenodes.h" +#include "nodes/pathnodes.h" extern A_Expr *makeA_Expr(A_Expr_Kind kind, List *name, @@ -105,4 +106,5 @@ extern GroupingSet *makeGroupingSet(GroupingSetKind kind, List *content, int loc extern VacuumRelation *makeVacuumRelation(RangeVar *relation, Oid oid, List *va_cols); +extern UniqueKey* makeUniqueKey(List *exprs, List *positions, bool grantee); #endif /* MAKEFUNC_H */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 8a76afe8cc..679cc4cc9c 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -261,6 +261,7 @@ typedef enum NodeTag T_EquivalenceMember, T_PathKey, T_PathTarget, + T_UniqueKey, T_RestrictInfo, T_IndexClause, T_PlaceHolderVar, diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 0ceb809644..accec6df4e 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -687,6 +687,7 @@ typedef struct RelOptInfo PlannerInfo *subroot; /* if subquery */ List *subplan_params; /* if subquery */ int rel_parallel_workers; /* wanted number of parallel workers */ + Relids not_null_cols; /* the non null column for this relation, start from 1 */ /* Information about foreign tables and foreign joins */ Oid serverid; /* identifies server for the table or join */ @@ -706,6 +707,7 @@ typedef struct RelOptInfo QualCost baserestrictcost; /* cost of evaluating the above */ Index baserestrict_min_security; /* min security_level found in * baserestrictinfo */ + List *uniquekeys; /* List of UniqueKey */ List *joininfo; /* RestrictInfo structures for join clauses * involving this rel */ bool has_eclass_joins; /* T means joininfo is incomplete */ @@ -1017,6 +1019,27 @@ typedef struct PathKey } PathKey; +/* + * UniqueKey + * + * Represents the 
unique properties held by a RelOptInfo + * exprs is a list of exprs which is unqiue on current RelOptInfo. + * positions is a list of position where the corresponding exprs's location in + * current reloptinfo->reltarget. It will be used transate the exprs's info + * in subquery. + * guarantee: true means it can guarantee the uniqueness all the time, false if we + * can only guarantee the uniqueness without considering the null values. This + * field is necessary for remove_useless_join & reduce_unique_semijions since + * these cases don't care about the null values. + */ +typedef struct UniqueKey +{ + NodeTag type; + List *exprs; + List *positions; + bool guarantee; +} UniqueKey; + /* * PathTarget * diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h index 14ea2766ad..5dfb93895c 100644 --- a/src/include/nodes/pg_list.h +++ b/src/include/nodes/pg_list.h @@ -528,6 +528,8 @@ extern bool list_member_ptr(const List *list, const void *datum); extern bool list_member_int(const List *list, int datum); extern bool list_member_oid(const List *list, Oid datum); +extern bool list_all_members_in(const List *members, const List *target); + extern List *list_delete(List *list, void *datum); extern List *list_delete_ptr(List *list, void *datum); extern List *list_delete_int(List *list, int datum); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 9ab73bd20c..f1967d15c2 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -240,4 +240,41 @@ extern PathKey *make_canonical_pathkey(PlannerInfo *root, extern void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, List *live_childrels); +/* + * uniquekeys.c + * Utilities for matching and building unique keys + */ +extern void populate_baserel_uniquekeys(PlannerInfo *root, + RelOptInfo *baserel, + List* unique_index_list); +extern void populate_partitionedrel_uniquekeys(PlannerInfo *root, + RelOptInfo *rel, + List *childrels); +extern void 
populate_distinctrel_uniquekeys(PlannerInfo *root, + RelOptInfo *inputrel, + RelOptInfo *distinctrel); +extern void populate_grouprel_uniquekeys(PlannerInfo *root, + RelOptInfo *grouprel); +extern void populate_unionrel_uniquiekeys(PlannerInfo *root, + RelOptInfo *unionrel); +extern void simple_copy_uniquekeys(RelOptInfo *oldrel, + RelOptInfo *newrel); +extern void convert_subquery_uniquekeys(PlannerInfo *root, + RelOptInfo *currel, + RelOptInfo *sub_final_rel); +extern void populate_joinrel_uniquekeys(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *rel1, + RelOptInfo *rel2, + List *restrictlist, + JoinType jointype); + +extern bool innerl_is_unique_v2(PlannerInfo *root, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist); +extern bool relation_has_uniquekeys_for(PlannerInfo *root, + RelOptInfo *rel, + List *exprs); + #endif /* PATHS_H */ diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 14cdcfcca6..4a701af85b 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -870,14 +870,12 @@ explain (costs off) select distinct max(unique2) from tenk1; QUERY PLAN --------------------------------------------------------------------- - HashAggregate - Group Key: $0 + Result InitPlan 1 (returns $0) -> Limit -> Index Only Scan Backward using tenk1_unique2 on tenk1 Index Cond: (unique2 IS NOT NULL) - -> Result -(7 rows) +(5 rows) select distinct max(unique2) from tenk1; max @@ -1036,7 +1034,7 @@ explain (costs off) select distinct min(f1), max(f1) from minmaxtest; QUERY PLAN --------------------------------------------------------------------------------------------- - Unique + Result InitPlan 1 (returns $0) -> Limit -> Merge Append @@ -1059,10 +1057,7 @@ explain (costs off) -> Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8 Index Cond: (f1 IS NOT NULL) -> Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9 - -> Sort - 
Sort Key: ($0), ($1) - -> Result -(26 rows) +(23 rows) select distinct min(f1), max(f1) from minmaxtest; min | max @@ -1092,12 +1087,10 @@ create temp table t2 (x int, y int, z int, primary key (x, y)); create temp table t3 (a int, b int, c int, primary key(a, b) deferrable); -- Non-primary-key columns can be removed from GROUP BY explain (costs off) select * from t1 group by a,b,c,d; - QUERY PLAN ----------------------- - HashAggregate - Group Key: a, b - -> Seq Scan on t1 -(3 rows) + QUERY PLAN +---------------- + Seq Scan on t1 +(1 row) -- No removal can happen if the complete PK is not present in GROUP BY explain (costs off) select a,c from t1 group by a,c,d; @@ -1112,31 +1105,27 @@ explain (costs off) select a,c from t1 group by a,c,d; explain (costs off) select * from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z; - QUERY PLAN ------------------------------------------------------- - HashAggregate - Group Key: t1.a, t1.b, t2.x, t2.y - -> Hash Join - Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) - -> Seq Scan on t2 - -> Hash - -> Seq Scan on t1 -(7 rows) + QUERY PLAN +------------------------------------------------ + Hash Join + Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) + -> Seq Scan on t2 + -> Hash + -> Seq Scan on t1 +(5 rows) -- Test case where t1 can be optimized but not t2 explain (costs off) select t1.*,t2.x,t2.z from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z; - QUERY PLAN ------------------------------------------------------- - HashAggregate - Group Key: t1.a, t1.b, t2.x, t2.z - -> Hash Join - Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) - -> Seq Scan on t2 - -> Hash - -> Seq Scan on t1 -(7 rows) + QUERY PLAN +------------------------------------------------ + Hash Join + Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) + -> Seq Scan on t2 + -> Hash + -> Seq Scan on t1 +(5 rows) -- Cannot optimize when PK is deferrable explain (costs off) select * from t3 group by 
a,b,c; @@ -1161,12 +1150,10 @@ explain (costs off) select * from t1 group by a,b,c,d; -- Okay to remove columns if we're only querying the parent. explain (costs off) select * from only t1 group by a,b,c,d; - QUERY PLAN ----------------------- - HashAggregate - Group Key: a, b - -> Seq Scan on t1 -(3 rows) + QUERY PLAN +---------------- + Seq Scan on t1 +(1 row) create temp table p_t1 ( a int, @@ -1179,14 +1166,12 @@ create temp table p_t1_1 partition of p_t1 for values in(1); create temp table p_t1_2 partition of p_t1 for values in(2); -- Ensure we can remove non-PK columns for partitioned tables. explain (costs off) select * from p_t1 group by a,b,c,d; - QUERY PLAN --------------------------------- - HashAggregate - Group Key: p_t1.a, p_t1.b - -> Append - -> Seq Scan on p_t1_1 - -> Seq Scan on p_t1_2 -(5 rows) + QUERY PLAN +-------------------------- + Append + -> Seq Scan on p_t1_1 + -> Seq Scan on p_t1_2 +(3 rows) drop table t1 cascade; NOTICE: drop cascades to table t1c diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 761376b007..6e5b8f83f4 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -4414,37 +4414,25 @@ select d.* from d left join (select distinct * from b) s -- not in the join condition. (Note: as of 9.6, we notice that b.id is a -- primary key and so drop b.c_id from the GROUP BY of the resulting plan; -- but this happens too late for join removal in the outer plan level.) +-- XXXX: since b.id is unqiue now so the group by cluase is erased, so +-- the join removal can happen now. 
explain (costs off) select d.* from d left join (select * from b group by b.id, b.c_id) s on d.a = s.id; - QUERY PLAN ------------------------------------------- - Merge Right Join - Merge Cond: (b.id = d.a) - -> Group - Group Key: b.id - -> Index Scan using b_pkey on b - -> Sort - Sort Key: d.a - -> Seq Scan on d -(8 rows) + QUERY PLAN +--------------- + Seq Scan on d +(1 row) -- similarly, but keying off a DISTINCT clause +-- XXX we erase distinct clause, so the joinremoval is ok as well. explain (costs off) select d.* from d left join (select distinct * from b) s on d.a = s.id; - QUERY PLAN --------------------------------------- - Merge Right Join - Merge Cond: (b.id = d.a) - -> Unique - -> Sort - Sort Key: b.id, b.c_id - -> Seq Scan on b - -> Sort - Sort Key: d.a - -> Seq Scan on d -(9 rows) + QUERY PLAN +--------------- + Seq Scan on d +(1 row) -- check join removal works when uniqueness of the join condition is enforced -- by a UNION diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out index 11c6f50fbf..e9263d6151 100644 --- a/src/test/regress/expected/select_distinct.out +++ b/src/test/regress/expected/select_distinct.out @@ -306,3 +306,398 @@ SELECT null IS NOT DISTINCT FROM null as "yes"; t (1 row) +create table select_distinct_a(pk1 int, pk2 char(20), uk1 char(20) not null, uk2 int, e int, primary key(pk1, pk2)); +create unique index select_distinct_a_uk on select_distinct_a(uk1, uk2); +create table select_distinct_b(a int, b char(20), pk1 char(20), pk2 int, e int, primary key(pk1, pk2)); +-- distinct erased since (pk1, pk2) +explain (costs off) select distinct * from select_distinct_a; + QUERY PLAN +------------------------------- + Seq Scan on select_distinct_a +(1 row) + +-- distinct can't be reased since since we required all the uk must be not null +explain (costs off) select distinct uk1, uk2 from select_distinct_a; + QUERY PLAN +------------------------------------- + HashAggregate + Group Key: 
uk1, uk2 + -> Seq Scan on select_distinct_a +(3 rows) + +-- distinct ereased since uk + not null +explain (costs off) select distinct uk1, uk2 from select_distinct_a where uk2 is not null; + QUERY PLAN +------------------------------- + Seq Scan on select_distinct_a + Filter: (uk2 IS NOT NULL) +(2 rows) + +explain (costs off) select distinct uk1, uk2 from select_distinct_a where uk2 > 1; + QUERY PLAN +------------------------------- + Seq Scan on select_distinct_a + Filter: (uk2 > 1) +(2 rows) + +-- distinct erased due to group by +explain (costs off) select distinct e from select_distinct_a group by e; + QUERY PLAN +------------------------------------- + HashAggregate + Group Key: e + -> Seq Scan on select_distinct_a +(3 rows) + +-- distinct erased due to the restirctinfo +explain (costs off) select distinct uk1 from select_distinct_a where pk1 = 1 and pk2 = 'c'; + QUERY PLAN +-------------------------------------------------------------- + Index Scan using select_distinct_a_pkey on select_distinct_a + Index Cond: ((pk1 = 1) AND (pk2 = 'c'::bpchar)) +(2 rows) + +-- test join +set enable_mergejoin to off; +set enable_hashjoin to off; +insert into select_distinct_a values(1, 'a', 'a', 0, 1), (1, 'b', 'A', 0, 2), (3, 'c', 'c', 0, 3); +insert into select_distinct_b values(1, 'a', 'a', 0, 1), (4, 'd', 'd', 0, 4), (1, 'e', 'e', 0, 5); +ANALYZE select_distinct_a; +ANALYZE select_distinct_b; +-- Cartesian join +explain (costs off) select distinct a.uk1, a.uk2, b.pk1, b.pk2 from select_distinct_a a, select_distinct_b b where a.uk2 is not null; + QUERY PLAN +--------------------------------------------- + Nested Loop + -> Seq Scan on select_distinct_a a + Filter: (uk2 IS NOT NULL) + -> Materialize + -> Seq Scan on select_distinct_b b +(5 rows) + +select distinct a.uk1 COLLATE "C", a.uk2, b.pk1 COLLATE "C", b.pk2 from select_distinct_a a, select_distinct_b b where a.uk2 is not null order by 1, 2, 3, 4; + uk1 | uk2 | pk1 | pk2 
+----------------------+-----+----------------------+----- + A | 0 | a | 0 + A | 0 | d | 0 + A | 0 | e | 0 + a | 0 | a | 0 + a | 0 | d | 0 + a | 0 | e | 0 + c | 0 | a | 0 + c | 0 | d | 0 + c | 0 | e | 0 +(9 rows) + +-- normal join +explain (costs off) select distinct t1.pk1, t1.pk2 from select_distinct_a t1, select_distinct_b t2 where t1.pk2 = t2.pk1 and t1.e = t2.pk2; + QUERY PLAN +-------------------------------------------------------- + Nested Loop + Join Filter: ((t1.pk2 = t2.pk1) AND (t1.e = t2.pk2)) + -> Seq Scan on select_distinct_a t1 + -> Materialize + -> Seq Scan on select_distinct_b t2 +(5 rows) + +explain (costs off) select distinct t1.pk1, t1.pk2 from select_distinct_b t2, select_distinct_a t1 where t1.pk2 = t2.pk1 and t1.e = t2.pk2; + QUERY PLAN +-------------------------------------------------------- + Nested Loop + Join Filter: ((t2.pk1 = t1.pk2) AND (t2.pk2 = t1.e)) + -> Seq Scan on select_distinct_b t2 + -> Materialize + -> Seq Scan on select_distinct_a t1 +(5 rows) + +-- left join +explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a left join select_distinct_b b on (a.pk1 = b.a); + QUERY PLAN +--------------------------------------------- + Nested Loop Left Join + Join Filter: (a.pk1 = b.a) + -> Seq Scan on select_distinct_a a + -> Materialize + -> Seq Scan on select_distinct_b b +(5 rows) + +select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a left join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4;; + pk1 | pk2 | pk1 | pk2 +-----+----------------------+----------------------+----- + 1 | a | a | 0 + 1 | a | e | 0 + 1 | b | a | 0 + 1 | b | e | 0 + 3 | c | | +(5 rows) + +-- right join +explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a right join select_distinct_b b on (a.pk1 = b.a); + QUERY PLAN +--------------------------------------------- + Nested Loop Left Join + Join Filter: (a.pk1 = b.a) + -> Seq Scan on select_distinct_b b + -> Materialize 
+ -> Seq Scan on select_distinct_a a +(5 rows) + +select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a right join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4; + pk1 | pk2 | pk1 | pk2 +-----+----------------------+----------------------+----- + 1 | a | a | 0 + 1 | a | e | 0 + 1 | b | a | 0 + 1 | b | e | 0 + | | d | 0 +(5 rows) + +-- full join +explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a full outer join select_distinct_b b on (a.pk1 = b.a); + QUERY PLAN +--------------------------------------------- + Hash Full Join + Hash Cond: (a.pk1 = b.a) + -> Seq Scan on select_distinct_a a + -> Hash + -> Seq Scan on select_distinct_b b +(5 rows) + +select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a full outer join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4; + pk1 | pk2 | pk1 | pk2 +-----+----------------------+----------------------+----- + 1 | a | a | 0 + 1 | a | e | 0 + 1 | b | a | 0 + 1 | b | e | 0 + 3 | c | | + | | d | 0 +(6 rows) + +explain (costs off) select distinct a.pk1, a.pk2, b.pk1 from select_distinct_a a inner join select_distinct_b b on (a.pk1 = b.a); + QUERY PLAN +--------------------------------------------------------- + Unique + -> Sort + Sort Key: a.pk1, a.pk2, b.pk1 + -> Nested Loop + Join Filter: (a.pk1 = b.a) + -> Seq Scan on select_distinct_a a + -> Materialize + -> Seq Scan on select_distinct_b b +(8 rows) + +-- Semi/anti join +explain (costs off) select distinct pk1, pk2 from select_distinct_a where pk1 in (select a from select_distinct_b); + QUERY PLAN +-------------------------------------------------------------- + Nested Loop Semi Join + Join Filter: (select_distinct_a.pk1 = select_distinct_b.a) + -> Seq Scan on select_distinct_a + -> Materialize + -> Seq Scan on select_distinct_b +(5 rows) + +explain (costs off) select distinct pk1, pk2 from select_distinct_a where pk1 not in (select a from select_distinct_b); + QUERY PLAN 
+--------------------------------------- + Seq Scan on select_distinct_a + Filter: (NOT (hashed SubPlan 1)) + SubPlan 1 + -> Seq Scan on select_distinct_b +(4 rows) + +-- Test subquery +explain (costs off) select distinct * from select_distinct_a a, (select a from select_distinct_b group by a) b where a.pk1 = b.a; + QUERY PLAN +------------------------------------------------- + Nested Loop + Join Filter: (a.pk1 = select_distinct_b.a) + -> Seq Scan on select_distinct_a a + -> Materialize + -> HashAggregate + Group Key: select_distinct_b.a + -> Seq Scan on select_distinct_b +(7 rows) + +select distinct * from select_distinct_a a, (select a from select_distinct_b group by a) b where a.pk1 = b.a order by 1, 2, 3; + pk1 | pk2 | uk1 | uk2 | e | a +-----+----------------------+----------------------+-----+---+--- + 1 | a | a | 0 | 1 | 1 + 1 | b | A | 0 | 2 | 1 +(2 rows) + +explain (costs off) select distinct * from select_distinct_a a, (select distinct a from select_distinct_b) b where a.pk1 = b.a; + QUERY PLAN +------------------------------------------------- + Nested Loop + Join Filter: (a.pk1 = select_distinct_b.a) + -> Seq Scan on select_distinct_a a + -> Materialize + -> HashAggregate + Group Key: select_distinct_b.a + -> Seq Scan on select_distinct_b +(7 rows) + +select distinct * from select_distinct_a a, (select distinct a from select_distinct_b) b where a.pk1 = b.a order by 1 ,2, 3; + pk1 | pk2 | uk1 | uk2 | e | a +-----+----------------------+----------------------+-----+---+--- + 1 | a | a | 0 | 1 | 1 + 1 | b | A | 0 | 2 | 1 +(2 rows) + +-- Distinct On +-- can't erase since pk2 is missed +explain (costs off) select distinct on(pk1) pk1, pk2 from select_distinct_a; + QUERY PLAN +------------------------------------------- + Unique + -> Sort + Sort Key: pk1 + -> Seq Scan on select_distinct_a +(4 rows) + +-- ok to erase +explain (costs off) select distinct on(pk1, pk2) pk1, pk2 from select_distinct_a; + QUERY PLAN +------------------------------- + Seq Scan on 
select_distinct_a +(1 row) + +-- Test partitioned table +create table dist_p (a int, b int not null, c int not null, d int) partition by range (b); +create table dist_p0 partition of dist_p for values from (1) to (10); +create table dist_p1 partition of dist_p for values from (11) to (20); +-- The combines UKs in target list +explain (costs off) select distinct t1.pk1, t1.pk2, t2.b, t2.maxc from select_distinct_a t1, (select b, max(c) as maxc from dist_p group by b) t2; + QUERY PLAN +------------------------------------------------ + Nested Loop + -> HashAggregate + Group Key: dist_p.b + -> Append + -> Seq Scan on dist_p0 dist_p_1 + -> Seq Scan on dist_p1 dist_p_2 + -> Materialize + -> Seq Scan on select_distinct_a t1 +(8 rows) + +-- no uk for t2, so distinct is needed. +explain (costs off) select distinct t1.pk1, t1.pk2, t2.b, t2.c from select_distinct_a t1, dist_p t2; + QUERY PLAN +---------------------------------------------------- + HashAggregate + Group Key: t1.pk1, t1.pk2, t2.b, t2.c + -> Nested Loop + -> Append + -> Seq Scan on dist_p0 t2_1 + -> Seq Scan on dist_p1 t2_2 + -> Materialize + -> Seq Scan on select_distinct_a t1 +(8 rows) + +-- create unique index on dist_p +create unique index dist_p_uk_b_c on dist_p(b, c); +-- (t2.b, t2.c) should be the UK +explain (costs off) select distinct t1.pk1, t1.pk2, t2.b, t2.c from select_distinct_a t1, dist_p t2; + QUERY PLAN +---------------------------------------------- + Nested Loop + -> Append + -> Seq Scan on dist_p0 t2_1 + -> Seq Scan on dist_p1 t2_2 + -> Materialize + -> Seq Scan on select_distinct_a t1 +(6 rows) + +drop index dist_p_uk_b_c; +-- we also support creating unique indexes on each child table +create unique index dist_p0_uk_bc on dist_p0(b, c); +-- not ok, since dist_p1 does not have the same index +explain (costs off) select distinct t1.pk1, t1.pk2, t2.b, t2.c from select_distinct_a t1, dist_p t2; + QUERY PLAN +---------------------------------------------------- + HashAggregate + Group Key: t1.pk1,
t1.pk2, t2.b, t2.c + -> Nested Loop + -> Append + -> Seq Scan on dist_p0 t2_1 + -> Seq Scan on dist_p1 t2_2 + -> Materialize + -> Seq Scan on select_distinct_a t1 +(8 rows) + +create unique index dist_p1_uk_bc on dist_p1(b, c); +-- OK now +explain (costs off) select distinct t1.pk1, t1.pk2, t2.b, t2.c from select_distinct_a t1, dist_p t2; + QUERY PLAN +---------------------------------------------- + Nested Loop + -> Append + -> Seq Scan on dist_p0 t2_1 + -> Seq Scan on dist_p1 t2_2 + -> Materialize + -> Seq Scan on select_distinct_a t1 +(6 rows) + +-- uk is same on all child tables, however it doesn't include the partkey, so not ok as well. +create unique index dist_p0_uk_c on dist_p0(c); +create unique index dist_p1_uk_c on dist_p1(c); +explain (costs off) select distinct t1.pk1, t1.pk2, t2.c from select_distinct_a t1, dist_p t2; + QUERY PLAN +---------------------------------------------------- + HashAggregate + Group Key: t1.pk1, t1.pk2, t2.c + -> Nested Loop + -> Append + -> Seq Scan on dist_p0 t2_1 + -> Seq Scan on dist_p1 t2_2 + -> Materialize + -> Seq Scan on select_distinct_a t1 +(8 rows) + +drop table dist_p; +-- test some view. 
+create view distinct_v1 as select distinct uk1, uk2 from select_distinct_a where uk2 is not null; +explain (costs off) select * from distinct_v1; + QUERY PLAN +------------------------------- + Seq Scan on select_distinct_a + Filter: (uk2 IS NOT NULL) +(2 rows) + +alter table select_distinct_a alter column uk1 drop not null; +explain (costs off) select * from distinct_v1; + QUERY PLAN +---------------------------------------------------------------- + Unique + -> Sort + Sort Key: select_distinct_a.uk1, select_distinct_a.uk2 + -> Seq Scan on select_distinct_a + Filter: (uk2 IS NOT NULL) +(5 rows) + +alter table select_distinct_a alter column uk1 set not null; +-- test generic plan +prepare pt as select * from distinct_v1; +explain (costs off) execute pt; + QUERY PLAN +------------------------------- + Seq Scan on select_distinct_a + Filter: (uk2 IS NOT NULL) +(2 rows) + +alter table select_distinct_a alter column uk1 drop not null; +explain (costs off) execute pt; + QUERY PLAN +---------------------------------------------------------------- + Unique + -> Sort + Sort Key: select_distinct_a.uk1, select_distinct_a.uk2 + -> Seq Scan on select_distinct_a + Filter: (uk2 IS NOT NULL) +(5 rows) + +drop view distinct_v1; +drop table select_distinct_a; +drop table select_distinct_b; diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 5fc6617369..d6cc8fa845 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1515,11 +1515,14 @@ select d.* from d left join (select distinct * from b) s -- not in the join condition. (Note: as of 9.6, we notice that b.id is a -- primary key and so drop b.c_id from the GROUP BY of the resulting plan; -- but this happens too late for join removal in the outer plan level.) +-- XXXX: since b.id is unqiue now so the group by cluase is erased, so +-- the join removal can happen now. 
explain (costs off) select d.* from d left join (select * from b group by b.id, b.c_id) s on d.a = s.id; -- similarly, but keying off a DISTINCT clause +-- XXX we erase distinct clause, so the joinremoval is ok as well. explain (costs off) select d.* from d left join (select distinct * from b) s on d.a = s.id; diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql index 33102744eb..08d1e35095 100644 --- a/src/test/regress/sql/select_distinct.sql +++ b/src/test/regress/sql/select_distinct.sql @@ -135,3 +135,128 @@ SELECT 1 IS NOT DISTINCT FROM 2 as "no"; SELECT 2 IS NOT DISTINCT FROM 2 as "yes"; SELECT 2 IS NOT DISTINCT FROM null as "no"; SELECT null IS NOT DISTINCT FROM null as "yes"; +create table select_distinct_a(pk1 int, pk2 char(20), uk1 char(20) not null, uk2 int, e int, primary key(pk1, pk2)); +create unique index select_distinct_a_uk on select_distinct_a(uk1, uk2); +create table select_distinct_b(a int, b char(20), pk1 char(20), pk2 int, e int, primary key(pk1, pk2)); + +-- distinct erased since (pk1, pk2) +explain (costs off) select distinct * from select_distinct_a; + +-- distinct can't be reased since since we required all the uk must be not null +explain (costs off) select distinct uk1, uk2 from select_distinct_a; + +-- distinct ereased since uk + not null +explain (costs off) select distinct uk1, uk2 from select_distinct_a where uk2 is not null; +explain (costs off) select distinct uk1, uk2 from select_distinct_a where uk2 > 1; + +-- distinct erased due to group by +explain (costs off) select distinct e from select_distinct_a group by e; + +-- distinct erased due to the restirctinfo +explain (costs off) select distinct uk1 from select_distinct_a where pk1 = 1 and pk2 = 'c'; + +-- test join +set enable_mergejoin to off; +set enable_hashjoin to off; + +insert into select_distinct_a values(1, 'a', 'a', 0, 1), (1, 'b', 'A', 0, 2), (3, 'c', 'c', 0, 3); +insert into select_distinct_b values(1, 'a', 'a', 0, 1), (4, 
'd', 'd', 0, 4), (1, 'e', 'e', 0, 5); + +ANALYZE select_distinct_a; +ANALYZE select_distinct_b; + +-- Cartesian join +explain (costs off) select distinct a.uk1, a.uk2, b.pk1, b.pk2 from select_distinct_a a, select_distinct_b b where a.uk2 is not null; +select distinct a.uk1 COLLATE "C", a.uk2, b.pk1 COLLATE "C", b.pk2 from select_distinct_a a, select_distinct_b b where a.uk2 is not null order by 1, 2, 3, 4; + +-- normal join +explain (costs off) select distinct t1.pk1, t1.pk2 from select_distinct_a t1, select_distinct_b t2 where t1.pk2 = t2.pk1 and t1.e = t2.pk2; +explain (costs off) select distinct t1.pk1, t1.pk2 from select_distinct_b t2, select_distinct_a t1 where t1.pk2 = t2.pk1 and t1.e = t2.pk2; + +-- left join +explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a left join select_distinct_b b on (a.pk1 = b.a); +select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a left join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4;; + +-- right join +explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a right join select_distinct_b b on (a.pk1 = b.a); +select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a right join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4; + +-- full join +explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a full outer join select_distinct_b b on (a.pk1 = b.a); +select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a full outer join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4; + +explain (costs off) select distinct a.pk1, a.pk2, b.pk1 from select_distinct_a a inner join select_distinct_b b on (a.pk1 = b.a); + + +-- Semi/anti join +explain (costs off) select distinct pk1, pk2 from select_distinct_a where pk1 in (select a from select_distinct_b); +explain (costs off) select distinct pk1, pk2 from select_distinct_a where pk1 not in (select a from select_distinct_b); + + +-- 
Test subquery +explain (costs off) select distinct * from select_distinct_a a, (select a from select_distinct_b group by a) b where a.pk1 = b.a; +select distinct * from select_distinct_a a, (select a from select_distinct_b group by a) b where a.pk1 = b.a order by 1, 2, 3; + +explain (costs off) select distinct * from select_distinct_a a, (select distinct a from select_distinct_b) b where a.pk1 = b.a; +select distinct * from select_distinct_a a, (select distinct a from select_distinct_b) b where a.pk1 = b.a order by 1 ,2, 3; + +-- Distinct On +-- can't erase since pk2 is missed +explain (costs off) select distinct on(pk1) pk1, pk2 from select_distinct_a; +-- ok to erase +explain (costs off) select distinct on(pk1, pk2) pk1, pk2 from select_distinct_a; + + +-- Test partitioned table +create table dist_p (a int, b int not null, c int not null, d int) partition by range (b); + +create table dist_p0 partition of dist_p for values from (1) to (10); +create table dist_p1 partition of dist_p for values from (11) to (20); + +-- The combines UKs in target list +explain (costs off) select distinct t1.pk1, t1.pk2, t2.b, t2.maxc from select_distinct_a t1, (select b, max(c) as maxc from dist_p group by b) t2; + +-- no uk for t2, so distinct is needed. 
+explain (costs off) select distinct t1.pk1, t1.pk2, t2.b, t2.c from select_distinct_a t1, dist_p t2; + +-- create unique index on dist_p +create unique index dist_p_uk_b_c on dist_p(b, c); + +-- (t2.b, t2.c) should be the UK +explain (costs off) select distinct t1.pk1, t1.pk2, t2.b, t2.c from select_distinct_a t1, dist_p t2; + +drop index dist_p_uk_b_c; + +-- we also support creating unique indexes on each child table +create unique index dist_p0_uk_bc on dist_p0(b, c); +-- not ok, since dist_p1 does not have the same index +explain (costs off) select distinct t1.pk1, t1.pk2, t2.b, t2.c from select_distinct_a t1, dist_p t2; +create unique index dist_p1_uk_bc on dist_p1(b, c); +-- OK now +explain (costs off) select distinct t1.pk1, t1.pk2, t2.b, t2.c from select_distinct_a t1, dist_p t2; + +-- uk is same on all child tables, however it doesn't include the partkey, so not ok as well. +create unique index dist_p0_uk_c on dist_p0(c); +create unique index dist_p1_uk_c on dist_p1(c); +explain (costs off) select distinct t1.pk1, t1.pk2, t2.c from select_distinct_a t1, dist_p t2; + +drop table dist_p; + +-- test some view. +create view distinct_v1 as select distinct uk1, uk2 from select_distinct_a where uk2 is not null; +explain (costs off) select * from distinct_v1; + +alter table select_distinct_a alter column uk1 drop not null; +explain (costs off) select * from distinct_v1; + +alter table select_distinct_a alter column uk1 set not null; + +-- test generic plan +prepare pt as select * from distinct_v1; +explain (costs off) execute pt; +alter table select_distinct_a alter column uk1 drop not null; +explain (costs off) execute pt; + +drop view distinct_v1; +drop table select_distinct_a; +drop table select_distinct_b; -- 2.21.0