From 60c6662b6782d5d4ad4bba0c57fd5b9fecee7364 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=80=E6=8C=83?= Date: Mon, 16 Mar 2020 00:48:13 +0800 Subject: [PATCH v4] [Patch] Bypass distinctClause & groupbyClause if the exprs is unique already --- src/backend/nodes/list.c | 27 + src/backend/optimizer/path/Makefile | 3 +- src/backend/optimizer/path/allpaths.c | 7 +- src/backend/optimizer/path/joinrels.c | 2 + src/backend/optimizer/path/uniquekeys.c | 473 ++++++++++++++++++ src/backend/optimizer/plan/initsplan.c | 9 +- src/backend/optimizer/plan/planner.c | 43 ++ src/backend/optimizer/util/plancat.c | 8 + src/include/nodes/pathnodes.h | 27 + src/include/nodes/pg_list.h | 2 + src/include/optimizer/paths.h | 17 + src/test/regress/expected/aggregates.out | 38 +- src/test/regress/expected/join.out | 26 +- src/test/regress/expected/select_distinct.out | 276 ++++++++++ src/test/regress/sql/select_distinct.sql | 84 ++++ 15 files changed, 1003 insertions(+), 39 deletions(-) create mode 100644 src/backend/optimizer/path/uniquekeys.c diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c index bd0c58cd81..a54c8e66bb 100644 --- a/src/backend/nodes/list.c +++ b/src/backend/nodes/list.c @@ -688,6 +688,33 @@ list_member_oid(const List *list, Oid datum) return false; } +/* + * Return ture iff there is an equal member in target for every + * member in members + */ +bool +list_all_members_in(const List *members, const List *target) +{ + const ListCell *lc1, *lc2; + if (target == NIL && members != NIL) + return false; + foreach(lc1, members) + { + bool found = false; + foreach(lc2, target) + { + if (equal(lfirst(lc1), lfirst(lc2))) + { + found = true; + break; + } + } + if (!found) + return false; + } + return true; +} + /* * Delete the n'th cell (counting from 0) in list. * diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile index 1e199ff66f..7b9820c25f 100644 --- a/src/backend/optimizer/path/Makefile +++ b/src/backend/optimizer/path/Makefile @@ -21,6 +21,7 @@ OBJS = \ joinpath.o \ joinrels.o \ pathkeys.o \ - tidpath.o + tidpath.o \ + uniquekeys.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 8286d9cf34..2c65d3715b 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -158,6 +158,7 @@ make_one_rel(PlannerInfo *root, List *joinlist) /* * Construct the all_baserels Relids set. */ + root->all_baserels = NULL; for (rti = 1; rti < root->simple_rel_array_size; rti++) { @@ -578,7 +579,9 @@ set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) * first since partial unique indexes can affect size estimates. */ check_index_predicates(root, rel); - + + populate_baserel_uniquekeys(root, rel); + /* Mark rel with estimated output rows, width, etc */ set_baserel_size_estimates(root, rel); } @@ -2321,6 +2324,8 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, return; } + rel->uniquekeys = sub_final_rel->uniquekeys; + /* * Mark rel with estimated output rows, width, etc. Note that we have to * do this before generating outer-query paths, else cost_subqueryscan is diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index a21c295b99..f1243f31e7 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -920,6 +920,8 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, /* Apply partitionwise join technique, if possible. */ try_partitionwise_join(root, rel1, rel2, joinrel, sjinfo, restrictlist); + + propagate_unique_keys_to_joinrel(root, joinrel, rel1, rel2, restrictlist, sjinfo->jointype); } diff --git a/src/backend/optimizer/path/uniquekeys.c b/src/backend/optimizer/path/uniquekeys.c new file mode 100644 index 0000000000..1be2c4f0db --- /dev/null +++ b/src/backend/optimizer/path/uniquekeys.c @@ -0,0 +1,473 @@ +/*------------------------------------------------------------------------- + * + * uniquekeys.c + * Utilities for matching and building unique keys + * + * Portions Copyright (c) 2020, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/optimizer/path/uniquekeys.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" + +/* + * Examine the rel's restriction clauses for usable var = const clauses + */ +static List* +get_mergeable_const_restrictlist(RelOptInfo *rel) +{ + List *restrictlist = NIL; + ListCell *lc; + foreach(lc, rel->baserestrictinfo) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc); + + /* + * Note: can_join won't be set for a restriction clause, but + * mergeopfamilies will be if it has a mergejoinable operator and + * doesn't contain volatile functions. + */ + if (restrictinfo->mergeopfamilies == NIL) + continue; /* not mergejoinable */ + + /* XXX can't we check if is a Const */ + + /* + * The clause certainly doesn't refer to anything but the given rel. + * If either side is pseudoconstant then we can use it. + */ + if (bms_is_empty(restrictinfo->left_relids)) + { + /* righthand side is inner */ + restrictinfo->outer_is_left = true; + } + else if (bms_is_empty(restrictinfo->right_relids)) + { + /* lefthand side is inner */ + restrictinfo->outer_is_left = false; + } + else + continue; + + /* OK, add to list */ + restrictlist = lappend(restrictlist, restrictinfo); + } + + return restrictlist; + +} + + +/* + * Return true if uk = Const in the restrictlist + */ +static bool +match_index_to_restrictinfo(IndexOptInfo *unique_ind, List *restrictlist) +{ + int c = 0; + + if (restrictlist == NIL) + return false; + + for(c = 0; c < unique_ind->nkeycolumns; c++) + { + ListCell *lc; + foreach(lc, restrictlist) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + Node *rexpr; + + /* + * The condition's equality operator must be a member of the + * index opfamily, else it is not asserting the right kind of + * equality behavior for this index. We check this first + * since it's probably cheaper than match_index_to_operand(). + */ + if (!list_member_oid(rinfo->mergeopfamilies, unique_ind->opfamily[c])) + continue; + + /* + * XXX at some point we may need to check collations here too. + * For the moment we assume all collations reduce to the same + * notion of equality. + */ + + /* OK, see if the condition operand matches the index key */ + if (rinfo->outer_is_left) + rexpr = get_rightop(rinfo->clause); + else + rexpr = get_leftop(rinfo->clause); + + if (!match_index_to_operand(rexpr, c, unique_ind)) + { + return false; + } + } + } + return true; +} + +/* + * add_uniquekey_from_index + * We only add the Index Vars whose expr exists in rel->reltarget + */ +static void +add_uniquekey_from_index(RelOptInfo *rel, IndexOptInfo *unique_index) +{ + int c = 0; + List *exprs = NIL; + + /* We only add the index which exists in rel->reltarget */ + for(c = 0; c < unique_index->nkeycolumns; c++) + { + ListCell *lc; + bool find_in_exprs = false; + foreach(lc, rel->reltarget->exprs) + { + Var *var; + /* We never knows a FuncExpr is nullable or not, we only handle Var now */ + if (!IsA(lfirst(lc), Var)) + continue; + var = lfirst_node(Var, lc); + if (var->varattno < InvalidAttrNumber) + /* System column */ + continue; + /* Must check not null for unqiue index */ + if (!bms_is_member(var->varattno, rel->not_null_cols)) + continue; + + /* To keep the uniquekey short, We only add it if it exists in rel->reltrget->exprs */ + if (match_index_to_operand((Node *)lfirst(lc), c, unique_index)) + { + find_in_exprs = true; + exprs = lappend(exprs, lfirst(lc)); + break; + } + } + if (!find_in_exprs) + return; + } + rel->uniquekeys = lappend(rel->uniquekeys, exprs); +} + +/* + * populate_baserel_uniquekeys + * Populate 'baserel' uniquekeys list by looking at the rel's unique index + * add baserestrictinfo + */ +void +populate_baserel_uniquekeys(PlannerInfo *root, RelOptInfo *baserel) +{ + ListCell *lc; + List *restrictlist = get_mergeable_const_restrictlist(baserel); + bool return_one_row = false; + List *matched_uk_indexes = NIL; + + Assert(baserel->rtekind == RTE_RELATION); + + foreach(lc, baserel->indexlist) + { + IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc); + /* + * If the index is not unique, or not immediately enforced, or if it's + * a partial index that doesn't match the query, it's useless here. + */ + if (!ind->unique || !ind->immediate || + (ind->indpred != NIL && !ind->predOK)) + continue; + + if (match_index_to_restrictinfo(ind, restrictlist)) + { + return_one_row = true; + break; + } + matched_uk_indexes = lappend(matched_uk_indexes, ind); + } + + if (return_one_row) + { + foreach(lc, baserel->reltarget->exprs) + { + /* Every columns in this relation is unqiue since only 1 row returned + * No bother to check it is a var or not, also we don't need the check nullable + */ + baserel->uniquekeys = lappend(baserel->uniquekeys, + list_make1(lfirst(lc))); + } + } + else + { + foreach(lc, matched_uk_indexes) + add_uniquekey_from_index(baserel, lfirst_node(IndexOptInfo, lc)); + } +} + + +/* + * relation_has_uniquekeys_for + * Returns true if we have proofs that 'rel' cannot return multiple rows with + * the same values in each of 'exprs'. Otherwise returns false. + */ +bool +relation_has_uniquekeys_for(PlannerInfo *root, RelOptInfo *rel, List *exprs) +{ + ListCell *lc; + + foreach(lc, rel->uniquekeys) + { + List *unique_exprs = lfirst_node(List, lc); + if (unique_exprs == NIL) + continue; + if (list_all_members_in(unique_exprs, exprs)) + return true; + } + return false; +} + +/* + * clause_sides_match_join + * Determine whether a join clause is of the right form to use in this join. + * + * We already know that the clause is a binary opclause referencing only the + * rels in the current join. The point here is to check whether it has the + * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr", + * rather than mixing outer and inner vars on either side. If it matches, + * we set the transient flag outer_is_left to identify which side is which. + */ +static inline bool +clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, + Relids innerrelids) +{ + if (bms_is_subset(rinfo->left_relids, outerrelids) && + bms_is_subset(rinfo->right_relids, innerrelids)) + { + /* lefthand side is outer */ + rinfo->outer_is_left = true; + return true; + } + else if (bms_is_subset(rinfo->left_relids, innerrelids) && + bms_is_subset(rinfo->right_relids, outerrelids)) + { + /* righthand side is outer */ + rinfo->outer_is_left = false; + return true; + } + return false; /* no good for these input relations */ +} + +/* + * clauselist_matches_uniquekeys + * Detect the pattern that rel1.uk_expr = rel2.normal_expr in clause_list, + * if so, we are sure that the UniqueKey of rel2 still can be unqiue key in joinrel. + */ +static bool +clauselist_matches_uniquekeys(List *clause_list, List *uniquekey, bool outer_side) +{ + ListCell *lc; + + if (uniquekey == NIL) + return false; + + foreach(lc, uniquekey) + { + Node *expr = (Node *)lfirst(lc); + ListCell *lc2; + bool matched_expr = false; + + foreach(lc2, clause_list) + { + RestrictInfo *rinfo = (RestrictInfo *)lfirst(lc2); + Node *rexpr; + + /* + * The condition's equality operator must be a member of the + * index opfamily, else it is not asserting the right kind of + * equality behavior for this index. We check this first + * since it's probably cheaper than match_index_to_operand(). + */ + /* XXXXX do we need this? uk_opfamily is a Concept when we come to Index + * for UniqueKey looks we don't need it + **/ + /* if (!list_member_oid(rinfo->mergeopfamilies, key->uk_opfamily)) */ + /* continue; */ + + /* + * XXX at some point we may need to check collations here too. + * For the moment we assume all collations reduce to the same + * notion of equality. + */ + + /* OK, see if the condition operand matches the index key */ + if (rinfo->outer_is_left != outer_side) + rexpr = get_rightop(rinfo->clause); + else + rexpr = get_leftop(rinfo->clause); + + if (IsA(rexpr, RelabelType)) + rexpr = (Node *)((RelabelType *)rexpr)->arg; + + if (equal(rexpr, expr)) + { + matched_expr = true; + break; + } + } + + if (!matched_expr) + return false; + } + + return true; +} + +/* + * Used to record if a uniquekey has been added to joinrel, if so we don't + * need to add other superset of this uniquekey to the joinrel. + */ +typedef struct UniqueKeyContextData +{ + List *uniquekey; + /* Set to true if the unique key has been added to joinrel->uniquekeys */ + bool added_to_joinrel; +} *UniqueKeyContext; + + +/* + * initililze_unqiuecontext_for_joinrel + * Return a List of UniqueKeyContext for an inputrel, we also filter out + * all the unqiuekeys which are not possible to use later + */ +static List * +initililze_unqiuecontext_for_joinrel(RelOptInfo *joinrel, RelOptInfo *inputrel) +{ + List *res = NIL; + ListCell *lc; + foreach(lc, inputrel->uniquekeys) + { + UniqueKeyContext context; + /* If it isn't shown in joinrel->reltarget->exprs, it will be not referenced by others */ + if (!list_all_members_in(lfirst_node(List, lc), joinrel->reltarget->exprs)) + continue; + context = palloc(sizeof(struct UniqueKeyContextData)); + context->uniquekey = lfirst_node(List, lc); + context->added_to_joinrel = false; + res = lappend(res, context); + } + return res; + +} + +/* + * propagate_unique_keys_to_joinrel + * Using 'restrictlist' determine if rel2 can duplicate rows in rel1 and + * vice-versa. If the relation at the other side of the join cannot + * cause row duplication, then tag the uniquekeys for the relation onto + * 'joinrel's uniquekey list. + */ +void +propagate_unique_keys_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *rel1, RelOptInfo *rel2, + List *restrictlist, JoinType jointype) +{ + ListCell *lc, *lc2; + List *clause_list = NIL; + List *rel1_uniquekey_context; + List *rel2_uniquekey_context; + + /* Care about the left relation only for SEMI/ANTI join */ + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + { + foreach(lc, rel1->uniquekeys) + { + List *uniquekey = lfirst_node(List, lc); + if (list_all_members_in(uniquekey, joinrel->reltarget->exprs)) + joinrel->uniquekeys = lappend(joinrel->uniquekeys, uniquekey); + } + return; + } + + rel1_uniquekey_context = initililze_unqiuecontext_for_joinrel(joinrel, rel1); + rel2_uniquekey_context = initililze_unqiuecontext_for_joinrel(joinrel, rel2); + + if (rel1_uniquekey_context == NIL || rel2_uniquekey_context == NIL) + return; + + foreach(lc, restrictlist) + { + RestrictInfo *restrictinfo = (RestrictInfo *)lfirst(lc); + + + if (IS_OUTER_JOIN(jointype) && + RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids)) + continue; + + /* Ignore if it's not a mergejoinable clause */ + if (!restrictinfo->can_join || + restrictinfo->mergeopfamilies == NIL) + continue; /* not mergejoinable */ + + /* + * Check if clause has the form "outer op inner" or "inner op outer", + * and if so mark which side is inner. + */ + if (!clause_sides_match_join(restrictinfo, rel1->relids, rel2->relids)) + continue; /* no good for these input relations */ + + /* OK, add to list */ + clause_list = lappend(clause_list, restrictinfo); + } + + foreach(lc, rel1_uniquekey_context) + { + List *uniquekey = ((UniqueKeyContext)lfirst(lc))->uniquekey; + if (clauselist_matches_uniquekeys(clause_list, uniquekey, true)) + { + foreach(lc2, rel2_uniquekey_context) + { + UniqueKeyContext ctx = (UniqueKeyContext)lfirst(lc); + joinrel->uniquekeys = lappend(joinrel->uniquekeys, ctx->uniquekey); + ctx->added_to_joinrel = true; + } + break; + } + } + + foreach(lc, rel2_uniquekey_context) + { + List *uniquekey = ((UniqueKeyContext)lfirst(lc))->uniquekey; + if (clauselist_matches_uniquekeys(clause_list, uniquekey, true)) + { + foreach(lc2, rel1_uniquekey_context) + { + UniqueKeyContext ctx = (UniqueKeyContext)lfirst(lc); + joinrel->uniquekeys = lappend(joinrel->uniquekeys, ctx->uniquekey); + ctx->added_to_joinrel = true; + } + break; + } + } + + foreach(lc, rel1_uniquekey_context) + { + UniqueKeyContext context1 = (UniqueKeyContext) lfirst(lc); + if (context1->added_to_joinrel) + continue; + foreach(lc2, rel2_uniquekey_context) + { + UniqueKeyContext context2 = (UniqueKeyContext) lfirst(lc2); + List *uniquekey = NIL; + if (context2->added_to_joinrel) + continue; + uniquekey = list_copy(context1->uniquekey); + uniquekey = list_concat(uniquekey, context2->uniquekey); + joinrel->uniquekeys = lappend(joinrel->uniquekeys, uniquekey); + } + } +} diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index e978b491f6..a674e271e7 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -829,7 +829,14 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, foreach(l, (List *) f->quals) { Node *qual = (Node *) lfirst(l); - + ListCell *lc; + foreach(lc, find_nonnullable_vars(qual)) + { + Var *var = lfirst_node(Var, lc); + RelOptInfo *rel = root->simple_rel_array[var->varno]; + if (var->varattno > InvalidAttrNumber) + rel->not_null_cols = bms_add_member(rel->not_null_cols, var->varattno); + } distribute_qual_to_rels(root, qual, false, below_outer_join, JOIN_INNER, root->qual_security_level, diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index d6f2153593..e300dad442 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -2389,6 +2389,9 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, add_path(final_rel, path); } + /* Copy the uniquekeys to final_rel */ + final_rel->uniquekeys = current_rel->uniquekeys; + /* * Generate partial paths for final_rel, too, if outer query levels might * be able to make use of them. @@ -3813,6 +3816,20 @@ create_grouping_paths(PlannerInfo *root, Query *parse = root->parse; RelOptInfo *grouped_rel; RelOptInfo *partially_grouped_rel; + ListCell *lc; + + List *required_unique_keys = get_sortgrouplist_exprs(parse->groupClause, + parse->targetList); + /* + * If the groupby clauses is unique already, groupping node is not necessary + * if there is no aggreation functions + */ + if (required_unique_keys != NIL && + !parse->hasAggs && + !parse->hasWindowFuncs && + parse->havingQual == NULL && + relation_has_uniquekeys_for(root, input_rel, required_unique_keys)) + return input_rel; /* * Create grouping relation to hold fully aggregated grouping and/or @@ -3901,6 +3918,19 @@ create_grouping_paths(PlannerInfo *root, } set_cheapest(grouped_rel); + + /* Copy the upperrel's uniquekeys to grouped_rel and add the one which caused + * by groupBy clause + */ + foreach(lc, input_rel->uniquekeys) + { + List *uniquekey = lfirst_node(List, lc); + if (list_all_members_in(uniquekey, grouped_rel->reltarget->exprs)) + grouped_rel->uniquekeys = lappend(grouped_rel->uniquekeys, uniquekey); + } + if (required_unique_keys != NIL) + grouped_rel->uniquekeys = lappend(grouped_rel->uniquekeys, + required_unique_keys); return grouped_rel; } @@ -4736,6 +4766,12 @@ create_distinct_paths(PlannerInfo *root, bool allow_hash; Path *path; ListCell *lc; + List *required_unique_keys = get_sortgrouplist_exprs(parse->distinctClause, + parse->targetList); + + /* If we the result if unqiue already, we just return the input_rel directly */ + if (relation_has_uniquekeys_for(root, input_rel, required_unique_keys)) + return input_rel; /* For now, do all work in the (DISTINCT, NULL) upperrel */ distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL); @@ -4920,6 +4956,10 @@ create_distinct_paths(PlannerInfo *root, /* Now choose the best path(s) */ set_cheapest(distinct_rel); + /* All the UK before distinct is still valid and we can add one more required_unique_keys */ + distinct_rel->uniquekeys = list_copy(input_rel->uniquekeys); + distinct_rel->uniquekeys = lappend(distinct_rel->uniquekeys, + required_unique_keys); return distinct_rel; } @@ -5066,6 +5106,9 @@ create_ordered_paths(PlannerInfo *root, * need us to do it. */ Assert(ordered_rel->pathlist != NIL); + + /* Copy the unique keys */ + ordered_rel->uniquekeys = input_rel->uniquekeys; return ordered_rel; } diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index d82fc5ab8b..34d30b181c 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -117,6 +117,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, Relation relation; bool hasindex; List *indexinfos = NIL; + int i; /* * We need not lock the relation since it was already locked, either by @@ -460,6 +461,13 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, if (inhparent && relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) set_relation_partition_info(root, rel, relation); + Assert(rel->not_null_cols == NULL); + for(i = 0; i < relation->rd_att->natts; i++) + { + if (relation->rd_att->attrs[i].attnotnull) + rel->not_null_cols = bms_add_member(rel->not_null_cols, i+1); + } + table_close(relation, NoLock); /* diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 3d3be197e0..ee26cfdb5c 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -687,6 +687,7 @@ typedef struct RelOptInfo PlannerInfo *subroot; /* if subquery */ List *subplan_params; /* if subquery */ int rel_parallel_workers; /* wanted number of parallel workers */ + Relids not_null_cols; /* the non null column for this relation, start from 1 */ /* Information about foreign tables and foreign joins */ Oid serverid; /* identifies server for the table or join */ @@ -706,6 +707,7 @@ typedef struct RelOptInfo QualCost baserestrictcost; /* cost of evaluating the above */ Index baserestrict_min_security; /* min security_level found in * baserestrictinfo */ + List *uniquekeys; /* List of Var groups */ List *joininfo; /* RestrictInfo structures for join clauses * involving this rel */ bool has_eclass_joins; /* T means joininfo is incomplete */ @@ -1016,6 +1018,31 @@ typedef struct PathKey bool pk_nulls_first; /* do NULLs come before normal values? */ } PathKey; +/* UniqueKeySet + * + * Represents a set of unique keys + */ +typedef struct UniqueKeySet +{ + NodeTag type; + + Bitmapset *non_null_keys; /* indexes of 'keys' proved non-null */ + List *keys; /* list of UniqueKeys */ +} UniqueKeySet; + +/* + * UniqueKey + * + * Represents the unique properties held by a RelOptInfo or a Path + */ +typedef struct UniqueKey +{ + NodeTag type; + + Oid uk_collation; /* collation, if datatypes are collatable */ + Oid uk_opfamily; /* btree opfamily defining the ordering */ + Expr *uk_expr; /* unique key expression */ +} UniqueKey; /* * PathTarget diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h index 14ea2766ad..5dfb93895c 100644 --- a/src/include/nodes/pg_list.h +++ b/src/include/nodes/pg_list.h @@ -528,6 +528,8 @@ extern bool list_member_ptr(const List *list, const void *datum); extern bool list_member_int(const List *list, int datum); extern bool list_member_oid(const List *list, Oid datum); +extern bool list_all_members_in(const List *members, const List *target); + extern List *list_delete(List *list, void *datum); extern List *list_delete_ptr(List *list, void *datum); extern List *list_delete_int(List *list, int datum); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 9ab73bd20c..f0cc2c4245 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -240,4 +240,21 @@ extern PathKey *make_canonical_pathkey(PlannerInfo *root, extern void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, List *live_childrels); +/* + * uniquekeys.c + * Utilities for matching and building unique keys + */ +extern void populate_baserel_uniquekeys(PlannerInfo *root, + RelOptInfo *baserel); +extern bool relation_has_uniquekeys_for(PlannerInfo *root, + RelOptInfo *rel, + List *exprs); + +extern void propagate_unique_keys_to_joinrel(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *rel1, + RelOptInfo *rel2, + List *restrictlist, + JoinType jointype); + #endif /* PATHS_H */ diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index f457b5b150..07dab0ed5d 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1092,12 +1092,10 @@ create temp table t2 (x int, y int, z int, primary key (x, y)); create temp table t3 (a int, b int, c int, primary key(a, b) deferrable); -- Non-primary-key columns can be removed from GROUP BY explain (costs off) select * from t1 group by a,b,c,d; - QUERY PLAN ----------------------- - HashAggregate - Group Key: a, b - -> Seq Scan on t1 -(3 rows) + QUERY PLAN +---------------- + Seq Scan on t1 +(1 row) -- No removal can happen if the complete PK is not present in GROUP BY explain (costs off) select a,c from t1 group by a,c,d; @@ -1112,16 +1110,14 @@ explain (costs off) select a,c from t1 group by a,c,d; explain (costs off) select * from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z; - QUERY PLAN ------------------------------------------------------- - HashAggregate - Group Key: t1.a, t1.b, t2.x, t2.y - -> Hash Join - Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) - -> Seq Scan on t2 - -> Hash - -> Seq Scan on t1 -(7 rows) + QUERY PLAN +------------------------------------------------ + Hash Join + Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) + -> Seq Scan on t2 + -> Hash + -> Seq Scan on t1 +(5 rows) -- Test case where t1 can be optimized but not t2 explain (costs off) select t1.*,t2.x,t2.z @@ -1161,12 +1157,10 @@ explain (costs off) select * from t1 group by a,b,c,d; -- Okay to remove columns if we're only querying the parent. explain (costs off) select * from only t1 group by a,b,c,d; - QUERY PLAN ----------------------- - HashAggregate - Group Key: a, b - -> Seq Scan on t1 -(3 rows) + QUERY PLAN +---------------- + Seq Scan on t1 +(1 row) create temp table p_t1 ( a int, diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 761376b007..a5d98cf421 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -4417,33 +4417,31 @@ select d.* from d left join (select distinct * from b) s explain (costs off) select d.* from d left join (select * from b group by b.id, b.c_id) s on d.a = s.id; - QUERY PLAN ------------------------------------------- + QUERY PLAN +------------------------------------ Merge Right Join Merge Cond: (b.id = d.a) - -> Group - Group Key: b.id - -> Index Scan using b_pkey on b + -> Index Scan using b_pkey on b -> Sort Sort Key: d.a -> Seq Scan on d -(8 rows) +(6 rows) -- similarly, but keying off a DISTINCT clause explain (costs off) select d.* from d left join (select distinct * from b) s on d.a = s.id; - QUERY PLAN --------------------------------------- - Merge Right Join - Merge Cond: (b.id = d.a) - -> Unique - -> Sort - Sort Key: b.id, b.c_id - -> Seq Scan on b + QUERY PLAN +--------------------------------- + Merge Left Join + Merge Cond: (d.a = s.id) -> Sort Sort Key: d.a -> Seq Scan on d + -> Sort + Sort Key: s.id + -> Subquery Scan on s + -> Seq Scan on b (9 rows) -- check join removal works when uniqueness of the join condition is enforced diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out index f3696c6d1d..c27e7d4b67 100644 --- a/src/test/regress/expected/select_distinct.out +++ b/src/test/regress/expected/select_distinct.out @@ -244,3 +244,279 @@ SELECT null IS NOT DISTINCT FROM null as "yes"; t (1 row) +create table select_distinct_a(pk1 int, pk2 char(20), uk1 char(20) not null, uk2 int, e int, primary key(pk1, pk2)); +create unique index select_distinct_a_uk on select_distinct_a(uk1, uk2); +create table select_distinct_b(a int, b char(20), pk1 char(20), pk2 int, e int, primary key(pk1, pk2)); +-- distinct erased since (pk1, pk2) +explain (costs off) select distinct * from select_distinct_a; + QUERY PLAN +------------------------------- + Seq Scan on select_distinct_a +(1 row) + +-- distinct can't be reased since since we required all the uk must be not null +explain (costs off) select distinct uk1, uk2 from select_distinct_a; + QUERY PLAN +------------------------------------- + HashAggregate + Group Key: uk1, uk2 + -> Seq Scan on select_distinct_a +(3 rows) + +-- distinct ereased since uk + not null +explain (costs off) select distinct uk1, uk2 from select_distinct_a where uk2 is not null; + QUERY PLAN +------------------------------- + Seq Scan on select_distinct_a + Filter: (uk2 IS NOT NULL) +(2 rows) + +explain (costs off) select distinct uk1, uk2 from select_distinct_a where uk2 > 1; + QUERY PLAN +------------------------------- + Seq Scan on select_distinct_a + Filter: (uk2 > 1) +(2 rows) + +-- distinct erased due to group by +explain (costs off) select distinct e from select_distinct_a group by e; + QUERY PLAN +------------------------------------- + HashAggregate + Group Key: e + -> Seq Scan on select_distinct_a +(3 rows) + +-- distinct erased due to the restirctinfo +explain (costs off) select distinct uk1 from select_distinct_a where pk1 = 1 and pk2 = 'c'; + QUERY PLAN +-------------------------------------------------------------- + Index Scan using select_distinct_a_pkey on select_distinct_a + Index Cond: ((pk1 = 1) AND (pk2 = 'c'::bpchar)) +(2 rows) + +-- test join +set enable_mergejoin to off; +set enable_hashjoin to off; +insert into select_distinct_a values(1, 'a', 'a', 0, 1), (1, 'b', 'A', 0, 2), (3, 'c', 'c', 0, 3); +insert into select_distinct_b values(1, 'a', 'a', 0, 1), (4, 'd', 'd', 0, 4), (1, 'e', 'e', 0, 5); +-- Cartesian join +explain (costs off) select distinct a.uk1, a.uk2, b.pk1, b.pk2 from select_distinct_a a, select_distinct_b b where a.uk2 is not null; + QUERY PLAN +--------------------------------------------- + Nested Loop + -> Seq Scan on select_distinct_b b + -> Materialize + -> Seq Scan on select_distinct_a a + Filter: (uk2 IS NOT NULL) +(5 rows) + +select distinct a.uk1, a.uk2, b.pk1, b.pk2 from select_distinct_a a, select_distinct_b b where a.uk2 is not null order by 1, 2, 3, 4; + uk1 | uk2 | pk1 | pk2 +----------------------+-----+----------------------+----- + a | 0 | a | 0 + a | 0 | d | 0 + a | 0 | e | 0 + A | 0 | a | 0 + A | 0 | d | 0 + A | 0 | e | 0 + c | 0 | a | 0 + c | 0 | d | 0 + c | 0 | e | 0 +(9 rows) + +-- left join +explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a left join select_distinct_b b on (a.pk1 = b.a); + QUERY PLAN +--------------------------------------------- + Nested Loop Left Join + Join Filter: (a.pk1 = b.a) + -> Seq Scan on select_distinct_a a + -> Materialize + -> Seq Scan on select_distinct_b b +(5 rows) + +select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a left join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4;; + pk1 | pk2 | pk1 | pk2 +-----+----------------------+----------------------+----- + 1 | a | a | 0 + 1 | a | e | 0 + 1 | b | a | 0 + 1 | b | e | 0 + 3 | c | | +(5 rows) + +-- right join +explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a right join select_distinct_b b on (a.pk1 = b.a); + QUERY PLAN +--------------------------------------------------------------------------- + Nested Loop Left Join + -> Seq Scan on select_distinct_b b + -> Index Only Scan using select_distinct_a_pkey on select_distinct_a a + Index Cond: (pk1 = b.a) +(4 rows) + +select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a right join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4; + pk1 | pk2 | pk1 | pk2 +-----+----------------------+----------------------+----- + 1 | a | a | 0 + 1 | a | e | 0 + 1 | b | a | 0 + 1 | b | e | 0 + | | d | 0 +(5 rows) + +-- full join +explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a full outer join select_distinct_b b on (a.pk1 = b.a); + QUERY PLAN +--------------------------------------------- + Hash Full Join + Hash Cond: (a.pk1 = b.a) + -> Seq Scan on select_distinct_a a + -> Hash + -> Seq Scan on select_distinct_b b +(5 rows) + +select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a full outer join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4; + pk1 | pk2 | pk1 | pk2 +-----+----------------------+----------------------+----- + 1 | a | a | 0 + 1 | a | e | 0 + 1 | b | a | 0 + 1 | b | e | 0 + 3 | c | | + | | d | 0 +(6 rows) + +-- distinct can't be erased since b.pk2 is missed +explain (costs off) select distinct a.pk1, a.pk2, b.pk1 from select_distinct_a a full outer join select_distinct_b b on (a.pk1 = b.a); + QUERY PLAN +--------------------------------------------------------- + Unique + -> Sort + Sort Key: a.pk1, a.pk2, b.pk1 + -> Hash Full Join + Hash Cond: (a.pk1 = b.a) + -> Seq Scan on select_distinct_a a + -> Hash + -> Seq Scan on select_distinct_b b +(8 rows) + +-- Semi/anti join +explain (costs off) select distinct pk1, pk2 from select_distinct_a where pk1 in (select a from select_distinct_b); + QUERY PLAN +------------------------------------------------------------------------- + Nested Loop + -> HashAggregate + Group Key: select_distinct_b.a + -> Seq Scan on select_distinct_b + -> Index Only Scan using select_distinct_a_pkey on select_distinct_a + Index Cond: (pk1 = select_distinct_b.a) +(6 rows) + +explain (costs off) select distinct pk1, pk2 from select_distinct_a where pk1 not in (select a from select_distinct_b); + QUERY PLAN +--------------------------------------- + Seq Scan on select_distinct_a + Filter: (NOT (hashed SubPlan 1)) + SubPlan 1 + -> Seq Scan on select_distinct_b +(4 rows) + +-- we also can handle some limited subquery +explain (costs off) select distinct * from select_distinct_a a, (select a from select_distinct_b group by a) b where a.pk1 = b.a; + QUERY PLAN +---------------------------------------------------------------------- + Nested Loop + -> HashAggregate + Group Key: select_distinct_b.a + -> Seq Scan on select_distinct_b + -> Index Scan using select_distinct_a_pkey on select_distinct_a a + Index Cond: (pk1 = select_distinct_b.a) +(6 rows) + +select distinct * from select_distinct_a a, (select a from select_distinct_b group by a) b where a.pk1 = b.a order by 1, 2, 3; + pk1 | pk2 | uk1 | uk2 | e | a +-----+----------------------+----------------------+-----+---+--- + 1 | a | a | 0 | 1 | 1 + 1 | b | A | 0 | 2 | 1 +(2 rows) + +explain (costs off) select distinct * from select_distinct_a a, (select distinct a from select_distinct_b) b where a.pk1 = b.a; + QUERY PLAN +---------------------------------------------------------------------- + Nested Loop + -> HashAggregate + Group Key: select_distinct_b.a + -> Seq Scan on select_distinct_b + -> Index Scan using select_distinct_a_pkey on select_distinct_a a + Index Cond: (pk1 = select_distinct_b.a) +(6 rows) + +select distinct * from select_distinct_a a, (select distinct a from select_distinct_b) b where a.pk1 = b.a order by 1 ,2, 3; + pk1 | pk2 | uk1 | uk2 | e | a +-----+----------------------+----------------------+-----+---+--- + 1 | a | a | 0 | 1 | 1 + 1 | b | A | 0 | 2 | 1 +(2 rows) + +-- Distinct On +-- can't erase since pk2 is missed +explain (costs off) select distinct on(pk1) pk1, pk2 from select_distinct_a; + QUERY PLAN +------------------------------------------- + Unique + -> Sort + Sort Key: pk1 + -> Seq Scan on select_distinct_a +(4 rows) + +-- ok to erase +explain (costs off) select distinct on(pk1, pk2) pk1, pk2 from select_distinct_a; + QUERY PLAN +------------------------------- + Seq Scan on select_distinct_a +(1 row) + +-- test some view. +create view distinct_v1 as select distinct uk1, uk2 from select_distinct_a where uk2 is not null; +explain (costs off) select * from distinct_v1; + QUERY PLAN +------------------------------- + Seq Scan on select_distinct_a + Filter: (uk2 IS NOT NULL) +(2 rows) + +alter table select_distinct_a alter column uk1 drop not null; +explain (costs off) select * from distinct_v1; + QUERY PLAN +----------------------------------------------------------- + HashAggregate + Group Key: select_distinct_a.uk1, select_distinct_a.uk2 + -> Seq Scan on select_distinct_a + Filter: (uk2 IS NOT NULL) +(4 rows) + +alter table select_distinct_a alter column uk1 set not null; +-- test generic plan +prepare pt as select * from distinct_v1; +explain (costs off) execute pt; + QUERY PLAN +------------------------------- + Seq Scan on select_distinct_a + Filter: (uk2 IS NOT NULL) +(2 rows) + +alter table select_distinct_a alter column uk1 drop not null; +explain (costs off) execute pt; + QUERY PLAN +----------------------------------------------------------- + HashAggregate + Group Key: select_distinct_a.uk1, select_distinct_a.uk2 + -> Seq Scan on select_distinct_a + Filter: (uk2 IS NOT NULL) +(4 rows) + +drop view distinct_v1; +drop table select_distinct_a; +drop table select_distinct_b; diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql index a605e86449..282ca58cf9 100644 --- a/src/test/regress/sql/select_distinct.sql +++ b/src/test/regress/sql/select_distinct.sql @@ -73,3 +73,87 @@ SELECT 1 IS NOT DISTINCT FROM 2 as "no"; SELECT 2 IS NOT DISTINCT FROM 2 as "yes"; SELECT 2 IS NOT DISTINCT FROM null as "no"; SELECT null IS NOT DISTINCT FROM null as "yes"; +create table select_distinct_a(pk1 int, pk2 char(20), uk1 char(20) not null, uk2 int, e int, primary key(pk1, pk2)); +create unique index select_distinct_a_uk on select_distinct_a(uk1, uk2); +create table select_distinct_b(a int, b char(20), pk1 char(20), pk2 int, e int, primary key(pk1, pk2)); + +-- distinct erased since (pk1, pk2) +explain (costs off) select distinct * from select_distinct_a; + +-- distinct can't be reased since since we required all the uk must be not null +explain (costs off) select distinct uk1, uk2 from select_distinct_a; + +-- distinct ereased since uk + not null +explain (costs off) select distinct uk1, uk2 from select_distinct_a where uk2 is not null; +explain (costs off) select distinct uk1, uk2 from select_distinct_a where uk2 > 1; + +-- distinct erased due to group by +explain (costs off) select distinct e from select_distinct_a group by e; + +-- distinct erased due to the restirctinfo +explain (costs off) select distinct uk1 from select_distinct_a where pk1 = 1 and pk2 = 'c'; + +-- test join +set enable_mergejoin to off; +set enable_hashjoin to off; + +insert into select_distinct_a values(1, 'a', 'a', 0, 1), (1, 'b', 'A', 0, 2), (3, 'c', 'c', 0, 3); +insert into select_distinct_b values(1, 'a', 'a', 0, 1), (4, 'd', 'd', 0, 4), (1, 'e', 'e', 0, 5); + +-- Cartesian join +explain (costs off) select distinct a.uk1, a.uk2, b.pk1, b.pk2 from select_distinct_a a, select_distinct_b b where a.uk2 is not null; +select distinct a.uk1, a.uk2, b.pk1, b.pk2 from select_distinct_a a, select_distinct_b b where a.uk2 is not null order by 1, 2, 3, 4; + + +-- left join +explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a left join select_distinct_b b on (a.pk1 = b.a); +select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a left join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4;; + +-- right join +explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a right join select_distinct_b b on (a.pk1 = b.a); +select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a right join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4; + +-- full join +explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a full outer join select_distinct_b b on (a.pk1 = b.a); +select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a full outer join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4; + +-- distinct can't be erased since b.pk2 is missed +explain (costs off) select distinct a.pk1, a.pk2, b.pk1 from select_distinct_a a full outer join select_distinct_b b on (a.pk1 = b.a); + + +-- Semi/anti join +explain (costs off) select distinct pk1, pk2 from select_distinct_a where pk1 in (select a from select_distinct_b); +explain (costs off) select distinct pk1, pk2 from select_distinct_a where pk1 not in (select a from select_distinct_b); + +-- we also can handle some limited subquery +explain (costs off) select distinct * from select_distinct_a a, (select a from select_distinct_b group by a) b where a.pk1 = b.a; +select distinct * from select_distinct_a a, (select a from select_distinct_b group by a) b where a.pk1 = b.a order by 1, 2, 3; + +explain (costs off) select distinct * from select_distinct_a a, (select distinct a from select_distinct_b) b where a.pk1 = b.a; +select distinct * from select_distinct_a a, (select distinct a from select_distinct_b) b where a.pk1 = b.a order by 1 ,2, 3; + +-- Distinct On +-- can't erase since pk2 is missed +explain (costs off) select distinct on(pk1) pk1, pk2 from select_distinct_a; +-- ok to erase +explain (costs off) select distinct on(pk1, pk2) pk1, pk2 from select_distinct_a; + + +-- test some view. +create view distinct_v1 as select distinct uk1, uk2 from select_distinct_a where uk2 is not null; +explain (costs off) select * from distinct_v1; + +alter table select_distinct_a alter column uk1 drop not null; +explain (costs off) select * from distinct_v1; + +alter table select_distinct_a alter column uk1 set not null; + +-- test generic plan +prepare pt as select * from distinct_v1; +explain (costs off) execute pt; +alter table select_distinct_a alter column uk1 drop not null; +explain (costs off) execute pt; + +drop view distinct_v1; +drop table select_distinct_a; +drop table select_distinct_b; -- 2.21.0