From 987c1d08bedc0c9c7436ad2daddbab2202b52425 Mon Sep 17 00:00:00 2001 From: "yizhi.fzh" Date: Thu, 9 Nov 2023 17:25:35 +0800 Subject: [PATCH v3 1/1] maintain uniquekey on baserel/joinrel level and use it for marking distinct-as-noop. --- src/backend/nodes/list.c | 17 + src/backend/optimizer/path/Makefile | 3 +- src/backend/optimizer/path/README.uniquekey | 220 +++++++ src/backend/optimizer/path/allpaths.c | 2 + src/backend/optimizer/path/equivclass.c | 48 ++ src/backend/optimizer/path/joinrels.c | 2 + src/backend/optimizer/path/uniquekey.c | 654 ++++++++++++++++++++ src/backend/optimizer/plan/initsplan.c | 8 + src/backend/optimizer/plan/planner.c | 33 + src/backend/optimizer/util/plancat.c | 10 + src/backend/optimizer/util/relnode.c | 2 + src/include/nodes/pathnodes.h | 19 + src/include/nodes/pg_list.h | 2 + src/include/optimizer/paths.h | 13 + src/test/regress/expected/join.out | 11 +- src/test/regress/expected/uniquekey.out | 268 ++++++++ src/test/regress/parallel_schedule | 2 +- src/test/regress/sql/uniquekey.sql | 104 ++++ 18 files changed, 1409 insertions(+), 9 deletions(-) create mode 100644 src/backend/optimizer/path/README.uniquekey create mode 100644 src/backend/optimizer/path/uniquekey.c create mode 100644 src/test/regress/expected/uniquekey.out create mode 100644 src/test/regress/sql/uniquekey.sql diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c index 750ee5a7e5..eaeaa19ad2 100644 --- a/src/backend/nodes/list.c +++ b/src/backend/nodes/list.c @@ -694,6 +694,23 @@ list_member_ptr(const List *list, const void *datum) return false; } +/* + * Return index of the datum in list if found. otherwise return -1. + */ +int +list_member_ptr_pos(const List *list, const void *datum) +{ + ListCell *lc; + + foreach(lc, list) + { + if (lfirst(lc) == datum) + return foreach_current_index(lc); + } + + return -1; +} + /* * Return true iff the integer 'datum' is a member of the list. */ diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile index 1e199ff66f..63cc1505d9 100644 --- a/src/backend/optimizer/path/Makefile +++ b/src/backend/optimizer/path/Makefile @@ -21,6 +21,7 @@ OBJS = \ joinpath.o \ joinrels.o \ pathkeys.o \ - tidpath.o + tidpath.o \ + uniquekey.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/optimizer/path/README.uniquekey b/src/backend/optimizer/path/README.uniquekey new file mode 100644 index 0000000000..be13b113b9 --- /dev/null +++ b/src/backend/optimizer/path/README.uniquekey @@ -0,0 +1,220 @@ +Here is a design document and a part of implementation. + +What is UniqueKey? +----------------- + +UniqueKey represents a uniqueness information for a RelOptInfo. for +example: + +SELECT id FROM t; + +where the ID is the UniqueKey for the RelOptInfo (t). In the real world, +it has the following attributes: + +1). It should be EquivalenceClass based. for example: + +SELECT a FROM t WHERE a = id; + +In this case, the UniqueKey should be 1 EC with two members +- EC(EM(a), EM(id)). + + +2). Each UniqueKey may be made up with 1+ EquivalenceClass. for example: + +CREATE TABLE t(a int not null, b int not null); +CREATE UNIQUE INDEX on t(a, b); +SELECT * FROM t; + +Where the UniqueKey for RelOptInfo (t) will be 2 ECs with each one has 1 +member. + +- EC(em=a), EC(em=b) + +3). Each RelOptInfo may have 1+ UniqueKeys. + +CREATE TABLE t(a int not null, b int not null, c int not null); +CREATE UNIQUE INDEX on t(a, b); +CREATE UNIQUE INDEX on t(c); + +SELECT * FROM t; + +Where the UniqueKey for RelOptInfo (t) will be +- [EC(em=a), EC(em=b)]. +- [EC(em=c)] + +4). A special case is about the one-row case. It works like: +SELECT * FROM t WHERE id = 1; +Here every single expression in the RelOptInfo (t) is unique since only +one row here. + +Where can we use it? +-------------------- +1. mark the distinct as no-op. SELECT DISTINCT uniquekey FROM v; This + optimization has been required several times in our threads. + +2. Figure out more pathkey within the onerow case, then some planning + time can be reduced to be big extend. This user case is not absurd, + a real user case like this: + + CREATE TABLE small_t (id int primary key, b int, c int .. u int); + CREATE INDEX ON small_t(b); + CREATE INDEX ON small_t(c); + .. + + SELECT * FROM small_t s + JOIN t1 on t1.sb = s.b + JOIN T2 on t2.sc = s.c + .. + JOIN t20 on t20.su = s.u + WHERE s.id = 1; + + Without the above optimization, we don't know s.b /s.c is ordered + already, so it might keep more different paths for small_t because of + they have different interesting pathkey, and use more planning time + for sorting to support merge join. + + With the above optimization, the planning time should be reduced since + the seq scan can produce a ordered result for every expression. + +3. Figure out more interesting pathkey after join with normal UniqueKey. + + CREATE TABLE t(id int primary key, b int, c int); + CREATE INDEX on t(c); + ANALYZE t; + + explain (costs off) + select t1.id, t2.c from t t1 + join t1 t2 on t1.id = t2.b + and t2.c > 3 + order by t1.id, t2.c; + + QUERY PLAN + -------------------------------------------------- + Sort Key: t1.id, t2.c <--- this sort can be avoided actually. + -> Nested Loop + Join Filter: (t1.id = t2.b) + -> Index Only Scan using t_pkey on t t1 + -> Index Scan using t1_c_idx on t1 t2 + Index Cond: (c > 3) + + *Without knowing the t1.id is unique*, which means there are some + duplicated data in t1.id, the duplication data in t1 will break the + order of (t1.id, t2.c), but if we know the t1.id is unique, the sort + will be not needed. I'm pretty happy with this finding. + +4. Optimize some group by case, like + + SELECT id, sum(b) FROM t GROUP BY id + is same with + SELECT id, b from t; + + I'm not sure how often it is in the real life, I'm not so excited with + this for now. + + +How to present ECs in UniqueKey? +-------------------------------- + +I choose "Bitmapset *eclass_indexes;" finally, which is because +Bitmapset is memory compact and good at bms_union, bms_is_subset +stuffs. The value in the bitmap is the positions in root->eq_classes. It +is also be able to present the UniqueKey which is made up from multi +relations or upper relation. + +How to present single row in UniqueKey +------------------------------------- + +I just use a 'Index relid', an non-zero value means the +RelOptInfo[relid] is single row. For the case like + +SELECT * FROM t WHERE id = 1; +The UniqueKey is: +- UniqueKey(eclass_indexes=NULL, relid=1) + +during a join, any unique keys join with single row, it's uniqueness can +be kept. + +SELECT t1.uk, t2.a FROM t WHERE t2.id = 1 and any-qual(t1, t2); +- UniqueKey (t1.uk) + +more specially, join two single row like: + +SELECT * FROM t1 join t2 on true where t1.id = 1 and t2.id = 2; + +the UniqueKey for the JoinRel will be: +- UniqueKey(eclass_indexes=NULL, relid=1) +- UniqueKey(eclass_indexes=NULL, relid=2) + +However, the current single row presentation can't works well with Upper +relation, which I think it would be acceptable. See the following case: + +SELECT count(*) FROM t1 JOIN t2 on true; + + +How to maintain the uniquekey? +------------------------------- +the uniquekey is maintained from baserel to join rel then to upper +relation. In the base rel, it comes from unique index. From the join +relation, it is maintained with two rules: + +- the uniquekey in one side is still unique if it can't be duplicated + after the join. for example: + + SELECT t1.pk FROM t1 JOIN t2 ON t1.a = t2.pk; + UniqueKey on t1: t1.pk + UniqueKey on t1 Join t2: t1.pk + +- The combined unique key from both sides are unique all the times. + SELECT t1.pk , t2.pk FROM t1 join t2 on true; + UniqueKey on t1 join t2: (t1.pk, t2.pk) + +Some other operations like DISTINCT, GROUP BY can produce UniqueKey as well. + +NULL values +----------- +notnullattrs are added into RelOptInfo, which present if these attributes +may not be NULL after the baserestrictinfo is executed. not-null-attributes +may be generated by not-null constraint in catalog or baserestrictinfo +(only) filter. However it is possible become NULLs because of outer +join, then Var.varnullingrels is used in this case. see +'var_is_nullable' function call. + +To simplify the UniqueKey module, it doesn't care about the null values +during the maintaining, which means it may contains multi NULL values +all the time by design. However whenever a user case care about that, +the user case can get the answer with the above logic, that is what +'mark-distinct-as-noop' does. + +How to reduce the overhead +---------------------------------- +UniqueKey employs the similar strategy like PathKey, it only maintain +the interesting PathKey. Currently the interesting UniqueKey includes: +1). It is subset of distinct_pathkeys. +2). It is used in mergeable join clauses for unique key deduction (for +the join rel case, rule 1). In this case, it can be discarded quickly if +the join has been done. + +To avoid to check if an uniquekey is subset of distinct clause again and +again, I cached the result into UnqiueKey struct during the UniqueKey +creation. + +Since our first goal is just used for marking distinct as no-op, so if +there is no distinct clause at all, unique key will be not maintained at +the beginning. so we can have some codes like: + +if (root->distinct_pathkeys == NULL) +return; + +This fast path is NOT added for now for better code coverage. + +What I have now: +---------------- + +The current patch just maintain the UniqueKey at the baserel/joinrel level +and used it for mark-distinct-as-noop purpose. including the basic idea of + +- How the UniqueKey is defined. +- How to find out the interesting pathkey in the baserel/joinrel level. +- How to figure out the unique key contains NULL values. + +Also the test cases are prepared, see uniquekey.sql. diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 67921a0826..a1677924af 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -458,6 +458,8 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel, } } + populate_baserel_uniquekeys(root, rel); + /* * We insist that all non-dummy rels have a nonzero rowcount estimate. */ diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 7fa502d6e2..29edc8d1b5 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -744,6 +744,54 @@ get_eclass_for_sort_expr(PlannerInfo *root, return newec; } +/* + * find_ec_position_matching_expr + * Locate the position of EquivalenceClass whose members matching + * the given expr, if any; return -1 if no match. + */ +int +find_ec_position_matching_expr(PlannerInfo *root, + Expr *expr, + RelOptInfo *baserel) +{ + int i = -1; + + while ((i = bms_next_member(baserel->eclass_indexes, i)) >= 0) + { + EquivalenceClass *ec = list_nth(root->eq_classes, i); + + if (find_ec_member_matching_expr(ec, expr, baserel->relids)) + return i; + } + return -1; +} + +/* + * build_ec_positions_for_exprs + * + * Given a list of exprs, find the related EC positions for each of + * them. if any exprs has no EC related, return NULL; + */ +Bitmapset * +build_ec_positions_for_exprs(PlannerInfo *root, List *exprs, RelOptInfo *rel) +{ + ListCell *lc; + Bitmapset *res = NULL; + + foreach(lc, exprs) + { + int pos = find_ec_position_matching_expr(root, lfirst(lc), rel); + + if (pos < 0) + { + bms_free(res); + return NULL; + } + res = bms_add_member(res, pos); + } + return res; +} + /* * find_ec_member_matching_expr * Locate an EquivalenceClass member matching the given expr, if any; diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index d03ace50a1..ee6ec4b68f 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -743,6 +743,8 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) sjinfo, pushed_down_joins, &restrictlist); + populate_joinrel_uniquekeys(root, joinrel, rel1, rel2, restrictlist, sjinfo->jointype); + /* * If we've already proven this join is empty, we needn't consider any * more paths for it. diff --git a/src/backend/optimizer/path/uniquekey.c b/src/backend/optimizer/path/uniquekey.c new file mode 100644 index 0000000000..ecc7c6d802 --- /dev/null +++ b/src/backend/optimizer/path/uniquekey.c @@ -0,0 +1,654 @@ +/*------------------------------------------------------------------------- + * + * uniquekey.c + * Utilities for maintaining uniquekey. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/optimizer/path/uniquekey.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/sysattr.h" +#include "nodes/nodeFuncs.h" +#include "nodes/pathnodes.h" +#include "optimizer/optimizer.h" +#include "optimizer/paths.h" + + +/* Functions to populate UniqueKey */ +static bool add_uniquekey_for_uniqueindex(PlannerInfo *root, + IndexOptInfo *unique_index, + List *truncatable_exprs, + List *expr_opfamilies); +static bool unique_ecs_useful_for_distinct(PlannerInfo *root, Bitmapset *ec_indexes); + +/* Helper functions to create UniqueKey. */ +static UniqueKey * make_uniquekey(Bitmapset *eclass_indexes, + bool useful_for_distinct); +static void mark_rel_singlerow(RelOptInfo *rel, int relid); + +static UniqueKey * rel_singlerow_uniquekey(RelOptInfo *rel); +static bool uniquekey_contains_multinulls(PlannerInfo *root, RelOptInfo *rel, UniqueKey * ukey); + +/* Debug only */ +static void print_uniquekey(PlannerInfo *root, RelOptInfo *rel); + +static bool uniquekey_contains_in(PlannerInfo *root, UniqueKey * ukey, Bitmapset *ecs, Relids relids); +static bool is_uniquekey_useful_afterjoin(PlannerInfo *root, UniqueKey * ukey, RelOptInfo *joinrel); + +/* + * populate_baserel_uniquekeys + * + * UniqueKey on baserel comes from unique indexes. Any expression + * which equals with Const can be stripped and the left expressions are + * still unique. + */ +void +populate_baserel_uniquekeys(PlannerInfo *root, RelOptInfo *rel) +{ + ListCell *lc; + List *truncatable_exprs = NIL, + *expr_opfamilies = NIL; + + /* + * Currently we only use UniqueKey for mark-distinct-as-noop case, so if + * there is no-distinct-clause at all, we can ignore the maintenance at + * the first place. however for code coverage at the development stage, we + * bypass this fastpath on purpose. + * + * XXX: even we want this fastpath, we still need to distinguish even the + * current subquery has no DISTINCT, but the upper query may have. + */ + + /* + * if (root->distinct_pathkeys == NIL) return; + */ + foreach(lc, rel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + if (rinfo->mergeopfamilies == NIL) + continue; + + if (!IsA(rinfo->clause, OpExpr)) + continue; + + if (bms_is_empty(rinfo->left_relids)) + truncatable_exprs = lappend(truncatable_exprs, get_rightop(rinfo->clause)); + else if (bms_is_empty(rinfo->right_relids)) + truncatable_exprs = lappend(truncatable_exprs, get_leftop(rinfo->clause)); + else + continue; + + expr_opfamilies = lappend(expr_opfamilies, rinfo->mergeopfamilies); + } + + foreach(lc, rel->indexlist) + { + IndexOptInfo *index = (IndexOptInfo *) lfirst(lc); + + if (!index->unique || !index->immediate || + (index->indpred != NIL && !index->predOK)) + continue; + + if (add_uniquekey_for_uniqueindex(root, index, + truncatable_exprs, + expr_opfamilies)) + /* Find a singlerow case, no need to go through other indexes. */ + return; + } + + print_uniquekey(root, rel); +} + + +/* + * add_uniquekey_for_uniqueindex + * + * populate a UniqueKey if it is interesting, return true iff the + * UniqueKey is an SingleRow. Only the interesting UniqueKeys are kept. + */ +static bool +add_uniquekey_for_uniqueindex(PlannerInfo *root, IndexOptInfo *unique_index, + List *truncatable_exprs, List *expr_opfamilies) +{ + List *unique_exprs = NIL; + Bitmapset *unique_ecs = NULL; + ListCell *indexpr_item; + RelOptInfo *rel = unique_index->rel; + bool used_for_distinct; + int c; + + indexpr_item = list_head(unique_index->indexprs); + + for (c = 0; c < unique_index->nkeycolumns; c++) + { + int attr = unique_index->indexkeys[c]; + Expr *expr; /* The candidate for UniqueKey expression. */ + bool matched_const = false; + ListCell *lc1, + *lc2; + + if (attr > 0) + { + expr = list_nth_node(TargetEntry, unique_index->indextlist, c)->expr; + } + else if (attr == 0) + { + /* Expression index */ + expr = lfirst(indexpr_item); + indexpr_item = lnext(unique_index->indexprs, indexpr_item); + } + else /* attr < 0 */ + { + /* Index on OID is possible, not handle it for now. */ + return false; + } + + /* Ignore the expr which are equals to const. */ + forboth(lc1, truncatable_exprs, lc2, expr_opfamilies) + { + if (list_member_oid((List *) lfirst(lc2), unique_index->opfamily[c]) && + match_index_to_operand((Node *) lfirst(lc1), c, unique_index)) + { + matched_const = true; + break; + } + } + + if (matched_const) + continue; + + unique_exprs = lappend(unique_exprs, expr); + } + + if (unique_exprs == NIL) + { + /* single row is always interesting. */ + mark_rel_singlerow(rel, rel->relid); + return true; + } + + /* + * if no EquivalenceClass is found for any exprs in unique exprs, we are + * sure the whole exprs are not in the DISTINCT clause or mergeable join + * clauses. so it is not interesting. + */ + unique_ecs = build_ec_positions_for_exprs(root, unique_exprs, rel); + if (unique_ecs == NULL) + return false; + + used_for_distinct = unique_ecs_useful_for_distinct(root, unique_ecs); + + + rel->uniquekeys = lappend(rel->uniquekeys, + make_uniquekey(unique_ecs, + used_for_distinct)); + return false; +} + +/* + * make_uniquekey + * Based on UnqiueKey rules, it is impossible for a UnqiueKey + * which have eclass_indexes and relid both set. This function just + * handle eclass_indexes case. + */ +static UniqueKey * +make_uniquekey(Bitmapset *eclass_indexes, bool useful_for_distinct) +{ + UniqueKey *ukey = makeNode(UniqueKey); + + ukey->eclass_indexes = eclass_indexes; + ukey->relid = 0; + ukey->use_for_distinct = useful_for_distinct; + return ukey; +} + +/* + * mark_rel_singlerow + * mark a relation as singlerow. + */ +static void +mark_rel_singlerow(RelOptInfo *rel, int relid) +{ + UniqueKey *ukey = makeNode(UniqueKey); + + ukey->relid = relid; + rel->uniquekeys = list_make1(ukey); +} + +static inline bool +uniquekey_is_singlerow(UniqueKey * ukey) +{ + return ukey->relid != 0; +} + +/* + * + * Return the UniqueKey if rel is a singlerow Relation. othwise + * return NULL. + */ +static UniqueKey * +rel_singlerow_uniquekey(RelOptInfo *rel) +{ + if (rel->uniquekeys != NIL) + { + UniqueKey *ukey = linitial_node(UniqueKey, rel->uniquekeys); + + if (ukey->relid) + return ukey; + } + return NULL; +} + +/* + * print_uniquekey + * Used for easier reivew, should be removed before commit. + */ +static void +print_uniquekey(PlannerInfo *root, RelOptInfo *rel) +{ + if (!enable_geqo) + { + ListCell *lc; + + elog(INFO, "Rel = %s", bmsToString(rel->relids)); + foreach(lc, rel->uniquekeys) + { + UniqueKey *ukey = lfirst_node(UniqueKey, lc); + + elog(INFO, "UNIQUEKEY{indexes=%s, singlerow_rels=%d, use_for_distinct=%d}", + bmsToString(ukey->eclass_indexes), + ukey->relid, + ukey->use_for_distinct); + } + } +} + +/* + * is it possible that the var contains multi NULL values in the given + * RelOptInfo rel? + */ +static bool +var_is_nullable(PlannerInfo *root, Var *var, RelOptInfo *rel) +{ + RelOptInfo *base_rel; + + /* check if the outer join can add the NULL values. */ + if (bms_overlap(var->varnullingrels, rel->relids)) + return true; + + /* check if the user data has the NULL values. */ + base_rel = root->simple_rel_array[var->varno]; + return !bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber, base_rel->notnullattrs); +} + + +/* + * uniquekey_contains_multinulls + * + * Check if the uniquekey contains nulls values. + */ +static bool +uniquekey_contains_multinulls(PlannerInfo *root, RelOptInfo *rel, UniqueKey * ukey) +{ + int i = -1; + + while ((i = bms_next_member(ukey->eclass_indexes, i)) >= 0) + { + EquivalenceClass *ec = list_nth_node(EquivalenceClass, root->eq_classes, i); + ListCell *lc; + + foreach(lc, ec->ec_members) + { + EquivalenceMember *em = lfirst_node(EquivalenceMember, lc); + Var *var; + + var = (Var *) em->em_expr; + + if (!IsA(var, Var)) + continue; + + if (var_is_nullable(root, var, rel)) + return true; + else + + /* + * If any one of member in the EC is not nullable, we all the + * members are not nullable since they are equal with each + * other. + */ + break; + } + } + + return false; +} + + +/* + * relation_is_distinct_for + * + * Check if the rel is distinct for distinct_pathkey. + */ +bool +relation_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *distinct_pathkey) +{ + ListCell *lc; + UniqueKey *singlerow_ukey = rel_singlerow_uniquekey(rel); + Bitmapset *pathkey_bm = NULL; + + if (singlerow_ukey) + { + return !uniquekey_contains_multinulls(root, rel, singlerow_ukey); + } + + foreach(lc, distinct_pathkey) + { + PathKey *pathkey = lfirst_node(PathKey, lc); + int pos = list_member_ptr_pos(root->eq_classes, pathkey->pk_eclass); + + if (pos == -1) + return false; + + pathkey_bm = bms_add_member(pathkey_bm, pos); + } + + foreach(lc, rel->uniquekeys) + { + UniqueKey *ukey = lfirst_node(UniqueKey, lc); + + if (bms_is_subset(ukey->eclass_indexes, pathkey_bm) && + !uniquekey_contains_multinulls(root, rel, ukey)) + return true; + } + + return false; +} + +/* + * unique_ecs_useful_for_distinct + * + * Return true if all the EquivalenceClass for ecs exists in root->distinct_pathkey. + */ +static bool +unique_ecs_useful_for_distinct(PlannerInfo *root, Bitmapset *ec_indexes) +{ + int i = -1; + + while ((i = bms_next_member(ec_indexes, i)) >= 0) + { + EquivalenceClass *ec = list_nth(root->eq_classes, i); + ListCell *p; + bool found = false; + + foreach(p, root->distinct_pathkeys) + { + PathKey *pathkey = lfirst_node(PathKey, p); + + if (ec == pathkey->pk_eclass) + { + found = true; + break; + } + } + if (!found) + return false; + } + return true; +} + +/* + * populate_joinrel_uniquekey_for_rel + * + * Check if the pattern of rel.any_column = other_rel.unique_key_column + * exists, if so, the uniquekey in rel is still valid after join and it is + * added into joinrel and return true. otherwise return false. + */ +static bool +populate_joinrel_uniquekey_for_rel(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *rel, RelOptInfo *other_rel, + List *restrictlist) +{ + bool rel_keep_unique = false; + Bitmapset *other_ecs = NULL; + Relids other_relids = NULL; + ListCell *lc; + + if (rel_singlerow_uniquekey(other_rel)) + { + /* + * any uniquekeys stuff join with single-row, its uniqueness is still + * kept. + */ + goto done; + } + + /* find out the ECs which the rel.any_columns equals to. */ + foreach(lc, restrictlist) + { + RestrictInfo *r = lfirst_node(RestrictInfo, lc); + + if (r->mergeopfamilies == NIL) + continue; + + /* Build the Bitmapset for easy comparing. */ + if (bms_equal(r->left_relids, rel->relids) && r->right_ec != NULL) + { + other_ecs = bms_add_member(other_ecs, list_member_ptr_pos(root->eq_classes, r->right_ec)); + other_relids = bms_add_members(other_relids, r->right_relids); + } + else if (bms_equal(r->right_relids, rel->relids) && r->left_ec != NULL) + { + other_ecs = bms_add_member(other_ecs, list_member_ptr_pos(root->eq_classes, r->left_ec)); + other_relids = bms_add_members(other_relids, r->left_relids); + } + } + + /* Check if these ECs include a uniquekey of other_rel */ + foreach(lc, other_rel->uniquekeys) + { + UniqueKey *ukey = lfirst_node(UniqueKey, lc); + + if (uniquekey_contains_in(root, ukey, other_ecs, other_relids)) + { + rel_keep_unique = true; + break; + } + } + + if (!rel_keep_unique) + return false; + +done: + + /* + * Now copy the uniquekey in rel to joinrel, but first we need to know if + * it is useful. + */ + foreach(lc, rel->uniquekeys) + { + UniqueKey *ukey = lfirst_node(UniqueKey, lc); + + if (is_uniquekey_useful_afterjoin(root, ukey, joinrel)) + { + if (uniquekey_is_singlerow(ukey)) + { + /* + * XXX (?): a). NULL values. b). other relids rather than + * ukey->relid. + */ + mark_rel_singlerow(joinrel, ukey->relid); + break; + } + joinrel->uniquekeys = lappend(joinrel->uniquekeys, ukey); + } + } + + return true; +} + +/* + * populate_joinrel_uniquekeys + */ +void +populate_joinrel_uniquekeys(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outerrel, RelOptInfo *innerrel, + List *restrictlist, JoinType jointype) +{ + bool outeruk_still_valid = false, + inneruk_still_valid = false; + ListCell *lc, + *lc2; + + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + { + foreach(lc, outerrel->uniquekeys) + { + /* + * the uniquekey on the outer side is not changed after semi/anti + * join. + */ + joinrel->uniquekeys = lappend(joinrel->uniquekeys, lfirst(lc)); + } + return; + } + + if (outerrel->uniquekeys == NIL || innerrel->uniquekeys == NIL) + return; + + outeruk_still_valid = populate_joinrel_uniquekey_for_rel(root, joinrel, outerrel, + innerrel, restrictlist); + inneruk_still_valid = populate_joinrel_uniquekey_for_rel(root, joinrel, innerrel, + outerrel, restrictlist); + + if (outeruk_still_valid || inneruk_still_valid) + + /* + * the uniquekey on outers or inners have been added into joinrel so + * the combined uniuqekey from both sides is not needed. + */ + return; + + /* + * The combined UniqueKey is still unique no matter the join method or + * join clauses. So let build the combined ones. + */ + foreach(lc, outerrel->uniquekeys) + { + UniqueKey *outer_ukey = lfirst(lc); + + if (!is_uniquekey_useful_afterjoin(root, outer_ukey, joinrel)) + /* discard the uniquekey which is not interesting. */ + continue; + + /* singlerow will make the inneruk_still_valid true */ + Assert(!uniquekey_is_singlerow(outer_ukey)); + + foreach(lc2, innerrel->uniquekeys) + { + UniqueKey *inner_ukey = lfirst(lc2); + + if (!is_uniquekey_useful_afterjoin(root, inner_ukey, joinrel)) + continue; + + /* singlerow will make the outeruk_still_valid true */ + Assert(!uniquekey_is_singlerow(inner_ukey)); + + joinrel->uniquekeys = lappend(joinrel->uniquekeys, + make_uniquekey( + bms_union(outer_ukey->eclass_indexes, inner_ukey->eclass_indexes), + outer_ukey->use_for_distinct || inner_ukey->use_for_distinct)); + } + } +} + +/* + * uniquekey_contains_in + * Return if UniqueKey contains in the list of EquivalenceClass + * or the UniqueKey's SingleRow contains in relids. + */ +static bool +uniquekey_contains_in(PlannerInfo *root, UniqueKey * ukey, Bitmapset *ecs, Relids relids) +{ + + if (uniquekey_is_singlerow(ukey)) + { + return bms_is_member(ukey->relid, relids); + } + + return bms_is_subset(ukey->eclass_indexes, ecs); +} + + +/* + * uniquekey_useful_for_merging + * Check if the uniquekey is useful for mergejoins above the given relation. + * + * similar with pathkeys_useful_for_merging. + */ +static bool +uniquekey_useful_for_merging(PlannerInfo *root, UniqueKey * ukey, RelOptInfo *rel) +{ + + int i = -1; + + while ((i = bms_next_member(ukey->eclass_indexes, i)) >= 0) + { + EquivalenceClass *ec = list_nth(root->eq_classes, i); + ListCell *j; + bool matched = false; + + if (rel->has_eclass_joins && eclass_useful_for_merging(root, ec, rel)) + { + matched = true; + } + else + { + foreach(j, rel->joininfo) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j); + + if (restrictinfo->mergeopfamilies == NIL) + continue; + update_mergeclause_eclasses(root, restrictinfo); + + if (ec == restrictinfo->left_ec || ec == restrictinfo->right_ec) + { + matched = true; + break; + } + } + } + + if (!matched) + return false; + } + + return true; +} + +/* + * is_uniquekey_useful_afterjoin + * + * uniquekey is useful when it contains in distinct_pathkey or in mergable join clauses. + */ +static bool +is_uniquekey_useful_afterjoin(PlannerInfo *root, UniqueKey * ukey, RelOptInfo *joinrel) +{ + if (ukey->use_for_distinct) + return true; + + /* XXX might needs a better judgement */ + if (uniquekey_is_singlerow(ukey)) + return true; + + + return uniquekey_useful_for_merging(root, ukey, joinrel); +} diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index b31d892121..1ed02d6eb2 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -2647,6 +2647,14 @@ distribute_restrictinfo_to_rels(PlannerInfo *root, /* Add clause to rel's restriction list */ rel->baserestrictinfo = lappend(rel->baserestrictinfo, restrictinfo); + { + List *not_null_vars = find_nonnullable_vars((Node *) restrictinfo->clause); + + if (not_null_vars != NIL) + rel->notnullattrs = bms_union(rel->notnullattrs, + list_nth(not_null_vars, rel->relid)); + } + /* Update security level info */ rel->baserestrict_min_security = Min(rel->baserestrict_min_security, restrictinfo->security_level); diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index a8cea5efe1..3ac2deaa21 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1663,9 +1663,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) gset_data); /* Fix things up if grouping_target contains SRFs */ if (parse->hasTargetSRFs) + { adjust_paths_for_srfs(root, current_rel, grouping_targets, grouping_targets_contain_srfs); + current_rel->uniquekeys = NIL; + } } /* @@ -1725,6 +1728,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * Now we are prepared to build the final-output upperrel. */ final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL); + /* simple_copy_uniquekeys(final_rel, current_rel); */ /* * If the input rel is marked consider_parallel and there's nothing that's @@ -4043,6 +4047,22 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, gd, extra->targetList); + if (root->parse->groupingSets) + { + /* nothing to do */ + } + else if (root->parse->groupClause && root->group_pathkeys != NIL) + { + /* + * populate_uniquekeys_from_pathkeys(root, grouped_rel, + * root->group_pathkeys); + */ + } + else + { + /* SingleRow Case */ + } + /* Build final grouping paths */ add_paths_to_grouping_rel(root, input_rel, grouped_rel, partially_grouped_rel, agg_costs, gd, @@ -4662,9 +4682,22 @@ create_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel) { RelOptInfo *distinct_rel; + /* + * distinct_pathkeys may be NIL if it distinctClause is not sortable. XXX: + * What should we do for the else? + */ + if (root->distinct_pathkeys && + relation_is_distinct_for(root, input_rel, root->distinct_pathkeys)) + return input_rel; + /* For now, do all work in the (DISTINCT, NULL) upperrel */ distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL); + /* + * populate_uniquekeys_from_pathkeys(root, distinct_rel, + * root->distinct_pathkeys); + */ + /* * We don't compute anything at this level, so distinct_rel will be * parallel-safe if the input rel is parallel-safe. In particular, if diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 7159c775fb..665d3e890c 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -121,6 +121,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, Relation relation; bool hasindex; List *indexinfos = NIL; + Index i; /* * We need not lock the relation since it was already locked, either by @@ -175,6 +176,15 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, /* Retrieve the parallel_workers reloption, or -1 if not set. */ rel->rel_parallel_workers = RelationGetParallelWorkers(relation, -1); + for (i = 0; i < relation->rd_att->natts; i++) + { + FormData_pg_attribute attr = relation->rd_att->attrs[i]; + + if (attr.attnotnull) + rel->notnullattrs = bms_add_member(rel->notnullattrs, + attr.attnum - FirstLowInvalidHeapAttributeNumber); + } + /* * Make list of indexes. Ignore indexes on system catalogs if told to. * Don't bother with indexes from traditional inheritance parents. For diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 5d83f60eb9..2c9e1fe016 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -284,6 +284,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->all_partrels = NULL; rel->partexprs = NULL; rel->nullable_partexprs = NULL; + rel->uniquekeys = NIL; /* * Pass assorted information down the inheritance hierarchy. @@ -745,6 +746,7 @@ build_join_rel(PlannerInfo *root, joinrel->all_partrels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; + joinrel->uniquekeys = NIL; /* Compute information relevant to the foreign relations. */ set_foreign_rel_properties(joinrel, outer_rel, inner_rel); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index ed85dc7414..301f7e89d7 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -876,6 +876,7 @@ typedef struct RelOptInfo * Vars/Exprs, cost, width */ struct PathTarget *reltarget; + List *uniquekeys; /* A list of UniqueKey. */ /* * materialization information @@ -913,6 +914,9 @@ typedef struct RelOptInfo Relids *attr_needed pg_node_attr(read_write_ignore); /* array indexed [min_attr .. max_attr] */ int32 *attr_widths pg_node_attr(read_write_ignore); + + /* The not null attrs from catalogs or baserestrictinfo. */ + Bitmapset *notnullattrs; /* relids of outer joins that can null this baserel */ Relids nulling_relids; /* LATERAL Vars and PHVs referenced by rel */ @@ -1456,6 +1460,21 @@ typedef struct PathKey bool pk_nulls_first; /* do NULLs come before normal values? */ } PathKey; + +typedef struct UniqueKey +{ + pg_node_attr(no_read, no_query_jumble) + + NodeTag type; + Bitmapset *eclass_indexes; /* Indexes in PlannerInfo's eq_class list of + * ECs that is unique for a RelOptInfo. */ + int relid; + bool use_for_distinct; /* true if it is used in distinct-pathkey, + * in this case we would never check if we + * should discard it during join search. */ +} UniqueKey; + + /* * VolatileFunctionStatus -- allows nodes to cache their * contain_volatile_functions properties. VOLATILITY_UNKNOWN means not yet diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h index 529a382d28..1be53fe7f7 100644 --- a/src/include/nodes/pg_list.h +++ b/src/include/nodes/pg_list.h @@ -581,6 +581,8 @@ extern bool list_member_int(const List *list, int datum); extern bool list_member_oid(const List *list, Oid datum); extern bool list_member_xid(const List *list, TransactionId datum); +extern int list_member_ptr_pos(const List *list, const void *datum); + extern pg_nodiscard List *list_delete(List *list, void *datum); extern pg_nodiscard List *list_delete_ptr(List *list, void *datum); extern pg_nodiscard List *list_delete_int(List *list, int datum); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 9e7408c7ec..14649a6bc4 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -139,11 +139,17 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root, extern EquivalenceMember *find_ec_member_matching_expr(EquivalenceClass *ec, Expr *expr, Relids relids); +extern int find_ec_position_matching_expr(PlannerInfo *root, + Expr *expr, + RelOptInfo *rel); extern EquivalenceMember *find_computable_ec_member(PlannerInfo *root, EquivalenceClass *ec, List *exprs, Relids relids, bool require_parallel_safe); +extern Bitmapset *build_ec_positions_for_exprs(PlannerInfo *root, + List *exprs, + RelOptInfo *rel); extern bool relation_can_be_sorted_early(PlannerInfo *root, RelOptInfo *rel, EquivalenceClass *ec, bool require_parallel_safe); @@ -262,4 +268,11 @@ extern PathKey *make_canonical_pathkey(PlannerInfo *root, extern void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, List *live_childrels); +extern bool relation_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, + List *distinct_pathkey); +extern void populate_baserel_uniquekeys(PlannerInfo *root, + RelOptInfo *baserel); +extern void populate_joinrel_uniquekeys(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outerrel, RelOptInfo *innerrel, + List *restrictlist, JoinType jointype); #endif /* PATHS_H */ diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 892ea5f170..0fb7faec12 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -5645,18 +5645,15 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s explain (costs off) select d.* from d left join (select distinct * from b) s on d.a = s.id; - QUERY PLAN --------------------------------------- + QUERY PLAN +------------------------------------ Merge Right Join Merge Cond: (b.id = d.a) - -> Unique - -> Sort - Sort Key: b.id, b.c_id - -> Seq Scan on b + -> Index Scan using b_pkey on b -> Sort Sort Key: d.a -> Seq Scan on d -(9 rows) +(6 rows) -- join removal is not possible here explain (costs off) diff --git a/src/test/regress/expected/uniquekey.out b/src/test/regress/expected/uniquekey.out new file mode 100644 index 0000000000..aa712c2dd7 --- /dev/null +++ b/src/test/regress/expected/uniquekey.out @@ -0,0 +1,268 @@ +CREATE TABLE uk_t (id int primary key, a int not null, b int not null, c int, d int, e int); +explain (costs off) +select distinct id from uk_t; + QUERY PLAN +------------------ + Seq Scan on uk_t +(1 row) + +explain (costs off) +select distinct e from uk_t where id = e; + QUERY PLAN +-------------------- + Seq Scan on uk_t + Filter: (id = e) +(2 rows) + +create unique index on uk_t (a, b); +create unique index on uk_t (c, d); +explain (costs off) +select distinct a, b from uk_t; + QUERY PLAN +------------------ + Seq Scan on uk_t +(1 row) + +explain (costs off) +select distinct c, d from uk_t; + QUERY PLAN +------------------------ + HashAggregate + Group Key: c, d + -> Seq Scan on uk_t +(3 rows) + +explain (costs off) +select distinct c, d from uk_t +where c > 0 and d > 0; + QUERY PLAN +------------------------------------------- + Bitmap Heap Scan on uk_t + Recheck Cond: ((c > 0) AND (d > 0)) + -> Bitmap Index Scan on uk_t_c_d_idx + Index Cond: ((c > 0) AND (d > 0)) +(4 rows) + +explain (costs off) +select distinct d from uk_t +where c > 1 and d > 0; + QUERY PLAN +------------------------------------------------- + HashAggregate + Group Key: d + -> Bitmap Heap Scan on uk_t + Recheck Cond: ((c > 1) AND (d > 0)) + -> Bitmap Index Scan on uk_t_c_d_idx + Index Cond: ((c > 1) AND (d > 0)) +(6 rows) + +explain (costs off) +select distinct d from uk_t +where c = 1 and d > 0; + QUERY PLAN +------------------------------------------- + Bitmap Heap Scan on uk_t + Recheck Cond: ((c = 1) AND (d > 0)) + -> Bitmap Index Scan on uk_t_c_d_idx + Index Cond: ((c = 1) AND (d > 0)) +(4 rows) + +-- test the join case -- +EXPLAIN (COSTS OFF) +SELECT distinct t1.id FROM uk_t t1 JOIN uk_t t2 ON t1.e = t2.id; + QUERY PLAN +--------------------------------- + Hash Join + Hash Cond: (t1.e = t2.id) + -> Seq Scan on uk_t t1 + -> Hash + -> Seq Scan on uk_t t2 +(5 rows) + +EXPLAIN (COSTS OFF) +SELECT distinct t1.id FROM uk_t t1 JOIN uk_t t2 ON t1.e = t2.a and t1.d = t2.b; + QUERY PLAN +------------------------------------------------ + Hash Join + Hash Cond: ((t1.e = t2.a) AND (t1.d = t2.b)) + -> Seq Scan on uk_t t1 + -> Hash + -> Seq Scan on uk_t t2 +(5 rows) + +EXPLAIN (COSTS OFF) +SELECT distinct t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON t1.e = t2.id; + QUERY PLAN +--------------------- + Seq Scan on uk_t t1 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT distinct t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON t1.e = t2.a and t1.d = t2.b; + QUERY PLAN +--------------------- + Seq Scan on uk_t t1 +(1 row) + +-- outer join makes null values so distinct can't be a no-op. +EXPLAIN (COSTS OFF) +SELECT distinct t1.id FROM uk_t t1 RIGHT JOIN uk_t t2 ON t1.e = t2.id; + QUERY PLAN +--------------------------------------- + HashAggregate + Group Key: t1.id + -> Hash Right Join + Hash Cond: (t1.e = t2.id) + -> Seq Scan on uk_t t1 + -> Hash + -> Seq Scan on uk_t t2 +(7 rows) + +-- single row case. +-- single row is unqiue all the time. +-- case 1: +-- t1.d is single row at base rel level, and because of t1.e = t2.id, it is +-- still single row after join. +EXPLAIN (COSTS OFF) +SELECT distinct t1.d FROM uk_t t1 JOIN uk_t t2 ON t1.e = t2.id and t1.id = 1; + QUERY PLAN +-------------------------------------------------- + Nested Loop + -> Index Scan using uk_t_pkey on uk_t t1 + Index Cond: (id = 1) + -> Index Only Scan using uk_t_pkey on uk_t t2 + Index Cond: (id = t1.e) +(5 rows) + +-- case 2 +-- any uniquekey which join with single row whose uniqueness can be kept. +-- t1.d is single row at base rel level because of t1.id = 1 +-- so t2's uniquekey can be kept when join with t2.any_column. +EXPLAIN (COSTS OFF) +SELECT distinct t2.id FROM uk_t t1 JOIN uk_t t2 ON t2.e = t1.e and t1.id = 1; + QUERY PLAN +--------------------------------------------------- + Hash Join + Hash Cond: (t2.e = t1.e) + -> Seq Scan on uk_t t2 + -> Hash + -> Index Scan using uk_t_pkey on uk_t t1 + Index Cond: (id = 1) +(6 rows) + +insert into uk_t SELECT 1, 2, 3, 4, 5, 6; +-- combined uniquekey cases. +-- the combinations of uniquekeys from the 2 sides is still unique +-- no matter the join method and join clauses. +EXPLAIN (COSTS OFF) +SELECT distinct t2.id, t1.id FROM uk_t t1 JOIN uk_t t2 ON true; + QUERY PLAN +--------------------------------- + Nested Loop + -> Seq Scan on uk_t t1 + -> Materialize + -> Seq Scan on uk_t t2 +(4 rows) + +SELECT distinct t2.id, t1.id FROM uk_t t1 JOIN uk_t t2 ON true; + id | id +----+---- + 1 | 1 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT distinct t2.id, t1.id FROM uk_t t1 JOIN uk_t t2 ON false; + QUERY PLAN +-------------------------- + Result + One-Time Filter: false +(2 rows) + +SELECT distinct t2.id, t1.id FROM uk_t t1 JOIN uk_t t2 ON false; + id | id +----+---- +(0 rows) + +-- BAD CASE: the distinct should be marked as noop in the below 4 cases, but +-- since it is nullable by outer join, so it can't be removed. However the rule +-- here should be if both uniquekey are not nullable, the combinations of them +-- is not *mutli* nullable even by outer join. It is not clear to me how to +-- implement it since I am just feeling not maintaining the not-null stuff +-- during the joins is pretty amazing, otherwise there are too many stuff to +-- do to cover this special case. +EXPLAIN (COSTS OFF) +SELECT distinct t2.id, t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON true; + QUERY PLAN +--------------------------------------- + HashAggregate + Group Key: t2.id, t1.id + -> Nested Loop Left Join + -> Seq Scan on uk_t t1 + -> Materialize + -> Seq Scan on uk_t t2 +(6 rows) + +SELECT distinct t2.id, t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON true; + id | id +----+---- + 1 | 1 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT distinct t2.id, t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON false; + QUERY PLAN +-------------------------------------- + HashAggregate + Group Key: id, t1.id + -> Nested Loop Left Join + Join Filter: false + -> Seq Scan on uk_t t1 + -> Result + One-Time Filter: false +(7 rows) + +SELECT distinct t2.id, t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON false; + id | id +----+---- + | 1 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT distinct t2.id, t1.id FROM uk_t t1 FULL JOIN uk_t t2 ON true; + QUERY PLAN +--------------------------------------- + HashAggregate + Group Key: t2.id, t1.id + -> Merge Full Join + -> Seq Scan on uk_t t1 + -> Materialize + -> Seq Scan on uk_t t2 +(6 rows) + +SELECT distinct t2.id, t1.id FROM uk_t t1 FULL JOIN uk_t t2 ON true; + id | id +----+---- + 1 | 1 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT distinct t2.id, t1.id FROM uk_t t1 FULL JOIN uk_t t2 ON false; + QUERY PLAN +--------------------------------------------- + Unique + -> Sort + Sort Key: t2.id, t1.id + -> Merge Full Join + Join Filter: false + -> Seq Scan on uk_t t1 + -> Materialize + -> Seq Scan on uk_t t2 +(8 rows) + +SELECT distinct t2.id, t1.id FROM uk_t t1 FULL JOIN uk_t t2 ON false; + id | id +----+---- + 1 | + | 1 +(2 rows) + diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index f0987ff537..1362720e36 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -62,7 +62,7 @@ test: sanity_check # join depends on create_misc # ---------- test: select_into select_distinct select_distinct_on select_implicit select_having subselect union case join aggregates transactions random portals arrays btree_index hash_index update delete namespace prepared_xacts - +test: uniquekey # ---------- # Another group of parallel tests # ---------- diff --git a/src/test/regress/sql/uniquekey.sql b/src/test/regress/sql/uniquekey.sql new file mode 100644 index 0000000000..4db0213a04 --- /dev/null +++ b/src/test/regress/sql/uniquekey.sql @@ -0,0 +1,104 @@ +CREATE TABLE uk_t (id int primary key, a int not null, b int not null, c int, d int, e int); + +explain (costs off) +select distinct id from uk_t; + +explain (costs off) +select distinct e from uk_t where id = e; + +create unique index on uk_t (a, b); +create unique index on uk_t (c, d); + +explain (costs off) +select distinct a, b from uk_t; + +explain (costs off) +select distinct c, d from uk_t; + +explain (costs off) +select distinct c, d from uk_t +where c > 0 and d > 0; + +explain (costs off) +select distinct d from uk_t +where c > 1 and d > 0; + +explain (costs off) +select distinct d from uk_t +where c = 1 and d > 0; + + +-- test the join case -- + +EXPLAIN (COSTS OFF) +SELECT distinct t1.id FROM uk_t t1 JOIN uk_t t2 ON t1.e = t2.id; + +EXPLAIN (COSTS OFF) +SELECT distinct t1.id FROM uk_t t1 JOIN uk_t t2 ON t1.e = t2.a and t1.d = t2.b; + + +EXPLAIN (COSTS OFF) +SELECT distinct t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON t1.e = t2.id; + +EXPLAIN (COSTS OFF) +SELECT distinct t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON t1.e = t2.a and t1.d = t2.b; + +-- outer join makes null values so distinct can't be a no-op. +EXPLAIN (COSTS OFF) +SELECT distinct t1.id FROM uk_t t1 RIGHT JOIN uk_t t2 ON t1.e = t2.id; + +-- single row case. + +-- single row is unqiue all the time. + +-- case 1: +-- t1.d is single row at base rel level, and because of t1.e = t2.id, it is +-- still single row after join. +EXPLAIN (COSTS OFF) +SELECT distinct t1.d FROM uk_t t1 JOIN uk_t t2 ON t1.e = t2.id and t1.id = 1; + +-- case 2 +-- any uniquekey which join with single row whose uniqueness can be kept. +-- t1.d is single row at base rel level because of t1.id = 1 +-- so t2's uniquekey can be kept when join with t2.any_column. + +EXPLAIN (COSTS OFF) +SELECT distinct t2.id FROM uk_t t1 JOIN uk_t t2 ON t2.e = t1.e and t1.id = 1; + +insert into uk_t SELECT 1, 2, 3, 4, 5, 6; + +-- combined uniquekey cases. +-- the combinations of uniquekeys from the 2 sides is still unique +-- no matter the join method and join clauses. +EXPLAIN (COSTS OFF) +SELECT distinct t2.id, t1.id FROM uk_t t1 JOIN uk_t t2 ON true; +SELECT distinct t2.id, t1.id FROM uk_t t1 JOIN uk_t t2 ON true; + +EXPLAIN (COSTS OFF) +SELECT distinct t2.id, t1.id FROM uk_t t1 JOIN uk_t t2 ON false; +SELECT distinct t2.id, t1.id FROM uk_t t1 JOIN uk_t t2 ON false; + + +-- BAD CASE: the distinct should be marked as noop in the below 4 cases, but +-- since it is nullable by outer join, so it can't be removed. However the rule +-- here should be if both uniquekey are not nullable, the combinations of them +-- is not *mutli* nullable even by outer join. It is not clear to me how to +-- implement it since I am just feeling not maintaining the not-null stuff +-- during the joins is pretty amazing, otherwise there are too many stuff to +-- do to cover this special case. +EXPLAIN (COSTS OFF) +SELECT distinct t2.id, t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON true; +SELECT distinct t2.id, t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON true; + +EXPLAIN (COSTS OFF) +SELECT distinct t2.id, t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON false; +SELECT distinct t2.id, t1.id FROM uk_t t1 LEFT JOIN uk_t t2 ON false; + + +EXPLAIN (COSTS OFF) +SELECT distinct t2.id, t1.id FROM uk_t t1 FULL JOIN uk_t t2 ON true; +SELECT distinct t2.id, t1.id FROM uk_t t1 FULL JOIN uk_t t2 ON true; + +EXPLAIN (COSTS OFF) +SELECT distinct t2.id, t1.id FROM uk_t t1 FULL JOIN uk_t t2 ON false; +SELECT distinct t2.id, t1.id FROM uk_t t1 FULL JOIN uk_t t2 ON false; -- 2.34.1