From 42e4031c9255e89428cc095782d08ad95b15a07f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=80=E6=8C=83?= Date: Sat, 28 Mar 2020 13:44:54 +0800 Subject: [PATCH v3] Maintain UniqueKey at each RelOptInfo, this information can be used to erasing distinct/group path if we are sure the result is unique already. And rel_is_unique_for is modified to use UniqueKey to detect more cases. --- .../postgres_fdw/expected/postgres_fdw.out | 34 +- src/backend/nodes/equalfuncs.c | 3 +- src/backend/nodes/list.c | 27 + src/backend/nodes/makefuncs.c | 15 + src/backend/optimizer/path/Makefile | 3 +- src/backend/optimizer/path/allpaths.c | 25 +- src/backend/optimizer/path/joinrels.c | 2 + src/backend/optimizer/path/uniquekeys.c | 1033 +++++++++++++++++ src/backend/optimizer/plan/analyzejoins.c | 171 +-- src/backend/optimizer/plan/initsplan.c | 9 + src/backend/optimizer/plan/planmain.c | 13 - src/backend/optimizer/plan/planner.c | 33 +- src/backend/optimizer/prep/prepunion.c | 2 + src/backend/optimizer/util/plancat.c | 8 + src/include/nodes/makefuncs.h | 2 + src/include/nodes/nodes.h | 1 + src/include/nodes/pathnodes.h | 23 + src/include/nodes/pg_list.h | 2 + src/include/optimizer/paths.h | 37 + src/test/regress/expected/aggregates.out | 83 +- src/test/regress/expected/join.out | 34 +- src/test/regress/expected/select_distinct.out | 395 +++++++ src/test/regress/sql/join.sql | 3 + src/test/regress/sql/select_distinct.sql | 125 ++ 24 files changed, 1825 insertions(+), 258 deletions(-) create mode 100644 src/backend/optimizer/path/uniquekeys.c diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 62c2697920..ba3947c9f9 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -2902,22 +2902,20 @@ select sum(c1%3), sum(distinct c1%3 order by c1%3) filter (where c1%3 < 2), c2 f -- Outer query is aggregation query explain (verbose, costs off) select distinct (select count(*) 
filter (where t2.c2 = 6 and t2.c1 < 10) from ft1 t1 where t1.c1 = 6) from ft2 t2 where t2.c2 % 6 = 0 order by 1; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------- - Unique + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------ + Sort Output: ((SubPlan 1)) - -> Sort - Output: ((SubPlan 1)) - Sort Key: ((SubPlan 1)) - -> Foreign Scan - Output: (SubPlan 1) - Relations: Aggregate on (public.ft2 t2) - Remote SQL: SELECT count(*) FILTER (WHERE ((c2 = 6) AND ("C 1" < 10))) FROM "S 1"."T 1" WHERE (((c2 % 6) = 0)) - SubPlan 1 - -> Foreign Scan on public.ft1 t1 - Output: (count(*) FILTER (WHERE ((t2.c2 = 6) AND (t2.c1 < 10)))) - Remote SQL: SELECT NULL FROM "S 1"."T 1" WHERE (("C 1" = 6)) -(13 rows) + Sort Key: ((SubPlan 1)) + -> Foreign Scan + Output: (SubPlan 1) + Relations: Aggregate on (public.ft2 t2) + Remote SQL: SELECT count(*) FILTER (WHERE ((c2 = 6) AND ("C 1" < 10))) FROM "S 1"."T 1" WHERE (((c2 % 6) = 0)) + SubPlan 1 + -> Foreign Scan on public.ft1 t1 + Output: (count(*) FILTER (WHERE ((t2.c2 = 6) AND (t2.c1 < 10)))) + Remote SQL: SELECT NULL FROM "S 1"."T 1" WHERE (("C 1" = 6)) +(11 rows) select distinct (select count(*) filter (where t2.c2 = 6 and t2.c1 < 10) from ft1 t1 where t1.c1 = 6) from ft2 t2 where t2.c2 % 6 = 0 order by 1; count @@ -3416,7 +3414,6 @@ select sum(q.a), count(q.b) from ft4 left join (select 13, avg(ft1.c1), sum(ft2. Output: sum(q.a), count(q.b) -> Nested Loop Left Join Output: q.a, q.b - Inner Unique: true Join Filter: ((ft4.c1)::numeric <= q.b) -> Foreign Scan on public.ft4 Output: ft4.c1, ft4.c2, ft4.c3 @@ -3429,7 +3426,7 @@ select sum(q.a), count(q.b) from ft4 left join (select 13, avg(ft1.c1), sum(ft2. 
Output: 13, (avg(ft1.c1)), NULL::bigint Relations: Aggregate on ((public.ft2) LEFT JOIN (public.ft1)) Remote SQL: SELECT 13, avg(r1."C 1"), NULL::bigint FROM ("S 1"."T 1" r2 LEFT JOIN "S 1"."T 1" r1 ON (((r1."C 1" = r2."C 1")))) -(17 rows) +(16 rows) select sum(q.a), count(q.b) from ft4 left join (select 13, avg(ft1.c1), sum(ft2.c1) from ft1 right join ft2 on (ft1.c1 = ft2.c1)) q(a, b, c) on (ft4.c1 <= q.b); sum | count @@ -4198,7 +4195,6 @@ explain (verbose, costs off) select * from ft3 f, loct3 l ------------------------------------------------------------- Hash Join Output: f.f1, f.f2, f.f3, l.f1, l.f2, l.f3 - Inner Unique: true Hash Cond: ((f.f3)::text = (l.f3)::text) -> Foreign Scan on public.ft3 f Output: f.f1, f.f2, f.f3 @@ -4208,7 +4204,7 @@ explain (verbose, costs off) select * from ft3 f, loct3 l -> Index Scan using loct3_f1_key on public.loct3 l Output: l.f1, l.f2, l.f3 Index Cond: (l.f1 = 'foo'::text) -(12 rows) +(11 rows) -- =================================================================== -- test writable foreign table stuff diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 88b912977e..63e92d94ef 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -162,8 +162,9 @@ _equalIntoClause(const IntoClause *a, const IntoClause *b) static bool _equalVar(const Var *a, const Var *b) { - COMPARE_SCALAR_FIELD(varno); + /* Compare varattno first since it has higher selectivity than varno */ COMPARE_SCALAR_FIELD(varattno); + COMPARE_SCALAR_FIELD(varno); COMPARE_SCALAR_FIELD(vartype); COMPARE_SCALAR_FIELD(vartypmod); COMPARE_SCALAR_FIELD(varcollid); diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c index bd0c58cd81..c7336b10f2 100644 --- a/src/backend/nodes/list.c +++ b/src/backend/nodes/list.c @@ -688,6 +688,33 @@ list_member_oid(const List *list, Oid datum) return false; } +/* + * Return true iff there is an equal member in target for every + * member in members + */ +bool 
+list_all_members_in(const List *members, const List *target) +{ + const ListCell *lc1, *lc2; + if (target == NIL && members != NIL) + return false; + foreach(lc1, members) + { + bool found = false; + foreach(lc2, target) + { + if (equal(lfirst(lc1), lfirst(lc2))) + { + found = true; + break; + } + } + if (!found) + return false; + } + return true; +} + /* * Delete the n'th cell (counting from 0) in list. * diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c index e8cdc90c31..7082c8b1b0 100644 --- a/src/backend/nodes/makefuncs.c +++ b/src/backend/nodes/makefuncs.c @@ -809,3 +809,18 @@ makeVacuumRelation(RangeVar *relation, Oid oid, List *va_cols) v->va_cols = va_cols; return v; } + + +/* + * makeUnqiueKey + */ +UniqueKey* +makeUniqueKey(List *exprs, List* positions, bool guarantee) +{ + UniqueKey * ukey = makeNode(UniqueKey); + Assert(list_length(exprs) == list_length(positions)); + ukey->exprs = exprs; + ukey->positions = positions; + ukey->guarantee = guarantee; + return ukey; +} diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile index 1e199ff66f..7b9820c25f 100644 --- a/src/backend/optimizer/path/Makefile +++ b/src/backend/optimizer/path/Makefile @@ -21,6 +21,7 @@ OBJS = \ joinpath.o \ joinrels.o \ pathkeys.o \ - tidpath.o + tidpath.o \ + uniquekeys.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 905bbe77d8..e7383979aa 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -39,6 +39,7 @@ #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/plancat.h" +#include "optimizer/planmain.h" #include "optimizer/planner.h" #include "optimizer/restrictinfo.h" #include "optimizer/tlist.h" @@ -222,14 +223,23 @@ make_one_rel(PlannerInfo *root, List *joinlist) set_base_rel_pathlists(root); /* - * Generate access paths for the entire join tree. 
+ * Remove any useless outer joins. Ideally this would be done during + * jointree preprocessing, but the necessary information isn't available + * until we've built baserel data structures, classified qual clauses + * and uniquekeys */ - rel = make_rel_from_joinlist(root, joinlist); + joinlist = remove_useless_joins(root, joinlist); /* - * The result should join all and only the query's base rels. + * Also, reduce any semijoins with unique inner rels to plain inner joins. + * Likewise, this can't be done until now for lack of needed info. */ - Assert(bms_equal(rel->relids, root->all_baserels)); + reduce_unique_semijoins(root); + + /* + * Generate access paths for the entire join tree. + */ + rel = make_rel_from_joinlist(root, joinlist); return rel; } @@ -786,6 +796,9 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) /* Consider TID scans */ create_tidscan_paths(root, rel); + + /* Set UniqueKeys for this relation */ + populate_baserel_uniquekeys(root, rel, rel->indexlist); } /* @@ -1276,6 +1289,8 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, /* Add paths to the append relation. */ add_paths_to_append_rel(root, rel, live_childrels); + if (IS_PARTITIONED_REL(rel)) + populate_partitionedrel_uniquekeys(root, rel, live_childrels); } @@ -2349,6 +2364,8 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, pathkeys, required_outer)); } + convert_subquery_uniquekeys(root, rel, sub_final_rel); + /* If outer rel allows parallelism, do same for partial paths. */ if (rel->consider_parallel && bms_is_empty(required_outer)) { diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index a21c295b99..c6799aa48c 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -920,6 +920,8 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, /* Apply partitionwise join technique, if possible. 
*/ try_partitionwise_join(root, rel1, rel2, joinrel, sjinfo, restrictlist); + + populate_joinrel_uniquekeys(root, joinrel, rel1, rel2, restrictlist, sjinfo->jointype); } diff --git a/src/backend/optimizer/path/uniquekeys.c b/src/backend/optimizer/path/uniquekeys.c new file mode 100644 index 0000000000..58fe16be55 --- /dev/null +++ b/src/backend/optimizer/path/uniquekeys.c @@ -0,0 +1,1033 @@ +/*------------------------------------------------------------------------- + * + * uniquekeys.c + * Utilities for matching and building unique keys + * + * Portions Copyright (c) 2020, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/optimizer/path/uniquekeys.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "rewrite/rewriteManip.h" + + +/* + * This struct is used to help populate_joinrel_uniquekeys, + * Set added_to_joinrel to true if a uniquekey has been added to joinrel. + * For a joinrel, if both sides have UniqueKey, then the combine of them + * must be unique for the joinrel as well, But we don't need to add it if + * either of them has been added to joinrel already. We use this struct to + * maintain such info. 
+ */ +typedef struct UniqueKeyContextData +{ + UniqueKey *uniquekey; + /* Set to true if the unique key has been added to joinrel->uniquekeys */ + bool added_to_joinrel; + /* If this uniquekey is still useful after join */ + bool useful; +} *UniqueKeyContext; + + +static List *gather_mergeable_baserestrictlist(RelOptInfo *rel); +static List *gather_mergeable_joinclauses(RelOptInfo *joinrel, + RelOptInfo *rel1, + RelOptInfo *rel2, + List *restirctlist, + JoinType jointype); +static bool match_index_to_baserestrictinfo(IndexOptInfo *unique_ind, + List *restrictlist); +static List *initililze_unqiuecontext_for_joinrel(RelOptInfo *joinrel, + RelOptInfo *inputrel); + +static bool innerrel_keeps_unique(PlannerInfo *root, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist, + bool reverse); +static bool clause_sides_match_join(RestrictInfo *rinfo, + Relids outerrelids, + Relids innerrelids); +static void add_uniquekey_from_index(RelOptInfo *rel, + IndexOptInfo *unique_index); +static void add_uniquekey_for_onerow(RelOptInfo *rel); + +/* Used for unique indexes checking for partitioned table */ +static bool index_constains_partkey(RelOptInfo *partrel, IndexOptInfo *ind); +static IndexOptInfo *simple_copy_indexinfo_to_parent(RelOptInfo *parentrel, + IndexOptInfo *from); +static bool simple_indexinfo_equal(IndexOptInfo *ind1, IndexOptInfo *ind2); +static void adjust_partition_unique_indexlist(RelOptInfo *parentrel, + RelOptInfo *childrel, + List **global_unique_index); +/* Helper function for groupres/distinctrel */ +static void add_uniquekey_from_sortgroups(PlannerInfo *root, + RelOptInfo *rel, + List *sortgroups); + +/* + * populate_baserel_uniquekeys + * Populate 'baserel' uniquekeys list by looking at the rel's unique index + * add baserestrictinfo + */ +void +populate_baserel_uniquekeys(PlannerInfo *root, + RelOptInfo *baserel, + List *indexlist) +{ + ListCell *lc; + List *restrictlist = gather_mergeable_baserestrictlist(baserel); + bool return_one_row 
= false; + List *matched_uk_indexes = NIL; + + Assert(baserel->rtekind == RTE_RELATION); + + if (root->parse->hasTargetSRFs) + return; + + foreach(lc, indexlist) + { + IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc); + if (!ind->unique || !ind->immediate || + (ind->indpred != NIL && !ind->predOK)) + continue; + + if (match_index_to_baserestrictinfo(ind, restrictlist)) + { + return_one_row = true; + break; + } + /* We can't guarantee if an expression returns a NULL value, so ignore it */ + if (ind->indexprs != NIL) + continue; + matched_uk_indexes = lappend(matched_uk_indexes, ind); + } + + if (return_one_row) + { + /* + * Since only 1 row returned, any column is unique + */ + add_uniquekey_for_onerow(baserel); + } + else + { + foreach(lc, matched_uk_indexes) + add_uniquekey_from_index(baserel, lfirst_node(IndexOptInfo, lc)); + } +} + + +/* + * populate_partitioned_rel_uniquekeys + * The unique index can be used for UniqueKey based on: + * 1). It must include partition keys + * 2). All the childrels must has the same indexes. + */ +void +populate_partitionedrel_uniquekeys(PlannerInfo *root, + RelOptInfo *rel, + List *childrels) +{ + ListCell *lc; + List *global_unique_indexlist = NIL; + RelOptInfo *childrel; + bool is_first = true; + + Assert(IS_PARTITIONED_REL(rel)); + + if (root->parse->hasTargetSRFs) + return; + + if (childrels == NIL) + return; + + childrel = linitial_node(RelOptInfo, childrels); + foreach(lc, childrel->indexlist) + { + IndexOptInfo *ind = lfirst(lc); + IndexOptInfo *global_ind; + if (!ind->unique || !ind->immediate || + (ind->indpred != NIL && !ind->predOK)) + continue; + + global_ind = simple_copy_indexinfo_to_parent(rel, ind); + /* + * If the unique index doesn't contain partkey, then it is unique + * on this partition only, so it is useless for us. 
+ */ + if (!index_constains_partkey(rel, global_ind)) + continue; + global_unique_indexlist = lappend(global_unique_indexlist, global_ind); + } + + /* Fast path */ + if (global_unique_indexlist == NIL) + return; + + foreach(lc, childrels) + { + RelOptInfo *child = lfirst(lc); + if (is_first) + { + is_first = false; + continue; + } + adjust_partition_unique_indexlist(rel, child, &global_unique_indexlist); + } + + /* Now we have the unique index list which as exactly same on all childrels, + * Set the UniqueIndex just like it is non-partition table + */ + populate_baserel_uniquekeys(root, rel, global_unique_indexlist); +} + + +/* + * populate_distinctrel_uniquekeys + */ +void +populate_distinctrel_uniquekeys(PlannerInfo *root, + RelOptInfo *inputrel, + RelOptInfo *distinctrel) +{ + /* The unique key before the distinct is still valid*/ + distinctrel->uniquekeys = list_copy(inputrel->uniquekeys); + add_uniquekey_from_sortgroups(root, distinctrel, root->parse->distinctClause); +} + +/* + * populate_grouprel_uniquekeys + */ +void +populate_grouprel_uniquekeys(PlannerInfo *root, + RelOptInfo *grouprel) +{ + Query *parse = root->parse; + if (parse->hasTargetSRFs) + return; + if (parse->groupingSets != NIL) + return; + if (parse->groupClause != NIL) + add_uniquekey_from_sortgroups(root, + grouprel, + root->parse->groupClause); + else + /* it has aggregation but without a group by, so must be one line return */ + add_uniquekey_for_onerow(grouprel); +} + +/* + * simple_copy_uniquekeys + * Using a function for the one-line code makes us easy to check where we simply + * copied the uniquiekeys. 
+ */ +void +simple_copy_uniquekeys(RelOptInfo *oldrel, + RelOptInfo *newrel) +{ + newrel->uniquekeys = oldrel->uniquekeys; +} + +/* + * populate_unionrel_uniquiekeys + */ +void +populate_unionrel_uniquiekeys(PlannerInfo *root, + RelOptInfo *unionrel) +{ + ListCell *lc; + List *exprs = NIL; + List *colnos = NIL; + int i = 1; + + Assert(unionrel->uniquekeys == NIL); + + if (root->parse->hasTargetSRFs) + return; + + foreach(lc, unionrel->reltarget->exprs) + { + exprs = lappend(exprs, lfirst(lc)); + colnos = lappend_int(colnos, i); + i++; + } + unionrel->uniquekeys = lappend(unionrel->uniquekeys, + makeUniqueKey(exprs, colnos, true)); +} + +/* + * populate_joinrel_uniquekeys + * + * populate uniquekeys for joinrel. We will check each relation to see if it's + * UniqueKey is still valid via innerrel_keeps_unique, if so, we add it to + * joinrel. The guarantee field will be changed on some outer join case. + + * For the uniquekey in either baserel which can't be unique after join, we still + * check if combination of unqiuekeys from both side is still useful for us, + * if yes, we add it to joinrel as well. We only set the guarantee field to true iff both + * uniquekeys have 'guarantee' equals true. 
+ * + */ +void +populate_joinrel_uniquekeys(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outerrel, RelOptInfo *innerrel, + List *restrictlist, JoinType jointype) +{ + ListCell *lc, *lc2; + List *clause_list = NIL; + List *outerrel_uniquekey_ctx; + List *innerrel_uniquekey_ctx; + + if (root->parse->hasTargetSRFs) + return; + + /* Care about the outerrel relation only for SEMI/ANTI join */ + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + { + foreach(lc, outerrel->uniquekeys) + { + UniqueKey *uniquekey = lfirst_node(UniqueKey, lc); + if (list_all_members_in(uniquekey->exprs, joinrel->reltarget->exprs)) + joinrel->uniquekeys = lappend(joinrel->uniquekeys, uniquekey); + } + return; + } + + /* Fast path */ + if (innerrel->uniquekeys == NIL || outerrel->uniquekeys == NIL) + return; + + outerrel_uniquekey_ctx = initililze_unqiuecontext_for_joinrel(joinrel, outerrel); + innerrel_uniquekey_ctx = initililze_unqiuecontext_for_joinrel(joinrel, innerrel); + + clause_list = gather_mergeable_joinclauses(joinrel, outerrel, innerrel, + restrictlist, jointype); + + if (innerrel_keeps_unique(root, outerrel, innerrel, clause_list, false)) + { + foreach(lc, innerrel_uniquekey_ctx) + { + UniqueKeyContext ctx = (UniqueKeyContext)lfirst(lc); + if (!list_all_members_in(ctx->uniquekey->exprs, joinrel->reltarget->exprs)) + { + /* The UniqueKey on baserel is not useful on the joinrel */ + ctx->useful = false; + continue; + } + + if ((jointype == JOIN_LEFT || jointype == JOIN_FULL) && ctx->uniquekey->guarantee) + { + /* We can't guarantee the uniqueness anymore due to the outer join can + * duplicate the null values on these columns, so we set guarantee to false. + * AND we don't set the ctx->added_to_joinrel on purpose since we still have + * chances have an guarantee uniquekey after we combine with the UniqueKey + * from another relation. 
+ */ + joinrel->uniquekeys = lappend(joinrel->uniquekeys, + makeUniqueKey(ctx->uniquekey->exprs, + ctx->uniquekey->positions, + false)); + + } + else + { + joinrel->uniquekeys = lappend(joinrel->uniquekeys, ctx->uniquekey); + ctx->added_to_joinrel = true; + } + } + } + + if (innerrel_keeps_unique(root, innerrel, outerrel, clause_list, true)) + { + foreach(lc, outerrel_uniquekey_ctx) + { + UniqueKeyContext ctx = (UniqueKeyContext)lfirst(lc); + if (!list_all_members_in(ctx->uniquekey->exprs, joinrel->reltarget->exprs)) + { + ctx->useful = false; + continue; + } + /* NULL values in outer rel can be duplicated under JOIN_FULL only */ + if (jointype == JOIN_FULL && ctx->uniquekey->guarantee) + { + joinrel->uniquekeys = lappend(joinrel->uniquekeys, + makeUniqueKey(ctx->uniquekey->exprs, + ctx->uniquekey->positions, + false)); + + } + else + { + joinrel->uniquekeys = lappend(joinrel->uniquekeys, ctx->uniquekey); + ctx->added_to_joinrel = true; + } + } + } + + /* The combination of the UniqueKey from both sides is unique as well, + * but no bother to add it if its subset has been added already + */ + foreach(lc, outerrel_uniquekey_ctx) + { + UniqueKeyContext context1 = (UniqueKeyContext) lfirst(lc); + if (context1->added_to_joinrel || !context1->useful) + continue; + foreach(lc2, innerrel_uniquekey_ctx) + { + UniqueKeyContext context2 = (UniqueKeyContext) lfirst(lc2); + List *exprs = NIL, *colnos = NIL; + if (context2->added_to_joinrel || !context2->useful) + continue; + exprs = list_copy(context1->uniquekey->exprs); + colnos = list_copy(context1->uniquekey->positions); + exprs = list_concat(exprs, context2->uniquekey->exprs); + colnos = list_concat(colnos, context2->uniquekey->positions); + /* We need both sides guarantee=true, we will say the uniqueness of the combination + * is guarantee. 
+ */ + joinrel->uniquekeys = lappend(joinrel->uniquekeys, makeUniqueKey(exprs, colnos, + context1->uniquekey->guarantee && + context2->uniquekey->guarantee)); + } + } +} + + +/* + * Used to avoid mutli scan of rel->reltarget->exprs, See populate_subquery_uniquekeys + */ +typedef struct SubqueryUniqueKeyData +{ + /* + * Only the Var reference to subquery's unique is unique as well, we can't + * guarantee others + */ + Var *var; + + /* The position of the var in the rel->reltarget */ + int pos; +} *SubqueryUniqueKeyContext; + +/* + * convert_subquery_uniquekeys + * + * currel is the RelOptInfo in current level, sub_final_rel is get from the fetch_upper_rel + * we need to convert the UnqiueKey from sub_final_rel to currel via the positions info in + * UniqueKey + */ +void convert_subquery_uniquekeys(PlannerInfo *root, + RelOptInfo *currel, + RelOptInfo *sub_final_rel) +{ + SubqueryUniqueKeyContext *ctx_array; + SubqueryUniqueKeyContext ctx; + Index max_colno_subq = 0; + ListCell *lc, *lc2; + int pos = 0; + + if (sub_final_rel->uniquekeys == NIL) + /* This should be a common case */ + return; + + /* + * Calculate max_colno in subquery. 
In fact we can check this with + * list_length(sub_final_rel->reltarget->exprs), However, reltarget + * is not set on UPPERREL_FINAL relation, so do it this way + */ + foreach(lc, sub_final_rel->uniquekeys) + { + UniqueKey * ukey = lfirst_node(UniqueKey, lc); + foreach(lc2, ukey->positions) + { + Index colno = lfirst_int(lc2); + if (max_colno_subq < colno) + max_colno_subq = colno; + } + } + + Assert(max_colno_subq > 0); + ctx_array = palloc0(sizeof(SubqueryUniqueKeyContext) * (max_colno_subq + 1)); + + /* + * Create an array for each expr in currel->reltarget->exprs, the array index + * is the colno in subquery, so that we can get the expr quickly given a colno_subq + */ + foreach(lc, currel->reltarget->exprs) + { + Var *var; + int colno_subq; + pos++; + if (!IsA(lfirst(lc), Var)) + continue; + + var = lfirst_node(Var, lc); + colno_subq = var->varattno; + if (colno_subq > max_colno_subq) + continue; + ctx_array[colno_subq] = palloc0(sizeof(struct SubqueryUniqueKeyData)); + ctx = ctx_array[colno_subq]; /* corresponding to subquery's uniquekey->positions[x] */ + ctx->pos = pos; /* the position in current targetlist, will be used to set UnqiueKey */ + ctx->var = var; + } + + /* Cover the UniqueKey from sub_final_rel to currel */ + foreach(lc, sub_final_rel->uniquekeys) + { + UniqueKey * ukey = lfirst_node(UniqueKey, lc); + bool uniquekey_useful = true; + List *exprs = NIL; + List *colnos = NIL; + foreach(lc2, ukey->positions) + { + Index sub_colno = lfirst_int(lc2); + ctx = ctx_array[sub_colno]; + if (ctx == NULL) + { + /* The column is not used outside */ + uniquekey_useful = false; + break; + } + exprs = lappend(exprs, ctx->var); + colnos = lappend_int(colnos, ctx->pos); + } + if (uniquekey_useful) + currel->uniquekeys = lappend(currel->uniquekeys, + makeUniqueKey(exprs, colnos, ukey->guarantee)); + } +} + + +/* + * innerrel_keeps_unique + * + * Check if Unqiue key of the innerrel is valid after join. 
innerrel's UniqueKey + * will be still valid if innerrel's uniquekey mergeop outrerel's uniquekey exists + * in clause_list. + * Note: the clause_list must be a list of mergeable restrictinfo already. + */ +static bool +innerrel_keeps_unique(PlannerInfo *root, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *clause_list, + bool reverse) +{ + ListCell *lc, *lc2, *lc3; + + if (outerrel->uniquekeys == NIL || innerrel->uniquekeys == NIL) + return false; + + foreach(lc, outerrel->uniquekeys) + { + List *outer_uq_exprs = lfirst_node(UniqueKey, lc)->exprs; + bool clauselist_matchs_all_exprs = true; + foreach(lc2, outer_uq_exprs) + { + Node *outer_uq_expr = lfirst(lc2); + bool find_uq_expr_in_clauselist = false; + foreach(lc3, clause_list) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc3); + Node *outer_expr; + if (reverse) + outer_expr = rinfo->outer_is_left ? get_rightop(rinfo->clause) : get_leftop(rinfo->clause); + else + outer_expr = rinfo->outer_is_left ? get_leftop(rinfo->clause) : get_rightop(rinfo->clause); + if (equal(outer_expr, outer_uq_expr)) + { + find_uq_expr_in_clauselist = true; + break; + } + } + if (!find_uq_expr_in_clauselist) + { + /* No need to check the next exprs in the current uniquekey */ + clauselist_matchs_all_exprs = false; + break; + } + } + + if (clauselist_matchs_all_exprs) + /* If the clauselist match any uk from outerrel, the innerrel will be unique + * based on the fact that innerrel->uniquekeys != NIL which is checked at the + * beginning + */ + return true; + } + return false; +} + + +/* + * relation_has_uniquekeys_for + * Returns true if we have proofs that 'rel' cannot return multiple rows with + * the same values in each of 'exprs'. Otherwise returns false. 
+ */ +bool +relation_has_uniquekeys_for(PlannerInfo *root, RelOptInfo *rel, List *exprs) +{ + ListCell *lc; + + foreach(lc, rel->uniquekeys) + { + UniqueKey *ukey = lfirst_node(UniqueKey, lc); + if (!ukey->guarantee) + continue; + if (list_all_members_in(ukey->exprs, exprs)) + return true; + } + return false; +} + + +/* + * Examine the rel's restriction clauses for usable var = const clauses + */ +static List* +gather_mergeable_baserestrictlist(RelOptInfo *rel) +{ + List *restrictlist = NIL; + ListCell *lc; + foreach(lc, rel->baserestrictinfo) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc); + + /* + * Note: can_join won't be set for a restriction clause, but + * mergeopfamilies will be if it has a mergejoinable operator and + * doesn't contain volatile functions. + */ + if (restrictinfo->mergeopfamilies == NIL) + continue; /* not mergejoinable */ + + /* + * The clause certainly doesn't refer to anything but the given rel. + * If either side is pseudoconstant then we can use it. 
+ */ + if (bms_is_empty(restrictinfo->left_relids)) + { + /* righthand side is inner */ + restrictinfo->outer_is_left = true; + } + else if (bms_is_empty(restrictinfo->right_relids)) + { + /* lefthand side is inner */ + restrictinfo->outer_is_left = false; + } + else + continue; + + /* OK, add to list */ + restrictlist = lappend(restrictlist, restrictinfo); + } + return restrictlist; +} + + +/* + * gather_mergeable_joinclauses + */ +static List* +gather_mergeable_joinclauses(RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist, + JoinType jointype) +{ + List *clause_list = NIL; + ListCell *lc; + foreach(lc, restrictlist) + { + RestrictInfo *restrictinfo = (RestrictInfo *)lfirst(lc); + if (IS_OUTER_JOIN(jointype) && + RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids)) + continue; + + /* Ignore if it's not a mergejoinable clause */ + if (!restrictinfo->can_join || + restrictinfo->mergeopfamilies == NIL) + continue; /* not mergejoinable */ + + /* + * Check if clause has the form "outer op inner" or "inner op outer", + * and if so mark which side is inner. 
+ */ + if (!clause_sides_match_join(restrictinfo, outerrel->relids, innerrel->relids)) + continue; /* no good for these input relations */ + + /* OK, add to list */ + clause_list = lappend(clause_list, restrictinfo); + } + return clause_list; +} + + +/* + * Return true if uk = Const in the restrictlist + */ +static bool +match_index_to_baserestrictinfo(IndexOptInfo *unique_ind, List *restrictlist) +{ + int c = 0; + + /* A fast path to avoid the 2 loop scan */ + if (list_length(restrictlist) < unique_ind->ncolumns) + return false; + + for(c = 0; c < unique_ind->ncolumns; c++) + { + ListCell *lc; + bool found_in_restrictinfo = false; + foreach(lc, restrictlist) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + Node *rexpr; + + /* + * The condition's equality operator must be a member of the + * index opfamily, else it is not asserting the right kind of + * equality behavior for this index. We check this first + * since it's probably cheaper than match_index_to_operand(). + */ + if (!list_member_oid(rinfo->mergeopfamilies, unique_ind->opfamily[c])) + continue; + + /* + * XXX at some point we may need to check collations here too. + * For the moment we assume all collations reduce to the same + * notion of equality. 
+ */ + + /* OK, see if the condition operand matches the index key */ + if (rinfo->outer_is_left) + rexpr = get_rightop(rinfo->clause); + else + rexpr = get_leftop(rinfo->clause); + + if (match_index_to_operand(rexpr, c, unique_ind)) + { + found_in_restrictinfo = true; + break; + } + } + if (!found_in_restrictinfo) + return false; + } + return true; +} + +/* + * add_uniquekey_from_index + * We only add the Index Vars whose expr exists in rel->reltarget + */ +static void +add_uniquekey_from_index(RelOptInfo *rel, IndexOptInfo *unique_index) +{ + int c; + List *exprs = NIL; + List *positions = NIL; + bool guarantee = true; + + /* Only add the index which exists in rel->reltarget */ + for(c = 0; c < unique_index->ncolumns; c++) + { + ListCell *lc; + bool find_in_exprs = false; + /* To keep the uniquekey short, We only add it if it exists in rel->reltrget->exprs */ + foreach(lc, rel->reltarget->exprs) + { + Var *var; + if (!IsA(lfirst(lc), Var)) + continue; + var = lfirst_node(Var, lc); + if (var->varattno < InvalidAttrNumber) + /* System column */ + continue; + /* Must check not null for unique index */ + if (!bms_is_member(var->varattno, rel->not_null_cols)) + guarantee = false; + + if (match_index_to_operand((Node *)lfirst(lc), c, unique_index)) + { + find_in_exprs = true; + exprs = lappend(exprs, lfirst(lc)); + positions = lappend_int(positions, c+1); + break; + } + } + if (!find_in_exprs) + return; + } + + if (exprs != NIL) + { + rel->uniquekeys = lappend(rel->uniquekeys, + makeUniqueKey(exprs, positions, guarantee)); + } +} + + +/* + * add_uniquekey_for_onerow + * If we are sure about the relation only returns one row, then all the columns + * are unique + */ +void +add_uniquekey_for_onerow(RelOptInfo *rel) +{ + int c = 1; + ListCell *lc; + foreach(lc, rel->reltarget->exprs) + { + /* Every columns in this relation is unique since only 1 row returned + * No bother to check it is a var or nullable, we can guarantee the uniqueness + */ + rel->uniquekeys = 
lappend(rel->uniquekeys, + makeUniqueKey(list_make1(lfirst(lc)), + list_make1_int(c), + true)); + c++; + } +} + +/* + * initililze_unqiuecontext_for_joinrel + * Return a List of UniqueKeyContext for an inputrel, we also filter out + * all the unqiuekeys which are not possible to use later + */ +static List * +initililze_unqiuecontext_for_joinrel(RelOptInfo *joinrel, RelOptInfo *inputrel) +{ + List *res = NIL; + ListCell *lc; + foreach(lc, inputrel->uniquekeys) + { + UniqueKeyContext context; + context = palloc(sizeof(struct UniqueKeyContextData)); + context->uniquekey = lfirst_node(UniqueKey, lc); + context->added_to_joinrel = false; + context->useful = true; + res = lappend(res, context); + } + return res; +} + +/* + * clause_sides_match_join + * Determine whether a join clause is of the right form to use in this join. + * + * We already know that the clause is a binary opclause referencing only the + * rels in the current join. The point here is to check whether it has the + * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr", + * rather than mixing outer and inner vars on either side. If it matches, + * we set the transient flag outer_is_left to identify which side is which. + */ +static bool +clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, + Relids innerrelids) +{ + if (bms_is_subset(rinfo->left_relids, outerrelids) && + bms_is_subset(rinfo->right_relids, innerrelids)) + { + /* lefthand side is outer */ + rinfo->outer_is_left = true; + return true; + } + else if (bms_is_subset(rinfo->left_relids, innerrelids) && + bms_is_subset(rinfo->right_relids, outerrelids)) + { + /* righthand side is outer */ + rinfo->outer_is_left = false; + return true; + } + return false; /* no good for these input relations */ +} + + +/* + * Partitoned table Unique Keys. + * The partition table unique key is maintained as: + * 1. The index must be unique as usual. + * 2. The index must contains partition key. + * 3. 
The index must exist on all the child rel. see simple_indexinfo_equal for + * how we compare it. + */ + +/* index_constains_partkey + * retrun true if the index contains the partiton key. + */ +static bool +index_constains_partkey(RelOptInfo *partrel, IndexOptInfo *ind) +{ + ListCell *lc; + int i; + Assert(IS_PARTITIONED_REL(partrel)); + + for(i = 0; i < partrel->part_scheme->partnatts; i++) + { + Node *part_expr = linitial(partrel->partexprs[i]); + bool found_in_index = false; + foreach(lc, ind->indextlist) + { + Expr *index_expr = lfirst_node(TargetEntry, lc)->expr; + if (equal(index_expr, part_expr)) + { + found_in_index = true; + break; + } + } + if (!found_in_index) + return false; + } + return true; +} + +/* + * simple_indexinfo_equal + * + * Used to check if the 2 index is same as each other. The index here + * is COPIED from childrel and did some tiny changes(see simple_copy_indexinfo_to_parent) + */ + +static bool +simple_indexinfo_equal(IndexOptInfo *ind1, IndexOptInfo *ind2) +{ + Size oid_cmp_len = sizeof(Oid) * ind1->ncolumns; + return ind1->ncolumns == ind2->ncolumns && + ind1->unique == ind2->unique && + memcmp(ind1->indexkeys, ind2->indexkeys, sizeof(int) * ind1->ncolumns) == 0 && + memcmp(ind1->opfamily, ind2->opfamily, oid_cmp_len) == 0 && + memcmp(ind1->opcintype, ind2->opcintype, oid_cmp_len) == 0 && + memcmp(ind1->sortopfamily, ind2->sortopfamily, oid_cmp_len) == 0 && + equal(ind1->indextlist, ind2->indextlist); +} + +/* + * Copy these macros from copyfuncs.c since I don't want make + * simple_copy_indexinfo_to_parent public since it is a so customized copy. 
+ */ + +#define COPY_POINTER_FIELD(fldname, sz) \ + do { \ + Size _size = (sz); \ + newnode->fldname = palloc(_size); \ + memcpy(newnode->fldname, from->fldname, _size); \ + } while (0) + +#define COPY_NODE_FIELD(fldname) \ + (newnode->fldname = copyObjectImpl(from->fldname)) + +#define COPY_SCALAR_FIELD(fldname) \ + (newnode->fldname = from->fldname) + + +/* + * simple_copy_indexinfo_to_parent + * Copy the IndexInfo from child index info to parent, which will be used to + * 1. Test if the same index exists in all the childrels. + * 2. if the parentrel->reltarget/basicrestrinct info matches this index. + * The copied and modified index is just used in this scope. + */ +static IndexOptInfo * +simple_copy_indexinfo_to_parent(RelOptInfo *parentrel, + IndexOptInfo *from) +{ + IndexOptInfo *newnode = makeNode(IndexOptInfo); + + COPY_SCALAR_FIELD(ncolumns); + COPY_SCALAR_FIELD(nkeycolumns); + COPY_SCALAR_FIELD(unique); + COPY_SCALAR_FIELD(immediate); + /* We just need to know if it is NIL or not */ + COPY_SCALAR_FIELD(indpred); + COPY_SCALAR_FIELD(predOK); + COPY_POINTER_FIELD(indexkeys, from->ncolumns * sizeof(int)); + COPY_POINTER_FIELD(indexcollations, from->ncolumns * sizeof(Oid)); + COPY_POINTER_FIELD(opfamily, from->ncolumns * sizeof(Oid)); + COPY_POINTER_FIELD(opcintype, from->ncolumns * sizeof(Oid)); + COPY_POINTER_FIELD(sortopfamily, from->ncolumns * sizeof(Oid)); + COPY_NODE_FIELD(indextlist); + + /* + * We have to change this to let the later index match (like pk = 1) + * rel->reltarget work + */ + ChangeVarNodes((Node*) newnode->indextlist, + from->rel->relid, + parentrel->relid, 0); + newnode->rel = parentrel; + return newnode; +} + +/* + * adjust_partition_unique_indexlist + * + * Check the current known global_unique_indexes to see if every index here + * all exists in the given childrel, if not, it will be removed from + * the list + */ +static void +adjust_partition_unique_indexlist(RelOptInfo *parentrel, + RelOptInfo *childrel, + List 
**global_unique_indexes) +{ + ListCell *lc, *lc2; + foreach(lc, *global_unique_indexes) + { + IndexOptInfo *g_ind = lfirst_node(IndexOptInfo, lc); + bool found_in_child = false; + + foreach(lc2, childrel->indexlist) + { + IndexOptInfo *p_ind = lfirst_node(IndexOptInfo, lc2); + IndexOptInfo *p_ind_copy; + if (!p_ind->unique || !p_ind->immediate || + (p_ind->indpred != NIL && !p_ind->predOK)) + continue; + p_ind_copy = simple_copy_indexinfo_to_parent(parentrel, p_ind); + if (simple_indexinfo_equal(p_ind_copy, g_ind)) + { + found_in_child = true; + break; + } + } + + if (!found_in_child) + /* There is no same index on other childrel, remove it */ + *global_unique_indexes = foreach_delete_current(*global_unique_indexes, lc); + } +} + +/* Helper function for groupres/distinctrel */ +static void +add_uniquekey_from_sortgroups(PlannerInfo *root, RelOptInfo *rel, List *sortgroups) +{ + Query *parse = root->parse; + ListCell *lc; + List *exprs = NIL, *colnos = NIL; + foreach(lc, sortgroups) + { + Index sortref = lfirst_node(SortGroupClause, lc)->tleSortGroupRef; + int c = 1; + foreach(lc, parse->targetList) + { + TargetEntry *tle = lfirst_node(TargetEntry, lc); + if (tle->ressortgroupref == sortref) + { + exprs = lappend(exprs, tle->expr); + colnos = lappend_int(colnos, c); + } + ++c; + } + } + rel->uniquekeys = lappend(rel->uniquekeys, makeUniqueKey(exprs, colnos, true)); +} diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index d0ff660284..b96c6290b7 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -37,7 +37,6 @@ static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); static void remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids); static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); -static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel); static bool rel_is_distinct_for(PlannerInfo 
*root, RelOptInfo *rel, List *clause_list); static Oid distinct_col_search(int colno, List *colnos, List *opids); @@ -178,14 +177,6 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) innerrel = find_base_rel(root, innerrelid); - /* - * Before we go to the effort of checking whether any innerrel variables - * are needed above the join, make a quick check to eliminate cases in - * which we will surely be unable to prove uniqueness of the innerrel. - */ - if (!rel_supports_distinctness(root, innerrel)) - return false; - /* Compute the relid set for the join we are considering */ joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); @@ -535,14 +526,6 @@ reduce_unique_semijoins(PlannerInfo *root) innerrel = find_base_rel(root, innerrelid); - /* - * Before we trouble to run generate_join_implied_equalities, make a - * quick check to eliminate cases in which we will surely be unable to - * prove uniqueness of the innerrel. - */ - if (!rel_supports_distinctness(root, innerrel)) - continue; - /* Compute the relid set for the join we are considering */ joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); @@ -570,54 +553,6 @@ reduce_unique_semijoins(PlannerInfo *root) } -/* - * rel_supports_distinctness - * Could the relation possibly be proven distinct on some set of columns? - * - * This is effectively a pre-checking function for rel_is_distinct_for(). - * It must return true if rel_is_distinct_for() could possibly return true - * with this rel, but it should not expend a lot of cycles. The idea is - * that callers can avoid doing possibly-expensive processing to compute - * rel_is_distinct_for()'s argument lists if the call could not possibly - * succeed. - */ -static bool -rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel) -{ - /* We only know about baserels ... 
*/ - if (rel->reloptkind != RELOPT_BASEREL) - return false; - if (rel->rtekind == RTE_RELATION) - { - /* - * For a plain relation, we only know how to prove uniqueness by - * reference to unique indexes. Make sure there's at least one - * suitable unique index. It must be immediately enforced, and if - * it's a partial index, it must match the query. (Keep these - * conditions in sync with relation_has_unique_index_for!) - */ - ListCell *lc; - - foreach(lc, rel->indexlist) - { - IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc); - - if (ind->unique && ind->immediate && - (ind->indpred == NIL || ind->predOK)) - return true; - } - } - else if (rel->rtekind == RTE_SUBQUERY) - { - Query *subquery = root->simple_rte_array[rel->relid]->subquery; - - /* Check if the subquery has any qualities that support distinctness */ - if (query_supports_distinctness(subquery)) - return true; - } - /* We have no proof rules for any other rtekinds. */ - return false; -} /* * rel_is_distinct_for @@ -640,83 +575,34 @@ rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel) static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) { - /* - * We could skip a couple of tests here if we assume all callers checked - * rel_supports_distinctness first, but it doesn't seem worth taking any - * risk for. - */ - if (rel->reloptkind != RELOPT_BASEREL) - return false; - if (rel->rtekind == RTE_RELATION) - { - /* - * Examine the indexes to see if we have a matching unique index. - * relation_has_unique_index_for automatically adds any usable - * restriction clauses for the rel, so we needn't do that here. 
- */ - if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL)) - return true; - } - else if (rel->rtekind == RTE_SUBQUERY) - { - Index relid = rel->relid; - Query *subquery = root->simple_rte_array[relid]->subquery; - List *colnos = NIL; - List *opids = NIL; - ListCell *l; - /* - * Build the argument lists for query_is_distinct_for: a list of - * output column numbers that the query needs to be distinct over, and - * a list of equality operators that the output columns need to be - * distinct according to. - * - * (XXX we are not considering restriction clauses attached to the - * subquery; is that worth doing?) - */ - foreach(l, clause_list) + ListCell *lc1, *lc2, *lc3; + foreach(lc1, rel->uniquekeys) + { + UniqueKey *uqk = lfirst_node(UniqueKey, lc1); + bool all_uqk_exprs_found = true; + foreach(lc2, uqk->exprs) { - RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); - Oid op; - Var *var; - - /* - * Get the equality operator we need uniqueness according to. - * (This might be a cross-type operator and thus not exactly the - * same operator the subquery would consider; that's all right - * since query_is_distinct_for can resolve such cases.) The - * caller's mergejoinability test should have selected only - * OpExprs. - */ - op = castNode(OpExpr, rinfo->clause)->opno; - - /* caller identified the inner side for us */ - if (rinfo->outer_is_left) - var = (Var *) get_rightop(rinfo->clause); - else - var = (Var *) get_leftop(rinfo->clause); - - /* - * We may ignore any RelabelType node above the operand. (There - * won't be more than one, since eval_const_expressions() has been - * applied already.) - */ - if (var && IsA(var, RelabelType)) - var = (Var *) ((RelabelType *) var)->arg; - - /* - * If inner side isn't a Var referencing a subquery output column, - * this clause doesn't help us. 
- */ - if (!var || !IsA(var, Var) || - var->varno != relid || var->varlevelsup != 0) - continue; - - colnos = lappend_int(colnos, var->varattno); - opids = lappend_oid(opids, op); + Node *uq_expr = lfirst(lc2); + bool find_uq_exprs_in_clause_list = false; + foreach(lc3, clause_list) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc3); + Node *clause_expr = rinfo->outer_is_left ? get_rightop(rinfo->clause): get_leftop(rinfo->clause) ; + if (equal(uq_expr, clause_expr)) + { + find_uq_exprs_in_clause_list = true; + break; + } + } + if (!find_uq_exprs_in_clause_list) + { + all_uqk_exprs_found = false; + break; + } } - - if (query_is_distinct_for(subquery, colnos, opids)) + if (all_uqk_exprs_found) + /* This UnqiueKey is what we want */ return true; } return false; @@ -976,13 +862,6 @@ innerrel_is_unique(PlannerInfo *root, if (restrictlist == NIL) return false; - /* - * Make a quick check to eliminate cases in which we will surely be unable - * to prove uniqueness of the innerrel. - */ - if (!rel_supports_distinctness(root, innerrel)) - return false; - /* * Query the cache to see if we've managed to prove that innerrel is * unique for any subset of this outerrel. 
We don't need an exact match, diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index e978b491f6..be78d061ae 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -830,6 +830,15 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, { Node *qual = (Node *) lfirst(l); + /* set the not null info now */ + ListCell *lc; + foreach(lc, find_nonnullable_vars(qual)) + { + Var *var = lfirst_node(Var, lc); + RelOptInfo *rel = root->simple_rel_array[var->varno]; + if (var->varattno > InvalidAttrNumber) + rel->not_null_cols = bms_add_member(rel->not_null_cols, var->varattno); + } distribute_qual_to_rels(root, qual, false, below_outer_join, JOIN_INNER, root->qual_security_level, diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 62dfc6d44a..6ad73cb57b 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -213,19 +213,6 @@ query_planner(PlannerInfo *root, */ fix_placeholder_input_needed_levels(root); - /* - * Remove any useless outer joins. Ideally this would be done during - * jointree preprocessing, but the necessary information isn't available - * until we've built baserel data structures and classified qual clauses. - */ - joinlist = remove_useless_joins(root, joinlist); - - /* - * Also, reduce any semijoins with unique inner rels to plain inner joins. - * Likewise, this can't be done until now for lack of needed info. - */ - reduce_unique_semijoins(root); - /* * Now distribute "placeholders" to base rels as needed. 
This has to be * done after join removal because removal could change whether a diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index b65abf6046..8043fc4382 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -2386,6 +2386,8 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, add_path(final_rel, path); } + simple_copy_uniquekeys(current_rel, final_rel); + /* * Generate partial paths for final_rel, too, if outer query levels might * be able to make use of them. @@ -3810,6 +3812,23 @@ create_grouping_paths(PlannerInfo *root, Query *parse = root->parse; RelOptInfo *grouped_rel; RelOptInfo *partially_grouped_rel; + List *required_unique_keys = NIL; + + if (root->parse->groupingSets == NIL) + { + required_unique_keys = get_sortgrouplist_exprs(parse->groupClause, + parse->targetList); + /* + * If the groupby clauses is unique already, groupping node is not necessary + * if there is no aggreation functions + */ + if (required_unique_keys != NIL && + !parse->hasAggs && + !parse->hasWindowFuncs && + parse->havingQual == NULL && + relation_has_uniquekeys_for(root, input_rel, required_unique_keys)) + return input_rel; + } /* * Create grouping relation to hold fully aggregated grouping and/or @@ -3898,6 +3917,8 @@ create_grouping_paths(PlannerInfo *root, } set_cheapest(grouped_rel); + + populate_grouprel_uniquekeys(root, grouped_rel); return grouped_rel; } @@ -4615,7 +4636,7 @@ create_window_paths(PlannerInfo *root, /* Now choose the best path(s) */ set_cheapest(window_rel); - + simple_copy_uniquekeys(input_rel, window_rel); return window_rel; } @@ -4734,6 +4755,12 @@ create_distinct_paths(PlannerInfo *root, bool allow_hash; Path *path; ListCell *lc; + List *required_unique_keys = get_sortgrouplist_exprs(parse->distinctClause, + parse->targetList); + + /* If we the result if unqiue already, we just return the input_rel directly */ + if (relation_has_uniquekeys_for(root, input_rel, 
required_unique_keys)) + return input_rel; /* For now, do all work in the (DISTINCT, NULL) upperrel */ distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL); @@ -4912,7 +4939,7 @@ create_distinct_paths(PlannerInfo *root, /* Now choose the best path(s) */ set_cheapest(distinct_rel); - + populate_distinctrel_uniquekeys(root, input_rel, distinct_rel); return distinct_rel; } @@ -5060,6 +5087,8 @@ create_ordered_paths(PlannerInfo *root, */ Assert(ordered_rel->pathlist != NIL); + simple_copy_uniquekeys(input_rel, ordered_rel); + return ordered_rel; } diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 951aed80e7..8aa4d24cb0 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -689,6 +689,8 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, /* Undo effects of possibly forcing tuple_fraction to 0 */ root->tuple_fraction = save_fraction; + /* Add the UniqueKeys */ + populate_unionrel_uniquiekeys(root, result_rel); return result_rel; } diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index d82fc5ab8b..34d30b181c 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -117,6 +117,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, Relation relation; bool hasindex; List *indexinfos = NIL; + int i; /* * We need not lock the relation since it was already locked, either by @@ -460,6 +461,13 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, if (inhparent && relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) set_relation_partition_info(root, rel, relation); + Assert(rel->not_null_cols == NULL); + for(i = 0; i < relation->rd_att->natts; i++) + { + if (relation->rd_att->attrs[i].attnotnull) + rel->not_null_cols = bms_add_member(rel->not_null_cols, i+1); + } + table_close(relation, NoLock); /* diff --git 
a/src/include/nodes/makefuncs.h b/src/include/nodes/makefuncs.h index 31d9aedeeb..d4de97016c 100644 --- a/src/include/nodes/makefuncs.h +++ b/src/include/nodes/makefuncs.h @@ -16,6 +16,7 @@ #include "nodes/execnodes.h" #include "nodes/parsenodes.h" +#include "nodes/pathnodes.h" extern A_Expr *makeA_Expr(A_Expr_Kind kind, List *name, @@ -105,4 +106,5 @@ extern GroupingSet *makeGroupingSet(GroupingSetKind kind, List *content, int loc extern VacuumRelation *makeVacuumRelation(RangeVar *relation, Oid oid, List *va_cols); +extern UniqueKey* makeUniqueKey(List *exprs, List *positions, bool grantee); #endif /* MAKEFUNC_H */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 8a76afe8cc..679cc4cc9c 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -261,6 +261,7 @@ typedef enum NodeTag T_EquivalenceMember, T_PathKey, T_PathTarget, + T_UniqueKey, T_RestrictInfo, T_IndexClause, T_PlaceHolderVar, diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 0ceb809644..accec6df4e 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -687,6 +687,7 @@ typedef struct RelOptInfo PlannerInfo *subroot; /* if subquery */ List *subplan_params; /* if subquery */ int rel_parallel_workers; /* wanted number of parallel workers */ + Relids not_null_cols; /* the non null column for this relation, start from 1 */ /* Information about foreign tables and foreign joins */ Oid serverid; /* identifies server for the table or join */ @@ -706,6 +707,7 @@ typedef struct RelOptInfo QualCost baserestrictcost; /* cost of evaluating the above */ Index baserestrict_min_security; /* min security_level found in * baserestrictinfo */ + List *uniquekeys; /* List of UniqueKey */ List *joininfo; /* RestrictInfo structures for join clauses * involving this rel */ bool has_eclass_joins; /* T means joininfo is incomplete */ @@ -1017,6 +1019,27 @@ typedef struct PathKey } PathKey; +/* + * UniqueKey + * + * Represents the 
unique properties held by a RelOptInfo + * exprs is a list of exprs which is unqiue on current RelOptInfo. + * positions is a list of position where the corresponding exprs's location in + * current reloptinfo->reltarget. It will be used transate the exprs's info + * in subquery. + * guarantee: true means it can guarantee the uniqueness all the time, false if we + * can only guarantee the uniqueness without considering the null values. This + * field is necessary for remove_useless_join & reduce_unique_semijions since + * these cases don't care about the null values. + */ +typedef struct UniqueKey +{ + NodeTag type; + List *exprs; + List *positions; + bool guarantee; +} UniqueKey; + /* * PathTarget * diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h index 14ea2766ad..5dfb93895c 100644 --- a/src/include/nodes/pg_list.h +++ b/src/include/nodes/pg_list.h @@ -528,6 +528,8 @@ extern bool list_member_ptr(const List *list, const void *datum); extern bool list_member_int(const List *list, int datum); extern bool list_member_oid(const List *list, Oid datum); +extern bool list_all_members_in(const List *members, const List *target); + extern List *list_delete(List *list, void *datum); extern List *list_delete_ptr(List *list, void *datum); extern List *list_delete_int(List *list, int datum); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 9ab73bd20c..f1967d15c2 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -240,4 +240,41 @@ extern PathKey *make_canonical_pathkey(PlannerInfo *root, extern void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, List *live_childrels); +/* + * uniquekeys.c + * Utilities for matching and building unique keys + */ +extern void populate_baserel_uniquekeys(PlannerInfo *root, + RelOptInfo *baserel, + List* unique_index_list); +extern void populate_partitionedrel_uniquekeys(PlannerInfo *root, + RelOptInfo *rel, + List *childrels); +extern void 
populate_distinctrel_uniquekeys(PlannerInfo *root, + RelOptInfo *inputrel, + RelOptInfo *distinctrel); +extern void populate_grouprel_uniquekeys(PlannerInfo *root, + RelOptInfo *grouprel); +extern void populate_unionrel_uniquiekeys(PlannerInfo *root, + RelOptInfo *unionrel); +extern void simple_copy_uniquekeys(RelOptInfo *oldrel, + RelOptInfo *newrel); +extern void convert_subquery_uniquekeys(PlannerInfo *root, + RelOptInfo *currel, + RelOptInfo *sub_final_rel); +extern void populate_joinrel_uniquekeys(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *rel1, + RelOptInfo *rel2, + List *restrictlist, + JoinType jointype); + +extern bool innerl_is_unique_v2(PlannerInfo *root, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist); +extern bool relation_has_uniquekeys_for(PlannerInfo *root, + RelOptInfo *rel, + List *exprs); + #endif /* PATHS_H */ diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 14cdcfcca6..4a701af85b 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -870,14 +870,12 @@ explain (costs off) select distinct max(unique2) from tenk1; QUERY PLAN --------------------------------------------------------------------- - HashAggregate - Group Key: $0 + Result InitPlan 1 (returns $0) -> Limit -> Index Only Scan Backward using tenk1_unique2 on tenk1 Index Cond: (unique2 IS NOT NULL) - -> Result -(7 rows) +(5 rows) select distinct max(unique2) from tenk1; max @@ -1036,7 +1034,7 @@ explain (costs off) select distinct min(f1), max(f1) from minmaxtest; QUERY PLAN --------------------------------------------------------------------------------------------- - Unique + Result InitPlan 1 (returns $0) -> Limit -> Merge Append @@ -1059,10 +1057,7 @@ explain (costs off) -> Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8 Index Cond: (f1 IS NOT NULL) -> Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9 - -> Sort - 
Sort Key: ($0), ($1) - -> Result -(26 rows) +(23 rows) select distinct min(f1), max(f1) from minmaxtest; min | max @@ -1092,12 +1087,10 @@ create temp table t2 (x int, y int, z int, primary key (x, y)); create temp table t3 (a int, b int, c int, primary key(a, b) deferrable); -- Non-primary-key columns can be removed from GROUP BY explain (costs off) select * from t1 group by a,b,c,d; - QUERY PLAN ----------------------- - HashAggregate - Group Key: a, b - -> Seq Scan on t1 -(3 rows) + QUERY PLAN +---------------- + Seq Scan on t1 +(1 row) -- No removal can happen if the complete PK is not present in GROUP BY explain (costs off) select a,c from t1 group by a,c,d; @@ -1112,31 +1105,27 @@ explain (costs off) select a,c from t1 group by a,c,d; explain (costs off) select * from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z; - QUERY PLAN ------------------------------------------------------- - HashAggregate - Group Key: t1.a, t1.b, t2.x, t2.y - -> Hash Join - Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) - -> Seq Scan on t2 - -> Hash - -> Seq Scan on t1 -(7 rows) + QUERY PLAN +------------------------------------------------ + Hash Join + Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) + -> Seq Scan on t2 + -> Hash + -> Seq Scan on t1 +(5 rows) -- Test case where t1 can be optimized but not t2 explain (costs off) select t1.*,t2.x,t2.z from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z; - QUERY PLAN ------------------------------------------------------- - HashAggregate - Group Key: t1.a, t1.b, t2.x, t2.z - -> Hash Join - Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) - -> Seq Scan on t2 - -> Hash - -> Seq Scan on t1 -(7 rows) + QUERY PLAN +------------------------------------------------ + Hash Join + Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) + -> Seq Scan on t2 + -> Hash + -> Seq Scan on t1 +(5 rows) -- Cannot optimize when PK is deferrable explain (costs off) select * from t3 group by 
a,b,c; @@ -1161,12 +1150,10 @@ explain (costs off) select * from t1 group by a,b,c,d; -- Okay to remove columns if we're only querying the parent. explain (costs off) select * from only t1 group by a,b,c,d; - QUERY PLAN ----------------------- - HashAggregate - Group Key: a, b - -> Seq Scan on t1 -(3 rows) + QUERY PLAN +---------------- + Seq Scan on t1 +(1 row) create temp table p_t1 ( a int, @@ -1179,14 +1166,12 @@ create temp table p_t1_1 partition of p_t1 for values in(1); create temp table p_t1_2 partition of p_t1 for values in(2); -- Ensure we can remove non-PK columns for partitioned tables. explain (costs off) select * from p_t1 group by a,b,c,d; - QUERY PLAN --------------------------------- - HashAggregate - Group Key: p_t1.a, p_t1.b - -> Append - -> Seq Scan on p_t1_1 - -> Seq Scan on p_t1_2 -(5 rows) + QUERY PLAN +-------------------------- + Append + -> Seq Scan on p_t1_1 + -> Seq Scan on p_t1_2 +(3 rows) drop table t1 cascade; NOTICE: drop cascades to table t1c diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 761376b007..6e5b8f83f4 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -4414,37 +4414,25 @@ select d.* from d left join (select distinct * from b) s -- not in the join condition. (Note: as of 9.6, we notice that b.id is a -- primary key and so drop b.c_id from the GROUP BY of the resulting plan; -- but this happens too late for join removal in the outer plan level.) +-- XXXX: since b.id is unqiue now so the group by cluase is erased, so +-- the join removal can happen now. 
explain (costs off) select d.* from d left join (select * from b group by b.id, b.c_id) s on d.a = s.id; - QUERY PLAN ------------------------------------------- - Merge Right Join - Merge Cond: (b.id = d.a) - -> Group - Group Key: b.id - -> Index Scan using b_pkey on b - -> Sort - Sort Key: d.a - -> Seq Scan on d -(8 rows) + QUERY PLAN +--------------- + Seq Scan on d +(1 row) -- similarly, but keying off a DISTINCT clause +-- XXX we erase distinct clause, so the joinremoval is ok as well. explain (costs off) select d.* from d left join (select distinct * from b) s on d.a = s.id; - QUERY PLAN --------------------------------------- - Merge Right Join - Merge Cond: (b.id = d.a) - -> Unique - -> Sort - Sort Key: b.id, b.c_id - -> Seq Scan on b - -> Sort - Sort Key: d.a - -> Seq Scan on d -(9 rows) + QUERY PLAN +--------------- + Seq Scan on d +(1 row) -- check join removal works when uniqueness of the join condition is enforced -- by a UNION diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out index 11c6f50fbf..e9263d6151 100644 --- a/src/test/regress/expected/select_distinct.out +++ b/src/test/regress/expected/select_distinct.out @@ -306,3 +306,398 @@ SELECT null IS NOT DISTINCT FROM null as "yes"; t (1 row) +create table select_distinct_a(pk1 int, pk2 char(20), uk1 char(20) not null, uk2 int, e int, primary key(pk1, pk2)); +create unique index select_distinct_a_uk on select_distinct_a(uk1, uk2); +create table select_distinct_b(a int, b char(20), pk1 char(20), pk2 int, e int, primary key(pk1, pk2)); +-- distinct erased since (pk1, pk2) +explain (costs off) select distinct * from select_distinct_a; + QUERY PLAN +------------------------------- + Seq Scan on select_distinct_a +(1 row) + +-- distinct can't be reased since since we required all the uk must be not null +explain (costs off) select distinct uk1, uk2 from select_distinct_a; + QUERY PLAN +------------------------------------- + HashAggregate + Group Key: 
uk1, uk2 + -> Seq Scan on select_distinct_a +(3 rows) + +-- distinct ereased since uk + not null +explain (costs off) select distinct uk1, uk2 from select_distinct_a where uk2 is not null; + QUERY PLAN +------------------------------- + Seq Scan on select_distinct_a + Filter: (uk2 IS NOT NULL) +(2 rows) + +explain (costs off) select distinct uk1, uk2 from select_distinct_a where uk2 > 1; + QUERY PLAN +------------------------------- + Seq Scan on select_distinct_a + Filter: (uk2 > 1) +(2 rows) + +-- distinct erased due to group by +explain (costs off) select distinct e from select_distinct_a group by e; + QUERY PLAN +------------------------------------- + HashAggregate + Group Key: e + -> Seq Scan on select_distinct_a +(3 rows) + +-- distinct erased due to the restirctinfo +explain (costs off) select distinct uk1 from select_distinct_a where pk1 = 1 and pk2 = 'c'; + QUERY PLAN +-------------------------------------------------------------- + Index Scan using select_distinct_a_pkey on select_distinct_a + Index Cond: ((pk1 = 1) AND (pk2 = 'c'::bpchar)) +(2 rows) + +-- test join +set enable_mergejoin to off; +set enable_hashjoin to off; +insert into select_distinct_a values(1, 'a', 'a', 0, 1), (1, 'b', 'A', 0, 2), (3, 'c', 'c', 0, 3); +insert into select_distinct_b values(1, 'a', 'a', 0, 1), (4, 'd', 'd', 0, 4), (1, 'e', 'e', 0, 5); +ANALYZE select_distinct_a; +ANALYZE select_distinct_b; +-- Cartesian join +explain (costs off) select distinct a.uk1, a.uk2, b.pk1, b.pk2 from select_distinct_a a, select_distinct_b b where a.uk2 is not null; + QUERY PLAN +--------------------------------------------- + Nested Loop + -> Seq Scan on select_distinct_a a + Filter: (uk2 IS NOT NULL) + -> Materialize + -> Seq Scan on select_distinct_b b +(5 rows) + +select distinct a.uk1 COLLATE "C", a.uk2, b.pk1 COLLATE "C", b.pk2 from select_distinct_a a, select_distinct_b b where a.uk2 is not null order by 1, 2, 3, 4; + uk1 | uk2 | pk1 | pk2 
+----------------------+-----+----------------------+----- + A | 0 | a | 0 + A | 0 | d | 0 + A | 0 | e | 0 + a | 0 | a | 0 + a | 0 | d | 0 + a | 0 | e | 0 + c | 0 | a | 0 + c | 0 | d | 0 + c | 0 | e | 0 +(9 rows) + +-- normal join +explain (costs off) select distinct t1.pk1, t1.pk2 from select_distinct_a t1, select_distinct_b t2 where t1.pk2 = t2.pk1 and t1.e = t2.pk2; + QUERY PLAN +-------------------------------------------------------- + Nested Loop + Join Filter: ((t1.pk2 = t2.pk1) AND (t1.e = t2.pk2)) + -> Seq Scan on select_distinct_a t1 + -> Materialize + -> Seq Scan on select_distinct_b t2 +(5 rows) + +explain (costs off) select distinct t1.pk1, t1.pk2 from select_distinct_b t2, select_distinct_a t1 where t1.pk2 = t2.pk1 and t1.e = t2.pk2; + QUERY PLAN +-------------------------------------------------------- + Nested Loop + Join Filter: ((t2.pk1 = t1.pk2) AND (t2.pk2 = t1.e)) + -> Seq Scan on select_distinct_b t2 + -> Materialize + -> Seq Scan on select_distinct_a t1 +(5 rows) + +-- left join +explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a left join select_distinct_b b on (a.pk1 = b.a); + QUERY PLAN +--------------------------------------------- + Nested Loop Left Join + Join Filter: (a.pk1 = b.a) + -> Seq Scan on select_distinct_a a + -> Materialize + -> Seq Scan on select_distinct_b b +(5 rows) + +select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a left join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4;; + pk1 | pk2 | pk1 | pk2 +-----+----------------------+----------------------+----- + 1 | a | a | 0 + 1 | a | e | 0 + 1 | b | a | 0 + 1 | b | e | 0 + 3 | c | | +(5 rows) + +-- right join +explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a right join select_distinct_b b on (a.pk1 = b.a); + QUERY PLAN +--------------------------------------------- + Nested Loop Left Join + Join Filter: (a.pk1 = b.a) + -> Seq Scan on select_distinct_b b + -> Materialize 
+ -> Seq Scan on select_distinct_a a +(5 rows) + +select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a right join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4; + pk1 | pk2 | pk1 | pk2 +-----+----------------------+----------------------+----- + 1 | a | a | 0 + 1 | a | e | 0 + 1 | b | a | 0 + 1 | b | e | 0 + | | d | 0 +(5 rows) + +-- full join +explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a full outer join select_distinct_b b on (a.pk1 = b.a); + QUERY PLAN +--------------------------------------------- + Hash Full Join + Hash Cond: (a.pk1 = b.a) + -> Seq Scan on select_distinct_a a + -> Hash + -> Seq Scan on select_distinct_b b +(5 rows) + +select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a full outer join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4; + pk1 | pk2 | pk1 | pk2 +-----+----------------------+----------------------+----- + 1 | a | a | 0 + 1 | a | e | 0 + 1 | b | a | 0 + 1 | b | e | 0 + 3 | c | | + | | d | 0 +(6 rows) + +explain (costs off) select distinct a.pk1, a.pk2, b.pk1 from select_distinct_a a inner join select_distinct_b b on (a.pk1 = b.a); + QUERY PLAN +--------------------------------------------------------- + Unique + -> Sort + Sort Key: a.pk1, a.pk2, b.pk1 + -> Nested Loop + Join Filter: (a.pk1 = b.a) + -> Seq Scan on select_distinct_a a + -> Materialize + -> Seq Scan on select_distinct_b b +(8 rows) + +-- Semi/anti join +explain (costs off) select distinct pk1, pk2 from select_distinct_a where pk1 in (select a from select_distinct_b); + QUERY PLAN +-------------------------------------------------------------- + Nested Loop Semi Join + Join Filter: (select_distinct_a.pk1 = select_distinct_b.a) + -> Seq Scan on select_distinct_a + -> Materialize + -> Seq Scan on select_distinct_b +(5 rows) + +explain (costs off) select distinct pk1, pk2 from select_distinct_a where pk1 not in (select a from select_distinct_b); + QUERY PLAN 
+--------------------------------------- + Seq Scan on select_distinct_a + Filter: (NOT (hashed SubPlan 1)) + SubPlan 1 + -> Seq Scan on select_distinct_b +(4 rows) + +-- Test subquery +explain (costs off) select distinct * from select_distinct_a a, (select a from select_distinct_b group by a) b where a.pk1 = b.a; + QUERY PLAN +------------------------------------------------- + Nested Loop + Join Filter: (a.pk1 = select_distinct_b.a) + -> Seq Scan on select_distinct_a a + -> Materialize + -> HashAggregate + Group Key: select_distinct_b.a + -> Seq Scan on select_distinct_b +(7 rows) + +select distinct * from select_distinct_a a, (select a from select_distinct_b group by a) b where a.pk1 = b.a order by 1, 2, 3; + pk1 | pk2 | uk1 | uk2 | e | a +-----+----------------------+----------------------+-----+---+--- + 1 | a | a | 0 | 1 | 1 + 1 | b | A | 0 | 2 | 1 +(2 rows) + +explain (costs off) select distinct * from select_distinct_a a, (select distinct a from select_distinct_b) b where a.pk1 = b.a; + QUERY PLAN +------------------------------------------------- + Nested Loop + Join Filter: (a.pk1 = select_distinct_b.a) + -> Seq Scan on select_distinct_a a + -> Materialize + -> HashAggregate + Group Key: select_distinct_b.a + -> Seq Scan on select_distinct_b +(7 rows) + +select distinct * from select_distinct_a a, (select distinct a from select_distinct_b) b where a.pk1 = b.a order by 1 ,2, 3; + pk1 | pk2 | uk1 | uk2 | e | a +-----+----------------------+----------------------+-----+---+--- + 1 | a | a | 0 | 1 | 1 + 1 | b | A | 0 | 2 | 1 +(2 rows) + +-- Distinct On +-- can't erase since pk2 is missed +explain (costs off) select distinct on(pk1) pk1, pk2 from select_distinct_a; + QUERY PLAN +------------------------------------------- + Unique + -> Sort + Sort Key: pk1 + -> Seq Scan on select_distinct_a +(4 rows) + +-- ok to erase +explain (costs off) select distinct on(pk1, pk2) pk1, pk2 from select_distinct_a; + QUERY PLAN +------------------------------- + Seq Scan on 
select_distinct_a +(1 row) + +-- Test partitioned table +create table dist_p (a int, b int not null, c int not null, d int) partition by range (b); +create table dist_p0 partition of dist_p for values from (1) to (10); +create table dist_p1 partition of dist_p for values from (11) to (20); +-- The combines UKs in target list +explain (costs off) select distinct t1.pk1, t1.pk2, t2.b, t2.maxc from select_distinct_a t1, (select b, max(c) as maxc from dist_p group by b) t2; + QUERY PLAN +------------------------------------------------ + Nested Loop + -> HashAggregate + Group Key: dist_p.b + -> Append + -> Seq Scan on dist_p0 dist_p_1 + -> Seq Scan on dist_p1 dist_p_2 + -> Materialize + -> Seq Scan on select_distinct_a t1 +(8 rows) + +-- no uk for t2, so distinct is needed. +explain (costs off) select distinct t1.pk1, t1.pk2, t2.b, t2.c from select_distinct_a t1, dist_p t2; + QUERY PLAN +---------------------------------------------------- + HashAggregate + Group Key: t1.pk1, t1.pk2, t2.b, t2.c + -> Nested Loop + -> Append + -> Seq Scan on dist_p0 t2_1 + -> Seq Scan on dist_p1 t2_2 + -> Materialize + -> Seq Scan on select_distinct_a t1 +(8 rows) + +-- create unique index on dist_p +create unique index dist_p_uk_b_c on dist_p(b, c); +-- (t2.b, t2.c) should be the UK +explain (costs off) select distinct t1.pk1, t1.pk2, t2.b, t2.c from select_distinct_a t1, dist_p t2; + QUERY PLAN +---------------------------------------------- + Nested Loop + -> Append + -> Seq Scan on dist_p0 t2_1 + -> Seq Scan on dist_p1 t2_2 + -> Materialize + -> Seq Scan on select_distinct_a t1 +(6 rows) + +drop index dist_p_uk_b_c; +-- we also support creating unique indexes on each child table +create unique index dist_p0_uk_bc on dist_p0(b, c); +-- not ok, since dist_p1 does not have the same index +explain (costs off) select distinct t1.pk1, t1.pk2, t2.b, t2.c from select_distinct_a t1, dist_p t2; + QUERY PLAN +---------------------------------------------------- + HashAggregate + Group Key: t1.pk1,
t1.pk2, t2.b, t2.c + -> Nested Loop + -> Append + -> Seq Scan on dist_p0 t2_1 + -> Seq Scan on dist_p1 t2_2 + -> Materialize + -> Seq Scan on select_distinct_a t1 +(8 rows) + +create unique index dist_p1_uk_bc on dist_p1(b, c); +-- OK now +explain (costs off) select distinct t1.pk1, t1.pk2, t2.b, t2.c from select_distinct_a t1, dist_p t2; + QUERY PLAN +---------------------------------------------- + Nested Loop + -> Append + -> Seq Scan on dist_p0 t2_1 + -> Seq Scan on dist_p1 t2_2 + -> Materialize + -> Seq Scan on select_distinct_a t1 +(6 rows) + +-- uk is same on all child tables, however it doesn't include the partkey, so not ok as well. +create unique index dist_p0_uk_c on dist_p0(c); +create unique index dist_p1_uk_c on dist_p1(c); +explain (costs off) select distinct t1.pk1, t1.pk2, t2.c from select_distinct_a t1, dist_p t2; + QUERY PLAN +---------------------------------------------------- + HashAggregate + Group Key: t1.pk1, t1.pk2, t2.c + -> Nested Loop + -> Append + -> Seq Scan on dist_p0 t2_1 + -> Seq Scan on dist_p1 t2_2 + -> Materialize + -> Seq Scan on select_distinct_a t1 +(8 rows) + +drop table dist_p; +-- test some view. 
+create view distinct_v1 as select distinct uk1, uk2 from select_distinct_a where uk2 is not null; +explain (costs off) select * from distinct_v1; + QUERY PLAN +------------------------------- + Seq Scan on select_distinct_a + Filter: (uk2 IS NOT NULL) +(2 rows) + +alter table select_distinct_a alter column uk1 drop not null; +explain (costs off) select * from distinct_v1; + QUERY PLAN +---------------------------------------------------------------- + Unique + -> Sort + Sort Key: select_distinct_a.uk1, select_distinct_a.uk2 + -> Seq Scan on select_distinct_a + Filter: (uk2 IS NOT NULL) +(5 rows) + +alter table select_distinct_a alter column uk1 set not null; +-- test generic plan +prepare pt as select * from distinct_v1; +explain (costs off) execute pt; + QUERY PLAN +------------------------------- + Seq Scan on select_distinct_a + Filter: (uk2 IS NOT NULL) +(2 rows) + +alter table select_distinct_a alter column uk1 drop not null; +explain (costs off) execute pt; + QUERY PLAN +---------------------------------------------------------------- + Unique + -> Sort + Sort Key: select_distinct_a.uk1, select_distinct_a.uk2 + -> Seq Scan on select_distinct_a + Filter: (uk2 IS NOT NULL) +(5 rows) + +drop view distinct_v1; +drop table select_distinct_a; +drop table select_distinct_b; diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 5fc6617369..d6cc8fa845 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1515,11 +1515,14 @@ select d.* from d left join (select distinct * from b) s -- not in the join condition. (Note: as of 9.6, we notice that b.id is a -- primary key and so drop b.c_id from the GROUP BY of the resulting plan; -- but this happens too late for join removal in the outer plan level.) +-- XXXX: since b.id is unqiue now so the group by cluase is erased, so +-- the join removal can happen now. 
explain (costs off) select d.* from d left join (select * from b group by b.id, b.c_id) s on d.a = s.id; -- similarly, but keying off a DISTINCT clause +-- XXX we erase distinct clause, so the joinremoval is ok as well. explain (costs off) select d.* from d left join (select distinct * from b) s on d.a = s.id; diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql index 33102744eb..08d1e35095 100644 --- a/src/test/regress/sql/select_distinct.sql +++ b/src/test/regress/sql/select_distinct.sql @@ -135,3 +135,128 @@ SELECT 1 IS NOT DISTINCT FROM 2 as "no"; SELECT 2 IS NOT DISTINCT FROM 2 as "yes"; SELECT 2 IS NOT DISTINCT FROM null as "no"; SELECT null IS NOT DISTINCT FROM null as "yes"; +create table select_distinct_a(pk1 int, pk2 char(20), uk1 char(20) not null, uk2 int, e int, primary key(pk1, pk2)); +create unique index select_distinct_a_uk on select_distinct_a(uk1, uk2); +create table select_distinct_b(a int, b char(20), pk1 char(20), pk2 int, e int, primary key(pk1, pk2)); + +-- distinct erased since (pk1, pk2) +explain (costs off) select distinct * from select_distinct_a; + +-- distinct can't be reased since since we required all the uk must be not null +explain (costs off) select distinct uk1, uk2 from select_distinct_a; + +-- distinct ereased since uk + not null +explain (costs off) select distinct uk1, uk2 from select_distinct_a where uk2 is not null; +explain (costs off) select distinct uk1, uk2 from select_distinct_a where uk2 > 1; + +-- distinct erased due to group by +explain (costs off) select distinct e from select_distinct_a group by e; + +-- distinct erased due to the restirctinfo +explain (costs off) select distinct uk1 from select_distinct_a where pk1 = 1 and pk2 = 'c'; + +-- test join +set enable_mergejoin to off; +set enable_hashjoin to off; + +insert into select_distinct_a values(1, 'a', 'a', 0, 1), (1, 'b', 'A', 0, 2), (3, 'c', 'c', 0, 3); +insert into select_distinct_b values(1, 'a', 'a', 0, 1), (4, 
'd', 'd', 0, 4), (1, 'e', 'e', 0, 5); + +ANALYZE select_distinct_a; +ANALYZE select_distinct_b; + +-- Cartesian join +explain (costs off) select distinct a.uk1, a.uk2, b.pk1, b.pk2 from select_distinct_a a, select_distinct_b b where a.uk2 is not null; +select distinct a.uk1 COLLATE "C", a.uk2, b.pk1 COLLATE "C", b.pk2 from select_distinct_a a, select_distinct_b b where a.uk2 is not null order by 1, 2, 3, 4; + +-- normal join +explain (costs off) select distinct t1.pk1, t1.pk2 from select_distinct_a t1, select_distinct_b t2 where t1.pk2 = t2.pk1 and t1.e = t2.pk2; +explain (costs off) select distinct t1.pk1, t1.pk2 from select_distinct_b t2, select_distinct_a t1 where t1.pk2 = t2.pk1 and t1.e = t2.pk2; + +-- left join +explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a left join select_distinct_b b on (a.pk1 = b.a); +select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a left join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4;; + +-- right join +explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a right join select_distinct_b b on (a.pk1 = b.a); +select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a right join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4; + +-- full join +explain (costs off) select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a full outer join select_distinct_b b on (a.pk1 = b.a); +select distinct a.pk1, a.pk2, b.pk1, b.pk2 from select_distinct_a a full outer join select_distinct_b b on (a.pk1 = b.a) order by 1, 2, 3, 4; + +explain (costs off) select distinct a.pk1, a.pk2, b.pk1 from select_distinct_a a inner join select_distinct_b b on (a.pk1 = b.a); + + +-- Semi/anti join +explain (costs off) select distinct pk1, pk2 from select_distinct_a where pk1 in (select a from select_distinct_b); +explain (costs off) select distinct pk1, pk2 from select_distinct_a where pk1 not in (select a from select_distinct_b); + + +-- 
Test subquery +explain (costs off) select distinct * from select_distinct_a a, (select a from select_distinct_b group by a) b where a.pk1 = b.a; +select distinct * from select_distinct_a a, (select a from select_distinct_b group by a) b where a.pk1 = b.a order by 1, 2, 3; + +explain (costs off) select distinct * from select_distinct_a a, (select distinct a from select_distinct_b) b where a.pk1 = b.a; +select distinct * from select_distinct_a a, (select distinct a from select_distinct_b) b where a.pk1 = b.a order by 1 ,2, 3; + +-- Distinct On +-- can't erase since pk2 is missed +explain (costs off) select distinct on(pk1) pk1, pk2 from select_distinct_a; +-- ok to erase +explain (costs off) select distinct on(pk1, pk2) pk1, pk2 from select_distinct_a; + + +-- Test partitioned table +create table dist_p (a int, b int not null, c int not null, d int) partition by range (b); + +create table dist_p0 partition of dist_p for values from (1) to (10); +create table dist_p1 partition of dist_p for values from (11) to (20); + +-- The combines UKs in target list +explain (costs off) select distinct t1.pk1, t1.pk2, t2.b, t2.maxc from select_distinct_a t1, (select b, max(c) as maxc from dist_p group by b) t2; + +-- no uk for t2, so distinct is needed. 
+explain (costs off) select distinct t1.pk1, t1.pk2, t2.b, t2.c from select_distinct_a t1, dist_p t2; + +-- create unique index on dist_p +create unique index dist_p_uk_b_c on dist_p(b, c); + +-- (t2.b, t2.c) should be the UK +explain (costs off) select distinct t1.pk1, t1.pk2, t2.b, t2.c from select_distinct_a t1, dist_p t2; + +drop index dist_p_uk_b_c; + +-- we also support creating unique indexes on each child table +create unique index dist_p0_uk_bc on dist_p0(b, c); +-- not ok, since dist_p1 does not have the same index +explain (costs off) select distinct t1.pk1, t1.pk2, t2.b, t2.c from select_distinct_a t1, dist_p t2; +create unique index dist_p1_uk_bc on dist_p1(b, c); +-- OK now +explain (costs off) select distinct t1.pk1, t1.pk2, t2.b, t2.c from select_distinct_a t1, dist_p t2; + +-- uk is same on all child tables, however it doesn't include the partkey, so not ok as well. +create unique index dist_p0_uk_c on dist_p0(c); +create unique index dist_p1_uk_c on dist_p1(c); +explain (costs off) select distinct t1.pk1, t1.pk2, t2.c from select_distinct_a t1, dist_p t2; + +drop table dist_p; + +-- test some view. +create view distinct_v1 as select distinct uk1, uk2 from select_distinct_a where uk2 is not null; +explain (costs off) select * from distinct_v1; + +alter table select_distinct_a alter column uk1 drop not null; +explain (costs off) select * from distinct_v1; + +alter table select_distinct_a alter column uk1 set not null; + +-- test generic plan +prepare pt as select * from distinct_v1; +explain (costs off) execute pt; +alter table select_distinct_a alter column uk1 drop not null; +explain (costs off) execute pt; + +drop view distinct_v1; +drop table select_distinct_a; +drop table select_distinct_b; -- 2.21.0