diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 4fbbde1..5504091 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -3923,10 +3923,12 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype,
 	/*
 	 * We support pushing down INNER, LEFT, RIGHT and FULL OUTER joins.
 	 * Constructing queries representing SEMI and ANTI joins is hard, hence
-	 * not considered right now.
+	 * not considered right now. SEMI_LEFT joins are ok here, since they're
+	 * merely an optimization of LEFT joins.
 	 */
 	if (jointype != JOIN_INNER && jointype != JOIN_LEFT &&
-		jointype != JOIN_RIGHT && jointype != JOIN_FULL)
+		jointype != JOIN_RIGHT && jointype != JOIN_FULL &&
+		jointype != JOIN_SEMI_LEFT)
 		return false;
 
 	/*
@@ -4059,6 +4061,7 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype,
 			break;
 
 		case JOIN_LEFT:
+		case JOIN_SEMI_LEFT:
 			fpinfo->joinclauses = list_concat(fpinfo->joinclauses,
 										  list_copy(fpinfo_i->remote_conds));
 			fpinfo->remote_conds = list_concat(fpinfo->remote_conds,
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 09c2304..7c114ca 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1121,6 +1121,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
 					case JOIN_LEFT:
 						jointype = "Left";
 						break;
+					case JOIN_SEMI_LEFT:
+						jointype = "Semi Left";
+						break;
 					case JOIN_FULL:
 						jointype = "Full";
 						break;
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 369e666..1436a5b 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -306,10 +306,12 @@ ExecHashJoin(HashJoinState *node)
 					}
 
 					/*
-					 * In a semijoin, we'll consider returning the first
-					 * match, but after that we're done with this outer tuple.
+					 * In a semi or semi left join, we'll consider returning
+					 * the first match, but after that we're done with this
+					 * outer tuple.
 					 */
-					if (node->js.jointype == JOIN_SEMI)
+					if (node->js.jointype == JOIN_SEMI ||
+						node->js.jointype == JOIN_SEMI_LEFT)
 						node->hj_JoinState = HJ_NEED_NEW_OUTER;
 
 					if (otherqual == NIL ||
@@ -502,6 +504,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
 		case JOIN_SEMI:
 			break;
 		case JOIN_LEFT:
+		case JOIN_SEMI_LEFT:
 		case JOIN_ANTI:
 			hjstate->hj_NullInnerTupleSlot =
 				ExecInitNullTupleSlot(estate,
diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c
index 6db09b8..f61a8da 100644
--- a/src/backend/executor/nodeMergejoin.c
+++ b/src/backend/executor/nodeMergejoin.c
@@ -840,10 +840,12 @@ ExecMergeJoin(MergeJoinState *node)
 					}
 
 					/*
-					 * In a semijoin, we'll consider returning the first
-					 * match, but after that we're done with this outer tuple.
+					 * In a semi or semi left join, we'll consider returning
+					 * the first match, but after that we're done with this
+					 * outer tuple.
 					 */
-					if (node->js.jointype == JOIN_SEMI)
+					if (node->js.jointype == JOIN_SEMI ||
+						node->js.jointype == JOIN_SEMI_LEFT)
 						node->mj_JoinState = EXEC_MJ_NEXTOUTER;
 
 					qualResult = (otherqual == NIL ||
@@ -1559,6 +1561,7 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
 			mergestate->mj_FillInner = false;
 			break;
 		case JOIN_LEFT:
+		case JOIN_SEMI_LEFT:
 		case JOIN_ANTI:
 			mergestate->mj_FillOuter = true;
 			mergestate->mj_FillInner = false;
diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c
index 555fa09..ebcf0f2 100644
--- a/src/backend/executor/nodeNestloop.c
+++ b/src/backend/executor/nodeNestloop.c
@@ -182,6 +182,7 @@ ExecNestLoop(NestLoopState *node)
 
 			if (!node->nl_MatchedOuter &&
 				(node->js.jointype == JOIN_LEFT ||
+				 node->js.jointype == JOIN_SEMI_LEFT ||
 				 node->js.jointype == JOIN_ANTI))
 			{
 				/*
@@ -247,10 +248,11 @@ ExecNestLoop(NestLoopState *node)
 			}
 
 			/*
-			 * In a semijoin, we'll consider returning the first match, but
-			 * after that we're done with this outer tuple.
+			 * In a semi or semi left join, we'll consider returning the first
+			 * match, but after that we're done with this outer tuple.
 			 */
-			if (node->js.jointype == JOIN_SEMI)
+			if (node->js.jointype == JOIN_SEMI ||
+				node->js.jointype == JOIN_SEMI_LEFT)
 				node->nl_NeedNewOuter = true;
 
 			if (otherqual == NIL || ExecQual(otherqual, econtext, false))
@@ -358,6 +360,7 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags)
 		case JOIN_SEMI:
 			break;
 		case JOIN_LEFT:
+		case JOIN_SEMI_LEFT:
 		case JOIN_ANTI:
 			nlstate->nl_NullInnerTupleSlot =
 				ExecInitNullTupleSlot(estate,
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index f4e4a91..005d290 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -2067,6 +2067,7 @@ _copySpecialJoinInfo(const SpecialJoinInfo *from)
 	COPY_SCALAR_FIELD(jointype);
 	COPY_SCALAR_FIELD(lhs_strict);
 	COPY_SCALAR_FIELD(delay_upper_joins);
+	COPY_SCALAR_FIELD(is_unique_join);
 	COPY_SCALAR_FIELD(semi_can_btree);
 	COPY_SCALAR_FIELD(semi_can_hash);
 	COPY_NODE_FIELD(semi_operators);
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 854c062..4a87efd 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -839,6 +839,7 @@ _equalSpecialJoinInfo(const SpecialJoinInfo *a, const SpecialJoinInfo *b)
 	COMPARE_SCALAR_FIELD(jointype);
 	COMPARE_SCALAR_FIELD(lhs_strict);
 	COMPARE_SCALAR_FIELD(delay_upper_joins);
+	COMPARE_SCALAR_FIELD(is_unique_join);
 	COMPARE_SCALAR_FIELD(semi_can_btree);
 	COMPARE_SCALAR_FIELD(semi_can_hash);
 	COMPARE_NODE_FIELD(semi_operators);
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index bfd12ac..f34567b 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -2268,6 +2268,7 @@ _outSpecialJoinInfo(StringInfo str, const SpecialJoinInfo *node)
 	WRITE_ENUM_FIELD(jointype, JoinType);
 	WRITE_BOOL_FIELD(lhs_strict);
 	WRITE_BOOL_FIELD(delay_upper_joins);
+	WRITE_BOOL_FIELD(is_unique_join);
 	WRITE_BOOL_FIELD(semi_can_btree);
 	WRITE_BOOL_FIELD(semi_can_hash);
 	WRITE_NODE_FIELD(semi_operators);
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index b395751..8656629 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1893,8 +1893,8 @@ cost_group(Path *path, PlannerInfo *root,
  * estimate and getting a tight lower bound.  We choose to not examine the
  * join quals here, since that's by far the most expensive part of the
  * calculations.  The end result is that CPU-cost considerations must be
- * left for the second phase; and for SEMI/ANTI joins, we must also postpone
- * incorporation of the inner path's run cost.
+ * left for the second phase; and for SEMI, SEMI_LEFT and ANTI joins, we must
+ * also postpone incorporation of the inner path's run cost.
  *
  * 'workspace' is to be filled with startup_cost, total_cost, and perhaps
  *		other data to be used by final_cost_nestloop
@@ -1902,7 +1902,7 @@ cost_group(Path *path, PlannerInfo *root,
  * 'outer_path' is the outer input to the join
  * 'inner_path' is the inner input to the join
  * 'sjinfo' is extra info about the join for selectivity estimation
- * 'semifactors' contains valid data if jointype is SEMI or ANTI
+ * 'semifactors' contains valid data if jointype is SEMI, SEMI_LEFT or ANTI
  */
 void
 initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace,
@@ -1940,10 +1940,12 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace,
 	inner_run_cost = inner_path->total_cost - inner_path->startup_cost;
 	inner_rescan_run_cost = inner_rescan_total_cost - inner_rescan_start_cost;
 
-	if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
+	if (jointype == JOIN_SEMI ||
+		jointype == JOIN_SEMI_LEFT ||
+		jointype == JOIN_ANTI)
 	{
 		/*
-		 * SEMI or ANTI join: executor will stop after first match.
+		 * SEMI, SEMI_LEFT or ANTI join: executor will stop after first match.
 		 *
 		 * Getting decent estimates requires inspection of the join quals,
 		 * which we choose to postpone to final_cost_nestloop.
@@ -1977,7 +1979,8 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace,
  * 'path' is already filled in except for the rows and cost fields
  * 'workspace' is the result from initial_cost_nestloop
  * 'sjinfo' is extra info about the join for selectivity estimation
- * 'semifactors' contains valid data if path->jointype is SEMI or ANTI
+ * 'semifactors' contains valid data if path->jointype is SEMI, SEMI_LEFT or
+ * ANTI
  */
 void
 final_cost_nestloop(PlannerInfo *root, NestPath *path,
@@ -2017,10 +2020,12 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path,
 
 	/* cost of inner-relation source data (we already dealt with outer rel) */
 
-	if (path->jointype == JOIN_SEMI || path->jointype == JOIN_ANTI)
+	if (path->jointype == JOIN_SEMI ||
+		path->jointype == JOIN_SEMI_LEFT ||
+		path->jointype == JOIN_ANTI)
 	{
 		/*
-		 * SEMI or ANTI join: executor will stop after first match.
+		 * SEMI, SEMI_LEFT or ANTI join: executor will stop after first match.
 		 */
 		Cost		inner_run_cost = workspace->inner_run_cost;
 		Cost		inner_rescan_run_cost = workspace->inner_rescan_run_cost;
@@ -2250,6 +2255,7 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
 			innerendsel = cache->leftendsel;
 		}
 		if (jointype == JOIN_LEFT ||
+			jointype == JOIN_SEMI_LEFT ||
 			jointype == JOIN_ANTI)
 		{
 			outerstartsel = 0.0;
@@ -2773,7 +2779,8 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace,
  *		num_batches
  * 'workspace' is the result from initial_cost_hashjoin
  * 'sjinfo' is extra info about the join for selectivity estimation
- * 'semifactors' contains valid data if path->jointype is SEMI or ANTI
+ * 'semifactors' contains valid data if path->jointype is SEMI, SEMI_LEFT or
+ * ANTI
  */
 void
 final_cost_hashjoin(PlannerInfo *root, HashPath *path,
@@ -2896,13 +2903,15 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
 
 	/* CPU costs */
 
-	if (path->jpath.jointype == JOIN_SEMI || path->jpath.jointype == JOIN_ANTI)
+	if (path->jpath.jointype == JOIN_SEMI ||
+		path->jpath.jointype == JOIN_SEMI_LEFT ||
+		path->jpath.jointype == JOIN_ANTI)
 	{
 		double		outer_matched_rows;
 		Selectivity inner_scan_frac;
 
 		/*
-		 * SEMI or ANTI join: executor will stop after first match.
+		 * SEMI, SEMI_LEFT or ANTI join: executor will stop after first match.
 		 *
 		 * For an outer-rel row that has at least one match, we can expect the
 		 * bucket scan to stop after a fraction 1/(match_count+1) of the
@@ -2937,10 +2946,10 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
 			clamp_row_est(inner_path_rows / virtualbuckets) * 0.05;
 
 		/* Get # of tuples that will pass the basic join */
-		if (path->jpath.jointype == JOIN_SEMI)
-			hashjointuples = outer_matched_rows;
-		else
+		if (path->jpath.jointype == JOIN_ANTI)
 			hashjointuples = outer_path_rows - outer_matched_rows;
+		else
+			hashjointuples = outer_matched_rows;
 	}
 	else
 	{
@@ -3469,11 +3478,11 @@ get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel,
 
 /*
  * compute_semi_anti_join_factors
- *	  Estimate how much of the inner input a SEMI or ANTI join
+ *	  Estimate how much of the inner input a SEMI, SEMI_LEFT or ANTI join
  *	  can be expected to scan.
  *
- * In a hash or nestloop SEMI/ANTI join, the executor will stop scanning
- * inner rows as soon as it finds a match to the current outer row.
+ * In a hash or nestloop SEMI/SEMI_LEFT/ANTI join, the executor will stop
+ * scanning inner rows as soon as it finds a match to the current outer row.
  * We should therefore adjust some of the cost components for this effect.
  * This function computes some estimates needed for these adjustments.
  * These estimates will be the same regardless of the particular paths used
@@ -3483,7 +3492,7 @@ get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel,
  * Input parameters:
  *	outerrel: outer relation under consideration
  *	innerrel: inner relation under consideration
- *	jointype: must be JOIN_SEMI or JOIN_ANTI
+ *	jointype: must be JOIN_SEMI, JOIN_SEMI_LEFT or JOIN_ANTI
  *	sjinfo: SpecialJoinInfo relevant to this join
  *	restrictlist: join quals
  * Output parameters:
@@ -3506,7 +3515,9 @@ compute_semi_anti_join_factors(PlannerInfo *root,
 	ListCell   *l;
 
 	/* Should only be called in these cases */
-	Assert(jointype == JOIN_SEMI || jointype == JOIN_ANTI);
+	Assert(jointype == JOIN_SEMI ||
+		   jointype == JOIN_SEMI_LEFT ||
+		   jointype == JOIN_ANTI);
 
 	/*
 	 * In an ANTI join, we must ignore clauses that are "pushed down", since
@@ -3530,7 +3541,8 @@ compute_semi_anti_join_factors(PlannerInfo *root,
 		joinquals = restrictlist;
 
 	/*
-	 * Get the JOIN_SEMI or JOIN_ANTI selectivity of the join clauses.
+	 * Get the JOIN_SEMI, JOIN_SEMI_LEFT or JOIN_ANTI selectivity of the join
+	 * clauses.
 	 */
 	jselec = clauselist_selectivity(root,
 									joinquals,
@@ -3969,6 +3981,10 @@ calc_joinrel_size_estimate(PlannerInfo *root,
 	 *
 	 * For JOIN_SEMI and JOIN_ANTI, the selectivity is defined as the fraction
 	 * of LHS rows that have matches, and we apply that straightforwardly.
+	 *
+	 * For JOIN_SEMI_LEFT, the selectivity is defined as the fraction of the
+	 * LHS rows that have matches, although unlike JOIN_SEMI we must consider
+	 * NULL RHS rows, and take the higher estimate of the two.
 	 */
 	switch (jointype)
 	{
@@ -3993,6 +4009,12 @@ calc_joinrel_size_estimate(PlannerInfo *root,
 			nrows = outer_rows * jselec;
 			/* pselec not used */
 			break;
+		case JOIN_SEMI_LEFT:
+			nrows = outer_rows * jselec;
+			if (nrows < outer_rows)
+				nrows = outer_rows;
+			nrows *= pselec;
+			break;
 		case JOIN_ANTI:
 			nrows = outer_rows * (1.0 - jselec);
 			nrows *= pselec;
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index f3aced3..cfdda7b 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -19,6 +19,7 @@
 #include "executor/executor.h"
 #include "foreign/fdwapi.h"
 #include "optimizer/cost.h"
+#include "optimizer/planmain.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 
@@ -28,6 +29,16 @@ set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
 #define PATH_PARAM_BY_REL(path, rel)  \
 	((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
 
+static bool is_innerrel_unique_for(PlannerInfo *root,
+					   RelOptInfo *outerrel,
+					   RelOptInfo *innerrel,
+					   List *restrictlist);
+static JoinType get_optimal_jointype(PlannerInfo *root,
+					 RelOptInfo *outerrel,
+					 RelOptInfo *innerrel,
+					 JoinType jointype,
+					 SpecialJoinInfo *sjinfo,
+					 List *restrictlist);
 static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
 					 RelOptInfo *outerrel, RelOptInfo *innerrel,
 					 JoinType jointype, JoinPathExtraData *extra);
@@ -50,7 +61,176 @@ static List *select_mergejoin_clauses(PlannerInfo *root,
 						 List *restrictlist,
 						 JoinType jointype,
 						 bool *mergejoin_allowed);
+static inline bool clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
+						RelOptInfo *innerrel);
 
+/*
+ * is_innerrel_unique_for
+ *	  Determine if this innerrel can, at most, return a single tuple for each
+ *	  outer tuple, based on the 'restrictlist'.
+ */
+static bool
+is_innerrel_unique_for(PlannerInfo *root,
+					   RelOptInfo *outerrel,
+					   RelOptInfo *innerrel,
+					   List *restrictlist)
+{
+	bool		is_unique;
+	int			org_len;
+	ListCell   *lc;
+
+	if (restrictlist == NIL ||
+		!rel_supports_distinctness(root, innerrel))
+		return false;
+
+	/*
+	 * Remember the number of items that were in the restrictlist as the call
+	 * to relation_has_unique_index_for may add more items which we'll need to
+	 * remove later.
+	 */
+	org_len = list_length(restrictlist);
+
+	/*
+	 * rel_is_distinct_for requires restrict infos to have the correct clause
+	 * direction info
+	 */
+	foreach(lc, restrictlist)
+	{
+		clause_sides_match_join((RestrictInfo *) lfirst(lc), outerrel,
+								innerrel);
+	}
+
+	/* Let rel_is_distinct_for() do the hard work */
+	is_unique = rel_is_distinct_for(root, innerrel, restrictlist);
+
+	/* Remove any list items added by rel_is_distinct_for */
+	list_truncate(restrictlist, org_len);
+
+	return is_unique;
+}
+
+/*
+ * get_optimal_jointype
+ *	  We may be able to optimize some joins by converting the JoinType to one
+ *	  which the executor is able to run more efficiently. Here we look for
+ *	  such cases and if we find a better choice, then we'll return it,
+ *	  otherwise we'll return the original JoinType.
+ */
+static JoinType
+get_optimal_jointype(PlannerInfo *root,
+					 RelOptInfo *outerrel,
+					 RelOptInfo *innerrel,
+					 JoinType jointype,
+					 SpecialJoinInfo *sjinfo,
+					 List *restrictlist)
+{
+	int			innerrelid;
+
+	/*
+	 * LEFT JOINs in which we have proved the inner side to be unique can be
+	 * converted into JOIN_SEMI_LEFT.
+	 */
+	if (jointype == JOIN_LEFT)
+	{
+		if (sjinfo->is_unique_join)
+			return JOIN_SEMI_LEFT;
+		else
+			return JOIN_LEFT;
+	}
+
+	if (!bms_get_singleton_member(innerrel->relids, &innerrelid))
+		return jointype;
+
+	/*
+	 * Any INNER JOINs which can be proven to return at most one inner tuple
+	 * for each outer tuple can be converted in to a JOIN_SEMI.
+	 */
+	if (jointype == JOIN_INNER)
+	{
+		MemoryContext old_context;
+		ListCell   *lc;
+
+		/* can't optimize jointype with an empty restrictlist */
+		if (restrictlist == NIL)
+			return jointype;
+
+		/*
+		 * First let's query the unique and non-unique caches to see if we've
+		 * managed to prove that innerrel is unique for some subset of this
+		 * outerrel. We don't need an exact match, as if we have any extra
+		 * outerrels than were previously cached, then they can't make the
+		 * innerrel any less unique.
+		 */
+		foreach(lc, root->unique_rels[innerrelid])
+		{
+			Bitmapset  *unique_rels = (Bitmapset *) lfirst(lc);
+
+			if (bms_is_subset(unique_rels, outerrel->relids))
+			{
+				/* ensure is_innerrel_unique_for() agrees */
+				Assert(is_innerrel_unique_for(root, outerrel, innerrel,
+											  restrictlist));
+
+				return JOIN_SEMI;		/* Success! */
+			}
+		}
+
+		/*
+		 * We may have previously determined that this outerrel, or some
+		 * superset thereof, cannot prove this innerrel to be unique.
+		 */
+		foreach(lc, root->non_unique_rels[innerrelid])
+		{
+			Bitmapset  *unique_rels = (Bitmapset *) lfirst(lc);
+
+			if (bms_is_subset(outerrel->relids, unique_rels))
+			{
+				/* ensure is_innerrel_unique_for() agrees */
+				Assert(!is_innerrel_unique_for(root, outerrel, innerrel,
+											   restrictlist));
+
+				return jointype;
+			}
+		}
+
+		/*
+		 * We may be getting called from the geqo and might not be working in
+		 * the standard planner's memory context, so let's ensure we are.
+		 */
+		old_context = MemoryContextSwitchTo(root->planner_cxt);
+
+		if (is_innerrel_unique_for(root, outerrel, innerrel, restrictlist))
+		{
+			/*
+			 * XXX Should we attempt to get the minimum set of outerrels which
+			 * proved this innerrel to be unique? If we did this then we might
+			 * be able to make a few more cases unique that we otherwise
+			 * couldn't. However, the standard join search always start with
+			 * fewer rels anyway, so this may not matter, although perhaps we
+			 * should be more aggressive to make this work better with the
+			 * geqo?
+			 */
+
+			/* cache the result for next time */
+			root->unique_rels[innerrelid] =
+				lappend(root->unique_rels[innerrelid], outerrel->relids);
+
+			jointype = JOIN_SEMI;		/* Success! */
+		}
+		else
+		{
+			/*
+			 * None of outerrel helped prove innerrel unique, so we can safely
+			 * reject this rel, or a subset of this rel in future checks.
+			 */
+			root->non_unique_rels[innerrelid] =
+				lappend(root->non_unique_rels[innerrelid], outerrel->relids);
+		}
+
+		MemoryContextSwitchTo(old_context);
+	}
+	return jointype;
+}
 
 /*
  * add_paths_to_joinrel
@@ -88,6 +268,13 @@ add_paths_to_joinrel(PlannerInfo *root,
 	bool		mergejoin_allowed = true;
 	ListCell   *lc;
 
+	/*
+	 * There may be a more optimal JoinType to use. Check for such cases
+	 * first.
+	 */
+	jointype = get_optimal_jointype(root, outerrel, innerrel, jointype, sjinfo,
+									restrictlist);
+
 	extra.restrictlist = restrictlist;
 	extra.mergeclause_list = NIL;
 	extra.sjinfo = sjinfo;
@@ -109,10 +296,12 @@ add_paths_to_joinrel(PlannerInfo *root,
 														  &mergejoin_allowed);
 
 	/*
-	 * If it's SEMI or ANTI join, compute correction factors for cost
-	 * estimation.  These will be the same for all paths.
+	 * If it's SEMI, SEMI_LEFT or ANTI join, compute correction factors for
+	 * cost estimation.  These will be the same for all paths.
 	 */
-	if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
+	if (jointype == JOIN_SEMI ||
+		jointype == JOIN_SEMI_LEFT ||
+		jointype == JOIN_ANTI)
 		compute_semi_anti_join_factors(root, outerrel, innerrel,
 									   jointype, sjinfo, restrictlist,
 									   &extra.semifactors);
@@ -827,16 +1016,17 @@ match_unsorted_outer(PlannerInfo *root,
 	ListCell   *lc1;
 
 	/*
-	 * Nestloop only supports inner, left, semi, and anti joins.  Also, if we
-	 * are doing a right or full mergejoin, we must use *all* the mergeclauses
-	 * as join clauses, else we will not have a valid plan.  (Although these
-	 * two flags are currently inverses, keep them separate for clarity and
-	 * possible future changes.)
+	 * Nestloop only supports inner, left, semi_left, semi, and anti joins.
+	 * Also, if we are doing a right or full mergejoin, we must use *all* the
+	 * mergeclauses as join clauses, else we will not have a valid plan.
+	 * (Although these two flags are currently inverses, keep them separate
+	 * for clarity and possible future changes.)
 	 */
 	switch (jointype)
 	{
 		case JOIN_INNER:
 		case JOIN_LEFT:
+		case JOIN_SEMI_LEFT:
 		case JOIN_SEMI:
 		case JOIN_ANTI:
 			nestjoinOK = true;
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 01d4fea..b91cb79 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -490,10 +490,12 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 			/*
 			 * The proposed join could still be legal, but only if we're
 			 * allowed to associate it into the RHS of this SJ.  That means
-			 * this SJ must be a LEFT join (not SEMI or ANTI, and certainly
-			 * not FULL) and the proposed join must not overlap the LHS.
+			 * this SJ must be a LEFT or SEMI_LEFT join (not SEMI or ANTI, and
+			 * certainly not FULL) and the proposed join must not overlap the
+			 * LHS.
 			 */
-			if (sjinfo->jointype != JOIN_LEFT ||
+			if ((sjinfo->jointype != JOIN_LEFT &&
+				 sjinfo->jointype != JOIN_SEMI_LEFT) ||
 				bms_overlap(joinrelids, sjinfo->min_lefthand))
 				return false;	/* invalid join path */
 
@@ -508,8 +510,8 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 	}
 
 	/*
-	 * Fail if violated any SJ's RHS and didn't match to a LEFT SJ: the
-	 * proposed join can't associate into an SJ's RHS.
+	 * Fail if violated any SJ's RHS and didn't match to a LEFT or SEMI_LEFT
+	 * SJ: the proposed join can't associate into an SJ's RHS.
 	 *
 	 * Also, fail if the proposed join's predicate isn't strict; we're
 	 * essentially checking to see if we can apply outer-join identity 3, and
@@ -518,7 +520,8 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 	 */
 	if (must_be_leftjoin &&
 		(match_sjinfo == NULL ||
-		 match_sjinfo->jointype != JOIN_LEFT ||
+		 (match_sjinfo->jointype != JOIN_LEFT &&
+		  match_sjinfo->jointype != JOIN_SEMI_LEFT) ||
 		 !match_sjinfo->lhs_strict))
 		return false;			/* invalid join path */
 
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index f7f5714..7446eb5 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -34,11 +34,37 @@
 
 /* local functions */
 static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
+static bool specialjoin_is_unique_join(PlannerInfo *root,
+						   SpecialJoinInfo *sjinfo);
 static void remove_rel_from_query(PlannerInfo *root, int relid,
 					  Relids joinrelids);
 static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
 static Oid	distinct_col_search(int colno, List *colnos, List *opids);
 
+/*
+ * mark_unique_joins
+ *		Analyze joins in order to determine if their inner side is unique based
+ *		on the join condition.
+ */
+void
+mark_unique_joins(PlannerInfo *root, List *joinlist)
+{
+	ListCell   *lc;
+
+	foreach(lc, root->join_info_list)
+	{
+		SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+
+		/*
+		 * Currently we're only interested in LEFT JOINs that have not already
+		 * been marked as unique by a previous call.
+		 */
+		if (sjinfo->jointype == JOIN_LEFT &&
+			!sjinfo->is_unique_join &&
+			specialjoin_is_unique_join(root, sjinfo))
+			sjinfo->is_unique_join = true;
+	}
+}
 
 /*
  * remove_useless_joins
@@ -92,6 +118,12 @@ restart:
 		root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
 
 		/*
+		 * We may now be able to mark some joins as unique which we could not
+		 * do before
+		 */
+		mark_unique_joins(root, joinlist);
+
+		/*
 		 * Restart the scan.  This is necessary to ensure we find all
 		 * removable joins independently of ordering of the join_info_list
 		 * (note that removal of attr_needed bits may make a join appear
@@ -152,17 +184,17 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 {
 	int			innerrelid;
 	RelOptInfo *innerrel;
-	Query	   *subquery = NULL;
 	Relids		joinrelids;
-	List	   *clause_list = NIL;
-	ListCell   *l;
 	int			attroff;
+	ListCell   *l;
 
 	/*
-	 * Must be a non-delaying left join to a single baserel, else we aren't
-	 * going to be able to do anything with it.
+	 * Join must not duplicate its outer side and must be a non-delaying left
+	 * join to a single baserel, else we aren't going to be able to do
+	 * anything with it.
 	 */
-	if (sjinfo->jointype != JOIN_LEFT ||
+	if (!sjinfo->is_unique_join ||
+		sjinfo->jointype != JOIN_LEFT ||
 		sjinfo->delay_upper_joins)
 		return false;
 
@@ -171,38 +203,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 
 	innerrel = find_base_rel(root, innerrelid);
 
-	if (innerrel->reloptkind != RELOPT_BASEREL)
-		return false;
-
-	/*
-	 * Before we go to the effort of checking whether any innerrel variables
-	 * are needed above the join, make a quick check to eliminate cases in
-	 * which we will surely be unable to prove uniqueness of the innerrel.
-	 */
-	if (innerrel->rtekind == RTE_RELATION)
-	{
-		/*
-		 * For a plain-relation innerrel, we only know how to prove uniqueness
-		 * by reference to unique indexes.  If there are no indexes then
-		 * there's certainly no unique indexes so there's no point in going
-		 * further.
-		 */
-		if (innerrel->indexlist == NIL)
-			return false;
-	}
-	else if (innerrel->rtekind == RTE_SUBQUERY)
-	{
-		subquery = root->simple_rte_array[innerrelid]->subquery;
-
-		/*
-		 * If the subquery has no qualities that support distinctness proofs
-		 * then there's no point in going further.
-		 */
-		if (!query_supports_distinctness(subquery))
-			return false;
-	}
-	else
-		return false;			/* unsupported rtekind */
+	/* Must be true as is_unique_join can only be set to true for base rels */
+	Assert(innerrel->reloptkind == RELOPT_BASEREL);
 
 	/* Compute the relid set for the join we are considering */
 	joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
@@ -213,7 +215,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	 *
 	 * Note that this test only detects use of inner-rel attributes in higher
 	 * join conditions and the target list.  There might be such attributes in
-	 * pushed-down conditions at this join, too.  We check that case below.
+	 * pushed-down conditions at this join, too, but in this case the join
+	 * would not have been marked as unique.
 	 *
 	 * As a micro-optimization, it seems better to start with max_attr and
 	 * count down rather than starting with min_attr and counting up, on the
@@ -254,6 +257,44 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 			return false;		/* it does reference innerrel */
 	}
 
+	return true;
+}
+
+/*
+ * specialjoin_is_unique_join
+ *		True if it can be proved that this special join can only ever match at
+ *		most one inner row for any single outer row. False is returned if
+ *		there's insufficient evidence to prove the join is unique.
+ */
+static bool
+specialjoin_is_unique_join(PlannerInfo *root, SpecialJoinInfo *sjinfo)
+{
+	int			innerrelid;
+	RelOptInfo *innerrel;
+	Relids		joinrelids;
+	ListCell   *l;
+	List	   *clause_list = NIL;
+
+	/* if there's more than one relation involved, then punt */
+	if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
+		return false;
+
+	innerrel = find_base_rel(root, innerrelid);
+
+	if (innerrel->reloptkind != RELOPT_BASEREL)
+		return false;
+
+	/*
+	 * Before we go to the effort of pulling out the join condition's columns,
+	 * make a quick check to eliminate cases in which we will surely be unable
+	 * to prove uniqueness of the innerrel.
+	 */
+	if (!rel_supports_distinctness(root, innerrel))
+		return false;
+
+	/* Compute the relid set for the join we are considering */
+	joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
+
 	/*
 	 * Search for mergejoinable clauses that constrain the inner rel against
 	 * either the outer rel or a pseudoconstant.  If an operator is
@@ -275,10 +316,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 			!bms_equal(restrictinfo->required_relids, joinrelids))
 		{
 			/*
-			 * If such a clause actually references the inner rel then join
-			 * removal has to be disallowed.  We have to check this despite
-			 * the previous attr_needed checks because of the possibility of
-			 * pushed-down clauses referencing the rel.
+			 * If such a clause actually references the inner rel then we
+			 * can't mark the join as unique.
 			 */
 			if (bms_is_member(innerrelid, restrictinfo->clause_relids))
 				return false;
@@ -301,71 +340,9 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 		clause_list = lappend(clause_list, restrictinfo);
 	}
 
-	/*
-	 * relation_has_unique_index_for automatically adds any usable restriction
-	 * clauses for the innerrel, so we needn't do that here.  (XXX we are not
-	 * considering restriction clauses for subqueries; is that worth doing?)
-	 */
-
-	if (innerrel->rtekind == RTE_RELATION)
-	{
-		/* Now examine the indexes to see if we have a matching unique index */
-		if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL))
-			return true;
-	}
-	else	/* innerrel->rtekind == RTE_SUBQUERY */
-	{
-		List	   *colnos = NIL;
-		List	   *opids = NIL;
-
-		/*
-		 * Build the argument lists for query_is_distinct_for: a list of
-		 * output column numbers that the query needs to be distinct over, and
-		 * a list of equality operators that the output columns need to be
-		 * distinct according to.
-		 */
-		foreach(l, clause_list)
-		{
-			RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
-			Oid			op;
-			Var		   *var;
-
-			/*
-			 * Get the equality operator we need uniqueness according to.
-			 * (This might be a cross-type operator and thus not exactly the
-			 * same operator the subquery would consider; that's all right
-			 * since query_is_distinct_for can resolve such cases.)  The
-			 * mergejoinability test above should have selected only OpExprs.
-			 */
-			Assert(IsA(rinfo->clause, OpExpr));
-			op = ((OpExpr *) rinfo->clause)->opno;
-
-			/* clause_sides_match_join identified the inner side for us */
-			if (rinfo->outer_is_left)
-				var = (Var *) get_rightop(rinfo->clause);
-			else
-				var = (Var *) get_leftop(rinfo->clause);
-
-			/*
-			 * If inner side isn't a Var referencing a subquery output column,
-			 * this clause doesn't help us.
-			 */
-			if (!var || !IsA(var, Var) ||
-				var->varno != innerrelid || var->varlevelsup != 0)
-				continue;
-
-			colnos = lappend_int(colnos, var->varattno);
-			opids = lappend_oid(opids, op);
-		}
-
-		if (query_is_distinct_for(subquery, colnos, opids))
-			return true;
-	}
+	if (rel_is_distinct_for(root, innerrel, clause_list))
+		return true;
 
-	/*
-	 * Some day it would be nice to check for other methods of establishing
-	 * distinctness.
-	 */
 	return false;
 }
 
@@ -560,6 +537,127 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
 	return result;
 }
 
+/*
+ * rel_is_distinct_for
+ *		Returns True if rel can be proved to be distinct over clause_list
+ *
+ * Note: We expect clause_list to be already processed to check if the
+ * RestrictInfos are in the form "outerrel_expr op innerrel_expr" or
+ * "innerrel_expr op outerrel_expr".
+ *
+ * Note: this method may add items to clause_list, callers should either
+ * make a copy of the list or trim it back to it's original length after
+ * calling this function.
+ */
+bool
+rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
+{
+	int			relid = rel->relid;
+
+	/*
+	 * relation_has_unique_index_for automatically adds any usable restriction
+	 * clauses for the rel, so we needn't do that here.  (XXX we are not
+	 * considering restriction clauses for subqueries; is that worth doing?)
+	 */
+	if (rel->rtekind == RTE_RELATION)
+	{
+		/* Now examine the indexes to see if we have a matching unique index */
+		if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL))
+			return true;
+	}
+	else if (rel->rtekind == RTE_SUBQUERY)
+	{
+		List	   *colnos = NIL;
+		List	   *opids = NIL;
+		ListCell   *l;
+		Query	   *subquery = root->simple_rte_array[relid]->subquery;
+
+		/*
+		 * Build the argument lists for query_is_distinct_for: a list of
+		 * output column numbers that the query needs to be distinct over, and
+		 * a list of equality operators that the output columns need to be
+		 * distinct according to.
+		 */
+		foreach(l, clause_list)
+		{
+			RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
+			Oid			op;
+			Var		   *var;
+
+			if (!IsA(rinfo->clause, OpExpr))
+				continue;
+
+			/*
+			 * Get the equality operator we need uniqueness according to.
+			 * (This might be a cross-type operator and thus not exactly the
+			 * same operator the subquery would consider; that's all right
+			 * since query_is_distinct_for can resolve such cases.)  The
+			 * mergejoinability test above should have selected only OpExprs.
+			 */
+			op = ((OpExpr *) rinfo->clause)->opno;
+
+			/* clause_sides_match_join identified the inner side for us */
+			if (rinfo->outer_is_left)
+				var = (Var *) get_rightop(rinfo->clause);
+			else
+				var = (Var *) get_leftop(rinfo->clause);
+
+			/*
+			 * If inner side isn't a Var referencing a subquery output column,
+			 * this clause doesn't help us.
+			 */
+			if (!var || !IsA(var, Var) ||
+				var->varno != relid || var->varlevelsup != 0)
+				continue;
+
+			colnos = lappend_int(colnos, var->varattno);
+			opids = lappend_oid(opids, op);
+		}
+
+		if (query_is_distinct_for(subquery, colnos, opids))
+			return true;
+	}
+	return false;				/* can't prove rel to be distinct over
+								 * clause_list */
+}
+
+/*
+ * rel_supports_distinctness
+ *		Returns true if rel has some properties which can prove the relation
+ *		to be unique over some set of columns.
+ *
+ * This is effectively a pre-checking function for rel_is_distinct_for().
+ * It must return TRUE if rel_is_distinct_for() could possibly return TRUE
+ */
+bool
+rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
+{
+	if (rel->rtekind == RTE_RELATION)
+	{
+		/*
+		 * For a plain-relation, we only know how to prove uniqueness by
+		 * reference to unique indexes.  If there are no indexes then there's
+		 * certainly no unique indexes so there's nothing to prove uniqueness
+		 * on the relation.
+		 */
+		if (rel->indexlist != NIL)
+			return true;
+	}
+	else if (rel->rtekind == RTE_SUBQUERY)
+	{
+		Query	   *subquery = root->simple_rte_array[rel->relid]->subquery;
+
+		/* Check if the subquery has any qualities that support distinctness */
+		if (query_supports_distinctness(subquery))
+			return true;
+	}
+
+	/*
+	 * Some day it would be nice to check for other methods of establishing
+	 * distinctness.
+	 */
+	return false;
+}
 
 /*
  * query_supports_distinctness - could the query possibly be proven distinct
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 9999eea..e90f358 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -1130,6 +1130,7 @@ make_outerjoininfo(PlannerInfo *root,
 	sjinfo->jointype = jointype;
 	/* this always starts out false */
 	sjinfo->delay_upper_joins = false;
+	sjinfo->is_unique_join = false;
 
 	compute_semijoin_info(sjinfo, clause);
 
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index 88d7ea4..efaa655 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -185,6 +185,9 @@ query_planner(PlannerInfo *root, List *tlist,
 	 */
 	fix_placeholder_input_needed_levels(root);
 
+	/* Analyze joins to find out which ones have a unique inner side */
+	mark_unique_joins(root, joinlist);
+
 	/*
 	 * Remove any useless outer joins.  Ideally this would be done during
 	 * jointree preprocessing, but the necessary information isn't available
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index dd2b9ed..fe6f789 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -1617,6 +1617,7 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset)
 	switch (join->jointype)
 	{
 		case JOIN_LEFT:
+		case JOIN_SEMI_LEFT:
 		case JOIN_SEMI:
 		case JOIN_ANTI:
 			inner_itlist->has_non_vars = false;
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 6f24b03..f1aeb9b 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -71,6 +71,14 @@ setup_simple_rel_arrays(PlannerInfo *root)
 	/* simple_rte_array is an array equivalent of the rtable list */
 	root->simple_rte_array = (RangeTblEntry **)
 		palloc0(root->simple_rel_array_size * sizeof(RangeTblEntry *));
+
+	/* initialize the unique relation caches */
+	root->unique_rels = (List **)
+		palloc0(root->simple_rel_array_size * sizeof(List *));
+
+	root->non_unique_rels = (List **)
+		palloc0(root->simple_rel_array_size * sizeof(List *));
+
 	rti = 1;
 	foreach(lc, root->parse->rtable)
 	{
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index c9edd88..28c2db0 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -978,11 +978,11 @@ transformFromClauseItem(ParseState *pstate, Node *n,
 		/*
 		 * Make the left-side RTEs available for LATERAL access within the
 		 * right side, by temporarily adding them to the pstate's namespace
-		 * list.  Per SQL:2008, if the join type is not INNER or LEFT then the
-		 * left-side names must still be exposed, but it's an error to
-		 * reference them.  (Stupid design, but that's what it says.)  Hence,
-		 * we always push them into the namespace, but mark them as not
-		 * lateral_ok if the jointype is wrong.
+		 * list.  Per SQL:2008, if the join type is not INNER, LEFT or
+		 * SEMI_LEFT  then the left-side names must still be exposed, but it's
+		 * an error to reference them.  (Stupid design, but that's what it
+		 * says.)  Hence, we always push them into the namespace, but mark
+		 * them as not lateral_ok if the jointype is wrong.
 		 *
 		 * Notice that we don't require the merged namespace list to be
 		 * conflict-free.  See the comments for scanNameSpaceForRefname().
@@ -990,7 +990,9 @@ transformFromClauseItem(ParseState *pstate, Node *n,
 		 * NB: this coding relies on the fact that list_concat is not
 		 * destructive to its second argument.
 		 */
-		lateral_ok = (j->jointype == JOIN_INNER || j->jointype == JOIN_LEFT);
+		lateral_ok = (j->jointype == JOIN_INNER ||
+					  j->jointype == JOIN_LEFT ||
+					  j->jointype == JOIN_SEMI_LEFT);
 		setNamespaceLateralState(l_namespace, true, lateral_ok);
 
 		sv_namespace_length = list_length(pstate->p_namespace);
diff --git a/src/backend/utils/adt/network_selfuncs.c b/src/backend/utils/adt/network_selfuncs.c
index 2e39687..4250325 100644
--- a/src/backend/utils/adt/network_selfuncs.c
+++ b/src/backend/utils/adt/network_selfuncs.c
@@ -216,6 +216,7 @@ networkjoinsel(PG_FUNCTION_ARGS)
 	{
 		case JOIN_INNER:
 		case JOIN_LEFT:
+		case JOIN_SEMI_LEFT:	/* XXX belongs here, or with SEMI/ANTI? */
 		case JOIN_FULL:
 
 			/*
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index a6555e9..68aedfa 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -2202,6 +2202,7 @@ eqjoinsel(PG_FUNCTION_ARGS)
 	{
 		case JOIN_INNER:
 		case JOIN_LEFT:
+		case JOIN_SEMI_LEFT: /* XXX belongs here, or with SEMI/ANTI? */
 		case JOIN_FULL:
 			selec = eqjoinsel_inner(operator, &vardata1, &vardata2);
 			break;
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 734df77..87695b1 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -638,6 +638,14 @@ typedef enum JoinType
 	JOIN_ANTI,					/* 1 copy of each LHS row that has no match */
 
 	/*
+	 * The following join type is a variant of JOIN_LEFT for when the inner
+	 * side of the join is known to be unique. This serves solely as an
+	 * optimization to allow the executor to skip looking for another matching
+	 * tuple in the inner side, when it's known that another cannot exist.
+	 */
+	JOIN_SEMI_LEFT,
+
+	/*
 	 * These codes are used internally in the planner, but are not supported
 	 * by the executor (nor, indeed, by most of the planner).
 	 */
@@ -666,6 +674,7 @@ typedef enum JoinType
 #define IS_OUTER_JOIN(jointype) \
 	(((1 << (jointype)) & \
 	  ((1 << JOIN_LEFT) | \
+	   (1 << JOIN_SEMI_LEFT) | \
 	   (1 << JOIN_FULL) | \
 	   (1 << JOIN_RIGHT) | \
 	   (1 << JOIN_ANTI))) != 0)
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index d39c73b..85ee975 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -221,6 +221,18 @@ typedef struct PlannerInfo
 	List	  **join_rel_level; /* lists of join-relation RelOptInfos */
 	int			join_cur_level; /* index of list being extended */
 
+	/*
+	 * During the join search we attempt to optimize joins to try to prove
+	 * their inner side to be unique based on the join condition. This is a
+	 * rather expensive thing to do as it requires checking each relations
+	 * unique indexes to see if the relation can, at most, return one tuple
+	 * for each outer tuple. We use this cache during the join search to
+	 * record lists of the sets of relations which both prove, and disprove
+	 * the uniqueness properties for the relid indexed by these arrays.
+	 */
+	List	  **unique_rels;	/* cache for proven unique rels */
+	List	  **non_unique_rels;	/* cache for proven non-unique rels */
+
 	List	   *init_plans;		/* init SubPlans for query */
 
 	List	   *cte_plan_ids;	/* per-CTE-item list of subplan IDs */
@@ -1729,6 +1741,7 @@ typedef struct SpecialJoinInfo
 	JoinType	jointype;		/* always INNER, LEFT, FULL, SEMI, or ANTI */
 	bool		lhs_strict;		/* joinclause is strict for some LHS rel */
 	bool		delay_upper_joins;		/* can't commute with upper RHS */
+	bool		is_unique_join; /* matches a max of 1 row per outer join row */
 	/* Remaining fields are set only for JOIN_SEMI jointype: */
 	bool		semi_can_btree; /* true if semi_operators are all btree */
 	bool		semi_can_hash;	/* true if semi_operators are all hash */
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 1f96e27..3c0dcbd 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -98,7 +98,11 @@ extern RestrictInfo *build_implied_join_equality(Oid opno,
 /*
  * prototypes for plan/analyzejoins.c
  */
+extern void mark_unique_joins(PlannerInfo *root, List *joinlist);
 extern List *remove_useless_joins(PlannerInfo *root, List *joinlist);
+extern bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
+								List *clause_list);
+extern bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
 extern bool query_supports_distinctness(Query *query);
 extern bool query_is_distinct_for(Query *query, List *colnos, List *opids);
 
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 3ff6691..952833f 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -880,29 +880,31 @@ explain (costs off) select a,c from t1 group by a,c,d;
 explain (costs off) select *
 from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y
 group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z;
-                      QUERY PLAN                       
--------------------------------------------------------
- Group
+                      QUERY PLAN                      
+------------------------------------------------------
+ HashAggregate
    Group Key: t1.a, t1.b, t2.x, t2.y
-   ->  Merge Join
-         Merge Cond: ((t1.a = t2.x) AND (t1.b = t2.y))
-         ->  Index Scan using t1_pkey on t1
-         ->  Index Scan using t2_pkey on t2
-(6 rows)
+   ->  Hash Semi Join
+         Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
+         ->  Seq Scan on t2
+         ->  Hash
+               ->  Seq Scan on t1
+(7 rows)
 
 -- Test case where t1 can be optimized but not t2
 explain (costs off) select t1.*,t2.x,t2.z
 from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y
 group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z;
-                      QUERY PLAN                       
--------------------------------------------------------
+                      QUERY PLAN                      
+------------------------------------------------------
  HashAggregate
    Group Key: t1.a, t1.b, t2.x, t2.z
-   ->  Merge Join
-         Merge Cond: ((t1.a = t2.x) AND (t1.b = t2.y))
-         ->  Index Scan using t1_pkey on t1
-         ->  Index Scan using t2_pkey on t2
-(6 rows)
+   ->  Hash Semi Join
+         Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
+         ->  Seq Scan on t2
+         ->  Hash
+               ->  Seq Scan on t1
+(7 rows)
 
 -- Cannot optimize when PK is deferrable
 explain (costs off) select * from t3 group by a,b,c;
diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out
index 0391b8e..aca998c 100644
--- a/src/test/regress/expected/equivclass.out
+++ b/src/test/regress/expected/equivclass.out
@@ -186,7 +186,7 @@ explain (costs off)
   select * from ec1, ec2 where ff = x1 and x1 = '42'::int8alias2;
                QUERY PLAN                
 -----------------------------------------
- Nested Loop
+ Nested Loop Semi Join
    ->  Seq Scan on ec2
          Filter: (x1 = '42'::int8alias2)
    ->  Index Scan using ec1_pkey on ec1
@@ -310,7 +310,7 @@ explain (costs off)
          ->  Index Scan using ec1_expr3 on ec1 ec1_5
          ->  Index Scan using ec1_expr4 on ec1 ec1_6
    ->  Materialize
-         ->  Merge Join
+         ->  Merge Semi Join
                Merge Cond: ((((ec1_1.ff + 2) + 1)) = ec1.f1)
                ->  Merge Append
                      Sort Key: (((ec1_1.ff + 2) + 1))
@@ -365,7 +365,7 @@ explain (costs off)
   where ss1.x = ec1.f1 and ec1.ff = 42::int8;
                      QUERY PLAN                      
 -----------------------------------------------------
- Merge Join
+ Merge Semi Join
    Merge Cond: ((((ec1_1.ff + 2) + 1)) = ec1.f1)
    ->  Merge Append
          Sort Key: (((ec1_1.ff + 2) + 1))
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index cafbc5e..2e867fa 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2756,8 +2756,8 @@ from nt3 as nt3
 where nt3.id = 1 and ss2.b3;
                   QUERY PLAN                   
 -----------------------------------------------
- Nested Loop
-   ->  Nested Loop
+ Nested Loop Semi Join
+   ->  Nested Loop Semi Join
          ->  Index Scan using nt3_pkey on nt3
                Index Cond: (id = 1)
          ->  Index Scan using nt2_pkey on nt2
@@ -4035,7 +4035,7 @@ explain (costs off)
     on (p.k = ss.k);
            QUERY PLAN            
 ---------------------------------
- Hash Left Join
+ Hash Semi Left Join
    Hash Cond: (p.k = c.k)
    ->  Seq Scan on parent p
    ->  Hash
@@ -5260,3 +5260,238 @@ ERROR:  invalid reference to FROM-clause entry for table "xx1"
 LINE 1: ...xx1 using lateral (select * from int4_tbl where f1 = x1) ss;
                                                                 ^
 HINT:  There is an entry for table "xx1", but it cannot be referenced from this part of the query.
+--
+-- test planner's ability to change joins into their appropriate semi join
+-- type
+--
+create table j1 (id int primary key);
+create table j2 (id int primary key);
+create table j3 (id int);
+insert into j1 values(1),(2),(3);
+insert into j2 values(1),(2),(3);
+insert into j3 values(1),(1);
+analyze j1;
+analyze j2;
+analyze j3;
+-- ensure join is changed to a semi join
+explain (verbose, costs off)
+select * from j1 inner join j2 on j1.id = j2.id;
+            QUERY PLAN             
+-----------------------------------
+ Hash Semi Join
+   Output: j1.id, j2.id
+   Hash Cond: (j1.id = j2.id)
+   ->  Seq Scan on public.j1
+         Output: j1.id
+   ->  Hash
+         Output: j2.id
+         ->  Seq Scan on public.j2
+               Output: j2.id
+(9 rows)
+
+-- ensure join not changed when not an equi-join
+explain (verbose, costs off)
+select * from j1 inner join j2 on j1.id > j2.id;
+            QUERY PLAN             
+-----------------------------------
+ Nested Loop
+   Output: j1.id, j2.id
+   Join Filter: (j1.id > j2.id)
+   ->  Seq Scan on public.j1
+         Output: j1.id
+   ->  Materialize
+         Output: j2.id
+         ->  Seq Scan on public.j2
+               Output: j2.id
+(9 rows)
+
+-- don't change, as j3 has no unique index or pk on id
+explain (verbose, costs off)
+select * from j1 inner join j3 on j1.id = j3.id;
+            QUERY PLAN             
+-----------------------------------
+ Hash Semi Join
+   Output: j1.id, j3.id
+   Hash Cond: (j3.id = j1.id)
+   ->  Seq Scan on public.j3
+         Output: j3.id
+   ->  Hash
+         Output: j1.id
+         ->  Seq Scan on public.j1
+               Output: j1.id
+(9 rows)
+
+-- ensure left join is converted to left semi join
+explain (verbose, costs off)
+select * from j1 left join j2 on j1.id = j2.id;
+            QUERY PLAN             
+-----------------------------------
+ Hash Semi Left Join
+   Output: j1.id, j2.id
+   Hash Cond: (j1.id = j2.id)
+   ->  Seq Scan on public.j1
+         Output: j1.id
+   ->  Hash
+         Output: j2.id
+         ->  Seq Scan on public.j2
+               Output: j2.id
+(9 rows)
+
+-- ensure right join is converted too
+explain (verbose, costs off)
+select * from j1 right join j2 on j1.id = j2.id;
+            QUERY PLAN             
+-----------------------------------
+ Hash Semi Left Join
+   Output: j1.id, j2.id
+   Hash Cond: (j2.id = j1.id)
+   ->  Seq Scan on public.j2
+         Output: j2.id
+   ->  Hash
+         Output: j1.id
+         ->  Seq Scan on public.j1
+               Output: j1.id
+(9 rows)
+
+-- a clauseless (cross) join can't be converted
+explain (verbose, costs off)
+select * from j1 cross join j2;
+            QUERY PLAN             
+-----------------------------------
+ Nested Loop
+   Output: j1.id, j2.id
+   ->  Seq Scan on public.j1
+         Output: j1.id
+   ->  Materialize
+         Output: j2.id
+         ->  Seq Scan on public.j2
+               Output: j2.id
+(8 rows)
+
+-- ensure a natural join is converted to a semi join
+explain (verbose, costs off)
+select * from j1 natural join j2;
+            QUERY PLAN             
+-----------------------------------
+ Hash Semi Join
+   Output: j1.id
+   Hash Cond: (j1.id = j2.id)
+   ->  Seq Scan on public.j1
+         Output: j1.id
+   ->  Hash
+         Output: j2.id
+         ->  Seq Scan on public.j2
+               Output: j2.id
+(9 rows)
+
+-- ensure distinct clause allows the inner to become a semi join
+explain (verbose, costs off)
+select * from j1
+inner join (select distinct id from j3) j3 on j1.id = j3.id;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Nested Loop Semi Join
+   Output: j1.id, j3.id
+   Join Filter: (j1.id = j3.id)
+   ->  Seq Scan on public.j1
+         Output: j1.id
+   ->  Materialize
+         Output: j3.id
+         ->  Unique
+               Output: j3.id
+               ->  Sort
+                     Output: j3.id
+                     Sort Key: j3.id
+                     ->  Seq Scan on public.j3
+                           Output: j3.id
+(14 rows)
+
+-- ensure group by clause allows the inner to become a semi join
+explain (verbose, costs off)
+select * from j1
+inner join (select id from j3 group by id) j3 on j1.id = j3.id;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Nested Loop Semi Join
+   Output: j1.id, j3.id
+   Join Filter: (j1.id = j3.id)
+   ->  Seq Scan on public.j1
+         Output: j1.id
+   ->  Materialize
+         Output: j3.id
+         ->  Group
+               Output: j3.id
+               Group Key: j3.id
+               ->  Sort
+                     Output: j3.id
+                     Sort Key: j3.id
+                     ->  Seq Scan on public.j3
+                           Output: j3.id
+(15 rows)
+
+-- ensure a full join is not altered
+explain (verbose, costs off)
+select * from j1 full join j2 on j1.id = j2.id;
+            QUERY PLAN             
+-----------------------------------
+ Hash Full Join
+   Output: j1.id, j2.id
+   Hash Cond: (j1.id = j2.id)
+   ->  Seq Scan on public.j1
+         Output: j1.id
+   ->  Hash
+         Output: j2.id
+         ->  Seq Scan on public.j2
+               Output: j2.id
+(9 rows)
+
+drop table j1;
+drop table j2;
+drop table j3;
+-- test a more complex permutations of join conversions
+create table j1 (id1 int, id2 int, primary key(id1,id2));
+create table j2 (id1 int, id2 int, primary key(id1,id2));
+create table j3 (id1 int, id2 int, primary key(id1,id2));
+insert into j1 values(1,1),(2,2);
+insert into j2 values(1,1);
+insert into j3 values(1,1);
+analyze j1;
+analyze j2;
+analyze j3;
+-- ensure there's no join conversion when not all columns which are part of
+-- the unique index are part of the join clause
+explain (verbose, costs off)
+select * from j1
+inner join j2 on j1.id1 = j2.id1;
+                QUERY PLAN                
+------------------------------------------
+ Nested Loop
+   Output: j1.id1, j1.id2, j2.id1, j2.id2
+   Join Filter: (j1.id1 = j2.id1)
+   ->  Seq Scan on public.j2
+         Output: j2.id1, j2.id2
+   ->  Seq Scan on public.j1
+         Output: j1.id1, j1.id2
+(7 rows)
+
+-- ensure inner is converted to semi join when there's multiple columns in the
+-- join condition
+explain (verbose, costs off)
+select * from j1
+inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2;
+                        QUERY PLAN                        
+----------------------------------------------------------
+ Nested Loop Semi Join
+   Output: j1.id1, j1.id2, j2.id1, j2.id2
+   Join Filter: ((j1.id1 = j2.id1) AND (j1.id2 = j2.id2))
+   ->  Seq Scan on public.j1
+         Output: j1.id1, j1.id2
+   ->  Materialize
+         Output: j2.id1, j2.id2
+         ->  Seq Scan on public.j2
+               Output: j2.id1, j2.id2
+(9 rows)
+
+drop table j1;
+drop table j2;
+drop table j3;
diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out
index 067aa8d..3f0c705 100644
--- a/src/test/regress/expected/rowsecurity.out
+++ b/src/test/regress/expected/rowsecurity.out
@@ -276,7 +276,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle);
 EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle);
                      QUERY PLAN                     
 ----------------------------------------------------
- Nested Loop
+ Nested Loop Semi Join
    ->  Subquery Scan on document
          Filter: f_leak(document.dtitle)
          ->  Seq Scan on document document_1
diff --git a/src/test/regress/expected/select_views.out b/src/test/regress/expected/select_views.out
index 7f57526..496eb62 100644
--- a/src/test/regress/expected/select_views.out
+++ b/src/test/regress/expected/select_views.out
@@ -1411,7 +1411,7 @@ NOTICE:  f_leak => 9801-2345-6789-0123
 EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_normal WHERE f_leak(cnum);
                        QUERY PLAN                        
 ---------------------------------------------------------
- Hash Join
+ Hash Semi Join
    Hash Cond: (r.cid = l.cid)
    ->  Seq Scan on credit_card r
          Filter: f_leak(cnum)
@@ -1432,7 +1432,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_secure WHERE f_leak(cnum);
 ---------------------------------------------------------------
  Subquery Scan on my_credit_card_secure
    Filter: f_leak(my_credit_card_secure.cnum)
-   ->  Hash Join
+   ->  Hash Semi Join
          Hash Cond: (r.cid = l.cid)
          ->  Seq Scan on credit_card r
          ->  Hash
@@ -1466,7 +1466,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_usage_normal
    ->  Materialize
          ->  Subquery Scan on l
                Filter: f_leak(l.cnum)
-               ->  Hash Join
+               ->  Hash Semi Join
                      Hash Cond: (r_1.cid = l_1.cid)
                      ->  Seq Scan on credit_card r_1
                      ->  Hash
@@ -1497,7 +1497,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM my_credit_card_usage_secure
          ->  Seq Scan on credit_usage r
                Filter: ((ymd >= '10-01-2011'::date) AND (ymd < '11-01-2011'::date))
          ->  Materialize
-               ->  Hash Join
+               ->  Hash Semi Join
                      Hash Cond: (r_1.cid = l.cid)
                      ->  Seq Scan on credit_card r_1
                      ->  Hash
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 3430f91..38cc39c 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1696,3 +1696,96 @@ update xx1 set x2 = f1 from xx1, lateral (select * from int4_tbl where f1 = x1)
 delete from xx1 using (select * from int4_tbl where f1 = x1) ss;
 delete from xx1 using (select * from int4_tbl where f1 = xx1.x1) ss;
 delete from xx1 using lateral (select * from int4_tbl where f1 = x1) ss;
+
+--
+-- test planner's ability to change joins into their appropriate semi join
+-- type
+--
+
+create table j1 (id int primary key);
+create table j2 (id int primary key);
+create table j3 (id int);
+
+insert into j1 values(1),(2),(3);
+insert into j2 values(1),(2),(3);
+insert into j3 values(1),(1);
+
+analyze j1;
+analyze j2;
+analyze j3;
+
+-- ensure join is changed to a semi join
+explain (verbose, costs off)
+select * from j1 inner join j2 on j1.id = j2.id;
+
+-- ensure join not changed when not an equi-join
+explain (verbose, costs off)
+select * from j1 inner join j2 on j1.id > j2.id;
+
+-- don't change, as j3 has no unique index or pk on id
+explain (verbose, costs off)
+select * from j1 inner join j3 on j1.id = j3.id;
+
+-- ensure left join is converted to left semi join
+explain (verbose, costs off)
+select * from j1 left join j2 on j1.id = j2.id;
+
+-- ensure right join is converted too
+explain (verbose, costs off)
+select * from j1 right join j2 on j1.id = j2.id;
+
+-- a clauseless (cross) join can't be converted
+explain (verbose, costs off)
+select * from j1 cross join j2;
+
+-- ensure a natural join is converted to a semi join
+explain (verbose, costs off)
+select * from j1 natural join j2;
+
+-- ensure distinct clause allows the inner to become a semi join
+explain (verbose, costs off)
+select * from j1
+inner join (select distinct id from j3) j3 on j1.id = j3.id;
+
+-- ensure group by clause allows the inner to become a semi join
+explain (verbose, costs off)
+select * from j1
+inner join (select id from j3 group by id) j3 on j1.id = j3.id;
+
+-- ensure a full join is not altered
+explain (verbose, costs off)
+select * from j1 full join j2 on j1.id = j2.id;
+
+drop table j1;
+drop table j2;
+drop table j3;
+
+-- test a more complex permutations of join conversions
+
+create table j1 (id1 int, id2 int, primary key(id1,id2));
+create table j2 (id1 int, id2 int, primary key(id1,id2));
+create table j3 (id1 int, id2 int, primary key(id1,id2));
+
+insert into j1 values(1,1),(2,2);
+insert into j2 values(1,1);
+insert into j3 values(1,1);
+
+analyze j1;
+analyze j2;
+analyze j3;
+
+-- ensure there's no join conversion when not all columns which are part of
+-- the unique index are part of the join clause
+explain (verbose, costs off)
+select * from j1
+inner join j2 on j1.id1 = j2.id1;
+
+-- ensure inner is converted to semi join when there's multiple columns in the
+-- join condition
+explain (verbose, costs off)
+select * from j1
+inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2;
+
+drop table j1;
+drop table j2;
+drop table j3;