From 457becbf1ff49670b4608841654b84c5c75a252e Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Tue, 12 Jun 2018 18:15:22 +0300
Subject: [PATCH 4/4] Plan aggregation in query_planner, to allow aggregating
 below joins.

Consider performing Grouping/Aggregation below Join nodes, if it's legal
to do so. To do this, move the responsibility of planning grouping from
the "upper stages", in grouping_planner, into scan/join planning, in
query_planner().

In query_planner(), after building the RelOptInfo for a scan or join rel,
also build a grouped RelOptInfo to shadow each RelOptInfo (if aggregation
can be done at that rel). The grouped RelOptInfo is stored in a new
'grouped_rel' field in the parent RelOptInfo.

The grouped rel holds Paths where the grouping/aggregation is already
performed at that node. For a base rel, it represents performing the
aggregation on top of the scan, i.e. the Paths contain Agg(Scan). For a
grouped join rel, the paths look like an Agg(Join(A, B)), or Join(Agg(A), B).

This is still a prototype, with a bunch of stuff broken or in need of
cleanup:

- Clarify what all this should mean to the FDW API. I did some hacking
  around in postgres_fdw, to make it work partially, but still lots of
  regression failures there. Need to figure out the details of how
  an FDW should plan grouping nodes in the new world.

- Still one regression failure, in 'partition_aggregate' test.

- grouping_planner() is a bit of a misnomer now, as grouping is now planned
  as part of query_planner().
---
 contrib/postgres_fdw/deparse.c               |  22 +-
 contrib/postgres_fdw/postgres_fdw.c          |   8 +-
 src/backend/optimizer/README                 |  63 ++-
 src/backend/optimizer/geqo/geqo_eval.c       |   3 +
 src/backend/optimizer/path/aggpath.c         | 574 +++++++++++++++++++++++----
 src/backend/optimizer/path/allpaths.c        | 165 ++++++++
 src/backend/optimizer/path/joinrels.c        | 130 ++++++
 src/backend/optimizer/path/pathkeys.c        |   2 +-
 src/backend/optimizer/plan/initsplan.c       | 139 ++++++-
 src/backend/optimizer/plan/planmain.c        |  40 +-
 src/backend/optimizer/plan/planner.c         | 272 ++++---------
 src/backend/optimizer/util/pathnode.c        |  12 +-
 src/backend/optimizer/util/relnode.c         |  46 ++-
 src/include/nodes/relation.h                 |  36 +-
 src/include/optimizer/pathnode.h             |   1 +
 src/include/optimizer/paths.h                |  24 +-
 src/include/optimizer/planmain.h             |   1 +
 src/include/optimizer/planner.h              |   8 +
 src/test/regress/expected/aggregates.out     |  21 +-
 src/test/regress/expected/partition_join.out |   4 +-
 src/test/regress/parallel_schedule           |   2 +-
 src/test/regress/sql/aggregate_pushdown.sql  |  70 ++++
 22 files changed, 1311 insertions(+), 332 deletions(-)
 create mode 100644 src/test/regress/sql/aggregate_pushdown.sql

diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 6b3178350f..f7aa8b9f0e 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -682,7 +682,7 @@ foreign_expr_walker(Node *node,
 				ListCell   *lc;
 
 				/* Not safe to pushdown when not in grouping context */
-				if (!IS_UPPER_REL(glob_cxt->foreignrel))
+				if (!IS_GROUPED_REL(glob_cxt->foreignrel))
 					return false;
 
 				/* Only non-split aggregates are pushable. */
@@ -882,7 +882,7 @@ build_tlist_to_deparse(RelOptInfo *foreignrel)
 	 * For an upper relation, we have already built the target list while
 	 * checking shippability, so just return that.
 	 */
-	if (IS_UPPER_REL(foreignrel))
+	if (IS_GROUPED_REL(foreignrel))
 		return fpinfo->grouped_tlist;
 
 	/*
@@ -959,7 +959,7 @@ deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *rel,
 	 * conditions of the underlying scan relation; otherwise, we can use the
 	 * supplied list of remote conditions directly.
 	 */
-	if (IS_UPPER_REL(rel))
+	if (IS_UPPER_REL(rel) || IS_GROUPED_REL(rel))
 	{
 		PgFdwRelationInfo *ofpinfo;
 
@@ -972,7 +972,7 @@ deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *rel,
 	/* Construct FROM and WHERE clauses */
 	deparseFromExpr(quals, &context);
 
-	if (IS_UPPER_REL(rel))
+	if (IS_GROUPED_REL(rel))
 	{
 		/* Append GROUP BY clause */
 		appendGroupByClause(tlist, &context);
@@ -1029,7 +1029,7 @@ deparseSelectSql(List *tlist, bool is_subquery, List **retrieved_attrs,
 		 */
 		deparseSubqueryTargetList(context);
 	}
-	else if (IS_JOIN_REL(foreignrel) || IS_UPPER_REL(foreignrel))
+	else if (IS_JOIN_REL(foreignrel) || IS_UPPER_REL(foreignrel) || IS_GROUPED_REL(foreignrel))
 	{
 		/*
 		 * For a join or upper relation the input tlist gives the list of
@@ -1444,6 +1444,18 @@ deparseFromExprForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *foreignrel,
 		bool		outerrel_is_target = false;
 		bool		innerrel_is_target = false;
 
+		/*
+		 * In a grouped join rel, fpinfo->outerrel points to the non-grouped
+		 * parent rel, and fpinfo->innerrel is NULL. Dig the inner and outer
+		 * side of the parent.
+		 */
+		if (IS_GROUPED_REL(foreignrel))
+		{
+			fpinfo = (PgFdwRelationInfo *) fpinfo->outerrel->fdw_private;
+			outerrel = fpinfo->outerrel;
+			innerrel = fpinfo->innerrel;
+		}
+
 		if (ignore_rel > 0 && bms_is_member(ignore_rel, foreignrel->relids))
 		{
 			/*
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 7906ade920..acdbeb28df 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -1242,6 +1242,8 @@ postgresGetForeignPlan(PlannerInfo *root,
 			 * Right now, we only consider grouping and aggregation beyond
 			 * joins. Queries involving aggregates or grouping do not require
 			 * EPQ mechanism, hence should not have an outer plan here.
+			 *
+			 * FIXME: is this comment still valid?
 			 */
 			Assert(!IS_UPPER_REL(foreignrel));
 
@@ -2672,7 +2674,7 @@ estimate_path_cost_size(PlannerInfo *root,
 						   &remote_param_join_conds, &local_param_join_conds);
 
 		/* Build the list of columns to be fetched from the foreign server. */
-		if (IS_JOIN_REL(foreignrel) || IS_UPPER_REL(foreignrel))
+		if (IS_JOIN_REL(foreignrel) || IS_UPPER_REL(foreignrel) || IS_GROUPED_REL(foreignrel))
 			fdw_scan_tlist = build_tlist_to_deparse(foreignrel);
 		else
 			fdw_scan_tlist = NIL;
@@ -2753,7 +2755,7 @@ estimate_path_cost_size(PlannerInfo *root,
 			startup_cost = fpinfo->rel_startup_cost;
 			run_cost = fpinfo->rel_total_cost - fpinfo->rel_startup_cost;
 		}
-		else if (IS_JOIN_REL(foreignrel))
+		else if (IS_JOIN_REL(foreignrel) && !IS_GROUPED_REL(foreignrel))
 		{
 			PgFdwRelationInfo *fpinfo_i;
 			PgFdwRelationInfo *fpinfo_o;
@@ -2819,7 +2821,7 @@ estimate_path_cost_size(PlannerInfo *root,
 			run_cost += nrows * remote_conds_cost.per_tuple;
 			run_cost += fpinfo->local_conds_cost.per_tuple * retrieved_rows;
 		}
-		else if (IS_UPPER_REL(foreignrel))
+		else if (IS_GROUPED_REL(foreignrel))
 		{
 			PgFdwRelationInfo *ofpinfo;
 			PathTarget *ptarget = foreignrel->reltarget;
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README
index 15af9ceff5..c0a4a64e37 100644
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -313,8 +313,7 @@ set up for recursive handling of subqueries
  convert Vars of outer query levels into Params
 --grouping_planner()
   preprocess target list for non-SELECT queries
-  handle UNION/INTERSECT/EXCEPT, GROUP BY, HAVING, aggregates,
-	ORDER BY, DISTINCT, LIMIT
+  handle UNION/INTERSECT/EXCEPT, ORDER BY, DISTINCT, LIMIT
 --query_planner()
    make list of base relations used in query
    split up the qual into restrictions (a=1) and joins (b=c)
@@ -334,9 +333,10 @@ set up for recursive handling of subqueries
       Back at standard_join_search(), generate gather paths if needed for
       each newly constructed joinrel, then apply set_cheapest() to extract
       the cheapest path for it.
+      If the query has a GROUP BY or aggregates, generate grouping paths for
+      each new joinrel, where grouping is possible.
       Loop back if this wasn't the top join level.
   Back at grouping_planner:
-  do grouping (GROUP BY) and aggregation
   do window functions
   make unique (DISTINCT)
   do sorting (ORDER BY)
@@ -981,16 +981,57 @@ security-barrier views be flattened into the parent query, allowing more
 flexibility of planning while still preserving required ordering of qual
 evaluation.  But that will come later.
 
+Grouping
+--------
+
+If the query involves GROUP BY or aggregates, grouping is considered together with
+scans and joins. The straightforward way is to perform the aggregation after
+all the joins, but sometimes it can be done earlier. For example:
+
+SELECT t1.a, count(*) FROM t1, t2 WHERE t1.a = t2.x
+GROUP BY t1.a
+
+The straightforward plan might look like this:
+
+ Aggregate
+   ->  Join
+         ->  Scan on t1
+         ->  Scan on t2
+
+But under some circumstances, the Aggregate can be performed before the join, like
+this:
+
+ Join
+   ->  Aggregate
+         ->  Scan on t1
+   ->  Scan on t2
+
+This transformation is only valid under some conditions. All the GROUP BY
+expressions must be computable at the lower plan node, the join clauses must
+refer to the GROUP BY columns, and the Join above the Aggregate mustn't
+introduce any "new" rows, that would need to be passed to the aggregate functions.
+
+In the dynamic programming algorithm, when we build the join relations, we also
+consider performing Aggregation at each join relation. However, it is not always
+a win to perform the Aggregation at the lowest possible join. Performing a join
+first might eliminate a lot of rows, avoiding the cost of doing the aggregation
+for the eliminated rows. Therefore, we build both aggregated and non-aggregated
+Paths for each join relation, and choose the cheapest aggregated Path of the final
+join relation.
+
+This approach is based on the paper "Including Group-By in Query Optimization"
+by Surajit Chaudhuri and Kyuseok Shim":
+  https://pdfs.semanticscholar.org/3079/5447cec18753254edbbd7839f0afa58b2a39.pdf
 
-Post scan/join planning
------------------------
+Post scan/join/group planning
+-----------------------------
 
-So far we have discussed only scan/join planning, that is, implementation
-of the FROM and WHERE clauses of a SQL query.  But the planner must also
-determine how to deal with GROUP BY, aggregation, and other higher-level
-features of queries; and in many cases there are multiple ways to do these
+So far we have discussed only scan/join/group planning, that is, implementation
+of the FROM and WHERE clauses of a SQL query, as well as GROUP BY and aggregation.
+But the planner must also determine how to deal with higher-level features of
+queries, like window functions and ORDER BY; and in many cases there are multiple ways to do these
 steps and thus opportunities for optimization choices.  These steps, like
-scan/join planning, are handled by constructing Paths representing the
+scan/join/group planning, are handled by constructing Paths representing the
 different ways to do a step, then choosing the cheapest Path.
 
 Since all Paths require a RelOptInfo as "parent", we create RelOptInfos
@@ -1000,7 +1041,7 @@ considered useful for each step.  Currently, we may create these types of
 additional RelOptInfos during upper-level planning:
 
 UPPERREL_SETOP		result of UNION/INTERSECT/EXCEPT, if any
-UPPERREL_PARTIAL_GROUP_AGG	result of partial grouping/aggregation, if any
+UPPERREL_PARTIAL_GROUP_AGG	result of partial grouping/aggregation, if any XXX: do we still create these?
 UPPERREL_GROUP_AGG	result of grouping/aggregation, if any
 UPPERREL_WINDOW		result of window functions, if any
 UPPERREL_DISTINCT	result of "SELECT DISTINCT", if any
diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c
index 3ef7d7d8aa..7724726027 100644
--- a/src/backend/optimizer/geqo/geqo_eval.c
+++ b/src/backend/optimizer/geqo/geqo_eval.c
@@ -268,6 +268,9 @@ merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, int num_gene,
 				/* Create paths for partitionwise joins. */
 				generate_partitionwise_join_paths(root, joinrel);
 
+				/* Create paths for groupings. */
+				generate_grouped_join_paths(root, joinrel);
+
 				/*
 				 * Except for the topmost scan/join rel, consider gathering
 				 * partial paths.  We'll do the same for the topmost scan/join
diff --git a/src/backend/optimizer/path/aggpath.c b/src/backend/optimizer/path/aggpath.c
index 618171b148..edb6f24e05 100644
--- a/src/backend/optimizer/path/aggpath.c
+++ b/src/backend/optimizer/path/aggpath.c
@@ -45,9 +45,6 @@
 #include "utils/selfuncs.h"
 #include "utils/syscache.h"
 
-static RelOptInfo *make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
-				  PathTarget *target, bool target_parallel_safe,
-				  Node *havingQual);
 static bool is_degenerate_grouping(PlannerInfo *root);
 static void create_degenerate_grouping_paths(PlannerInfo *root,
 								 RelOptInfo *input_rel,
@@ -264,26 +261,19 @@ estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
  * Note: all Paths in input_rel are expected to return the target computed
  * by make_group_input_target.
  */
-RelOptInfo *
+void
 create_grouping_paths(PlannerInfo *root,
 					  RelOptInfo *input_rel,
+					  RelOptInfo *grouped_rel,
 					  PathTarget *target,
 					  bool target_parallel_safe,
 					  const AggClauseCosts *agg_costs,
 					  grouping_sets_data *gd)
 {
 	Query	   *parse = root->parse;
-	RelOptInfo *grouped_rel;
 	RelOptInfo *partially_grouped_rel;
 
 	/*
-	 * Create grouping relation to hold fully aggregated grouping and/or
-	 * aggregation paths.
-	 */
-	grouped_rel = make_grouping_rel(root, input_rel, target,
-									target_parallel_safe, parse->havingQual);
-
-	/*
 	 * Create either paths for a degenerate grouping or paths for ordinary
 	 * grouping, as appropriate.
 	 */
@@ -363,61 +353,6 @@ create_grouping_paths(PlannerInfo *root,
 	}
 
 	set_cheapest(grouped_rel);
-	return grouped_rel;
-}
-
-/*
- * make_grouping_rel
- *
- * Create a new grouping rel and set basic properties.
- *
- * input_rel represents the underlying scan/join relation.
- * target is the output expected from the grouping relation.
- */
-static RelOptInfo *
-make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
-				  PathTarget *target, bool target_parallel_safe,
-				  Node *havingQual)
-{
-	RelOptInfo *grouped_rel;
-
-	if (IS_OTHER_REL(input_rel))
-	{
-		grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG,
-									  input_rel->relids);
-		grouped_rel->reloptkind = RELOPT_OTHER_UPPER_REL;
-	}
-	else
-	{
-		/*
-		 * By tradition, the relids set for the main grouping relation is
-		 * NULL.  (This could be changed, but might require adjustments
-		 * elsewhere.)
-		 */
-		grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
-	}
-
-	/* Set target. */
-	grouped_rel->reltarget = target;
-
-	/*
-	 * If the input relation is not parallel-safe, then the grouped relation
-	 * can't be parallel-safe, either.  Otherwise, it's parallel-safe if the
-	 * target list and HAVING quals are parallel-safe.
-	 */
-	if (input_rel->consider_parallel && target_parallel_safe &&
-		is_parallel_safe(root, (Node *) havingQual))
-		grouped_rel->consider_parallel = true;
-
-	/*
-	 * If the input rel belongs to a single FDW, so does the grouped rel.
-	 */
-	grouped_rel->serverid = input_rel->serverid;
-	grouped_rel->userid = input_rel->userid;
-	grouped_rel->useridiscurrent = input_rel->useridiscurrent;
-	grouped_rel->fdwroutine = input_rel->fdwroutine;
-
-	return grouped_rel;
 }
 
 /*
@@ -1040,6 +975,8 @@ create_partial_grouping_paths(PlannerInfo *root,
 							  bool force_rel_creation)
 {
 	Query	   *parse = root->parse;
+	PathTarget *partially_grouped_target;
+	bool		partially_grouped_target_parallel_safe;
 	RelOptInfo *partially_grouped_rel;
 	AggClauseCosts *agg_partial_costs = &extra->agg_partial_costs;
 	AggClauseCosts *agg_final_costs = &extra->agg_final_costs;
@@ -1081,12 +1018,25 @@ create_partial_grouping_paths(PlannerInfo *root,
 		return NULL;
 
 	/*
-	 * Build a new upper relation to represent the result of partially
+	 * Build target list for partial aggregate paths.  These paths cannot just
+	 * emit the same tlist as regular aggregate paths, because (1) we must
+	 * include Vars and Aggrefs needed in HAVING, which might not appear in
+	 * the result tlist, and (2) the Aggrefs must be set in partial mode.
+	 */
+	partially_grouped_target =
+		make_partial_grouping_target(root, grouped_rel->reltarget,
+									 extra->havingQual);
+	partially_grouped_target_parallel_safe =
+		is_parallel_safe(root, (Node *) partially_grouped_target->exprs);
+
+	/*
+	 * Build a new grouped relation to represent the result of partially
 	 * aggregating the rows from the input relation.
 	 */
-	partially_grouped_rel = fetch_upper_rel(root,
-											UPPERREL_PARTIAL_GROUP_AGG,
-											grouped_rel->relids);
+	partially_grouped_rel = make_grouping_rel(root, input_rel,
+											  partially_grouped_target,
+											  partially_grouped_target_parallel_safe,
+											  root->parse->havingQual);
 	partially_grouped_rel->consider_parallel =
 		grouped_rel->consider_parallel;
 	partially_grouped_rel->reloptkind = grouped_rel->reloptkind;
@@ -1095,16 +1045,6 @@ create_partial_grouping_paths(PlannerInfo *root,
 	partially_grouped_rel->useridiscurrent = grouped_rel->useridiscurrent;
 	partially_grouped_rel->fdwroutine = grouped_rel->fdwroutine;
 
-	/*
-	 * Build target list for partial aggregate paths.  These paths cannot just
-	 * emit the same tlist as regular aggregate paths, because (1) we must
-	 * include Vars and Aggrefs needed in HAVING, which might not appear in
-	 * the result tlist, and (2) the Aggrefs must be set in partial mode.
-	 */
-	partially_grouped_rel->reltarget =
-		make_partial_grouping_target(root, grouped_rel->reltarget,
-									 extra->havingQual);
-
 	if (!extra->partial_costs_set)
 	{
 		/*
@@ -1454,10 +1394,12 @@ create_partitionwise_grouping_paths(PlannerInfo *root,
 		 * Create grouping relation to hold fully aggregated grouping and/or
 		 * aggregation paths for the child.
 		 */
+		Assert(!child_input_rel->grouped_rel);
 		child_grouped_rel = make_grouping_rel(root, child_input_rel,
 											  child_target,
 											  extra->target_parallel_safe,
 											  child_extra.havingQual);
+		child_input_rel->grouped_rel = child_grouped_rel;
 
 		/* Ignore empty children. They contribute nothing. */
 		if (IS_DUMMY_REL(child_input_rel))
@@ -1965,3 +1907,473 @@ remap_to_groupclause_idx(List *groupClause,
 
 	return result;
 }
+
+/*
+ * make_group_input_target
+ *	  Generate appropriate PathTarget for initial input to grouping nodes.
+ *
+ * If there is grouping or aggregation, the scan/join subplan cannot emit
+ * the query's final targetlist; for example, it certainly can't emit any
+ * aggregate function calls.  This routine generates the correct target
+ * for the scan/join subplan.
+ *
+ * The input must contain:
+ * - GROUP BY expressions.
+ * - inputs to Aggregates
+ * - other Vars needed to in the final target list. (ie. columns that
+ *   are functionally dependent on the GROUP BY expressions, and
+ *   therefore didn't need to be listed in GROUP BY.)
+ *
+ * Compared to the scan/join input's target list, this must
+ * - Add GROUP BY expressions
+ * - Remove aggregated columns, i.e columns that are not directly listed
+ *   in GROUP BY.
+ *
+ * The query target list passed from the parser already contains entries
+ * for all ORDER BY and GROUP BY expressions, but it will not have entries
+ * for variables used only in HAVING clauses; so we need to add those
+ * variables to the subplan target list.  Also, we flatten all expressions
+ * except GROUP BY items into their component variables; other expressions
+ * will be computed by the upper plan nodes rather than by the subplan.
+ * For example, given a query like
+ *		SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
+ * we want to pass this targetlist to the subplan:
+ *		a+b,c,d
+ * where the a+b target will be used by the Sort/Group steps, and the
+ * other targets will be used for computing the final results.
+ *
+ * 'root->final_target' is the query's final target list (in PathTarget form)
+ */
+PathTarget *
+make_group_input_target(PlannerInfo *root, RelOptInfo *rel)
+{
+	Query	   *parse = root->parse;
+	PathTarget *final_target = root->final_target;
+	PathTarget *input_target;
+	List	   *non_group_vars;
+	ListCell   *lc;
+	ListCell *lec;
+	ListCell *lsortref;
+
+	input_target = create_empty_pathtarget();
+
+	/*
+	 * Begin by adding all grouping columns.
+	 *
+	 * If the grouping is pushed down below a join, we might not have the
+	 * original expression listed in the GROUP BY available, but we can use
+	 * another expression that's known to be equal, because of a qual.
+	 * Hence search the equivalence classes, rather than parse->groupClause
+	 * directly.
+	 */
+	forboth(lec, root->group_ecs, lsortref, root->group_sortrefs)
+	{
+		EquivalenceClass *eclass = lfirst_node(EquivalenceClass, lec);
+		int			sgref = lfirst_int(lsortref);
+		Expr	   *expr;
+
+		if (eclass)
+		{
+			expr = find_em_expr_for_rel(eclass, rel);
+			if (!expr)
+				elog(ERROR, "could not find equivalence class member for given relations");
+		}
+		else
+		{
+			expr = get_sortgroupref_tle(sgref, root->processed_tlist)->expr;
+		}
+
+		add_column_to_pathtarget(input_target, expr, sgref);
+	}
+
+	/*
+	 * Pull out all the Vars mentioned in non-group cols, and
+	 * add them to the input target if not already present.  (A Var used
+	 * directly as a GROUP BY item will be present already.)  Note this
+	 * includes Vars used in resjunk items, so we are covering the needs of
+	 * ORDER BY and window specifications.  Vars used within Aggrefs and
+	 * WindowFuncs will be pulled out here, too.
+	 */
+	non_group_vars = pull_var_clause((Node *) final_target->exprs,
+									 PVC_RECURSE_AGGREGATES |
+									 PVC_RECURSE_WINDOWFUNCS |
+									 PVC_INCLUDE_PLACEHOLDERS);
+
+	/* Only add columns that we have available here. */
+	foreach (lc, non_group_vars)
+	{
+		Expr	   *expr = (Expr *) lfirst(lc);
+
+		if (bms_is_subset(pull_varnos((Node *) expr), rel->relids))
+			add_new_column_to_pathtarget(input_target, expr);
+	}
+	list_free(non_group_vars);
+
+	/*
+	 * If there's a HAVING clause, we'll need the Vars it uses, too.
+	 *
+	 * All the vars referenced in HAVING should be available at this
+	 * scan/join rel.
+	 */
+	if (parse->havingQual)
+	{
+		non_group_vars = pull_var_clause(parse->havingQual,
+									 PVC_RECURSE_AGGREGATES |
+									 PVC_RECURSE_WINDOWFUNCS |
+									 PVC_INCLUDE_PLACEHOLDERS);
+		add_new_columns_to_pathtarget(input_target, non_group_vars);
+		list_free(non_group_vars);
+	}
+
+	/* XXX this causes some redundant cost calculation ... */
+	return set_pathtarget_cost_width(root, input_target);
+}
+
+/*
+ * make_group_input_target
+ *	  Generate appropriate PathTarget for the output of an Agg/Group node
+ *
+ * FIXME: this function is quite a mess, needs a rewrite. I think it would
+ * make sense to merge this with make_group_input_target(), so that both
+ * target lists were created together. There is some duplicate work between
+ * them. Or can we move some of this processing to be done earlier, in
+ * process_targetlist()?
+ */
+PathTarget *
+make_grouping_target(PlannerInfo *root, RelOptInfo *rel,
+					 PathTarget *input_target, PathTarget *final_target)
+{
+	/*
+	 * - grouping columns
+	 * - aggregates
+	 * - columns needed for joins above this node
+	 */
+	Query	   *parse = root->parse;
+	PathTarget *grouping_target;
+	List	   *non_group_cols;
+	List	   *non_group_vars;
+	int			i;
+	ListCell   *lc;
+	Relids		relids;
+	Bitmapset  *group_col_sortrefs = NULL;
+
+	/*
+	 * We must build a target containing all grouping columns, plus any other
+	 * Vars mentioned in the query's targetlist. We can ignore HAVING here,
+	 * it's evaluated at the Agg itself, and doesn't need to be propagated
+	 * above it.
+	 */
+	grouping_target = create_empty_pathtarget();
+	non_group_cols = NIL;
+
+	/*
+	 * First, add all GROUP BY columns.
+	 */
+	/*
+	 * 1. Take the input target list. It should include all grouping cols. Remove everything that's
+	 * not a grouping col.
+	 * 2. Add Aggrefs.
+	 */
+	i = 0;
+	foreach(lc, final_target->exprs)
+	{
+		Expr	   *expr = (Expr *) lfirst(lc);
+		Index		sgref = get_pathtarget_sortgroupref(final_target, i);
+
+		if (bms_is_subset(pull_varnos((Node *) expr), rel->relids))
+		{
+			if (sgref && parse->groupClause &&
+				get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
+			{
+				/*
+				 * It's a grouping column, so add it to the input target as-is.
+				 */
+				group_col_sortrefs = bms_add_member(group_col_sortrefs, sgref);
+				add_column_to_pathtarget(grouping_target, expr, sgref);
+			}
+			else
+			{
+				/*
+				 * Non-grouping column, so just remember the expression for later
+				 * call to pull_var_clause.
+				 */
+				non_group_cols = lappend(non_group_cols, expr);
+			}
+		}
+
+		i++;
+	}
+
+	/* attrs_needed refers to parent relids and not those of a child. */
+	if (rel->top_parent_relids)
+		relids = rel->top_parent_relids;
+	else
+		relids = rel->relids;
+
+	relids = bms_add_member(bms_copy(relids), NEEDED_IN_GROUPING);
+
+	i = 0;
+	foreach(lc, input_target->exprs)
+	{
+		Var		   *var = (Var *) lfirst(lc);
+		RelOptInfo *baserel;
+		int			ndx;
+		Index		sgref = get_pathtarget_sortgroupref(input_target, i);
+
+		/* this is similar to build_joinrel_tlist. */
+
+		if (sgref && bms_is_member(sgref, group_col_sortrefs))
+		{
+			i++;
+			continue;
+		}
+
+		/*
+		 * Ignore PlaceHolderVars in the input tlists; we'll make our own
+		 * decisions about whether to copy them.
+		 */
+		if (IsA(var, PlaceHolderVar))
+		{
+			i++;
+			continue;
+		}
+
+		/*
+		 * Otherwise, anything in a baserel or joinrel targetlist ought to be
+		 * a Var. Children of a partitioned table may have ConvertRowtypeExpr
+		 * translating whole-row Var of a child to that of the parent.
+		 * Children of an inherited table or subquery child rels can not
+		 * directly participate in a join, so other kinds of nodes here.
+		 */
+		if (IsA(var, Var))
+		{
+			baserel = find_base_rel(root, var->varno);
+			ndx = var->varattno - baserel->min_attr;
+		}
+		else if (IsA(var, ConvertRowtypeExpr))
+		{
+			ConvertRowtypeExpr *child_expr = (ConvertRowtypeExpr *) var;
+			Var		   *childvar = (Var *) child_expr->arg;
+
+			/*
+			 * Child's whole-row references are converted to look like those
+			 * of parent using ConvertRowtypeExpr. There can be as many
+			 * ConvertRowtypeExpr decorations as the depth of partition tree.
+			 * The argument to the deepest ConvertRowtypeExpr is expected to
+			 * be a whole-row reference of the child.
+			 */
+			while (IsA(childvar, ConvertRowtypeExpr))
+			{
+				child_expr = (ConvertRowtypeExpr *) childvar;
+				childvar = (Var *) child_expr->arg;
+			}
+			Assert(IsA(childvar, Var) &&childvar->varattno == 0);
+
+			baserel = find_base_rel(root, childvar->varno);
+			ndx = 0 - baserel->min_attr;
+		}
+		else
+		{
+			/*
+			 * If this rel is above grouping, then we can have Aggrefs
+			 * and grouping column expressions in the target list. Carry
+			 * them up to the join rel. They will surely be needed at
+			 * the top of the join tree. (Unless they're only used in
+			 * HAVING?)
+			 */
+#if 0
+			elog(ERROR, "unexpected node type in rel targetlist: %d",
+				 (int) nodeTag(var));
+#endif
+			baserel = NULL;
+		}
+
+		/* Is the target expression still needed above this joinrel? */
+		if (baserel == NULL || bms_nonempty_difference(baserel->attr_needed[ndx], relids))
+		{
+			/* Yup, add it to the output */
+			add_column_to_pathtarget(grouping_target, (Expr *) var, sgref);
+		}
+		i++;
+	}
+
+	/*
+	 * Pull out all the Vars mentioned in non-group cols, and
+	 * add them to the input target if not already present.  (A Var used
+	 * directly as a GROUP BY item will be present already.)  Note this
+	 * includes Vars used in resjunk items, so we are covering the needs of
+	 * ORDER BY and window specifications.  Vars used within
+	 * WindowFuncs will be pulled out here, too. Aggrefs will be included
+	 * as is.
+	 */
+	non_group_vars = pull_var_clause((Node *) non_group_cols,
+									 PVC_INCLUDE_AGGREGATES |
+									 PVC_RECURSE_WINDOWFUNCS |
+									 PVC_INCLUDE_PLACEHOLDERS);
+	foreach (lc, non_group_vars)
+	{
+		Node *n = lfirst(lc);
+
+		if (IsA(n, Aggref))
+			add_new_column_to_pathtarget(grouping_target, (Expr *) n);
+	}
+
+	add_new_columns_to_pathtarget(grouping_target, non_group_vars);
+
+	/* XXX: Add anything needed to evaluate Aggs here, i.e. agg arguments */
+
+	/* clean up cruft */
+	list_free(non_group_vars);
+	list_free(non_group_cols);
+
+	/* XXX this causes some redundant cost calculation ... */
+	return set_pathtarget_cost_width(root, grouping_target);
+}
+
+/*
+ * Is the query's GROUP BY computable at the given relation?
+ *
+ * From "Including Group-By in Query Optimization" paper:
+ *
+ * Definition 3.1: A node n of a given left-deep tree has the invariant
+ * grouping property if the following conditions are true:
+ *
+ * 1. Every aggregating column of the query is a candidate aggregating
+ * column of n.
+ *
+ * 2. Every join column of n is also a grouping column of the query.
+ *
+ * 3. For every join-node that is an ancestor of n, the join is an
+ * equijoin predicate on a foreign key column of n.
+ */
+bool
+is_grouping_computable_at_rel(PlannerInfo *root, RelOptInfo *rel)
+{
+	ListCell   *lc;
+	ListCell   *lec;
+	Relids		other_relids;
+	int			x;
+
+	/*
+	 * If this is the final, top, join node, then surely the grouping
+	 * can be done here.
+	 */
+	if (bms_is_subset(root->all_baserels, rel->relids))
+		return true;
+
+	/*
+	 * Currently, give up on SRFs in target list. It gets too complicated to
+	 * evaluate them in the middle of the join tree. (Note that we check for
+	 * this after checking if this is the final rel, so we still produce
+	 * grouping plans with SRFs, at the top).
+	 *
+	 * FIXME: I'm not sure there's any fundamental reason this wouldn't work.
+	 */
+	if (root->parse->hasTargetSRFs)
+		return false;
+
+	/*
+	 * 1. Every aggregating column of the query is a candidate aggregating
+	 * column of n.
+	 *
+	 * What this means is that we must be able to compute the aggregates
+	 * at this relation. For example, "AVG(tbl.col)" can only be computed
+	 * if 'tbl' is part of this join relation.
+	 */
+	if (!bms_is_subset(root->agg_relids, rel->relids))
+		return false;
+
+	/*
+	 * We must also be able to compute each grouping column here.
+	 */
+	foreach (lec, root->group_ecs)
+	{
+		EquivalenceClass *ec = lfirst_node(EquivalenceClass, lec);
+
+		if (!find_em_expr_for_rel(ec, rel))
+			return false;
+	}
+
+	/*
+	 * non-equijoins can only be evaluated correctly before grouping.
+	 *
+	 * XXX: A parameterized path, for use in the inner side of a nested
+	 * loop join, where all the vars are available as Params, would be
+	 * acceptable, though.
+	 */
+	foreach (lc, rel->joininfo)
+	{
+		RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
+
+		if (!bms_is_subset(rinfo->required_relids, rel->relids))
+			return false;
+	}
+
+	x = -1;
+	other_relids = bms_difference(root->all_baserels, rel->relids);
+	while ((x = bms_next_member(other_relids, x)) >= 0)
+	{
+		List	   *joinquals;
+		Relids		joinrelids;
+		Relids		outer_relids;
+		RelOptInfo *other_rel;
+
+		other_rel = find_base_rel(root, x);
+
+		outer_relids = bms_make_singleton(x);
+		joinrelids = bms_add_members(bms_make_singleton(x), rel->relids);
+
+		joinquals = generate_join_implied_equalities(root,
+													 joinrelids,
+													 outer_relids,
+													 other_rel);
+
+		/*
+		 * Check condition 2: the join column must be in GROUP BY.
+		 */
+		foreach(lc, joinquals)
+		{
+			RestrictInfo *joinqual = lfirst_node(RestrictInfo, lc);
+
+			if (!joinqual->can_join)
+			{
+				/* Not a joinable binary opclause */
+				return false;
+			}
+
+			foreach (lec, root->group_ecs)
+			{
+				EquivalenceClass *ec = lfirst_node(EquivalenceClass, lec);
+
+				/* XXX: are left_ec/right_ec guaranteed to be valid here? */
+				if (ec == joinqual->left_ec ||
+					ec == joinqual->right_ec)
+				{
+					break;
+				}
+			}
+			if (lec == NULL)
+			{
+				/* This join qual was not in GROUP BY */
+				return false;
+			}
+		}
+
+		/*
+		 * Check condition 3: performing the remaining joins on top of this
+		 * mustn't "add" any more rows.
+		 */
+		if (!innerrel_is_unique(root,
+								joinrelids, /* joinrelids */
+								rel->relids, /* outerrelids */
+								other_rel, /* innerrel */
+								JOIN_INNER, /* FIXME */
+								joinquals,
+								false)) /* force_cache */
+		{
+			return false;
+		}
+	}
+
+	return true;
+}
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 3ada379f8b..74765d0d53 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -36,6 +36,7 @@
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/plancat.h"
+#include "optimizer/planmain.h"
 #include "optimizer/planner.h"
 #include "optimizer/prep.h"
 #include "optimizer/restrictinfo.h"
@@ -72,6 +73,7 @@ join_search_hook_type join_search_hook = NULL;
 static void set_base_rel_consider_startup(PlannerInfo *root);
 static void set_base_rel_sizes(PlannerInfo *root);
 static void set_base_rel_pathlists(PlannerInfo *root);
+static void set_base_rel_groupings(PlannerInfo *root);
 static void set_rel_size(PlannerInfo *root, RelOptInfo *rel,
 			 Index rti, RangeTblEntry *rte);
 static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
@@ -180,6 +182,11 @@ make_one_rel(PlannerInfo *root, List *joinlist)
 	set_base_rel_pathlists(root);
 
 	/*
+	 * Generate paths that implement grouping directly on top of base rels.
+	 */
+	set_base_rel_groupings(root);
+
+	/*
 	 * Generate access paths for the entire join tree.
 	 */
 	rel = make_rel_from_joinlist(root, joinlist);
@@ -312,6 +319,33 @@ set_base_rel_pathlists(PlannerInfo *root)
 }
 
 /*
+ * set_base_rel_groupings
+ *	  Generate paths to perform grouping for each base relation.
+ */
+static void
+set_base_rel_groupings(PlannerInfo *root)
+{
+	Index		rti;
+
+	for (rti = 1; rti < root->simple_rel_array_size; rti++)
+	{
+		RelOptInfo *rel = root->simple_rel_array[rti];
+
+		/* there may be empty slots corresponding to non-baserel RTEs */
+		if (rel == NULL)
+			continue;
+
+		Assert(rel->relid == rti);	/* sanity check on array */
+
+		/* ignore RTEs that are "other rels" */
+		if (rel->reloptkind != RELOPT_BASEREL)
+			continue;
+
+		set_rel_grouping(root, rel);
+	}
+}
+
+/*
  * set_rel_size
  *	  Set size estimates for a base relation
  */
@@ -2679,6 +2713,106 @@ make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
 }
 
 /*
+ * set_rel_grouping
+ *	  Generate grouping paths for a relation
+ */
+void
+set_rel_grouping(PlannerInfo *root, RelOptInfo *rel)
+{
+	PathTarget *final_target;
+	PathTarget *scanjoin_target;
+	List	   *scanjoin_targets;
+	List	   *scanjoin_targets_contain_srfs;
+	bool		scanjoin_target_parallel_safe;
+	bool		scanjoin_target_same_exprs;
+	PathTarget *grouping_target;
+	bool		grouping_target_parallel_safe;
+
+	/* If there's no GROUP BY or aggregates, nothing to do. */
+	if (!root->have_grouping)
+		return;
+
+	/*
+	 * For testing: always do grouping at the lowest available level.
+	 * So if we already computed grouped paths, where the grouping was
+	 * done at a lower level, use those paths.
+	 */
+#if 0
+	if (rel->grouped_rel)
+		return;
+#endif
+
+	/*
+	 * Can we apply the GROUPing on this rel?
+	 */
+	if (!is_grouping_computable_at_rel(root, rel))
+		return;
+
+	/*
+	 * Construct a target list for the scan/join below the GROUPing,
+	 * as well as for the grouping rel itself.
+	 */
+	final_target = create_pathtarget(root, root->processed_tlist);
+	scanjoin_target = make_group_input_target(root, rel);
+	scanjoin_target_parallel_safe =
+		is_parallel_safe(root, (Node *) scanjoin_target->exprs);
+
+	grouping_target = make_grouping_target(root, rel, scanjoin_target, final_target);
+	grouping_target_parallel_safe =
+		is_parallel_safe(root, (Node *) grouping_target->exprs);
+
+	if (root->parse->hasTargetSRFs)
+	{
+		/* scanjoin_target will not have any SRFs precomputed for it */
+		split_pathtarget_at_srfs(root, scanjoin_target, NULL,
+								 &scanjoin_targets,
+								 &scanjoin_targets_contain_srfs);
+		scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
+		Assert(!linitial_int(scanjoin_targets_contain_srfs));
+	}
+	else
+	{
+		scanjoin_targets = list_make1(scanjoin_target);
+		scanjoin_targets_contain_srfs = NIL;
+	}
+
+	/* XXX: this changes the target list of the non-grouped paths. Is that bad? */
+	scanjoin_target_same_exprs = list_length(scanjoin_targets) == 1
+		&& equal(scanjoin_target->exprs, rel->reltarget->exprs);
+	apply_scanjoin_target_to_paths(root,
+								   rel,
+								   scanjoin_targets,
+								   scanjoin_targets_contain_srfs,
+								   scanjoin_target_parallel_safe,
+								   scanjoin_target_same_exprs);
+
+	/*
+	 * Create grouping relation to hold fully aggregated grouping and/or
+	 * aggregation paths.
+	 */
+	if (!rel->grouped_rel)
+	{
+		rel->grouped_rel = make_grouping_rel(root, rel, grouping_target,
+										grouping_target_parallel_safe,
+										root->parse->havingQual);
+	}
+
+	/* We can apply grouping here. Construct Paths on */
+	create_grouping_paths(root,
+						  rel,
+						  rel->grouped_rel,
+						  grouping_target,
+						  grouping_target_parallel_safe,
+						  root->agg_costs,
+						  root->gd);
+
+#if 0
+	elog(NOTICE, "input: %s", nodeToString(scanjoin_target));
+	elog(NOTICE, "group: %s", nodeToString(grouping_target));
+#endif
+}
+
+/*
  * standard_join_search
  *	  Find possible joinpaths for a query by successively finding ways
  *	  to join component relations into join relations.
@@ -2761,6 +2895,9 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
 			/* Create paths for partitionwise joins. */
 			generate_partitionwise_join_paths(root, rel);
 
+			/* Create paths for groupings. */
+			generate_grouped_join_paths(root, rel);
+
 			/*
 			 * Except for the topmost scan/join rel, consider gathering
 			 * partial paths.  We'll do the same for the topmost scan/join rel
@@ -3585,6 +3722,27 @@ generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel)
 }
 
 
+/*
+ * generate_grouped_join_paths
+ * 		Create paths representing GROUP BY / aggregation over join rel.
+ */
+void
+generate_grouped_join_paths(PlannerInfo *root, RelOptInfo *rel)
+{
+	if (!root->have_grouping)
+		return;
+
+	/* Handle only join relations here. */
+	if (!IS_JOIN_REL(rel))
+		return;
+
+	/* Guard against stack overflow due to overly deep partition hierarchy. */
+	check_stack_depth();
+
+	set_rel_grouping(root, rel);
+}
+
+
 /*****************************************************************************
  *			DEBUG SUPPORT
  *****************************************************************************/
@@ -3880,6 +4038,13 @@ debug_print_rel(PlannerInfo *root, RelOptInfo *rel)
 		print_path(root, rel->cheapest_total_path, 1);
 	}
 	printf("\n");
+
+	if (rel->grouped_rel)
+	{
+		printf("GROUPED ");
+		debug_print_rel(root, rel->grouped_rel);
+	}
+
 	fflush(stdout);
 }
 
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 7008e1318e..ae87ba487f 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -15,11 +15,13 @@
 #include "postgres.h"
 
 #include "miscadmin.h"
+#include "foreign/fdwapi.h"
 #include "optimizer/clauses.h"
 #include "optimizer/joininfo.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/prep.h"
+#include "optimizer/tlist.h"
 #include "partitioning/partbounds.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
@@ -46,6 +48,8 @@ static void try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1,
 					   List *parent_restrictlist);
 static int match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel,
 							 bool strict_op);
+static void make_grouped_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, RelOptInfo *joinrel,
+					  SpecialJoinInfo *sjinfo, List *restrictlist);
 
 
 /*
@@ -742,6 +746,17 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 	populate_joinrel_with_paths(root, rel1, rel2, joinrel, sjinfo,
 								restrictlist);
 
+	/*
+	 * If we have grouped paths for either side of the join, create a
+	 * grouped join relation. (Paths where the grouping is done at this
+	 * join relation, is considered later, in generate_grouped_join_paths, after
+	 * building partition-wise join paths.)
+	 */
+	if (rel1->grouped_rel || rel2->grouped_rel)
+	{
+		make_grouped_join_rel(root, rel1, rel2, joinrel, sjinfo, restrictlist);
+	}
+
 	bms_free(joinrelids);
 
 	return joinrel;
@@ -908,6 +923,121 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
 	try_partitionwise_join(root, rel1, rel2, joinrel, sjinfo, restrictlist);
 }
 
+/*
+ * make_grouping_rel
+ *
+ * Create a new grouping rel and set basic properties.
+ *
+ * input_rel represents the underlying scan/join relation.
+ * target is the output expected from the grouping relation.
+ */
+RelOptInfo *
+make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
+				  PathTarget *target, bool target_parallel_safe,
+				  Node *havingQual)
+{
+	RelOptInfo *grouped_rel;
+
+	grouped_rel = build_group_rel(root, input_rel);
+
+	/* Set target. */
+	grouped_rel->reltarget = target;
+
+	/*
+	 * If the input relation is not parallel-safe, then the grouped relation
+	 * can't be parallel-safe, either.  Otherwise, it's parallel-safe if the
+	 * target list and HAVING quals are parallel-safe.
+	 */
+	if (input_rel->consider_parallel && target_parallel_safe &&
+		is_parallel_safe(root, (Node *) havingQual))
+		grouped_rel->consider_parallel = true;
+
+	/*
+	 * If the input rel belongs to a single FDW, so does the grouped rel.
+	 */
+	grouped_rel->serverid = input_rel->serverid;
+	grouped_rel->userid = input_rel->userid;
+	grouped_rel->useridiscurrent = input_rel->useridiscurrent;
+	grouped_rel->fdwroutine = input_rel->fdwroutine;
+
+	/* XXX: what other fields might be needed? */
+	grouped_rel->relid = input_rel->relid;
+
+	return grouped_rel;
+}
+
+/*
+ * make_grouped_join_rel
+ *
+ * Create grouped paths for a join rel, where one side of the join already
+ * has grouped paths.
+ */
+static void
+make_grouped_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, RelOptInfo *joinrel,
+					  SpecialJoinInfo *sjinfo, List *restrictlist)
+{
+	RelOptInfo *grouped_rel;
+
+	/* At least one side of the join should have grouped paths, or we have nothing to do. */
+	Assert(rel1->grouped_rel || rel2->grouped_rel);
+
+	/*
+	 * Also build join rels for grouped children.
+	 *
+	 * The paths for grouping at this join rel are generated later, see
+	 * generate_grouped_join_paths.
+	 */
+	Assert(!joinrel->grouped_rel);
+	grouped_rel = build_group_rel(root, joinrel);
+	joinrel->grouped_rel = grouped_rel;
+
+	/*
+	 * XXX: How much of the fields in RelOptInfo should we copy forom the parent joinrel?
+	 * Perhaps we should call build_join_rel() here instead? Or in build_group_rel().
+	 */
+
+	/*
+	 * FIXME: the row estimate here surely isn't right, as the aggregation below this join
+	 * should have eliminated some rows. In fact, it'd be pretty important to have a more
+	 * sensible estimate here, because otherwise it's unlikely that we choose this plan,
+	 * because we don't "see" the benefit of doing the aggregation at the lower level. The
+	 * main benefit of doing that is precisely that it reduces the number of rows that
+	 * need to be joined.
+	 */
+	grouped_rel->rows = joinrel->rows;
+
+	/*
+	 * It's possible for *both* sides of a join to have grouped paths.
+	 */
+	if (rel1->grouped_rel)
+	{
+		add_new_columns_to_pathtarget(grouped_rel->reltarget,
+									  rel1->grouped_rel->reltarget->exprs);
+		add_new_columns_to_pathtarget(grouped_rel->reltarget,
+									  rel2->reltarget->exprs);
+		populate_joinrel_with_paths(root,
+									rel1->grouped_rel,
+									rel2,
+									grouped_rel,
+									sjinfo,
+									restrictlist);
+	}
+	if (rel2->grouped_rel)
+	{
+		add_new_columns_to_pathtarget(grouped_rel->reltarget,
+									  rel1->reltarget->exprs);
+		add_new_columns_to_pathtarget(grouped_rel->reltarget,
+									  rel2->grouped_rel->reltarget->exprs);
+		populate_joinrel_with_paths(root,
+									rel1,
+									rel2->grouped_rel,
+									grouped_rel,
+									sjinfo,
+									restrictlist);
+	}
+}
+
+
 
 /*
  * have_join_order_restriction
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index ec66cb9c3c..7e919ac63f 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -225,7 +225,7 @@ make_pathkey_from_sortinfo(PlannerInfo *root,
  * This should eventually go away, but we need to restructure SortGroupClause
  * first.
  */
-static PathKey *
+PathKey *
 make_pathkey_from_sortop(PlannerInfo *root,
 						 Expr *expr,
 						 Relids nullable_relids,
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 01335db511..c9a7bef603 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -27,6 +27,7 @@
 #include "optimizer/planner.h"
 #include "optimizer/prep.h"
 #include "optimizer/restrictinfo.h"
+#include "optimizer/tlist.h"
 #include "optimizer/var.h"
 #include "parser/analyze.h"
 #include "rewrite/rewriteManip.h"
@@ -144,20 +145,57 @@ add_base_rels_to_query(PlannerInfo *root, Node *jtnode)
  *	  Add targetlist entries for each var needed in the query's final tlist
  *	  (and HAVING clause, if any) to the appropriate base relations.
  *
- * We mark such vars as needed by "relation 0" to ensure that they will
- * propagate up through all join plan steps.
+ * Vars that are needed in the final target list are marked as needed by
+ * "relation 0", to ensure that they will propagate up through all join and
+ * grouping plan steps. Vars that are needed in aggregate functions are
+' marked as needed by "NEEDED_IN_GROUPING" magic relation.
  */
 void
 build_base_rel_tlists(PlannerInfo *root, List *final_tlist)
 {
-	List	   *tlist_vars = pull_var_clause((Node *) final_tlist,
-											 PVC_RECURSE_AGGREGATES |
-											 PVC_RECURSE_WINDOWFUNCS |
-											 PVC_INCLUDE_PLACEHOLDERS);
+	List	   *tlist_vars;
+	ListCell   *lc;
+	ListCell   *lc2;
 
-	if (tlist_vars != NIL)
+	foreach (lc, final_tlist)
 	{
-		add_vars_to_targetlist(root, tlist_vars, bms_make_singleton(0), true);
+		TargetEntry *tle = lfirst_node(TargetEntry, lc);
+
+		if (tle->resjunk)
+		{
+			tlist_vars = pull_var_clause((Node *) tle->expr,
+										 PVC_RECURSE_AGGREGATES |
+										 PVC_RECURSE_WINDOWFUNCS |
+										 PVC_INCLUDE_PLACEHOLDERS);
+			add_vars_to_targetlist(root, tlist_vars,
+								   bms_make_singleton(NEEDED_IN_GROUPING), true);
+		}
+		else
+		{
+			tlist_vars = pull_var_clause((Node *) tle->expr,
+										 PVC_INCLUDE_AGGREGATES |
+										 PVC_RECURSE_WINDOWFUNCS |
+										 PVC_INCLUDE_PLACEHOLDERS);
+			foreach(lc2, tlist_vars)
+			{
+				Expr *expr = (Expr *) lfirst(lc2);
+
+				if (IsA(expr, Aggref) || IsA(expr, GroupingFunc))
+				{
+					List *aggref_vars;
+
+					aggref_vars = pull_var_clause((Node *) expr,
+												  PVC_RECURSE_AGGREGATES |
+												  PVC_RECURSE_WINDOWFUNCS |
+												  PVC_INCLUDE_PLACEHOLDERS);
+					add_vars_to_targetlist(root, aggref_vars,
+										   bms_make_singleton(NEEDED_IN_GROUPING), true);
+				}
+				else
+					add_vars_to_targetlist(root, list_make1(expr), bms_make_singleton(0), true);
+
+			}
+		}
 		list_free(tlist_vars);
 	}
 
@@ -174,7 +212,7 @@ build_base_rel_tlists(PlannerInfo *root, List *final_tlist)
 		if (having_vars != NIL)
 		{
 			add_vars_to_targetlist(root, having_vars,
-								   bms_make_singleton(0), true);
+								   bms_make_singleton(NEEDED_IN_GROUPING), true);
 			list_free(having_vars);
 		}
 	}
@@ -187,6 +225,9 @@ build_base_rel_tlists(PlannerInfo *root, List *final_tlist)
  *	  as being needed for the indicated join (or for final output if
  *	  where_needed includes "relation 0").
  *
+ *    where_needed can also include NEEDED_IN_GROUPING, to mean that it's
+ *	  needed to compute aggregates, but possibly not in the final output.
+ *
  *	  The list may also contain PlaceHolderVars.  These don't necessarily
  *	  have a single owning relation; we keep their attr_needed info in
  *	  root->placeholder_list instead.  If create_new_ph is true, it's OK
@@ -241,6 +282,86 @@ add_vars_to_targetlist(PlannerInfo *root, List *vars,
 	}
 }
 
+/*
+ * process_targetlist
+ *	  Extract information needed in query_planner() from target list.
+ *
+ * This needs to run after building equivalence classes.
+ */
+void
+process_targetlist(PlannerInfo *root, List *tlist)
+{
+	Query	   *parse = root->parse;
+	PathTarget *final_target;
+	ListCell   *lc;
+	Bitmapset  *agg_relids;
+	List	   *group_ecs = NIL;
+	List	   *group_sortrefs = NIL;
+	List	   *tlist_varnos;
+
+	/*
+	 * Make a copy of the final target list, in PathTarget format, for the
+	 * convenience of other routines in query_planner().
+	 */
+	final_target = create_pathtarget(root, tlist);
+	set_pathtarget_cost_width(root, final_target);
+	root->final_target = final_target;
+
+	/*
+	 * Build equivalence classes to represent each GROUP BY column. In most
+	 * cases, we already created these when we built PathKeys for them, but
+	 * it's not guaranteed. For example, if there are any non-sortable GROUP
+	 * BY expressions.
+	 */
+	foreach(lc, parse->groupClause)
+	{
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc);
+		Expr	   *sortkey;
+		PathKey    *pathkey;
+		EquivalenceClass *eclass;
+
+		sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
+
+		if (OidIsValid(sortcl->sortop))
+		{
+			pathkey = make_pathkey_from_sortop(root,
+											   sortkey,
+											   root->nullable_baserels,
+											   sortcl->sortop,
+											   sortcl->nulls_first,
+											   sortcl->tleSortGroupRef,
+											   true);
+			eclass = pathkey->pk_eclass;
+		}
+		else
+		{
+			/* XXX: Can there be an equivalence class for a non-sortable expression? */
+			eclass = NULL;
+		}
+
+		group_ecs = lappend(group_ecs, eclass);
+		group_sortrefs = lappend_int(group_sortrefs, sortcl->tleSortGroupRef);
+	}
+	root->group_ecs = group_ecs;
+	root->group_sortrefs = group_sortrefs;
+
+	/*
+	 * Compute the minimum set of relations needed to compute aggregates.
+	 */
+	agg_relids = NULL;
+	tlist_varnos = pull_var_clause((Node *) tlist,
+								   PVC_INCLUDE_AGGREGATES |
+								   PVC_RECURSE_WINDOWFUNCS |
+								   PVC_RECURSE_PLACEHOLDERS);
+	foreach(lc, tlist_varnos)
+	{
+		Node	   *e = (Node *) lfirst(lc);
+
+		if (IsA(e, Aggref))
+			agg_relids = bms_join(agg_relids, pull_varnos(e));
+	}
+	root->agg_relids = agg_relids;
+}
 
 /*****************************************************************************
  *
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index dc5cc110a9..09defb0363 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -53,6 +53,12 @@ process_jointree(PlannerInfo *root, List *tlist)
 	if (parse->jointree->fromlist == NIL)
 	{
 		/*
+		 * Construct a PathTarget to represent the final target list, and extract
+		 * information about aggregates.
+		 */
+		process_targetlist(root, tlist);
+
+		/*
 		 * Initialize canon_pathkeys, in case it's something like
 		 * "SELECT 2+2 ORDER BY 1".
 		 */
@@ -189,6 +195,12 @@ process_jointree(PlannerInfo *root, List *tlist)
 	extract_restriction_or_clauses(root);
 
 	/*
+	 * Construct a PathTarget to represent the final target list, and extract
+	 * information about aggregates.
+	 */
+	process_targetlist(root, tlist);
+
+	/*
 	 * We should now have size estimates for every actual table involved in
 	 * the query, and we also know which if any have been deleted from the
 	 * query by join removal; so we can compute total_table_pages.
@@ -220,10 +232,10 @@ process_jointree(PlannerInfo *root, List *tlist)
 /*
  * query_planner
  *	  Generate paths (that is, simplified plans) for a basic query,
- *	  which may involve joins but not any fancier features.
+ *	  which may involve joins and grouping, but not any fancier features.
  *
- * Since query_planner does not handle the toplevel processing (grouping,
- * sorting, etc) it cannot select the best path by itself.  Instead, it
+ * Since query_planner does not handle the toplevel processing (sorting,
+ * etc) it cannot select the best path by itself.  Instead, it
  * returns the RelOptInfo for the top level of joining, and the caller
  * (grouping_planner) can choose among the surviving paths for the rel.
  *
@@ -267,6 +279,16 @@ query_planner(PlannerInfo *root)
 		/* Select cheapest path (pretty easy in this case...) */
 		set_cheapest(final_rel);
 
+		/*
+		 * If there is grouping/aggregation (i.e. something like "SELECT COUNT(*);"),
+		 * add an Agg node on top of the Result.
+		 */
+		if (root->have_grouping)
+		{
+			set_rel_grouping(root, final_rel);
+			final_rel = final_rel->grouped_rel;
+		}
+
 		return final_rel;
 	}
 
@@ -280,5 +302,17 @@ query_planner(PlannerInfo *root)
 		final_rel->cheapest_total_path->param_info != NULL)
 		elog(ERROR, "failed to construct the join relation");
 
+	/*
+	 * If there is grouping and/or aggregation involved, return the
+	 * grouped plan.
+	 */
+	if (root->have_grouping)
+	{
+		if (!final_rel->grouped_rel)
+			elog(ERROR, "grouping query, but no grouping node was created as part of scan/join planning");
+
+		final_rel = final_rel->grouped_rel;
+	}
+
 	return final_rel;
 }
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 0d31eded37..68ee0c09a2 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -129,8 +129,6 @@ static RelOptInfo *create_ordered_paths(PlannerInfo *root,
 					 PathTarget *target,
 					 bool target_parallel_safe,
 					 double limit_tuples);
-static PathTarget *make_group_input_target(PlannerInfo *root,
-						PathTarget *final_target);
 static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
 static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
 static PathTarget *make_window_input_target(PlannerInfo *root,
@@ -143,12 +141,6 @@ static PathTarget *make_sort_input_target(PlannerInfo *root,
 					   bool *have_postponed_srfs);
 static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
 					  List *targets, List *targets_contain_srfs);
-static void apply_scanjoin_target_to_paths(PlannerInfo *root,
-							   RelOptInfo *rel,
-							   List *scanjoin_targets,
-							   List *scanjoin_targets_contain_srfs,
-							   bool scanjoin_target_parallel_safe,
-							   bool tlist_same_exprs);
 static List *extract_rollup_sets(List *groupingSets);
 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
 
@@ -1687,15 +1679,11 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 		List	   *sort_input_targets;
 		List	   *sort_input_targets_contain_srfs;
 		bool		sort_input_target_parallel_safe;
-		PathTarget *grouping_target;
-		List	   *grouping_targets;
-		List	   *grouping_targets_contain_srfs;
-		bool		grouping_target_parallel_safe;
-		PathTarget *scanjoin_target;
-		List	   *scanjoin_targets;
-		List	   *scanjoin_targets_contain_srfs;
-		bool		scanjoin_target_parallel_safe;
-		bool		scanjoin_target_same_exprs;
+		PathTarget *scanjoingrouping_target;
+		List	   *scanjoingrouping_targets;
+		List	   *scanjoingrouping_targets_contain_srfs;
+		bool		scanjoingrouping_target_parallel_safe;
+		bool		scanjoingrouping_target_same_exprs;
 		bool		have_grouping;
 		AggClauseCosts agg_costs;
 		WindowFuncLists *wflists = NULL;
@@ -1716,6 +1704,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 			if (parse->groupClause)
 				parse->groupClause = preprocess_groupclause(root, NIL);
 		}
+		root->gd = gset_data;
 
 		/* Preprocess targetlist */
 		tlist = preprocess_targetlist(root);
@@ -1750,6 +1739,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 			get_agg_clause_costs(root, parse->havingQual, AGGSPLIT_SIMPLE,
 								 &agg_costs);
 		}
+		root->agg_costs = &agg_costs;
 
 		/*
 		 * Locate any window functions in the tlist.  (We don't need to look
@@ -1808,6 +1798,10 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 						 ? (gset_data->rollups ? linitial_node(RollupData, gset_data->rollups)->groupClause : NIL)
 						 : parse->groupClause);
 
+		have_grouping = (parse->groupClause || parse->groupingSets ||
+						 parse->hasAggs || root->hasHavingQual);
+		root->have_grouping = have_grouping;
+
 		/*
 		 * Generate the best unsorted and presorted paths for the scan/join
 		 * portion of this Query, ie the processing represented by the
@@ -1852,35 +1846,16 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 		 */
 		if (activeWindows)
 		{
-			grouping_target = make_window_input_target(root,
-													   final_target,
-													   activeWindows);
-			grouping_target_parallel_safe =
-				is_parallel_safe(root, (Node *) grouping_target->exprs);
+			scanjoingrouping_target = make_window_input_target(root,
+															   final_target,
+															   activeWindows);
+			scanjoingrouping_target_parallel_safe =
+				is_parallel_safe(root, (Node *) scanjoingrouping_target->exprs);
 		}
 		else
 		{
-			grouping_target = sort_input_target;
-			grouping_target_parallel_safe = sort_input_target_parallel_safe;
-		}
-
-		/*
-		 * If we have grouping or aggregation to do, the topmost scan/join
-		 * plan node must emit what the grouping step wants; otherwise, it
-		 * should emit grouping_target.
-		 */
-		have_grouping = (parse->groupClause || parse->groupingSets ||
-						 parse->hasAggs || root->hasHavingQual);
-		if (have_grouping)
-		{
-			scanjoin_target = make_group_input_target(root, final_target);
-			scanjoin_target_parallel_safe =
-				is_parallel_safe(root, (Node *) grouping_target->exprs);
-		}
-		else
-		{
-			scanjoin_target = grouping_target;
-			scanjoin_target_parallel_safe = grouping_target_parallel_safe;
+			scanjoingrouping_target = sort_input_target;
+			scanjoingrouping_target_parallel_safe = sort_input_target_parallel_safe;
 		}
 
 		/*
@@ -1898,41 +1873,74 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 			final_target = linitial_node(PathTarget, final_targets);
 			Assert(!linitial_int(final_targets_contain_srfs));
 			/* likewise for sort_input_target vs. grouping_target */
-			split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
+			split_pathtarget_at_srfs(root, sort_input_target, scanjoingrouping_target,
 									 &sort_input_targets,
 									 &sort_input_targets_contain_srfs);
 			sort_input_target = linitial_node(PathTarget, sort_input_targets);
 			Assert(!linitial_int(sort_input_targets_contain_srfs));
 			/* likewise for grouping_target vs. scanjoin_target */
-			split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
-									 &grouping_targets,
-									 &grouping_targets_contain_srfs);
-			grouping_target = linitial_node(PathTarget, grouping_targets);
-			Assert(!linitial_int(grouping_targets_contain_srfs));
-			/* scanjoin_target will not have any SRFs precomputed for it */
-			split_pathtarget_at_srfs(root, scanjoin_target, NULL,
-									 &scanjoin_targets,
-									 &scanjoin_targets_contain_srfs);
-			scanjoin_target = linitial_node(PathTarget, scanjoin_targets);
-			Assert(!linitial_int(scanjoin_targets_contain_srfs));
+			split_pathtarget_at_srfs(root, scanjoingrouping_target, current_rel->reltarget,
+									 &scanjoingrouping_targets,
+									 &scanjoingrouping_targets_contain_srfs);
+			scanjoingrouping_target = linitial_node(PathTarget, scanjoingrouping_targets);
+			Assert(!linitial_int(scanjoingrouping_targets_contain_srfs));
 		}
 		else
 		{
 			/* initialize lists; for most of these, dummy values are OK */
 			final_targets = final_targets_contain_srfs = NIL;
 			sort_input_targets = sort_input_targets_contain_srfs = NIL;
-			grouping_targets = grouping_targets_contain_srfs = NIL;
-			scanjoin_targets = list_make1(scanjoin_target);
-			scanjoin_targets_contain_srfs = NIL;
+			scanjoingrouping_targets = list_make1(scanjoingrouping_target);
+			scanjoingrouping_targets_contain_srfs = NIL;
+		}
+
+		/*
+		 * We might have MIN/MAX paths stashed in UPPERREL_GROUP_AGG.
+		 * Merge them with the paths in current rel.
+		 *
+		 * XXX: what we really should do is to use the proper upper-rel
+		 * in the first place. Instead of always building a new grouped-rel
+		 * in query_planner(), it should call fetch_upper_rel with the right
+		 * relids. But when I tried doing that, GEQO became unhappy: it
+		 * allocates join rels in a temp memory context, and the caching
+		 * in fetch_upper_rel() didn't work with that.
+		 */
+		if (have_grouping)
+		{
+			RelOptInfo *grouped_rel;
+
+			/* Copy the paths to 'grouped_rel' */
+			grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
+			grouped_rel->is_grouped_rel = true;
+
+			/*
+			 * Update FDW information in the new upper rel.
+			 */
+			grouped_rel->serverid = current_rel->serverid;
+			grouped_rel->userid = current_rel->userid;
+			grouped_rel->useridiscurrent = current_rel->useridiscurrent;
+			grouped_rel->fdwroutine = current_rel->fdwroutine;
+			grouped_rel->fdw_private = current_rel->fdw_private;
+
+			foreach(lc, current_rel->pathlist)
+			{
+				Path	   *path = (Path *) lfirst(lc);
+
+				path->parent = grouped_rel;
+				add_path(grouped_rel, path);
+			}
+
+			set_cheapest(grouped_rel);
+			current_rel = grouped_rel;
 		}
 
-		/* Apply scan/join target. */
-		scanjoin_target_same_exprs = list_length(scanjoin_targets) == 1
-			&& equal(scanjoin_target->exprs, current_rel->reltarget->exprs);
-		apply_scanjoin_target_to_paths(root, current_rel, scanjoin_targets,
-									   scanjoin_targets_contain_srfs,
-									   scanjoin_target_parallel_safe,
-									   scanjoin_target_same_exprs);
+		/* Apply scan/join/grouping target. */
+		scanjoingrouping_target_same_exprs = list_length(scanjoingrouping_targets) == 1
+			&& equal(scanjoingrouping_target->exprs, current_rel->reltarget->exprs);
+		apply_scanjoin_target_to_paths(root, current_rel, scanjoingrouping_targets,
+											   scanjoingrouping_targets_contain_srfs,
+											   scanjoingrouping_target_parallel_safe,
+											   scanjoingrouping_target_same_exprs);
 
 		/*
 		 * Save the various upper-rel PathTargets we just computed into
@@ -1943,27 +1951,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 		 */
 		root->upper_targets[UPPERREL_FINAL] = final_target;
 		root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
-		root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;
-
-		/*
-		 * If we have grouping and/or aggregation, consider ways to implement
-		 * that.  We build a new upperrel representing the output of this
-		 * phase.
-		 */
-		if (have_grouping)
-		{
-			current_rel = create_grouping_paths(root,
-												current_rel,
-												grouping_target,
-												grouping_target_parallel_safe,
-												&agg_costs,
-												gset_data);
-			/* Fix things up if grouping_target contains SRFs */
-			if (parse->hasTargetSRFs)
-				adjust_paths_for_srfs(root, current_rel,
-									  grouping_targets,
-									  grouping_targets_contain_srfs);
-		}
+		root->upper_targets[UPPERREL_GROUP_AGG] = scanjoingrouping_target;
 
 		/*
 		 * If we have window functions, consider ways to implement those.  We
@@ -1973,7 +1961,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 		{
 			current_rel = create_window_paths(root,
 											  current_rel,
-											  grouping_target,
+											  scanjoingrouping_target,
 											  sort_input_target,
 											  sort_input_target_parallel_safe,
 											  tlist,
@@ -3941,105 +3929,6 @@ create_ordered_paths(PlannerInfo *root,
 	return ordered_rel;
 }
 
-
-/*
- * make_group_input_target
- *	  Generate appropriate PathTarget for initial input to grouping nodes.
- *
- * If there is grouping or aggregation, the scan/join subplan cannot emit
- * the query's final targetlist; for example, it certainly can't emit any
- * aggregate function calls.  This routine generates the correct target
- * for the scan/join subplan.
- *
- * The query target list passed from the parser already contains entries
- * for all ORDER BY and GROUP BY expressions, but it will not have entries
- * for variables used only in HAVING clauses; so we need to add those
- * variables to the subplan target list.  Also, we flatten all expressions
- * except GROUP BY items into their component variables; other expressions
- * will be computed by the upper plan nodes rather than by the subplan.
- * For example, given a query like
- *		SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
- * we want to pass this targetlist to the subplan:
- *		a+b,c,d
- * where the a+b target will be used by the Sort/Group steps, and the
- * other targets will be used for computing the final results.
- *
- * 'final_target' is the query's final target list (in PathTarget form)
- *
- * The result is the PathTarget to be computed by the Paths returned from
- * query_planner().
- */
-static PathTarget *
-make_group_input_target(PlannerInfo *root, PathTarget *final_target)
-{
-	Query	   *parse = root->parse;
-	PathTarget *input_target;
-	List	   *non_group_cols;
-	List	   *non_group_vars;
-	int			i;
-	ListCell   *lc;
-
-	/*
-	 * We must build a target containing all grouping columns, plus any other
-	 * Vars mentioned in the query's targetlist and HAVING qual.
-	 */
-	input_target = create_empty_pathtarget();
-	non_group_cols = NIL;
-
-	i = 0;
-	foreach(lc, final_target->exprs)
-	{
-		Expr	   *expr = (Expr *) lfirst(lc);
-		Index		sgref = get_pathtarget_sortgroupref(final_target, i);
-
-		if (sgref && parse->groupClause &&
-			get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
-		{
-			/*
-			 * It's a grouping column, so add it to the input target as-is.
-			 */
-			add_column_to_pathtarget(input_target, expr, sgref);
-		}
-		else
-		{
-			/*
-			 * Non-grouping column, so just remember the expression for later
-			 * call to pull_var_clause.
-			 */
-			non_group_cols = lappend(non_group_cols, expr);
-		}
-
-		i++;
-	}
-
-	/*
-	 * If there's a HAVING clause, we'll need the Vars it uses, too.
-	 */
-	if (parse->havingQual)
-		non_group_cols = lappend(non_group_cols, parse->havingQual);
-
-	/*
-	 * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
-	 * add them to the input target if not already present.  (A Var used
-	 * directly as a GROUP BY item will be present already.)  Note this
-	 * includes Vars used in resjunk items, so we are covering the needs of
-	 * ORDER BY and window specifications.  Vars used within Aggrefs and
-	 * WindowFuncs will be pulled out here, too.
-	 */
-	non_group_vars = pull_var_clause((Node *) non_group_cols,
-									 PVC_RECURSE_AGGREGATES |
-									 PVC_RECURSE_WINDOWFUNCS |
-									 PVC_INCLUDE_PLACEHOLDERS);
-	add_new_columns_to_pathtarget(input_target, non_group_vars);
-
-	/* clean up cruft */
-	list_free(non_group_vars);
-	list_free(non_group_cols);
-
-	/* XXX this causes some redundant cost calculation ... */
-	return set_pathtarget_cost_width(root, input_target);
-}
-
 /*
  * mark_partial_aggref
  *	  Adjust an Aggref to make it represent a partial-aggregation step.
@@ -5020,7 +4909,7 @@ done:
 /*
  * apply_scanjoin_target_to_paths
  *
- * Adjust the final scan/join relation, and recursively all of its children,
+ * Adjust the final scan/join relation, and recursively all of its children (if partitioned),
  * to generate the final scan/join target.  It would be more correct to model
  * this as a separate planning step with a new RelOptInfo at the toplevel and
  * for each child relation, but doing it this way is noticeably cheaper.
@@ -5031,7 +4920,7 @@ done:
  * appropriate sortgroupref information.  By avoiding the creation of
  * projection paths we save effort both immediately and at plan creation time.
  */
-static void
+void
 apply_scanjoin_target_to_paths(PlannerInfo *root,
 							   RelOptInfo *rel,
 							   List *scanjoin_targets,
@@ -5123,9 +5012,10 @@ apply_scanjoin_target_to_paths(PlannerInfo *root,
 		Path	   *subpath = (Path *) lfirst(lc);
 		Path	   *newpath;
 
-		Assert(subpath->param_info == NULL);
+		//Assert(subpath->param_info == NULL);
 
-		if (tlist_same_exprs)
+		if (tlist_same_exprs &&
+			equal(scanjoin_target->exprs, subpath->pathtarget->exprs))
 			subpath->pathtarget->sortgrouprefs =
 				scanjoin_target->sortgrouprefs;
 		else
@@ -5143,7 +5033,7 @@ apply_scanjoin_target_to_paths(PlannerInfo *root,
 		Path	   *newpath;
 
 		/* Shouldn't have any parameterized paths anymore */
-		Assert(subpath->param_info == NULL);
+		//Assert(subpath->param_info == NULL);
 
 		if (tlist_same_exprs)
 			subpath->pathtarget->sortgrouprefs =
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index e190ad49d1..0a3c5bcfad 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2392,8 +2392,7 @@ create_projection_path(PlannerInfo *root,
 	pathnode->path.pathtype = T_Result;
 	pathnode->path.parent = rel;
 	pathnode->path.pathtarget = target;
-	/* For now, assume we are above any joins, so no parameterization */
-	pathnode->path.param_info = NULL;
+	pathnode->path.param_info = subpath->param_info;
 	pathnode->path.parallel_aware = false;
 	pathnode->path.parallel_safe = rel->consider_parallel &&
 		subpath->parallel_safe &&
@@ -2640,8 +2639,7 @@ create_sort_path(PlannerInfo *root,
 	pathnode->path.parent = rel;
 	/* Sort doesn't project, so use source path's pathtarget */
 	pathnode->path.pathtarget = subpath->pathtarget;
-	/* For now, assume we are above any joins, so no parameterization */
-	pathnode->path.param_info = NULL;
+	pathnode->path.param_info = subpath->param_info;
 	pathnode->path.parallel_aware = false;
 	pathnode->path.parallel_safe = rel->consider_parallel &&
 		subpath->parallel_safe;
@@ -2685,8 +2683,7 @@ create_group_path(PlannerInfo *root,
 	pathnode->path.pathtype = T_Group;
 	pathnode->path.parent = rel;
 	pathnode->path.pathtarget = target;
-	/* For now, assume we are above any joins, so no parameterization */
-	pathnode->path.param_info = NULL;
+	pathnode->path.param_info = subpath->param_info;
 	pathnode->path.parallel_aware = false;
 	pathnode->path.parallel_safe = rel->consider_parallel &&
 		subpath->parallel_safe;
@@ -2799,8 +2796,7 @@ create_agg_path(PlannerInfo *root,
 	pathnode->path.pathtype = T_Agg;
 	pathnode->path.parent = rel;
 	pathnode->path.pathtarget = target;
-	/* For now, assume we are above any joins, so no parameterization */
-	pathnode->path.param_info = NULL;
+	pathnode->path.param_info = subpath->param_info;
 	pathnode->path.parallel_aware = false;
 	pathnode->path.parallel_safe = rel->consider_parallel &&
 		subpath->parallel_safe;
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 82b78420e7..6716a10ee4 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -915,12 +915,23 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
 			ndx = 0 - baserel->min_attr;
 		}
 		else
+		{
+			/*
+			 * If this rel is above grouping, then we can have Aggrefs
+			 * and grouping column expressions in the target list. Carry
+			 * them up to the join rel. They will surely be needed at
+			 * the top of the join tree. (Unless they're only used in
+			 * HAVING?)
+			 */
+#if 0
 			elog(ERROR, "unexpected node type in rel targetlist: %d",
 				 (int) nodeTag(var));
-
+#endif
+			baserel = NULL;
+		}
 
 		/* Is the target expression still needed above this joinrel? */
-		if (bms_nonempty_difference(baserel->attr_needed[ndx], relids))
+		if (baserel == NULL || bms_nonempty_difference(baserel->attr_needed[ndx], relids))
 		{
 			/* Yup, add it to the output */
 			joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs, var);
@@ -930,7 +941,9 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
 			 * if it's a ConvertRowtypeExpr, it will be computed only for the
 			 * base relation, costing nothing for a join.
 			 */
-			joinrel->reltarget->width += baserel->attr_widths[ndx];
+			/* XXX: where to get estimate here if !baserel? */
+			if (baserel)
+				joinrel->reltarget->width += baserel->attr_widths[ndx];
 		}
 	}
 }
@@ -1760,3 +1773,30 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel,
 		joinrel->nullable_partexprs[cnt] = nullable_partexpr;
 	}
 }
+
+RelOptInfo *
+build_group_rel(PlannerInfo *root, RelOptInfo *parent)
+{
+	RelOptInfo *grouped_rel;
+
+	grouped_rel = makeNode(RelOptInfo);
+	grouped_rel->reloptkind = parent->reloptkind;
+	grouped_rel->is_grouped_rel = true;
+	grouped_rel->relids = bms_copy(parent->relids);
+	grouped_rel->relids = bms_add_member(grouped_rel->relids, NEEDED_IN_GROUPING);
+
+	/* cheap startup cost is interesting iff not all tuples to be retrieved */
+	grouped_rel->consider_startup = (root->tuple_fraction > 0);
+	grouped_rel->consider_param_startup = false;
+	grouped_rel->consider_parallel = false;	/* might get changed later */
+	grouped_rel->reltarget = create_empty_pathtarget();
+	grouped_rel->pathlist = NIL;
+	grouped_rel->cheapest_startup_path = NULL;
+	grouped_rel->cheapest_total_path = NULL;
+	grouped_rel->cheapest_unique_path = NULL;
+	grouped_rel->cheapest_parameterized_paths = NIL;
+
+	build_joinrel_tlist(root, grouped_rel, parent);
+
+	return grouped_rel;
+}
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index ad77416050..149d198554 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -236,6 +236,9 @@ typedef struct PlannerInfo
 	List	   *join_rel_list;	/* list of join-relation RelOptInfos */
 	struct HTAB *join_rel_hash; /* optional hashtable for join relations */
 
+	struct PathTarget *final_target;
+	Relids		agg_relids;
+
 	/*
 	 * When doing a dynamic-programming-style join search, join_rel_level[k]
 	 * is a list of all join-relation RelOptInfos of level k, and
@@ -281,6 +284,10 @@ typedef struct PlannerInfo
 	List	   *query_pathkeys; /* desired pathkeys for query_planner() */
 
 	List	   *group_pathkeys; /* groupClause pathkeys, if any */
+
+	List	   *group_ecs;		/* groupClause equivalence classes, one for each groupClause item */
+	List	   *group_sortrefs; /* sortrefs, for each entry in group_ecs */
+
 	List	   *window_pathkeys;	/* pathkeys of bottom window, if any */
 	List	   *distinct_pathkeys;	/* distinctClause pathkeys, if any */
 	List	   *sort_pathkeys;	/* sortClause pathkeys, if any */
@@ -290,6 +297,11 @@ typedef struct PlannerInfo
 
 	List	   *initial_rels;	/* RelOptInfos we are now trying to join */
 
+	AggClauseCosts *agg_costs;
+	struct grouping_sets_data *gd;
+
+	bool		have_grouping;
+
 	/* Use fetch_upper_rel() to get any particular upper rel */
 	List	   *upper_rels[UPPERREL_FINAL + 1]; /* upper-rel RelOptInfos */
 
@@ -609,11 +621,30 @@ typedef enum RelOptKind
 	 (rel)->reloptkind == RELOPT_OTHER_JOINREL || \
 	 (rel)->reloptkind == RELOPT_OTHER_UPPER_REL)
 
+/* Is the given relation a grouped relation? */
+#define IS_GROUPED_REL(rel) \
+	((rel)->is_grouped_rel)
+
+/*
+ * NEEDED_IN_GROUPING is a magic range table index used in 'attr_needed',
+ * to indicate that an attribute is needed to compute an Aggregate, ie.
+ * is used as an argument to an aggregate function. It's similar to the
+ * magic '0' value used to indicate that an attribute is needed by the
+ * final target list.
+ *
+ * XXX: obviously 1000 won't work when you have 1000 relations in a query.
+ * I think we'll need to offset the bitmaps by some constant, to make room
+ * for this magic value, e.g. make it 1. (too bad that Bitmapset doesn't
+ * support negative values)
+ */
+#define NEEDED_IN_GROUPING 1000
+
 typedef struct RelOptInfo
 {
 	NodeTag		type;
 
 	RelOptKind	reloptkind;
+	bool		is_grouped_rel;
 
 	/* all relations included in this RelOptInfo */
 	Relids		relids;			/* set of base relids (rangetable indexes) */
@@ -638,6 +669,9 @@ typedef struct RelOptInfo
 	struct Path *cheapest_unique_path;
 	List	   *cheapest_parameterized_paths;
 
+	/* version of this relation that includes the effects of GROUP BY and aggregates */
+	struct RelOptInfo *grouped_rel;
+
 	/* parameterization information needed for both base rels and join rels */
 	/* (see also lateral_vars and lateral_referencers) */
 	Relids		direct_lateral_relids;	/* rels directly laterally referenced */
@@ -2430,7 +2464,7 @@ typedef struct JoinCostWorkspace
  * Data specific to grouping sets
  */
 
-typedef struct
+typedef struct grouping_sets_data
 {
 	List	   *rollups;
 	List	   *hash_sets_idx;
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index e99ae36bef..d294450caa 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -299,5 +299,6 @@ extern RelOptInfo *build_child_join_rel(PlannerInfo *root,
 					 RelOptInfo *outer_rel, RelOptInfo *inner_rel,
 					 RelOptInfo *parent_joinrel, List *restrictlist,
 					 SpecialJoinInfo *sjinfo, JoinType jointype);
+extern RelOptInfo *build_group_rel(PlannerInfo *root, RelOptInfo *parent);
 
 #endif							/* PATHNODE_H */
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 39e3e8f85c..6c274ac309 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -61,11 +61,15 @@ extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel,
 							Path *bitmapqual);
 extern void generate_partitionwise_join_paths(PlannerInfo *root,
 								  RelOptInfo *rel);
+extern void generate_grouped_join_paths(PlannerInfo *root,
+								  RelOptInfo *rel);
 
 #ifdef OPTIMIZER_DEBUG
 extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
 #endif
 
+extern void set_rel_grouping(PlannerInfo *root, RelOptInfo *rel);
+
 /*
  * indxpath.c
  *	  routines to generate index paths
@@ -110,6 +114,9 @@ extern void add_paths_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
 extern void join_search_one_level(PlannerInfo *root, int level);
 extern RelOptInfo *make_join_rel(PlannerInfo *root,
 			  RelOptInfo *rel1, RelOptInfo *rel2);
+extern RelOptInfo *make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
+				  PathTarget *target, bool target_parallel_safe,
+				  Node *havingQual);
 extern bool have_join_order_restriction(PlannerInfo *root,
 							RelOptInfo *rel1, RelOptInfo *rel2);
 extern bool have_dangerous_phv(PlannerInfo *root,
@@ -123,12 +130,17 @@ extern bool have_partkey_equi_join(RelOptInfo *joinrel,
  * aggpath.c
  *	  routines to create grouping paths
  */
-extern RelOptInfo *create_grouping_paths(PlannerInfo *root,
+extern void create_grouping_paths(PlannerInfo *root,
 					  RelOptInfo *input_rel,
+					  RelOptInfo *grouped_rel,
 					  PathTarget *target,
 					  bool target_parallel_safe,
 					  const AggClauseCosts *agg_costs,
-										 grouping_sets_data *gd);
+										 struct grouping_sets_data *gd);
+extern PathTarget *make_group_input_target(PlannerInfo *root, RelOptInfo *rel);
+extern PathTarget *make_grouping_target(PlannerInfo *root, RelOptInfo *rel, PathTarget *input_target, PathTarget *final_target);
+extern bool is_grouping_computable_at_rel(PlannerInfo *root, RelOptInfo *rel);
+
 extern List *remap_to_groupclause_idx(List *groupClause, List *gsets,
 						 int *tleref_to_colnum_map);
 
@@ -228,6 +240,14 @@ extern List *build_join_pathkeys(PlannerInfo *root,
 extern List *make_pathkeys_for_sortclauses(PlannerInfo *root,
 							  List *sortclauses,
 							  List *tlist);
+extern PathKey *make_pathkey_from_sortop(PlannerInfo *root,
+						 Expr *expr,
+						 Relids nullable_relids,
+						 Oid ordering_op,
+						 bool nulls_first,
+						 Index sortref,
+						 bool create_it);
+
 extern void initialize_mergeclause_eclasses(PlannerInfo *root,
 								RestrictInfo *restrictinfo);
 extern void update_mergeclause_eclasses(PlannerInfo *root,
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index dd6e912373..19b6b7ae9f 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -76,6 +76,7 @@ extern void add_vars_to_targetlist(PlannerInfo *root, List *vars,
 					   Relids where_needed, bool create_new_ph);
 extern void find_lateral_references(PlannerInfo *root);
 extern void create_lateral_join_info(PlannerInfo *root);
+extern void process_targetlist(PlannerInfo *root, List *tlist);
 extern List *deconstruct_jointree(PlannerInfo *root);
 extern void distribute_restrictinfo_to_rels(PlannerInfo *root,
 								RestrictInfo *restrictinfo);
diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h
index 497a8c0581..fb214c371f 100644
--- a/src/include/optimizer/planner.h
+++ b/src/include/optimizer/planner.h
@@ -61,4 +61,12 @@ extern grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
 extern bool plan_cluster_use_sort(Oid tableOid, Oid indexOid);
 extern int	plan_create_index_workers(Oid tableOid, Oid indexOid);
 
+extern void
+apply_scanjoin_target_to_paths(PlannerInfo *root,
+							   RelOptInfo *rel,
+							   List *scanjoin_targets,
+							   List *scanjoin_targets_contain_srfs,
+							   bool scanjoin_target_parallel_safe,
+							   bool tlist_same_exprs);
+
 #endif							/* PLANNER_H */
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f85e913850..70a2c5e271 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -982,16 +982,15 @@ explain (costs off) select a,c from t1 group by a,c,d;
 explain (costs off) select *
 from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y
 group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z;
-                      QUERY PLAN                      
-------------------------------------------------------
- HashAggregate
-   Group Key: t1.a, t1.b, t2.x, t2.y
-   ->  Hash Join
-         Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
-         ->  Seq Scan on t2
-         ->  Hash
-               ->  Seq Scan on t1
-(7 rows)
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop
+   ->  HashAggregate
+         Group Key: t1.a, t1.b, t1.a, t1.b
+         ->  Seq Scan on t1
+   ->  Index Scan using t2_pkey on t2
+         Index Cond: ((x = t1.a) AND (y = t1.b))
+(6 rows)
 
 -- Test case where t1 can be optimized but not t2
 explain (costs off) select t1.*,t2.x,t2.z
@@ -1000,7 +999,7 @@ group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z;
                       QUERY PLAN                      
 ------------------------------------------------------
  HashAggregate
-   Group Key: t1.a, t1.b, t2.x, t2.z
+   Group Key: t1.a, t1.b, t1.a, t2.z
    ->  Hash Join
          Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
          ->  Seq Scan on t2
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index b983f9c506..69199d8ac2 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -1140,7 +1140,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM plt1 t1, pl
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t1.c, t3.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
@@ -1284,7 +1284,7 @@ SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, ph
                                    QUERY PLAN                                   
 --------------------------------------------------------------------------------
  GroupAggregate
-   Group Key: t1.c, t2.c, t3.c
+   Group Key: t1.c, t1.c, t3.c
    ->  Sort
          Sort Key: t1.c, t3.c
          ->  Append
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 16f979c8d9..f6ed965ebe 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -116,7 +116,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare without_oid c
 # ----------
 # Another group of parallel tests
 # ----------
-test: identity partition_join partition_prune reloptions hash_part indexing partition_aggregate
+test: identity partition_join partition_prune reloptions hash_part indexing partition_aggregate aggregate_pushdown
 
 # event triggers cannot run concurrently with any test that runs DDL
 test: event_trigger
diff --git a/src/test/regress/sql/aggregate_pushdown.sql b/src/test/regress/sql/aggregate_pushdown.sql
new file mode 100644
index 0000000000..afb385f844
--- /dev/null
+++ b/src/test/regress/sql/aggregate_pushdown.sql
@@ -0,0 +1,70 @@
+-- Test cases for "pushing down" Agg/Group node below joins .
+
+create temp table a (id int4 primary key);
+create temp table b (id int4);
+create index on b (id);
+
+insert into a values (1);
+insert into b select g/10 from generate_series(1, 1000) g;
+analyze a,b;
+
+-- Here, the "normal" plan with Aggregate on top wins.
+explain (costs off)
+select b.id from a, b where a.id = b.id group by b.id;
+select b.id from a, b where a.id = b.id group by b.id;
+
+-- With different data, pushing down the Aggregate below the join looks more
+-- attractive
+truncate a;
+insert into a select g from generate_series(1, 10) g;
+analyze a;
+
+explain (costs off)
+select b.id from a, b where a.id = b.id group by b.id;
+select b.id from a, b where a.id = b.id group by b.id;
+
+-- TODO: Test a nested loop join, with a GROUP BY parameterized path
+
+
+
+-- This Grouping can be applied on top of either t1 or t2.
+
+create temp table t1 (a int, b int, primary key (a, b));
+create temp table t2 (x int, y int, primary key (x, y));
+
+explain (costs off)
+select t1.a, count(*) from t1, t2 WHERE t1.a = t2.x AND t1.b = t2.y GROUP BY t1.a, t1.b;
+
+-- This is the same, but because of avg(t1.a), the aggregation cannot be done on 't2'
+explain (costs off)
+select t1.a, avg(t1.a) from t1, t2 WHERE t1.a = t2.x AND t1.b = t2.y GROUP BY t1.a, t1.b;
+
+-- This is the same, but because of avg(t2.x), the aggregation cannot be done on 't1'
+explain (costs off)
+select t1.a, avg(t2.x) from t1, t2 WHERE t1.a = t2.x AND t1.b = t2.y GROUP BY t1.a, t1.b;
+
+-- With both avg(t1.a) and avg(t2.x), aggregation needs to be done after the join.
+explain (costs off)
+select t1.a, avg(t1.a), avg(t2.x) from t1, t2 WHERE t1.a = t2.x AND t1.b = t2.y GROUP BY t1.a, t1.b;
+
+
+drop table a;
+drop table b;
+
+create temp table a (id int4 primary key);
+create temp table b (id int4, v numeric);
+create index on b (id);
+
+insert into a select g from generate_series(1, 10) g;
+insert into b select g / 10, g::numeric / 10.0 from generate_series(1, 1000) g;
+analyze a,b;
+
+-- This grouping can be performed on top of 'b'
+explain (costs off)
+select b.id, avg(b.v) from a, b where a.id = b.id group by b.id;
+select b.id, avg(b.v) from a, b where a.id = b.id group by b.id;
+
+-- But this can not (Except in a nested loop join, with b.id being used as a Param)
+explain (costs off)
+select b.id, avg(b.v) from a, b where a.id = b.id and a.id / 10 + b.v < 1.5 group by b.id;
+select b.id, avg(b.v) from a, b where a.id = b.id and a.id / 10 + b.v < 1.5 group by b.id;
-- 
2.11.0