From 9faafad3d9908f55d8c2e455003f67fc7a223815 Mon Sep 17 00:00:00 2001
From: David Rowley <dgrowley@gmail.com>
Date: Mon, 3 May 2021 16:31:29 +1200
Subject: [PATCH v6 1/2] Add planner support for ORDER BY aggregates

ORDER BY aggreagtes have, since implemented in Postgres, been executed by
always performing a sort in nodeAgg.c to sort the tuples in the current
group into the correct order before calling the transition function on the
sorted results.  This was not great as often there might be an index that
could have provided pre-sorted input and allowed the transition functions
to be called as the rows come in, rather than having to store them in a
tuplestore in order to sort them later.

Here we get the planner on-board with picking a plan that provides
pre-sorted inputs to ORDER BY aggregates.

Since there can be many ORDER BY aggregates in any given query level, it's
very possible that we can't find an order that suits all aggregates.  The
sort order the the planner chooses is simply the one that suits the most
aggregate functions.  We take the most strictly sorted variation of each
order and see how many aggregate functions can use that, then we try again
with the order of the remaining aggregates to see if another order would
suit more aggregate functions.  For example:

SELECT agg(a ORDER BY a),agg2(a ORDER BY a,b) ...

would request the sort order to be {a, b} because {a} is a subset of the
sort order of {a,b}, but;

SELECT agg(a ORDER BY a),agg2(a ORDER BY c) ...

would just pick a plan ordered by {a}.

SELECT agg(a ORDER BY a),agg2(a ORDER BY b),agg3(a ORDER BY b) ...

would choose to order by {b} since two aggregates suit that vs just one
that requires order by {a}.

Making DISTINCT aggregates work requires a bit more work.  Those are still
handled the traditional way by performing a sort inside nodeAgg.c
---
 src/backend/executor/execExpr.c               |  30 ++-
 src/backend/executor/nodeAgg.c                |  20 +-
 src/backend/nodes/copyfuncs.c                 |   1 +
 src/backend/nodes/equalfuncs.c                |   1 +
 src/backend/nodes/outfuncs.c                  |   1 +
 src/backend/nodes/readfuncs.c                 |   1 +
 src/backend/optimizer/path/pathkeys.c         |  21 ++
 src/backend/optimizer/plan/planagg.c          |   2 +-
 src/backend/optimizer/plan/planner.c          | 202 +++++++++++++++++-
 src/backend/optimizer/prep/prepagg.c          |   7 +-
 src/include/executor/nodeAgg.h                |   6 +
 src/include/nodes/pathnodes.h                 |   8 +-
 src/include/nodes/primnodes.h                 |   4 +
 src/include/optimizer/paths.h                 |   1 +
 src/test/regress/expected/aggregates.out      |  78 +++++++
 .../regress/expected/partition_aggregate.out  |   6 +-
 src/test/regress/sql/aggregates.sql           |  43 ++++
 17 files changed, 403 insertions(+), 29 deletions(-)

diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index 2c8c414a14..191d5c6b70 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -3417,13 +3417,16 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
 				scratch.resnull = &state->resnull;
 			}
 			argno++;
+
+			Assert(pertrans->numInputs == argno);
 		}
-		else if (pertrans->numSortCols == 0)
+		else if (!pertrans->aggsortrequired)
 		{
 			ListCell   *arg;
 
 			/*
-			 * Normal transition function without ORDER BY / DISTINCT.
+			 * Normal transition function without ORDER BY / DISTINCT or with
+			 * ORDER BY but the planner has given us pre-sorted input.
 			 */
 			strictargs = trans_fcinfo->args + 1;
 
@@ -3431,6 +3434,13 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
 			{
 				TargetEntry *source_tle = (TargetEntry *) lfirst(arg);
 
+				/*
+				 * Don't initialize args for any ORDER BY clause that might
+				 * exist in a presorted aggregate.
+				 */
+				if (argno == pertrans->numTransInputs)
+					break;
+
 				/*
 				 * Start from 1, since the 0th arg will be the transition
 				 * value
@@ -3440,11 +3450,13 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
 								&trans_fcinfo->args[argno + 1].isnull);
 				argno++;
 			}
+			Assert(pertrans->numTransInputs == argno);
 		}
 		else if (pertrans->numInputs == 1)
 		{
 			/*
-			 * DISTINCT and/or ORDER BY case, with a single column sorted on.
+			 * Non-presorted DISTINCT and/or ORDER BY case, with a single
+			 * column sorted on.
 			 */
 			TargetEntry *source_tle =
 			(TargetEntry *) linitial(pertrans->aggref->args);
@@ -3456,11 +3468,14 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
 							&state->resnull);
 			strictnulls = &state->resnull;
 			argno++;
+
+			Assert(pertrans->numInputs == argno);
 		}
 		else
 		{
 			/*
-			 * DISTINCT and/or ORDER BY case, with multiple columns sorted on.
+			 * Non-presorted DISTINCT and/or ORDER BY case, with multiple
+			 * columns sorted on.
 			 */
 			Datum	   *values = pertrans->sortslot->tts_values;
 			bool	   *nulls = pertrans->sortslot->tts_isnull;
@@ -3476,8 +3491,8 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
 								&values[argno], &nulls[argno]);
 				argno++;
 			}
+			Assert(pertrans->numInputs == argno);
 		}
-		Assert(pertrans->numInputs == argno);
 
 		/*
 		 * For a strict transfn, nothing happens when there's a NULL input; we
@@ -3608,7 +3623,8 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
 	/*
 	 * Determine appropriate transition implementation.
 	 *
-	 * For non-ordered aggregates:
+	 * For non-ordered aggregates and ORDER BY aggregates with presorted
+	 * input:
 	 *
 	 * If the initial value for the transition state doesn't exist in the
 	 * pg_aggregate table then we will let the first non-NULL value returned
@@ -3638,7 +3654,7 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
 	 * process_ordered_aggregate_{single, multi} and
 	 * advance_transition_function.
 	 */
-	if (pertrans->numSortCols == 0)
+	if (!pertrans->aggsortrequired)
 	{
 		if (pertrans->transtypeByVal)
 		{
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 914b02ceee..0e72cfe19a 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -602,7 +602,7 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
 	/*
 	 * Start a fresh sort operation for each DISTINCT/ORDER BY aggregate.
 	 */
-	if (pertrans->numSortCols > 0)
+	if (pertrans->aggsortrequired)
 	{
 		/*
 		 * In case of rescan, maybe there could be an uncompleted sort
@@ -1328,7 +1328,7 @@ finalize_aggregates(AggState *aggstate,
 
 		pergroupstate = &pergroup[transno];
 
-		if (pertrans->numSortCols > 0)
+		if (pertrans->aggsortrequired)
 		{
 			Assert(aggstate->aggstrategy != AGG_HASHED &&
 				   aggstate->aggstrategy != AGG_MIXED);
@@ -4220,6 +4220,11 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
 	 * stick them into arrays.  We ignore ORDER BY for an ordered-set agg,
 	 * however; the agg's transfn and finalfn are responsible for that.
 	 *
+	 * When the planner has set the aggpresorted flag, the input to the
+	 * aggregate is already correctly sorted.  For ORDER BY aggregates we can
+	 * simply treat these as normal aggregates.  The planner does not yet set
+	 * the aggpresorted for DISTINCT aggregates.
+	 *
 	 * Note that by construction, if there is a DISTINCT clause then the ORDER
 	 * BY clause is a prefix of it (see transformDistinctClause).
 	 */
@@ -4227,18 +4232,29 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
 	{
 		sortlist = NIL;
 		numSortCols = numDistinctCols = 0;
+		pertrans->aggsortrequired = false;
+	}
+	else if (aggref->aggpresorted)
+	{
+		/* DISTINCT not yet supported for aggpresorted */
+		Assert(aggref->aggdistinct == NIL);
+		sortlist = NIL;
+		numSortCols = numDistinctCols = 0;
+		pertrans->aggsortrequired = false;
 	}
 	else if (aggref->aggdistinct)
 	{
 		sortlist = aggref->aggdistinct;
 		numSortCols = numDistinctCols = list_length(sortlist);
 		Assert(numSortCols >= list_length(aggref->aggorder));
+		pertrans->aggsortrequired = true;
 	}
 	else
 	{
 		sortlist = aggref->aggorder;
 		numSortCols = list_length(sortlist);
 		numDistinctCols = 0;
+		pertrans->aggsortrequired = (numSortCols > 0);
 	}
 
 	pertrans->numSortCols = numSortCols;
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 9d4893c504..c49f70c09c 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -1541,6 +1541,7 @@ _copyAggref(const Aggref *from)
 	COPY_SCALAR_FIELD(aggstar);
 	COPY_SCALAR_FIELD(aggvariadic);
 	COPY_SCALAR_FIELD(aggkind);
+	COPY_SCALAR_FIELD(aggpresorted);
 	COPY_SCALAR_FIELD(agglevelsup);
 	COPY_SCALAR_FIELD(aggsplit);
 	COPY_SCALAR_FIELD(aggno);
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index b9cc7b199c..235115900b 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -230,6 +230,7 @@ _equalAggref(const Aggref *a, const Aggref *b)
 	COMPARE_SCALAR_FIELD(aggstar);
 	COMPARE_SCALAR_FIELD(aggvariadic);
 	COMPARE_SCALAR_FIELD(aggkind);
+	COMPARE_SCALAR_FIELD(aggpresorted);
 	COMPARE_SCALAR_FIELD(agglevelsup);
 	COMPARE_SCALAR_FIELD(aggsplit);
 	COMPARE_SCALAR_FIELD(aggno);
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index e73be21bd6..729e9e4474 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1179,6 +1179,7 @@ _outAggref(StringInfo str, const Aggref *node)
 	WRITE_BOOL_FIELD(aggstar);
 	WRITE_BOOL_FIELD(aggvariadic);
 	WRITE_CHAR_FIELD(aggkind);
+	WRITE_BOOL_FIELD(aggpresorted);
 	WRITE_UINT_FIELD(agglevelsup);
 	WRITE_ENUM_FIELD(aggsplit, AggSplit);
 	WRITE_INT_FIELD(aggno);
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 77d082d8b4..3db0774382 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -655,6 +655,7 @@ _readAggref(void)
 	READ_BOOL_FIELD(aggstar);
 	READ_BOOL_FIELD(aggvariadic);
 	READ_CHAR_FIELD(aggkind);
+	READ_BOOL_FIELD(aggpresorted);
 	READ_UINT_FIELD(agglevelsup);
 	READ_ENUM_FIELD(aggsplit, AggSplit);
 	READ_INT_FIELD(aggno);
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index bd9a176d7d..d9e8322010 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -96,6 +96,27 @@ make_canonical_pathkey(PlannerInfo *root,
 	return pk;
 }
 
+/*
+ * append_pathkeys
+ *		Append all non-redundant PathKeys in 'source' onto 'target'
+ */
+List *
+append_pathkeys(List *target, List *source)
+{
+	ListCell   *lc;
+
+	Assert(target != NIL);
+
+	foreach(lc, source)
+	{
+		PathKey *pk = lfirst_node(PathKey, lc);
+
+		if (!pathkey_is_redundant(pk, target))
+			target = lappend(target, pk);
+	}
+	return target;
+}
+
 /*
  * pathkey_is_redundant
  *	   Is a pathkey redundant with one already in the given list?
diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c
index c1634d1666..706c5bda94 100644
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -245,7 +245,7 @@ can_minmax_aggs(PlannerInfo *root, List **context)
 	foreach(lc, root->agginfos)
 	{
 		AggInfo    *agginfo = (AggInfo *) lfirst(lc);
-		Aggref	   *aggref = agginfo->representative_aggref;
+		Aggref	   *aggref = linitial_node(Aggref, agginfo->aggrefs);
 		Oid			aggsortop;
 		TargetEntry *curTarget;
 		MinMaxAggInfo *mminfo;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 1868c4eff4..eee4478d64 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -24,6 +24,7 @@
 #include "access/sysattr.h"
 #include "access/table.h"
 #include "access/xact.h"
+#include "catalog/pg_aggregate.h"
 #include "catalog/pg_constraint.h"
 #include "catalog/pg_inherits.h"
 #include "catalog/pg_proc.h"
@@ -3055,6 +3056,192 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
 	return result;
 }
 
+/*
+ * make_pathkeys_for_groupagg
+ *		Determine the pathkeys for the GROUP BY clause and/or any ordered
+ *		aggregates.  We expect at least one of these here.
+ *
+ * Building the pathkeys for the GROUP BY is simple.  Most of the complexity
+ * involved here comes from calculating the best pathkeys for ordered
+ * aggregates.  We define "best" as the pathkeys that suit the most number of
+ * aggregate functions.  We find these by looking at the first ORDER BY
+ * aggregate and taking the pathkeys for that before searching for other
+ * aggregates that require the same or a more strict variation of the same
+ * pathkeys.  We then repeat that process for any remaining aggregates with
+ * different pathkeys and if we find another set of pathkeys that suits a
+ * larger number of aggregates then we return those pathkeys instead.
+ *
+ * When the best pathkeys are found we also mark each Aggref that can use
+ * those pathkeys as aggpresorted = true.
+ */
+static List *
+make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist)
+{
+	List	   *grouppathkeys = NIL;
+	List	   *bestpathkeys;
+	Bitmapset  *bestaggs;
+	Bitmapset  *unprocessed_aggs;
+	ListCell   *lc;
+	int			i;
+
+	Assert(groupClause != NIL || root->numOrderedAggs > 0);
+
+	if (groupClause != NIL)
+	{
+		/* no pathkeys possible if there's an unsortable GROUP BY */
+		if (!grouping_is_sortable(groupClause))
+			return NIL;
+
+		grouppathkeys = make_pathkeys_for_sortclauses(root, groupClause,
+													  tlist);
+	}
+
+	/*
+	 * We can't add pathkeys for ordered aggregates if there are any grouping
+	 * sets.  All handling specific to ordered aggregates must be done by the
+	 * executor in that case.
+	 */
+	if (root->numOrderedAggs == 0 || root->parse->groupingSets != NIL)
+		return grouppathkeys;
+
+	/*
+	 * Make a first pass over all AggInfos to collect a Bitmapset containing
+	 * the indexes of all AggInfos to be processed below.
+	 */
+	unprocessed_aggs = NULL;
+	foreach(lc, root->agginfos)
+	{
+		AggInfo    *agginfo = (AggInfo *) lfirst(lc);
+		Aggref	   *aggref = linitial_node(Aggref, agginfo->aggrefs);
+
+		if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
+			continue;
+
+		/* DISTINCT aggregates not yet supported by the planner */
+		if (aggref->aggdistinct != NIL)
+			continue;
+
+		/* skip any unordered aggs */
+		if (aggref->aggorder == NIL)
+			continue;
+
+		unprocessed_aggs = bms_add_member(unprocessed_aggs,
+										  foreach_current_index(lc));
+	}
+
+	/*
+	 * Now process all the unprocessed_aggs to find the best set of pathkeys
+	 * for the given set of aggregates.
+	 *
+	 * On the first outer loop here 'bestaggs' will be empty.   We'll populate
+	 * this during the first loop using the pathkeys for the very first
+	 * AggInfo then taking any stronger pathkeys from any other AggInfos with
+	 * a more strict set of compatible pathkeys.  Once the outer loop is
+	 * complete, we mark off all the aggregates with compatible pathkeys then
+	 * remove those from the unprocessed_aggs and repeat the process to try to
+	 * find another set of pathkeys that are suitable for a larger number of
+	 * aggregates.  The outer loop will stop when there are not enough
+	 * unprocessed aggregates for it to be possible to find a set of pathkeys
+	 * to suit a larger number of aggregates.
+	 */
+	bestpathkeys = NIL;
+	bestaggs = NULL;
+	while (bms_num_members(unprocessed_aggs) > bms_num_members(bestaggs))
+	{
+		Bitmapset  *aggindexes = NULL;
+		List	   *currpathkeys = NIL;
+
+		i = -1;
+		while ((i = bms_next_member(unprocessed_aggs, i)) >= 0)
+		{
+			AggInfo    *agginfo = list_nth(root->agginfos, i);
+			Aggref	   *aggref = linitial_node(Aggref, agginfo->aggrefs);
+
+			/*
+			 * When not set yet, take the pathkeys from the first unprocessed
+			 * aggregate.
+			 */
+			if (currpathkeys == NIL)
+			{
+				currpathkeys = make_pathkeys_for_sortclauses(root,
+															 aggref->aggorder,
+															 aggref->args);
+
+				/* include the GROUP BY pathkeys, if they exist */
+				if (grouppathkeys != NIL)
+					currpathkeys = append_pathkeys(list_copy(grouppathkeys),
+												   currpathkeys);
+
+				/* record that we found pathkeys for this aggregate */
+				aggindexes = bms_add_member(aggindexes, i);
+			}
+			else
+			{
+				List	   *pathkeys;
+
+				/* now look for a stronger set of matching pathkeys */
+				pathkeys = make_pathkeys_for_sortclauses(root, aggref->aggorder,
+														 aggref->args);
+
+				/* include the GROUP BY pathkeys, if they exist */
+				if (grouppathkeys != NIL)
+					pathkeys = append_pathkeys(list_copy(grouppathkeys),
+											   pathkeys);
+
+				if (pathkeys_contained_in(currpathkeys, pathkeys))
+				{
+					/* pathkeys are stronger, use these ones instead */
+					currpathkeys = pathkeys;
+
+					/* record that we found pathkeys for this aggregate */
+					aggindexes = bms_add_member(aggindexes, i);
+				}
+			}
+		}
+
+		/* remove the aggregates that we've just processed */
+		unprocessed_aggs = bms_del_members(unprocessed_aggs, aggindexes);
+
+		/*
+		 * If this pass included more aggregates than the previous best then
+		 * use these ones as the best set.
+		 */
+		if (bms_num_members(aggindexes) > bms_num_members(bestaggs))
+		{
+			bestaggs = aggindexes;
+			bestpathkeys = currpathkeys;
+		}
+	}
+
+	/*
+	 * Now that we've found the best set of aggregates we can set the
+	 * presorted flag to indicate to the executor that it needn't bother
+	 * performing a sort for these Aggrefs.
+	 */
+	i = -1;
+	while ((i = bms_next_member(bestaggs, i)) >= 0)
+	{
+		AggInfo    *agginfo = list_nth(root->agginfos, i);
+
+		foreach(lc, agginfo->aggrefs)
+		{
+			Aggref	   *aggref = lfirst_node(Aggref, lc);
+
+			aggref->aggpresorted = true;
+		}
+	}
+
+	/*
+	 * bestpathkeys includes the GROUP BY pathkeys, so if we found any ordered
+	 * aggregates, then return bestpathkeys, otherwise return the
+	 * grouppathkeys.
+	 */
+	if (bestpathkeys != NIL)
+		return bestpathkeys;
+
+	return grouppathkeys;
+}
+
 /*
  * Compute query_pathkeys and other pathkeys during plan generation
  */
@@ -3067,16 +3254,13 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 	List	   *activeWindows = qp_extra->activeWindows;
 
 	/*
-	 * Calculate pathkeys that represent grouping/ordering requirements.  The
-	 * sortClause is certainly sort-able, but GROUP BY and DISTINCT might not
-	 * be, in which case we just leave their pathkeys empty.
+	 * Calculate pathkeys that represent grouping/ordering and/or ordered
+	 * aggregate requirements.
 	 */
-	if (qp_extra->groupClause &&
-		grouping_is_sortable(qp_extra->groupClause))
-		root->group_pathkeys =
-			make_pathkeys_for_sortclauses(root,
-										  qp_extra->groupClause,
-										  tlist);
+	if (qp_extra->groupClause != NIL || root->numOrderedAggs > 0)
+		root->group_pathkeys = make_pathkeys_for_groupagg(root,
+														  qp_extra->groupClause,
+														  tlist);
 	else
 		root->group_pathkeys = NIL;
 
diff --git a/src/backend/optimizer/prep/prepagg.c b/src/backend/optimizer/prep/prepagg.c
index 89046f9afb..03153aff89 100644
--- a/src/backend/optimizer/prep/prepagg.c
+++ b/src/backend/optimizer/prep/prepagg.c
@@ -225,6 +225,7 @@ preprocess_aggref(Aggref *aggref, PlannerInfo *root)
 	{
 		AggInfo    *agginfo = list_nth(root->agginfos, aggno);
 
+		agginfo->aggrefs = lappend(agginfo->aggrefs, aggref);
 		transno = agginfo->transno;
 	}
 	else
@@ -232,7 +233,7 @@ preprocess_aggref(Aggref *aggref, PlannerInfo *root)
 		AggInfo    *agginfo = palloc(sizeof(AggInfo));
 
 		agginfo->finalfn_oid = aggfinalfn;
-		agginfo->representative_aggref = aggref;
+		agginfo->aggrefs = list_make1(aggref);
 		agginfo->shareable = shareable;
 
 		aggno = list_length(root->agginfos);
@@ -386,7 +387,7 @@ find_compatible_agg(PlannerInfo *root, Aggref *newagg,
 
 		aggno++;
 
-		existingRef = agginfo->representative_aggref;
+		existingRef = linitial_node(Aggref, agginfo->aggrefs);
 
 		/* all of the following must be the same or it's no match */
 		if (newagg->inputcollid != existingRef->inputcollid ||
@@ -650,7 +651,7 @@ get_agg_clause_costs(PlannerInfo *root, AggSplit aggsplit, AggClauseCosts *costs
 	foreach(lc, root->agginfos)
 	{
 		AggInfo    *agginfo = (AggInfo *) lfirst(lc);
-		Aggref	   *aggref = agginfo->representative_aggref;
+		Aggref	   *aggref = linitial_node(Aggref, agginfo->aggrefs);
 
 		/*
 		 * Add the appropriate component function execution costs to
diff --git a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h
index 398446d11f..1788725db6 100644
--- a/src/include/executor/nodeAgg.h
+++ b/src/include/executor/nodeAgg.h
@@ -48,6 +48,12 @@ typedef struct AggStatePerTransData
 	 */
 	bool		aggshared;
 
+	/*
+	 * True for DISTINCT aggregates and ORDER BY aggregates that are not
+	 * Aggref->aggpresorted
+	 */
+	bool		aggsortrequired;
+
 	/*
 	 * Number of aggregated input columns.  This includes ORDER BY expressions
 	 * in both the plain-agg and ordered-set cases.  Ordered-set direct args
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index a692bcfb53..7e77393f20 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -2651,12 +2651,12 @@ typedef struct JoinCostWorkspace
 typedef struct AggInfo
 {
 	/*
-	 * Link to an Aggref expr this state value is for.
+	 * List of Aggref exprs that this state value is for.
 	 *
-	 * There can be multiple identical Aggref's sharing the same per-agg. This
-	 * points to the first one of them.
+	 * There will always be at least one, but there can be multiple identical
+	 * Aggref's sharing the same per-agg.
 	 */
-	Aggref	   *representative_aggref;
+	List	   *aggrefs;
 
 	int			transno;
 
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 996c3e4016..67a1b6b53c 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -304,6 +304,9 @@ typedef struct Param
  * replaced with a single argument representing the partial-aggregate
  * transition values.
  *
+ * aggpresorted is set by the query planner for ORDER BY aggregates where the
+ * plan chosen provides presorted input for this aggregate during execution
+ *
  * aggsplit indicates the expected partial-aggregation mode for the Aggref's
  * parent plan node.  It's always set to AGGSPLIT_SIMPLE in the parser, but
  * the planner might change it to something else.  We use this mainly as
@@ -335,6 +338,7 @@ typedef struct Aggref
 	bool		aggvariadic;	/* true if variadic arguments have been
 								 * combined into an array last argument */
 	char		aggkind;		/* aggregate kind (see pg_aggregate.h) */
+	bool		aggpresorted;	/* aggregate input already sorted */
 	Index		agglevelsup;	/* > 0 if agg belongs to outer query */
 	AggSplit	aggsplit;		/* expected agg-splitting mode of parent Agg */
 	int			aggno;			/* unique ID within the Agg node */
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index f1d111063c..285d3ac87e 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -249,6 +249,7 @@ extern List *truncate_useless_pathkeys(PlannerInfo *root,
 									   RelOptInfo *rel,
 									   List *pathkeys);
 extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel);
+extern List *append_pathkeys(List *target, List *source);
 extern PathKey *make_canonical_pathkey(PlannerInfo *root,
 									   EquivalenceClass *eclass, Oid opfamily,
 									   int strategy, bool nulls_first);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 23b112b2af..9c32758803 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1383,6 +1383,84 @@ ERROR:  column "t1.f1" must appear in the GROUP BY clause or be used in an aggre
 LINE 1: select t1.f1 from t1 left join t2 using (f1) group by f1;
                ^
 drop table t1, t2;
+--
+-- Test planner's selection of pathkeys for ORDER BY aggregates
+--
+-- Ensure we order by four.  This suits the most aggregate functions.
+explain (costs off)
+select sum(two order by two),max(four order by four), min(four order by four)
+from tenk1;
+          QUERY PLAN           
+-------------------------------
+ Aggregate
+   ->  Sort
+         Sort Key: four
+         ->  Seq Scan on tenk1
+(4 rows)
+
+-- Ensure we order by two.  It's a tie between ordering by two and four but
+-- we tiebreak on the aggregate's position.
+explain (costs off)
+select
+  sum(two order by two), max(four order by four),
+  min(four order by four), max(two order by two)
+from tenk1;
+          QUERY PLAN           
+-------------------------------
+ Aggregate
+   ->  Sort
+         Sort Key: two
+         ->  Seq Scan on tenk1
+(4 rows)
+
+-- Similar to above, but tiebreak on ordering by four
+explain (costs off)
+select
+  max(four order by four), sum(two order by two),
+  min(four order by four), max(two order by two)
+from tenk1;
+          QUERY PLAN           
+-------------------------------
+ Aggregate
+   ->  Sort
+         Sort Key: four
+         ->  Seq Scan on tenk1
+(4 rows)
+
+-- Ensure this one orders by ten since there are 3 aggregates that require ten
+-- vs two that suit two and four.
+explain (costs off)
+select
+  max(four order by four), sum(two order by two),
+  min(four order by four), max(two order by two),
+  sum(ten order by ten), min(ten order by ten), max(ten order by ten)
+from tenk1;
+          QUERY PLAN           
+-------------------------------
+ Aggregate
+   ->  Sort
+         Sort Key: ten
+         ->  Seq Scan on tenk1
+(4 rows)
+
+-- Try a case involving a GROUP BY clause where the GROUP BY column is also
+-- part of an aggregate's ORDER BY clause.  We want a sort order that works
+-- for the GROUP BY and first and last aggregate.
+explain (costs off)
+select
+  sum(unique1 order by ten, two), sum(unique1 order by four),
+  sum(unique1 order by two, four)
+from tenk1
+group by ten;
+            QUERY PLAN            
+----------------------------------
+ GroupAggregate
+   Group Key: ten
+   ->  Sort
+         Sort Key: ten, two, four
+         ->  Seq Scan on tenk1
+(5 rows)
+
 --
 -- Test combinations of DISTINCT and/or ORDER BY
 --
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index dfa4b036b5..bb1a506c41 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -367,17 +367,17 @@ SELECT c, sum(b order by a) FROM pagg_tab GROUP BY c ORDER BY 1, 2;
          ->  GroupAggregate
                Group Key: pagg_tab.c
                ->  Sort
-                     Sort Key: pagg_tab.c
+                     Sort Key: pagg_tab.c, pagg_tab.a
                      ->  Seq Scan on pagg_tab_p1 pagg_tab
          ->  GroupAggregate
                Group Key: pagg_tab_1.c
                ->  Sort
-                     Sort Key: pagg_tab_1.c
+                     Sort Key: pagg_tab_1.c, pagg_tab_1.a
                      ->  Seq Scan on pagg_tab_p2 pagg_tab_1
          ->  GroupAggregate
                Group Key: pagg_tab_2.c
                ->  Sort
-                     Sort Key: pagg_tab_2.c
+                     Sort Key: pagg_tab_2.c, pagg_tab_2.a
                      ->  Seq Scan on pagg_tab_p3 pagg_tab_2
 (18 rows)
 
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index ed2d6b3bdf..8a6f3ac282 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -488,6 +488,49 @@ select t1.f1 from t1 left join t2 using (f1) group by f1;
 
 drop table t1, t2;
 
+--
+-- Test planner's selection of pathkeys for ORDER BY aggregates
+--
+
+-- Ensure we order by four.  This suits the most aggregate functions.
+explain (costs off)
+select sum(two order by two),max(four order by four), min(four order by four)
+from tenk1;
+
+-- Ensure we order by two.  It's a tie between ordering by two and four but
+-- we tiebreak on the aggregate's position.
+explain (costs off)
+select
+  sum(two order by two), max(four order by four),
+  min(four order by four), max(two order by two)
+from tenk1;
+
+-- Similar to above, but tiebreak on ordering by four
+explain (costs off)
+select
+  max(four order by four), sum(two order by two),
+  min(four order by four), max(two order by two)
+from tenk1;
+
+-- Ensure this one orders by ten since there are 3 aggregates that require ten
+-- vs two that suit two and four.
+explain (costs off)
+select
+  max(four order by four), sum(two order by two),
+  min(four order by four), max(two order by two),
+  sum(ten order by ten), min(ten order by ten), max(ten order by ten)
+from tenk1;
+
+-- Try a case involving a GROUP BY clause where the GROUP BY column is also
+-- part of an aggregate's ORDER BY clause.  We want a sort order that works
+-- for the GROUP BY and first and last aggregate.
+explain (costs off)
+select
+  sum(unique1 order by ten, two), sum(unique1 order by four),
+  sum(unique1 order by two, four)
+from tenk1
+group by ten;
+
 --
 -- Test combinations of DISTINCT and/or ORDER BY
 --
-- 
2.30.2