From 215600d12164f214aae8f0de94b16373bd202d69 Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Thu, 25 Apr 2024 15:23:57 +0200 Subject: [PATCH v1] Plan min()/max()-like aggregates with index accesses in more cases When the aggregate's operator is included in the ORDER BY's ordering opclass, we know they have a common ordering, and thus should get the same output (or at least same consistency guarantee for that output). So, check if the ORDER BY orders things by only the aggregated expression, and check if that ordering shares a btree opclass with the aggregator's operator. If so, we can use the index. --- src/backend/optimizer/plan/planagg.c | 87 ++++++++++++++++++++---- src/test/regress/expected/aggregates.out | 65 ++++++++++++++++++ src/test/regress/sql/aggregates.sql | 18 +++++ 3 files changed, 156 insertions(+), 14 deletions(-) diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index afb5445b77..d8479fe286 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -253,6 +253,16 @@ can_minmax_aggs(PlannerInfo *root, List **context) if (list_length(aggref->args) != 1) return false; /* it couldn't be MIN/MAX */ + /* + * We might implement the optimization when a FILTER clause is present + * by adding the filter to the quals of the generated subquery. For + * now, just punt. + */ + if (aggref->aggfilter != NULL) + return false; + + curTarget = (TargetEntry *) linitial(aggref->args); + /* * ORDER BY is usually irrelevant for MIN/MAX, but it can change the * outcome if the aggsortop's operator class recognizes non-identical @@ -267,22 +277,71 @@ can_minmax_aggs(PlannerInfo *root, List **context) * quickly. */ if (aggref->aggorder != NIL) - return false; + { + SortGroupClause *orderClause; + + /* + * If the order clause is the same column as the one we're + * aggregating, we can still use the index: It is undefined which + * value is MIN() or MAX(), as well as which value is first or + * last when sorted. So, we can still use the index IFF the + * aggregated expression equals the expression used in the + * ordering operation. + */ + + /* + * We only accept a single argument to min/max aggregates, + * orderings that have more clauses won't provide correct results. + */ + if (list_length(aggref->aggorder) > 1) + return false; + + orderClause = castNode(SortGroupClause, linitial(aggref->aggorder)); + + if (orderClause->tleSortGroupRef != curTarget->ressortgroupref) + return false; + + aggsortop = fetch_agg_sort_op(aggref->aggfnoid); + if (!OidIsValid(aggsortop)) + return false; /* not a MIN/MAX aggregate */ + + if (orderClause->sortop != aggsortop) + { + List *btclasses; + ListCell *cell; + bool match = false; + + btclasses = get_op_btree_interpretation(orderClause->sortop); + + foreach(cell, btclasses) + { + OpBtreeInterpretation *interpretation; + interpretation = (OpBtreeInterpretation *) lfirst(cell); + + if (!match) + { + if (op_in_opfamily(aggsortop, interpretation->opfamily_id)) + match = true; + } + /* Now useless */ + pfree(interpretation); + } + + /* Now not useful anymore */ + pfree(btclasses); + + if (!match) + return false; + } + } + else + { + aggsortop = fetch_agg_sort_op(aggref->aggfnoid); + if (!OidIsValid(aggsortop)) + return false; /* not a MIN/MAX aggregate */ + } /* note: we do not care if DISTINCT is mentioned ... */ - /* - * We might implement the optimization when a FILTER clause is present - * by adding the filter to the quals of the generated subquery. For - * now, just punt. - */ - if (aggref->aggfilter != NULL) - return false; - - aggsortop = fetch_agg_sort_op(aggref->aggfnoid); - if (!OidIsValid(aggsortop)) - return false; /* not a MIN/MAX aggregate */ - - curTarget = (TargetEntry *) linitial(aggref->args); if (contain_mutable_functions((Node *) curTarget->expr)) return false; /* not potentially indexable */ diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 56c361ccef..904a96eb94 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -978,6 +978,71 @@ select max(unique1) from tenk1 where unique1 > 42; 9999 (1 row) +-- When sorting on the column that's being aggregated, indexes can also be +-- used, but only when the aggregate's operator has the same ordering behavior +-- as the ORDER BY-clause, i.e. if it is in the same btree opclass as the one +-- chosen for the ORDER BY clause. +explain (costs off) + select min(unique1 ORDER BY unique1 ASC NULLS LAST) from tenk1; + QUERY PLAN +------------------------------------------------------------ + Result + InitPlan 1 + -> Limit + -> Index Only Scan using tenk1_unique1 on tenk1 + Index Cond: (unique1 IS NOT NULL) +(5 rows) + +select min(unique1 ORDER BY unique1 ASC NULLS LAST) from tenk1; + min +----- + 0 +(1 row) + +explain (costs off) + select max(unique1 ORDER BY unique1 USING <) from tenk1; + QUERY PLAN +--------------------------------------------------------------------- + Result + InitPlan 1 + -> Limit + -> Index Only Scan Backward using tenk1_unique1 on tenk1 + Index Cond: (unique1 IS NOT NULL) +(5 rows) + +select max(unique1 ORDER BY unique1 USING <) from tenk1; + max +------ + 9999 +(1 row) + +explain (costs off) + select max(unique1 ORDER BY tenthous) from tenk1; + QUERY PLAN +------------------------------- + Aggregate + -> Sort + Sort Key: tenthous + -> Seq Scan on tenk1 +(4 rows) + +select max(unique1 ORDER BY tenthous) from tenk1; + max +------ + 9999 +(1 row) + +-- But even then, the index can't be used if we order by multiple columns. +explain (costs off) + select max(unique1 ORDER BY unique1, tenthous) from tenk1; + QUERY PLAN +------------------------------------- + Aggregate + -> Sort + Sort Key: unique1, tenthous + -> Seq Scan on tenk1 +(4 rows) + -- the planner may choose a generic aggregate here if parallel query is -- enabled, since that plan will be parallel safe and the "optimized" -- plan, which has almost identical cost, will not be. we want to test diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index d28338ba3d..3a595a9fd7 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -362,6 +362,24 @@ explain (costs off) select max(unique1) from tenk1 where unique1 > 42; select max(unique1) from tenk1 where unique1 > 42; +-- When sorting on the column that's being aggregated, indexes can also be +-- used, but only when the aggregate's operator has the same ordering behavior +-- as the ORDER BY-clause, i.e. if it is in the same btree opclass as the one +-- chosen for the ORDER BY clause. +explain (costs off) + select min(unique1 ORDER BY unique1 ASC NULLS LAST) from tenk1; +select min(unique1 ORDER BY unique1 ASC NULLS LAST) from tenk1; +explain (costs off) + select max(unique1 ORDER BY unique1 USING <) from tenk1; +select max(unique1 ORDER BY unique1 USING <) from tenk1; +explain (costs off) + select max(unique1 ORDER BY tenthous) from tenk1; +select max(unique1 ORDER BY tenthous) from tenk1; + +-- But even then, the index can't be used if we order by multiple columns. +explain (costs off) + select max(unique1 ORDER BY unique1, tenthous) from tenk1; + -- the planner may choose a generic aggregate here if parallel query is -- enabled, since that plan will be parallel safe and the "optimized" -- plan, which has almost identical cost, will not be. we want to test -- 2.40.1