From 7cc966721a74047108aa53e4f166c3457b20db85 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Wed, 22 Oct 2025 12:25:08 +0900 Subject: [PATCH v1] Fix distinctness check for queries with grouping sets --- src/backend/optimizer/plan/analyzejoins.c | 19 +++--- src/test/regress/expected/join.out | 75 +++++++++++++++++++++++ src/test/regress/sql/join.sql | 28 +++++++++ 3 files changed, 115 insertions(+), 7 deletions(-) diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 6a3c030e8ef..de7cb59e5c2 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -31,6 +31,7 @@ #include "optimizer/placeholder.h" #include "optimizer/planmain.h" #include "optimizer/restrictinfo.h" +#include "parser/parse_agg.h" #include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" @@ -1175,6 +1176,8 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) } else if (query->groupingSets) { + List *gsets; + /* * If we have grouping sets with expressions, we probably don't have * uniqueness and analysis would be hard. Punt. @@ -1184,15 +1187,17 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) /* * If we have no groupClause (therefore no grouping expressions), we - * might have one or many empty grouping sets. If there's just one, - * then we're returning only one row and are certainly unique. But - * otherwise, we know we're certainly not unique. + * might have one or many empty grouping sets. If there's just one, or + * if the DISTINCT clause is used on the GROUP BY, then we're + * returning only one row and are certainly unique. But otherwise, we + * know we're certainly not unique. */ - if (list_length(query->groupingSets) == 1 && - ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY) + if (query->groupDistinct) return true; - else - return false; + + gsets = expand_grouping_sets(query->groupingSets, false, -1); + + return (list_length(gsets) == 1); } else { diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index d10095de70f..80a6148810c 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -6134,6 +6134,32 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s Seq Scan on d (1 row) +-- check that join removal works for a left join when joining a subquery +-- that is guaranteed to be unique by GROUPING SETS +explain (costs off) +select d.* from d left join (select 1 as x from b group by ()) s + on d.a = s.x; + QUERY PLAN +--------------- + Seq Scan on d +(1 row) + +explain (costs off) +select d.* from d left join (select 1 as x from b group by grouping sets(())) s + on d.a = s.x; + QUERY PLAN +--------------- + Seq Scan on d +(1 row) + +explain (costs off) +select d.* from d left join (select 1 as x from b group by grouping sets(()), grouping sets(())) s + on d.a = s.x; + QUERY PLAN +--------------- + Seq Scan on d +(1 row) + -- similarly, but keying off a DISTINCT clause explain (costs off) select d.* from d left join (select distinct * from b) s @@ -6162,6 +6188,55 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s -> Seq Scan on d (8 rows) +-- join removal is not possible when the GROUP BY contains non-empty grouping +-- sets or multiple empty grouping sets +explain (costs off) +select d.* from d left join (select 1 as x from b group by rollup(x)) s + on d.a = s.x; + QUERY PLAN +--------------------------------- + Hash Left Join + Hash Cond: (d.a = (1)) + -> Seq Scan on d + -> Hash + -> MixedAggregate + Hash Key: 1 + Group Key: () + -> Seq Scan on b +(8 rows) + +explain (costs off) +select d.* from d left join (select 1 as x from b group by grouping sets((), ())) s + on d.a = s.x; + QUERY PLAN +----------------------------------------- + Hash Left Join + Hash Cond: (d.a = (1)) + -> Seq Scan on d + -> Hash + -> Append + -> Result + Replaces: Aggregate + -> Result + Replaces: Aggregate +(9 rows) + +explain (costs off) +select d.* from d left join (select 1 as x from b group by grouping sets((), grouping sets(()))) s + on d.a = s.x; + QUERY PLAN +----------------------------------------- + Hash Left Join + Hash Cond: (d.a = (1)) + -> Seq Scan on d + -> Hash + -> Append + -> Result + Replaces: Aggregate + -> Result + Replaces: Aggregate +(9 rows) + -- similarly, but keying off a DISTINCT clause explain (costs off) select d.* from d left join (select distinct * from b) s diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index b1732453e8d..3de8dff2729 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -2212,6 +2212,20 @@ explain (costs off) select d.* from d left join (select * from b group by b.id, b.c_id) s on d.a = s.id and d.b = s.c_id; +-- check that join removal works for a left join when joining a subquery +-- that is guaranteed to be unique by GROUPING SETS +explain (costs off) +select d.* from d left join (select 1 as x from b group by ()) s + on d.a = s.x; + +explain (costs off) +select d.* from d left join (select 1 as x from b group by grouping sets(())) s + on d.a = s.x; + +explain (costs off) +select d.* from d left join (select 1 as x from b group by grouping sets(()), grouping sets(())) s + on d.a = s.x; + -- similarly, but keying off a DISTINCT clause explain (costs off) select d.* from d left join (select distinct * from b) s @@ -2225,6 +2239,20 @@ explain (costs off) select d.* from d left join (select * from b group by b.id, b.c_id) s on d.a = s.id; +-- join removal is not possible when the GROUP BY contains non-empty grouping +-- sets or multiple empty grouping sets +explain (costs off) +select d.* from d left join (select 1 as x from b group by rollup(x)) s + on d.a = s.x; + +explain (costs off) +select d.* from d left join (select 1 as x from b group by grouping sets((), ())) s + on d.a = s.x; + +explain (costs off) +select d.* from d left join (select 1 as x from b group by grouping sets((), grouping sets(()))) s + on d.a = s.x; + -- similarly, but keying off a DISTINCT clause explain (costs off) select d.* from d left join (select distinct * from b) s -- 2.39.5 (Apple Git-154)