From 09fed8131d6b2def5e5d76c7b73e86a9ae997c7a Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Fri, 23 Feb 2024 13:41:22 +0800 Subject: [PATCH v1 8/9] Add test cases --- src/test/regress/expected/eager_aggregate.out | 1270 +++++++++++++++++ src/test/regress/parallel_schedule | 2 +- src/test/regress/sql/eager_aggregate.sql | 205 +++ 3 files changed, 1476 insertions(+), 1 deletion(-) create mode 100644 src/test/regress/expected/eager_aggregate.out create mode 100644 src/test/regress/sql/eager_aggregate.sql diff --git a/src/test/regress/expected/eager_aggregate.out b/src/test/regress/expected/eager_aggregate.out new file mode 100644 index 0000000000..2d7dec8a5d --- /dev/null +++ b/src/test/regress/expected/eager_aggregate.out @@ -0,0 +1,1270 @@ +-- +-- EAGER AGGREGATION +-- Test we can push aggregation down below join +-- +-- Enable eager aggregation, which by default is disabled. +SET enable_eager_aggregate TO on; +CREATE TABLE eager_agg_t1 (a int, b int, c double precision); +CREATE TABLE eager_agg_t2 (a int, b int, c double precision); +CREATE TABLE eager_agg_t3 (a int, b int, c double precision); +INSERT INTO eager_agg_t1 SELECT i, i, i FROM generate_series(1, 1000)i; +INSERT INTO eager_agg_t2 SELECT i, i%10, i FROM generate_series(1, 1000)i; +INSERT INTO eager_agg_t3 SELECT i%10, i%10, i FROM generate_series(1, 1000)i; +ANALYZE eager_agg_t1; +ANALYZE eager_agg_t2; +ANALYZE eager_agg_t3; +-- +-- Test eager aggregation over base rel +-- +-- Perform scan of a table, aggregate the result, join it to the other table +-- and finalize the aggregation. +-- Produce results with hash aggregation +SET enable_hashagg TO on; +SET enable_sort TO off; +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a; + QUERY PLAN +------------------------------------------------------------ + Finalize HashAggregate + Output: t1.a, avg(t2.c) + Group Key: t1.a + -> Hash Join + Output: t1.a, (PARTIAL avg(t2.c)) + Hash Cond: (t1.b = t2.b) + -> Seq Scan on public.eager_agg_t1 t1 + Output: t1.a, t1.b, t1.c + -> Hash + Output: t2.b, (PARTIAL avg(t2.c)) + -> Partial HashAggregate + Output: t2.b, PARTIAL avg(t2.c) + Group Key: t2.b + -> Seq Scan on public.eager_agg_t2 t2 + Output: t2.a, t2.b, t2.c +(15 rows) + +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a; + a | avg +---+----- + 1 | 496 + 2 | 497 + 6 | 501 + 7 | 502 + 3 | 498 + 4 | 499 + 9 | 504 + 5 | 500 + 8 | 503 +(9 rows) + +-- Produce results with sorting aggregation +SET enable_hashagg TO off; +SET enable_sort TO on; +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a; + QUERY PLAN +------------------------------------------------------------------------ + Finalize GroupAggregate + Output: t1.a, avg(t2.c) + Group Key: t1.a + -> Sort + Output: t1.a, (PARTIAL avg(t2.c)) + Sort Key: t1.a + -> Hash Join + Output: t1.a, (PARTIAL avg(t2.c)) + Hash Cond: (t1.b = t2.b) + -> Seq Scan on public.eager_agg_t1 t1 + Output: t1.a, t1.b, t1.c + -> Hash + Output: t2.b, (PARTIAL avg(t2.c)) + -> Partial GroupAggregate + Output: t2.b, PARTIAL avg(t2.c) + Group Key: t2.b + -> Sort + Output: t2.c, t2.b + Sort Key: t2.b + -> Seq Scan on public.eager_agg_t2 t2 + Output: t2.c, t2.b +(21 rows) + +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a; + a | avg +---+----- + 1 | 496 + 2 | 497 + 3 | 498 + 4 | 499 + 5 | 500 + 6 | 501 + 7 | 502 + 8 | 503 + 9 | 504 +(9 rows) + +SET enable_hashagg TO default; +SET enable_sort TO default; +-- +-- Test eager aggregation over join rel +-- +-- Perform join of tables, aggregate the result, join it to the other table +-- and finalize the aggregation. +-- Produce results with hash aggregation +SET enable_hashagg TO on; +SET enable_sort TO off; +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c + t3.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b JOIN eager_agg_t3 t3 ON t2.a = t3.a GROUP BY t1.a; + QUERY PLAN +------------------------------------------------------------------------ + Finalize HashAggregate + Output: t1.a, avg((t2.c + t3.c)) + Group Key: t1.a + -> Hash Join + Output: t1.a, (PARTIAL avg((t2.c + t3.c))) + Hash Cond: (t1.b = t2.b) + -> Seq Scan on public.eager_agg_t1 t1 + Output: t1.a, t1.b, t1.c + -> Hash + Output: t2.b, (PARTIAL avg((t2.c + t3.c))) + -> Partial HashAggregate + Output: t2.b, PARTIAL avg((t2.c + t3.c)) + Group Key: t2.b + -> Hash Join + Output: t2.c, t3.c, t2.b + Hash Cond: (t3.a = t2.a) + -> Seq Scan on public.eager_agg_t3 t3 + Output: t3.a, t3.b, t3.c + -> Hash + Output: t2.c, t2.b, t2.a + -> Seq Scan on public.eager_agg_t2 t2 + Output: t2.c, t2.b, t2.a +(22 rows) + +SELECT t1.a, avg(t2.c + t3.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b JOIN eager_agg_t3 t3 ON t2.a = t3.a GROUP BY t1.a; + a | avg +---+----- + 1 | 497 + 2 | 499 + 6 | 507 + 7 | 509 + 3 | 501 + 4 | 503 + 9 | 513 + 5 | 505 + 8 | 511 +(9 rows) + +-- Produce results with sorting aggregation +SET enable_hashagg TO off; +SET enable_sort TO on; +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c + t3.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b JOIN eager_agg_t3 t3 ON t2.a = t3.a GROUP BY t1.a; + QUERY PLAN +------------------------------------------------------------------------------------ + Finalize GroupAggregate + Output: t1.a, avg((t2.c + t3.c)) + Group Key: t1.a + -> Sort + Output: t1.a, (PARTIAL avg((t2.c + t3.c))) + Sort Key: t1.a + -> Hash Join + Output: t1.a, (PARTIAL avg((t2.c + t3.c))) + Hash Cond: (t1.b = t2.b) + -> Seq Scan on public.eager_agg_t1 t1 + Output: t1.a, t1.b, t1.c + -> Hash + Output: t2.b, (PARTIAL avg((t2.c + t3.c))) + -> Partial GroupAggregate + Output: t2.b, PARTIAL avg((t2.c + t3.c)) + Group Key: t2.b + -> Sort + Output: t2.c, t3.c, t2.b + Sort Key: t2.b + -> Hash Join + Output: t2.c, t3.c, t2.b + Hash Cond: (t3.a = t2.a) + -> Seq Scan on public.eager_agg_t3 t3 + Output: t3.a, t3.b, t3.c + -> Hash + Output: t2.c, t2.b, t2.a + -> Seq Scan on public.eager_agg_t2 t2 + Output: t2.c, t2.b, t2.a +(28 rows) + +SELECT t1.a, avg(t2.c + t3.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b JOIN eager_agg_t3 t3 ON t2.a = t3.a GROUP BY t1.a; + a | avg +---+----- + 1 | 497 + 2 | 499 + 3 | 501 + 4 | 503 + 5 | 505 + 6 | 507 + 7 | 509 + 8 | 511 + 9 | 513 +(9 rows) + +SET enable_hashagg TO default; +SET enable_sort TO default; +-- +-- Test that eager aggregation works for outer join +-- +-- Ensure aggregation can be pushed down to the non-nullable side +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t3.a, avg(t3.c) FROM eager_agg_t1 t1 RIGHT JOIN eager_agg_t3 t3 ON t1.b = t3.b GROUP BY t3.a; + QUERY PLAN +------------------------------------------------------------ + Finalize GroupAggregate + Output: t3.a, avg(t3.c) + Group Key: t3.a + -> Sort + Output: t3.a, (PARTIAL avg(t3.c)) + Sort Key: t3.a + -> Hash Left Join + Output: t3.a, (PARTIAL avg(t3.c)) + Hash Cond: (t3.b = t1.b) + -> Partial HashAggregate + Output: t3.a, t3.b, PARTIAL avg(t3.c) + Group Key: t3.a, t3.b + -> Seq Scan on public.eager_agg_t3 t3 + Output: t3.a, t3.b, t3.c + -> Hash + Output: t1.b + -> Seq Scan on public.eager_agg_t1 t1 + Output: t1.b +(18 rows) + +SELECT t3.a, avg(t3.c) FROM eager_agg_t1 t1 RIGHT JOIN eager_agg_t3 t3 ON t1.b = t3.b GROUP BY t3.a; + a | avg +---+----- + 0 | 505 + 1 | 496 + 2 | 497 + 3 | 498 + 4 | 499 + 5 | 500 + 6 | 501 + 7 | 502 + 8 | 503 + 9 | 504 +(10 rows) + +-- Ensure aggregation cannot be pushed down to the nullable side +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t3.a, avg(t3.c) FROM eager_agg_t1 t1 LEFT JOIN eager_agg_t3 t3 ON t1.b = t3.b GROUP BY t3.a; + QUERY PLAN +------------------------------------------------------ + HashAggregate + Output: t3.a, avg(t3.c) + Group Key: t3.a + -> Hash Right Join + Output: t3.a, t3.c + Hash Cond: (t3.b = t1.b) + -> Seq Scan on public.eager_agg_t3 t3 + Output: t3.a, t3.b, t3.c + -> Hash + Output: t1.b + -> Seq Scan on public.eager_agg_t1 t1 + Output: t1.b +(12 rows) + +SELECT t3.a, avg(t3.c) FROM eager_agg_t1 t1 LEFT JOIN eager_agg_t3 t3 ON t1.b = t3.b GROUP BY t3.a; + a | avg +---+----- + 8 | 503 + | + 9 | 504 + 7 | 502 + 1 | 496 + 5 | 500 + 4 | 499 + 2 | 497 + 6 | 501 + 3 | 498 +(10 rows) + +-- +-- Test that eager aggregation works for parallel plans +-- +SET parallel_setup_cost=0; +SET parallel_tuple_cost=0; +SET min_parallel_table_scan_size=0; +SET max_parallel_workers_per_gather=4; +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a; + QUERY PLAN +--------------------------------------------------------------------------------- + Finalize GroupAggregate + Output: t1.a, avg(t2.c) + Group Key: t1.a + -> Sort + Output: t1.a, (PARTIAL avg(t2.c)) + Sort Key: t1.a + -> Gather + Output: t1.a, (PARTIAL avg(t2.c)) + Workers Planned: 2 + -> Parallel Hash Join + Output: t1.a, (PARTIAL avg(t2.c)) + Hash Cond: (t1.b = t2.b) + -> Parallel Seq Scan on public.eager_agg_t1 t1 + Output: t1.a, t1.b, t1.c + -> Parallel Hash + Output: t2.b, (PARTIAL avg(t2.c)) + -> Partial HashAggregate + Output: t2.b, PARTIAL avg(t2.c) + Group Key: t2.b + -> Parallel Seq Scan on public.eager_agg_t2 t2 + Output: t2.a, t2.b, t2.c +(21 rows) + +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a; + a | avg +---+----- + 1 | 496 + 2 | 497 + 3 | 498 + 4 | 499 + 5 | 500 + 6 | 501 + 7 | 502 + 8 | 503 + 9 | 504 +(9 rows) + +RESET parallel_setup_cost; +RESET parallel_tuple_cost; +RESET min_parallel_table_scan_size; +RESET max_parallel_workers_per_gather; +DROP TABLE eager_agg_t1; +DROP TABLE eager_agg_t2; +DROP TABLE eager_agg_t3; +-- +-- Test eager aggregation for partitionwise join +-- +-- Enable partitionwise aggregate, which by default is disabled. +SET enable_partitionwise_aggregate TO true; +-- Enable partitionwise join, which by default is disabled. +SET enable_partitionwise_join TO true; +CREATE TABLE eager_agg_tab1(x int, y int) PARTITION BY RANGE(x); +CREATE TABLE eager_agg_tab1_p1 PARTITION OF eager_agg_tab1 FOR VALUES FROM (0) TO (10); +CREATE TABLE eager_agg_tab1_p2 PARTITION OF eager_agg_tab1 FOR VALUES FROM (10) TO (20); +CREATE TABLE eager_agg_tab1_p3 PARTITION OF eager_agg_tab1 FOR VALUES FROM (20) TO (30); +CREATE TABLE eager_agg_tab2(x int, y int) PARTITION BY RANGE(y); +CREATE TABLE eager_agg_tab2_p1 PARTITION OF eager_agg_tab2 FOR VALUES FROM (0) TO (10); +CREATE TABLE eager_agg_tab2_p2 PARTITION OF eager_agg_tab2 FOR VALUES FROM (10) TO (20); +CREATE TABLE eager_agg_tab2_p3 PARTITION OF eager_agg_tab2 FOR VALUES FROM (20) TO (30); +INSERT INTO eager_agg_tab1 SELECT i % 30, i % 20 FROM generate_series(0, 299, 2) i; +INSERT INTO eager_agg_tab2 SELECT i % 20, i % 30 FROM generate_series(0, 299, 3) i; +ANALYZE eager_agg_tab1; +ANALYZE eager_agg_tab2; +-- When GROUP BY clause matches; full aggregation is performed for each partition. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.x, sum(t1.y), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x; + QUERY PLAN +--------------------------------------------------------------------------------- + Append + -> Finalize HashAggregate + Output: t1.x, sum(t1.y), count(*) + Group Key: t1.x + -> Hash Join + Output: t1.x, (PARTIAL sum(t1.y)), (PARTIAL count(*)) + Hash Cond: (t2.y = t1.x) + -> Seq Scan on public.eager_agg_tab2_p1 t2 + Output: t2.y + -> Hash + Output: t1.x, (PARTIAL sum(t1.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t1.x, PARTIAL sum(t1.y), PARTIAL count(*) + Group Key: t1.x + -> Seq Scan on public.eager_agg_tab1_p1 t1 + Output: t1.x, t1.y + -> Finalize HashAggregate + Output: t1_1.x, sum(t1_1.y), count(*) + Group Key: t1_1.x + -> Hash Join + Output: t1_1.x, (PARTIAL sum(t1_1.y)), (PARTIAL count(*)) + Hash Cond: (t2_1.y = t1_1.x) + -> Seq Scan on public.eager_agg_tab2_p2 t2_1 + Output: t2_1.y + -> Hash + Output: t1_1.x, (PARTIAL sum(t1_1.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t1_1.x, PARTIAL sum(t1_1.y), PARTIAL count(*) + Group Key: t1_1.x + -> Seq Scan on public.eager_agg_tab1_p2 t1_1 + Output: t1_1.x, t1_1.y + -> Finalize HashAggregate + Output: t1_2.x, sum(t1_2.y), count(*) + Group Key: t1_2.x + -> Hash Join + Output: t1_2.x, (PARTIAL sum(t1_2.y)), (PARTIAL count(*)) + Hash Cond: (t2_2.y = t1_2.x) + -> Seq Scan on public.eager_agg_tab2_p3 t2_2 + Output: t2_2.y + -> Hash + Output: t1_2.x, (PARTIAL sum(t1_2.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t1_2.x, PARTIAL sum(t1_2.y), PARTIAL count(*) + Group Key: t1_2.x + -> Seq Scan on public.eager_agg_tab1_p3 t1_2 + Output: t1_2.x, t1_2.y +(46 rows) + +SELECT t1.x, sum(t1.y), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x; + x | sum | count +----+------+------- + 6 | 1100 | 100 + 0 | 500 | 100 + 12 | 700 | 100 + 18 | 1300 | 100 + 24 | 900 | 100 +(5 rows) + +-- GROUP BY having other matching key +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t2.y, sum(t1.y), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.y; + QUERY PLAN +--------------------------------------------------------------------------------- + Append + -> Finalize HashAggregate + Output: t2.y, sum(t1.y), count(*) + Group Key: t2.y + -> Hash Join + Output: t2.y, (PARTIAL sum(t1.y)), (PARTIAL count(*)) + Hash Cond: (t2.y = t1.x) + -> Seq Scan on public.eager_agg_tab2_p1 t2 + Output: t2.y + -> Hash + Output: t1.x, (PARTIAL sum(t1.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t1.x, PARTIAL sum(t1.y), PARTIAL count(*) + Group Key: t1.x + -> Seq Scan on public.eager_agg_tab1_p1 t1 + Output: t1.y, t1.x + -> Finalize HashAggregate + Output: t2_1.y, sum(t1_1.y), count(*) + Group Key: t2_1.y + -> Hash Join + Output: t2_1.y, (PARTIAL sum(t1_1.y)), (PARTIAL count(*)) + Hash Cond: (t2_1.y = t1_1.x) + -> Seq Scan on public.eager_agg_tab2_p2 t2_1 + Output: t2_1.y + -> Hash + Output: t1_1.x, (PARTIAL sum(t1_1.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t1_1.x, PARTIAL sum(t1_1.y), PARTIAL count(*) + Group Key: t1_1.x + -> Seq Scan on public.eager_agg_tab1_p2 t1_1 + Output: t1_1.y, t1_1.x + -> Finalize HashAggregate + Output: t2_2.y, sum(t1_2.y), count(*) + Group Key: t2_2.y + -> Hash Join + Output: t2_2.y, (PARTIAL sum(t1_2.y)), (PARTIAL count(*)) + Hash Cond: (t2_2.y = t1_2.x) + -> Seq Scan on public.eager_agg_tab2_p3 t2_2 + Output: t2_2.y + -> Hash + Output: t1_2.x, (PARTIAL sum(t1_2.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t1_2.x, PARTIAL sum(t1_2.y), PARTIAL count(*) + Group Key: t1_2.x + -> Seq Scan on public.eager_agg_tab1_p3 t1_2 + Output: t1_2.y, t1_2.x +(46 rows) + +SELECT t2.y, sum(t1.y), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.y; + y | sum | count +----+------+------- + 6 | 1100 | 100 + 0 | 500 | 100 + 18 | 1300 | 100 + 12 | 700 | 100 + 24 | 900 | 100 +(5 rows) + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t2.x, sum(t1.x), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.x HAVING avg(t1.x) > 10; + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Finalize HashAggregate + Output: t2.x, sum(t1.x), count(*) + Group Key: t2.x + Filter: (avg(t1.x) > '10'::numeric) + -> Append + -> Hash Join + Output: t2_1.x, (PARTIAL sum(t1_1.x)), (PARTIAL count(*)), (PARTIAL avg(t1_1.x)) + Hash Cond: (t2_1.y = t1_1.x) + -> Seq Scan on public.eager_agg_tab2_p1 t2_1 + Output: t2_1.x, t2_1.y + -> Hash + Output: t1_1.x, (PARTIAL sum(t1_1.x)), (PARTIAL count(*)), (PARTIAL avg(t1_1.x)) + -> Partial HashAggregate + Output: t1_1.x, PARTIAL sum(t1_1.x), PARTIAL count(*), PARTIAL avg(t1_1.x) + Group Key: t1_1.x + -> Seq Scan on public.eager_agg_tab1_p1 t1_1 + Output: t1_1.x + -> Hash Join + Output: t2_2.x, (PARTIAL sum(t1_2.x)), (PARTIAL count(*)), (PARTIAL avg(t1_2.x)) + Hash Cond: (t2_2.y = t1_2.x) + -> Seq Scan on public.eager_agg_tab2_p2 t2_2 + Output: t2_2.x, t2_2.y + -> Hash + Output: t1_2.x, (PARTIAL sum(t1_2.x)), (PARTIAL count(*)), (PARTIAL avg(t1_2.x)) + -> Partial HashAggregate + Output: t1_2.x, PARTIAL sum(t1_2.x), PARTIAL count(*), PARTIAL avg(t1_2.x) + Group Key: t1_2.x + -> Seq Scan on public.eager_agg_tab1_p2 t1_2 + Output: t1_2.x + -> Hash Join + Output: t2_3.x, (PARTIAL sum(t1_3.x)), (PARTIAL count(*)), (PARTIAL avg(t1_3.x)) + Hash Cond: (t2_3.y = t1_3.x) + -> Seq Scan on public.eager_agg_tab2_p3 t2_3 + Output: t2_3.x, t2_3.y + -> Hash + Output: t1_3.x, (PARTIAL sum(t1_3.x)), (PARTIAL count(*)), (PARTIAL avg(t1_3.x)) + -> Partial HashAggregate + Output: t1_3.x, PARTIAL sum(t1_3.x), PARTIAL count(*), PARTIAL avg(t1_3.x) + Group Key: t1_3.x + -> Seq Scan on public.eager_agg_tab1_p3 t1_3 + Output: t1_3.x +(41 rows) + +SELECT t2.x, sum(t1.x), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.x HAVING avg(t1.x) > 10; + x | sum | count +----+------+------- + 4 | 1200 | 50 + 14 | 1200 | 50 + 18 | 900 | 50 + 2 | 600 | 50 + 12 | 600 | 50 + 8 | 900 | 50 +(6 rows) + +-- Check with eager aggregation over join rel +-- full aggregation +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.x, sum(t2.y + t3.y) FROM eager_agg_tab1 t1 JOIN eager_agg_tab1 t2 ON t1.x = t2.x JOIN eager_agg_tab1 t3 ON t2.x = t3.x GROUP BY t1.x; + QUERY PLAN +------------------------------------------------------------------------------------- + Append + -> Finalize HashAggregate + Output: t1.x, sum((t2.y + t3.y)) + Group Key: t1.x + -> Hash Join + Output: t1.x, (PARTIAL sum((t2.y + t3.y))) + Hash Cond: (t1.x = t2.x) + -> Seq Scan on public.eager_agg_tab1_p1 t1 + Output: t1.x + -> Hash + Output: t2.x, t3.x, (PARTIAL sum((t2.y + t3.y))) + -> Partial HashAggregate + Output: t2.x, t3.x, PARTIAL sum((t2.y + t3.y)) + Group Key: t2.x + -> Hash Join + Output: t2.y, t2.x, t3.y, t3.x + Hash Cond: (t2.x = t3.x) + -> Seq Scan on public.eager_agg_tab1_p1 t2 + Output: t2.y, t2.x + -> Hash + Output: t3.y, t3.x + -> Seq Scan on public.eager_agg_tab1_p1 t3 + Output: t3.y, t3.x + -> Finalize HashAggregate + Output: t1_1.x, sum((t2_1.y + t3_1.y)) + Group Key: t1_1.x + -> Hash Join + Output: t1_1.x, (PARTIAL sum((t2_1.y + t3_1.y))) + Hash Cond: (t1_1.x = t2_1.x) + -> Seq Scan on public.eager_agg_tab1_p2 t1_1 + Output: t1_1.x + -> Hash + Output: t2_1.x, t3_1.x, (PARTIAL sum((t2_1.y + t3_1.y))) + -> Partial HashAggregate + Output: t2_1.x, t3_1.x, PARTIAL sum((t2_1.y + t3_1.y)) + Group Key: t2_1.x + -> Hash Join + Output: t2_1.y, t2_1.x, t3_1.y, t3_1.x + Hash Cond: (t2_1.x = t3_1.x) + -> Seq Scan on public.eager_agg_tab1_p2 t2_1 + Output: t2_1.y, t2_1.x + -> Hash + Output: t3_1.y, t3_1.x + -> Seq Scan on public.eager_agg_tab1_p2 t3_1 + Output: t3_1.y, t3_1.x + -> Finalize HashAggregate + Output: t1_2.x, sum((t2_2.y + t3_2.y)) + Group Key: t1_2.x + -> Hash Join + Output: t1_2.x, (PARTIAL sum((t2_2.y + t3_2.y))) + Hash Cond: (t1_2.x = t2_2.x) + -> Seq Scan on public.eager_agg_tab1_p3 t1_2 + Output: t1_2.x + -> Hash + Output: t2_2.x, t3_2.x, (PARTIAL sum((t2_2.y + t3_2.y))) + -> Partial HashAggregate + Output: t2_2.x, t3_2.x, PARTIAL sum((t2_2.y + t3_2.y)) + Group Key: t2_2.x + -> Hash Join + Output: t2_2.y, t2_2.x, t3_2.y, t3_2.x + Hash Cond: (t2_2.x = t3_2.x) + -> Seq Scan on public.eager_agg_tab1_p3 t2_2 + Output: t2_2.y, t2_2.x + -> Hash + Output: t3_2.y, t3_2.x + -> Seq Scan on public.eager_agg_tab1_p3 t3_2 + Output: t3_2.y, t3_2.x +(67 rows) + +SELECT t1.x, sum(t2.y + t3.y) FROM eager_agg_tab1 t1 JOIN eager_agg_tab1 t2 ON t1.x = t2.x JOIN eager_agg_tab1 t3 ON t2.x = t3.x GROUP BY t1.x; + x | sum +----+------- + 4 | 18000 + 2 | 14000 + 8 | 26000 + 6 | 22000 + 0 | 10000 + 16 | 22000 + 10 | 10000 + 14 | 18000 + 12 | 14000 + 18 | 26000 + 26 | 22000 + 28 | 26000 + 22 | 14000 + 20 | 10000 + 24 | 18000 +(15 rows) + +-- partial aggregation +SET enable_hashagg TO off; +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t3.y, sum(t2.y + t3.y) FROM eager_agg_tab1 t1 JOIN eager_agg_tab1 t2 ON t1.x = t2.x JOIN eager_agg_tab1 t3 ON t2.x = t3.x GROUP BY t3.y; + QUERY PLAN +------------------------------------------------------------------------------------------- + Finalize GroupAggregate + Output: t3.y, sum((t2.y + t3.y)) + Group Key: t3.y + -> Sort + Output: t3.y, (PARTIAL sum((t2.y + t3.y))) + Sort Key: t3.y + -> Append + -> Hash Join + Output: t3_1.y, (PARTIAL sum((t2_1.y + t3_1.y))) + Hash Cond: (t2_1.x = t1_1.x) + -> Partial GroupAggregate + Output: t3_1.y, t2_1.x, t3_1.x, PARTIAL sum((t2_1.y + t3_1.y)) + Group Key: t3_1.y, t2_1.x, t3_1.x + -> Sort + Output: t2_1.y, t3_1.y, t2_1.x, t3_1.x + Sort Key: t3_1.y, t2_1.x + -> Hash Join + Output: t2_1.y, t3_1.y, t2_1.x, t3_1.x + Hash Cond: (t2_1.x = t3_1.x) + -> Seq Scan on public.eager_agg_tab1_p1 t2_1 + Output: t2_1.y, t2_1.x + -> Hash + Output: t3_1.y, t3_1.x + -> Seq Scan on public.eager_agg_tab1_p1 t3_1 + Output: t3_1.y, t3_1.x + -> Hash + Output: t1_1.x + -> Seq Scan on public.eager_agg_tab1_p1 t1_1 + Output: t1_1.x + -> Hash Join + Output: t3_2.y, (PARTIAL sum((t2_2.y + t3_2.y))) + Hash Cond: (t2_2.x = t1_2.x) + -> Partial GroupAggregate + Output: t3_2.y, t2_2.x, t3_2.x, PARTIAL sum((t2_2.y + t3_2.y)) + Group Key: t3_2.y, t2_2.x, t3_2.x + -> Sort + Output: t2_2.y, t3_2.y, t2_2.x, t3_2.x + Sort Key: t3_2.y, t2_2.x + -> Hash Join + Output: t2_2.y, t3_2.y, t2_2.x, t3_2.x + Hash Cond: (t2_2.x = t3_2.x) + -> Seq Scan on public.eager_agg_tab1_p2 t2_2 + Output: t2_2.y, t2_2.x + -> Hash + Output: t3_2.y, t3_2.x + -> Seq Scan on public.eager_agg_tab1_p2 t3_2 + Output: t3_2.y, t3_2.x + -> Hash + Output: t1_2.x + -> Seq Scan on public.eager_agg_tab1_p2 t1_2 + Output: t1_2.x + -> Hash Join + Output: t3_3.y, (PARTIAL sum((t2_3.y + t3_3.y))) + Hash Cond: (t2_3.x = t1_3.x) + -> Partial GroupAggregate + Output: t3_3.y, t2_3.x, t3_3.x, PARTIAL sum((t2_3.y + t3_3.y)) + Group Key: t3_3.y, t2_3.x, t3_3.x + -> Sort + Output: t2_3.y, t3_3.y, t2_3.x, t3_3.x + Sort Key: t3_3.y, t2_3.x + -> Hash Join + Output: t2_3.y, t3_3.y, t2_3.x, t3_3.x + Hash Cond: (t2_3.x = t3_3.x) + -> Seq Scan on public.eager_agg_tab1_p3 t2_3 + Output: t2_3.y, t2_3.x + -> Hash + Output: t3_3.y, t3_3.x + -> Seq Scan on public.eager_agg_tab1_p3 t3_3 + Output: t3_3.y, t3_3.x + -> Hash + Output: t1_3.x + -> Seq Scan on public.eager_agg_tab1_p3 t1_3 + Output: t1_3.x +(73 rows) + +SELECT t3.y, sum(t2.y + t3.y) FROM eager_agg_tab1 t1 JOIN eager_agg_tab1 t2 ON t1.x = t2.x JOIN eager_agg_tab1 t3 ON t2.x = t3.x GROUP BY t3.y; + y | sum +----+------- + 0 | 7500 + 2 | 13500 + 4 | 19500 + 6 | 25500 + 8 | 31500 + 10 | 22500 + 12 | 28500 + 14 | 34500 + 16 | 40500 + 18 | 46500 +(10 rows) + +RESET enable_hashagg; +DROP TABLE eager_agg_tab1; +DROP TABLE eager_agg_tab2; +-- +-- Test with multi-level partitioning scheme +-- +CREATE TABLE eager_agg_tab_ml(x int, y int) PARTITION BY RANGE(x); +CREATE TABLE eager_agg_tab_ml_p1 PARTITION OF eager_agg_tab_ml FOR VALUES FROM (0) TO (10); +CREATE TABLE eager_agg_tab_ml_p2 PARTITION OF eager_agg_tab_ml FOR VALUES FROM (10) TO (20) PARTITION BY RANGE(x); +CREATE TABLE eager_agg_tab_ml_p2_s1 PARTITION OF eager_agg_tab_ml_p2 FOR VALUES FROM (10) TO (15); +CREATE TABLE eager_agg_tab_ml_p2_s2 PARTITION OF eager_agg_tab_ml_p2 FOR VALUES FROM (15) TO (20); +CREATE TABLE eager_agg_tab_ml_p3 PARTITION OF eager_agg_tab_ml FOR VALUES FROM (20) TO (30) PARTITION BY RANGE(x); +CREATE TABLE eager_agg_tab_ml_p3_s1 PARTITION OF eager_agg_tab_ml_p3 FOR VALUES FROM (20) TO (25); +CREATE TABLE eager_agg_tab_ml_p3_s2 PARTITION OF eager_agg_tab_ml_p3 FOR VALUES FROM (25) TO (30); +INSERT INTO eager_agg_tab_ml SELECT i % 30, i % 30 FROM generate_series(1, 1000) i; +ANALYZE eager_agg_tab_ml; +-- When GROUP BY clause matches; full aggregation is performed for each partition. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.x, sum(t2.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x GROUP BY t1.x; + QUERY PLAN +--------------------------------------------------------------------------------- + Append + -> Finalize HashAggregate + Output: t1.x, sum(t2.y), count(*) + Group Key: t1.x + -> Hash Join + Output: t1.x, (PARTIAL sum(t2.y)), (PARTIAL count(*)) + Hash Cond: (t1.x = t2.x) + -> Seq Scan on public.eager_agg_tab_ml_p1 t1 + Output: t1.x + -> Hash + Output: t2.x, (PARTIAL sum(t2.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2.x, PARTIAL sum(t2.y), PARTIAL count(*) + Group Key: t2.x + -> Seq Scan on public.eager_agg_tab_ml_p1 t2 + Output: t2.y, t2.x + -> Finalize HashAggregate + Output: t1_1.x, sum(t2_1.y), count(*) + Group Key: t1_1.x + -> Hash Join + Output: t1_1.x, (PARTIAL sum(t2_1.y)), (PARTIAL count(*)) + Hash Cond: (t1_1.x = t2_1.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s1 t1_1 + Output: t1_1.x + -> Hash + Output: t2_1.x, (PARTIAL sum(t2_1.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_1.x, PARTIAL sum(t2_1.y), PARTIAL count(*) + Group Key: t2_1.x + -> Seq Scan on public.eager_agg_tab_ml_p2_s1 t2_1 + Output: t2_1.y, t2_1.x + -> Finalize HashAggregate + Output: t1_2.x, sum(t2_2.y), count(*) + Group Key: t1_2.x + -> Hash Join + Output: t1_2.x, (PARTIAL sum(t2_2.y)), (PARTIAL count(*)) + Hash Cond: (t1_2.x = t2_2.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s2 t1_2 + Output: t1_2.x + -> Hash + Output: t2_2.x, (PARTIAL sum(t2_2.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_2.x, PARTIAL sum(t2_2.y), PARTIAL count(*) + Group Key: t2_2.x + -> Seq Scan on public.eager_agg_tab_ml_p2_s2 t2_2 + Output: t2_2.y, t2_2.x + -> Finalize HashAggregate + Output: t1_3.x, sum(t2_3.y), count(*) + Group Key: t1_3.x + -> Hash Join + Output: t1_3.x, (PARTIAL sum(t2_3.y)), (PARTIAL count(*)) + Hash Cond: (t1_3.x = t2_3.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s1 t1_3 + Output: t1_3.x + -> Hash + Output: t2_3.x, (PARTIAL sum(t2_3.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_3.x, PARTIAL sum(t2_3.y), PARTIAL count(*) + Group Key: t2_3.x + -> Seq Scan on public.eager_agg_tab_ml_p3_s1 t2_3 + Output: t2_3.y, t2_3.x + -> Finalize HashAggregate + Output: t1_4.x, sum(t2_4.y), count(*) + Group Key: t1_4.x + -> Hash Join + Output: t1_4.x, (PARTIAL sum(t2_4.y)), (PARTIAL count(*)) + Hash Cond: (t1_4.x = t2_4.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s2 t1_4 + Output: t1_4.x + -> Hash + Output: t2_4.x, (PARTIAL sum(t2_4.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_4.x, PARTIAL sum(t2_4.y), PARTIAL count(*) + Group Key: t2_4.x + -> Seq Scan on public.eager_agg_tab_ml_p3_s2 t2_4 + Output: t2_4.y, t2_4.x +(76 rows) + +SELECT t1.x, sum(t2.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x GROUP BY t1.x; + x | sum | count +----+-------+------- + 8 | 9248 | 1156 + 9 | 10404 | 1156 + 7 | 8092 | 1156 + 1 | 1156 | 1156 + 5 | 5780 | 1156 + 4 | 4624 | 1156 + 2 | 2312 | 1156 + 0 | 0 | 1089 + 6 | 6936 | 1156 + 3 | 3468 | 1156 + 11 | 11979 | 1089 + 13 | 14157 | 1089 + 10 | 11560 | 1156 + 14 | 15246 | 1089 + 12 | 13068 | 1089 + 17 | 18513 | 1089 + 18 | 19602 | 1089 + 16 | 17424 | 1089 + 15 | 16335 | 1089 + 19 | 20691 | 1089 + 24 | 26136 | 1089 + 21 | 22869 | 1089 + 23 | 25047 | 1089 + 22 | 23958 | 1089 + 20 | 21780 | 1089 + 26 | 28314 | 1089 + 27 | 29403 | 1089 + 25 | 27225 | 1089 + 29 | 31581 | 1089 + 28 | 30492 | 1089 +(30 rows) + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.y, sum(t2.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x GROUP BY t1.y; + QUERY PLAN +--------------------------------------------------------------------------------- + Finalize HashAggregate + Output: t1.y, sum(t2.y), count(*) + Group Key: t1.y + -> Append + -> Hash Join + Output: t1_1.y, (PARTIAL sum(t2_1.y)), (PARTIAL count(*)) + Hash Cond: (t1_1.x = t2_1.x) + -> Seq Scan on public.eager_agg_tab_ml_p1 t1_1 + Output: t1_1.y, t1_1.x + -> Hash + Output: t2_1.x, (PARTIAL sum(t2_1.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_1.x, PARTIAL sum(t2_1.y), PARTIAL count(*) + Group Key: t2_1.x + -> Seq Scan on public.eager_agg_tab_ml_p1 t2_1 + Output: t2_1.y, t2_1.x + -> Hash Join + Output: t1_2.y, (PARTIAL sum(t2_2.y)), (PARTIAL count(*)) + Hash Cond: (t1_2.x = t2_2.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s1 t1_2 + Output: t1_2.y, t1_2.x + -> Hash + Output: t2_2.x, (PARTIAL sum(t2_2.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_2.x, PARTIAL sum(t2_2.y), PARTIAL count(*) + Group Key: t2_2.x + -> Seq Scan on public.eager_agg_tab_ml_p2_s1 t2_2 + Output: t2_2.y, t2_2.x + -> Hash Join + Output: t1_3.y, (PARTIAL sum(t2_3.y)), (PARTIAL count(*)) + Hash Cond: (t1_3.x = t2_3.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s2 t1_3 + Output: t1_3.y, t1_3.x + -> Hash + Output: t2_3.x, (PARTIAL sum(t2_3.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_3.x, PARTIAL sum(t2_3.y), PARTIAL count(*) + Group Key: t2_3.x + -> Seq Scan on public.eager_agg_tab_ml_p2_s2 t2_3 + Output: t2_3.y, t2_3.x + -> Hash Join + Output: t1_4.y, (PARTIAL sum(t2_4.y)), (PARTIAL count(*)) + Hash Cond: (t1_4.x = t2_4.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s1 t1_4 + Output: t1_4.y, t1_4.x + -> Hash + Output: t2_4.x, (PARTIAL sum(t2_4.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_4.x, PARTIAL sum(t2_4.y), PARTIAL count(*) + Group Key: t2_4.x + -> Seq Scan on public.eager_agg_tab_ml_p3_s1 t2_4 + Output: t2_4.y, t2_4.x + -> Hash Join + Output: t1_5.y, (PARTIAL sum(t2_5.y)), (PARTIAL count(*)) + Hash Cond: (t1_5.x = t2_5.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s2 t1_5 + Output: t1_5.y, t1_5.x + -> Hash + Output: t2_5.x, (PARTIAL sum(t2_5.y)), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_5.x, PARTIAL sum(t2_5.y), PARTIAL count(*) + Group Key: t2_5.x + -> Seq Scan on public.eager_agg_tab_ml_p3_s2 t2_5 + Output: t2_5.y, t2_5.x +(64 rows) + +SELECT t1.y, sum(t2.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x GROUP BY t1.y; + y | sum | count +----+-------+------- + 29 | 31581 | 1089 + 4 | 4624 | 1156 + 0 | 0 | 1089 + 10 | 11560 | 1156 + 9 | 10404 | 1156 + 7 | 8092 | 1156 + 15 | 16335 | 1089 + 6 | 6936 | 1156 + 26 | 28314 | 1089 + 12 | 13068 | 1089 + 24 | 26136 | 1089 + 19 | 20691 | 1089 + 25 | 27225 | 1089 + 21 | 22869 | 1089 + 14 | 15246 | 1089 + 3 | 3468 | 1156 + 17 | 18513 | 1089 + 28 | 30492 | 1089 + 22 | 23958 | 1089 + 20 | 21780 | 1089 + 13 | 14157 | 1089 + 1 | 1156 | 1156 + 5 | 5780 | 1156 + 18 | 19602 | 1089 + 2 | 2312 | 1156 + 16 | 17424 | 1089 + 27 | 29403 | 1089 + 23 | 25047 | 1089 + 11 | 11979 | 1089 + 8 | 9248 | 1156 +(30 rows) + +-- Check with eager aggregation over join rel +-- full aggregation +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.x, sum(t2.y + t3.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x JOIN eager_agg_tab_ml t3 on t2.x = t3.x GROUP BY t1.x; + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Append + -> Finalize HashAggregate + Output: t1.x, sum((t2.y + t3.y)), count(*) + Group Key: t1.x + -> Hash Join + Output: t1.x, (PARTIAL sum((t2.y + t3.y))), (PARTIAL count(*)) + Hash Cond: (t1.x = t2.x) + -> Seq Scan on public.eager_agg_tab_ml_p1 t1 + Output: t1.x + -> Hash + Output: t2.x, t3.x, (PARTIAL sum((t2.y + t3.y))), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2.x, t3.x, PARTIAL sum((t2.y + t3.y)), PARTIAL count(*) + Group Key: t2.x + -> Hash Join + Output: t2.y, t2.x, t3.y, t3.x + Hash Cond: (t2.x = t3.x) + -> Seq Scan on public.eager_agg_tab_ml_p1 t2 + Output: t2.y, t2.x + -> Hash + Output: t3.y, t3.x + -> Seq Scan on public.eager_agg_tab_ml_p1 t3 + Output: t3.y, t3.x + -> Finalize HashAggregate + Output: t1_1.x, sum((t2_1.y + t3_1.y)), count(*) + Group Key: t1_1.x + -> Hash Join + Output: t1_1.x, (PARTIAL sum((t2_1.y + t3_1.y))), (PARTIAL count(*)) + Hash Cond: (t1_1.x = t2_1.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s1 t1_1 + Output: t1_1.x + -> Hash + Output: t2_1.x, t3_1.x, (PARTIAL sum((t2_1.y + t3_1.y))), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_1.x, t3_1.x, PARTIAL sum((t2_1.y + t3_1.y)), PARTIAL count(*) + Group Key: t2_1.x + -> Hash Join + Output: t2_1.y, t2_1.x, t3_1.y, t3_1.x + Hash Cond: (t2_1.x = t3_1.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s1 t2_1 + Output: t2_1.y, t2_1.x + -> Hash + Output: t3_1.y, t3_1.x + -> Seq Scan on public.eager_agg_tab_ml_p2_s1 t3_1 + Output: t3_1.y, t3_1.x + -> Finalize HashAggregate + Output: t1_2.x, sum((t2_2.y + t3_2.y)), count(*) + Group Key: t1_2.x + -> Hash Join + Output: t1_2.x, (PARTIAL sum((t2_2.y + t3_2.y))), (PARTIAL count(*)) + Hash Cond: (t1_2.x = t2_2.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s2 t1_2 + Output: t1_2.x + -> Hash + Output: t2_2.x, t3_2.x, (PARTIAL sum((t2_2.y + t3_2.y))), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_2.x, t3_2.x, PARTIAL sum((t2_2.y + t3_2.y)), PARTIAL count(*) + Group Key: t2_2.x + -> Hash Join + Output: t2_2.y, t2_2.x, t3_2.y, t3_2.x + Hash Cond: (t2_2.x = t3_2.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s2 t2_2 + Output: t2_2.y, t2_2.x + -> Hash + Output: t3_2.y, t3_2.x + -> Seq Scan on public.eager_agg_tab_ml_p2_s2 t3_2 + Output: t3_2.y, t3_2.x + -> Finalize HashAggregate + Output: t1_3.x, sum((t2_3.y + t3_3.y)), count(*) + Group Key: t1_3.x + -> Hash Join + Output: t1_3.x, (PARTIAL sum((t2_3.y + t3_3.y))), (PARTIAL count(*)) + Hash Cond: (t1_3.x = t2_3.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s1 t1_3 + Output: t1_3.x + -> Hash + Output: t2_3.x, t3_3.x, (PARTIAL sum((t2_3.y + t3_3.y))), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_3.x, t3_3.x, PARTIAL sum((t2_3.y + t3_3.y)), PARTIAL count(*) + Group Key: t2_3.x + -> Hash Join + Output: t2_3.y, t2_3.x, t3_3.y, t3_3.x + Hash Cond: (t2_3.x = t3_3.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s1 t2_3 + Output: t2_3.y, t2_3.x + -> Hash + Output: t3_3.y, t3_3.x + -> Seq Scan on public.eager_agg_tab_ml_p3_s1 t3_3 + Output: t3_3.y, t3_3.x + -> Finalize HashAggregate + Output: t1_4.x, sum((t2_4.y + t3_4.y)), count(*) + Group Key: t1_4.x + -> Hash Join + Output: t1_4.x, (PARTIAL sum((t2_4.y + t3_4.y))), (PARTIAL count(*)) + Hash Cond: (t1_4.x = t2_4.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s2 t1_4 + Output: t1_4.x + -> Hash + Output: t2_4.x, t3_4.x, (PARTIAL sum((t2_4.y + t3_4.y))), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t2_4.x, t3_4.x, PARTIAL sum((t2_4.y + t3_4.y)), PARTIAL count(*) + Group Key: t2_4.x + -> Hash Join + Output: t2_4.y, t2_4.x, t3_4.y, t3_4.x + Hash Cond: (t2_4.x = t3_4.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s2 t2_4 + Output: t2_4.y, t2_4.x + -> Hash + Output: t3_4.y, t3_4.x + -> Seq Scan on public.eager_agg_tab_ml_p3_s2 t3_4 + Output: t3_4.y, t3_4.x +(111 rows) + +SELECT t1.x, sum(t2.y + t3.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x JOIN eager_agg_tab_ml t3 on t2.x = t3.x GROUP BY t1.x; + x | sum | count +----+---------+------- + 8 | 628864 | 39304 + 9 | 707472 | 39304 + 7 | 550256 | 39304 + 1 | 78608 | 39304 + 5 | 393040 | 39304 + 4 | 314432 | 39304 + 2 | 157216 | 39304 + 0 | 0 | 35937 + 6 | 471648 | 39304 + 3 | 235824 | 39304 + 11 | 790614 | 35937 + 13 | 934362 | 35937 + 10 | 786080 | 39304 + 14 | 1006236 | 35937 + 12 | 862488 | 35937 + 17 | 1221858 | 35937 + 18 | 1293732 | 35937 + 16 | 1149984 | 35937 + 15 | 1078110 | 35937 + 19 | 1365606 | 35937 + 24 | 1724976 | 35937 + 21 | 1509354 | 35937 + 23 | 1653102 | 35937 + 22 | 1581228 | 35937 + 20 | 1437480 | 35937 + 26 | 1868724 | 35937 + 27 | 1940598 | 35937 + 25 | 1796850 | 35937 + 29 | 2084346 | 35937 + 28 | 2012472 | 35937 +(30 rows) + +-- partial aggregation +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t3.y, sum(t2.y + t3.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x JOIN eager_agg_tab_ml t3 on t2.x = t3.x GROUP BY t3.y; + QUERY PLAN +------------------------------------------------------------------------------------------------------------ + Finalize HashAggregate + Output: t3.y, sum((t2.y + t3.y)), count(*) + Group Key: t3.y + -> Append + -> Hash Join + Output: t3_1.y, (PARTIAL sum((t2_1.y + t3_1.y))), (PARTIAL count(*)) + Hash Cond: (t1_1.x = t2_1.x) + -> Seq Scan on public.eager_agg_tab_ml_p1 t1_1 + Output: t1_1.x + -> Hash + Output: t3_1.y, t2_1.x, t3_1.x, (PARTIAL sum((t2_1.y + t3_1.y))), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t3_1.y, t2_1.x, t3_1.x, PARTIAL sum((t2_1.y + t3_1.y)), PARTIAL count(*) + Group Key: t3_1.y, t2_1.x, t3_1.x + -> Hash Join + Output: t2_1.y, t3_1.y, t2_1.x, t3_1.x + Hash Cond: (t2_1.x = t3_1.x) + -> Seq Scan on public.eager_agg_tab_ml_p1 t2_1 + Output: t2_1.y, t2_1.x + -> Hash + Output: t3_1.y, t3_1.x + -> Seq Scan on public.eager_agg_tab_ml_p1 t3_1 + Output: t3_1.y, t3_1.x + -> Hash Join + Output: t3_2.y, (PARTIAL sum((t2_2.y + t3_2.y))), (PARTIAL count(*)) + Hash Cond: (t1_2.x = t2_2.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s1 t1_2 + Output: t1_2.x + -> Hash + Output: t3_2.y, t2_2.x, t3_2.x, (PARTIAL sum((t2_2.y + t3_2.y))), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t3_2.y, t2_2.x, t3_2.x, PARTIAL sum((t2_2.y + t3_2.y)), PARTIAL count(*) + Group Key: t3_2.y, t2_2.x, t3_2.x + -> Hash Join + Output: t2_2.y, t3_2.y, t2_2.x, t3_2.x + Hash Cond: (t2_2.x = t3_2.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s1 t2_2 + Output: t2_2.y, t2_2.x + -> Hash + Output: t3_2.y, t3_2.x + -> Seq Scan on public.eager_agg_tab_ml_p2_s1 t3_2 + Output: t3_2.y, t3_2.x + -> Hash Join + Output: t3_3.y, (PARTIAL sum((t2_3.y + t3_3.y))), (PARTIAL count(*)) + Hash Cond: (t1_3.x = t2_3.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s2 t1_3 + Output: t1_3.x + -> Hash + Output: t3_3.y, t2_3.x, t3_3.x, (PARTIAL sum((t2_3.y + t3_3.y))), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t3_3.y, t2_3.x, t3_3.x, PARTIAL sum((t2_3.y + t3_3.y)), PARTIAL count(*) + Group Key: t3_3.y, t2_3.x, t3_3.x + -> Hash Join + Output: t2_3.y, t3_3.y, t2_3.x, t3_3.x + Hash Cond: (t2_3.x = t3_3.x) + -> Seq Scan on public.eager_agg_tab_ml_p2_s2 t2_3 + Output: t2_3.y, t2_3.x + -> Hash + Output: t3_3.y, t3_3.x + -> Seq Scan on public.eager_agg_tab_ml_p2_s2 t3_3 + Output: t3_3.y, t3_3.x + -> Hash Join + Output: t3_4.y, (PARTIAL sum((t2_4.y + t3_4.y))), (PARTIAL count(*)) + Hash Cond: (t1_4.x = t2_4.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s1 t1_4 + Output: t1_4.x + -> Hash + Output: t3_4.y, t2_4.x, t3_4.x, (PARTIAL sum((t2_4.y + t3_4.y))), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t3_4.y, t2_4.x, t3_4.x, PARTIAL sum((t2_4.y + t3_4.y)), PARTIAL count(*) + Group Key: t3_4.y, t2_4.x, t3_4.x + -> Hash Join + Output: t2_4.y, t3_4.y, t2_4.x, t3_4.x + Hash Cond: (t2_4.x = t3_4.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s1 t2_4 + Output: t2_4.y, t2_4.x + -> Hash + Output: t3_4.y, t3_4.x + -> Seq Scan on public.eager_agg_tab_ml_p3_s1 t3_4 + Output: t3_4.y, t3_4.x + -> Hash Join + Output: t3_5.y, (PARTIAL sum((t2_5.y + t3_5.y))), (PARTIAL count(*)) + Hash Cond: (t1_5.x = t2_5.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s2 t1_5 + Output: t1_5.x + -> Hash + Output: t3_5.y, t2_5.x, t3_5.x, (PARTIAL sum((t2_5.y + t3_5.y))), (PARTIAL count(*)) + -> Partial HashAggregate + Output: t3_5.y, t2_5.x, t3_5.x, PARTIAL sum((t2_5.y + t3_5.y)), PARTIAL count(*) + Group Key: t3_5.y, t2_5.x, t3_5.x + -> Hash Join + Output: t2_5.y, t3_5.y, t2_5.x, t3_5.x + Hash Cond: (t2_5.x = t3_5.x) + -> Seq Scan on public.eager_agg_tab_ml_p3_s2 t2_5 + Output: t2_5.y, t2_5.x + -> Hash + Output: t3_5.y, t3_5.x + -> Seq Scan on public.eager_agg_tab_ml_p3_s2 t3_5 + Output: t3_5.y, t3_5.x +(99 rows) + +SELECT t3.y, sum(t2.y + t3.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x JOIN eager_agg_tab_ml t3 on t2.x = t3.x GROUP BY t3.y; + y | sum | count +----+---------+------- + 29 | 2084346 | 35937 + 4 | 314432 | 39304 + 0 | 0 | 35937 + 10 | 786080 | 39304 + 9 | 707472 | 39304 + 7 | 550256 | 39304 + 15 | 1078110 | 35937 + 6 | 471648 | 39304 + 26 | 1868724 | 35937 + 12 | 862488 | 35937 + 24 | 1724976 | 35937 + 19 | 1365606 | 35937 + 25 | 1796850 | 35937 + 21 | 1509354 | 35937 + 14 | 1006236 | 35937 + 3 | 235824 | 39304 + 17 | 1221858 | 35937 + 28 | 2012472 | 35937 + 22 | 1581228 | 35937 + 20 | 1437480 | 35937 + 13 | 934362 | 35937 + 1 | 78608 | 39304 + 5 | 393040 | 39304 + 18 | 1293732 | 35937 + 2 | 157216 | 39304 + 16 | 1149984 | 35937 + 27 | 1940598 | 35937 + 23 | 1653102 | 35937 + 11 | 790614 | 35937 + 8 | 628864 | 39304 +(30 rows) + +DROP TABLE eager_agg_tab_ml; diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 1d8a414eea..250a9dba21 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -119,7 +119,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr # The stats test resets stats, so nothing else needing stats access can be in # this group. # ---------- -test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression memoize stats predicate +test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression memoize stats predicate eager_aggregate # event_trigger depends on create_am and cannot run concurrently with # any test that runs DDL diff --git a/src/test/regress/sql/eager_aggregate.sql b/src/test/regress/sql/eager_aggregate.sql new file mode 100644 index 0000000000..aba2c41557 --- /dev/null +++ b/src/test/regress/sql/eager_aggregate.sql @@ -0,0 +1,205 @@ +-- +-- EAGER AGGREGATION +-- Test we can push aggregation down below join +-- + +-- Enable eager aggregation, which by default is disabled. +SET enable_eager_aggregate TO on; + +CREATE TABLE eager_agg_t1 (a int, b int, c double precision); +CREATE TABLE eager_agg_t2 (a int, b int, c double precision); +CREATE TABLE eager_agg_t3 (a int, b int, c double precision); + +INSERT INTO eager_agg_t1 SELECT i, i, i FROM generate_series(1, 1000)i; +INSERT INTO eager_agg_t2 SELECT i, i%10, i FROM generate_series(1, 1000)i; +INSERT INTO eager_agg_t3 SELECT i%10, i%10, i FROM generate_series(1, 1000)i; + +ANALYZE eager_agg_t1; +ANALYZE eager_agg_t2; +ANALYZE eager_agg_t3; + + +-- +-- Test eager aggregation over base rel +-- + +-- Perform scan of a table, aggregate the result, join it to the other table +-- and finalize the aggregation. + +-- Produce results with hash aggregation +SET enable_hashagg TO on; +SET enable_sort TO off; + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a; +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a; + +-- Produce results with sorting aggregation +SET enable_hashagg TO off; +SET enable_sort TO on; + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a; +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a; + +SET enable_hashagg TO default; +SET enable_sort TO default; + + +-- +-- Test eager aggregation over join rel +-- + +-- Perform join of tables, aggregate the result, join it to the other table +-- and finalize the aggregation. + +-- Produce results with hash aggregation +SET enable_hashagg TO on; +SET enable_sort TO off; + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c + t3.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b JOIN eager_agg_t3 t3 ON t2.a = t3.a GROUP BY t1.a; +SELECT t1.a, avg(t2.c + t3.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b JOIN eager_agg_t3 t3 ON t2.a = t3.a GROUP BY t1.a; + +-- Produce results with sorting aggregation +SET enable_hashagg TO off; +SET enable_sort TO on; + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c + t3.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b JOIN eager_agg_t3 t3 ON t2.a = t3.a GROUP BY t1.a; +SELECT t1.a, avg(t2.c + t3.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b JOIN eager_agg_t3 t3 ON t2.a = t3.a GROUP BY t1.a; + +SET enable_hashagg TO default; +SET enable_sort TO default; + + +-- +-- Test that eager aggregation works for outer join +-- + +-- Ensure aggregation can be pushed down to the non-nullable side +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t3.a, avg(t3.c) FROM eager_agg_t1 t1 RIGHT JOIN eager_agg_t3 t3 ON t1.b = t3.b GROUP BY t3.a; +SELECT t3.a, avg(t3.c) FROM eager_agg_t1 t1 RIGHT JOIN eager_agg_t3 t3 ON t1.b = t3.b GROUP BY t3.a; + +-- Ensure aggregation cannot be pushed down to the nullable side +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t3.a, avg(t3.c) FROM eager_agg_t1 t1 LEFT JOIN eager_agg_t3 t3 ON t1.b = t3.b GROUP BY t3.a; +SELECT t3.a, avg(t3.c) FROM eager_agg_t1 t1 LEFT JOIN eager_agg_t3 t3 ON t1.b = t3.b GROUP BY t3.a; + + +-- +-- Test that eager aggregation works for parallel plans +-- +SET parallel_setup_cost=0; +SET parallel_tuple_cost=0; +SET min_parallel_table_scan_size=0; +SET max_parallel_workers_per_gather=4; + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a; +SELECT t1.a, avg(t2.c) FROM eager_agg_t1 t1 JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a; + +RESET parallel_setup_cost; +RESET parallel_tuple_cost; +RESET min_parallel_table_scan_size; +RESET max_parallel_workers_per_gather; + + +DROP TABLE eager_agg_t1; +DROP TABLE eager_agg_t2; +DROP TABLE eager_agg_t3; + + +-- +-- Test eager aggregation for partitionwise join +-- + +-- Enable partitionwise aggregate, which by default is disabled. +SET enable_partitionwise_aggregate TO true; +-- Enable partitionwise join, which by default is disabled. +SET enable_partitionwise_join TO true; + +CREATE TABLE eager_agg_tab1(x int, y int) PARTITION BY RANGE(x); +CREATE TABLE eager_agg_tab1_p1 PARTITION OF eager_agg_tab1 FOR VALUES FROM (0) TO (10); +CREATE TABLE eager_agg_tab1_p2 PARTITION OF eager_agg_tab1 FOR VALUES FROM (10) TO (20); +CREATE TABLE eager_agg_tab1_p3 PARTITION OF eager_agg_tab1 FOR VALUES FROM (20) TO (30); +CREATE TABLE eager_agg_tab2(x int, y int) PARTITION BY RANGE(y); +CREATE TABLE eager_agg_tab2_p1 PARTITION OF eager_agg_tab2 FOR VALUES FROM (0) TO (10); +CREATE TABLE eager_agg_tab2_p2 PARTITION OF eager_agg_tab2 FOR VALUES FROM (10) TO (20); +CREATE TABLE eager_agg_tab2_p3 PARTITION OF eager_agg_tab2 FOR VALUES FROM (20) TO (30); +INSERT INTO eager_agg_tab1 SELECT i % 30, i % 20 FROM generate_series(0, 299, 2) i; +INSERT INTO eager_agg_tab2 SELECT i % 20, i % 30 FROM generate_series(0, 299, 3) i; + +ANALYZE eager_agg_tab1; +ANALYZE eager_agg_tab2; + +-- When GROUP BY clause matches; full aggregation is performed for each partition. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.x, sum(t1.y), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x; +SELECT t1.x, sum(t1.y), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x; + +-- GROUP BY having other matching key +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t2.y, sum(t1.y), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.y; +SELECT t2.y, sum(t1.y), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.y; + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t2.x, sum(t1.x), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.x HAVING avg(t1.x) > 10; +SELECT t2.x, sum(t1.x), count(*) FROM eager_agg_tab1 t1, eager_agg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.x HAVING avg(t1.x) > 10; + +-- Check with eager aggregation over join rel +-- full aggregation +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.x, sum(t2.y + t3.y) FROM eager_agg_tab1 t1 JOIN eager_agg_tab1 t2 ON t1.x = t2.x JOIN eager_agg_tab1 t3 ON t2.x = t3.x GROUP BY t1.x; +SELECT t1.x, sum(t2.y + t3.y) FROM eager_agg_tab1 t1 JOIN eager_agg_tab1 t2 ON t1.x = t2.x JOIN eager_agg_tab1 t3 ON t2.x = t3.x GROUP BY t1.x; + +-- partial aggregation +SET enable_hashagg TO off; +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t3.y, sum(t2.y + t3.y) FROM eager_agg_tab1 t1 JOIN eager_agg_tab1 t2 ON t1.x = t2.x JOIN eager_agg_tab1 t3 ON t2.x = t3.x GROUP BY t3.y; +SELECT t3.y, sum(t2.y + t3.y) FROM eager_agg_tab1 t1 JOIN eager_agg_tab1 t2 ON t1.x = t2.x JOIN eager_agg_tab1 t3 ON t2.x = t3.x GROUP BY t3.y; +RESET enable_hashagg; + +DROP TABLE eager_agg_tab1; +DROP TABLE eager_agg_tab2; + + +-- +-- Test with multi-level partitioning scheme +-- +CREATE TABLE eager_agg_tab_ml(x int, y int) PARTITION BY RANGE(x); +CREATE TABLE eager_agg_tab_ml_p1 PARTITION OF eager_agg_tab_ml FOR VALUES FROM (0) TO (10); +CREATE TABLE eager_agg_tab_ml_p2 PARTITION OF eager_agg_tab_ml FOR VALUES FROM (10) TO (20) PARTITION BY RANGE(x); +CREATE TABLE eager_agg_tab_ml_p2_s1 PARTITION OF eager_agg_tab_ml_p2 FOR VALUES FROM (10) TO (15); +CREATE TABLE eager_agg_tab_ml_p2_s2 PARTITION OF eager_agg_tab_ml_p2 FOR VALUES FROM (15) TO (20); +CREATE TABLE eager_agg_tab_ml_p3 PARTITION OF eager_agg_tab_ml FOR VALUES FROM (20) TO (30) PARTITION BY RANGE(x); +CREATE TABLE eager_agg_tab_ml_p3_s1 PARTITION OF eager_agg_tab_ml_p3 FOR VALUES FROM (20) TO (25); +CREATE TABLE eager_agg_tab_ml_p3_s2 PARTITION OF eager_agg_tab_ml_p3 FOR VALUES FROM (25) TO (30); +INSERT INTO eager_agg_tab_ml SELECT i % 30, i % 30 FROM generate_series(1, 1000) i; + +ANALYZE eager_agg_tab_ml; + +-- When GROUP BY clause matches; full aggregation is performed for each partition. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.x, sum(t2.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x GROUP BY t1.x; +SELECT t1.x, sum(t2.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x GROUP BY t1.x; + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.y, sum(t2.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x GROUP BY t1.y; +SELECT t1.y, sum(t2.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x GROUP BY t1.y; + +-- Check with eager aggregation over join rel +-- full aggregation +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t1.x, sum(t2.y + t3.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x JOIN eager_agg_tab_ml t3 on t2.x = t3.x GROUP BY t1.x; +SELECT t1.x, sum(t2.y + t3.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x JOIN eager_agg_tab_ml t3 on t2.x = t3.x GROUP BY t1.x; + +-- partial aggregation +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t3.y, sum(t2.y + t3.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x JOIN eager_agg_tab_ml t3 on t2.x = t3.x GROUP BY t3.y; +SELECT t3.y, sum(t2.y + t3.y), count(*) FROM eager_agg_tab_ml t1 JOIN eager_agg_tab_ml t2 ON t1.x = t2.x JOIN eager_agg_tab_ml t3 on t2.x = t3.x GROUP BY t3.y; + +DROP TABLE eager_agg_tab_ml; -- 2.31.0