BUG #15430: partition-wise join only works in combination withpruning on 1 partition - Mailing list pgsql-bugs
| From | PG Bug reporting form |
|---|---|
| Subject | BUG #15430: partition-wise join only works in combination withpruning on 1 partition |
| Date | |
| Msg-id | 15430-ee54c4208e54e9b1@postgresql.org Whole thread Raw |
| Responses |
Re: BUG #15430: partition-wise join only works in combination withpruning on 1 partition
|
| List | pgsql-bugs |
The following bug has been logged on the website:
Bug reference: 15430
Logged by: Bart Debersaques
Email address: agile.data.analytics@gmail.com
PostgreSQL version: 11rc1
Operating system: Centos 7
Description:
psql <<-EOF
create schema part_test
;
CREATE TABLE part_test.measurement (
city_id int not null,
logdate date not null,
peaktemp int,
unitsales
int
) PARTITION BY RANGE (logdate)
;
CREATE TABLE part_test.measurement_y2006m02 PARTITION OF
part_test.measurement
FOR VALUES FROM ('2006-02-01') TO ('2006-03-01')
;
CREATE TABLE part_test.measurement_y2006m03 PARTITION OF
part_test.measurement
FOR VALUES FROM ('2006-03-01') TO ('2006-04-01')
;
EOF
psql <<-EOF
CREATE TABLE part_test.measurement2 (
city_id int not null,
logdate date not null,
peaktemp int,
unitsales
int
) PARTITION BY RANGE (logdate)
;
CREATE TABLE part_test.measurement2_y2006m02 PARTITION OF
part_test.measurement2
FOR VALUES FROM ('2006-02-01') TO ('2006-03-01')
;
CREATE TABLE part_test.measurement2_y2006m03 PARTITION OF
part_test.measurement2
FOR VALUES FROM ('2006-03-01') TO ('2006-04-01')
;
EOF
psql <<-EOF
explain
select *
from
part_test.measurement m1
inner join part_test.measurement2 m2
on m1.city_id = m2.city_id and m1.logdate = m2.logdate
where m1.logdate = '2006-02-01'::date
EOF
# Hash Join (cost=33.28..66.58 rows=9 width=32)
# Hash Cond: (m1.city_id = m2.city_id)
# -> Append (cost=0.00..33.17 rows=9 width=16)
# -> Seq Scan on measurement_y2006m02 m1 (cost=0.00..33.12 rows=9
width=16)
# Filter: (logdate = '2006-02-01'::date)
# -> Hash (cost=33.17..33.17 rows=9 width=16)
# -> Append (cost=0.00..33.17 rows=9 width=16)
# -> Seq Scan on measurement2_y2006m02 m2 (cost=0.00..33.12
rows=9 width=16)
# Filter: (logdate = '2006-02-01'::date)
# conclusion: pruning applied to both tables = OK
psql <<-EOF
explain
select *
from
part_test.measurement m1
inner join part_test.measurement2 m2
on m1.city_id = m2.city_id and m1.logdate = m2.logdate
where m1.logdate in ('2006-02-01'::date, '2006-03-01'::date)
EOF
# Hash Join (cost=66.97..170.25 rows=3 width=32)
# Hash Cond: ((m2.city_id = m1.city_id) AND (m2.logdate = m1.logdate))
# -> Append (cost=0.00..75.50 rows=3700 width=16)
# -> Seq Scan on measurement2_y2006m02 m2 (cost=0.00..28.50
rows=1850 width=16)
# -> Seq Scan on measurement2_y2006m03 m2_1 (cost=0.00..28.50
rows=1850 width=16)
# -> Hash (cost=66.43..66.43 rows=36 width=16)
# -> Append (cost=0.00..66.43 rows=36 width=16)
# -> Seq Scan on measurement_y2006m02 m1 (cost=0.00..33.12
rows=18 width=16)
# Filter: (logdate = ANY
('{2006-02-01,2006-03-01}'::date[]))
# -> Seq Scan on measurement_y2006m03 m1_1 (cost=0.00..33.12
rows=18 width=16)
# Filter: (logdate = ANY
('{2006-02-01,2006-03-01}'::date[]))
# conclusion: join performed on full tab scans, join not performed on a
per-partion basis
psql <<-EOF
explain
select *
from
part_test.measurement m1
inner join part_test.measurement2 m2
on m1.city_id = m2.city_id and m1.logdate = m2.logdate
EOF
# Merge Join (cost=589.57..648.49 rows=342 width=32)
# Merge Cond: ((m1.city_id = m2.city_id) AND (m1.logdate = m2.logdate))
# -> Sort (cost=294.79..304.04 rows=3700 width=16)
# Sort Key: m1.city_id, m1.logdate
# -> Append (cost=0.00..75.50 rows=3700 width=16)
# -> Seq Scan on measurement_y2006m02 m1 (cost=0.00..28.50
rows=1850 width=16)
# -> Seq Scan on measurement_y2006m03 m1_1 (cost=0.00..28.50
rows=1850 width=16)
# -> Sort (cost=294.79..304.04 rows=3700 width=16)
# Sort Key: m2.city_id, m2.logdate
# -> Append (cost=0.00..75.50 rows=3700 width=16)
# -> Seq Scan on measurement2_y2006m02 m2 (cost=0.00..28.50
rows=1850 width=16)
# -> Seq Scan on measurement2_y2006m03 m2_1
(cost=0.00..28.50 rows=1850 width=16)
# conclusion: join performed on full tab scans, join not performed on a
per-partion basis
# desired behaviour:
# in serial execution and for an equi join: partitions in m1 should be
iterated and joined with the equivaluent in m2
# parallel execution: the above should be done with several partitions in
parallel
# please assign to Ashutosh Bapat as described in
https://www.enterprisedb.com/blog/partition-wise-joins-%E2%80%9Cdivide-and-conquer-joins-between-partitioned-table
reading
https://www.enterprisedb.com/blog/partition-wise-joins-%E2%80%9Cdivide-and-conquer-joins-between-partitioned-table
pgsql-bugs by date: