From c05e93afb56befb567d8a95b090db453264d3b9f Mon Sep 17 00:00:00 2001 From: David Rowley Date: Thu, 18 May 2017 20:10:05 +1200 Subject: [PATCH 1/2] Minimal fix for foreign key join estimations --- src/backend/optimizer/path/costsize.c | 95 +++++++++-------------------------- src/test/regress/expected/join.out | 32 ++++++++++++ src/test/regress/sql/join.sql | 25 +++++++++ 3 files changed, 82 insertions(+), 70 deletions(-) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index cdb18d9..d7bdd41 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -4303,12 +4303,21 @@ get_foreign_key_join_selectivity(PlannerInfo *root, List *worklist = *restrictlist; ListCell *lc; + /* + * When the inner side of the semi/anti join is itself a join, it's hard + * to guess what fraction of the referenced table will get through the + * join. Let's just reject this case outright for now. + */ + if ((jointype == JOIN_SEMI || jointype == JOIN_ANTI) && + bms_membership(inner_relids) != BMS_SINGLETON) + return 1.0; + /* Consider each FK constraint that is known to match the query */ foreach(lc, root->fkey_list) { ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc); + RelOptInfo *ref_rel; bool ref_is_outer; - bool use_smallest_selectivity = false; List *removedlist; ListCell *cell; ListCell *prev; @@ -4429,10 +4438,6 @@ get_foreign_key_join_selectivity(PlannerInfo *root, * be double-counting the null fraction, and (2) it's not very clear * how to combine null fractions for multiple referencing columns. * - * In the use_smallest_selectivity code below, null derating is done - * implicitly by relying on clause_selectivity(); in the other cases, - * we do nothing for now about correcting for nulls. - * * XXX another point here is that if either side of an FK constraint * is an inheritance parent, we estimate as though the constraint * covers all its children as well. This is not an unreasonable @@ -4443,52 +4448,25 @@ get_foreign_key_join_selectivity(PlannerInfo *root, * work, it is uncommon in practice to have an FK referencing a parent * table. So, at least for now, disregard inheritance here. */ - if (ref_is_outer && jointype != JOIN_INNER) - { - /* - * When the referenced table is on the outer side of a non-inner - * join, knowing that each inner row has exactly one match is not - * as useful as one could wish, since we really need to know the - * fraction of outer rows with a match. Still, we can avoid the - * folly of multiplying the per-column estimates together. Take - * the smallest per-column selectivity, instead. (This should - * correspond to the FK column with the most nulls.) - */ - use_smallest_selectivity = true; - } - else if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + + ref_rel = find_base_rel(root, fkinfo->ref_relid); + + if (!ref_is_outer && (jointype == JOIN_SEMI || jointype == JOIN_ANTI)) { /* * For JOIN_SEMI and JOIN_ANTI, the selectivity is defined as the - * fraction of LHS rows that have matches. The referenced table - * is on the inner side (we already handled the other case above), - * so the FK implies that every LHS row has a match *in the + * fraction of LHS rows that have matches. Here we handle the + * case where the referenced table is on the inner side,* so the + * FK implies that every LHS row has a match *in the * referenced table*. But any restriction or join clauses below - * here will reduce the number of matches. + * here will reduce the number of matches. It's possible that + * we'll never hit this case for a SEMI join, as surely unique + * proofs were found on the inner side of the SEMI join which + * allowed it to be converted into an INNER join. Nevertheless, + * we'd better just handle this case properly in case it does + * arise. */ - if (bms_membership(inner_relids) == BMS_SINGLETON) - { - /* - * When the inner side of the semi/anti join is just the - * referenced table, we may take the FK selectivity as equal - * to the selectivity of the table's restriction clauses. - */ - RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid); - double ref_tuples = Max(ref_rel->tuples, 1.0); - - fkselec *= ref_rel->rows / ref_tuples; - } - else - { - /* - * When the inner side of the semi/anti join is itself a join, - * it's hard to guess what fraction of the referenced table - * will get through the join. But we still don't want to - * multiply per-column estimates together. Take the smallest - * per-column selectivity, instead. - */ - use_smallest_selectivity = true; - } + fkselec *= ref_rel->rows / Max(ref_rel->tuples, 1.0); } else { @@ -4497,30 +4475,7 @@ get_foreign_key_join_selectivity(PlannerInfo *root, * guard against tuples == 0. Note we should use the raw table * tuple count, not any estimate of its filtered or joined size. */ - RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid); - double ref_tuples = Max(ref_rel->tuples, 1.0); - - fkselec *= 1.0 / ref_tuples; - } - - /* - * Common code for cases where we should use the smallest selectivity - * that would be computed for any one of the FK's clauses. - */ - if (use_smallest_selectivity) - { - Selectivity thisfksel = 1.0; - - foreach(cell, removedlist) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); - Selectivity csel; - - csel = clause_selectivity(root, (Node *) rinfo, - 0, jointype, sjinfo); - thisfksel = Min(thisfksel, csel); - } - fkselec *= thisfksel; + fkselec *= 1.0 / Max(ref_rel->tuples, 1.0); } } diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index d08b1e1..9e6e23d 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -5691,3 +5691,35 @@ where exists (select 1 from j3 (13 rows) drop table j3; +-- +-- Test Foreign key join estimation performs sanely for outer joins +-- +begin work; +create table fkest (a int, b int, c int unique, primary key(a,b)); +create table fkest1 (a int, b int, primary key(a,b)); +insert into fkest select x/10,x%10, x from generate_Series(1,400) x; +insert into fkest1 select x/10,x%10 from generate_Series(1,400) x; +alter table fkest1 add constraint fkest1_a_b_fkey foreign key (a,b) references fkest; +analyze fkest; +analyze fkest1; +explain (costs off) select * from fkest f +left join fkest1 f1 on f.a = f1.a and f.b = f1.b +left join fkest1 f2 on f.a = f2.a and f.b = f2.b +left join fkest1 f3 on f.a = f3.a and f.b = f3.b +where f.c = 1; + QUERY PLAN +------------------------------------------------------------------ + Nested Loop Left Join + -> Nested Loop Left Join + -> Nested Loop Left Join + -> Seq Scan on fkest f + Filter: (c = 1) + -> Index Only Scan using fkest1_pkey on fkest1 f1 + Index Cond: ((a = f.a) AND (b = f.b)) + -> Index Only Scan using fkest1_pkey on fkest1 f2 + Index Cond: ((a = f.a) AND (b = f.b)) + -> Index Only Scan using fkest1_pkey on fkest1 f3 + Index Cond: ((a = f.a) AND (b = f.b)) +(11 rows) + +rollback; diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index c3994ea..5c79d74 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1878,3 +1878,28 @@ where exists (select 1 from j3 and t1.unique1 < 1; drop table j3; + +-- +-- Test Foreign key join estimation performs sanely for outer joins +-- + +begin work; + +create table fkest (a int, b int, c int unique, primary key(a,b)); +create table fkest1 (a int, b int, primary key(a,b)); + +insert into fkest select x/10,x%10, x from generate_Series(1,400) x; +insert into fkest1 select x/10,x%10 from generate_Series(1,400) x; + +alter table fkest1 add constraint fkest1_a_b_fkey foreign key (a,b) references fkest; + +analyze fkest; +analyze fkest1; + +explain (costs off) select * from fkest f +left join fkest1 f1 on f.a = f1.a and f.b = f1.b +left join fkest1 f2 on f.a = f2.a and f.b = f2.b +left join fkest1 f3 on f.a = f3.a and f.b = f3.b +where f.c = 1; + +rollback; -- 1.9.5.msysgit.1