From 755b6383aa8abaeff2e040841e80a2a1cf893e12 Mon Sep 17 00:00:00 2001 From: Jinbao Chen Date: Fri, 22 Nov 2019 16:55:09 +0800 Subject: [PATCH] Added the selection rate of the inner table non-empty bucket The planner will use big table as inner table in hash join if small table have fewer unique values. But this plan is much slower than using small table as inner table. In general, the cost of creating a hash table is higher than the cost of querying a hash table. So we tend to use small tables as internal tables. But if the average chain length of the bucket is large, the situation is just the opposite. If virtualbuckets is much larger than innerndistinct, and outerndistinct is much larger than innerndistinct. Then most tuples of the outer table will match the empty bucket. So when we calculate the cost of traversing the bucket, we need to ignore the tuple matching empty bucket. So we add the selection rate of the inner table non-empty bucket. The formula is: (1 - ((outerndistinct - innerndistinct)/outerndistinct)* ((virtualbuckets - innerndistinct)/virtualbuckets)) --- src/backend/optimizer/path/costsize.c | 73 ++++++++++++++++++++++++++++++++- src/test/regress/expected/join_hash.out | 24 +++++++++++ src/test/regress/sql/join_hash.sql | 19 +++++++++ 3 files changed, 115 insertions(+), 1 deletion(-) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index c5f6593485..2633b020ed 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -3382,6 +3382,9 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, double virtualbuckets; Selectivity innerbucketsize; Selectivity innermcvfreq; + double outerndistinct; + double innerndistinct; + Selectivity outer_match_nonempty_frac; ListCell *hcl; /* Mark the path with the correct row estimate */ @@ -3426,20 +3429,30 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, * because we avoid contaminating the cache with a value that's wrong for 
* non-unique-ified paths. */ + outerndistinct = 1.0; + if (IsA(inner_path, UniquePath)) { innerbucketsize = 1.0 / virtualbuckets; innermcvfreq = 0.0; + innerndistinct = inner_path_rows; } else { innerbucketsize = 1.0; innermcvfreq = 1.0; + innerndistinct = 1.0; + foreach(hcl, hashclauses) { RestrictInfo *restrictinfo = lfirst_node(RestrictInfo, hcl); Selectivity thisbucketsize; Selectivity thismcvfreq; + double thisinnerndistinct; + double thisouterndistinct; + VariableStatData vardatainner; + VariableStatData vardataouter; + bool isdefault; /* * First we have to figure out which side of the hashjoin clause @@ -3465,6 +3478,25 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, thisbucketsize = restrictinfo->right_bucketsize; } thismcvfreq = restrictinfo->right_mcvfreq; + + examine_variable(root, get_rightop(restrictinfo->clause), 0, &vardatainner); + thisinnerndistinct = get_variable_numdistinct(&vardatainner, &isdefault); + if (vardatainner.rel && vardatainner.rel->tuples > 0) + { + thisinnerndistinct *= vardatainner.rel->rows / vardatainner.rel->tuples; + thisinnerndistinct = clamp_row_est(thisinnerndistinct); + } + ReleaseVariableStats(vardatainner); + + /* lefthand side is outer */ + examine_variable(root, get_leftop(restrictinfo->clause), 0, &vardataouter); + thisouterndistinct = get_variable_numdistinct(&vardataouter, &isdefault); + if (vardataouter.rel && vardataouter.rel->tuples > 0) + { + thisouterndistinct *= vardataouter.rel->rows / vardataouter.rel->tuples; + thisouterndistinct = clamp_row_est(thisouterndistinct); + } + ReleaseVariableStats(vardataouter); } else { @@ -3483,12 +3515,35 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, thisbucketsize = restrictinfo->left_bucketsize; } thismcvfreq = restrictinfo->left_mcvfreq; + + examine_variable(root, get_leftop(restrictinfo->clause), 0, &vardatainner); + thisinnerndistinct = get_variable_numdistinct(&vardatainner, &isdefault); + if (vardatainner.rel && vardatainner.rel->tuples > 0) + { + 
thisinnerndistinct *= vardatainner.rel->rows / vardatainner.rel->tuples; + thisinnerndistinct = clamp_row_est(thisinnerndistinct); + } + ReleaseVariableStats(vardatainner); + + /* righthand side is outer */ + examine_variable(root, get_rightop(restrictinfo->clause), 0, &vardataouter); + thisouterndistinct = get_variable_numdistinct(&vardataouter, &isdefault); + if (vardataouter.rel && vardataouter.rel->tuples > 0) + { + thisouterndistinct *= vardataouter.rel->rows / vardataouter.rel->tuples; + thisouterndistinct = clamp_row_est(thisouterndistinct); + } + ReleaseVariableStats(vardataouter); } if (innerbucketsize > thisbucketsize) innerbucketsize = thisbucketsize; if (innermcvfreq > thismcvfreq) innermcvfreq = thismcvfreq; + if (outerndistinct < thisouterndistinct) + outerndistinct = thisouterndistinct; + if (innerndistinct < thisinnerndistinct) + innerndistinct = thisinnerndistinct; } } @@ -3516,6 +3571,21 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, /* CPU costs */ + /* + * If virtualbuckets is much larger than innerndistinct, and + * outerndistinct is much larger than innerndistinct. Then most + * tuples of the outer table will match the empty bucket. So when + * we calculate the cost of traversing the bucket, we need to ignore + * the tuple matching empty bucket. 
+ */ + outer_match_nonempty_frac = 1.0; + if (virtualbuckets > innerndistinct * 2 && outerndistinct > innerndistinct * 2) + { + outer_match_nonempty_frac = (1 - + ((outerndistinct - innerndistinct)/outerndistinct)* + ((virtualbuckets - innerndistinct)/virtualbuckets)); + } + if (path->jpath.jointype == JOIN_SEMI || path->jpath.jointype == JOIN_ANTI || extra->inner_unique) @@ -3539,7 +3609,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, inner_scan_frac = 2.0 / (extra->semifactors.match_count + 1.0); startup_cost += hash_qual_cost.startup; - run_cost += hash_qual_cost.per_tuple * outer_matched_rows * + run_cost += hash_qual_cost.per_tuple * outer_matched_rows * outer_match_nonempty_frac * clamp_row_est(inner_path_rows * innerbucketsize * inner_scan_frac) * 0.5; /* @@ -3579,6 +3649,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, */ startup_cost += hash_qual_cost.startup; run_cost += hash_qual_cost.per_tuple * outer_path_rows * + outer_match_nonempty_frac * clamp_row_est(inner_path_rows * innerbucketsize) * 0.5; /* diff --git a/src/test/regress/expected/join_hash.out b/src/test/regress/expected/join_hash.out index 3a91c144a2..548f9724ad 100644 --- a/src/test/regress/expected/join_hash.out +++ b/src/test/regress/expected/join_hash.out @@ -1012,4 +1012,28 @@ WHERE text | t | hjtest_1 | hjtest_2 (1 row) +-- If virtualbuckets is much larger than innerndistinct, and +-- outerndistinct is much larger than innerndistinct. Then most +-- tuples of the outer table will match the empty bucket. So when +-- we calculate the cost of traversing the bucket, we need to ignore +-- the tuple matching empty bucket. 
+savepoint settings; +set max_parallel_workers_per_gather = 0; +create table join_hash_t_small(a int); +create table join_hash_t_big(b int); +insert into join_hash_t_small select i%100 from generate_series(0, 3000)i; +insert into join_hash_t_big select i%100000 from generate_series(1, 100000)i ; +analyze join_hash_t_small; +analyze join_hash_t_big; +explain (costs off) select * from join_hash_t_small, join_hash_t_big where a = b; + QUERY PLAN +-------------------------------------------------------- + Hash Join + Hash Cond: (join_hash_t_big.b = join_hash_t_small.a) + -> Seq Scan on join_hash_t_big + -> Hash + -> Seq Scan on join_hash_t_small +(5 rows) + +rollback to settings; ROLLBACK; diff --git a/src/test/regress/sql/join_hash.sql b/src/test/regress/sql/join_hash.sql index 68c1a8c7b6..154e9e0085 100644 --- a/src/test/regress/sql/join_hash.sql +++ b/src/test/regress/sql/join_hash.sql @@ -537,4 +537,23 @@ WHERE AND (SELECT hjtest_2.c * 5) < 55 AND hjtest_1.a <> hjtest_2.b; +-- If virtualbuckets is much larger than innerndistinct, and +-- outerndistinct is much larger than innerndistinct. Then most +-- tuples of the outer table will match the empty bucket. So when +-- we calculate the cost of traversing the bucket, we need to ignore +-- the tuple matching empty bucket. +savepoint settings; +set max_parallel_workers_per_gather = 0; +create table join_hash_t_small(a int); +create table join_hash_t_big(b int); + +insert into join_hash_t_small select i%100 from generate_series(0, 3000)i; +insert into join_hash_t_big select i%100000 from generate_series(1, 100000)i ; + +analyze join_hash_t_small; +analyze join_hash_t_big; + +explain (costs off) select * from join_hash_t_small, join_hash_t_big where a = b; +rollback to settings; + ROLLBACK; -- 2.14.2