diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c
index 277639f..1c4353a 100644
--- a/contrib/file_fdw/file_fdw.c
+++ b/contrib/file_fdw/file_fdw.c
@@ -1013,7 +1013,9 @@ estimate_size(PlannerInfo *root, RelOptInfo *baserel,
baserel->baserestrictinfo,
0,
JOIN_INNER,
- NULL);
+ NULL,
+ NULL,
+ false);
nrows = clamp_row_est(nrows);
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 03f1480..9f21297 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -591,7 +591,9 @@ postgresGetForeignRelSize(PlannerInfo *root,
fpinfo->local_conds,
baserel->relid,
JOIN_INNER,
- NULL);
+ NULL,
+ NULL,
+ false);
cost_qual_eval(&fpinfo->local_conds_cost, fpinfo->local_conds, root);
@@ -2573,7 +2575,9 @@ estimate_path_cost_size(PlannerInfo *root,
local_param_join_conds,
foreignrel->relid,
JOIN_INNER,
- NULL);
+ NULL,
+ NULL,
+ false);
local_sel *= fpinfo->local_conds_sel;
rows = clamp_row_est(rows * local_sel);
@@ -4447,7 +4451,9 @@ postgresGetForeignJoinPaths(PlannerInfo *root,
fpinfo->local_conds,
0,
JOIN_INNER,
- NULL);
+ NULL,
+ NULL,
+ false);
cost_qual_eval(&fpinfo->local_conds_cost, fpinfo->local_conds, root);
/*
@@ -4457,7 +4463,7 @@ postgresGetForeignJoinPaths(PlannerInfo *root,
if (!fpinfo->use_remote_estimate)
fpinfo->joinclause_sel = clauselist_selectivity(root, fpinfo->joinclauses,
0, fpinfo->jointype,
- extra->sjinfo);
+ extra->sjinfo, NULL, false);
/* Estimate costs for bare join relation */
estimate_path_cost_size(root, joinrel, NIL, NIL, &rows,
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index ac39c63..58b5ca9 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -4339,6 +4339,15 @@
+
+ stadependencies
+ pg_dependencies
+
+
+ Functional dependencies, serialized as pg_dependencies> type.
+
+
+
diff --git a/doc/src/sgml/planstats.sgml b/doc/src/sgml/planstats.sgml
index b73c66b..b61d159 100644
--- a/doc/src/sgml/planstats.sgml
+++ b/doc/src/sgml/planstats.sgml
@@ -446,6 +446,150 @@ rows = (outer_cardinality * inner_cardinality) * selectivity
in src/backend/utils/adt/selfuncs.c.
+
+ Functional Dependencies
+
+
+ The simplest type of extended statistics are functional dependencies,
+ used in definitions of database normal forms. When simplified, saying that
+ b> is functionally dependent on a> means that
+ knowledge of value of a> is sufficient to determine value of
+ b>.
+
+
+
+ In normalized databases, only functional dependencies on primary keys
+ and super keys are allowed. In practice however many data sets are not
+ fully normalized, for example due to intentional denormalization for
+ performance reasons.
+
+
+
+ Functional dependencies directly affect accuracy of the estimates, as
+ conditions on the dependent column(s) do not restrict the result set
+ any further, so treating them as independent results in underestimates.
+
+
+
+ To inform the planner about the functional dependencies, or rather to
+ instruct it to search for them during ANALYZE>, we can use
+ the CREATE STATISTICS> command.
+
+
+CREATE TABLE t (a INT, b INT);
+INSERT INTO t SELECT i/100, i/100 FROM generate_series(1,10000) s(i);
+CREATE STATISTICS s1 WITH (dependencies) ON (a, b) FROM t;
+ANALYZE t;
+EXPLAIN ANALYZE SELECT * FROM t WHERE a = 1 AND b = 1;
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------
+ Seq Scan on t (cost=0.00..195.00 rows=100 width=8) (actual time=0.095..3.118 rows=100 loops=1)
+ Filter: ((a = 1) AND (b = 1))
+ Rows Removed by Filter: 9900
+ Planning time: 0.367 ms
+ Execution time: 3.380 ms
+(5 rows)
+
+
+ The planner is now aware of the functional dependencies and considers
+ them when computing selectivity of the second condition. Running
+ the query without the statistics would lead to quite different estimates.
+
+
+DROP STATISTICS s1;
+EXPLAIN ANALYZE SELECT * FROM t WHERE a = 1 AND b = 1;
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------
+ Seq Scan on t (cost=0.00..195.00 rows=1 width=8) (actual time=0.000..6.379 rows=100 loops=1)
+ Filter: ((a = 1) AND (b = 1))
+ Rows Removed by Filter: 9900
+ Planning time: 0.000 ms
+ Execution time: 6.379 ms
+(5 rows)
+
+
+
+
+ Similarly to per-column statistics, extended statistics are stored in
+ a system catalog called pg_statistic_ext, but
+ there is also a more convenient view pg_stats_ext.
+ To inspect the statistics s1 defined above,
+ you may do this:
+
+
+SELECT tablename, staname, attnums, depsbytes
+ FROM pg_stats_ext WHERE staname = 's1';
+ tablename | staname | attnums | depsbytes
+-----------+---------+---------+-----------
+ t | s1 | 1 2 | 40
+(1 row)
+
+
+ This shows that the statistics are defined on table t>,
+ attnums lists attribute numbers of columns
+ (references pg_attribute). It also shows
+ the length in bytes of the functional dependencies, as found by
+ ANALYZE> when serialized into a bytea> column.
+
+
+
+ When computing the selectivity, the planner inspects all conditions and
+ attempts to identify which conditions are already implied by some other
+ conditions, and eliminates them (but only for the estimation, all
+ conditions will be evaluated on tuples during execution). In the example
+ query, either of the conditions may get eliminated, improving the estimate.
+
+
+
+ Limitations of functional dependencies
+
+
+ Functional dependencies are a very simple type of statistics, and
+ as such have several limitations. The first limitation is that they
+ only work with simple equality conditions, comparing columns and constant
+ values. It's not possible to use them to eliminate equality conditions
+ comparing two columns or a column to an expression, range clauses,
+ LIKE> or any other type of conditions.
+
+
+
+ When eliminating the implied conditions, the planner assumes that the
+ conditions are compatible. Consider the following example, violating
+ this assumption:
+
+
+EXPLAIN ANALYZE SELECT * FROM t WHERE a = 1 AND b = 10;
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------
+ Seq Scan on t (cost=0.00..195.00 rows=100 width=8) (actual time=2.992..2.992 rows=0 loops=1)
+ Filter: ((a = 1) AND (b = 10))
+ Rows Removed by Filter: 10000
+ Planning time: 0.232 ms
+ Execution time: 3.033 ms
+(5 rows)
+
+
+ While there are no rows with such combination of values, the planner
+ is unable to verify whether the values match - it only knows that
+ the columns are functionally dependent.
+
+
+
+ This assumption is more about queries executed on the database - in many
+ cases it's actually satisfied (e.g. when the GUI only allows selecting
+ compatible values). But if that's not the case, functional dependencies
+ may not be a viable option.
+
+
+
+ For additional information about functional dependencies, see
+ src/backend/statistics/README.dependencies>.
+
+
+
+
+
+
diff --git a/doc/src/sgml/ref/create_statistics.sgml b/doc/src/sgml/ref/create_statistics.sgml
index 60184a3..6600edf 100644
--- a/doc/src/sgml/ref/create_statistics.sgml
+++ b/doc/src/sgml/ref/create_statistics.sgml
@@ -21,8 +21,9 @@ PostgreSQL documentation
-CREATE STATISTICS [ IF NOT EXISTS ] statistics_name ON (
- column_name, column_name [, ...])
+CREATE STATISTICS [ IF NOT EXISTS ] statistics_name
+ WITH ( option [= value] [, ... ] )
+ ON ( column_name, column_name [, ...])
FROM table_name
@@ -94,6 +95,41 @@ CREATE STATISTICS [ IF NOT EXISTS ] statistics_na
+
+ Parameters
+
+
+ statistics parameters
+
+
+
+ The WITH> clause can specify options>
+ for statistics. The currently available parameters are listed below.
+
+
+
+
+
+ dependencies> (boolean>)
+
+
+ Enables functional dependencies for the statistics.
+
+
+
+
+
+ ndistinct> (boolean>)
+
+
+ Enables ndistinct coefficients for the statistics.
+
+
+
+
+
+
+
@@ -122,7 +158,7 @@ CREATE TABLE t1 (
INSERT INTO t1 SELECT i/100, i/500
FROM generate_series(1,1000000) s(i);
-CREATE STATISTICS s1 ON (a, b) FROM t1;
+CREATE STATISTICS s1 WITH (dependencies) ON (a, b) FROM t1;
ANALYZE t1;
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index d357c8b..c19e68e 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -192,7 +192,8 @@ CREATE VIEW pg_stats_ext AS
C.relname AS tablename,
S.staname AS staname,
S.stakeys AS attnums,
- length(s.standistinct) AS ndistbytes
+ length(s.standistinct::bytea) AS ndistbytes,
+ length(S.stadependencies::bytea) AS depsbytes
FROM (pg_statistic_ext S JOIN pg_class C ON (C.oid = S.starelid))
LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace);
diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c
index 0750329..8d483db 100644
--- a/src/backend/commands/statscmds.c
+++ b/src/backend/commands/statscmds.c
@@ -62,10 +62,11 @@ CreateStatistics(CreateStatsStmt *stmt)
Oid relid;
ObjectAddress parentobject,
childobject;
- Datum types[1]; /* only ndistinct defined now */
+ Datum types[2]; /* one for each possible type of statistics */
int ntypes;
ArrayType *staenabled;
bool build_ndistinct;
+ bool build_dependencies;
bool requested_type = false;
Assert(IsA(stmt, CreateStatsStmt));
@@ -159,7 +160,7 @@ CreateStatistics(CreateStatsStmt *stmt)
errmsg("statistics require at least 2 columns")));
/*
- * Sort the attnums, which makes detecting duplicies somewhat easier, and
+ * Sort the attnums, which makes detecting duplicates somewhat easier, and
* it does not hurt (it does not affect the efficiency, unlike for
* indexes, for example).
*/
@@ -182,6 +183,7 @@ CreateStatistics(CreateStatsStmt *stmt)
* recognized.
*/
build_ndistinct = false;
+ build_dependencies = false;
foreach(l, stmt->options)
{
DefElem *opt = (DefElem *) lfirst(l);
@@ -191,6 +193,11 @@ CreateStatistics(CreateStatsStmt *stmt)
build_ndistinct = defGetBoolean(opt);
requested_type = true;
}
+ else if (strcmp(opt->defname, "dependencies") == 0)
+ {
+ build_dependencies = defGetBoolean(opt);
+ requested_type = true;
+ }
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
@@ -199,12 +206,17 @@ CreateStatistics(CreateStatsStmt *stmt)
}
/* If no statistic type was specified, build them all. */
if (!requested_type)
+ {
build_ndistinct = true;
+ build_dependencies = true;
+ }
/* construct the char array of enabled statistic types */
ntypes = 0;
if (build_ndistinct)
types[ntypes++] = CharGetDatum(STATS_EXT_NDISTINCT);
+ if (build_dependencies)
+ types[ntypes++] = CharGetDatum(STATS_EXT_DEPENDENCIES);
Assert(ntypes > 0);
staenabled = construct_array(types, ntypes, CHAROID, 1, true, 'c');
@@ -222,6 +234,7 @@ CreateStatistics(CreateStatsStmt *stmt)
/* no statistics build yet */
nulls[Anum_pg_statistic_ext_standistinct - 1] = true;
+ nulls[Anum_pg_statistic_ext_stadependencies - 1] = true;
/* insert it into pg_statistic_ext */
statrel = heap_open(StatisticExtRelationId, RowExclusiveLock);
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index af2934a..369e481 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -14,14 +14,20 @@
*/
#include "postgres.h"
+#include "access/sysattr.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_statistic_ext.h"
#include "nodes/makefuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/plancat.h"
+#include "optimizer/var.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
+#include "statistics/statistics.h"
+#include "utils/typcache.h"
/*
@@ -40,6 +46,22 @@ typedef struct RangeQueryClause
static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
bool varonleft, bool isLTsel, Selectivity s2);
+static MVDependency *find_strongest_dependency(StatisticExtInfo *stats,
+ MVDependencies *dependencies,
+ Bitmapset *attnums);
+static Selectivity clauselist_ext_selectivity_deps(PlannerInfo *root, Index relid,
+ List *clauses, StatisticExtInfo *stats,
+ Index varRelid, JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ RelOptInfo *rel);
+static Bitmapset *collect_ext_attnums(List *clauses, Index relid);
+static int count_attnums_covered_by_stats(StatisticExtInfo *info, Bitmapset *attnums);
+static StatisticExtInfo *choose_ext_statistics(List *stats, Bitmapset *attnums, char requiredkind);
+static List *clauselist_ext_split(PlannerInfo *root, Index relid,
+ List *clauses, List **mvclauses,
+ StatisticExtInfo *stats);
+static bool clause_is_ext_compatible(Node *clause, Index relid, AttrNumber *attnum);
+static bool has_stats_of_kind(List *stats, char requiredkind);
/****************************************************************************
@@ -60,23 +82,33 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
* subclauses. However, that's only right if the subclauses have independent
* probabilities, and in reality they are often NOT independent. So,
* we want to be smarter where we can.
-
- * Currently, the only extra smarts we have is to recognize "range queries",
- * such as "x > 34 AND x < 42". Clauses are recognized as possible range
- * query components if they are restriction opclauses whose operators have
- * scalarltsel() or scalargtsel() as their restriction selectivity estimator.
- * We pair up clauses of this form that refer to the same variable. An
- * unpairable clause of this kind is simply multiplied into the selectivity
- * product in the normal way. But when we find a pair, we know that the
- * selectivities represent the relative positions of the low and high bounds
- * within the column's range, so instead of figuring the selectivity as
- * hisel * losel, we can figure it as hisel + losel - 1. (To visualize this,
- * see that hisel is the fraction of the range below the high bound, while
- * losel is the fraction above the low bound; so hisel can be interpreted
- * directly as a 0..1 value but we need to convert losel to 1-losel before
- * interpreting it as a value. Then the available range is 1-losel to hisel.
- * However, this calculation double-excludes nulls, so really we need
- * hisel + losel + null_frac - 1.)
+ *
+ * When 'tryextstats' is true, and 'rel' is not null, we'll try to apply
+ * selectivity estimates using any extended statistics on 'rel'. Currently this
+ * is limited only to base relations with an rtekind of RTE_RELATION.
+ *
+ * If we find that such extended statistics are applicable, we use them.
+ * Currently we only have (soft) functional dependencies, which let us
+ * estimate the clauses they cover and so reduce the list of clauses.
+ *
+ * Then we remove the clauses estimated using extended stats, and process
+ * the rest of the clauses using the regular per-column stats.
+ *
+ * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
+ * are recognized as possible range query components if they are restriction
+ * opclauses whose operators have scalarltsel() or scalargtsel() as their
+ * restriction selectivity estimator. We pair up clauses of this form that
+ * refer to the same variable. An unpairable clause of this kind is simply
+ * multiplied into the selectivity product in the normal way. But when we
+ * find a pair, we know that the selectivities represent the relative
+ * positions of the low and high bounds within the column's range, so instead
+ * of figuring the selectivity as hisel * losel, we can figure it as hisel +
+ * losel - 1. (To visualize this, see that hisel is the fraction of the range
+ * below the high bound, while losel is the fraction above the low bound; so
+ * hisel can be interpreted directly as a 0..1 value but we need to convert
+ * losel to 1-losel before interpreting it as a value. Then the available
+ * range is 1-losel to hisel. However, this calculation double-excludes
+ * nulls, so really we need hisel + losel + null_frac - 1.)
*
* If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
* and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
@@ -93,19 +125,71 @@ clauselist_selectivity(PlannerInfo *root,
List *clauses,
int varRelid,
JoinType jointype,
- SpecialJoinInfo *sjinfo)
+ SpecialJoinInfo *sjinfo,
+ RelOptInfo *rel,
+ bool tryextstats)
{
Selectivity s1 = 1.0;
RangeQueryClause *rqlist = NULL;
ListCell *l;
/*
- * If there's exactly one clause, then no use in trying to match up pairs,
- * so just go directly to clause_selectivity().
+ * If there's exactly one clause, then extended statistics is futile
+ * at this level (we might be able to apply them later if it's AND/OR
+ * clause). So just go directly to clause_selectivity().
*/
if (list_length(clauses) == 1)
return clause_selectivity(root, (Node *) linitial(clauses),
- varRelid, jointype, sjinfo);
+ varRelid, jointype, sjinfo, rel, tryextstats);
+
+ /*
+ * Check for common reasons where we can't apply multivariate dependency
+ * statistics. We want to be as cheap as possible here as most likely
+ * we'll not be using multivariate statistics in most cases.
+ */
+ if (tryextstats && rel && rel->rtekind == RTE_RELATION &&
+ rel->statlist != NIL &&
+ has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
+ {
+ Index relid = rel->relid;
+ Bitmapset *mvattnums;
+
+ /*
+ * Now that we've validated that we actually have some multivariate
+ * statistics, we'll want to check that the clauses reference more
+ * than a single column.
+ */
+
+ /* extract all of the attribute attnums into a bitmap set. */
+ mvattnums = collect_ext_attnums(clauses, relid);
+
+ /* we can't do anything with mv stats unless we got two or more */
+ if (bms_num_members(mvattnums) >= 2)
+ {
+ StatisticExtInfo *stat;
+
+ /* and search for the statistic covering the most attributes */
+ stat = choose_ext_statistics(rel->statlist, mvattnums,
+ STATS_EXT_DEPENDENCIES);
+
+ if (stat != NULL) /* we have a matching stats */
+ {
+ /* clauses compatible with multi-variate stats */
+ List *mvclauses = NIL;
+
+ /* split the clauselist into regular and mv-clauses */
+ clauses = clauselist_ext_split(root, relid, clauses,
+ &mvclauses, stat);
+
+ /* Empty list of clauses is a clear sign something went wrong. */
+ Assert(list_length(mvclauses));
+
+ /* compute the extended stats (dependencies) */
+ s1 *= clauselist_ext_selectivity_deps(root, relid, mvclauses, stat,
+ varRelid, jointype, sjinfo, rel);
+ }
+ }
+ }
/*
* Initial scan over clauses. Anything that doesn't look like a potential
@@ -119,7 +203,8 @@ clauselist_selectivity(PlannerInfo *root,
Selectivity s2;
/* Always compute the selectivity using clause_selectivity */
- s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo);
+ s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo, rel,
+ tryextstats);
/*
* Check for being passed a RestrictInfo.
@@ -484,7 +569,9 @@ clause_selectivity(PlannerInfo *root,
Node *clause,
int varRelid,
JoinType jointype,
- SpecialJoinInfo *sjinfo)
+ SpecialJoinInfo *sjinfo,
+ RelOptInfo *rel,
+ bool tryextstats)
{
Selectivity s1 = 0.5; /* default for any unhandled clause type */
RestrictInfo *rinfo = NULL;
@@ -604,7 +691,9 @@ clause_selectivity(PlannerInfo *root,
(Node *) get_notclausearg((Expr *) clause),
varRelid,
jointype,
- sjinfo);
+ sjinfo,
+ rel,
+ tryextstats);
}
else if (and_clause(clause))
{
@@ -613,7 +702,9 @@ clause_selectivity(PlannerInfo *root,
((BoolExpr *) clause)->args,
varRelid,
jointype,
- sjinfo);
+ sjinfo,
+ rel,
+ tryextstats);
}
else if (or_clause(clause))
{
@@ -632,7 +723,9 @@ clause_selectivity(PlannerInfo *root,
(Node *) lfirst(arg),
varRelid,
jointype,
- sjinfo);
+ sjinfo,
+ rel,
+ tryextstats);
s1 = s1 + s2 - s1 * s2;
}
@@ -725,7 +818,9 @@ clause_selectivity(PlannerInfo *root,
(Node *) ((RelabelType *) clause)->arg,
varRelid,
jointype,
- sjinfo);
+ sjinfo,
+ rel,
+ tryextstats);
}
else if (IsA(clause, CoerceToDomain))
{
@@ -734,7 +829,9 @@ clause_selectivity(PlannerInfo *root,
(Node *) ((CoerceToDomain *) clause)->arg,
varRelid,
jointype,
- sjinfo);
+ sjinfo,
+ rel,
+ tryextstats);
}
else
{
@@ -763,3 +860,542 @@ clause_selectivity(PlannerInfo *root,
return s1;
}
+
+/*
+ * find_strongest_dependency
+ * find the strongest dependency on the attributes
+ *
+ * When applying functional dependencies, we start with the strongest
+ * dependencies. That is, we select the dependency that:
+ *
+ * (a) has all attributes covered by equality clauses
+ * (b) has the most attributes
+ * (c) has the highest degree of validity
+ *
+ * This guarantees that we eliminate the most redundant conditions first
+ * (see the comment at clauselist_ext_selectivity_deps).
+ *
+ * Returns NULL if dependency_is_fully_matched() accepts no dependency.
+ */
+static MVDependency *
+find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies,
+ Bitmapset *attnums)
+{
+ int i;
+ MVDependency *strongest = NULL;
+
+ /* number of distinct attnums referenced by the clauses */
+ int nattnums = bms_num_members(attnums);
+
+ /*
+ * Iterate over the MVDependency items and find the strongest one from
+ * the fully-matched dependencies. We do the cheap checks first, before
+ * matching it against the attnums.
+ */
+ for (i = 0; i < dependencies->ndeps; i++)
+ {
+ MVDependency *dependency = dependencies->deps[i];
+
+ /*
+ * Skip dependencies referencing more attributes than the clauses do,
+ * as those can't be fully matched.
+ */
+ if (dependency->nattributes > nattnums)
+ continue;
+
+ /* We can skip dependencies on fewer attributes than the best one. */
+ if (strongest && (strongest->nattributes > dependency->nattributes))
+ continue;
+
+ /* And also strictly weaker ones on the same number of attributes. */
+ if (strongest &&
+ (strongest->nattributes == dependency->nattributes) &&
+ (strongest->degree > dependency->degree))
+ continue;
+
+ /*
+ * Check if the dependency is fully matched to the attnums. If so we
+ * can save it as the strongest match, since we rejected any weaker
+ * matches above.
+ */
+ if (dependency_is_fully_matched(dependency, attnums))
+ strongest = dependency;
+ }
+
+ return strongest;
+}
+
+/*
+ * clauselist_ext_selectivity_deps
+ * estimate selectivity using functional dependencies
+ *
+ * Given equality clauses on attributes (a,b) we find the strongest dependency
+ * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
+ * dependency, we then combine the per-clause selectivities using the formula
+ *
+ * P(a,b) = P(a) * [f + (1-f)*P(b)]
+ *
+ * where 'f' is the degree of the dependency.
+ *
+ * With clauses on more than two attributes, the dependencies are applied
+ * recursively, starting with the widest/strongest dependencies. For example
+ * P(a,b,c) is first split like this:
+ *
+ * P(a,b,c) = P(a,b) * [f + (1-f)*P(c)]
+ *
+ * assuming (a,b=>c) is the strongest dependency.
+ */
+static Selectivity
+clauselist_ext_selectivity_deps(PlannerInfo *root, Index relid,
+ List *clauses, StatisticExtInfo *stats,
+ Index varRelid, JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ RelOptInfo *rel)
+{
+ ListCell *lc;
+ Selectivity s1 = 1.0;
+ MVDependencies *dependencies;
+
+ Assert(stats->kind == STATS_EXT_DEPENDENCIES);
+
+ /* load the dependency items stored in the statistics */
+ dependencies = staext_dependencies_load(stats->statOid);
+
+ Assert(dependencies);
+
+ /*
+ * Apply the dependencies round by round, starting with the widest/strongest
+ * ones, and proceeding to the smaller/weaker ones. At the end of each
+ * round we factor in the selectivity of the clause on the implied
+ * attribute, and drop it from the list used by the next round.
+ */
+ while (true)
+ {
+ Selectivity s2 = 1.0;
+ Bitmapset *attnums;
+ MVDependency *dependency;
+
+ /* clauses remaining after removing those on the "implied" attribute */
+ List *clauses_filtered = NIL;
+
+ attnums = collect_ext_attnums(clauses, relid);
+
+ /* no point in looking for dependencies with fewer than 2 attributes */
+ if (bms_num_members(attnums) < 2)
+ break;
+
+ /* the widest/strongest dependency, fully matched by clauses */
+ dependency = find_strongest_dependency(stats, dependencies, attnums);
+
+ /* if no suitable dependency was found, we're done */
+ if (!dependency)
+ break;
+
+ /*
+ * We found an applicable dependency, so find all the clauses on the
+ * implied attribute - with dependency (a,b => c) we look for
+ * clauses on 'c'.
+ *
+ * We only expect to find one such clause, as the optimizer will
+ * detect conflicting clauses like
+ *
+ * (b=1) AND (b=2)
+ *
+ * and eliminate them from the list of clauses.
+ */
+ foreach(lc, clauses)
+ {
+ AttrNumber attnum_clause = InvalidAttrNumber;
+ Node *clause = (Node *) lfirst(lc);
+
+ /*
+ * Get the attnum referenced by the clause. At this point we should
+ * only see equality clauses compatible with functional dependencies,
+ * so just error out if we stumble upon something else.
+ */
+ if (!clause_is_ext_compatible(clause, relid, &attnum_clause))
+ elog(ERROR, "clause not compatible with functional dependencies");
+
+ Assert(AttributeNumberIsValid(attnum_clause));
+
+ /*
+ * If the clause is not on the implied attribute, add it to the list
+ * of filtered clauses (for the next round) and continue with the
+ * next one.
+ */
+ if (!dependency_implies_attribute(dependency, attnum_clause))
+ {
+ clauses_filtered = lappend(clauses_filtered, clause);
+ continue;
+ }
+
+ /*
+ * Otherwise compute selectivity of the clause. Note this overwrites
+ * s2, so with several clauses on the attribute the last one wins.
+ */
+ s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo,
+ rel, false);
+ }
+
+ /*
+ * Now factor in the selectivity for all the "implied" clauses into the
+ * final one, using this formula:
+ *
+ * P(a,b) = P(a) * (f + (1-f) * P(b))
+ *
+ * where 'f' is the degree of validity of the dependency.
+ */
+ s1 *= (dependency->degree + (1 - dependency->degree) * s2);
+
+ /* And only keep the filtered clauses for the next round. */
+ clauses = clauses_filtered;
+ }
+
+ pfree(dependencies);
+
+ /* And now simply multiply with selectivities of the remaining clauses. */
+ foreach (lc, clauses)
+ {
+ Node *clause = (Node *) lfirst(lc);
+
+ s1 *= clause_selectivity(root, clause, varRelid, jointype, sjinfo,
+ rel, false);
+ }
+
+ return s1;
+}
+
+/*
+ * collect_ext_attnums
+ * collect attnums from clauses compatible with extended stats
+ *
+ * Functional dependencies only work with equality clauses of the form
+ *
+ * Var = Const
+ *
+ * so walk the clauses, collect their attnums; NULL if fewer than two found.
+ */
+static Bitmapset *
+collect_ext_attnums(List *clauses, Index relid)
+{
+ Bitmapset *attnums = NULL;
+ ListCell *l;
+
+ /*
+ * Walk through the clauses and identify the ones we can estimate using
+ * extended stats, and remember their columns. The caller can then
+ * cross-check if we have suitable stats, and only if needed split
+ * the clauses into extended and regular lists.
+ *
+ * Which clauses qualify is decided by clause_is_ext_compatible(); this
+ * function merely accumulates the attnum each such clause reports.
+ */
+ foreach(l, clauses)
+ {
+ AttrNumber attnum;
+ Node *clause = (Node *) lfirst(l);
+
+ /* incompatible clauses are simply skipped, contributing no attnum */
+ if (clause_is_ext_compatible(clause, relid, &attnum))
+ attnums = bms_add_member(attnums, attnum);
+ }
+
+ /*
+ * If there are not at least two attributes referenced by the clause(s),
+ * we can throw everything out (as we'll revert to simple stats).
+ */
+ if (bms_num_members(attnums) <= 1)
+ {
+ bms_free(attnums);
+ return NULL;
+ }
+
+ return attnums;
+}
+
+/*
+ * count_attnums_covered_by_stats
+ * return the number of 'attnums' that are also keys of this extended
+ * statistics object (size of the intersection with info->keys)
+ */
+static int
+count_attnums_covered_by_stats(StatisticExtInfo *info, Bitmapset *attnums)
+{
+ Bitmapset *covered;
+ int ncovered;
+
+ covered = bms_intersect(attnums, info->keys);
+ ncovered = bms_num_members(covered);
+ bms_free(covered);
+
+ return ncovered;
+}
+
+/*
+ * choose_ext_statistics
+ * pick the statistics of kind 'requiredkind' best matching 'attnums'
+ *
+ * Only statistics matching at least 2 of the attributes are considered, and
+ * the one matching the most attributes is chosen.
+ *
+ * On a tie the one with fewer keys (columns listed in the CREATE STATISTICS
+ * command) wins, based on the assumption that the object is either smaller
+ * or more accurate. Else the first one wins. NULL if nothing qualifies.
+ */
+static StatisticExtInfo *
+choose_ext_statistics(List *stats, Bitmapset *attnums, char requiredkind)
+{
+ ListCell *lc;
+ StatisticExtInfo *choice = NULL;
+ int current_matches = 2; /* goal #1: maximize */
+ int current_dims = (STATS_MAX_DIMENSIONS + 1); /* goal #2: minimize */
+
+ foreach(lc, stats)
+ {
+ StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
+ int matches;
+ int numattrs;
+
+ /* skip statistics that are not the correct type */
+ if (info->kind != requiredkind)
+ continue;
+
+ /* determine how many attributes of these stats can be matched to */
+ matches = count_attnums_covered_by_stats(info, attnums);
+
+ /*
+ * save the actual number of keys in the stats so that we can choose
+ * the narrowest stats with the most matching keys.
+ */
+ numattrs = bms_num_members(info->keys);
+
+ /*
+ * Use these statistics when it increases the number of matched
+ * attributes, or when it matches the same number of attributes but
+ * these stats have fewer keys than any previous match.
+ */
+ if (matches > current_matches ||
+ (matches == current_matches && current_dims > numattrs))
+ {
+ choice = info;
+ current_matches = matches;
+ current_dims = numattrs;
+ }
+ }
+
+ return choice;
+}
+
+
+/*
+ * clauselist_ext_split
+ * split the clause list: compatible clauses on keys of the provided
+ * statistics go to *mvclauses, the remaining ones are returned
+ */
+static List *
+clauselist_ext_split(PlannerInfo *root, Index relid,
+ List *clauses, List **mvclauses,
+ StatisticExtInfo *stats)
+{
+ ListCell *l;
+ List *non_mvclauses = NIL;
+
+ /* erase the list of mv-compatible clauses */
+ *mvclauses = NIL;
+
+ foreach(l, clauses)
+ {
+ bool match = false; /* by default not mv-compatible */
+ AttrNumber attnum = InvalidAttrNumber;
+ Node *clause = (Node *) lfirst(l);
+
+ if (clause_is_ext_compatible(clause, relid, &attnum))
+ {
+ /* is the referenced attribute a key of the selected stats? */
+ if (bms_is_member(attnum, stats->keys))
+ match = true;
+ }
+
+ /*
+ * The clause matches the selected stats, so put it to the list of
+ * mv-compatible clauses. Otherwise, keep it in the list of 'regular'
+ * clauses (that may be selected later).
+ */
+ if (match)
+ *mvclauses = lappend(*mvclauses, clause);
+ else
+ non_mvclauses = lappend(non_mvclauses, clause);
+ }
+
+ /*
+ * Hand back the clauses not matched to the chosen statistics, so the
+ * caller can estimate them in the regular way.
+ */
+ return non_mvclauses;
+
+}
+
+typedef struct
+{
+ Index varno; /* only Vars with this varno are accepted by the walker */
+ Bitmapset *varattnos; /* attnums of accepted Vars, added by the walker */
+} mv_compatible_context;
+
+/*
+ * Recursive walker checking compatibility of a clause with extended stats;
+ * returns true if NOT compatible, else false with the Var's attnum collected.
+ */
+static bool
+mv_compatible_walker(Node *node, mv_compatible_context *context)
+{
+ if (node == NULL)
+ return false;
+
+ if (IsA(node, RestrictInfo))
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) node;
+
+ /* Pseudoconstants are not really interesting here. */
+ if (rinfo->pseudoconstant)
+ return true;
+
+ /* clauses referencing multiple varnos are incompatible */
+ if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
+ return true;
+
+ /* check the clause inside the RestrictInfo */
+ return mv_compatible_walker((Node *) rinfo->clause, context);
+ }
+
+ if (IsA(node, Var))
+ {
+ Var *var = (Var *) node;
+
+ /*
+ * Also, the variable needs to reference the right relid (this might
+ * be unnecessary given the other checks, but let's be sure).
+ */
+ if (var->varno != context->varno)
+ return true;
+
+ /* Also skip system attributes (we don't allow stats on those). */
+ if (!AttrNumberIsForUserDefinedAttr(var->varattno))
+ return true;
+
+ /* Seems fine, so let's remember the attnum. */
+ context->varattnos = bms_add_member(context->varattnos, var->varattno);
+
+ return false;
+ }
+
+ /*
+ * And finally the operator expressions - we only allow simple expressions
+ * with two arguments, where one is a Var and the other is a constant, and
+ * it's a simple comparison (which we detect using estimator function).
+ */
+ if (is_opclause(node))
+ {
+ OpExpr *expr = (OpExpr *) node;
+ Var *var;
+ bool varonleft = true;
+ bool ok;
+
+ /* Only expressions with two arguments are considered compatible. */
+ if (list_length(expr->args) != 2)
+ return true;
+
+ /* need a single relation and a pseudo-constant on one side of OP */
+ ok = (NumRelids((Node *) expr) == 1) &&
+ (is_pseudo_constant_clause(lsecond(expr->args)) ||
+ (varonleft = false,
+ is_pseudo_constant_clause(linitial(expr->args))));
+
+ /* unsupported structure (two variables or so) */
+ if (!ok)
+ return true;
+
+ /*
+ * If it's not "=" operator, just ignore the clause, as it's not
+ * compatible with functional dependencies. Otherwise note the relid
+ * and attnum for the variable.
+ *
+ * This uses the function for estimating selectivity, not the operator
+ * directly (a bit awkward, but well ...).
+ */
+ switch (get_oprrest(expr->opno))
+ {
+ case F_EQSEL:
+
+ /* equality conditions are compatible with all statistics */
+ break;
+
+ default:
+
+ /* unknown estimator */
+ return true;
+ }
+
+ var = (varonleft) ? linitial(expr->args) : lsecond(expr->args);
+
+ return mv_compatible_walker((Node *) var, context);
+ }
+
+ /* Node not explicitly supported, so terminate */
+ return true;
+}
+
+/*
+ * clause_is_ext_compatible
+ * decide if the clause is compatible with extended statistics
+ *
+ * Determines whether the clause is compatible with extended stats for
+ * relation 'relid'. If it is, returns true and stores the attribute
+ * number of the referenced column in *attnum; otherwise returns false
+ * and leaves *attnum untouched.
+ *
+ * At this moment we only support basic conditions of the form
+ *
+ * variable OP constant
+ *
+ * where OP is '=' (determined by looking at the associated function
+ * for estimating selectivity, just like with the single-dimensional
+ * case).
+ */
+static bool
+clause_is_ext_compatible(Node *clause, Index relid, AttrNumber *attnum)
+{
+ mv_compatible_context context;
+
+ context.varno = relid;
+ context.varattnos = NULL; /* no attnums */
+
+ if (mv_compatible_walker(clause, &context))
+ return false;
+
+ /* exactly one attnum is expected to be collected for a compatible clause */
+ *attnum = bms_singleton_member(context.varattnos);
+
+ return true;
+}
+
+/*
+ * has_stats_of_kind
+ * check that the list contains statistic of a given type
+ *
+ * Returns true at the first entry whose kind equals 'requiredkind'.
+ */
+static bool
+has_stats_of_kind(List *stats, char requiredkind)
+{
+ ListCell *s;
+
+ foreach(s, stats)
+ {
+ StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(s);
+
+ if (stat->kind == requiredkind)
+ return true;
+ }
+
+ return false;
+}
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 92de2b7..165729b 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -3713,7 +3713,9 @@ compute_semi_anti_join_factors(PlannerInfo *root,
joinquals,
0,
jointype,
- sjinfo);
+ sjinfo,
+ NULL,
+ false);
/*
* Also get the normal inner-join selectivity of the join clauses.
@@ -3736,7 +3738,9 @@ compute_semi_anti_join_factors(PlannerInfo *root,
joinquals,
0,
JOIN_INNER,
- &norm_sjinfo);
+ &norm_sjinfo,
+ NULL,
+ false);
/* Avoid leaking a lot of ListCells */
if (jointype == JOIN_ANTI)
@@ -3903,7 +3907,8 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals)
Node *qual = (Node *) lfirst(l);
/* Note that clause_selectivity will be able to cache its result */
- selec *= clause_selectivity(root, qual, 0, JOIN_INNER, &sjinfo);
+ selec *= clause_selectivity(root, qual, 0, JOIN_INNER, &sjinfo, NULL,
+ false);
}
/* Apply it to the input relation sizes */
@@ -3939,7 +3944,9 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel)
rel->baserestrictinfo,
0,
JOIN_INNER,
- NULL);
+ NULL,
+ rel,
+ true); /* try ext stats */
rel->rows = clamp_row_est(nrows);
@@ -3976,7 +3983,9 @@ get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel,
allclauses,
rel->relid, /* do not use 0! */
JOIN_INNER,
- NULL);
+ NULL,
+ rel,
+ true); /* try ext stats */
nrows = clamp_row_est(nrows);
/* For safety, make sure result is not more than the base estimate */
if (nrows > rel->rows)
@@ -4142,12 +4151,16 @@ calc_joinrel_size_estimate(PlannerInfo *root,
joinquals,
0,
jointype,
- sjinfo);
+ sjinfo,
+ NULL,
+ false);
pselec = clauselist_selectivity(root,
pushedquals,
0,
jointype,
- sjinfo);
+ sjinfo,
+ NULL,
+ false);
/* Avoid leaking a lot of ListCells */
list_free(joinquals);
@@ -4159,7 +4172,9 @@ calc_joinrel_size_estimate(PlannerInfo *root,
restrictlist,
0,
jointype,
- sjinfo);
+ sjinfo,
+ NULL,
+ false);
pselec = 0.0; /* not used, keep compiler quiet */
}
@@ -4454,7 +4469,7 @@ get_foreign_key_join_selectivity(PlannerInfo *root,
Selectivity csel;
csel = clause_selectivity(root, (Node *) rinfo,
- 0, jointype, sjinfo);
+ 0, jointype, sjinfo, NULL, false);
thisfksel = Min(thisfksel, csel);
}
fkselec *= thisfksel;
diff --git a/src/backend/optimizer/util/orclauses.c b/src/backend/optimizer/util/orclauses.c
index 9cbcaed..633e7d3 100644
--- a/src/backend/optimizer/util/orclauses.c
+++ b/src/backend/optimizer/util/orclauses.c
@@ -280,7 +280,7 @@ consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
* saving work later.)
*/
or_selec = clause_selectivity(root, (Node *) or_rinfo,
- 0, JOIN_INNER, NULL);
+ 0, JOIN_INNER, NULL, rel, true);
/*
* The clause is only worth adding to the query if it rejects a useful
@@ -344,7 +344,7 @@ consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
/* Compute inner-join size */
orig_selec = clause_selectivity(root, (Node *) join_or_rinfo,
- 0, JOIN_INNER, &sjinfo);
+ 0, JOIN_INNER, &sjinfo, NULL, false);
/* And hack cached selectivity so join size remains the same */
join_or_rinfo->norm_selec = orig_selec / or_selec;
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index cc88dcc..e35ea0d 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -1308,6 +1308,18 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
stainfos = lcons(info, stainfos);
}
+ if (statext_is_kind_built(htup, STATS_EXT_DEPENDENCIES))
+ {
+ StatisticExtInfo *info = makeNode(StatisticExtInfo);
+
+ info->statOid = statOid;
+ info->rel = rel;
+ info->kind = STATS_EXT_DEPENDENCIES;
+ info->keys = bms_copy(keys);
+
+ stainfos = lcons(info, stainfos);
+ }
+
ReleaseSysCache(htup);
bms_free(keys);
}
diff --git a/src/backend/statistics/Makefile b/src/backend/statistics/Makefile
index b3615bd..3404e45 100644
--- a/src/backend/statistics/Makefile
+++ b/src/backend/statistics/Makefile
@@ -12,6 +12,6 @@ subdir = src/backend/statistics
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
-OBJS = extended_stats.o mvdistinct.o
+OBJS = extended_stats.o dependencies.o mvdistinct.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/statistics/README b/src/backend/statistics/README
index beb7c24..5ffc76e 100644
--- a/src/backend/statistics/README
+++ b/src/backend/statistics/README
@@ -8,9 +8,74 @@ not true, resulting in estimation errors.
Extended statistics track different types of dependencies between the columns,
hopefully improving the estimates and producing better plans.
-Currently we only have one type of extended statistics - ndistinct
-coefficients, and we use it to improve estimates of grouping queries. See
-README.ndistinct for details.
+
+Types of statistics
+-------------------
+
+There are two kinds of extended statistics:
+
+ (a) ndistinct coefficients
+
+ (b) soft functional dependencies (README.dependencies)
+
+
+Compatible clause types
+-----------------------
+
+Each type of statistics may be used to estimate some subset of clause types.
+
+ (a) functional dependencies - equality clauses (AND), possibly IS NULL
+
+Currently, only OpExprs of the form Var op Const or Const op Var are
+supported; however, it's feasible to expand the code later to also estimate
+the selectivities of clauses such as Var op Var.
+
+
+Complex clauses
+---------------
+
+We also support estimating more complex clauses - essentially AND/OR clauses
+with (Var op Const) as leaves, as long as all the referenced attributes are
+covered by a single statistics object.
+
+For example this condition
+
+ (a=1) AND ((b=2) OR ((c=3) AND (d=4)))
+
+may be estimated using statistics on (a,b,c,d). If we only have statistics on
+(b,c,d) we may estimate the second part, and estimate (a=1) using simple stats.
+
+If we only have statistics on (a,b,c) we can't apply it at all at this point,
+but it's worth pointing out clauselist_selectivity() works recursively and when
+handling the second part (the OR-clause), we'll be able to apply the statistics.
+
+Note: The multi-statistics estimation patch also makes it possible to pass some
+clauses as 'conditions' into the deeper parts of the expression tree.
+
+
+Selectivity estimation
+----------------------
+
+When estimating selectivity, we aim to achieve several things:
+
+ (a) maximize the estimate accuracy
+
+ (b) minimize the overhead, especially when no suitable extended statistics
+ exist (so if you are not using extended stats, there's no overhead)
+
+For that reason, clauselist_selectivity() performs several inexpensive checks
+first, before even attempting to do the more expensive estimation.
+
+ (1) check if there are extended stats on the relation
+
+ (2) check that there are at least two attributes referenced by clauses
+ compatible with extended statistics (equality clauses for func. dependencies)
+
+ (3) perform reduction of equality clauses using func. dependencies
+
+ (4) estimate the reduced list of clauses using regular statistics
+
+Whenever we find there are no suitable stats, we skip the expensive steps.
Size of sample in ANALYZE
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index d2b9f6a..71971ae 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -47,7 +47,7 @@ static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
int natts, VacAttrStats **vacattrstats);
static void statext_store(Relation pg_stext, Oid relid,
- MVNDistinct *ndistinct,
+ MVNDistinct *ndistinct, MVDependencies *dependencies,
VacAttrStats **stats);
@@ -74,6 +74,7 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
{
StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
MVNDistinct *ndistinct = NULL;
+ MVDependencies *dependencies = NULL;
VacAttrStats **stats;
ListCell *lc2;
@@ -93,10 +94,13 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
if (t == STATS_EXT_NDISTINCT)
ndistinct = statext_ndistinct_build(totalrows, numrows, rows,
stat->columns, stats);
+ else if (t == STATS_EXT_DEPENDENCIES)
+ dependencies = statext_dependencies_build(numrows, rows,
+ stat->columns, stats);
}
/* store the statistics in the catalog */
- statext_store(pg_stext, stat->statOid, ndistinct, stats);
+ statext_store(pg_stext, stat->statOid, ndistinct, dependencies, stats);
}
heap_close(pg_stext, RowExclusiveLock);
@@ -117,6 +121,10 @@ statext_is_kind_built(HeapTuple htup, char type)
attnum = Anum_pg_statistic_ext_standistinct;
break;
+ case STATS_EXT_DEPENDENCIES:
+ attnum = Anum_pg_statistic_ext_stadependencies;
+ break;
+
default:
elog(ERROR, "unexpected statistics type requested: %d", type);
}
@@ -178,7 +186,8 @@ fetch_statentries_for_relation(Relation pg_statext, Oid relid)
enabled = (char *) ARR_DATA_PTR(arr);
for (i = 0; i < ARR_DIMS(arr)[0]; i++)
{
- Assert(enabled[i] == STATS_EXT_NDISTINCT);
+ Assert((enabled[i] == STATS_EXT_NDISTINCT) ||
+ (enabled[i] == STATS_EXT_DEPENDENCIES));
entry->types = lappend_int(entry->types, (int) enabled[i]);
}
@@ -256,7 +265,7 @@ lookup_var_attr_stats(Relation rel, Bitmapset *attrs, int natts,
*/
static void
statext_store(Relation pg_stext, Oid statOid,
- MVNDistinct *ndistinct,
+ MVNDistinct *ndistinct, MVDependencies *dependencies,
VacAttrStats **stats)
{
HeapTuple stup,
@@ -280,8 +289,17 @@ statext_store(Relation pg_stext, Oid statOid,
values[Anum_pg_statistic_ext_standistinct - 1] = PointerGetDatum(data);
}
+ if (dependencies != NULL)
+ {
+ bytea *data = statext_dependencies_serialize(dependencies);
+
+ nulls[Anum_pg_statistic_ext_stadependencies - 1] = (data == NULL);
+ values[Anum_pg_statistic_ext_stadependencies - 1] = PointerGetDatum(data);
+ }
+
/* always replace the value (either by bytea or NULL) */
replaces[Anum_pg_statistic_ext_standistinct - 1] = true;
+ replaces[Anum_pg_statistic_ext_stadependencies - 1] = true;
/* there should already be a pg_statistic_ext tuple */
oldtup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid));
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index c2681ce..84934ce 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -1452,6 +1452,13 @@ pg_get_statisticsext_worker(Oid statextid, bool missing_ok)
StringInfoData buf;
int colno;
char *nsp;
+ ArrayType *arr;
+ char *enabled;
+ Datum datum;
+ bool isnull;
+ bool ndistinct_enabled;
+ bool dependencies_enabled;
+ int i;
statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid));
@@ -1467,10 +1474,55 @@ pg_get_statisticsext_worker(Oid statextid, bool missing_ok)
initStringInfo(&buf);
nsp = get_namespace_name(statextrec->stanamespace);
- appendStringInfo(&buf, "CREATE STATISTICS %s ON (",
+ appendStringInfo(&buf, "CREATE STATISTICS %s",
quote_qualified_identifier(nsp,
NameStr(statextrec->staname)));
+ /*
+ * Lookup the staenabled column so that we know how to handle the WITH
+ * clause.
+ */
+ datum = SysCacheGetAttr(STATEXTOID, statexttup,
+ Anum_pg_statistic_ext_staenabled, &isnull);
+ Assert(!isnull);
+ arr = DatumGetArrayTypeP(datum);
+ if (ARR_NDIM(arr) != 1 ||
+ ARR_HASNULL(arr) ||
+ ARR_ELEMTYPE(arr) != CHAROID)
+ elog(ERROR, "staenabled is not a 1-D char array");
+ enabled = (char *) ARR_DATA_PTR(arr);
+
+ ndistinct_enabled = false;
+ dependencies_enabled = false;
+
+ for (i = 0; i < ARR_DIMS(arr)[0]; i++)
+ {
+ if (enabled[i] == STATS_EXT_NDISTINCT)
+ ndistinct_enabled = true;
+ if (enabled[i] == STATS_EXT_DEPENDENCIES)
+ dependencies_enabled = true;
+ }
+
+ /*
+ * If any option is disabled, then we'll need to append a WITH clause to
+ * show which options are enabled. We omit the WITH clause on purpose
+ * when all options are enabled, so a pg_dump/pg_restore will create all
+ * statistics types on a newer postgres version, if the statistics had all
+ * options enabled on the original version.
+ */
+ if (!ndistinct_enabled || !dependencies_enabled)
+ {
+ appendStringInfoString(&buf, " WITH (");
+ if (ndistinct_enabled)
+ appendStringInfoString(&buf, "ndistinct");
+ else if (dependencies_enabled)
+ appendStringInfoString(&buf, "dependencies");
+
+ appendStringInfoChar(&buf, ')');
+ }
+
+ appendStringInfoString(&buf, " ON (");
+
for (colno = 0; colno < statextrec->stakeys.dim1; colno++)
{
AttrNumber attnum = statextrec->stakeys.values[colno];
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 5c382a2..1b18ce2 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -1633,13 +1633,19 @@ booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
case IS_NOT_FALSE:
selec = (double) clause_selectivity(root, arg,
varRelid,
- jointype, sjinfo);
+ jointype,
+ sjinfo,
+ NULL,
+ false);
break;
case IS_FALSE:
case IS_NOT_TRUE:
selec = 1.0 - (double) clause_selectivity(root, arg,
varRelid,
- jointype, sjinfo);
+ jointype,
+ sjinfo,
+ NULL,
+ false);
break;
default:
elog(ERROR, "unrecognized booltesttype: %d",
@@ -6436,7 +6442,9 @@ genericcostestimate(PlannerInfo *root,
indexSelectivity = clauselist_selectivity(root, selectivityQuals,
index->rel->relid,
JOIN_INNER,
- NULL);
+ NULL,
+ index->rel,
+ true);
/*
* If caller didn't give us an estimate, estimate the number of index
@@ -6757,7 +6765,9 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
index->rel->relid,
JOIN_INNER,
- NULL);
+ NULL,
+ index->rel,
+ true);
numIndexTuples = btreeSelectivity * index->rel->tuples;
/*
@@ -7516,7 +7526,9 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
*indexSelectivity = clauselist_selectivity(root, selectivityQuals,
index->rel->relid,
JOIN_INNER,
- NULL);
+ NULL,
+ index->rel,
+ true);
/* fetch estimated page cost for tablespace containing index */
get_tablespace_page_costs(index->reltablespace,
@@ -7748,7 +7760,8 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
*indexSelectivity =
clauselist_selectivity(root, indexQuals,
path->indexinfo->rel->relid,
- JOIN_INNER, NULL);
+ JOIN_INNER, NULL,
+ path->indexinfo->rel, true);
*indexCorrelation = 1;
/*
diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index b0f3e5e..df50beb 100644
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -2331,7 +2331,8 @@ describeOneTableDetails(const char *schemaname,
" FROM ((SELECT pg_catalog.unnest(stakeys) AS attnum) s\n"
" JOIN pg_catalog.pg_attribute a ON (starelid = a.attrelid AND\n"
"a.attnum = s.attnum AND not attisdropped))) AS columns,\n"
- " (staenabled::char[] @> '{d}'::char[]) AS ndist_enabled\n"
+ " (staenabled::char[] @> '{d}'::char[]) AS ndist_enabled,\n"
+ " (staenabled::char[] @> '{f}'::char[]) AS deps_enabled\n"
"FROM pg_catalog.pg_statistic_ext stat WHERE starelid = '%s'\n"
"ORDER BY 1;",
oid);
@@ -2348,7 +2349,7 @@ describeOneTableDetails(const char *schemaname,
for (i = 0; i < tuples; i++)
{
- int cnt = 0;
+ bool gotone = false;
printfPQExpBuffer(&buf, " ");
@@ -2361,7 +2362,12 @@ describeOneTableDetails(const char *schemaname,
if (strcmp(PQgetvalue(result, i, 5), "t") == 0)
{
appendPQExpBufferStr(&buf, "ndistinct");
- cnt++;
+ gotone = true;
+ }
+
+ if (strcmp(PQgetvalue(result, i, 6), "t") == 0)
+ {
+ appendPQExpBuffer(&buf, "%sdependencies", gotone ? ", " : "");
}
appendPQExpBuffer(&buf, ") ON (%s)",
diff --git a/src/include/catalog/pg_cast.h b/src/include/catalog/pg_cast.h
index bc5d28a..c9dd0d8 100644
--- a/src/include/catalog/pg_cast.h
+++ b/src/include/catalog/pg_cast.h
@@ -258,6 +258,10 @@ DATA(insert ( 194 25 0 i b ));
DATA(insert ( 3361 17 0 i b ));
DATA(insert ( 3361 25 0 i i ));
+/* pg_dependencies can be coerced to, but not from, bytea and text */
+DATA(insert ( 3402 17 0 i b ));
+DATA(insert ( 3402 25 0 i i ));
+
/*
* Datetime category
*/
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 220ba7b..a2b29da 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2771,6 +2771,15 @@ DESCR("I/O");
DATA(insert OID = 3358 ( pg_ndistinct_send PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 17 "3361" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_send _null_ _null_ _null_ ));
DESCR("I/O");
+DATA(insert OID = 3375 ( pg_dependencies_in PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3402 "2275" _null_ _null_ _null_ _null_ _null_ pg_dependencies_in _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 3373 ( pg_dependencies_out PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2275 "3402" _null_ _null_ _null_ _null_ _null_ pg_dependencies_out _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 3374 ( pg_dependencies_recv PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 3402 "2281" _null_ _null_ _null_ _null_ _null_ pg_dependencies_recv _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 3377 ( pg_dependencies_send PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 17 "3402" _null_ _null_ _null_ _null_ _null_ pg_dependencies_send _null_ _null_ _null_ ));
+DESCR("I/O");
+
DATA(insert OID = 1928 ( pg_stat_get_numscans PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_numscans _null_ _null_ _null_ ));
DESCR("statistics: number of scans done for table/index");
DATA(insert OID = 1929 ( pg_stat_get_tuples_returned PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_tuples_returned _null_ _null_ _null_ ));
diff --git a/src/include/catalog/pg_statistic_ext.h b/src/include/catalog/pg_statistic_ext.h
index 5f67fe7..0a1cc04 100644
--- a/src/include/catalog/pg_statistic_ext.h
+++ b/src/include/catalog/pg_statistic_ext.h
@@ -46,6 +46,7 @@ CATALOG(pg_statistic_ext,3381)
char staenabled[1] BKI_FORCE_NOT_NULL; /* statistic types
* requested to build */
pg_ndistinct standistinct; /* ndistinct coefficients (serialized) */
+ pg_dependencies stadependencies; /* dependencies (serialized) */
#endif
} FormData_pg_statistic_ext;
@@ -61,7 +62,7 @@ typedef FormData_pg_statistic_ext *Form_pg_statistic_ext;
* compiler constants for pg_statistic_ext
* ----------------
*/
-#define Natts_pg_statistic_ext 7
+#define Natts_pg_statistic_ext 8
#define Anum_pg_statistic_ext_starelid 1
#define Anum_pg_statistic_ext_staname 2
#define Anum_pg_statistic_ext_stanamespace 3
@@ -69,7 +70,9 @@ typedef FormData_pg_statistic_ext *Form_pg_statistic_ext;
#define Anum_pg_statistic_ext_stakeys 5
#define Anum_pg_statistic_ext_staenabled 6
#define Anum_pg_statistic_ext_standistinct 7
+#define Anum_pg_statistic_ext_stadependencies 8
-#define STATS_EXT_NDISTINCT 'd'
+#define STATS_EXT_NDISTINCT 'd'
+#define STATS_EXT_DEPENDENCIES 'f'
#endif /* PG_STATISTIC_EXT_H */
diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h
index 9ad6725..345e916 100644
--- a/src/include/catalog/pg_type.h
+++ b/src/include/catalog/pg_type.h
@@ -368,6 +368,10 @@ DATA(insert OID = 3361 ( pg_ndistinct PGNSP PGUID -1 f b S f t \054 0 0 0 pg_nd
DESCR("multivariate ndistinct coefficients");
#define PGNDISTINCTOID 3361
+DATA(insert OID = 3402 ( pg_dependencies PGNSP PGUID -1 f b S f t \054 0 0 0 pg_dependencies_in pg_dependencies_out pg_dependencies_recv pg_dependencies_send - - - i x f 0 -1 0 100 _null_ _null_ _null_ ));
+DESCR("multivariate dependencies");
+#define PGDEPENDENCIESOID 3402
+
DATA(insert OID = 32 ( pg_ddl_command PGNSP PGUID SIZEOF_POINTER t p P f t \054 0 0 0 pg_ddl_command_in pg_ddl_command_out pg_ddl_command_recv pg_ddl_command_send - - - ALIGNOF_POINTER p f 0 -1 0 0 _null_ _null_ _null_ ));
DESCR("internal type for passing CollectedCommand");
#define PGDDLCOMMANDOID 32
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index d9a9b12..06a3719 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -200,12 +200,16 @@ extern Selectivity clauselist_selectivity(PlannerInfo *root,
List *clauses,
int varRelid,
JoinType jointype,
- SpecialJoinInfo *sjinfo);
+ SpecialJoinInfo *sjinfo,
+ RelOptInfo *rel,
+ bool tryextstats);
extern Selectivity clause_selectivity(PlannerInfo *root,
Node *clause,
int varRelid,
JoinType jointype,
- SpecialJoinInfo *sjinfo);
+ SpecialJoinInfo *sjinfo,
+ RelOptInfo *rel,
+ bool tryextstats);
extern void cost_gather_merge(GatherMergePath *path, PlannerInfo *root,
RelOptInfo *rel, ParamPathInfo *param_info,
Cost input_startup_cost, Cost input_total_cost,
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h
index 961f1f7..0c40b86 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -52,6 +52,11 @@ extern MVNDistinct *statext_ndistinct_build(double totalrows,
extern bytea *statext_ndistinct_serialize(MVNDistinct *ndistinct);
extern MVNDistinct *statext_ndistinct_deserialize(bytea *data);
+extern MVDependencies *statext_dependencies_build(int numrows, HeapTuple *rows,
+ Bitmapset *attrs, VacAttrStats **stats);
+extern bytea *statext_dependencies_serialize(MVDependencies *dependencies);
+extern MVDependencies *statext_dependencies_deserialize(bytea *data);
+
extern MultiSortSupport multi_sort_init(int ndims);
extern void multi_sort_add_dimension(MultiSortSupport mss, int sortdim,
Oid oper);
diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h
index 91645bf..6fd7dce 100644
--- a/src/include/statistics/statistics.h
+++ b/src/include/statistics/statistics.h
@@ -44,7 +44,43 @@ typedef struct MVNDistinct
#define SizeOfMVNDistinct (offsetof(MVNDistinct, nitems) + sizeof(uint32))
+
+#define STATS_DEPS_MAGIC 0xB4549A2C /* marks serialized bytea */
+#define STATS_DEPS_TYPE_BASIC 1 /* basic dependencies type */
+
+/*
+ * A single functional dependency, tracking a column-level relationship
+ * (values in one column determine values in another one).
+ */
+typedef struct MVDependency
+{
+ double degree; /* degree of validity (0-1) */
+ AttrNumber nattributes; /* number of attributes */
+ AttrNumber attributes[FLEXIBLE_ARRAY_MEMBER]; /* attribute numbers */
+} MVDependency;
+
+/* size of the struct up to and including nattributes (no attributes[]) */
+#define SizeOfDependency \
+ (offsetof(MVDependency, nattributes) + sizeof(AttrNumber))
+
+typedef struct MVDependencies
+{
+ uint32 magic; /* magic constant marker (presumably STATS_DEPS_MAGIC) */
+ uint32 type; /* type of MV Dependencies (BASIC) */
+ uint32 ndeps; /* number of dependencies */
+ MVDependency *deps[FLEXIBLE_ARRAY_MEMBER]; /* pointers to dependencies */
+} MVDependencies;
+
+/* size of the struct up to and including ndeps (excludes the deps array) */
+#define SizeOfDependencies (offsetof(MVDependencies, ndeps) + sizeof(uint32))
+
+extern bool dependency_implies_attribute(MVDependency *dependency,
+ AttrNumber attnum);
+extern bool dependency_is_fully_matched(MVDependency *dependency,
+ Bitmapset *attnums);
+
extern MVNDistinct *statext_ndistinct_load(Oid mvoid);
+extern MVDependencies *staext_dependencies_load(Oid mvoid);
extern void BuildRelationExtStatistics(Relation onerel, double totalrows,
int numrows, HeapTuple *rows,
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 262036a..d23f876 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -824,11 +824,12 @@ WHERE c.castmethod = 'b' AND
character varying | character | 0 | i
pg_node_tree | text | 0 | i
pg_ndistinct | bytea | 0 | i
+ pg_dependencies | bytea | 0 | i
cidr | inet | 0 | i
xml | text | 0 | a
xml | character varying | 0 | a
xml | character | 0 | a
-(8 rows)
+(9 rows)
-- **************** pg_conversion ****************
-- Look for illegal values in pg_conversion fields.
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index d706f42..cba82bb 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2192,7 +2192,8 @@ pg_stats_ext| SELECT n.nspname AS schemaname,
c.relname AS tablename,
s.staname,
s.stakeys AS attnums,
- length((s.standistinct)::text) AS ndistbytes
+ length((s.standistinct)::bytea) AS ndistbytes,
+ length((s.stadependencies)::bytea) AS depsbytes
FROM ((pg_statistic_ext s
JOIN pg_class c ON ((c.oid = s.starelid)))
LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)));
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 8fe96d6..b43208d 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -31,7 +31,7 @@ ALTER TABLE ab1 DROP COLUMN a;
b | integer | | |
c | integer | | |
Statistics:
- "public.ab1_b_c_stats" WITH (ndistinct) ON (b, c)
+ "public.ab1_b_c_stats" WITH (ndistinct, dependencies) ON (b, c)
DROP TABLE ab1;
-- Ensure things work sanely with SET STATISTICS 0
@@ -135,7 +135,7 @@ SELECT staenabled, standistinct
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
staenabled | standistinct
------------+------------------------------------------------------------------------------------------------
- {d} | [{(b 3 4), 301.000000}, {(b 3 6), 301.000000}, {(b 4 6), 301.000000}, {(b 3 4 6), 301.000000}]
+ {d,f} | [{(b 3 4), 301.000000}, {(b 3 6), 301.000000}, {(b 4 6), 301.000000}, {(b 3 4 6), 301.000000}]
(1 row)
-- Hash Aggregate, thanks to estimates improved by the statistic
@@ -201,7 +201,7 @@ SELECT staenabled, standistinct
FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass;
staenabled | standistinct
------------+----------------------------------------------------------------------------------------------------
- {d} | [{(b 3 4), 2550.000000}, {(b 3 6), 800.000000}, {(b 4 6), 1632.000000}, {(b 3 4 6), 10000.000000}]
+ {d,f} | [{(b 3 4), 2550.000000}, {(b 3 6), 800.000000}, {(b 4 6), 1632.000000}, {(b 3 4 6), 10000.000000}]
(1 row)
-- plans using Group Aggregate, thanks to using correct esimates
@@ -311,3 +311,107 @@ EXPLAIN (COSTS off)
(3 rows)
DROP TABLE ndistinct;
+-- functional dependencies tests
+CREATE TABLE functional_dependencies (
+ filler1 TEXT,
+ filler2 NUMERIC,
+ a INT,
+ b TEXT,
+ filler3 DATE,
+ c INT,
+ d TEXT
+);
+SET random_page_cost = 1.2;
+CREATE INDEX fdeps_ab_idx ON functional_dependencies (a, b);
+CREATE INDEX fdeps_abc_idx ON functional_dependencies (a, b, c);
+-- random data (no functional dependencies)
+INSERT INTO functional_dependencies (a, b, c, filler1)
+ SELECT mod(i, 23), mod(i, 29), mod(i, 31), i FROM generate_series(1,5000) s(i);
+ANALYZE functional_dependencies;
+EXPLAIN (COSTS OFF)
+ SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
+ QUERY PLAN
+---------------------------------------------------
+ Bitmap Heap Scan on functional_dependencies
+ Recheck Cond: ((a = 1) AND (b = '1'::text))
+ -> Bitmap Index Scan on fdeps_abc_idx
+ Index Cond: ((a = 1) AND (b = '1'::text))
+(4 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
+ QUERY PLAN
+-----------------------------------------------------------
+ Index Scan using fdeps_abc_idx on functional_dependencies
+ Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1))
+(2 rows)
+
+-- create statistics
+CREATE STATISTICS func_deps_stat WITH (dependencies) ON (a, b, c) FROM functional_dependencies;
+ANALYZE functional_dependencies;
+EXPLAIN (COSTS OFF)
+ SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
+ QUERY PLAN
+---------------------------------------------------
+ Bitmap Heap Scan on functional_dependencies
+ Recheck Cond: ((a = 1) AND (b = '1'::text))
+ -> Bitmap Index Scan on fdeps_abc_idx
+ Index Cond: ((a = 1) AND (b = '1'::text))
+(4 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
+ QUERY PLAN
+-----------------------------------------------------------
+ Index Scan using fdeps_abc_idx on functional_dependencies
+ Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1))
+(2 rows)
+
+-- a => b, a => c, b => c
+TRUNCATE functional_dependencies;
+DROP STATISTICS func_deps_stat;
+INSERT INTO functional_dependencies (a, b, c, filler1)
+ SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i);
+ANALYZE functional_dependencies;
+EXPLAIN (COSTS OFF)
+ SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
+ QUERY PLAN
+-----------------------------------------------------------
+ Index Scan using fdeps_abc_idx on functional_dependencies
+ Index Cond: ((a = 1) AND (b = '1'::text))
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
+ QUERY PLAN
+-----------------------------------------------------------
+ Index Scan using fdeps_abc_idx on functional_dependencies
+ Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1))
+(2 rows)
+
+-- create statistics
+CREATE STATISTICS func_deps_stat WITH (dependencies) ON (a, b, c) FROM functional_dependencies;
+ANALYZE functional_dependencies;
+EXPLAIN (COSTS OFF)
+ SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
+ QUERY PLAN
+---------------------------------------------------
+ Bitmap Heap Scan on functional_dependencies
+ Recheck Cond: ((a = 1) AND (b = '1'::text))
+ -> Bitmap Index Scan on fdeps_abc_idx
+ Index Cond: ((a = 1) AND (b = '1'::text))
+(4 rows)
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
+ QUERY PLAN
+---------------------------------------------------
+ Bitmap Heap Scan on functional_dependencies
+ Recheck Cond: ((a = 1) AND (b = '1'::text))
+ Filter: (c = 1)
+ -> Bitmap Index Scan on fdeps_ab_idx
+ Index Cond: ((a = 1) AND (b = '1'::text))
+(5 rows)
+
+RESET random_page_cost;
+DROP TABLE functional_dependencies;
diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out
index 84022f6..7b200ba 100644
--- a/src/test/regress/expected/type_sanity.out
+++ b/src/test/regress/expected/type_sanity.out
@@ -67,12 +67,13 @@ WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%'
(SELECT 1 FROM pg_type as p2
WHERE p2.typname = ('_' || p1.typname)::name AND
p2.typelem = p1.oid and p1.typarray = p2.oid);
- oid | typname
-------+--------------
+ oid | typname
+------+-----------------
194 | pg_node_tree
3361 | pg_ndistinct
+ 3402 | pg_dependencies
210 | smgr
-(3 rows)
+(4 rows)
-- Make sure typarray points to a varlena array type of our own base
SELECT p1.oid, p1.typname as basetype, p2.typname as arraytype,
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 4faaf88..1b0018d 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -163,3 +163,71 @@ EXPLAIN (COSTS off)
SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
DROP TABLE ndistinct;
+
+-- functional dependencies tests
+CREATE TABLE functional_dependencies (
+ filler1 TEXT,
+ filler2 NUMERIC,
+ a INT,
+ b TEXT,
+ filler3 DATE,
+ c INT,
+ d TEXT
+);
+
+SET random_page_cost = 1.2;
+
+CREATE INDEX fdeps_ab_idx ON functional_dependencies (a, b);
+CREATE INDEX fdeps_abc_idx ON functional_dependencies (a, b, c);
+
+-- random data (no functional dependencies)
+INSERT INTO functional_dependencies (a, b, c, filler1)
+ SELECT mod(i, 23), mod(i, 29), mod(i, 31), i FROM generate_series(1,5000) s(i);
+
+ANALYZE functional_dependencies;
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
+
+-- create statistics
+CREATE STATISTICS func_deps_stat WITH (dependencies) ON (a, b, c) FROM functional_dependencies;
+
+ANALYZE functional_dependencies;
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
+
+-- a => b, a => c, b => c
+TRUNCATE functional_dependencies;
+DROP STATISTICS func_deps_stat;
+
+INSERT INTO functional_dependencies (a, b, c, filler1)
+ SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i);
+
+ANALYZE functional_dependencies;
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
+
+-- create statistics
+CREATE STATISTICS func_deps_stat WITH (dependencies) ON (a, b, c) FROM functional_dependencies;
+
+ANALYZE functional_dependencies;
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
+
+EXPLAIN (COSTS OFF)
+ SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
+
+RESET random_page_cost;
+DROP TABLE functional_dependencies;