From 71837f82db88baa9e0250d3868078140cf11f5a7 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Thu, 26 Aug 2021 23:01:04 +0200 Subject: [PATCH 1/3] Improve estimates for Var op Var with the same Var When estimating (Var op Var) conditions, we can treat the case with the same Var on both sides as a special case, and we can provide a better selectivity estimate than for the generic case. For example for (a = a) we know it's 1.0, because all rows are expected to match. Similarly for (a != a), which has selectivity 0.0. And the same logic can be applied to inequality comparisons, like (a < a) etc. In principle, those clauses are a bit strange and queries are unlikely to use them. But query generators sometimes do silly things, and these checks are quite cheap so it's likely a win. --- src/backend/utils/adt/selfuncs.c | 77 +++++++++++++++++++++++++++++++- 1 file changed, 76 insertions(+), 1 deletion(-) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 0c8c05f6c2..22608aadc1 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -210,7 +210,8 @@ static bool get_actual_variable_endpoint(Relation heapRel, MemoryContext outercontext, Datum *endpointDatum); static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids); - +static bool matching_restriction_variables(PlannerInfo *root, List *args, + int varRelid); /* * eqsel - Selectivity of "=" for any data types. @@ -256,6 +257,14 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate) } } + /* + * If it's (variable = variable) with the same variable on both sides, it's + * a special case and we know it's not expected to filter anything, so we + * estimate the selectivity as 1.0 (or 0.0 if it's negated). + */ + if (matching_restriction_variables(root, args, varRelid)) + return (negate) ? 0.0 : 1.0; + /* * If expression is not variable = something or something = variable, then * punt and return a default estimate. 
@@ -1408,6 +1417,22 @@ scalarineqsel_wrapper(PG_FUNCTION_ARGS, bool isgt, bool iseq) Oid consttype; double selec; + /* + * Handle (variable < variable) and (variable <= variable) with the same + * variable on both sides as a special case. The strict inequality should + * not match any rows, hence selectivity is 0.0. The other case is about + * the same as equality, so selectivity is 1.0. + */ + if (matching_restriction_variables(root, args, varRelid)) + { + /* The case with equality matches all rows, so estimate it as 1.0. */ + if (iseq) + PG_RETURN_FLOAT8(1.0); + + /* Strict inequality matches nothing, so selectivity is 0.0. */ + PG_RETURN_FLOAT8(0.0); + } + /* * If expression is not variable op something or something op variable, * then punt and return a default estimate. @@ -4871,6 +4896,56 @@ get_restriction_variable(PlannerInfo *root, List *args, int varRelid, return false; } + +/* + * matching_restriction_variables + * Examine the args of a restriction clause to see if it's of the + * form (variable op variable) with the same variable on both sides. + * + * Inputs: + * root: the planner info + * args: clause argument list + * varRelid: see specs for restriction selectivity functions + * + * Returns true if the same variable is on both sides, otherwise false. + */ +static bool +matching_restriction_variables(PlannerInfo *root, List *args, int varRelid) +{ + Node *left, + *right; + VariableStatData ldata; + VariableStatData rdata; + bool res = false; + + /* Fail if not a binary opclause (probably shouldn't happen) */ + if (list_length(args) != 2) + return false; + + left = (Node *) linitial(args); + right = (Node *) lsecond(args); + + /* + * Examine both sides. Note that when varRelid is nonzero, Vars of other + * relations will be treated as pseudoconstants. + */ + examine_variable(root, left, varRelid, &ldata); + examine_variable(root, right, varRelid, &rdata); + + /* + * If both sides are variable, and are equal, we win. 
+ */ + if ((ldata.rel != NULL && rdata.rel != NULL) && + equal(ldata.var, rdata.var)) + res = true; + + /* We don't need the stats. */ + ReleaseVariableStats(ldata); + ReleaseVariableStats(rdata); + + return res; +} + /* * get_join_variables * Apply examine_variable() to each side of a join clause. -- 2.31.1