Re: Bad row estimation with indexed func returning bool - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Re: Bad row estimation with indexed func returning bool |
Date | |
Msg-id | 4874.1443051017@sss.pgh.pa.us Whole thread Raw |
In response to | Re: Bad row estimation with indexed func returning bool (Tom Lane <tgl@sss.pgh.pa.us>) |
Responses |
Re: Bad row estimation with indexed func returning bool
|
List | pgsql-hackers |
I wrote: > However, in the case at hand, the complaint basically is why aren't we > treating the boolean function expression like a boolean variable, and > looking to see if there are stats available for it, like this other > bit in clause_selectivity: > /* > * A Var at the top of a clause must be a bool Var. This is > * equivalent to the clause reln.attribute = 't', so we compute > * the selectivity as if that is what we have. > */ > s1 = restriction_selectivity(root, > BooleanEqualOperator, > list_make2(var, > makeBoolConst(true, > false)), > InvalidOid, > varRelid); > Indeed you could argue that this ought to be the fallback behavior for > *any* unhandled case, not just function expressions. Not sure if we'd > need to restrict it to single-relation expressions or not. > The implication of doing it like this would be that the default estimate > in the absence of any matching stats would be 0.5 (since eqsel defaults > to 1/ndistinct, and get_variable_numdistinct will report 2.0 for any > boolean-type expression it has no stats for). That's not a huge change > from the existing 0.3333333 estimate, which seems pretty unprincipled > anyway ... but it would probably be enough to annoy people if we did it in > stable branches. So I'd be inclined to propose changing this in HEAD and > maybe 9.5, but not further back. (For non-function expressions, 0.5 is > the default already, so those would not change behavior.) I experimented with the attached patch. The change in the default estimate for a function results in just one change in the standard regression test results, so far as I can find. Comments, objections? regards, tom lane diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index dcac1c1..c862b70 100644 *** a/src/backend/optimizer/path/clausesel.c --- b/src/backend/optimizer/path/clausesel.c *************** *** 14,20 **** */ #include "postgres.h" - #include "catalog/pg_operator.h" #include "nodes/makefuncs.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" --- 14,19 ---- *************** clause_selectivity(PlannerInfo *root, *** 568,585 **** if (var->varlevelsup == 0 && (varRelid == 0 || varRelid == (int) var->varno)) { ! /* ! * A Var at the top of a clause must be a bool Var. This is ! * equivalent to the clause reln.attribute = 't', so we compute ! * the selectivity as if that is what we have. ! */ ! s1 = restriction_selectivity(root, ! BooleanEqualOperator, ! list_make2(var, ! makeBoolConst(true, ! false)), ! InvalidOid, ! varRelid); } } else if (IsA(clause, Const)) --- 567,574 ---- if (var->varlevelsup == 0 && (varRelid == 0 || varRelid == (int) var->varno)) { ! /* Use the restriction selectivity function for a bool Var */ ! s1 = boolvarsel(root, (Node *) var, varRelid); } } else if (IsA(clause, Const)) *************** clause_selectivity(PlannerInfo *root, *** 680,704 **** if (IsA(clause, DistinctExpr)) s1 = 1.0 - s1; } - else if (is_funcclause(clause)) - { - /* - * This is not an operator, so we guess at the selectivity. THIS IS A - * HACK TO GET V4 OUT THE DOOR. FUNCS SHOULD BE ABLE TO HAVE - * SELECTIVITIES THEMSELVES. -- JMH 7/9/92 - */ - s1 = (Selectivity) 0.3333333; - } - #ifdef NOT_USED - else if (IsA(clause, SubPlan) || - IsA(clause, AlternativeSubPlan)) - { - /* - * Just for the moment! FIX ME! - vadim 02/04/98 - */ - s1 = (Selectivity) 0.5; - } - #endif else if (IsA(clause, ScalarArrayOpExpr)) { /* Use node specific selectivity calculation function */ --- 669,674 ---- *************** clause_selectivity(PlannerInfo *root, *** 766,771 **** --- 736,752 ---- jointype, sjinfo); } + else + { + /* + * For anything else, see if we can consider it as a boolean variable. + * This only works if it's an immutable expression in Vars of a single + * relation; but there's no point in us checking that here because + * boolvarsel() will do it internally, and return a suitable default + * selectivity (0.5) if not. + */ + s1 = boolvarsel(root, clause, varRelid); + } /* Cache the result if possible */ if (cacheable) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 72bc502..a643c6f 100644 *** a/src/backend/utils/adt/selfuncs.c --- b/src/backend/utils/adt/selfuncs.c *************** *** 105,110 **** --- 105,111 ---- #include "access/sysattr.h" #include "catalog/index.h" #include "catalog/pg_collation.h" + #include "catalog/pg_operator.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_statistic.h" #include "catalog/pg_type.h" *************** icnlikesel(PG_FUNCTION_ARGS) *** 1440,1445 **** --- 1441,1479 ---- } /* + * boolvarsel - Selectivity of Boolean variable. + * + * This can actually be called on any expression involving only Vars of + * the specified relation. If there are statistics about the Var or + * expression (the latter can happen, if it's indexed) then we'll produce + * a real estimate; otherwise we default to 0.5. + */ + Selectivity + boolvarsel(PlannerInfo *root, Node *arg, int varRelid) + { + VariableStatData vardata; + double selec; + + examine_variable(root, arg, varRelid, &vardata); + if (HeapTupleIsValid(vardata.statsTuple)) + { + /* + * A boolean variable V is equivalent to the clause V = 't', so we + * compute the selectivity as if that is what we have. + */ + selec = var_eq_const(&vardata, BooleanEqualOperator, + BoolGetDatum(true), false, true); + } + else + { + /* Punt and estimate 0.5 */ + selec = 0.5; + } + ReleaseVariableStats(vardata); + return selec; + } + + /* * booltestsel - Selectivity of BooleanTest Node. */ Selectivity diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index b3d8017..a7433d9 100644 *** a/src/include/utils/selfuncs.h --- b/src/include/utils/selfuncs.h *************** extern Datum icregexnejoinsel(PG_FUNCTIO *** 164,169 **** --- 164,170 ---- extern Datum nlikejoinsel(PG_FUNCTION_ARGS); extern Datum icnlikejoinsel(PG_FUNCTION_ARGS); + extern Selectivity boolvarsel(PlannerInfo *root, Node *arg, int varRelid); extern Selectivity booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo); diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out index 9d3540f..88dcca3 100644 *** a/src/test/regress/expected/rowsecurity.out --- b/src/test/regress/expected/rowsecurity.out *************** EXPLAIN (COSTS OFF) SELECT * FROM docume *** 186,204 **** (7 rows) EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle); ! QUERY PLAN ! ---------------------------------------------------------------------- Hash Join ! Hash Cond: (category.cid = document.cid) ! -> Seq Scan on category -> Hash ! -> Subquery Scan on document ! Filter: f_leak(document.dtitle) ! -> Seq Scan on document document_1 ! Filter: (dlevel <= $0) ! InitPlan 1 (returns $0) ! -> Index Scan using uaccount_pkey on uaccount ! Index Cond: (pguser = "current_user"()) (11 rows) -- only owner can change policies --- 186,204 ---- (7 rows) EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle); ! QUERY PLAN ! ---------------------------------------------------------------- Hash Join ! Hash Cond: (document.cid = category.cid) ! -> Subquery Scan on document ! Filter: f_leak(document.dtitle) ! -> Seq Scan on document document_1 ! Filter: (dlevel <= $0) ! InitPlan 1 (returns $0) ! -> Index Scan using uaccount_pkey on uaccount ! Index Cond: (pguser = "current_user"()) -> Hash ! -> Seq Scan on category (11 rows) -- only owner can change policies
pgsql-hackers by date: