From 06e2d15030a04c7477bbeff56bf4f5a41ba1b0bc Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Wed, 17 Feb 2021 01:02:22 +0100 Subject: [PATCH 4/4] WIP rework tracking of expressions --- src/backend/optimizer/util/plancat.c | 21 +-- src/backend/statistics/dependencies.c | 131 ++++++++++++++---- src/backend/statistics/extended_stats.c | 77 +++++----- src/backend/statistics/mcv.c | 28 +++- src/backend/statistics/mvdistinct.c | 120 +++++++++------- src/backend/utils/adt/selfuncs.c | 29 +++- .../statistics/extended_stats_internal.h | 11 +- src/include/statistics/statistics.h | 3 +- 8 files changed, 268 insertions(+), 152 deletions(-) diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 3a3362e9ed..e91feebb73 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -1320,6 +1320,7 @@ get_relation_statistics(RelOptInfo *rel, Relation relation) Bitmapset *keys = NULL; int i; List *exprs = NIL; + int nexprs; htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid)); if (!HeapTupleIsValid(htup)) @@ -1330,14 +1331,6 @@ get_relation_statistics(RelOptInfo *rel, Relation relation) if (!HeapTupleIsValid(dtup)) elog(ERROR, "cache lookup failed for statistics object %u", statOid); - /* - * First, build the array of columns covered. This is ultimately - * wasted if no stats within the object have actually been built, but - * it doesn't seem worth troubling over that case. - */ - for (i = 0; i < staForm->stxkeys.dim1; i++) - keys = bms_add_member(keys, staForm->stxkeys.values[i]); - /* * preprocess expression (if any) * @@ -1381,6 +1374,18 @@ get_relation_statistics(RelOptInfo *rel, Relation relation) } } + /* + * First, build the array of columns covered. This is ultimately + * wasted if no stats within the object have actually been built, but + * it doesn't seem worth troubling over that case. + * + * Offset by number of expressions, to handle negative attnums (which + * we assign to expressions). + */ + nexprs = list_length(exprs); + for (i = 0; i < staForm->stxkeys.dim1; i++) + keys = bms_add_member(keys, staForm->stxkeys.values[i] + nexprs); + /* add one StatisticExtInfo for each kind built */ if (statext_is_kind_built(dtup, STATS_EXT_NDISTINCT)) { diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c index 6bf3127bcc..9d53a95f73 100644 --- a/src/backend/statistics/dependencies.c +++ b/src/backend/statistics/dependencies.c @@ -252,7 +252,7 @@ dependency_degree(int numrows, HeapTuple *rows, ExprInfo *exprs, int k, * member easier, and then construct a filtered version with only attnums * referenced by the dependency we validate. */ - attnums = build_attnums_array(attrs, &numattrs); + attnums = build_attnums_array(attrs, exprs->nexprs, &numattrs); attnums_dep = (AttrNumber *) palloc(k * sizeof(AttrNumber)); for (i = 0; i < k; i++) @@ -376,15 +376,34 @@ statext_dependencies_build(int numrows, HeapTuple *rows, /* result */ MVDependencies *dependencies = NULL; - /* treat expressions as special attributes with high attnums */ - attrs = add_expressions_to_attributes(attrs, exprs->nexprs); - /* * Transform the bms into an array, to make accessing i-th member easier. */ - attnums = build_attnums_array(attrs, &numattrs); + attnums = (AttrNumber *) palloc(sizeof(AttrNumber) * (bms_num_members(attrs) + exprs->nexprs)); + + numattrs = 0; + + /* treat expressions as attributes with negative attnums */ + for (i = 0; i < exprs->nexprs; i++) + attnums[numattrs++] = -(i+1); + + /* + * regular attributes + * + * XXX Maybe add this in the opposite order, just like in MCV? first + * regular attnums, then exressions. + */ + k = -1; + while ((k = bms_next_member(attrs, k)) >= 0) + attnums[numattrs++] = k; Assert(numattrs >= 2); + Assert(numattrs == (bms_num_members(attrs) + exprs->nexprs)); + + /* build a new bitmapset of attnums with offset */ + attrs = NULL; + for (i = 0; i < numattrs; i++) + attrs = bms_add_member(attrs, attnums[i] + exprs->nexprs); /* * We'll try build functional dependencies starting from the smallest ones @@ -1374,8 +1393,10 @@ dependencies_clauselist_selectivity(PlannerInfo *root, * We also skip clauses that we already estimated using different types of * statistics (we treat them as incompatible). * - * For expressions, we generate attnums higher than MaxHeapAttributeNumber - * so that we can work with attnums only. + * To handle expressions, we assign them negative attnums, as if it was a + * system attribute (this is fine, as we only allow extended stats on user + * attributes). And then we offset everything by the number of expressions, + * so that we can store the values in a bitmapset. */ listidx = 0; foreach(l, clauses) @@ -1391,13 +1412,12 @@ dependencies_clauselist_selectivity(PlannerInfo *root, { /* * If it's a simple column refrence, just extract the attnum. If - * it's an expression, make sure it's not a duplicate and assign - * a special attnum to it (higher than any regular value). + * it's an expression, assign a negative attnum as if it was a + * system attribute. */ if (dependency_is_compatible_clause(clause, rel->relid, &attnum)) { list_attnums[listidx] = attnum; - clauses_attnums = bms_add_member(clauses_attnums, attnum); } else if (dependency_is_compatible_expression(clause, rel->relid, rel->statlist, @@ -1413,7 +1433,8 @@ dependencies_clauselist_selectivity(PlannerInfo *root, { if (equal(unique_exprs[i], expr)) { - attnum = EXPRESSION_ATTNUM(i); + /* negative attribute number to expression */ + attnum = -(i + 1); break; } } @@ -1421,14 +1442,10 @@ dependencies_clauselist_selectivity(PlannerInfo *root, /* not found in the list, so add it */ if (attnum == InvalidAttrNumber) { - attnum = EXPRESSION_ATTNUM(unique_exprs_cnt); unique_exprs[unique_exprs_cnt++] = expr; - /* shouldn't have seen this attnum yet */ - Assert(!bms_is_member(attnum, clauses_attnums)); - - /* we may add the attnum repeatedly to clauses_attnums */ - clauses_attnums = bms_add_member(clauses_attnums, attnum); + /* after incrementing the value, to get -1, -2, ... */ + attnum = -unique_exprs_cnt; } /* remember which attnum was assigned to this clause */ @@ -1439,6 +1456,37 @@ dependencies_clauselist_selectivity(PlannerInfo *root, listidx++; } + Assert(listidx == list_length(clauses)); + + /* + * Now that we know how many expressions there are, we can offset the + * values just enough to build the bitmapset. + */ + for (i = 0; i < list_length(clauses); i++) + { + AttrNumber attnum; + + /* ignore incompatible or already estimated clauses */ + if (list_attnums[i] == InvalidAttrNumber) + continue; + + /* make sure the attnum is in the expected range */ + Assert(list_attnums[i] >= (-unique_exprs_cnt)); + Assert(list_attnums[i] <= MaxHeapAttributeNumber); + + /* make sure the attnum is not negative */ + attnum = list_attnums[i] + unique_exprs_cnt; + + /* + * Expressions are unique, and so we must not have seen this attnum + * before. + */ + Assert(AttrNumberIsForUserDefinedAttr(list_attnums[i]) || + !bms_is_member(attnum, clauses_attnums)); + + clauses_attnums = bms_add_member(clauses_attnums, attnum); + } + /* * If there's not at least two distinct attnums and expressions, then * reject the whole list of clauses. We must return 1.0 so the calling @@ -1470,19 +1518,40 @@ dependencies_clauselist_selectivity(PlannerInfo *root, foreach(l, rel->statlist) { StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l); - Bitmapset *matched; int nmatched; int nexprs; + int k; MVDependencies *deps; /* skip statistics that are not of the correct type */ if (stat->kind != STATS_EXT_DEPENDENCIES) continue; - /* count matching simple clauses */ - matched = bms_intersect(clauses_attnums, stat->keys); - nmatched = bms_num_members(matched); - bms_free(matched); + /* + * Count matching attributes - we have to undo two attnum offsets. + * First, the dependency is offset using the number of expressions + * for that statistics, and then (if it's a plain attribute) we + * need to apply the same offset as above, by unique_exprs_cnt. + */ + nmatched = 0; + k = -1; + while ((k = bms_next_member(stat->keys, k)) >= 0) + { + AttrNumber attnum = (AttrNumber) k; + + /* undo the per-statistics offset */ + attnum = attnum - list_length(stat->exprs); + + /* skip expressions */ + if (!AttrNumberIsForUserDefinedAttr(attnum)) + continue; + + /* apply the same offset as above */ + attnum += unique_exprs_cnt; + + if (bms_is_member(attnum, clauses_attnums)) + nmatched++; + } /* count matching expressions */ nexprs = 0; @@ -1537,13 +1606,23 @@ dependencies_clauselist_selectivity(PlannerInfo *root, Node *expr; int k; AttrNumber unique_attnum = InvalidAttrNumber; + AttrNumber attnum; - /* regular attribute, no need to remap */ - if (dep->attributes[j] <= MaxHeapAttributeNumber) + /* undo the per-statistics offset */ + attnum = dep->attributes[j] - list_length(stat->exprs); + + /* regular attribute, simply offset by number of expressions */ + if (AttrNumberIsForUserDefinedAttr(attnum)) + { + dep->attributes[j] = attnum + unique_exprs_cnt; continue; + } + + /* the attnum should be a valid system attnum (-1, -2, ...) */ + Assert(AttributeNumberIsValid(attnum)); /* index of the expression */ - idx = EXPRESSION_INDEX(dep->attributes[j]); + idx = (1 - attnum); /* make sure the expression index is valid */ Assert((idx >= 0) && (idx < list_length(stat->exprs))); @@ -1559,7 +1638,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root, */ if (equal(unique_exprs[k], expr)) { - unique_attnum = EXPRESSION_ATTNUM(k); + unique_attnum = -(k + 1) + unique_exprs_cnt; break; } } diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 6ed938d6ab..611871c08b 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -892,7 +892,7 @@ bsearch_arg(const void *key, const void *base, size_t nmemb, size_t size, * is not necessary here (and when querying the bitmap). */ AttrNumber * -build_attnums_array(Bitmapset *attrs, int *numattrs) +build_attnums_array(Bitmapset *attrs, int nexprs, int *numattrs) { int i, j; @@ -908,16 +908,19 @@ build_attnums_array(Bitmapset *attrs, int *numattrs) j = -1; while ((j = bms_next_member(attrs, j)) >= 0) { + AttrNumber attnum = (j - nexprs); + /* * Make sure the bitmap contains only user-defined attributes. As * bitmaps can't contain negative values, this can be violated in two * ways. Firstly, the bitmap might contain 0 as a member, and secondly * the integer value might be larger than MaxAttrNumber. */ - Assert(AttrNumberIsForUserDefinedAttr(j)); - Assert(j <= MaxAttrNumber); + Assert(AttributeNumberIsValid(attnum)); + Assert(attnum <= MaxAttrNumber); + Assert(attnum >= (-nexprs)); - attnums[i++] = (AttrNumber) j; + attnums[i++] = (AttrNumber) attnum; /* protect against overflows */ Assert(i <= num); @@ -984,15 +987,16 @@ build_sorted_items(int numrows, int *nitems, HeapTuple *rows, ExprInfo *exprs, Datum value; bool isnull; int attlen; + AttrNumber attnum = attnums[j]; - if (attnums[j] <= MaxHeapAttributeNumber) + if (AttrNumberIsForUserDefinedAttr(attnum)) { - value = heap_getattr(rows[i], attnums[j], tdesc, &isnull); - attlen = TupleDescAttr(tdesc, attnums[j] - 1)->attlen; + value = heap_getattr(rows[i], attnum, tdesc, &isnull); + attlen = TupleDescAttr(tdesc, attnum - 1)->attlen; } else { - int idx = EXPRESSION_INDEX(attnums[j]); + int idx = -(attnums[j] + 1); Assert((idx >= 0) && (idx < exprs->nexprs)); @@ -1097,6 +1101,23 @@ stat_find_expression(StatisticExtInfo *stat, Node *expr) return -1; } +static bool +stat_covers_attributes(StatisticExtInfo *stat, Bitmapset *attnums) +{ + int k; + + k = -1; + while ((k = bms_next_member(attnums, k)) >= 0) + { + AttrNumber attnum = k + list_length(stat->exprs); + + if (!bms_is_member(attnum, stat->keys)) + return false; + } + + return true; +} + /* * stat_covers_expressions * Test whether a statistics object covers all expressions in a list. @@ -1181,7 +1202,7 @@ choose_best_statistics(List *stats, char requiredkind, continue; /* ignore clauses that are not covered by this object */ - if (!bms_is_subset(clause_attnums[i], info->keys) || + if (!stat_covers_attributes(info, clause_attnums[i]) || !stat_covers_expressions(info, clause_exprs[i], &expr_idxs)) continue; @@ -1678,6 +1699,8 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli listidx = 0; foreach(l, clauses) { + int k; + /* * If the clause is not already estimated and is compatible with * the selected statistics object (all attributes and expressions @@ -1685,7 +1708,7 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli * estimate. */ if (!bms_is_member(listidx, *estimatedclauses) && - bms_is_subset(list_attnums[listidx], stat->keys) && + stat_covers_attributes(stat, list_attnums[listidx]) && stat_covers_expressions(stat, list_exprs[listidx], NULL)) { /* record simple clauses (single column or expression) */ @@ -2555,37 +2578,3 @@ evaluate_expressions(Relation rel, List *exprs, int numrows, HeapTuple *rows) return result; } - -/* - * add_expressions_to_attributes - * add expressions as attributes with high attnums - * - * Treat the expressions as attributes with attnums above the regular - * attnum range. This will allow us to handle everything in the same - * way, and identify expressions in the dependencies. - * - * XXX This always creates a copy of the bitmap. We might optimize this - * by only creating the copy with (nexprs > 0) but then we'd have to track - * this in order to free it (if we want to). Does not seem worth it. - */ -Bitmapset * -add_expressions_to_attributes(Bitmapset *attrs, int nexprs) -{ - int i; - - /* - * Copy the bitmapset and add fake attnums representing expressions, - * starting above MaxHeapAttributeNumber. - */ - attrs = bms_copy(attrs); - - /* start with (MaxHeapAttributeNumber + 1) */ - for (i = 0; i < nexprs; i++) - { - Assert(EXPRESSION_ATTNUM(i) > MaxHeapAttributeNumber); - - attrs = bms_add_member(attrs, EXPRESSION_ATTNUM(i)); - } - - return attrs; -} diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index 9720e49ab4..f0b911bd4a 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -187,6 +187,7 @@ statext_mcv_build(int numrows, HeapTuple *rows, ExprInfo *exprs, double totalrows, int stattarget) { int i, + k, numattrs, ngroups, nitems; @@ -206,10 +207,26 @@ statext_mcv_build(int numrows, HeapTuple *rows, ExprInfo *exprs, * XXX We do this after build_mss, because that expects the bitmapset * to only contain simple attributes (with a matching VacAttrStats) */ - attrs = add_expressions_to_attributes(attrs, exprs->nexprs); - /* now build the array, with the special expression attnums */ - attnums = build_attnums_array(attrs, &numattrs); + /* + * Transform the bms into an array, to make accessing i-th member easier. + */ + attnums = (AttrNumber *) palloc(sizeof(AttrNumber) * (bms_num_members(attrs) + exprs->nexprs)); + + numattrs = 0; + + /* regular attributes */ + k = -1; + while ((k = bms_next_member(attrs, k)) >= 0) + attnums[numattrs++] = k; + + /* treat expressions as attributes with negative attnums */ + for (i = 0; i < exprs->nexprs; i++) + attnums[numattrs++] = -(i+1); + + Assert(numattrs >= 2); + Assert(numattrs == (bms_num_members(attrs) + exprs->nexprs)); + /* sort the rows */ items = build_sorted_items(numrows, &nitems, rows, exprs, @@ -349,7 +366,6 @@ statext_mcv_build(int numrows, HeapTuple *rows, ExprInfo *exprs, pfree(items); pfree(groups); - pfree(attrs); return mcvlist; } @@ -1629,7 +1645,9 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, int idx; /* match the attribute to a dimension of the statistic */ - idx = bms_member_index(keys, var->varattno); + idx = bms_member_index(keys, var->varattno + list_length(exprs)); + + Assert(idx >= 0); /* * Walk through the MCV items and evaluate the current clause. diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c index 55d3fa0e1f..5e796e7123 100644 --- a/src/backend/statistics/mvdistinct.c +++ b/src/backend/statistics/mvdistinct.c @@ -83,9 +83,9 @@ static void generate_combinations(CombinationGenerator *state); * This computes the ndistinct estimate using the same estimator used * in analyze.c and then computes the coefficient. * - * To handle expressions easily, we treat them as special attributes with - * attnums above MaxHeapAttributeNumber, and we assume the expressions are - * placed after all simple attributes. + * To handle expressions easily, we treat them as system attributes with + * negative attnums, and offset everything by number of expressions to + * allow using Bitmapsets. */ MVNDistinct * statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows, @@ -93,10 +93,12 @@ statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows, VacAttrStats **stats) { MVNDistinct *result; + int i; int k; int itemcnt; int numattrs = bms_num_members(attrs); int numcombs = num_combinations(numattrs + exprs->nexprs); + Bitmapset *tmp = NULL; result = palloc(offsetof(MVNDistinct, items) + numcombs * sizeof(MVNDistinctItem)); @@ -104,8 +106,26 @@ statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows, result->type = STATS_NDISTINCT_TYPE_BASIC; result->nitems = numcombs; - /* treat expressions as special attributes with high attnums */ - attrs = add_expressions_to_attributes(attrs, exprs->nexprs); + /* + * Treat expressions as system attributes with negative attnums, + * but offset everything by number of expressions. + */ + for (i = 0; i < exprs->nexprs; i++) + { + AttrNumber attnum = -(i + 1); + tmp = bms_add_member(tmp, attnum + exprs->nexprs); + } + + /* regular attributes */ + k = -1; + while ((k = bms_next_member(attrs, k)) >= 0) + { + AttrNumber attnum = k; + tmp = bms_add_member(tmp, attnum + exprs->nexprs); + } + + /* use the newly built bitmapset */ + attrs = tmp; /* make sure there were no clashes */ Assert(bms_num_members(attrs) == numattrs + exprs->nexprs); @@ -124,29 +144,33 @@ statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows, MVNDistinctItem *item = &result->items[itemcnt]; int j; - item->attrs = NULL; + item->attributes = palloc(sizeof(AttrNumber) * k); + item->nattributes = k; + for (j = 0; j < k; j++) { AttrNumber attnum = InvalidAttrNumber; /* - * The simple attributes are before expressions, so have - * indexes below numattrs. - * */ - if (combination[j] < numattrs) - attnum = stats[combination[j]]->attr->attnum; + * The expressions have negative attnums, so even with the + * offset are before regular attributes. So the first chunk + * of indexes are for expressions. + */ + if (combination[j] >= exprs->nexprs) + attnum + = stats[combination[j] - exprs->nexprs]->attr->attnum; else { /* make sure the expression index is valid */ - Assert((combination[j] - numattrs) >= 0); - Assert((combination[j] - numattrs) < exprs->nexprs); + Assert(combination[j] >= 0); + Assert(combination[j] < exprs->nexprs); - attnum = EXPRESSION_ATTNUM(combination[j] - numattrs); + attnum = -(combination[j] + 1); } Assert(attnum != InvalidAttrNumber); - item->attrs = bms_add_member(item->attrs, attnum); + item->attributes[j] = attnum; } item->ndistinct = @@ -223,7 +247,7 @@ statext_ndistinct_serialize(MVNDistinct *ndistinct) { int nmembers; - nmembers = bms_num_members(ndistinct->items[i].attrs); + nmembers = ndistinct->items[i].nattributes; Assert(nmembers >= 2); len += SizeOfItem(nmembers); @@ -248,22 +272,15 @@ statext_ndistinct_serialize(MVNDistinct *ndistinct) for (i = 0; i < ndistinct->nitems; i++) { MVNDistinctItem item = ndistinct->items[i]; - int nmembers = bms_num_members(item.attrs); - int x; + int nmembers = item.nattributes; memcpy(tmp, &item.ndistinct, sizeof(double)); tmp += sizeof(double); memcpy(tmp, &nmembers, sizeof(int)); tmp += sizeof(int); - x = -1; - while ((x = bms_next_member(item.attrs, x)) >= 0) - { - AttrNumber value = (AttrNumber) x; - - memcpy(tmp, &value, sizeof(AttrNumber)); - tmp += sizeof(AttrNumber); - } + memcpy(tmp, item.attributes, sizeof(AttrNumber) * nmembers); + tmp += nmembers * sizeof(AttrNumber); /* protect against overflows */ Assert(tmp <= ((char *) output + len)); @@ -335,27 +352,21 @@ statext_ndistinct_deserialize(bytea *data) for (i = 0; i < ndistinct->nitems; i++) { MVNDistinctItem *item = &ndistinct->items[i]; - int nelems; - - item->attrs = NULL; /* ndistinct value */ memcpy(&item->ndistinct, tmp, sizeof(double)); tmp += sizeof(double); /* number of attributes */ - memcpy(&nelems, tmp, sizeof(int)); + memcpy(&item->nattributes, tmp, sizeof(int)); tmp += sizeof(int); - Assert((nelems >= 2) && (nelems <= STATS_MAX_DIMENSIONS)); + Assert((item->nattributes >= 2) && (item->nattributes <= STATS_MAX_DIMENSIONS)); - while (nelems-- > 0) - { - AttrNumber attno; + item->attributes + = (AttrNumber *) palloc(item->nattributes * sizeof(AttrNumber)); - memcpy(&attno, tmp, sizeof(AttrNumber)); - tmp += sizeof(AttrNumber); - item->attrs = bms_add_member(item->attrs, attno); - } + memcpy(item->attributes, tmp, sizeof(AttrNumber) * item->nattributes); + tmp += sizeof(AttrNumber) * item->nattributes; /* still within the bytea */ Assert(tmp <= ((char *) data + VARSIZE_ANY(data))); @@ -403,17 +414,16 @@ pg_ndistinct_out(PG_FUNCTION_ARGS) for (i = 0; i < ndist->nitems; i++) { - MVNDistinctItem item = ndist->items[i]; - int x = -1; - bool first = true; + int j; + MVNDistinctItem item = ndist->items[i]; if (i > 0) appendStringInfoString(&str, ", "); - while ((x = bms_next_member(item.attrs, x)) >= 0) + for (j = 0; j < item.nattributes; j++) { - appendStringInfo(&str, "%s%d", first ? "\"" : ", ", x); - first = false; + AttrNumber attnum = item.attributes[j]; + appendStringInfo(&str, "%s%d", (j == 0) ? "\"" : ", ", attnum); } appendStringInfo(&str, "\": %d", (int) item.ndistinct); } @@ -508,9 +518,10 @@ ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows, TupleDesc tdesc = NULL; Oid collid = InvalidOid; - if (combination[i] < nattrs) + /* first nexprs indexes are for expressions, then regular attributes */ + if (combination[i] >= exprs->nexprs) { - VacAttrStats *colstat = stats[combination[i]]; + VacAttrStats *colstat = stats[combination[i] - exprs->nexprs]; typid = colstat->attrtypid; attnum = colstat->attr->attnum; collid = colstat->attrcollid; @@ -518,8 +529,8 @@ ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows, } else { - typid = exprs->types[combination[i] - nattrs]; - collid = exprs->collations[combination[i] - nattrs]; + typid = exprs->types[combination[i]]; + collid = exprs->collations[combination[i]]; } type = lookup_type_cache(typid, TYPECACHE_LT_OPR); @@ -534,10 +545,13 @@ ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows, for (j = 0; j < numrows; j++) { /* - * The first nattrs indexes identify simple attributes, higher - * indexes are expressions. + * The first exprs indexes identify expressions, higher indexes + * are for plain attributes. + * + * XXX This seems a bit strange that we don't offset the (i) + * in any way? */ - if (combination[i] < nattrs) + if (combination[i] >= exprs->nexprs) items[j].values[i] = heap_getattr(rows[j], attnum, @@ -545,7 +559,9 @@ ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows, &items[j].isnull[i]); else { - int idx = (combination[i] - nattrs); + /* we know the first nexprs expressions are expressions, + * and the value is directly the expression index */ + int idx = combination[i]; /* make sure the expression index is valid */ Assert((idx >= 0) && (idx < exprs->nexprs)); diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index e52e490a08..ba0e23a616 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -4144,7 +4144,8 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, if (equal(exprinfo->expr, expr)) { - matched = bms_add_member(matched, MaxHeapAttributeNumber + idx); + AttrNumber attnum = -(idx + 1); + matched = bms_add_member(matched, attnum + list_length(matched_info->exprs)); found = true; break; } @@ -4165,10 +4166,10 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, if (!AttrNumberIsForUserDefinedAttr(attnum)) continue; - if (!bms_is_member(attnum, matched_info->keys)) + if (!bms_is_member(attnum + list_length(matched_info->exprs), matched_info->keys)) continue; - matched = bms_add_member(matched, attnum); + matched = bms_add_member(matched, attnum + list_length(matched_info->exprs)); } } } @@ -4176,13 +4177,29 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, /* Find the specific item that exactly matches the combination */ for (i = 0; i < stats->nitems; i++) { + int j; MVNDistinctItem *tmpitem = &stats->items[i]; - if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL) + if (tmpitem->nattributes != bms_num_members(matched)) + continue; + + /* assume it's the right item */ + item = tmpitem; + + for (j = 0; j < tmpitem->nattributes; j++) { - item = tmpitem; - break; + AttrNumber attnum = tmpitem->attributes[j]; + + if (!bms_is_member(attnum, matched)) + { + /* nah, it's not this item */ + item = NULL; + break; + } } + + if (item) + break; } /* make sure we found an item */ diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h index b2e59f9bc5..1f09799deb 100644 --- a/src/include/statistics/extended_stats_internal.h +++ b/src/include/statistics/extended_stats_internal.h @@ -106,7 +106,7 @@ extern void *bsearch_arg(const void *key, const void *base, int (*compar) (const void *, const void *, void *), void *arg); -extern AttrNumber *build_attnums_array(Bitmapset *attrs, int *numattrs); +extern AttrNumber *build_attnums_array(Bitmapset *attrs, int nexprs, int *numattrs); extern SortItem *build_sorted_items(int numrows, int *nitems, HeapTuple *rows, ExprInfo *exprs, TupleDesc tdesc, @@ -141,13 +141,4 @@ extern Selectivity mcv_clause_selectivity_or(PlannerInfo *root, Selectivity *overlap_basesel, Selectivity *totalsel); -extern Bitmapset *add_expressions_to_attributes(Bitmapset *attrs, int nexprs); - -/* translate 0-based expression index to attnum and back */ -#define EXPRESSION_ATTNUM(index) \ - (MaxHeapAttributeNumber + (index) + 1) - -#define EXPRESSION_INDEX(attnum) \ - ((attnum) - MaxHeapAttributeNumber - 1) - #endif /* EXTENDED_STATS_INTERNAL_H */ diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h index 006d578e0c..326cf26fea 100644 --- a/src/include/statistics/statistics.h +++ b/src/include/statistics/statistics.h @@ -26,7 +26,8 @@ typedef struct MVNDistinctItem { double ndistinct; /* ndistinct value for this combination */ - Bitmapset *attrs; /* attr numbers of items */ + int nattributes; /* number of attributes */ + AttrNumber *attributes; /* attribute numbers */ } MVNDistinctItem; /* A MVNDistinct object, comprising all possible combinations of columns */ -- 2.26.2