From 5285271b1b4cbe193718f1ceacb57fda16f219c2 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Fri, 15 Dec 2023 10:22:10 +0800 Subject: [PATCH v2] Clamp tuple_width to MaxAllocSize This patch clamps tuple-width estimate to MaxAllocSize in case it exceeds the limit or overflows. Anything larger than MaxAllocSize would not align with reality. In passing, this patch also changes the loop_count parameter to be double for compute_bitmap_pages(). This adjustment is made to prevent potential overflow issues when calling this function from cost_bitmap_heap_scan(), as reported by Alexander Lakhin. --- src/backend/access/table/tableam.c | 2 +- src/backend/optimizer/path/costsize.c | 36 ++++++++++++++++++------ src/backend/optimizer/plan/planner.c | 7 +++-- src/backend/optimizer/util/placeholder.c | 6 +++- src/backend/optimizer/util/plancat.c | 10 +++---- src/backend/optimizer/util/relnode.c | 11 ++++++-- src/include/optimizer/cost.h | 2 +- src/include/optimizer/optimizer.h | 1 + src/include/optimizer/plancat.h | 4 +-- 9 files changed, 55 insertions(+), 24 deletions(-) diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index c6bdb7e1c6..334fb01116 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -736,7 +736,7 @@ table_block_relation_estimate_size(Relation rel, int32 *attr_widths, * default plans which are kind of a headache for regression testing, * and (c) different table AMs might use different padding schemes. */ - int32 tuple_width; + double tuple_width; int fillfactor; /* diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index d6ceafd51c..f9406f4672 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -244,6 +244,24 @@ clamp_cardinality_to_long(Cardinality x) return (x < (double) LONG_MAX) ? (long) x : LONG_MAX; } +/* + * clamp_width_est + * Force a tuple-width estimate to a sane value. + */ +int +clamp_width_est(double tuple_width) +{ + /* + * Clamp tuple-width estimate to MaxAllocSize in case it exceeds the limit + * or overflows. Anything larger than MaxAllocSize would not align with + * reality. + */ + if (tuple_width > MaxAllocSize || isinf(tuple_width)) + tuple_width = MaxAllocSize; + + return (int) tuple_width; +} + /* * cost_seqscan @@ -6101,7 +6119,7 @@ static void set_rel_width(PlannerInfo *root, RelOptInfo *rel) { Oid reloid = planner_rt_fetch(rel->relid, root)->relid; - int32 tuple_width = 0; + double tuple_width = 0; bool have_wholerow_var = false; ListCell *lc; @@ -6213,7 +6231,7 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel) */ if (have_wholerow_var) { - int32 wholerow_width = MAXALIGN(SizeofHeapTupleHeader); + double wholerow_width = MAXALIGN(SizeofHeapTupleHeader); if (reloid != InvalidOid) { @@ -6230,7 +6248,7 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel) wholerow_width += rel->attr_widths[i - rel->min_attr]; } - rel->attr_widths[0 - rel->min_attr] = wholerow_width; + rel->attr_widths[0 - rel->min_attr] = clamp_width_est(wholerow_width); /* * Include the whole-row Var as part of the output tuple. Yes, that @@ -6239,8 +6257,8 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel) tuple_width += wholerow_width; } - Assert(tuple_width >= 0); - rel->reltarget->width = tuple_width; + rel->reltarget->width = clamp_width_est(tuple_width); + Assert(rel->reltarget->width >= 0); } /* @@ -6258,7 +6276,7 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel) PathTarget * set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target) { - int32 tuple_width = 0; + double tuple_width = 0; ListCell *lc; /* Vars are assumed to have cost zero, but other exprs do not */ @@ -6282,8 +6300,8 @@ set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target) } } - Assert(tuple_width >= 0); - target->width = tuple_width; + target->width = clamp_width_est(tuple_width); + Assert(target->width >= 0); return target; } @@ -6398,7 +6416,7 @@ get_parallel_divisor(Path *path) */ double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, - int loop_count, Cost *cost, double *tuple) + double loop_count, Cost *cost, double *tuple) { Cost indexTotalCost; Selectivity indexSelectivity; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index a8cea5efe1..f6e9c1328d 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -4611,6 +4611,7 @@ create_one_window_path(PlannerInfo *root, * we do need to account for the increase in tlist width. */ ListCell *lc2; + double tuple_width = window_target->width; window_target = copy_pathtarget(window_target); foreach(lc2, wflists->windowFuncs[wc->winref]) @@ -4618,8 +4619,9 @@ create_one_window_path(PlannerInfo *root, WindowFunc *wfunc = lfirst_node(WindowFunc, lc2); add_column_to_pathtarget(window_target, (Expr *) wfunc, 0); - window_target->width += get_typavgwidth(wfunc->wintype, -1); + tuple_width += get_typavgwidth(wfunc->wintype, -1); } + window_target->width = clamp_width_est(tuple_width); } else { @@ -6635,7 +6637,8 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid) * set_baserel_size_estimates, just do a quick hack for rows and width. */ rel->rows = rel->tuples; - rel->reltarget->width = get_relation_data_width(tableOid, NULL); + rel->reltarget->width = + clamp_width_est(get_relation_data_width(tableOid, NULL)); root->total_table_pages = rel->pages; diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c index b1723578e6..480130de62 100644 --- a/src/backend/optimizer/util/placeholder.c +++ b/src/backend/optimizer/util/placeholder.c @@ -375,6 +375,7 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, SpecialJoinInfo *sjinfo) { Relids relids = joinrel->relids; + double tuple_width = joinrel->reltarget->width; ListCell *lc; foreach(lc, root->placeholder_list) @@ -419,7 +420,7 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, cost_qual_eval_node(&cost, (Node *) phv->phexpr, root); joinrel->reltarget->cost.startup += cost.startup; joinrel->reltarget->cost.per_tuple += cost.per_tuple; - joinrel->reltarget->width += phinfo->ph_width; + tuple_width += phinfo->ph_width; } } @@ -443,6 +444,9 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, phinfo->ph_lateral); } } + + joinrel->reltarget->width = clamp_width_est(tuple_width); + Assert(joinrel->reltarget->width >= 0); } /* diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 7159c775fb..f73ccaae52 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -1083,7 +1083,7 @@ estimate_rel_size(Relation rel, int32 *attr_widths, * * XXX: Should this logic be more index specific? */ - int32 tuple_width; + double tuple_width; tuple_width = get_rel_data_width(rel, attr_widths); tuple_width += MAXALIGN(SizeofHeapTupleHeader); @@ -1134,10 +1134,10 @@ estimate_rel_size(Relation rel, int32 *attr_widths, * since they might be mostly NULLs, treating them as zero-width is not * necessarily the wrong thing anyway. */ -int32 +double get_rel_data_width(Relation rel, int32 *attr_widths) { - int32 tuple_width = 0; + double tuple_width = 0; int i; for (i = 1; i <= RelationGetNumberOfAttributes(rel); i++) @@ -1176,10 +1176,10 @@ get_rel_data_width(Relation rel, int32 *attr_widths) * External API for get_rel_data_width: same behavior except we have to * open the relcache entry. */ -int32 +double get_relation_data_width(Oid relid, int32 *attr_widths) { - int32 result; + double result; Relation relation; /* As above, assume relation is already locked */ diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 5d83f60eb9..53b7d91593 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -22,6 +22,7 @@ #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/inherit.h" +#include "optimizer/optimizer.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/placeholder.h" @@ -1092,6 +1093,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, bool can_null) { Relids relids = joinrel->relids; + double tuple_width = joinrel->reltarget->width; ListCell *vars; ListCell *lc; @@ -1144,7 +1146,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs, phv); /* Bubbling up the precomputed result has cost zero */ - joinrel->reltarget->width += phinfo->ph_width; + tuple_width += phinfo->ph_width; } continue; } @@ -1165,7 +1167,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, list_nth(root->row_identity_vars, var->varattno - 1); /* Update reltarget width estimate from RowIdentityVarInfo */ - joinrel->reltarget->width += ridinfo->rowidwidth; + tuple_width += ridinfo->rowidwidth; } else { @@ -1181,7 +1183,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, continue; /* nope, skip it */ /* Update reltarget width estimate from baserel's attr_widths */ - joinrel->reltarget->width += baserel->attr_widths[ndx]; + tuple_width += baserel->attr_widths[ndx]; } /* @@ -1221,6 +1223,9 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, /* Vars have cost zero, so no need to adjust reltarget->cost */ } + + joinrel->reltarget->width = clamp_width_est(tuple_width); + Assert(joinrel->reltarget->width >= 0); } /* diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 6d50afbf74..da464cdba9 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -210,6 +210,6 @@ extern void set_result_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_foreign_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern PathTarget *set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target); extern double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, - Path *bitmapqual, int loop_count, Cost *cost, double *tuple); + Path *bitmapqual, double loop_count, Cost *cost, double *tuple); #endif /* COST_H */ diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h index 6e8b81c51d..b95a264a21 100644 --- a/src/include/optimizer/optimizer.h +++ b/src/include/optimizer/optimizer.h @@ -91,6 +91,7 @@ extern PGDLLIMPORT int effective_cache_size; extern double clamp_row_est(double nrows); extern long clamp_cardinality_to_long(Cardinality x); +extern int clamp_width_est(double tuple_width); /* in path/indxpath.c: */ diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index eb1c3ccc4b..e0e0be0eca 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -33,8 +33,8 @@ extern List *infer_arbiter_indexes(PlannerInfo *root); extern void estimate_rel_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac); -extern int32 get_rel_data_width(Relation rel, int32 *attr_widths); -extern int32 get_relation_data_width(Oid relid, int32 *attr_widths); +extern double get_rel_data_width(Relation rel, int32 *attr_widths); +extern double get_relation_data_width(Oid relid, int32 *attr_widths); extern bool relation_excluded_by_constraints(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); -- 2.31.0