From 25211c6b6c4eebdcdea92a956b8cfbe928fa94f8 Mon Sep 17 00:00:00 2001 From: Anthonin Bonnefoy Date: Wed, 24 Jul 2024 10:15:35 +0200 Subject: Output pages and tuples stats in ANALYZE VERBOSE Pages and tuples stats were only displayed as a log within acquire_sample function and ANALYZE VERBOSE couldn't display those as they were not available, only the total live and dead tuples were exported. To make the behaviour more consistent with VACUUM VERBOSE and between autoanalyze logs and ANALYZE VERBOSE: - relevant sampling stats are in a AcquireSampleStats - log is removed from acquire sample functions. - do_analyze_rel now outputs scanned and tuples statistics when relevant. sampling from fdw doesn't provide those statistics so they are not displayed in those cases. The sampling functions of both file_fdw and postgres_fdw were updated to reflect those changes. --- contrib/file_fdw/file_fdw.c | 36 +++------ contrib/postgres_fdw/postgres_fdw.c | 29 ++----- src/backend/commands/analyze.c | 113 ++++++++++++++-------------- src/include/foreign/fdwapi.h | 18 ++++- 4 files changed, 89 insertions(+), 107 deletions(-) diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index 249d82d3a0..2a4b0d8a3b 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -160,9 +160,8 @@ static void estimate_size(PlannerInfo *root, RelOptInfo *baserel, static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel, FileFdwPlanState *fdw_private, Cost *startup_cost, Cost *total_cost); -static int file_acquire_sample_rows(Relation onerel, int elevel, - HeapTuple *rows, int targrows, - double *totalrows, double *totaldeadrows); +static int file_acquire_sample_rows(Relation onerel, HeapTuple *rows, + int targrows, AcquireSampleStats * sstats); /* @@ -1112,7 +1111,7 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel, * which must have at least targrows entries. * The actual number of rows selected is returned as the function result. * We also count the total number of rows in the file and return it into - * *totalrows. Note that *totaldeadrows is always set to 0. + * sstats->total_live_tuples. * * Note that the returned list of rows is not always in order by physical * position in the file. Therefore, correlation estimates derived later @@ -1120,11 +1119,9 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel, * currently (the planner only pays attention to correlation for indexscans). */ static int -file_acquire_sample_rows(Relation onerel, int elevel, - HeapTuple *rows, int targrows, - double *totalrows, double *totaldeadrows) +file_acquire_sample_rows(Relation onerel, HeapTuple *rows, + int targrows, AcquireSampleStats * sstats) { - int numrows = 0; double rowstoskip = -1; /* -1 means not set yet */ ReservoirStateData rstate; TupleDesc tupDesc; @@ -1141,6 +1138,8 @@ file_acquire_sample_rows(Relation onerel, int elevel, Assert(onerel); Assert(targrows > 0); + Assert(sstats->sampled_tuples == 0); + Assert(sstats->total_live_tuples == 0); tupDesc = RelationGetDescr(onerel); values = (Datum *) palloc(tupDesc->natts * sizeof(Datum)); @@ -1172,8 +1171,6 @@ file_acquire_sample_rows(Relation onerel, int elevel, errcallback.previous = error_context_stack; error_context_stack = &errcallback; - *totalrows = 0; - *totaldeadrows = 0; for (;;) { /* Check for user-requested abort or sleep */ @@ -1196,9 +1193,9 @@ file_acquire_sample_rows(Relation onerel, int elevel, * reach the end of the relation. This algorithm is from Jeff Vitter's * paper (see more info in commands/analyze.c). */ - if (numrows < targrows) + if (sstats->sampled_tuples < targrows) { - rows[numrows++] = heap_form_tuple(tupDesc, values, nulls); + rows[sstats->sampled_tuples++] = heap_form_tuple(tupDesc, values, nulls); } else { @@ -1208,7 +1205,7 @@ file_acquire_sample_rows(Relation onerel, int elevel, * not-yet-incremented value of totalrows as t. */ if (rowstoskip < 0) - rowstoskip = reservoir_get_next_S(&rstate, *totalrows, targrows); + rowstoskip = reservoir_get_next_S(&rstate, sstats->total_live_tuples, targrows); if (rowstoskip <= 0) { @@ -1226,7 +1223,7 @@ file_acquire_sample_rows(Relation onerel, int elevel, rowstoskip -= 1; } - *totalrows += 1; + sstats->total_live_tuples += 1; } /* Remove error callback. */ @@ -1240,14 +1237,5 @@ file_acquire_sample_rows(Relation onerel, int elevel, pfree(values); pfree(nulls); - /* - * Emit some interesting relation info - */ - ereport(elevel, - (errmsg("\"%s\": file contains %.0f rows; " - "%d rows in sample", - RelationGetRelationName(onerel), - *totalrows, numrows))); - - return numrows; + return sstats->sampled_tuples; } diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index fc65d81e21..eddfef21e3 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -498,10 +498,8 @@ static void process_query_params(ExprContext *econtext, FmgrInfo *param_flinfo, List *param_exprs, const char **param_values); -static int postgresAcquireSampleRowsFunc(Relation relation, int elevel, - HeapTuple *rows, int targrows, - double *totalrows, - double *totaldeadrows); +static int postgresAcquireSampleRowsFunc(Relation relation, HeapTuple *rows, + int targrows, AcquireSampleStats * sstats); static void analyze_row_processor(PGresult *res, int row, PgFdwAnalyzeState *astate); static void produce_tuple_asynchronously(AsyncRequest *areq, bool fetch); @@ -5056,7 +5054,7 @@ postgresGetAnalyzeInfoForForeignTable(Relation relation, bool *can_tablesample) * which must have at least targrows entries. * The actual number of rows selected is returned as the function result. * We also count the total number of rows in the table and return it into - * *totalrows. Note that *totaldeadrows is always set to 0. + * sstats->total_live_tuples. * * Note that the returned list of rows is not always in order by physical * position in the table. Therefore, correlation estimates derived later @@ -5064,10 +5062,8 @@ postgresGetAnalyzeInfoForForeignTable(Relation relation, bool *can_tablesample) * currently (the planner only pays attention to correlation for indexscans). */ static int -postgresAcquireSampleRowsFunc(Relation relation, int elevel, - HeapTuple *rows, int targrows, - double *totalrows, - double *totaldeadrows) +postgresAcquireSampleRowsFunc(Relation relation, HeapTuple *rows, + int targrows, AcquireSampleStats * sstats) { PgFdwAnalyzeState astate; ForeignTable *table; @@ -5345,26 +5341,17 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel, ReleaseConnection(conn); - /* We assume that we have no dead tuple. */ - *totaldeadrows = 0.0; - /* * Without sampling, we've retrieved all living tuples from foreign * server, so report that as totalrows. Otherwise use the reltuples * estimate we got from the remote side. */ if (method == ANALYZE_SAMPLE_OFF) - *totalrows = astate.samplerows; + sstats->total_live_tuples = astate.samplerows; else - *totalrows = reltuples; + sstats->total_live_tuples = reltuples; - /* - * Emit some interesting relation info - */ - ereport(elevel, - (errmsg("\"%s\": table contains %.0f rows, %d rows in sample", - RelationGetRelationName(relation), - *totalrows, astate.numrows))); + sstats->sampled_tuples = astate.numrows; return astate.numrows; } diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index d492772aac..7469226313 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -87,13 +87,12 @@ static void compute_index_stats(Relation onerel, double totalrows, MemoryContext col_context); static VacAttrStats *examine_attribute(Relation onerel, int attnum, Node *index_expr); -static int acquire_sample_rows(Relation onerel, int elevel, - HeapTuple *rows, int targrows, - double *totalrows, double *totaldeadrows); +static int acquire_sample_rows(Relation onerel, HeapTuple *rows, + int targrows, AcquireSampleStats * sstats); static int compare_rows(const void *a, const void *b, void *arg); static int acquire_inherited_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, - double *totalrows, double *totaldeadrows); + AcquireSampleStats * sstats); static void update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats); static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); @@ -296,8 +295,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params, int targrows, numrows, minrows; - double totalrows, - totaldeadrows; + AcquireSampleStats sstats; HeapTuple *rows; PGRUsage ru0; TimestampTz starttime = 0; @@ -524,14 +522,16 @@ do_analyze_rel(Relation onerel, VacuumParams *params, pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE, inh ? PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS_INH : PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS); + + memset(&sstats, 0, sizeof(AcquireSampleStats)); + if (inh) numrows = acquire_inherited_sample_rows(onerel, elevel, rows, targrows, - &totalrows, &totaldeadrows); + &sstats); else - numrows = (*acquirefunc) (onerel, elevel, - rows, targrows, - &totalrows, &totaldeadrows); + numrows = (*acquirefunc) (onerel, rows, + targrows, &sstats); /* * Compute the statistics. Temporary results during the calculations for @@ -562,7 +562,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params, stats->compute_stats(stats, std_fetch_func, numrows, - totalrows); + sstats.total_live_tuples); /* * If the appropriate flavor of the n_distinct option is @@ -582,7 +582,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params, } if (nindexes > 0) - compute_index_stats(onerel, totalrows, + compute_index_stats(onerel, sstats.total_live_tuples, indexdata, nindexes, rows, numrows, col_context); @@ -607,7 +607,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params, } /* Build extended statistics (if there are any). */ - BuildRelationExtStatistics(onerel, inh, totalrows, numrows, rows, + BuildRelationExtStatistics(onerel, inh, sstats.total_live_tuples, numrows, rows, attr_cnt, vacattrstats); } @@ -641,7 +641,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params, CommandCounterIncrement(); vac_update_relstats(onerel, relpages, - totalrows, + sstats.total_live_tuples, relallvisible, hasindex, InvalidTransactionId, @@ -655,7 +655,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params, AnlIndexData *thisdata = &indexdata[ind]; double totalindexrows; - totalindexrows = ceil(thisdata->tupleFract * totalrows); + totalindexrows = ceil(thisdata->tupleFract * sstats.total_live_tuples); vac_update_relstats(Irel[ind], RelationGetNumberOfBlocks(Irel[ind]), totalindexrows, @@ -674,7 +674,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params, * in their pg_class entries except for reltuples and relhasindex. */ CommandCounterIncrement(); - vac_update_relstats(onerel, -1, totalrows, + vac_update_relstats(onerel, -1, sstats.total_live_tuples, 0, hasindex, InvalidTransactionId, InvalidMultiXactId, NULL, NULL, @@ -691,8 +691,8 @@ do_analyze_rel(Relation onerel, VacuumParams *params, * columns; otherwise, there is still work for auto-analyze to do. */ if (!inh) - pgstat_report_analyze(onerel, totalrows, totaldeadrows, - (va_cols == NIL)); + pgstat_report_analyze(onerel, sstats.total_live_tuples, + sstats.total_dead_tuples, (va_cols == NIL)); else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) pgstat_report_analyze(onerel, 0, 0, (va_cols == NIL)); @@ -810,6 +810,26 @@ do_analyze_rel(Relation onerel, VacuumParams *params, get_database_name(MyDatabaseId), get_namespace_name(RelationGetNamespace(onerel)), RelationGetRelationName(onerel)); + + /* + * Sampling from file_fdw and postgres_fdw won't report pages + * stats nor live_tuples and dead_tuples. Just display tuples + * sampled and total tuples estimate in this case. + */ + if (sstats.scanned_pages == 0) + { + appendStringInfo(&buf, _("tuples: %d tuples in samples, %.0f estimated total tuples\n"), + sstats.sampled_tuples, sstats.total_live_tuples); + } + else + { + appendStringInfo(&buf, _("pages: %u of %u scanned\n"), + sstats.scanned_pages, sstats.rel_pages); + appendStringInfo(&buf, _("tuples: %.0f live tuples, %.0f are dead; %d tuples in samples, %.0f estimated total tuples\n"), + sstats.live_tuples, sstats.dead_tuples, + sstats.sampled_tuples, sstats.total_live_tuples); + } + if (track_io_timing) { double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000; @@ -1184,17 +1204,13 @@ block_sampling_read_stream_next(ReadStream *stream, * density near the start of the table. */ static int -acquire_sample_rows(Relation onerel, int elevel, - HeapTuple *rows, int targrows, - double *totalrows, double *totaldeadrows) +acquire_sample_rows(Relation onerel, HeapTuple *rows, + int targrows, AcquireSampleStats * sstats) { int numrows = 0; /* # rows now in reservoir */ double samplerows = 0; /* total # rows collected */ - double liverows = 0; /* # live rows seen */ - double deadrows = 0; /* # dead rows seen */ double rowstoskip = -1; /* -1 means not set yet */ uint32 randseed; /* Seed for block sampler(s) */ - BlockNumber totalblocks; TransactionId OldestXmin; BlockSamplerData bs; ReservoirStateData rstate; @@ -1206,14 +1222,14 @@ acquire_sample_rows(Relation onerel, int elevel, Assert(targrows > 0); - totalblocks = RelationGetNumberOfBlocks(onerel); + sstats->rel_pages = RelationGetNumberOfBlocks(onerel); /* Need a cutoff xmin for HeapTupleSatisfiesVacuum */ OldestXmin = GetOldestNonRemovableTransactionId(onerel); /* Prepare for sampling block numbers */ randseed = pg_prng_uint32(&pg_global_prng_state); - nblocks = BlockSampler_Init(&bs, totalblocks, targrows, randseed); + nblocks = BlockSampler_Init(&bs, sstats->rel_pages, targrows, randseed); /* Report sampling block numbers */ pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_TOTAL, @@ -1238,7 +1254,7 @@ acquire_sample_rows(Relation onerel, int elevel, { vacuum_delay_point(); - while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot)) + while (table_scan_analyze_next_tuple(scan, OldestXmin, &sstats->live_tuples, &sstats->dead_tuples, slot)) { /* * The first targrows sample rows are simply copied into the @@ -1313,26 +1329,11 @@ acquire_sample_rows(Relation onerel, int elevel, */ if (bs.m > 0) { - *totalrows = floor((liverows / bs.m) * totalblocks + 0.5); - *totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5); + sstats->total_live_tuples = floor((sstats->live_tuples / bs.m) * sstats->rel_pages + 0.5); + sstats->total_dead_tuples = floor((sstats->dead_tuples / bs.m) * sstats->rel_pages + 0.5); } - else - { - *totalrows = 0.0; - *totaldeadrows = 0.0; - } - - /* - * Emit some interesting relation info - */ - ereport(elevel, - (errmsg("\"%s\": scanned %d of %u pages, " - "containing %.0f live rows and %.0f dead rows; " - "%d rows in sample, %.0f estimated total rows", - RelationGetRelationName(onerel), - bs.m, totalblocks, - liverows, deadrows, - numrows, *totalrows))); + sstats->scanned_pages = bs.m; + sstats->sampled_tuples = numrows; return numrows; } @@ -1373,7 +1374,7 @@ compare_rows(const void *a, const void *b, void *arg) static int acquire_inherited_sample_rows(Relation onerel, int elevel, HeapTuple *rows, int targrows, - double *totalrows, double *totaldeadrows) + AcquireSampleStats * sstats) { List *tableOIDs; Relation *rels; @@ -1386,10 +1387,6 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, ListCell *lc; bool has_child; - /* Initialize output parameters to zero now, in case we exit early */ - *totalrows = 0; - *totaldeadrows = 0; - /* * Find all members of inheritance set. We only need AccessShareLock on * the children. @@ -1559,13 +1556,13 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, if (childtargrows > 0) { int childrows; - double trows, - tdrows; + AcquireSampleStats childsstats; + + memset(&childsstats, 0, sizeof(AcquireSampleStats)); /* Fetch a random sample of the child's rows */ - childrows = (*acquirefunc) (childrel, elevel, - rows + numrows, childtargrows, - &trows, &tdrows); + childrows = (*acquirefunc) (childrel, rows + numrows, + childtargrows, &childsstats); /* We may need to convert from child's rowtype to parent's */ if (childrows > 0 && @@ -1594,8 +1591,8 @@ acquire_inherited_sample_rows(Relation onerel, int elevel, /* And add to counts */ numrows += childrows; - *totalrows += trows; - *totaldeadrows += tdrows; + sstats->total_live_tuples += childsstats.total_live_tuples; + sstats->total_dead_tuples += childsstats.total_dead_tuples; } } diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h index fcde3876b2..b27571f92a 100644 --- a/src/include/foreign/fdwapi.h +++ b/src/include/foreign/fdwapi.h @@ -19,6 +19,18 @@ /* To avoid including explain.h here, reference ExplainState thus: */ struct ExplainState; +typedef struct AcquireSampleStats +{ + BlockNumber rel_pages; /* total number of pages */ + BlockNumber scanned_pages; /* # pages examined */ + double live_tuples; /* # live tuples found in scanned pages */ + double dead_tuples; /* # dead tuples found in scanned pages */ + + int sampled_tuples; /* # sampled tuples */ + double total_live_tuples; /* # estimated live tuples */ + double total_dead_tuples; /* # estimated dead tuples */ +} AcquireSampleStats; + /* * Callback function signatures --- see fdwhandler.sgml for more info. @@ -148,10 +160,8 @@ typedef void (*ExplainForeignModify_function) (ModifyTableState *mtstate, typedef void (*ExplainDirectModify_function) (ForeignScanState *node, struct ExplainState *es); -typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel, - HeapTuple *rows, int targrows, - double *totalrows, - double *totaldeadrows); +typedef int (*AcquireSampleRowsFunc) (Relation relation, HeapTuple *rows, + int targrows, AcquireSampleStats * sstats); typedef bool (*AnalyzeForeignTable_function) (Relation relation, AcquireSampleRowsFunc *func, -- 2.39.3 (Apple Git-146)