From d3cffa3e00b4e2b17ab31ea1fd7096abd3a70481 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Mon, 18 Apr 2022 14:50:46 -0700 Subject: [PATCH v1] Have vacuum use live reltuples for indexes. Author: Peter Geoghegan Discussion: https://postgr.es/m/CAH2-WzmfddQ18Su7Zp6nnBnb3CMRT8Stv8Zg=ic1OmZcRt7Q=A@mail.gmail.com --- src/include/access/genam.h | 16 ++-- src/include/commands/vacuum.h | 4 +- src/backend/access/gin/ginvacuum.c | 2 +- src/backend/access/gist/gistvacuum.c | 3 + src/backend/access/heap/vacuumlazy.c | 117 ++++++++++++-------------- src/backend/access/nbtree/nbtree.c | 3 + src/backend/access/spgist/spgvacuum.c | 3 + src/backend/commands/vacuum.c | 20 +++-- contrib/pgstattuple/pgstatapprox.c | 3 +- 9 files changed, 92 insertions(+), 79 deletions(-) diff --git a/src/include/access/genam.h b/src/include/access/genam.h index 134b20f1e..8ebc46e67 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -37,9 +37,10 @@ typedef struct IndexBuildResult * Struct for input arguments passed to ambulkdelete and amvacuumcleanup * * num_heap_tuples is accurate only when estimated_count is false; - * otherwise it's just an estimate (currently, the estimate is the - * prior value of the relation's pg_class.reltuples field, so it could - * even be -1). It will always just be an estimate during ambulkdelete. + * otherwise it's just an estimate (taken from the heap rel's existing + * pg_class.reltuples value). It's always an estimate during ambulkdelete. + * During amvacuumcleanup this is the value that VACUUM is about to set + * the heap rel's pg_class.reltuples to (even when it's an estimate). 
*/ typedef struct IndexVacuumInfo { @@ -48,7 +49,7 @@ typedef struct IndexVacuumInfo bool report_progress; /* emit progress.h status reports */ bool estimated_count; /* num_heap_tuples is an estimate */ int message_level; /* ereport level for progress messages */ - double num_heap_tuples; /* tuples remaining in heap */ + double num_heap_tuples; /* live tuples remaining in heap */ BufferAccessStrategy strategy; /* access strategy for reads */ } IndexVacuumInfo; @@ -67,6 +68,11 @@ typedef struct IndexVacuumInfo * deleted by the current vacuum operation. pages_deleted and pages_free * refer to free space within the index file. * + * Note: num_index_tuples is used by VACUUM to set pg_class.reltuples, which + * is defined as the number of live tuples in the relation. num_heap_tuples + * should be used to cap num_index_tuples in amvacuumcleanup routines, though + * only when num_heap_tuples is not an estimate. + * * Note: Some index AMs may compute num_index_tuples by reference to * num_heap_tuples, in which case they should copy the estimated_count field * from IndexVacuumInfo. 
@@ -75,7 +81,7 @@ typedef struct IndexBulkDeleteResult { BlockNumber num_pages; /* pages remaining in index */ bool estimated_count; /* num_index_tuples is an estimate */ - double num_index_tuples; /* tuples remaining */ + double num_index_tuples; /* live tuples remaining */ double tuples_removed; /* # removed during vacuum operation */ BlockNumber pages_newly_deleted; /* # pages marked deleted by us */ BlockNumber pages_deleted; /* # pages marked deleted (could be by us) */ diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index f38e1148f..d26c43b95 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -273,9 +273,9 @@ extern void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel); extern void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode); extern double vac_estimate_reltuples(Relation relation, - BlockNumber total_pages, + BlockNumber rel_pages, BlockNumber scanned_pages, - double scanned_tuples); + double scanned_tuples, bool *estimate); extern void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index b4fa5f6bf..8ec9ec7c7 100644 --- a/src/backend/access/gin/ginvacuum.c +++ b/src/backend/access/gin/ginvacuum.c @@ -724,7 +724,7 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) memset(&idxStat, 0, sizeof(idxStat)); /* - * XXX we always report the heap tuple count as the number of index + * XXX we always report the heap tuple count as the number of live index * entries. This is bogus if the index is partial, but it's real hard to * tell how many distinct heap entries are referenced by a GIN index. 
*/ diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index f190decdf..fd5fdea1c 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c @@ -94,6 +94,9 @@ gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) * double-counting some index tuples, so disbelieve any total that exceeds * the underlying heap's count ... if we know that accurately. Otherwise * this might just make matters worse. + * + * This is also useful as a way of avoiding counting any dead tuples in + * our final num_index_tuples. */ if (!info->estimated_count) { diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 7f2e88740..0f96199e0 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -165,8 +165,6 @@ typedef struct LVRelState /* rel's initial relfrozenxid and relminmxid */ TransactionId relfrozenxid; MultiXactId relminmxid; - double old_live_tuples; /* previous value of pg_class.reltuples */ - /* VACUUM operation's cutoffs for freezing and pruning */ TransactionId OldestXmin; GlobalVisState *vistest; @@ -202,12 +200,6 @@ typedef struct LVRelState BlockNumber missed_dead_pages; /* # pages with missed dead tuples */ BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */ - /* Statistics output by us, for table */ - double new_rel_tuples; /* new estimated total # of tuples */ - double new_live_tuples; /* new estimated total # of live tuples */ - /* Statistics output by index AMs */ - IndexBulkDeleteResult **indstats; - /* Instrumentation counters */ int num_index_scans; /* Counters that follow are only for scanned_pages */ @@ -216,6 +208,9 @@ typedef struct LVRelState int64 live_tuples; /* # live tuples remaining */ int64 recently_dead_tuples; /* # dead, but not yet removable */ int64 missed_dead_tuples; /* # removable, but not removed */ + + /* Per-index instrumentation */ + IndexBulkDeleteResult **indstats; } 
LVRelState; /* @@ -266,7 +261,8 @@ static void lazy_vacuum_heap_rel(LVRelState *vacrel); static int lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, int index, Buffer *vmbuffer); static bool lazy_check_wraparound_failsafe(LVRelState *vacrel); -static void lazy_cleanup_all_indexes(LVRelState *vacrel); +static void lazy_cleanup_all_indexes(LVRelState *vacrel, double new_reltuples, + bool estimate); static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat, double reltuples, @@ -314,6 +310,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, instrument, aggressive, skipwithvm, + estimate, frozenxid_updated, minmulti_updated; TransactionId OldestXmin, @@ -323,6 +320,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, BlockNumber orig_rel_pages, new_rel_pages, new_rel_allvisible; + double new_reltuples; PGRUsage ru0; TimestampTz starttime = 0; PgStat_Counter startreadtime = 0, @@ -445,6 +443,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, vacrel->do_index_vacuuming = true; vacrel->do_index_cleanup = true; vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED); + vacrel->bstrategy = bstrategy; if (params->index_cleanup == VACOPTVALUE_DISABLED) { /* Force disable index vacuuming up-front */ @@ -462,32 +461,20 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, Assert(params->index_cleanup == VACOPTVALUE_AUTO); } - vacrel->bstrategy = bstrategy; - vacrel->relfrozenxid = rel->rd_rel->relfrozenxid; - vacrel->relminmxid = rel->rd_rel->relminmxid; - vacrel->old_live_tuples = rel->rd_rel->reltuples; - - /* Initialize page counters explicitly (be tidy) */ + /* Initialize counters explicitly (be tidy) */ vacrel->scanned_pages = 0; vacrel->removed_pages = 0; vacrel->lpdead_item_pages = 0; vacrel->missed_dead_pages = 0; vacrel->nonempty_pages = 0; - /* dead_items_alloc allocates vacrel->dead_items later on */ - - /* Allocate/initialize output statistics state */ - 
vacrel->new_rel_tuples = 0; - vacrel->new_live_tuples = 0; - vacrel->indstats = (IndexBulkDeleteResult **) - palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *)); - - /* Initialize remaining counters (be tidy) */ vacrel->num_index_scans = 0; vacrel->tuples_deleted = 0; vacrel->lpdead_items = 0; vacrel->live_tuples = 0; vacrel->recently_dead_tuples = 0; vacrel->missed_dead_tuples = 0; + vacrel->indstats = (IndexBulkDeleteResult **) + palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *)); /* * Determine the extent of the blocks that we'll scan in lazy_scan_heap, @@ -506,6 +493,8 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, * frozen) during its scan. */ vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel); + vacrel->relfrozenxid = rel->rd_rel->relfrozenxid; + vacrel->relminmxid = rel->rd_rel->relminmxid; vacrel->OldestXmin = OldestXmin; vacrel->vistest = GlobalVisTestFor(rel); /* FreezeLimit controls XID freezing (always <= OldestXmin) */ @@ -533,6 +522,24 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, */ lazy_scan_heap(vacrel); + /* + * Finished all pruning, freezing, and vacuuming. + * + * We still need to do final index cleanup via calls to each index's + * amvacuumcleanup routine. Most individual index AMs need only finalize + * index statistics, which are set in each index's pg_class entry below. + * But some index AMs (such as GIN) do significant amounts of I/O here, + * even when no index vacuuming occurred. Parallel workers help us here. + * + * First we need to get our final reltuples for rel's pg_class entry, + * since amvacuumcleanup routines use this information too. + */ + new_reltuples = vac_estimate_reltuples(vacrel->rel, orig_rel_pages, + vacrel->scanned_pages, + vacrel->live_tuples, &estimate); + if (vacrel->nindexes > 0 && vacrel->do_index_cleanup) + lazy_cleanup_all_indexes(vacrel, new_reltuples, estimate); + /* * Free resources managed by dead_items_alloc. 
This ends parallel mode in * passing when necessary. @@ -602,12 +609,12 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, /* * Now actually update rel's pg_class entry. * - * In principle new_live_tuples could be -1 indicating that we (still) + * In principle new_reltuples could be -1 indicating that we (still) * don't know the tuple count. In practice that can't happen, since we * scan every page that isn't skipped using the visibility map. */ - vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples, - new_rel_allvisible, vacrel->nindexes > 0, + vac_update_relstats(rel, new_rel_pages, new_reltuples, new_rel_allvisible, + vacrel->nindexes > 0, vacrel->NewRelfrozenXid, vacrel->NewRelminMxid, &frozenxid_updated, &minmulti_updated, false); @@ -621,9 +628,8 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, * soon in cases where the failsafe prevented significant amounts of heap * vacuuming. */ - pgstat_report_vacuum(RelationGetRelid(rel), - rel->rd_rel->relisshared, - Max(vacrel->new_live_tuples, 0), + pgstat_report_vacuum(RelationGetRelid(rel), rel->rd_rel->relisshared, + Max(new_reltuples, 0), vacrel->recently_dead_tuples + vacrel->missed_dead_tuples); pgstat_progress_end_command(); @@ -694,9 +700,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, orig_rel_pages == 0 ? 
100.0 : 100.0 * vacrel->scanned_pages / orig_rel_pages); appendStringInfo(&buf, - _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"), + _("tuples: %lld removed, %lld live remain, %lld dead but not yet removable remain\n"), (long long) vacrel->tuples_deleted, - (long long) vacrel->new_rel_tuples, + (long long) Max(new_reltuples, 0), (long long) vacrel->recently_dead_tuples); if (vacrel->missed_dead_tuples > 0) appendStringInfo(&buf, @@ -1243,19 +1249,6 @@ lazy_scan_heap(LVRelState *vacrel) /* report that everything is now scanned */ pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno); - /* now we can compute the new value for pg_class.reltuples */ - vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages, - vacrel->scanned_pages, - vacrel->live_tuples); - - /* - * Also compute the total number of surviving heap entries. In the - * (unlikely) scenario that new_live_tuples is -1, take it as zero. - */ - vacrel->new_rel_tuples = - Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples + - vacrel->missed_dead_tuples; - /* * Do index vacuuming (call each index's ambulkdelete routine), then do * related heap vacuuming @@ -1272,10 +1265,6 @@ lazy_scan_heap(LVRelState *vacrel) /* report all blocks vacuumed */ pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno); - - /* Do final index cleanup (call each index's amvacuumcleanup routine) */ - if (vacrel->nindexes > 0 && vacrel->do_index_cleanup) - lazy_cleanup_all_indexes(vacrel); } /* @@ -2300,6 +2289,7 @@ lazy_vacuum(LVRelState *vacrel) static bool lazy_vacuum_all_indexes(LVRelState *vacrel) { + double old_reltuples = vacrel->rel->rd_rel->reltuples; bool allindexes = true; Assert(vacrel->nindexes > 0); @@ -2325,8 +2315,7 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) IndexBulkDeleteResult *istat = vacrel->indstats[idx]; vacrel->indstats[idx] = - lazy_vacuum_one_index(indrel, istat, vacrel->old_live_tuples, - vacrel); + 
lazy_vacuum_one_index(indrel, istat, old_reltuples, vacrel); if (lazy_check_wraparound_failsafe(vacrel)) { @@ -2339,7 +2328,7 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) else { /* Outsource everything to parallel variant */ - parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, vacrel->old_live_tuples, + parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_reltuples, vacrel->num_index_scans); /* @@ -2649,11 +2638,9 @@ lazy_check_wraparound_failsafe(LVRelState *vacrel) * lazy_cleanup_all_indexes() -- cleanup all indexes of relation. */ static void -lazy_cleanup_all_indexes(LVRelState *vacrel) +lazy_cleanup_all_indexes(LVRelState *vacrel, double new_reltuples, + bool estimate) { - double reltuples = vacrel->new_rel_tuples; - bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages; - Assert(vacrel->do_index_cleanup); Assert(vacrel->nindexes > 0); @@ -2668,17 +2655,17 @@ lazy_cleanup_all_indexes(LVRelState *vacrel) Relation indrel = vacrel->indrels[idx]; IndexBulkDeleteResult *istat = vacrel->indstats[idx]; - vacrel->indstats[idx] = - lazy_cleanup_one_index(indrel, istat, reltuples, - estimated_count, vacrel); + vacrel->indstats[idx] = lazy_cleanup_one_index(indrel, istat, + new_reltuples, + estimate, vacrel); } } else { /* Outsource everything to parallel variant */ - parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples, + parallel_vacuum_cleanup_all_indexes(vacrel->pvs, new_reltuples, vacrel->num_index_scans, - estimated_count); + estimate); } } @@ -3332,7 +3319,12 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, } /* - * Update index statistics in pg_class if the statistics are accurate. + * Update index statistics in pg_class if we have accurate information. + * + * Note: pg_class.reltuples is defined as an estimate of the number of live + * tuples. Index AMs can deal with that provided lazy_cleanup_all_indexes was + * called with a non-estimate for the target table's own reltuples. 
Otherwise + * reltuples might include some dead index tuples, which shouldn't hurt much. */ static void update_relstats_all_indexes(LVRelState *vacrel) @@ -3351,7 +3343,6 @@ update_relstats_all_indexes(LVRelState *vacrel) if (istat == NULL || istat->estimated_count) continue; - /* Update index statistics */ vac_update_relstats(indrel, istat->num_pages, istat->num_index_tuples, diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 06131f23d..c6ea17b17 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -878,6 +878,9 @@ btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) * double-counting some index tuples, so disbelieve any total that exceeds * the underlying heap's count ... if we know that accurately. Otherwise * this might just make matters worse. + * + * This is also useful as a way of avoiding counting any dead tuples in + * our final num_index_tuples. */ if (!info->estimated_count) { diff --git a/src/backend/access/spgist/spgvacuum.c b/src/backend/access/spgist/spgvacuum.c index 004963053..693eeb83c 100644 --- a/src/backend/access/spgist/spgvacuum.c +++ b/src/backend/access/spgist/spgvacuum.c @@ -964,6 +964,9 @@ spgvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) * double-counting some index tuples, so disbelieve any total that exceeds * the underlying heap's count ... if we know that accurately. Otherwise * this might just make matters worse. + * + * This is also useful as a way of avoiding counting any dead tuples in + * our final num_index_tuples. 
*/ if (!info->estimated_count) { diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index e0fc7e8d7..816e2ec65 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -1220,9 +1220,10 @@ vacuum_xid_failsafe_check(TransactionId relfrozenxid, MultiXactId relminmxid) */ double vac_estimate_reltuples(Relation relation, - BlockNumber total_pages, + BlockNumber rel_pages, BlockNumber scanned_pages, - double scanned_tuples) + double scanned_tuples, + bool *estimate) { BlockNumber old_rel_pages = relation->rd_rel->relpages; double old_rel_tuples = relation->rd_rel->reltuples; @@ -1230,9 +1231,14 @@ vac_estimate_reltuples(Relation relation, double unscanned_pages; double total_tuples; + *estimate = true; /* for now */ + /* If we did scan the whole table, just use the count as-is */ - if (scanned_pages >= total_pages) + if (scanned_pages >= rel_pages) + { + *estimate = false; return scanned_tuples; + } /* - * If scanned_pages is zero but total_pages isn't, keep the existing value + * If scanned_pages is zero but rel_pages isn't, keep the existing value @@ -1256,8 +1262,8 @@ vac_estimate_reltuples(Relation relation, * a few of its pages (less than a quasi-arbitrary threshold of 2%) were * scanned by this VACUUM, assume that reltuples has not changed at all. */ - if (old_rel_pages == total_pages && - scanned_pages < (double) total_pages * 0.02) + if (old_rel_pages == rel_pages && + scanned_pages < (double) rel_pages * 0.02) return old_rel_tuples; /* @@ -1265,7 +1271,7 @@ vac_estimate_reltuples(Relation relation, - * scanned_tuples to match total_pages. + * scanned_tuples to match rel_pages. */ if (old_rel_tuples < 0 || old_rel_pages == 0) - return floor((scanned_tuples / scanned_pages) * total_pages + 0.5); + return floor((scanned_tuples / scanned_pages) * rel_pages + 0.5); /* * Okay, we've covered the corner cases. The normal calculation is to @@ -1274,7 +1280,7 @@ vac_estimate_reltuples(Relation relation, * and finally add on the number of tuples in the scanned pages. 
*/ old_density = old_rel_tuples / old_rel_pages; - unscanned_pages = (double) total_pages - (double) scanned_pages; + unscanned_pages = (double) rel_pages - (double) scanned_pages; total_tuples = old_density * unscanned_pages + scanned_tuples; return floor(total_tuples + 0.5); } diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c index 15ddc3223..1227b5e13 100644 --- a/contrib/pgstattuple/pgstatapprox.c +++ b/contrib/pgstattuple/pgstatapprox.c @@ -67,6 +67,7 @@ statapprox_heap(Relation rel, output_type *stat) BlockNumber scanned, nblocks, blkno; + bool estimate; Buffer vmbuffer = InvalidBuffer; BufferAccessStrategy bstrategy; TransactionId OldestXmin; @@ -193,7 +194,7 @@ statapprox_heap(Relation rel, output_type *stat) * we already accounted for the space in those pages, too. */ stat->tuple_count = vac_estimate_reltuples(rel, nblocks, scanned, - stat->tuple_count); + stat->tuple_count, &estimate); /* It's not clear if we could get -1 here, but be safe. */ stat->tuple_count = Max(stat->tuple_count, 0); -- 2.32.0