From 5ae5dde505ded1f555324382f9db6e7fbd114492 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Wed, 23 Dec 2020 20:42:53 -0800 Subject: [PATCH 7/8] btvacuumstrategy() bottom-up index deletion changes --- src/backend/access/heap/vacuumlazy.c | 69 +++++++++++++++++++++++++--- src/backend/access/nbtree/nbtree.c | 35 ++++++++++++-- src/backend/commands/vacuum.c | 6 +++ 3 files changed, 100 insertions(+), 10 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 93c4488e39..c45c49d561 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -358,11 +358,13 @@ static void lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, Relation *Irel, int nindexes, bool aggressive); static void choose_vacuum_strategy(LVRelStats *vacrelstats, VacuumParams *params, - Relation *Irel, int nindexes); + Relation *Irel, int nindexes, double live_tuples, + int maxdeadpage); static void lazy_vacuum_table_and_indexes(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, Relation *Irel, int nindexes, IndexBulkDeleteResult **stats, - LVParallelState *lps); + LVParallelState *lps, double live_tuples, + int maxdeadpage); static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats); static bool lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelStats *vacrelstats); @@ -781,6 +783,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, BlockNumber empty_pages, vacuumed_pages, next_fsm_block_to_vacuum; + int maxdeadpage = 0; /* controls if we skip heap vacuum scan */ double num_tuples, /* total number of nonremovable tuples */ live_tuples, /* live tuples (reltuples estimate) */ tups_vacuumed, /* tuples cleaned up by vacuum */ @@ -1080,7 +1083,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, /* Vacuum the table and its indexes */ lazy_vacuum_table_and_indexes(onerel, params, vacrelstats, Irel, nindexes, indstats, - lps); + lps, live_tuples, maxdeadpage); /* * Vacuum the Free Space Map to make newly-freed space visible on @@ -1666,6 +1669,9 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, */ if (dead_tuples->num_tuples == prev_dead_count) RecordPageWithFreeSpace(onerel, blkno, freespace); + else + maxdeadpage = Max(maxdeadpage, + dead_tuples->num_tuples - prev_dead_count); } /* report that everything is scanned and vacuumed */ @@ -1707,7 +1713,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, if (dead_tuples->num_tuples > 0) lazy_vacuum_table_and_indexes(onerel, params, vacrelstats, Irel, nindexes, indstats, - lps); + lps, live_tuples, maxdeadpage); /* * Vacuum the remainder of the Free Space Map. We must do this whether or @@ -1780,14 +1786,16 @@ static void lazy_vacuum_table_and_indexes(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, Relation *Irel, int nindexes, IndexBulkDeleteResult **indstats, - LVParallelState *lps) + LVParallelState *lps, double live_tuples, + int maxdeadpage) { /* * Choose the vacuum strategy for this vacuum cycle. * choose_vacuum_strategy will set the decision to * vacrelstats->vacuum_heap. */ - choose_vacuum_strategy(vacrelstats, params, Irel, nindexes); + choose_vacuum_strategy(vacrelstats, params, Irel, nindexes, live_tuples, + maxdeadpage); /* Work on all the indexes, then the heap */ lazy_vacuum_all_indexes(onerel, Irel, indstats, vacrelstats, lps, @@ -1825,7 +1833,8 @@ lazy_vacuum_table_and_indexes(Relation onerel, VacuumParams *params, */ static void choose_vacuum_strategy(LVRelStats *vacrelstats, VacuumParams *params, - Relation *Irel, int nindexes) + Relation *Irel, int nindexes, double live_tuples, + int maxdeadpage) { bool vacuum_heap = true; @@ -1865,6 +1874,52 @@ choose_vacuum_strategy(LVRelStats *vacrelstats, VacuumParams *params, break; } } + + /* + * XXX: This 130 test is for the maximum number of LP_DEAD items on + * any one heap page encountered during heap scan by caller. The + * general idea here is to preserve the original pristine state of the + * table when it is subject to constant non-HOT updates when heap fill + * factor is reduced from its default. + * + * If we do this right (and with bottom-up index deletion), the + * overall effect for non-HOT-update heavy workloads is that both + * table and indexes (or at least a subset of indexes on the table + * that are never logically modified by the updates) never grow even + * by one block. We can actually make those things perfectly stable + * over time in the absence of queries that hold open MVCC snapshots + * for a long time. Stability is perhaps the most important thing + * here (not performance per se). + * + * The exact number used here (130) is based on the assumption that + * heap fillfactor is set to 90 in this table -- we can fit roughly + * 200 "extra" LP_DEAD items on heap pages before they start to + * "overflow" with that setting (e.g. before a pgbench_accounts table + * that is subject to constant non-HOT updates needs to allocate new + * pages just for new versions). We're trying to avoid having VACUUM + * call lazy_vacuum_heap() in most cases, but we don't want to be too + * aggressive: it would be risky to make the value we test for much + * higher/closer to ~200, since it might be too late by the time we + * actually call lazy_vacuum_heap(). (Unsure of this, but that's the + * idea, at least.) + * + * Since we're mostly worried about stability over time here, we have + * to be worried about "small" effects. If there are just a few heap + * page overflows in each VACUUM cycle, that still means that heap + * page overflows are _possible_. It is perhaps only a matter of time + * until the heap becomes almost as fragmented as it would with a heap + * fill factor of 100 -- so "small" effects may be really important. + * (Just guessing here, but I can say for sure that the bottom-up + * deletion patch works that way, so it is an "educated guess".) + */ + if (!vacuum_heap) + { + if (maxdeadpage > 130 || + /* Also check if maintenance_work_mem space is running out */ + vacrelstats->dead_tuples->num_tuples > + vacrelstats->dead_tuples->max_tuples / 2) + vacuum_heap = true; + } } vacrelstats->vacuum_heap = vacuum_heap; diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 420457c1a2..ee071cb463 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -878,12 +878,35 @@ _bt_vacuum_needs_cleanup(IndexVacuumInfo *info) } /* - * Choose the vacuum strategy. Currently always do ambulkdelete. + * Choose the vacuum strategy */ IndexVacuumStrategy btvacuumstrategy(IndexVacuumInfo *info) { - return INDEX_VACUUM_BULKDELETE; + Relation rel = info->index; + + /* + * This strcmp() is a quick and dirty prototype of logic that decides + * whether or not the index needs to get a bulk deletion during this + * VACUUM. A real version of this logic could work by remembering the + * size of the index during the last VACUUM. It would only return + * INDEX_VACUUM_BULKDELETE to choose_vacuum_strategy()/vacuumlazy.c iff it + * found that the index is now larger than it was last time around, even + * by one single block. (It could get a lot more sophisticated than that, + * for example by trying to understand UPDATEs vs DELETEs, but a very + * simple approach is probably almost as useful to users.) + * + * Further details on the strcmp() and my benchmarking: + * + * The index named abalance_ruin is the only index that receives logical + * changes in my pgbench benchmarks. It is one index among several on + * pgbench_accounts. It covers the abalance column, which makes almost + * 100% of all UPDATEs non-HOT UPDATEs. + */ + if (strcmp(RelationGetRelationName(rel), "abalance_ruin") == 0) + return INDEX_VACUUM_BULKDELETE; + + return INDEX_VACUUM_NONE; } /* @@ -903,8 +926,14 @@ btbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, /* * Skip deleting index entries if the corresponding heap tuples will * not be deleted. + * + * XXX: Maybe we need to remember the decision made in btvacuumstrategy() + * in an AM-generic way, or using some standard idiom that is owned by the + * index AM? The strcmp() here repeats work done in btvacuumstrategy(), + * which is not ideal. */ - if (info->bulkdelete_skippable) + if (info->bulkdelete_skippable && + strcmp(RelationGetRelationName(rel), "abalance_ruin") != 0) return NULL; /* allocate stats if first time through, else re-use existing struct */ diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 6a182ba9cd..223b7cb820 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -1875,6 +1875,11 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params) */ if (params->index_cleanup == VACOPT_TERNARY_DEFAULT) { + /* + * XXX had to comment this out to get choose_vacuum_strategy() to do + * the right thing + */ +#if 0 if (onerel->rd_options != NULL) { if (((StdRdOptions *) onerel->rd_options)->vacuum_index_cleanup) @@ -1882,6 +1887,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params) else params->index_cleanup = VACOPT_TERNARY_DISABLED; } +#endif } /* Set truncate option based on reloptions if not yet */ -- 2.27.0