diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 3a8fc7d..e2fd6da 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1822,6 +1822,31 @@ include_dir 'conf.d' + + Index Vacuum + + + vacuum_cleanup_index_scale_factor (floating point) + + vacuum_cleanup_index_scale_factor configuration parameter + + + + + When no tuples were deleted from the heap, B-tree indexes might still + be scanned during the VACUUM cleanup stage for two + reasons. The first reason is that the B-tree index contains deleted pages + which can be recycled during cleanup. The second reason is that the B-tree + index statistics are stale. Index statistics are considered stale when + the number of tuples inserted since the previous statistics collection + is greater than the vacuum_cleanup_index_scale_factor + fraction of the total number of heap tuples. + + + + + + Background Writer diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml index 1fd21e1..ad30de2 100644 --- a/doc/src/sgml/ref/create_index.sgml +++ b/doc/src/sgml/ref/create_index.sgml @@ -337,6 +337,21 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] + B-tree indexes additionally accept this parameter: + + + + + vacuum_cleanup_index_scale_factor + + + Per-index value for vacuum_cleanup_index_scale_factor.
+ + + + + + GiST indexes additionally accept this parameter: diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 46276ce..fdd78cd 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -27,6 +27,7 @@ #include "catalog/pg_type.h" #include "commands/defrem.h" #include "commands/tablespace.h" +#include "commands/vacuum.h" #include "commands/view.h" #include "nodes/makefuncs.h" #include "postmaster/postmaster.h" @@ -400,6 +401,15 @@ static relopt_real realRelOpts[] = }, 0, -1.0, DBL_MAX }, + { + { + "vacuum_cleanup_index_scale_factor", + "Number of tuple inserts prior to index cleanup as a fraction of reltuples.", + RELOPT_KIND_BTREE, + ShareUpdateExclusiveLock + }, + -1, 0.0, 100.0 /* default -1: option not set, the GUC value is used instead */ + }, /* list terminator */ {{NULL}} }; @@ -1362,7 +1372,9 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind) {"user_catalog_table", RELOPT_TYPE_BOOL, offsetof(StdRdOptions, user_catalog_table)}, {"parallel_workers", RELOPT_TYPE_INT, - offsetof(StdRdOptions, parallel_workers)} + offsetof(StdRdOptions, parallel_workers)}, + {"vacuum_cleanup_index_scale_factor", RELOPT_TYPE_REAL, + offsetof(StdRdOptions, vacuum_cleanup_index_scale_factor)} }; options = parseRelOptions(reloptions, validate, kind, &numoptions); diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 92afe2d..6008232 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -760,9 +760,21 @@ _bt_page_recyclable(Page page) * interested in it.
*/ opaque = (BTPageOpaque) PageGetSpecialPointer(page); - if (P_ISDELETED(opaque) && - TransactionIdPrecedes(opaque->btpo.xact, RecentGlobalXmin)) - return true; + if (P_ISDELETED(opaque)) + { + uint32 xminEpoch, pageEpoch; + + pageEpoch = PageGetPruneXid(page); + xminEpoch = GetEpochForXid(RecentGlobalXmin); + + /* The page epoch belongs to an older epoch */ + if (pageEpoch < xminEpoch) + return true; + + /* Belong to the same epoch but the deleted xid is too old */ + if (TransactionIdPrecedes(opaque->btpo.xact, RecentGlobalXmin)) + return true; + } return false; } @@ -1770,16 +1782,19 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty) * transactions are gone. Storing GetTopTransactionId() would work, but * we're in VACUUM and would not otherwise have an XID. Having already * updated links to the target, ReadNewTransactionId() suffices as an - * upper bound. Any scan having retained a now-stale link is advertising - * in its PGXACT an xmin less than or equal to the value we read here. It - * will continue to do so, holding back RecentGlobalXmin, for the duration - * of that scan. + * upper bound. We also store an epoch number for the XID into page + * header. It will be used for judgement whether the page is recyclable + * or not beyond XID wraparound. Any scan having retained a now-stale + * link is advertising in its PGXACT an xmin less than or equal to the + * value we read here. It will continue to do so, holding back + * RecentGlobalXmin, for the duration of that scan. 
*/ page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); opaque->btpo_flags &= ~BTP_HALF_DEAD; opaque->btpo_flags |= BTP_DELETED; opaque->btpo.xact = ReadNewTransactionId(); + PageSetPruneEpoch(page, GetEpochForXid(opaque->btpo.xact)); /* And update the metapage, if needed */ if (BufferIsValid(metabuf)) diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 8158508..1e0795f 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -806,6 +806,34 @@ btbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, } /* + * Check whether the index needs a cleanup scan, assuming that btbulkdelete() wasn't called. + */ +static bool +_bt_vacuum_needs_cleanup(IndexVacuumInfo *info) +{ + StdRdOptions *relopts; + float4 cleanup_scale_factor; + + relopts = (StdRdOptions *) info->index->rd_options; + cleanup_scale_factor = (relopts && + relopts->vacuum_cleanup_index_scale_factor >= 0) + ? relopts->vacuum_cleanup_index_scale_factor + : vacuum_cleanup_index_scale_factor; /* reloption absent or negative: use the GUC */ + + /* + * If the table has received enough insertions and no cleanup was + * performed, the index statistics may have become stale. To avoid + * that, request a cleanup when there is no usable previous tuple count + * or the heap grew by more than the scale-factor fraction since then. + */ + if (cleanup_scale_factor < 0 || info->prev_num_heap_tuples <= 0 || + info->num_heap_tuples > info->prev_num_heap_tuples * (1.0 + cleanup_scale_factor)) + return true; + + return false; +} + +/* * Post-VACUUM cleanup. * * Result: a palloc'd struct containing statistical info for VACUUM displays. @@ -819,15 +847,20 @@ btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) /* * If btbulkdelete was called, we need not do anything, just return the - * stats from the latest btbulkdelete call. If it wasn't called, we must - * still do a pass over the index, to recycle any newly-recyclable pages - * and to obtain index statistics.
+ * stats from the latest btbulkdelete call. If it wasn't called, we might + * still need to do a pass over the index, to recycle any newly-recyclable + * pages and to obtain index statistics. _bt_vacuum_needs_cleanup decides + * whether that pass is needed, i.e. whether the statistics look stale. * * Since we aren't going to actually delete any leaf items, there's no * need to go through all the vacuum-cycle-ID pushups. */ if (stats == NULL) { + /* Nothing to return if no cleanup scan is needed */ + if (!_bt_vacuum_needs_cleanup(info)) + return NULL; + stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); btvacuumscan(info, stats, NULL, NULL, 0); } diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 47a6c4d..a83c278 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -8344,6 +8344,34 @@ GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch) } /* + * GetEpochForXid - get the epoch associated with the xid + */ +uint32 +GetEpochForXid(TransactionId xid) +{ + uint32 ckptXidEpoch; + TransactionId ckptXid; + + /* Read the checkpoint's xid and epoch together under the spinlock */ + SpinLockAcquire(&XLogCtl->info_lck); + ckptXidEpoch = XLogCtl->ckptXidEpoch; + ckptXid = XLogCtl->ckptXid; + SpinLockRelease(&XLogCtl->info_lck); + + /* + * The xid may be on either side of ckptXid. If it's numerically more + * but logically earlier, it belongs to the previous epoch. If it's + * numerically less but logically later, it has wrapped into the next one.
+ */ + if (xid > ckptXid && TransactionIdPrecedes(xid, ckptXid)) + ckptXidEpoch--; + else if (xid < ckptXid && TransactionIdFollows(xid, ckptXid)) + ckptXidEpoch++; + + return ckptXidEpoch; +} + +/* * This must be called ONCE during postmaster or standalone-backend shutdown */ void diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index ef93fb4..431bf34 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -367,6 +367,7 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params, Oid save_userid; int save_sec_context; int save_nestlevel; + float4 prev_reltuples; if (inh) ereport(elevel, @@ -628,6 +629,9 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params, vacattrstats); } + /* Before updating the stats in pg_class, remember the old reltuples for later use */ + prev_reltuples = onerel->rd_rel->reltuples; + /* * Update pages/tuples stats in pg_class ... but not if we're doing * inherited stats. @@ -696,6 +700,7 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params, ivinfo.estimated_count = true; ivinfo.message_level = elevel; ivinfo.num_heap_tuples = onerel->rd_rel->reltuples; + ivinfo.prev_num_heap_tuples = prev_reltuples; ivinfo.strategy = vac_strategy; stats = index_vacuum_cleanup(&ivinfo, NULL); diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index 9ac84e8..0ae16f3 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -45,6 +45,7 @@ #include "access/visibilitymap.h" #include "access/xlog.h" #include "catalog/catalog.h" +#include "catalog/pg_am.h" #include "catalog/storage.h" #include "commands/dbcommands.h" #include "commands/progress.h" @@ -131,6 +132,8 @@ typedef struct LVRelStats bool lock_waiter_detected; } LVRelStats; +/* GUC parameter */ +double vacuum_cleanup_index_scale_factor; /* A few variables that don't seem worth passing around as parameters */ static int elevel = -1; @@ -1340,9 +1343,15
@@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_INDEX_CLEANUP); - /* Do post-vacuum cleanup and statistics update for each index */ for (i = 0; i < nindexes; i++) + { + /* + * Do post-vacuum cleanup and statistics update for each index. + * For B-tree indexes the cleanup scan may be skipped when the heap + * has not grown past the vacuum_cleanup_index_scale_factor threshold. + */ lazy_cleanup_index(Irel[i], indstats[i], vacrelstats); + } /* If no indexes, make log report that lazy_vacuum_heap would've made */ if (vacuumed_pages) @@ -1657,6 +1666,7 @@ lazy_cleanup_index(Relation indrel, ivinfo.estimated_count = (vacrelstats->tupcount_pages < vacrelstats->rel_pages); ivinfo.message_level = elevel; ivinfo.num_heap_tuples = vacrelstats->new_rel_tuples; + ivinfo.prev_num_heap_tuples = vacrelstats->old_rel_tuples; ivinfo.strategy = vac_strategy; stats = index_vacuum_cleanup(&ivinfo, stats); diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 4116c2d..408e9a9 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -3084,6 +3084,16 @@ static struct config_real ConfigureNamesReal[] = NULL, NULL, NULL }, + { + {"vacuum_cleanup_index_scale_factor", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Number of tuple inserts prior to index cleanup as a fraction of reltuples."), + NULL + }, + &vacuum_cleanup_index_scale_factor, + 0.01, 0.0, 100.0, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0.0, 0.0, 0.0, NULL, NULL, NULL diff --git a/src/include/access/genam.h b/src/include/access/genam.h index 24c720b..e418041 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -48,6 +48,7 @@ typedef struct IndexVacuumInfo bool estimated_count; /* num_heap_tuples is an estimate */ int message_level; /* ereport level for progress messages */ double num_heap_tuples; /* tuples remaining in heap */ + double prev_num_heap_tuples; /* heap tuple count before this VACUUM/ANALYZE */
BufferAccessStrategy strategy; /* access strategy for reads */ } IndexVacuumInfo; diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 421ba6d..ea791d5 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -277,6 +277,7 @@ extern XLogRecPtr GetInsertRecPtr(void); extern XLogRecPtr GetFlushRecPtr(void); extern XLogRecPtr GetLastImportantRecPtr(void); extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch); +extern uint32 GetEpochForXid(TransactionId xid); extern void RemovePromoteSignalFiles(void); extern bool CheckPromoteSignal(void); diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 85d472f..59d907d 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -153,6 +153,7 @@ extern int vacuum_freeze_min_age; extern int vacuum_freeze_table_age; extern int vacuum_multixact_freeze_min_age; extern int vacuum_multixact_freeze_table_age; +extern double vacuum_cleanup_index_scale_factor; /* in commands/vacuum.c */ diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index 85dd10c..ee48a5b 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -400,6 +400,14 @@ do { \ } while (0) #define PageClearPrunable(page) \ (((PageHeader) (page))->pd_prune_xid = InvalidTransactionId) +#define PageGetPruneXid(page) \ + (((PageHeader) (page))->pd_prune_xid) +/* + * Like PageSetPrunable, but stores the value without checking the argument. + * It is used to store an XID epoch in the pd_prune_xid field.
+ */ +#define PageSetPruneEpoch(page, epoch) \ + (((PageHeader) (page))->pd_prune_xid = (epoch)) /* ---------------------------------------------------------------- diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index aa8add5..ca9d157 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -281,6 +281,8 @@ typedef struct StdRdOptions AutoVacOpts autovacuum; /* autovacuum-related options */ bool user_catalog_table; /* use as an additional catalog relation */ int parallel_workers; /* max number of parallel workers */ + /* fraction of inserted tuples prior to index cleanup */ + double vacuum_cleanup_index_scale_factor; } StdRdOptions; #define HEAP_MIN_FILLFACTOR 10 diff --git a/src/test/regress/expected/btree_index.out b/src/test/regress/expected/btree_index.out index 755cd17..4778ac1 100644 --- a/src/test/regress/expected/btree_index.out +++ b/src/test/regress/expected/btree_index.out @@ -150,3 +150,32 @@ vacuum btree_tall_tbl; -- need to insert some rows to cause the fast root page to split. insert into btree_tall_tbl (id, t) select g, repeat('x', 100) from generate_series(1, 500) g; +-- +-- Test vacuum_cleanup_index_scale_factor +-- +-- Simple create +create table btree_test(a int); +create index btree_idx1 on btree_test(a) with (vacuum_cleanup_index_scale_factor = 40.0); +select reloptions from pg_class WHERE oid = 'btree_idx1'::regclass; + reloptions +------------------------------------------ + {vacuum_cleanup_index_scale_factor=40.0} +(1 row) + +-- Fail while setting improper values +create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = -10.0); +ERROR: value -10.0 out of bounds for option "vacuum_cleanup_index_scale_factor" +DETAIL: Valid values are between "0.000000" and "100.000000".
+create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = 100.0); +create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = 'string'); +ERROR: invalid value for floating point option "vacuum_cleanup_index_scale_factor": string +create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = true); +ERROR: invalid value for floating point option "vacuum_cleanup_index_scale_factor": true +-- Simple ALTER INDEX +alter index btree_idx1 set (vacuum_cleanup_index_scale_factor = 70.0); +select reloptions from pg_class WHERE oid = 'btree_idx1'::regclass; + reloptions +------------------------------------------ + {vacuum_cleanup_index_scale_factor=70.0} +(1 row) + diff --git a/src/test/regress/sql/btree_index.sql b/src/test/regress/sql/btree_index.sql index 65b08c8..21171f7 100644 --- a/src/test/regress/sql/btree_index.sql +++ b/src/test/regress/sql/btree_index.sql @@ -92,3 +92,22 @@ vacuum btree_tall_tbl; -- need to insert some rows to cause the fast root page to split. 
insert into btree_tall_tbl (id, t) select g, repeat('x', 100) from generate_series(1, 500) g; + +-- +-- Test vacuum_cleanup_index_scale_factor +-- + +-- Simple create +create table btree_test(a int); +create index btree_idx1 on btree_test(a) with (vacuum_cleanup_index_scale_factor = 40.0); +select reloptions from pg_class WHERE oid = 'btree_idx1'::regclass; + +-- Fail while setting improper values +create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = -10.0); +create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = 100.0); +create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = 'string'); +create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = true); + +-- Simple ALTER INDEX +alter index btree_idx1 set (vacuum_cleanup_index_scale_factor = 70.0); +select reloptions from pg_class WHERE oid = 'btree_idx1'::regclass;