diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 3a8fc7d..e2fd6da 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1822,6 +1822,31 @@ include_dir 'conf.d'
+
+ Index Vacuum
+
+
+ vacuum_cleanup_index_scale_factor (floating point)
+
+ vacuum_cleanup_index_scale_factor configuration parameter
+
+
+
+
+ When no tuples were deleted from the heap, B-tree indexes might still
+ be scanned during the VACUUM cleanup stage for two
+ reasons. The first reason is that the B-tree index contains deleted pages
+ which can be recycled during cleanup. The second reason is that the B-tree
+ index statistics are stale. Index statistics are considered stale when
+ the number of tuples inserted since the previous statistics collection
+ is greater than vacuum_cleanup_index_scale_factor
+ fraction of the total number of heap tuples.
+
+
+
+
+
+
Background Writer
diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 1fd21e1..ad30de2 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -337,6 +337,21 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ]
+ B-tree indexes additionally accept this parameter:
+
+
+
+
+ vacuum_cleanup_index_scale_factor
+
+
+ Per-index value for the vacuum_cleanup_index_scale_factor configuration parameter.
+
+
+
+
+
+
GiST indexes additionally accept this parameter:
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 46276ce..fdd78cd 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -27,6 +27,7 @@
#include "catalog/pg_type.h"
#include "commands/defrem.h"
#include "commands/tablespace.h"
+#include "commands/vacuum.h"
#include "commands/view.h"
#include "nodes/makefuncs.h"
#include "postmaster/postmaster.h"
@@ -400,6 +401,15 @@ static relopt_real realRelOpts[] =
},
0, -1.0, DBL_MAX
},
+	{
+		{
+			"vacuum_cleanup_index_scale_factor",
+			"Number of tuple inserts prior to index cleanup as a fraction of reltuples.",
+			RELOPT_KIND_BTREE,
+			ShareUpdateExclusiveLock
+		},
+		-1, 0.0, 100.0			/* default -1: unset, fall back to the GUC */
+	},
/* list terminator */
{{NULL}}
};
@@ -1362,7 +1372,9 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind)
{"user_catalog_table", RELOPT_TYPE_BOOL,
offsetof(StdRdOptions, user_catalog_table)},
{"parallel_workers", RELOPT_TYPE_INT,
- offsetof(StdRdOptions, parallel_workers)}
+ offsetof(StdRdOptions, parallel_workers)},
+ {"vacuum_cleanup_index_scale_factor", RELOPT_TYPE_REAL,
+ offsetof(StdRdOptions, vacuum_cleanup_index_scale_factor)}
};
options = parseRelOptions(reloptions, validate, kind, &numoptions);
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index 92afe2d..6008232 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -760,9 +760,21 @@ _bt_page_recyclable(Page page)
* interested in it.
*/
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
- if (P_ISDELETED(opaque) &&
- TransactionIdPrecedes(opaque->btpo.xact, RecentGlobalXmin))
- return true;
+ if (P_ISDELETED(opaque))
+ {
+ uint32 xminEpoch, pageEpoch;
+
+ pageEpoch = PageGetPruneXid(page);
+ xminEpoch = GetEpochForXid(RecentGlobalXmin);
+
+ /* The page was deleted in an older epoch */
+ if (pageEpoch < xminEpoch)
+ return true;
+
+ /* Belongs to the same epoch, but the deletion xid is old enough */
+ if (TransactionIdPrecedes(opaque->btpo.xact, RecentGlobalXmin))
+ return true;
+ }
return false;
}
@@ -1770,16 +1782,19 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
* transactions are gone. Storing GetTopTransactionId() would work, but
* we're in VACUUM and would not otherwise have an XID. Having already
* updated links to the target, ReadNewTransactionId() suffices as an
- * upper bound. Any scan having retained a now-stale link is advertising
- * in its PGXACT an xmin less than or equal to the value we read here. It
- * will continue to do so, holding back RecentGlobalXmin, for the duration
- * of that scan.
+ * upper bound. We also store the epoch of that XID in the page header,
+ * so that _bt_page_recyclable() can still judge the page correctly after
+ * XID wraparound. Any scan having retained a now-stale
+ * link is advertising in its PGXACT an xmin less than or equal to the
+ * value we read here. It will continue to do so, holding back
+ * RecentGlobalXmin, for the duration of that scan.
*/
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
opaque->btpo_flags &= ~BTP_HALF_DEAD;
opaque->btpo_flags |= BTP_DELETED;
opaque->btpo.xact = ReadNewTransactionId();
+ PageSetPruneEpoch(page, GetEpochForXid(opaque->btpo.xact));
/* And update the metapage, if needed */
if (BufferIsValid(metabuf))
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 8158508..1e0795f 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -806,6 +806,34 @@ btbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
}
/*
+ * Check if index needs cleanup assuming that btbulkdelete() wasn't called.
+ */
+static bool
+_bt_vacuum_needs_cleanup(IndexVacuumInfo *info)
+{
+	StdRdOptions *relopts = (StdRdOptions *) info->index->rd_options;
+	double		cleanup_scale_factor = vacuum_cleanup_index_scale_factor;
+
+	/* A non-negative per-index reloption overrides the GUC default. */
+	if (relopts != NULL && relopts->vacuum_cleanup_index_scale_factor >= 0)
+		cleanup_scale_factor = relopts->vacuum_cleanup_index_scale_factor;
+
+	/*
+	 * Without a cleanup scan the index statistics go stale as the heap
+	 * grows, so request one once the heap tuple count exceeds the previously
+	 * recorded count (prev_num_heap_tuples) by more than cleanup_scale_factor
+	 * (kept as double, like both of its sources, to avoid float4 rounding).
+	 * Also scan when there is no usable previous count or the factor is
+	 * invalid.  Only tuple counts are examined here, not recyclable pages.
+	 */
+	if (cleanup_scale_factor < 0 || info->prev_num_heap_tuples <= 0 ||
+		info->num_heap_tuples > info->prev_num_heap_tuples * (1.0 + cleanup_scale_factor))
+		return true;
+
+	return false;
+}
+
+/*
* Post-VACUUM cleanup.
*
* Result: a palloc'd struct containing statistical info for VACUUM displays.
@@ -819,15 +847,20 @@ btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
/*
* If btbulkdelete was called, we need not do anything, just return the
- * stats from the latest btbulkdelete call. If it wasn't called, we must
- * still do a pass over the index, to recycle any newly-recyclable pages
- * and to obtain index statistics.
+ * stats from the latest btbulkdelete call. If it wasn't called, we might
+ * still need to do a pass over the index, to recycle any newly-recyclable
+ * pages and to obtain index statistics. _bt_vacuum_needs_cleanup checks
+ * whether such a pass is needed.
*
* Since we aren't going to actually delete any leaf items, there's no
* need to go through all the vacuum-cycle-ID pushups.
*/
if (stats == NULL)
{
+ /* Check if we need a cleanup */
+ if (!_bt_vacuum_needs_cleanup(info))
+ return NULL;
+
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
btvacuumscan(info, stats, NULL, NULL, 0);
}
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 47a6c4d..a83c278 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -8344,6 +8344,34 @@ GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch)
}
/*
+ * GetEpochForXid - get the epoch associated with the xid
+ */
+uint32
+GetEpochForXid(TransactionId xid)
+{
+	TransactionId lastCkptXid;
+	uint32		epoch;
+
+	/* Read the checkpoint's XID and epoch together under info_lck */
+	SpinLockAcquire(&XLogCtl->info_lck);
+	epoch = XLogCtl->ckptXidEpoch;
+	lastCkptXid = XLogCtl->ckptXid;
+	SpinLockRelease(&XLogCtl->info_lck);
+
+	/*
+	 * xid may lie across a wraparound boundary from the checkpoint's XID:
+	 * numerically smaller but logically newer means it wrapped into the next
+	 * epoch; numerically larger but logically older means the previous one.
+	 */
+	if (xid < lastCkptXid && TransactionIdFollows(xid, lastCkptXid))
+		epoch++;
+	else if (xid > lastCkptXid && TransactionIdPrecedes(xid, lastCkptXid))
+		epoch--;
+
+	return epoch;
+}
+
+/*
* This must be called ONCE during postmaster or standalone-backend shutdown
*/
void
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index ef93fb4..431bf34 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -367,6 +367,7 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
Oid save_userid;
int save_sec_context;
int save_nestlevel;
+ float4 prev_reltuples;
if (inh)
ereport(elevel,
@@ -628,6 +629,9 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
vacattrstats);
}
+ /* Before updating the stats in pg_class, remember the old value for later use */
+ prev_reltuples = onerel->rd_rel->reltuples;
+
/*
* Update pages/tuples stats in pg_class ... but not if we're doing
* inherited stats.
@@ -696,6 +700,7 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
ivinfo.estimated_count = true;
ivinfo.message_level = elevel;
ivinfo.num_heap_tuples = onerel->rd_rel->reltuples;
+ ivinfo.prev_num_heap_tuples = prev_reltuples;
ivinfo.strategy = vac_strategy;
stats = index_vacuum_cleanup(&ivinfo, NULL);
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 9ac84e8..0ae16f3 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -45,6 +45,7 @@
#include "access/visibilitymap.h"
#include "access/xlog.h"
#include "catalog/catalog.h"
+#include "catalog/pg_am.h"
#include "catalog/storage.h"
#include "commands/dbcommands.h"
#include "commands/progress.h"
@@ -131,6 +132,8 @@ typedef struct LVRelStats
bool lock_waiter_detected;
} LVRelStats;
+/* GUC parameter */
+double vacuum_cleanup_index_scale_factor;
/* A few variables that don't seem worth passing around as parameters */
static int elevel = -1;
@@ -1340,9 +1343,15 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
PROGRESS_VACUUM_PHASE_INDEX_CLEANUP);
- /* Do post-vacuum cleanup and statistics update for each index */
for (i = 0; i < nindexes; i++)
+ {
+ /*
+ * Do post-vacuum cleanup and statistics update for each index.
+ * A B-tree index may skip its cleanup scan when the heap has not
+ * grown past the vacuum_cleanup_index_scale_factor threshold.
+ */
lazy_cleanup_index(Irel[i], indstats[i], vacrelstats);
+ }
/* If no indexes, make log report that lazy_vacuum_heap would've made */
if (vacuumed_pages)
@@ -1657,6 +1666,7 @@ lazy_cleanup_index(Relation indrel,
ivinfo.estimated_count = (vacrelstats->tupcount_pages < vacrelstats->rel_pages);
ivinfo.message_level = elevel;
ivinfo.num_heap_tuples = vacrelstats->new_rel_tuples;
+ ivinfo.prev_num_heap_tuples = vacrelstats->old_rel_tuples;
ivinfo.strategy = vac_strategy;
stats = index_vacuum_cleanup(&ivinfo, stats);
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 4116c2d..408e9a9 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3084,6 +3084,16 @@ static struct config_real ConfigureNamesReal[] =
NULL, NULL, NULL
},
+	{
+		{"vacuum_cleanup_index_scale_factor", PGC_USERSET, WAL_CHECKPOINTS,	/* NOTE(review): group looks unrelated to index vacuum -- confirm */
+			gettext_noop("Number of tuple inserts prior to index cleanup as a fraction of reltuples."),
+			NULL
+		},
+		&vacuum_cleanup_index_scale_factor,
+		0.01, 0.0, 100.0,
+		NULL, NULL, NULL
+	},
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0.0, 0.0, 0.0, NULL, NULL, NULL
diff --git a/src/include/access/genam.h b/src/include/access/genam.h
index 24c720b..e418041 100644
--- a/src/include/access/genam.h
+++ b/src/include/access/genam.h
@@ -48,6 +48,7 @@ typedef struct IndexVacuumInfo
bool estimated_count; /* num_heap_tuples is an estimate */
int message_level; /* ereport level for progress messages */
double num_heap_tuples; /* tuples remaining in heap */
+ double prev_num_heap_tuples;
BufferAccessStrategy strategy; /* access strategy for reads */
} IndexVacuumInfo;
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 421ba6d..ea791d5 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -277,6 +277,7 @@ extern XLogRecPtr GetInsertRecPtr(void);
extern XLogRecPtr GetFlushRecPtr(void);
extern XLogRecPtr GetLastImportantRecPtr(void);
extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch);
+extern uint32 GetEpochForXid(TransactionId xid);
extern void RemovePromoteSignalFiles(void);
extern bool CheckPromoteSignal(void);
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 85d472f..59d907d 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -153,6 +153,7 @@ extern int vacuum_freeze_min_age;
extern int vacuum_freeze_table_age;
extern int vacuum_multixact_freeze_min_age;
extern int vacuum_multixact_freeze_table_age;
+extern double vacuum_cleanup_index_scale_factor;
/* in commands/vacuum.c */
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index 85dd10c..ee48a5b 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -400,6 +400,14 @@ do { \
} while (0)
#define PageClearPrunable(page) \
(((PageHeader) (page))->pd_prune_xid = InvalidTransactionId)
+#define PageGetPruneXid(page) \
+ (((PageHeader) (page))->pd_prune_xid)
+/*
+ * Similar to PageSetPrunable, but stores the value without checking the
+ * argument.  It is used for storing an epoch in the pd_prune_xid field.
+ */
+#define PageSetPruneEpoch(page, epoch) \
+ (((PageHeader) (page))->pd_prune_xid = (epoch))
/* ----------------------------------------------------------------
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index aa8add5..ca9d157 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -281,6 +281,8 @@ typedef struct StdRdOptions
AutoVacOpts autovacuum; /* autovacuum-related options */
bool user_catalog_table; /* use as an additional catalog relation */
int parallel_workers; /* max number of parallel workers */
+ /* fraction of newly inserted tuples prior to index cleanup */
+ double vacuum_cleanup_index_scale_factor;
} StdRdOptions;
#define HEAP_MIN_FILLFACTOR 10
diff --git a/src/test/regress/expected/btree_index.out b/src/test/regress/expected/btree_index.out
index 755cd17..4778ac1 100644
--- a/src/test/regress/expected/btree_index.out
+++ b/src/test/regress/expected/btree_index.out
@@ -150,3 +150,32 @@ vacuum btree_tall_tbl;
-- need to insert some rows to cause the fast root page to split.
insert into btree_tall_tbl (id, t)
select g, repeat('x', 100) from generate_series(1, 500) g;
+--
+-- Test vacuum_cleanup_index_scale_factor
+--
+-- Simple create
+create table btree_test(a int);
+create index btree_idx1 on btree_test(a) with (vacuum_cleanup_index_scale_factor = 40.0);
+select reloptions from pg_class WHERE oid = 'btree_idx1'::regclass;
+ reloptions
+------------------------------------------
+ {vacuum_cleanup_index_scale_factor=40.0}
+(1 row)
+
+-- Fail while setting improper values
+create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = -10.0);
+ERROR: value -10.0 out of bounds for option "vacuum_cleanup_index_scale_factor"
+DETAIL: Valid values are between "0.000000" and "100.000000".
+create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = 100.0);
+create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = 'string');
+ERROR: invalid value for floating point option "vacuum_cleanup_index_scale_factor": string
+create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = true);
+ERROR: invalid value for floating point option "vacuum_cleanup_index_scale_factor": true
+-- Simple ALTER INDEX
+alter index btree_idx1 set (vacuum_cleanup_index_scale_factor = 70.0);
+select reloptions from pg_class WHERE oid = 'btree_idx1'::regclass;
+ reloptions
+------------------------------------------
+ {vacuum_cleanup_index_scale_factor=70.0}
+(1 row)
+
diff --git a/src/test/regress/sql/btree_index.sql b/src/test/regress/sql/btree_index.sql
index 65b08c8..21171f7 100644
--- a/src/test/regress/sql/btree_index.sql
+++ b/src/test/regress/sql/btree_index.sql
@@ -92,3 +92,22 @@ vacuum btree_tall_tbl;
-- need to insert some rows to cause the fast root page to split.
insert into btree_tall_tbl (id, t)
select g, repeat('x', 100) from generate_series(1, 500) g;
+
+--
+-- Test vacuum_cleanup_index_scale_factor
+--
+
+-- Simple create
+create table btree_test(a int);
+create index btree_idx1 on btree_test(a) with (vacuum_cleanup_index_scale_factor = 40.0);
+select reloptions from pg_class WHERE oid = 'btree_idx1'::regclass;
+
+-- Fail while setting improper values
+create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = -10.0);
+create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = 100.0);
+create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = 'string');
+create index btree_idx_err on btree_test(a) with (vacuum_cleanup_index_scale_factor = true);
+
+-- Simple ALTER INDEX
+alter index btree_idx1 set (vacuum_cleanup_index_scale_factor = 70.0);
+select reloptions from pg_class WHERE oid = 'btree_idx1'::regclass;