From 979b6081f4595c605c75beb36ec7f789dd0bad0e Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Fri, 19 Mar 2021 14:51:44 -0700 Subject: [PATCH v5 3/3] Skip index vacuuming dynamically. Author: Masahiko Sawada Reviewed-By: Peter Geoghegan Discussion: https://postgr.es/m/CAD21AoAtZb4+HJT_8RoOXvu4HM-Zd4HKS3YSMCH6+-W=bDyh-w@mail.gmail.com --- src/include/commands/vacuum.h | 6 +- src/include/utils/rel.h | 10 +- src/backend/access/common/reloptions.c | 39 ++++++-- src/backend/access/heap/vacuumlazy.c | 133 ++++++++++++++++++++----- src/backend/commands/vacuum.c | 33 ++++-- 5 files changed, 172 insertions(+), 49 deletions(-) diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index d029da5ac0..4885bbb44c 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -21,6 +21,7 @@ #include "parser/parse_node.h" #include "storage/buf.h" #include "storage/lock.h" +#include "utils/rel.h" #include "utils/relcache.h" /* @@ -216,8 +217,9 @@ typedef struct VacuumParams int log_min_duration; /* minimum execution threshold in ms at * which verbose logs are activated, -1 * to use default */ - VacOptTernaryValue index_cleanup; /* Do index vacuum and cleanup, - * default value depends on reloptions */ + VacOptIndexCleanupValue index_cleanup; /* Do index vacuum and cleanup, + * default value depends on + * reloptions */ VacOptTernaryValue truncate; /* Truncate empty pages at the end, * default value depends on reloptions */ diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 8eee1c1a83..8040bf76db 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -295,6 +295,13 @@ typedef struct AutoVacOpts float8 analyze_scale_factor; } AutoVacOpts; +typedef enum VacOptIndexCleanupValue +{ + VACOPT_CLEANUP_AUTO = 0, + VACOPT_CLEANUP_DISABLED, + VACOPT_CLEANUP_ENABLED +} VacOptIndexCleanupValue; + typedef struct StdRdOptions { int32 vl_len_; /* varlena header (do not touch directly!) 
*/ @@ -304,7 +311,8 @@ typedef struct StdRdOptions AutoVacOpts autovacuum; /* autovacuum-related options */ bool user_catalog_table; /* use as an additional catalog relation */ int parallel_workers; /* max number of parallel workers */ - bool vacuum_index_cleanup; /* enables index vacuuming and cleanup */ + VacOptIndexCleanupValue vacuum_index_cleanup; /* enables index vacuuming + * and cleanup */ bool vacuum_truncate; /* enables vacuum to truncate a relation */ bool parallel_insert_enabled; /* enables planner's use of * parallel insert */ diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 5a0ae99750..282978a310 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -140,15 +140,6 @@ static relopt_bool boolRelOpts[] = }, false }, - { - { - "vacuum_index_cleanup", - "Enables index vacuuming and index cleanup", - RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, - ShareUpdateExclusiveLock - }, - true - }, { { "vacuum_truncate", @@ -501,6 +492,23 @@ relopt_enum_elt_def viewCheckOptValues[] = {(const char *) NULL} /* list terminator */ }; +/* + * Values from VacOptIndexCleanupValue for the index_cleanup option. + * Allowing boolean values other than "on" and "off" is for + * backward compatibility, as the option used to be a + * boolean. 
+ */ +relopt_enum_elt_def vacOptTernaryOptValues[] = +{ + {"auto", VACOPT_CLEANUP_AUTO}, + {"true", VACOPT_CLEANUP_ENABLED}, + {"false", VACOPT_CLEANUP_DISABLED}, + {"on", VACOPT_CLEANUP_ENABLED}, + {"off", VACOPT_CLEANUP_DISABLED}, + {"1", VACOPT_CLEANUP_ENABLED}, + {"0", VACOPT_CLEANUP_DISABLED} +}; + static relopt_enum enumRelOpts[] = { { @@ -525,6 +533,17 @@ static relopt_enum enumRelOpts[] = VIEW_OPTION_CHECK_OPTION_NOT_SET, gettext_noop("Valid values are \"local\" and \"cascaded\".") }, + { + { + "vacuum_index_cleanup", + "Enables index vacuuming and index cleanup", + RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, + ShareUpdateExclusiveLock + }, + vacOptTernaryOptValues, + VACOPT_CLEANUP_AUTO, + gettext_noop("Valid values are \"on\", \"off\", and \"auto\".") + }, /* list terminator */ {{NULL}} }; @@ -1865,7 +1884,7 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind) offsetof(StdRdOptions, user_catalog_table)}, {"parallel_workers", RELOPT_TYPE_INT, offsetof(StdRdOptions, parallel_workers)}, - {"vacuum_index_cleanup", RELOPT_TYPE_BOOL, + {"vacuum_index_cleanup", RELOPT_TYPE_ENUM, offsetof(StdRdOptions, vacuum_index_cleanup)}, {"vacuum_truncate", RELOPT_TYPE_BOOL, offsetof(StdRdOptions, vacuum_truncate)}, diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 132cfcba16..27a1e4c74f 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -131,6 +131,12 @@ */ #define PREFETCH_SIZE ((BlockNumber) 32) +/* + * The threshold of the percentage of heap blocks having LP_DEAD line pointer + * above which index vacuuming goes ahead. + */ +#define SKIP_VACUUM_PAGES_RATIO 0.01 + /* * DSM keys for parallel vacuum. 
Unlike other parallel execution code, since * we don't need to worry about DSM keys conflicting with plan_node_id we can @@ -385,8 +391,10 @@ static void lazy_scan_heap(Relation onerel, VacuumParams *params, bool aggressive); static void lazy_vacuum_pruned_items(Relation onerel, LVRelStats *vacrelstats, Relation *Irel, int nindexes, - LVParallelState* lps, - VacOptTernaryValue index_cleanup); + LVParallelState *lps, + VacOptIndexCleanupValue index_cleanup, + BlockNumber has_dead_items_pages, + bool onecall); static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats); static bool lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelStats *vacrelstats); @@ -486,7 +494,6 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params, PgStat_Counter startwritetime = 0; Assert(params != NULL); - Assert(params->index_cleanup != VACOPT_TERNARY_DEFAULT); Assert(params->truncate != VACOPT_TERNARY_DEFAULT); /* measure elapsed time iff autovacuum logging requires it */ @@ -1349,7 +1356,8 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, next_fsm_block_to_vacuum; PGRUsage ru0; Buffer vmbuffer = InvalidBuffer; - bool skipping_blocks; + bool skipping_blocks, + have_vacuumed_indexes = false; xl_heap_freeze_tuple *frozen; StringInfoData buf; const int initprog_index[] = { @@ -1363,7 +1371,8 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, /* Counters of # blocks in onerel: */ BlockNumber empty_pages, - vacuumed_pages; + vacuumed_pages, + has_dead_items_pages; pg_rusage_init(&ru0); @@ -1378,7 +1387,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, vacrelstats->relnamespace, vacrelstats->relname))); - empty_pages = vacuumed_pages = 0; + empty_pages = vacuumed_pages = has_dead_items_pages = 0; /* Initialize counters */ c.num_tuples = 0; @@ -1638,9 +1647,18 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, vmbuffer = InvalidBuffer; } + /* + * 
Definitely won't be skipping index vacuuming due to finding + * very few dead items during this VACUUM operation -- that's only + * something that lazy_vacuum_pruned_items() is willing to do when + * it is only called once during the entire VACUUM operation. + */ + have_vacuumed_indexes = true; + /* Remove the collected garbage tuples from table and indexes */ lazy_vacuum_pruned_items(onerel, vacrelstats, Irel, nindexes, lps, - params->index_cleanup); + params->index_cleanup, + has_dead_items_pages, false); /* * Vacuum the Free Space Map to make newly-freed space visible on @@ -1777,6 +1795,17 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, scan_prune_page(onerel, buf, vacrelstats, vistest, frozen, &c, &ps, &vms); + /* + * Remember the number of pages having at least one LP_DEAD line + * pointer. This could be from this VACUUM, a previous VACUUM, or + * even opportunistic pruning. Note that this is exactly the same + * thing as having items that are stored in dead_tuples space, because + * scan_prune_page() doesn't count anything other than LP_DEAD items + * as dead (as of PostgreSQL 14). + */ + if (ps.has_dead_items) + has_dead_items_pages++; + /* * Step 7 for block: Set up details for saving free space in FSM at * end of loop. (Also performs extra single pass strategy steps in @@ -1791,9 +1820,18 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, savefreespace = false; freespace = 0; if (nindexes > 0 && ps.has_dead_items && - params->index_cleanup != VACOPT_TERNARY_DISABLED) + params->index_cleanup != VACOPT_CLEANUP_DISABLED) { - /* Wait until lazy_vacuum_heap() to save free space */ + /* + * Wait until lazy_vacuum_heap() to save free space. + * + * Note: It's not in fact 100% certain that we really will call + * lazy_vacuum_heap() in INDEX_CLEANUP = AUTO case (which is the + * common case) -- lazy_vacuum_pruned_items() might opt to skip + * index vacuuming (and so must skip heap vacuuming). 
This is + * deemed okay, because there can't be very much free space when + * this happens. + */ } else { @@ -1905,7 +1943,8 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, Assert(nindexes > 0 || dead_tuples->num_tuples == 0); if (dead_tuples->num_tuples > 0) lazy_vacuum_pruned_items(onerel, vacrelstats, Irel, nindexes, lps, - params->index_cleanup); + params->index_cleanup, has_dead_items_pages, + !have_vacuumed_indexes); /* * Vacuum the remainder of the Free Space Map. We must do this whether or @@ -1920,10 +1959,11 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, /* * Do post-vacuum cleanup. * - * Note that post-vacuum cleanup does not take place with - * INDEX_CLEANUP=OFF. + * Note that post-vacuum cleanup is supposed to take place when + * lazy_vacuum_pruned_items() decided to skip index vacuuming, but not + * with INDEX_CLEANUP=OFF. */ - if (nindexes > 0 && params->index_cleanup != VACOPT_TERNARY_DISABLED) + if (nindexes > 0 && params->index_cleanup != VACOPT_CLEANUP_DISABLED) lazy_cleanup_all_indexes(Irel, vacrelstats, lps, nindexes); /* @@ -1936,10 +1976,14 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, /* * Update index statistics. * - * Note that updating the statistics does not take place with - * INDEX_CLEANUP=OFF. + * Note that updating the statistics takes places when + * lazy_vacuum_pruned_items() decided to skip index vacuuming, but not + * with INDEX_CLEANUP=OFF. + * + * (In practice most index AMs won't have accurate statistics from + * cleanup, but the index AM API allows them to, so we must check.) 
*/ - if (nindexes > 0 && params->index_cleanup != VACOPT_TERNARY_DISABLED) + if (nindexes > 0 && params->index_cleanup != VACOPT_CLEANUP_DISABLED) update_index_statistics(Irel, vacrelstats->indstats, nindexes); /* @@ -1985,12 +2029,14 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, /* * Remove the collected garbage tuples from the table and its indexes. * - * We may be required to skip index vacuuming by INDEX_CLEANUP reloption. + * We may be able to skip index vacuuming (we may even be required to do so by + * reloption). */ static void lazy_vacuum_pruned_items(Relation onerel, LVRelStats *vacrelstats, Relation *Irel, int nindexes, LVParallelState *lps, - VacOptTernaryValue index_cleanup) + VacOptIndexCleanupValue index_cleanup, + BlockNumber has_dead_items_pages, bool onecall) { bool skipping; @@ -1998,11 +2044,40 @@ lazy_vacuum_pruned_items(Relation onerel, LVRelStats *vacrelstats, Assert(nindexes > 0); Assert(!IsParallelWorker()); - /* Check whether or not to do index vacuum and heap vacuum */ - if (index_cleanup == VACOPT_TERNARY_DISABLED) + /* + * Check whether or not to do index vacuum and heap vacuum. + * + * We do both index vacuum and heap vacuum if more than + * SKIP_VACUUM_PAGES_RATIO of all heap pages have at least one LP_DEAD + * line pointer. Otherwise, dead tuples on the heap are normally highly + * concentrated in relatively few heap blocks, which is where the index's + * enhanced deletion mechanism (which is clever about heap block dead + * tuple concentrations, including btree's bottom-up index deletion) + * works well. Also, since only a few heap blocks would be cleaned, + * skipping would have less of a negative impact on visibility map + * updates. 
+ */ + if (index_cleanup == VACOPT_CLEANUP_DISABLED) skipping = true; - else + else if (index_cleanup == VACOPT_CLEANUP_ENABLED) skipping = false; + else if (!onecall) + skipping = false; + else + { + BlockNumber rel_pages_threshold; + + Assert(onecall); + Assert(vacrelstats->num_index_scans == 0); + Assert(index_cleanup == VACOPT_CLEANUP_AUTO); + + rel_pages_threshold = + (double) vacrelstats->rel_pages * SKIP_VACUUM_PAGES_RATIO; + + if (has_dead_items_pages < rel_pages_threshold) + skipping = true; + else + skipping = false; + } if (!skipping) { @@ -2024,10 +2099,18 @@ lazy_vacuum_pruned_items(Relation onerel, LVRelStats *vacrelstats, * the similar "nindexes == 0" specific ereport() at the end of * lazy_scan_heap(). */ - ereport(elevel, - (errmsg("\"%s\": INDEX_CLEANUP off forced VACUUM to not totally remove %d pruned items", - vacrelstats->relname, - vacrelstats->dead_tuples->num_tuples))); + if (index_cleanup == VACOPT_CLEANUP_AUTO) + ereport(elevel, + (errmsg("\"%s\": opted to not totally remove %d pruned items in %u pages", + vacrelstats->relname, + vacrelstats->dead_tuples->num_tuples, + has_dead_items_pages))); + else + ereport(elevel, + (errmsg("\"%s\": INDEX_CLEANUP off forced VACUUM to not totally remove %d pruned items in %u pages", + vacrelstats->relname, + vacrelstats->dead_tuples->num_tuples, + has_dead_items_pages))); } /* diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index c064352e23..0d3aece45b 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -108,7 +108,7 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel) ListCell *lc; /* Set default value */ - params.index_cleanup = VACOPT_TERNARY_DEFAULT; + params.index_cleanup = VACOPT_CLEANUP_AUTO; params.truncate = VACOPT_TERNARY_DEFAULT; /* By default parallel vacuum is enabled */ @@ -140,7 +140,14 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel) else if (strcmp(opt->defname, "disable_page_skipping") == 
0) disable_page_skipping = defGetBoolean(opt); else if (strcmp(opt->defname, "index_cleanup") == 0) - params.index_cleanup = get_vacopt_ternary_value(opt); + { + if (opt->arg == NULL || strcmp(defGetString(opt), "auto") == 0) + params.index_cleanup = VACOPT_CLEANUP_AUTO; + else if (defGetBoolean(opt)) + params.index_cleanup = VACOPT_CLEANUP_ENABLED; + else + params.index_cleanup = VACOPT_CLEANUP_DISABLED; + } else if (strcmp(opt->defname, "process_toast") == 0) process_toast = defGetBoolean(opt); else if (strcmp(opt->defname, "truncate") == 0) @@ -1880,15 +1887,19 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params) onerelid = onerel->rd_lockInfo.lockRelId; LockRelationIdForSession(&onerelid, lmode); - /* Set index cleanup option based on reloptions if not yet */ - if (params->index_cleanup == VACOPT_TERNARY_DEFAULT) - { - if (onerel->rd_options == NULL || - ((StdRdOptions *) onerel->rd_options)->vacuum_index_cleanup) - params->index_cleanup = VACOPT_TERNARY_ENABLED; - else - params->index_cleanup = VACOPT_TERNARY_DISABLED; - } + /* + * Set index cleanup option based on reloptions if not set to either ON or + * OFF. Note that a VACUUM(INDEX_CLEANUP=AUTO) command is interpreted as + * "prefer reloption, but if it's not set, dynamically determine if index + * vacuuming and cleanup takes place" in vacuumlazy.c. Note also that the + * reloption might be explicitly set to AUTO. + * + * XXX: Do we really want that? + */ + if (params->index_cleanup == VACOPT_CLEANUP_AUTO && + onerel->rd_options != NULL) + params->index_cleanup = + ((StdRdOptions *) onerel->rd_options)->vacuum_index_cleanup; /* Set truncate option based on reloptions if not yet */ if (params->truncate == VACOPT_TERNARY_DEFAULT) -- 2.27.0