From cd535fd571cf56a459d7689d7832c1c47d02df16 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Fri, 8 Dec 2023 14:48:54 -0500 Subject: [PATCH v3 08/12] Count table modification VM clears When a page formerly marked all visible in the visibility map is modified, check if it remained unmodified for at least target_freeze_duration. If it was modified sooner than target_freeze_duration, it is an early unset. We want to keep track of the number and age of pages which are modified before target_freeze_duration so that vacuum can predict whether or not a page it is considering opportunistically freezing is likely to be modified before target_freeze_duration has elapsed. This commit adds only the tracking of unsets upon page modification, not the use of these stats by vacuum. The target_freeze_duration is specified in seconds and must be translated to LSNs. This is done at the beginning of each table vacuum. The translated target_freeze_duration in LSNs is cached in the table-level stats. Since the LSN consumption rate is global, there is no reason to store the translated value in table-level stats. It should be moved somewhere else. 
--- src/backend/access/heap/heapam.c | 86 +++++++--- src/backend/access/heap/vacuumlazy.c | 2 + src/backend/utils/activity/pgstat_relation.c | 159 +++++++++++++++++++ src/include/pgstat.h | 50 ++++++ src/tools/pgindent/typedefs.list | 1 + 5 files changed, 277 insertions(+), 21 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index fa21a5a09a0..f50379b96ca 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2532,6 +2532,9 @@ heap_delete(Relation relation, ItemPointer tid, bool have_tuple_lock = false; bool iscombo; bool all_visible_cleared = false; + uint8 old_vmbits = 0; + XLogRecPtr insert_lsn = InvalidXLogRecPtr; + XLogRecPtr page_lsn = InvalidXLogRecPtr; HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */ bool old_key_copied = false; @@ -2793,8 +2796,8 @@ l1: { all_visible_cleared = true; PageClearAllVisible(page); - visibilitymap_clear(relation, BufferGetBlockNumber(buffer), - vmbuffer, VISIBILITYMAP_VALID_BITS); + old_vmbits = visibilitymap_clear(relation, BufferGetBlockNumber(buffer), + vmbuffer, VISIBILITYMAP_VALID_BITS); } /* store transaction information of xact deleting the tuple */ @@ -2875,7 +2878,8 @@ l1: /* filtering by origin on a row level is much more efficient */ XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN); - recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE); + insert_lsn = recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE); + page_lsn = PageGetLSN(page); PageSetLSN(page, recptr); } @@ -2920,6 +2924,9 @@ l1: pgstat_count_heap_delete(relation); + if (old_vmbits & VISIBILITYMAP_ALL_VISIBLE) + pgstat_count_vm_unset(relation, page_lsn, insert_lsn, old_vmbits); + if (old_key_tuple != NULL && old_key_copied) heap_freetuple(old_key_tuple); @@ -3026,6 +3033,12 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, infomask_new_tuple, infomask2_new_tuple; + uint8 old_page_old_vmbits = 0; + uint8 new_page_old_vmbits = 0; + XLogRecPtr old_page_lsn = 
InvalidXLogRecPtr; + XLogRecPtr new_page_lsn = InvalidXLogRecPtr; + XLogRecPtr insert_lsn = InvalidXLogRecPtr; + Assert(ItemPointerIsValid(otid)); /* Cheap, simplistic check that the tuple matches the rel's rowtype. */ @@ -3563,8 +3576,9 @@ l2: */ if (PageIsAllVisible(page)) { - cleared_all_frozen = visibilitymap_clear(relation, block, vmbuffer, - VISIBILITYMAP_ALL_FROZEN) & VISIBILITYMAP_ALL_FROZEN; + old_page_old_vmbits = visibilitymap_clear(relation, block, vmbuffer, + VISIBILITYMAP_ALL_FROZEN); + cleared_all_frozen = old_page_old_vmbits & VISIBILITYMAP_ALL_FROZEN; } MarkBufferDirty(buffer); @@ -3584,7 +3598,8 @@ l2: xlrec.flags = cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0; XLogRegisterData((char *) &xlrec, SizeOfHeapLock); - recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK); + insert_lsn = recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK); + old_page_lsn = PageGetLSN(page); PageSetLSN(page, recptr); } @@ -3793,15 +3808,15 @@ l2: { all_visible_cleared = true; PageClearAllVisible(BufferGetPage(buffer)); - visibilitymap_clear(relation, BufferGetBlockNumber(buffer), - vmbuffer, VISIBILITYMAP_VALID_BITS); + old_page_old_vmbits = visibilitymap_clear(relation, BufferGetBlockNumber(buffer), + vmbuffer, VISIBILITYMAP_VALID_BITS); } if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf))) { all_visible_cleared_new = true; PageClearAllVisible(BufferGetPage(newbuf)); - visibilitymap_clear(relation, BufferGetBlockNumber(newbuf), - vmbuffer_new, VISIBILITYMAP_VALID_BITS); + new_page_old_vmbits = visibilitymap_clear(relation, BufferGetBlockNumber(newbuf), + vmbuffer_new, VISIBILITYMAP_VALID_BITS); } if (newbuf != buffer) @@ -3823,15 +3838,18 @@ l2: log_heap_new_cid(relation, heaptup); } - recptr = log_heap_update(relation, buffer, - newbuf, &oldtup, heaptup, - old_key_tuple, - all_visible_cleared, - all_visible_cleared_new); + insert_lsn = recptr = log_heap_update(relation, buffer, + newbuf, &oldtup, heaptup, + old_key_tuple, + all_visible_cleared, + 
all_visible_cleared_new); if (newbuf != buffer) { + new_page_lsn = PageGetLSN(BufferGetPage(newbuf)); PageSetLSN(BufferGetPage(newbuf), recptr); } + + old_page_lsn = PageGetLSN(BufferGetPage(buffer)); PageSetLSN(BufferGetPage(buffer), recptr); } @@ -3866,6 +3884,15 @@ l2: if (have_tuple_lock) UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode); + if (old_page_old_vmbits & VISIBILITYMAP_ALL_VISIBLE) + pgstat_count_vm_unset(relation, old_page_lsn, + insert_lsn, old_page_old_vmbits); + + /* MTODO: figure out if we whether or not to count unfreezing new page */ + if (newbuf != buffer && + new_page_old_vmbits & VISIBILITYMAP_ALL_VISIBLE) + pgstat_count_vm_unset(relation, new_page_lsn, insert_lsn, new_page_old_vmbits); + pgstat_count_heap_update(relation, use_hot_update, newbuf != buffer); /* @@ -4162,6 +4189,10 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, bool skip_tuple_lock = false; bool have_tuple_lock = false; bool cleared_all_frozen = false; + XLogRecPtr insert_lsn = InvalidXLogRecPtr; + XLogRecPtr page_lsn = InvalidXLogRecPtr; + uint8 old_vmbits = 0; + *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid)); block = ItemPointerGetBlockNumber(tid); @@ -4761,8 +4792,9 @@ failed: /* Clear only the all-frozen bit on visibility map if needed */ if (PageIsAllVisible(page)) { - cleared_all_frozen = visibilitymap_clear(relation, block, vmbuffer, - VISIBILITYMAP_ALL_FROZEN) & VISIBILITYMAP_ALL_FROZEN; + old_vmbits = visibilitymap_clear(relation, block, vmbuffer, + VISIBILITYMAP_ALL_FROZEN); + cleared_all_frozen = old_vmbits & VISIBILITYMAP_ALL_FROZEN; } MarkBufferDirty(*buffer); @@ -4796,7 +4828,8 @@ failed: /* we don't decode row locks atm, so no need to log the origin */ - recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK); + insert_lsn = recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK); + page_lsn = PageGetLSN(page); PageSetLSN(page, recptr); } @@ -4824,6 +4857,9 @@ out_unlocked: if (have_tuple_lock) UnlockTupleTuplock(relation, tid, mode); + if 
(old_vmbits & VISIBILITYMAP_ALL_VISIBLE) + pgstat_count_vm_unset(relation, page_lsn, insert_lsn, old_vmbits); + return result; } @@ -5276,6 +5312,9 @@ heap_lock_updated_tuple_rec(Relation rel, ItemPointer tid, TransactionId xid, new_xmax; TransactionId priorXmax = InvalidTransactionId; bool cleared_all_frozen = false; + uint8 old_vmbits = 0; + XLogRecPtr page_lsn = InvalidXLogRecPtr; + XLogRecPtr insert_lsn = InvalidXLogRecPtr; bool pinned_desired_page; Buffer vmbuffer = InvalidBuffer; BlockNumber block; @@ -5515,8 +5554,9 @@ l4: if (PageIsAllVisible(BufferGetPage(buf))) { - cleared_all_frozen = visibilitymap_clear(rel, block, vmbuffer, - VISIBILITYMAP_ALL_FROZEN) & VISIBILITYMAP_ALL_FROZEN; + old_vmbits = visibilitymap_clear(rel, block, vmbuffer, + VISIBILITYMAP_ALL_FROZEN); + cleared_all_frozen = old_vmbits & VISIBILITYMAP_ALL_FROZEN; } START_CRIT_SECTION(); @@ -5548,7 +5588,8 @@ l4: XLogRegisterData((char *) &xlrec, SizeOfHeapLockUpdated); - recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_LOCK_UPDATED); + insert_lsn = recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_LOCK_UPDATED); + page_lsn = PageGetLSN(page); PageSetLSN(page, recptr); } @@ -5581,6 +5622,9 @@ out_unlocked: if (vmbuffer != InvalidBuffer) ReleaseBuffer(vmbuffer); + if (old_vmbits & VISIBILITYMAP_ALL_VISIBLE) + pgstat_count_vm_unset(rel, page_lsn, insert_lsn, old_vmbits); + return result; } diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 3b9299b8924..c788e7e5ca4 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -489,6 +489,8 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, vacrel->relname))); } + pgstat_refresh_frz_dur(RelationGetRelid(rel), rel->rd_rel->relisshared); + /* * Allocate dead_items array memory using dead_items_alloc. 
This handles * parallel VACUUM initialization as part of allocating shared memory diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index f5d726e2921..5048112408b 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -18,6 +18,7 @@ #include "postgres.h" #include "access/twophase_rmgr.h" +#include "access/visibilitymapdefs.h" #include "access/xact.h" #include "catalog/partition.h" #include "postmaster/autovacuum.h" @@ -205,6 +206,162 @@ pgstat_drop_relation(Relation rel) } } + +/* + * The first time a page is modified after having been set all visible, we + * check the duration it was unmodified against the target_freeze_duration. The + * page has only an LSN, not a timestmap, so we must translate the page LSN to + * time using the LSNTimeline. Because the LSN consumption rate can change, we + * want to refresh this translated value periodically. Doing so at the start of + * each table vacuum is convenient. + */ +void +pgstat_refresh_frz_dur(Oid tableoid, bool shared) +{ + PgStat_EntryRef *entry_ref; + PgStat_StatTabEntry *tabentry; + TimestampTz cur_time; + XLogRecPtr cur_lsn; + TimestampTz target_time; + XLogRecPtr target_lsn; + uint64 target_dur_usecs; + Oid dboid = (shared ? InvalidOid : MyDatabaseId); + + if (!pgstat_track_counts) + return; + + target_dur_usecs = target_freeze_duration * USECS_PER_SEC; + + cur_time = GetCurrentTimestamp(); + + /* + * We can afford to acquire exact (not approximate) insert LSN at the + * start of each relation vacuum. The translation of the GUC value to time + * will be more accurate. + */ + cur_lsn = GetXLogInsertRecPtr(); + + /* + * How long ago would a page have to have been set all visible for it to + * qualify as having remained unmodified for target_freeze_duration. It + * shouldn't happen that current time - target_freeze_duration is less + * than zero, but TimestampTz is signed, so we better do this check. 
+ */ + target_time = target_dur_usecs >= cur_time ? 0 : cur_time - target_dur_usecs; + + /* + * Use the global LSNTimeline stored in WAL statistics to translate the + * target_time into an LSN based on our LSN consumption over that period. + */ + target_lsn = pgstat_wal_estimate_lsn_at_time(target_time); + + /* Now get the table-level stats */ + entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_RELATION, + dboid, tableoid, false); + + Assert(entry_ref != NULL && entry_ref->shared_stats != NULL); + + tabentry = &((PgStatShared_Relation *) entry_ref->shared_stats)->stats; + + /* + * Update the translated value of target_freeze_duration so that table + * modifications use a fresh value when determining whether or not a page + * was modified sooner than target_freeze_duration after having been set + * all visible. There is no reason for this to be cached at the table + * level, but it is easiest to keep it here for now. + */ + tabentry->target_frz_dur_lsns = cur_lsn - target_lsn; + + pgstat_unlock_entry(entry_ref); + + /* + * MTODO: would like to flush table stats so future unsets use + * target_frz_dur_lsns new value. However pgstat_report_stat() can't be + * called here due to being in a transaction. Is there some other way to + * do this? + */ + + return; +} + + +/* + * Upon update, delete, or tuple lock, if the page being modified was + * previously set all visible in the visibility map, check and record whether + * or not the page has remained unmodified for longer than + * target_freeze_duration. Record both the page age and the page's former + * status in the VM. The distribution of ages can be used to predict whether or + * not a given page is likely to remain unmodified for longer than + * target_freeze_duration. + * + * Note that we do not count inserts as unsets even when they are modifying a + * formerly all visible page. This is because vacuum updates the freespace map + * after pruning, freezing, and reaping dead tuples. 
The next insert is likely + * to be to this page, so the page's age at insert is unrelated to the page + * modification pattern and will only reflect that vacuum made space available + * on the page. + */ +void +pgstat_count_vm_unset(Relation relation, XLogRecPtr page_lsn, + XLogRecPtr current_lsn, uint8 old_vmbits) +{ + PgStat_StatTabEntry *tabentry; + XLogRecPtr target_frz_duration; + XLogRecPtr vm_duration_lsns; + PgStat_TableCounts *tabcounts; + + /* + * Can't be all frozen without being all visible and we shouldn't call + * this function if all bits were unset + */ + Assert(old_vmbits & VISIBILITYMAP_ALL_VISIBLE); + + if (!pgstat_track_counts) + return; + + tabentry = pgstat_fetch_stat_tabentry_ext(relation->rd_rel->relisshared, + RelationGetRelid(relation)); + + /* + * MTODO: Where can we cache this such that it is easy to get here but not + * table-level? + */ + target_frz_duration = tabentry->target_frz_dur_lsns; + + vm_duration_lsns = current_lsn - page_lsn; + vm_duration_lsns = Max(vm_duration_lsns, 0); + + tabcounts = &relation->pgstat_info->counts; + + tabcounts->unsets.unvis++; + + /* + * If the page is being modified before target_freeze_duration, count it + * as an unset for vacuum freeze statistics. We want to determine the + * likelihood that a page being vacuumed will be modified before that + * amount of time has elapsed, irrespective of whether or not we got it + * right last vacuum. + */ + if (vm_duration_lsns < target_frz_duration) + accumulator_insert(&tabcounts->unsets.early_unsets, vm_duration_lsns); + + /* + * If it was frozen and modified before the target duration, it is an + * early unfreeze. If it was not frozen and remained unmodified for longer + * than the target duration, it is a missed opportunity to freeze. 
+ */ + if (old_vmbits & VISIBILITYMAP_ALL_FROZEN) + { + tabcounts->unsets.vm_unfreezes++; + + if (vm_duration_lsns < target_frz_duration) + tabcounts->unsets.early_unfreezes++; + } + else if (vm_duration_lsns >= target_frz_duration) + tabcounts->unsets.missed_freezes++; +} + + /* * Report that the table was just vacuumed and flush IO statistics. */ @@ -845,6 +1002,8 @@ pgstat_relation_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) tabentry->tuples_hot_updated += lstats->counts.tuples_hot_updated; tabentry->tuples_newpage_updated += lstats->counts.tuples_newpage_updated; + pgstat_unset_absorb(&tabentry->vm_unset, &lstats->counts.unsets); + /* * If table was truncated/dropped, first reset the live/dead counters. */ diff --git a/src/include/pgstat.h b/src/include/pgstat.h index e5cceb0237b..a1945e6a5e8 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -202,6 +202,44 @@ accumulator_calculate(PgStat_Accumulator *accumulator, double *mean, *stddev = sqrt((accumulator->q - pow(accumulator->s, 2) / accumulator->n) / accumulator->n); } +typedef struct PgStat_VMUnset +{ + /* times a page marked frozen in the VM was modified */ + int64 vm_unfreezes; + /* times a page was unfrozen before target_freeze_duration elapsed */ + int64 early_unfreezes; + /* times a page marked all visible in the VM was modified */ + int64 unvis; + + /* + * times a page only marked all visible and not all frozen in the VM + * remained unmodified for longer than target_freeze_duration + */ + int64 missed_freezes; + + /* + * times that pages marked either all visible or all visible and all + * frozen in the VM were modified before target_freeze_duration elapsed. + * The accumulator tracks their ages as well as occurrences. We include + * pages which were marked all visible but not all frozen because we care + * about how long pages remain unmodified in general. 
If we only counted + * the ages of early unfreezes, it would skew our data based on our own + * failure to freeze the right pages. + */ + PgStat_Accumulator early_unsets; +} PgStat_VMUnset; + +static inline void +pgstat_unset_absorb(PgStat_VMUnset *target, PgStat_VMUnset *source) +{ + target->vm_unfreezes += source->vm_unfreezes; + target->early_unfreezes += source->early_unfreezes; + target->unvis += source->unvis; + target->missed_freezes += source->missed_freezes; + + accumulator_absorb(&target->early_unsets, &source->early_unsets); +} + /* ---------- * PgStat_TableCounts The actual per-table counts kept by a backend * @@ -236,6 +274,8 @@ typedef struct PgStat_TableCounts PgStat_Counter tuples_newpage_updated; bool truncdropped; + PgStat_VMUnset unsets; + PgStat_Counter delta_live_tuples; PgStat_Counter delta_dead_tuples; PgStat_Counter changed_tuples; @@ -491,6 +531,11 @@ typedef struct PgStat_StatTabEntry PgStat_Counter analyze_count; TimestampTz last_autoanalyze_time; /* autovacuum initiated */ PgStat_Counter autoanalyze_count; + + /* calculated at vac start and used upon unset */ + XLogRecPtr target_frz_dur_lsns; + /* updated upon VM unset */ + PgStat_VMUnset vm_unset; } PgStat_StatTabEntry; /* @@ -695,6 +740,11 @@ extern void pgstat_report_analyze(Relation rel, PgStat_Counter livetuples, PgStat_Counter deadtuples, bool resetcounter); +extern void pgstat_refresh_frz_dur(Oid tableoid, bool shared); + +extern void pgstat_count_vm_unset(Relation relation, XLogRecPtr page_lsn, + XLogRecPtr current_lsn, uint8 old_vmbits); + /* * If stats are enabled, but pending data hasn't been prepared yet, call * pgstat_assoc_relation() to do so. 
See its comment for why this is done diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index cf8086616b2..890a091d426 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2091,6 +2091,7 @@ PgStat_SubXactStatus PgStat_TableCounts PgStat_TableStatus PgStat_TableXactStatus +PgStat_VMUnset PgStat_WalStats PgXmlErrorContext PgXmlStrictness -- 2.37.2