From 97e8c7f74c9c7c7450de3fbf872a436d26b16a05 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Wed, 8 Nov 2023 15:15:31 -0500 Subject: [PATCH v1 6/9] Add vacuum freeze statistics structures Add a ring buffer of PgStat_Frz to the table-level stats in PgStat_StatTabEntry. At the beginning of a vacuum of a relation, initialize a new PgStat_Frz entry to record stats including how many pages in the relation are frozen by this vacuum. Once all of the available spots in the ring buffer are used, the oldest entries are combined -- being careful not to combine stats from vacuums ending before now - target_page_freeze_duration with those ending after. Future commits will increment these stats and then use them to alter how aggressively vacuum opportunistically freezes pages. --- src/backend/utils/activity/pgstat_relation.c | 163 +++++++++++++++++++ src/include/pgstat.h | 133 +++++++++++++++ src/tools/pgindent/typedefs.list | 1 + 3 files changed, 297 insertions(+) diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index bd92380a68..eb387e7eac 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -50,6 +50,8 @@ static void ensure_tabstat_xact_level(PgStat_TableStatus *pgstat_info); static void save_truncdrop_counters(PgStat_TableXactStatus *trans, bool is_drop); static void restore_truncdrop_counters(PgStat_TableXactStatus *trans); +static void pgstat_combine_vacuum_stats(PgStat_Frz *next, PgStat_Frz *oldest); + /* * Copy stats between relations. This is used for things like REINDEX @@ -205,6 +207,167 @@ pgstat_drop_relation(Relation rel) } } +/* + * Given two adjacent PgStat_Frz, combine the data from the older PgStat_Frz + * into the newer one. This is used when PgStat_StatTabEntry has filled and we + * need to free up a spot for an imminent vacuum. 
+ */
+static void
+pgstat_combine_vacuum_stats(PgStat_Frz *next, PgStat_Frz *oldest)
+{
+	next->start_lsn = oldest->start_lsn;
+	next->start_time = oldest->start_time;
+	/* next may already hold several freeze periods: add, don't assume 1 */
+	next->count += oldest->count;
+
+	if (oldest->freezes > 0)
+	{
+		/*
+		 * Page age is only tracked when freezing tuples on a page during
+		 * vacuum, not when setting a page frozen in the VM. If next has no
+		 * freezes yet, its zeroed minimum is not a real sample: use oldest's.
+		 */
+		if (next->freezes == 0 ||
+			oldest->min_frz_page_age < next->min_frz_page_age)
+			next->min_frz_page_age = oldest->min_frz_page_age;
+		next->max_frz_page_age = Max(next->max_frz_page_age,
+									 oldest->max_frz_page_age);
+		next->sum_page_age_lsns += oldest->sum_page_age_lsns;
+		next->freezes += oldest->freezes;
+	}
+
+	next->vm_page_freezes += oldest->vm_page_freezes;
+	/* As above, next's minimum only counts once next has unfreezes. */
+	if (oldest->unfreezes > 0)
+	{
+		if (next->unfreezes == 0 ||
+			oldest->min_frozen_duration_lsns < next->min_frozen_duration_lsns)
+			next->min_frozen_duration_lsns = oldest->min_frozen_duration_lsns;
+		next->max_frozen_duration_lsns = Max(next->max_frozen_duration_lsns,
+											 oldest->max_frozen_duration_lsns);
+		next->total_frozen_duration_lsns += oldest->total_frozen_duration_lsns;
+		next->early_unfreezes += oldest->early_unfreezes;
+		next->unfreezes += oldest->unfreezes;
+	}
+
+	/*
+	 * Though the total number of pages (and frozen pages) in the relation at
+	 * the beginning and end of vacuum does not mean anything on its own when
+	 * combined across entries, we use these numbers to calculate ratios, so
+	 * we still must sum them.
+	 */
+	next->frozen_pages_end += oldest->frozen_pages_end;
+	next->frozen_pages_start += oldest->frozen_pages_start;
+
+	next->relsize_end += oldest->relsize_end;
+	next->relsize_start += oldest->relsize_start;
+
+	next->scanned_pages += oldest->scanned_pages;
+
+	next->freeze_fpis += oldest->freeze_fpis;
+}
+
+/*
+ * At the beginning of a vacuum, set up a PgStat_Frz. If there are no free
+ * buckets in PgStat_StatTabEntry->frz_buckets, combine two PgStat_Frz entries
+ * into a single bucket -- being mindful not to combine PgStat_Frz ending
+ * before now - target_page_freeze_duration with those ending after.
+ */
+void
+pgstat_setup_vacuum_frz_stats(Oid tableoid, bool shared)
+{
+	PgStat_EntryRef *entry_ref;
+	PgStat_StatTabEntry *tabentry;
+	PgStat_Frz *current;
+	XLogRecPtr	insert_lsn;
+	Oid			dboid = (shared ? InvalidOid : MyDatabaseId);
+	TimestampTz ts = GetCurrentTimestamp();
+
+	if (!pgstat_track_counts)
+		return;
+
+	/* Use exact (not approximate) insert LSN at vacuum start/end */
+	insert_lsn = GetXLogInsertRecPtr();
+
+	entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_RELATION,
+											dboid, tableoid, false);
+
+	tabentry = &((PgStatShared_Relation *) entry_ref->shared_stats)->stats;
+
+	/*
+	 * While free buckets remain, simply use the next bucket for the next
+	 * freeze period.
+	 */
+	if (tabentry->frz_nbuckets_used < VAC_FRZ_STATS_MAX_NBUCKETS)
+	{
+		tabentry->frz_current = tabentry->frz_nbuckets_used;
+		tabentry->frz_nbuckets_used++;
+	}
+	else
+	{
+		PgStat_Frz *oldest;
+		PgStat_Frz *next;
+		int			next_idx;
+		TimestampTz cutoff;
+
+		/*
+		 * We want pages to stay frozen for at least
+		 * target_page_freeze_duration. If they are unfrozen before that, it
+		 * is an early unfreeze. We want all early unfreezes to be correctly
+		 * attributed to a vacuum. So, don't combine vacuums which ended
+		 * before the cutoff with those that ended after. cutoff is the time
+		 * target_page_freeze_duration seconds before now: a page frozen
+		 * before it can no longer be unfrozen early.
+		 */
+		cutoff = ts - (target_page_freeze_duration * USECS_PER_SEC);
+
+		next_idx = (tabentry->frz_oldest + 1) % VAC_FRZ_STATS_MAX_NBUCKETS;
+
+		oldest = &tabentry->frz_buckets[tabentry->frz_oldest];
+		next = &tabentry->frz_buckets[next_idx];
+
+		/*
+		 * If oldest is old enough but next is not old enough, we can't just
+		 * combine them. Instead, combine next into the bucket after it, then
+		 * copy oldest into next.
+		 */
+		if (oldest->end_time < cutoff && next->end_time > cutoff)
+		{
+			int			next_next_idx = (next_idx + 1) % VAC_FRZ_STATS_MAX_NBUCKETS;
+			PgStat_Frz *next_next = &tabentry->frz_buckets[next_next_idx];
+
+			pgstat_combine_vacuum_stats(next_next, next);
+
+			memcpy(next, oldest, sizeof(PgStat_Frz));
+		}
+		else
+			pgstat_combine_vacuum_stats(next, oldest);
+
+		tabentry->frz_current = tabentry->frz_oldest;
+		tabentry->frz_oldest = next_idx;
+	}
+
+	Assert(tabentry->frz_current < VAC_FRZ_STATS_MAX_NBUCKETS);
+	Assert(tabentry->frz_oldest < VAC_FRZ_STATS_MAX_NBUCKETS);
+
+	current = &tabentry->frz_buckets[tabentry->frz_current];
+	memset(current, 0, sizeof(PgStat_Frz));
+	current->start_lsn = insert_lsn;
+	current->count = 1;
+	current->start_time = ts;
+
+	pgstat_unlock_entry(entry_ref);
+
+	/*
+	 * Flush IO stats at the beginning of the vacuum after setting the start
+	 * time and start LSN for this vacuum. This ensures that pages that are
+	 * unfrozen before the end of the vacuum are still attributed as an
+	 * unfreeze to that vacuum.
+	 */
+	pgstat_flush_io(false);
+}
+
+
 /*
  * Report that the table was just vacuumed and flush IO statistics.
  *
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 5e84deec9a..00c67deda4 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -394,6 +394,131 @@ typedef struct PgStat_StatSubEntry
 	TimestampTz stat_reset_timestamp;
 } PgStat_StatSubEntry;
 
+/*
+ * Each PgStat_Frz is a bucket in a ring buffer containing stats from one or
+ * more freeze periods, PgStat_StatTabEntry->frz_buckets. Each freeze period is
+ * a single vacuum of a single relation. Once VAC_FRZ_STATS_MAX_NBUCKETS # of
+ * freeze periods have been recorded, multiple older freeze periods are
+ * combined into single buckets (PgStat_Frz).
+ *
+ * Pages frozen by vacuum are tracked in the current PgStat_Frz.
Once those
+ * pages are modified, those "unfreezes" are tracked in the PgStat_Frz bucket
+ * whose LSN span (start_lsn -> end_lsn) covers the page freeze LSN.
+ *
+ * Because each PgStat_Frz may contain stats from multiple freeze periods, many
+ * of the stats only make sense when used to calculate a ratio. For example,
+ * adding the relsize at the end of a vacuum across multiple vacuums is
+ * meaningless. However, we use frozen_pages_end and relsize_end to calculate
+ * the percentage of the relation that is frozen at the end of the vacuum. This
+ * is effectively an average when calculated across multiple vacuums in the
+ * same bucket.
+ */
+typedef struct PgStat_Frz
+{
+	/* insert LSN at the start of the oldest freeze period in this bucket */
+	XLogRecPtr	start_lsn;
+	/* insert LSN at the end of the newest freeze period in this bucket */
+	XLogRecPtr	end_lsn;
+	/* start time of the oldest freeze period in this bucket */
+	TimestampTz start_time;
+	/* end time of the newest freeze period in this bucket */
+	TimestampTz end_time;
+	/* number of freeze periods we have combined into this bucket */
+	int			count;
+
+	/*
+	 * number of pages with newly frozen tuples for all freeze periods in this
+	 * bucket
+	 */
+	int64		freezes;
+
+	/*
+	 * number of pages newly marked frozen in the visibility map by vacuum
+	 * during all freeze periods in this bucket
+	 */
+	int64		vm_page_freezes;
+
+	/*
+	 * number of pages whose all frozen bit was cleared in the visibility map
+	 * which were frozen during all freeze periods in this bucket
+	 */
+	int64		unfreezes;
+
+	/*
+	 * a subset of unfreezes, this is the number of pages frozen during all
+	 * freeze periods in this bucket which were unfrozen before
+	 * target_page_freeze_duration seconds had elapsed
+	 */
+	int64		early_unfreezes;
+
+	/*
+	 * Number of pages of this relation marked all frozen in the visibility
+	 * map at the end of all freeze periods in this bucket
+	 */
+	int64		frozen_pages_end;
+
+	/*
+	 * Number of pages of this relation marked all frozen in the visibility
+	 * map at the start of all freeze periods in this bucket
+	 */
+	int64		frozen_pages_start;
+
+	/*
+	 * number of pages in the relation at the end and at the beginning of all
+	 * freeze periods in this bucket
+	 */
+	int64		relsize_end;
+	int64		relsize_start;
+
+	/*
+	 * number of pages actually scanned by vacuum (not skipped) during all
+	 * freeze periods in this bucket.
+	 */
+	int64		scanned_pages;
+
+	/*
+	 * number of freeze records emitted by vacuum containing FPIs during all
+	 * freeze periods in this bucket
+	 */
+	int64		freeze_fpis;
+
+	/*
+	 * When a page is frozen, its age in LSNs is the number of LSNs elapsed
+	 * since it was last modified. Keeping track of a running sum of page ages
+	 * at the time of freezing for freezes happening during all freeze periods
+	 * in this bucket allows us to calculate an average page age, in LSNs, for
+	 * pages we end up freezing.
+	 */
+	double		sum_page_age_lsns;
+
+	/*
+	 * the ages (in LSNs) of the oldest and youngest pages that we froze
+	 * during all freeze periods in this bucket
+	 */
+	XLogRecPtr	max_frz_page_age;
+	XLogRecPtr	min_frz_page_age;
+
+	/*
+	 * When a page is unfrozen, the number of LSNs for which it stayed frozen
+	 * is added to this total. This allows us to calculate the average "time"
+	 * in LSNs that a page stays frozen for.
+	 */
+	double		total_frozen_duration_lsns;
+
+	/*
+	 * The longest duration, in LSNs, for which a page frozen during any of the
+	 * freeze periods in this bucket stayed frozen before being unfrozen.
+	 */
+	XLogRecPtr	max_frozen_duration_lsns;
+
+	/*
+	 * The shortest duration, in LSNs, for which a page frozen during any of
+	 * the freeze periods in this bucket stayed frozen before being unfrozen.
+	 */
+	XLogRecPtr	min_frozen_duration_lsns;
+} PgStat_Frz;
+
+#define VAC_FRZ_STATS_MAX_NBUCKETS 15
 typedef struct PgStat_StatTabEntry
 {
 	PgStat_Counter numscans;
@@ -424,6 +549,11 @@ typedef struct PgStat_StatTabEntry
 	PgStat_Counter analyze_count;
 	TimestampTz last_autoanalyze_time;	/* autovacuum initiated */
 	PgStat_Counter autoanalyze_count;
+
+	int			frz_current;
+	int			frz_oldest;
+	int			frz_nbuckets_used;
+	PgStat_Frz	frz_buckets[VAC_FRZ_STATS_MAX_NBUCKETS];
 } PgStat_StatTabEntry;
 
 typedef struct PgStat_WalStats
@@ -589,6 +719,9 @@ extern void pgstat_assoc_relation(Relation rel);
 extern void pgstat_unlink_relation(Relation rel);
 
 extern void pgstat_report_vacuum(Oid tableoid, bool shared, LVRelState *vacrel);
+
+extern void pgstat_setup_vacuum_frz_stats(Oid tableoid, bool shared);
+
 extern void pgstat_report_analyze(Relation rel,
 								  PgStat_Counter livetuples, PgStat_Counter deadtuples,
 								  bool resetcounter);
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 87c1aee379..7cda07cdf8 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3594,6 +3594,7 @@ pgssStoreKind
 pgssVersion
 pgstat_entry_ref_hash_hash
 pgstat_entry_ref_hash_iterator
+PgStat_Frz
 pgstat_page
 pgstat_snapshot_hash
 pgstattuple_type
-- 
2.37.2