From d3d93d7a604102d77b4e220a6453d39686eb271d Mon Sep 17 00:00:00 2001 From: vignesh Date: Wed, 7 Apr 2021 13:07:26 +0530 Subject: [PATCH v4 3/3] Handle overwriting of replication slot statistic issue. There is a rare scenario where a replication slot is dropped but the drop-slot statistics message is never received by the statistics collector process. If max_replication_slots is then reduced to the number of replication slots actually in use and the server is restarted, the statistics collector is not aware of the dropped slot and will write beyond the slots available. Fix this by skipping any replication slot statistics entries beyond max_replication_slots. --- contrib/test_decoding/t/001_repl_stats.pl | 24 +++++++++++++++++++++-- src/backend/postmaster/pgstat.c | 18 +++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/contrib/test_decoding/t/001_repl_stats.pl b/contrib/test_decoding/t/001_repl_stats.pl index 11ffe0ca3e..4a1b113c41 100644 --- a/contrib/test_decoding/t/001_repl_stats.pl +++ b/contrib/test_decoding/t/001_repl_stats.pl @@ -5,7 +5,7 @@ use warnings; use File::Path qw(rmtree); use PostgresNode; use TestLib; -use Test::More tests => 2; +use Test::More tests => 3; # Test set-up my $node = get_new_node('test'); @@ -102,12 +102,32 @@ is($result, qq(regression_slot1|t|t regression_slot2|t|t regression_slot3|t|t), 'check replication statistics are updated'); +# Test to remove one of the replication slots and adjust max_replication_slots +# accordingly to the number of slots and verify replication statistics data is +# fine after restart. +$node->stop; +my $datadir = $node->data_dir; +my $slot3_replslotdir = "$datadir/pg_replslot/regression_slot3"; + +rmtree($slot3_replslotdir); + +$node->append_conf('postgresql.conf', 'max_replication_slots = 2'); +$node->start; + +# Verify statistics data present in pg_stat_replication_slots are sane after +# restart. 
+$result = $node->safe_psql('postgres', + "SELECT slot_name, total_txns > 0 AS total_txn, total_bytes > 0 AS total_bytes + FROM pg_stat_replication_slots ORDER BY slot_name" +); +is($result, qq(regression_slot1|t|t +regression_slot2|t|t), 'check replication statistics are sane after restart'); + # cleanup $node->safe_psql('postgres', "DROP TABLE test_repl_stat"); $node->safe_psql('postgres', "DROP FUNCTION wait_for_decode_stats(TEXT)"); $node->safe_psql('postgres', "SELECT pg_drop_replication_slot('regression_slot1')"); $node->safe_psql('postgres', "SELECT pg_drop_replication_slot('regression_slot2')"); -$node->safe_psql('postgres', "SELECT pg_drop_replication_slot('regression_slot3')"); # shutdown $node->stop; diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 89b9315af6..c292d4ab94 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -4069,6 +4069,24 @@ pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep) * slot follows. */ case 'R': + /* + * There is a rare scenario where a replication slot is dropped but + * the drop-slot statistics message is never received by the + * statistics collector. If max_replication_slots is then reduced + * to the number of slots actually in use and the server is + * restarted, the stats file can hold more entries than there are + * slots available. Stop reading here instead of writing past the + * available slots. The slot name is not reported because the + * entry has not been read yet and replSlotStats[nReplSlotStats] + * is the very element this check avoids touching. + */ + if (max_replication_slots == nReplSlotStats) + { + ereport(pgStatRunningInCollector ? LOG : WARNING, + (errmsg("skipping replication slot statistics as pg_stat_replication_slots does not have enough slots"))); + goto done; + } + if (fread(&replSlotStats[nReplSlotStats], 1, sizeof(PgStat_ReplSlotStats), fpin) != sizeof(PgStat_ReplSlotStats)) { -- 2.25.1