From ce413bd37c38d4b3602d11ff59fabbaa722e47e0 Mon Sep 17 00:00:00 2001 From: "suyu.cmj" Date: Mon, 15 Sep 2025 08:44:17 +0000 Subject: [PATCH] Newly created replication slot may be invalidated by checkpoint Commit 2090edc6f32f652a2c995ca5f7e65748ae1e4c5d changed checkpoint creation so that the minimal restart_lsn is obtained at the start of the checkpoint. If a replication slot is created and reserves WAL concurrently, the WAL segment containing the new slot's restart_lsn could be removed by the ongoing checkpoint. Add a Perl test to reproduce this scenario. --- src/backend/replication/slot.c | 10 +++ src/backend/utils/misc/injection_point.c | 15 +++++ src/include/utils/injection_point.h | 1 + .../recovery/t/049_invalidate_new_slot.pl | 64 +++++++++++++++++++ 4 files changed, 90 insertions(+) create mode 100644 src/test/recovery/t/049_invalidate_new_slot.pl diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index b9e2b115dab..34cf03b92e7 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -57,6 +57,7 @@ #include "utils/builtins.h" #include "utils/guc_hooks.h" #include "utils/varlena.h" +#include "utils/injection_point.h" /* * Replication slot on-disk data structure. 
@@ -1443,6 +1444,15 @@ ReplicationSlotReserveWal(void) slot->data.restart_lsn = restart_lsn; SpinLockRelease(&slot->mutex); +#ifdef USE_INJECTION_POINTS + if (InjectionPointFind("delay_slot_reserve_wal")) + { + elog(LOG, "inject delay before computing required lsn"); + while (InjectionPointFind("delay_slot_reserve_wal")) + pg_usleep(1000000); + } +#endif + /* prevent WAL removal as fast as possible */ ReplicationSlotsComputeRequiredLSN(); diff --git a/src/backend/utils/misc/injection_point.c b/src/backend/utils/misc/injection_point.c index b33cddefabc..76e79b2306d 100644 --- a/src/backend/utils/misc/injection_point.c +++ b/src/backend/utils/misc/injection_point.c @@ -535,3 +535,18 @@ InjectionPointRun(const char *name) elog(ERROR, "Injection points are not supported by this build"); #endif } + +bool +InjectionPointFind(const char *name) +{ +#ifdef USE_INJECTION_POINTS + InjectionPointCacheEntry *cache_entry; + + cache_entry = InjectionPointCacheRefresh(name); + + return cache_entry != NULL; +#else + elog(ERROR, "Injection points are not supported by this build"); + return false; +#endif +} diff --git a/src/include/utils/injection_point.h b/src/include/utils/injection_point.h index 6e417cedc60..bcc3de3e5b1 100644 --- a/src/include/utils/injection_point.h +++ b/src/include/utils/injection_point.h @@ -36,6 +36,7 @@ extern void InjectionPointAttach(const char *name, int private_data_size); extern void InjectionPointRun(const char *name); extern bool InjectionPointDetach(const char *name); +extern bool InjectionPointFind(const char *name); #ifdef EXEC_BACKEND extern PGDLLIMPORT struct InjectionPointsCtl *ActiveInjectionPoints; diff --git a/src/test/recovery/t/049_invalidate_new_slot.pl b/src/test/recovery/t/049_invalidate_new_slot.pl new file mode 100644 index 00000000000..178352a4b08 --- /dev/null +++ b/src/test/recovery/t/049_invalidate_new_slot.pl @@ -0,0 +1,64 @@ +# This test checks that a newly created replication slot may be invalidated +# (reason "wal_removed") by a checkpoint running while the slot reserves WAL. + +use strict; +use warnings; 
+use Config;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+if (($ENV{enable_injection_points} // '') ne 'yes')
+{
+	plan skip_all => 'Injection points not supported by this build';
+}
+
+# Setup primary node
+my $node = PostgreSQL::Test::Cluster->new('primary');
+$node->init(allows_streaming => 1);
+
+$node->append_conf(
+	'postgresql.conf', qq(
+	log_checkpoints = true
+	checkpoint_timeout = 3000
+	Logging_collector = false
+));
+$node->start;
+
+$node->safe_psql('postgres', "create extension injection_points;");
+$node->safe_psql('postgres',
+	"select injection_points_attach('delay_slot_reserve_wal', 'notice')");
+
+# create slot and reserve wal; wait until the backend logs that it is stalled
+my $host = $node->host;
+my $port = $node->port;
+my $log_offset = -s $node->logfile;
+`nohup psql -h $host -p $port -c "SELECT pg_create_physical_replication_slot('standby1', true)" >tmp_check/test.file 2>&1 &`;
+$node->wait_for_log(qr/inject delay before computing required lsn/, $log_offset);
+
+# switch wal and do checkpoint
+$node->safe_psql('postgres', "select pg_switch_wal()");
+$node->safe_psql('postgres', "create table test(a int)");
+$node->safe_psql('postgres',
+	"INSERT INTO test(a) SELECT generate_series(1,100) as newwal");
+
+# take the log offset before the checkpoint starts so its completion is not missed
+$log_offset = -s $node->logfile;
+`nohup psql -h $host -p $port -c 'checkpoint' >tmp_check/test.file 2>&1 &`;
+sleep 3;
+
+$node->safe_psql('postgres',
+	"select injection_points_detach('delay_slot_reserve_wal')");
+
+$node->wait_for_log(qr/checkpoint complete/, $log_offset);
+note "checkpoint finished";
+
+my $res = $node->safe_psql('postgres',
+	"select invalidation_reason from pg_replication_slots where slot_name='standby1'");
+note "res:$res";
+is($res, "wal_removed", "slot is invalidated by checkpoint");
+
+unlink 'tmp_check/test.file';
+$node->stop;
+
+done_testing();
-- 
2.27.0