From ef8d9fb83d72b15f23d96e705122ad153dcb4fa5 Mon Sep 17 00:00:00 2001 From: Vitaly Davydov Date: Sun, 26 Oct 2025 11:22:54 +0300 Subject: [PATCH] Test a specific case of physical slot invalidation When ReplicationSlotReserveWal() succeeds to read old RedoRecPtr right before the assignment of a new RedoRecPtr in a checkpointer which runs in parallel, the created slot may be invalidated. The core issue: redo rec ptr change during wal reservation by a slot. The serialization of execution of redo rec ptr assignment and slot reservation may fix the issue. --- src/backend/access/transam/xlog.c | 3 + src/backend/replication/slot.c | 4 + src/test/recovery/meson.build | 1 + ..._create_physical_slot_during_checkpoint.pl | 176 ++++++++++++++++++ 4 files changed, 184 insertions(+) create mode 100644 src/test/recovery/t/100_create_physical_slot_during_checkpoint.pl diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 22d0a2e8c3a..64b394383ac 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7385,6 +7385,9 @@ CreateCheckPoint(int flags) */ XLByteToSeg(RedoRecPtr, _logSegNo, wal_segment_size); KeepLogSeg(recptr, &_logSegNo); + + INJECTION_POINT("checkpoint-before-old-wal-removal-2", NULL); + if (InvalidateObsoleteReplicationSlots(RS_INVAL_WAL_REMOVED | RS_INVAL_IDLE_TIMEOUT, _logSegNo, InvalidOid, InvalidTransactionId)) diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index 1ec1e997b27..4b5b24d3759 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -1610,10 +1610,14 @@ ReplicationSlotReserveWal(void) else restart_lsn = GetXLogInsertRecPtr(); + INJECTION_POINT("physical-slot-reserve-wal-get-redo", NULL); + SpinLockAcquire(&slot->mutex); slot->data.restart_lsn = restart_lsn; SpinLockRelease(&slot->mutex); + INJECTION_POINT("physical-slot-reserve-wal-before-compute-required-lsn", NULL); + /* prevent WAL removal as fast as possible */ ReplicationSlotsComputeRequiredLSN(); diff --git a/src/test/recovery/meson.build b/src/test/recovery/meson.build index 523a5cd5b52..189c1710f28 100644 --- a/src/test/recovery/meson.build +++ b/src/test/recovery/meson.build @@ -58,6 +58,7 @@ tests += { 't/047_checkpoint_physical_slot.pl', 't/048_vacuum_horizon_floor.pl', 't/049_wait_for_lsn.pl', + 't/100_create_physical_slot_during_checkpoint.pl' ], }, } diff --git a/src/test/recovery/t/100_create_physical_slot_during_checkpoint.pl b/src/test/recovery/t/100_create_physical_slot_during_checkpoint.pl new file mode 100644 index 00000000000..d12bc5f2d56 --- /dev/null +++ b/src/test/recovery/t/100_create_physical_slot_during_checkpoint.pl @@ -0,0 +1,176 @@ +# This test checks that the new slot maybe invalidated by checkpoint + +use strict; +use warnings; +use Config; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +if ($ENV{enable_injection_points} ne 'yes') +{ + plan skip_all => 'Injection points not supported by this build'; +} + +my $res; + +# Setup primary node +my $node = PostgreSQL::Test::Cluster->new('primary'); +$node->init(allows_streaming => 1); + +$node->append_conf('postgresql.conf', q{ + wal_level = logical + checkpoint_timeout = 30min + checkpoint_completion_target = 0.1 +}); + +$node->start(); +$node->safe_psql('postgres', q{create extension injection_points;}); + +my $createslot = $node->background_psql('postgres'); +my $checkpoint = $node->background_psql('postgres'); + +sub physreplslot_start_create() +{ + $createslot->query_until( + qr/start-create-slot/, + q(\echo start-create-slot + select pg_create_physical_replication_slot('standby1', true); + )); +} + +sub checkpoint_start() +{ + $checkpoint->query_until( + qr/start-checkpoint/, + q(\echo start-checkpoint + checkpoint; + )); +} + +sub injection_point_attach($$) +{ + my ($injectpname, $event) = @_; + + $node->safe_psql('postgres', "select injection_points_attach('$injectpname','$event')"); +} + +sub injection_point_detach($) +{ + my $injectpname = shift; + + $node->safe_psql('postgres', "select injection_points_detach('$injectpname');"); +} + +sub injection_point_wait($$) +{ + my ($subsystem, $injectpname) = @_; + + note("waiting for $injectpname injection point"); + $node->wait_for_event($subsystem, $injectpname); + note("injection_point $injectpname is reached"); +} + +sub injection_point_wakeup($) +{ + my $injectpname = shift; + + $node->safe_psql('postgres', "select injection_points_wakeup('$injectpname');"); +} + +sub get_slot_invalidation_state($) +{ + my $slotname = shift; + + return $node->safe_psql('postgres', + "select invalidation_reason from pg_replication_slots where slot_name='$slotname'"); +} + +# Original case: injection point in the checkpointer right before the calculation +# of WAL segments for removal. +sub test1() +{ + injection_point_attach('physical-slot-reserve-wal-get-redo', 'wait'); + injection_point_attach('checkpoint-before-old-wal-removal', 'wait'); + injection_point_attach("physical-slot-reserve-wal-before-compute-required-lsn", 'wait'); + + # start create physical replication slot + physreplslot_start_create(); + + # reach the get-redo injection point and stop there + injection_point_wait('client backend', 'physical-slot-reserve-wal-get-redo'); + + $node->advance_wal(6); + + # start checkpoint + checkpoint_start(); + + # reach the injection point before old WAL removal and stop there + injection_point_wait('checkpointer', 'checkpoint-before-old-wal-removal'); + + injection_point_wakeup('physical-slot-reserve-wal-get-redo'); + + injection_point_wakeup('checkpoint-before-old-wal-removal'); + + injection_point_wakeup('physical-slot-reserve-wal-before-compute-required-lsn'); + + # complete physical slot creation + eval { $createslot->quit(); }; + + # complete checkpoint + eval { $checkpoint->quit(); }; + + is(get_slot_invalidation_state('standby1'), "", "slot is not invalidated by checkpoint"); + + injection_point_detach('physical-slot-reserve-wal-get-redo'); + injection_point_detach('checkpoint-before-old-wal-removal'); + injection_point_detach("physical-slot-reserve-wal-before-compute-required-lsn"); + + $node->safe_psql('postgres', "select pg_drop_replication_slot('standby1')"); +} + +# Based on Hayato's case: injection point in the checkpointer between the +# calculation of WAL segments for removal and slot invalidation. +sub test2() +{ + injection_point_attach('physical-slot-reserve-wal-get-redo', 'wait'); + injection_point_attach('checkpoint-before-old-wal-removal-2', 'wait'); + injection_point_attach("physical-slot-reserve-wal-before-compute-required-lsn", 'wait'); + + # start create physical replication slot + physreplslot_start_create(); + + # reach the get-redo injection point and stop there + injection_point_wait('client backend', 'physical-slot-reserve-wal-get-redo'); + + $node->advance_wal(6); + + # start checkpoint + checkpoint_start(); + + # reach the injection point before old WAL removal and stop there + injection_point_wait('checkpointer', 'checkpoint-before-old-wal-removal-2'); + + injection_point_wakeup('physical-slot-reserve-wal-get-redo'); + + injection_point_wakeup('checkpoint-before-old-wal-removal-2'); + + injection_point_wakeup('physical-slot-reserve-wal-before-compute-required-lsn'); + + # complete physical slot creation + eval { $createslot->quit(); }; + + # complete checkpoint + eval { $checkpoint->quit(); }; + + is(get_slot_invalidation_state('standby1'), "", "slot is not invalidated by checkpoint"); + + injection_point_detach('physical-slot-reserve-wal-get-redo'); + injection_point_detach('checkpoint-before-old-wal-removal-2'); + injection_point_detach("physical-slot-reserve-wal-before-compute-required-lsn"); +} + +test1(); +test2(); + +done_testing(); -- 2.43.0