From 6584480f4fa9ebc91ac81c06cd2aa3bc20b7af5e Mon Sep 17 00:00:00 2001 From: Zhijie Hou Date: Fri, 21 Nov 2025 13:18:13 +0800 Subject: [PATCH v6 2/3] Fix the race condition of updating slot minimum LSN Previously, there was a race condition: if a backend creates a new slot and attempts to initialize the slot.restart_lsn during WAL reservation while another backend concurrently invokes ReplicationSlotsComputeRequiredLSN(), the slot minimum LSN may be initially updated by the newly created slot, only to be subsequently overwritten by the backend running ReplicationSlotsComputeRequiredLSN() with a more recent LSN. This scenario could lead to the premature removal of WALs reserved by the new slot during a checkpoint, resulting in the newly created slot being invalidated. This commit closes this race condition by acquiring an exclusive ReplicationSlotControlLock when updating slot.restart_lsn during WAL reservation. Additionally, XLogSetReplicationSlotMinimumLSN() is placed under the protection of the ReplicationSlotControlLock. This serializes the update of slot.restart_lsn and the computation of the minimum LSN in other backends, ensuring that a more recent minimum LSN isn't computed while an older one is still being reserved. --- src/backend/replication/slot.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index a75d59e96d7..027f23b4b43 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -1237,6 +1237,12 @@ ReplicationSlotsComputeRequiredLSN(void) Assert(ReplicationSlotCtl != NULL); + /* + * Hold the ReplicationSlotControlLock until after updating the slot minimum + * LSN value, to prevent overwriting the minimum LSN with a position more + * recent than the WAL position reserved by another newly created slot (see + * ReplicationSlotReserveWal and reserve_wal_for_local_slot). 
+ */ LWLockAcquire(ReplicationSlotControlLock, LW_SHARED); for (i = 0; i < max_replication_slots; i++) { @@ -1282,9 +1288,10 @@ ReplicationSlotsComputeRequiredLSN(void) restart_lsn < min_required)) min_required = restart_lsn; } - LWLockRelease(ReplicationSlotControlLock); XLogSetReplicationSlotMinimumLSN(min_required); + + LWLockRelease(ReplicationSlotControlLock); } /* @@ -1614,10 +1621,22 @@ ReplicationSlotReserveWal(void) else restart_lsn = GetXLogInsertRecPtr(); + /* + * Hold the ReplicationSlotControlLock exclusive when updating the slot + * restart_lsn, so no backend can compute and update the new value + * concurrently. This prevents other backends from overwriting the + * minimum LSN with a position more recent than the WAL position being + * reserved, ensuring the WALs required by this slot are not prematurely + * removed during checkpoint. + */ + LWLockAcquire(ReplicationSlotControlLock, LW_EXCLUSIVE); + SpinLockAcquire(&slot->mutex); slot->data.restart_lsn = restart_lsn; SpinLockRelease(&slot->mutex); + LWLockRelease(ReplicationSlotControlLock); + /* prevent WAL removal as fast as possible */ ReplicationSlotsComputeRequiredLSN(); -- 2.51.1.windows.1