From 525a9e94fa6ca652f9a79eb2c914382cf527520e Mon Sep 17 00:00:00 2001 From: Ajin Cherian Date: Tue, 23 Sep 2025 16:54:17 +1000 Subject: [PATCH v3] Reset synced slots when a standby is promoted. On promotion, reset any slots which have the 'synced' flag set so that the primary starts with synced flag set false. This ensures consistent behavior across all switchovers. Also handle the possibility of the server crashing before all slots are reset, by resetting the slots on the primary on a restart. --- doc/src/sgml/system-views.sgml | 3 +- src/backend/access/transam/xlog.c | 18 +++-- src/backend/access/transam/xlogrecovery.c | 9 --- src/backend/replication/slot.c | 68 +++++++++++++++++++ src/include/replication/slot.h | 1 + .../t/040_standby_failover_slots_sync.pl | 6 +- 6 files changed, 86 insertions(+), 19 deletions(-) diff --git a/doc/src/sgml/system-views.sgml b/doc/src/sgml/system-views.sgml index 4187191ea74..ff9384127cd 100644 --- a/doc/src/sgml/system-views.sgml +++ b/doc/src/sgml/system-views.sgml @@ -3031,8 +3031,7 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. The value of this column has no meaning on the primary server; the column value on - the primary is default false for all slots but may (if leftover from a - promoted standby) also be true. + the primary is false for all slots. diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index eac1de75ed0..5ebb74888b0 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -5639,7 +5639,8 @@ StartupXLOG(void) /* * Initialize replication slots, before there's a chance to remove - * required resources. + * required resources. Clear any leftover 'synced' flags on replication + * slots when on the primary.
*/ StartupReplicationSlots(); @@ -6241,13 +6242,20 @@ StartupXLOG(void) WalSndWakeup(true, true); /* - * If this was a promotion, request an (online) checkpoint now. This isn't - * required for consistency, but the last restartpoint might be far back, - * and in case of a crash, recovering from it might take a longer than is - * appropriate now that we're not in standby mode anymore. + * If this was a promotion, first reset any slots that had been marked as + * synced during standby mode. Although slots that are marked as synced + * are reset on a restart of the primary, we need to do it in the promotion + * path as it could be some time before the next restart. + * Then request an (online) checkpoint. The checkpoint isn't required for + * consistency, but the last restartpoint might be far back, and in case + * of a crash, recovery could take longer than desirable now that we're not + * in standby mode anymore. */ if (promoted) + { + ResetSyncedSlots(); RequestCheckpoint(CHECKPOINT_FORCE); + } } /* diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index 346319338a0..37ad309201e 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -1481,15 +1481,6 @@ FinishWalRecovery(void) /* * Shutdown the slot sync worker to drop any temporary slots acquired by * it and to prevent it from keep trying to fetch the failover slots. - * - * We do not update the 'synced' column in 'pg_replication_slots' system - * view from true to false here, as any failed update could leave 'synced' - * column false for some slots. This could cause issues during slot sync - * after restarting the server as a standby. While updating the 'synced' - * column after switching to the new timeline is an option, it does not - * simplify the handling for the 'synced' column. Therefore, we retain the - * 'synced' column as true after promotion as it may provide useful - * information about the slot origin. 
*/ ShutDownSlotSync(); diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index fd0fdb96d42..1a15ae32226 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -852,6 +852,58 @@ restart: LWLockRelease(ReplicationSlotControlLock); } +/* + * ResetSyncedSlots() + * + * Reset the synced flag to false for all replication slots where it is + * currently true. + */ +void +ResetSyncedSlots(void) +{ + int i; + + /* + * Iterate through all replication slot entries and reset synced ones + */ + for (i = 0; i < max_replication_slots; i++) + { + ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i]; + + /* Skip unused slot array entries */ + if (!s->in_use) + continue; + + /* we're only interested in logical slots */ + if (!SlotIsLogical(s)) + continue; + + /* Check if this slot was marked as synced */ + if (s->data.synced) + { + /* Acquire the slot, tolerating invalidated ones */ + ReplicationSlotAcquire(NameStr(s->data.name), false, false); + + /* Reset the synced flag under spinlock protection */ + SpinLockAcquire(&s->mutex); + s->data.synced = false; + SpinLockRelease(&s->mutex); + + /* Mark dirty and save outside the spinlock */ + ReplicationSlotMarkDirty(); + ReplicationSlotSave(); + + ereport(LOG, + (errmsg("reset synced flag for replication slot \"%s\"", + NameStr(s->data.name)))); + + /* Release the slot */ + ReplicationSlotRelease(); + } + } + +} + /* * Permanently drop replication slot identified by the passed in name. */ @@ -2690,6 +2742,22 @@ RestoreSlotFromDisk(const char *name) ReplicationSlotSetInactiveSince(slot, now, false); restored = true; + + /* + * A primary should never have a slot with the 'synced' flag set. + * Even if this server was previously a standby, the flag should + * have been cleared during promotion. The only case it may still + * be set is if the server crashed during promotion. In that case, + * reset it now and mark the slot dirty.
+ */ + if (!StandbyMode && slot->data.synced) + { + ReplicationSlotAcquire(NameStr(slot->data.name), false, true); + slot->data.synced = false; + ReplicationSlotMarkDirty(); + ReplicationSlotRelease(); + } + break; } diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h index fe62162cde3..7902d51781d 100644 --- a/src/include/replication/slot.h +++ b/src/include/replication/slot.h @@ -336,6 +336,7 @@ extern int ReplicationSlotIndex(ReplicationSlot *slot); extern bool ReplicationSlotName(int index, Name name); extern void ReplicationSlotNameForTablesync(Oid suboid, Oid relid, char *syncslotname, Size szslot); extern void ReplicationSlotDropAtPubNode(WalReceiverConn *wrconn, char *slotname, bool missing_ok); +extern void ResetSyncedSlots(void); extern void StartupReplicationSlots(void); extern void CheckPointReplicationSlots(bool is_shutdown); diff --git a/src/test/recovery/t/040_standby_failover_slots_sync.pl b/src/test/recovery/t/040_standby_failover_slots_sync.pl index 2c61c51e914..0f225aa09c1 100644 --- a/src/test/recovery/t/040_standby_failover_slots_sync.pl +++ b/src/test/recovery/t/040_standby_failover_slots_sync.pl @@ -932,13 +932,13 @@ my $standby1_conninfo = $standby1->connstr . ' dbname=postgres'; $subscriber1->safe_psql('postgres', "ALTER SUBSCRIPTION regress_mysub1 CONNECTION '$standby1_conninfo';"); -# Confirm the synced slot 'lsub1_slot' is retained on the new primary +# Confirm the synced slot 'lsub1_slot' is reset on the new primary is( $standby1->safe_psql( 'postgres', q{SELECT count(*) = 2 FROM pg_replication_slots WHERE slot_name IN ('lsub1_slot', 'snap_test_slot') AND synced AND NOT temporary;} ), - 't', - 'synced slot retained on the new primary'); + 'f', + 'synced slot reset on the new primary'); # Commit the prepared transaction $standby1->safe_psql('postgres', "COMMIT PREPARED 'test_twophase_slotsync';"); -- 2.47.3