From 8b383fc8a655f2a45bcdeed3096a2bb88aba5687 Mon Sep 17 00:00:00 2001 From: Vitaly Davydov Date: Mon, 6 Oct 2025 16:36:29 +0300 Subject: [PATCH 2/2] Fix invalidation when slot is created during checkpoint If a slot is created during a checkpoint, it may be invalidated by the checkpointer because of the lag between the assignment of the slot's restart_lsn and the update of XLogCtl->replicationSlotMinLSN. The issue seems to have existed in the past, but commit 2090edc6f32f652a2c increased the probability of slot invalidation. To fix the issue, we explicitly update XLogCtl->replicationSlotMinLSN in the checkpointer after the new redo LSN is assigned. This guarantees that the slot's restart_lsn is taken into account in the calculation of the oldest LSN for WAL segment removal. For slots whose restart_lsn is assigned after the update, the current redo LSN will protect the WAL for the slot. Discussion: https://www.postgresql.org/message-id/flat/5e045179-236f-4f8f-84f1-0f2566ba784c.mengjuan.cmj%40alibaba-inc.com --- src/backend/access/transam/xlog.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 287339b7fae..8f7496ce43f 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7103,12 +7103,18 @@ CreateCheckPoint(int flags) END_CRIT_SECTION(); /* - * Get the current minimum LSN to be used later in the WAL segment - * cleanup. We may clean up only WAL segments, which are not needed - * according to synchronized LSNs of replication slots. The slot's LSN - * might be advanced concurrently, so we call this before + * Update slots' oldest reserved lsn and save it in slotsMinReqLSN to be + * used later in the WAL segment cleanup. We may clean up only WAL segments, + * which are not needed according to synchronized LSNs of replication slots. 
+ * The slot's LSN might be advanced concurrently, so we call this before * CheckPointReplicationSlots() synchronizes replication slots. + * + * The update of slots' oldest LSN is required to guarantee that the slot's + * restart_lsn is taken into account in calculation of the oldest LSN for + * WAL segments removal. If the slot's restart_lsn is assigned after this + * update, the current redo LSN will protect the WAL for the slot. */ + ReplicationSlotsComputeRequiredLSN(); slotsMinReqLSN = XLogGetReplicationSlotMinimumLSN(); /* @@ -7676,12 +7682,18 @@ CreateRestartPoint(int flags) CheckpointStats.ckpt_start_t = GetCurrentTimestamp(); /* - * Get the current minimum LSN to be used later in the WAL segment - * cleanup. We may clean up only WAL segments, which are not needed - * according to synchronized LSNs of replication slots. The slot's LSN - * might be advanced concurrently, so we call this before + * Update slots' oldest reserved lsn and save it in slotsMinReqLSN to be + * used later in the WAL segment cleanup. We may clean up only WAL segments, + * which are not needed according to synchronized LSNs of replication slots. + * The slot's LSN might be advanced concurrently, so we call this before * CheckPointReplicationSlots() synchronizes replication slots. + * + * The update of slots' oldest LSN is required to guarantee that the slot's + * restart_lsn is taken into account in calculation of the oldest LSN for + * WAL segments removal. If the slot's restart_lsn is assigned after this + * update, the current redo LSN will protect the WAL for the slot. */ + ReplicationSlotsComputeRequiredLSN(); slotsMinReqLSN = XLogGetReplicationSlotMinimumLSN(); if (log_checkpoints) -- 2.34.1