From cd90ff97b12d3c2e74da6cfa4b0b8939c6f6dbb6 Mon Sep 17 00:00:00 2001 From: alterego665 <824662526@qq.com> Date: Sun, 8 Jun 2025 20:28:17 +0800 Subject: [PATCH] Add progressive backoff to XactLockTableWait functions XactLockTableWait() and ConditionalXactLockTableWait() currently use a fixed 1ms sleep when waiting for transaction completion. In logical replication scenarios, particularly during CREATE REPLICATION SLOT, these functions may wait for very long periods (minutes to hours) for old transactions to complete, leading to excessive CPU usage due to frequent polling. This patch implements progressive backoff: keep sleeping for 1ms until total sleep time reaches 10 seconds, then start doubling the sleep duration each cycle, up to a maximum of 10 seconds per sleep. This balances responsiveness for normal operations (which typically complete within seconds) against CPU efficiency for long waits common in logical replication scenarios. --- src/backend/storage/lmgr/lmgr.c | 48 ++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index 3f6bf70bd3c..495fa607932 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -667,6 +667,13 @@ XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, XactLockTableWaitInfo info; ErrorContextCallback callback; bool first = true; + long total_sleep_us = 0; + long sleep_us = 1000; /* Start with 1ms */ + bool do_backoff = false; + + /* Progressive backoff threshold */ + const long backoff_threshold_us = 10 * USECS_PER_SEC; /* 10 seconds */ + const long max_sleep_us = 10 * USECS_PER_SEC; /* 10 seconds */ /* * If an operation is specified, set up our verbose error context @@ -713,13 +720,25 @@ XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, * as when building snapshots for logical decoding. It is possible to * see a transaction in ProcArray before it registers itself in the * locktable. The topmost transaction in that case is the same xid, - * so we try again after a short sleep. (Don't sleep the first time - * through, to avoid slowing down the normal case.) + * so we try again after a progressive sleep. (Don't sleep the first + * time through, to avoid slowing down the normal case.) */ if (!first) { CHECK_FOR_INTERRUPTS(); - pg_usleep(1000L); + pg_usleep(sleep_us); + + /* Track total only until we start doing backoff */ + if (!do_backoff) + { + total_sleep_us += sleep_us; + if (total_sleep_us >= backoff_threshold_us) + do_backoff = true; + } + + /* Exponential backoff once threshold is reached */ + if (do_backoff && sleep_us < max_sleep_us) + sleep_us = Min(sleep_us * 2, max_sleep_us); } first = false; xid = SubTransGetTopmostTransaction(xid); @@ -734,12 +753,21 @@ XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, * * As above, but only lock if we can get the lock without blocking. * Returns true if the lock was acquired. + * + * Uses the same progressive backoff as XactLockTableWait. */ bool ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure) { LOCKTAG tag; bool first = true; + long total_sleep_us = 0; + long sleep_us = 1000; /* Start with 1ms */ + bool do_backoff = false; + + /* Progressive backoff threshold */ + const long backoff_threshold_us = 10 * USECS_PER_SEC; /* 10 seconds */ + const long max_sleep_us = 10 * USECS_PER_SEC; /* 10 seconds */ for (;;) { @@ -762,7 +790,19 @@ ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure) if (!first) { CHECK_FOR_INTERRUPTS(); - pg_usleep(1000L); + pg_usleep(sleep_us); + + /* Track total only until we start doing backoff */ + if (!do_backoff) + { + total_sleep_us += sleep_us; + if (total_sleep_us >= backoff_threshold_us) + do_backoff = true; + } + + /* Exponential backoff once threshold is reached */ + if (do_backoff && sleep_us < max_sleep_us) + sleep_us = Min(sleep_us * 2, max_sleep_us); } first = false; xid = SubTransGetTopmostTransaction(xid); -- 2.48.1