From f04540339130d0c06998dd5f2bc9b7eedba5d3f3 Mon Sep 17 00:00:00 2001
From: Hayato Kuroda
Date: Thu, 15 Feb 2024 02:47:38 +0000
Subject: [PATCH v20 11/12] Detect the disconnection from the primary during
 the recovery

Previously, the wait_for_end_recovery() function would indefinitely wait
for a server to exit recovery mode, without considering scenarios where
the server might be disconnected from the primary. This could lead to
situations where the server never reaches a consistent state, as it
remains unaware of its disconnection.

This patch introduces a new check within the wait_for_end_recovery()
process, leveraging the pg_stat_wal_receiver system view to verify the
presence of an active walreceiver process. While this method does not
account for potential frequent restarts of the walreceiver, it provides a
straightforward and effective means to detect disconnections from the
primary server during the recovery phase.
---
 src/bin/pg_basebackup/pg_createsubscriber.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/bin/pg_basebackup/pg_createsubscriber.c b/src/bin/pg_basebackup/pg_createsubscriber.c
index db088024d3..2458c874e5 100644
--- a/src/bin/pg_basebackup/pg_createsubscriber.c
+++ b/src/bin/pg_basebackup/pg_createsubscriber.c
@@ -1209,9 +1209,12 @@ wait_for_end_recovery(const char *conninfo, const char *pg_bin_dir,
 
 	for (;;)
 	{
-		bool		in_recovery;
+		bool		in_recovery,
+					still_alive;
 
-		res = PQexec(conn, "SELECT pg_catalog.pg_is_in_recovery()");
+		res = PQexec(conn,
+					 "SELECT pg_catalog.pg_is_in_recovery(), count(pid) "
+					 "FROM pg_catalog.pg_stat_wal_receiver;");
 
 		if (PQresultStatus(res) != PGRES_TUPLES_OK)
 			pg_fatal("could not obtain recovery progress");
@@ -1220,6 +1223,8 @@ wait_for_end_recovery(const char *conninfo, const char *pg_bin_dir,
 			pg_fatal("unexpected result from pg_is_in_recovery function");
 
 		in_recovery = (strcmp(PQgetvalue(res, 0, 0), "t") == 0);
+		/* Second column is count(pid): 1 while a walreceiver exists, else 0 */
+		still_alive = (strcmp(PQgetvalue(res, 0, 1), "1") == 0);
 
 		PQclear(res);
 
@@ -1233,6 +1238,13 @@ wait_for_end_recovery(const char *conninfo, const char *pg_bin_dir,
 			break;
 		}
 
+		/* Bail out if we have disconnected from the primary */
+		if (!still_alive)
+		{
+			stop_standby_server(pg_bin_dir, opt->subscriber_dir);
+			pg_fatal("disconnected from the primary while waiting the end of recovery");
+		}
+
 		/* Bail out after recovery_timeout seconds if this option is set */
 		if (opt->recovery_timeout > 0 && timer >= opt->recovery_timeout)
 		{
-- 
2.43.0