From b4c362af111487617f560df54fd0f2971628642d Mon Sep 17 00:00:00 2001 From: Hari Babu Date: Thu, 28 Mar 2019 15:30:01 +1100 Subject: [PATCH 8/8] Server recovery mode handling in_recovery GUC_REPORT is added to update the clients when the server is recovery mode, this is useful for the client connections to conenct to a standby server with a faster check instead of executing a command. New SIGUSR1 handling interrupt is added to support reporting of recovery mode exit to all backends and their respective clients. Some parts of the code is taken from earlier development by Elvis Pranskevichus and Tsunakawa Takayuki. --- doc/src/sgml/libpq.sgml | 14 ++- doc/src/sgml/protocol.sgml | 8 +- src/backend/access/transam/xlog.c | 9 ++ src/backend/commands/async.c | 53 +++++++++++ src/backend/storage/ipc/procarray.c | 30 ++++++ src/backend/storage/ipc/procsignal.c | 3 + src/backend/storage/ipc/standby.c | 9 ++ src/backend/tcop/postgres.c | 4 + src/backend/utils/misc/check_guc | 2 +- src/backend/utils/misc/guc.c | 17 +++- src/include/commands/async.h | 5 + src/include/storage/procarray.h | 1 + src/include/storage/procsignal.h | 2 + src/include/storage/standby.h | 1 + src/interfaces/libpq/fe-connect.c | 133 +++++++++++++++++---------- src/interfaces/libpq/fe-exec.c | 4 + src/interfaces/libpq/libpq-int.h | 1 + 17 files changed, 237 insertions(+), 59 deletions(-) diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml index 328b632e16..f3bbff9193 100644 --- a/doc/src/sgml/libpq.sgml +++ b/doc/src/sgml/libpq.sgml @@ -1631,8 +1631,10 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname To find out whether the server is in recovery mode or not, query SELECT pg_is_in_recovery() - will be sent upon any successful connection; if it returns t, means server - is in recovery mode. + will be sent upon any successful connection if the server is prior to version 12; if it returns + t, it means server is in recovery mode. But for server version 12 or greater + uses the value of in_recovery configuration parameter that is reported by the + server upon successful connection. @@ -2000,15 +2002,17 @@ const char *PQparameterStatus(const PGconn *conn, const char *paramName); IntervalStyle, TimeZone, integer_datetimes, - standard_conforming_strings, and - transaction_read_only. + standard_conforming_strings, + transaction_read_only and + in_recovery. (server_encoding, TimeZone, and integer_datetimes were not reported by releases before 8.0; standard_conforming_strings was not reported by releases before 8.1; IntervalStyle was not reported by releases before 8.4; application_name was not reported by releases before 9.0; - transaction_read_only was not reported by release before 12.0.) + transaction_read_only and in_recovery + were not reported by release before 12.0.) Note that server_version, server_encoding and diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index f3357b5c59..0da02263ea 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -1284,15 +1284,17 @@ SELCT 1/0; IntervalStyle, TimeZone, integer_datetimes, - standard_conforming_strings, and - transaction_read_only. + standard_conforming_strings, + transaction_read_only and + in_recovery. (server_encoding, TimeZone, and integer_datetimes were not reported by releases before 8.0; standard_conforming_strings was not reported by releases before 8.1; IntervalStyle was not reported by releases before 8.4; application_name was not reported by releases before 9.0; - transaction_read_only was not reported by releases before 12.0.) + transaction_read_only and in_recovery + were not reported by releases before 12.0.) Note that server_version, server_encoding and diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 19d7911ec5..fa51c736fe 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7670,7 +7670,10 @@ StartupXLOG(void) * RecoverPreparedTransactions(), see notes for lock_twophase_recover() */ if (standbyState != STANDBY_DISABLED) + { ShutdownRecoveryTransactionEnvironment(); + SendRecoveryExitSignal(); + } /* Shut down xlogreader */ if (readFile >= 0) @@ -7879,6 +7882,12 @@ RecoveryInProgress(void) InitXLOGAccess(); } + /* Update in_recovery status. */ + if (LocalRecoveryInProgress) + SetConfigOption("in_recovery", + "on", + PGC_INTERNAL, PGC_S_OVERRIDE); + /* * Note: We don't need a memory barrier when we're still in recovery. * We might exit recovery immediately after return, so the caller diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c index 5a7ee0de4c..934844523c 100644 --- a/src/backend/commands/async.c +++ b/src/backend/commands/async.c @@ -356,6 +356,15 @@ static List *upperPendingNotifies = NIL; /* list of upper-xact lists */ */ volatile sig_atomic_t notifyInterruptPending = false; +/* + * Inbound recovery exit are initially processed by + * HandleRecoveryExitInterrupt(), called from inside a signal handler. + * That just sets the recoveryExitInterruptPending flag and sets the process + * latch. ProcessRecoveryExitInterrupt() will then be called whenever it's + * safe to actually deal with the interrupt. + */ +volatile sig_atomic_t recoveryExitInterruptPending = false; + /* True if we've registered an on_shmem_exit cleanup */ static bool unlistenExitRegistered = false; @@ -1736,6 +1745,50 @@ ProcessNotifyInterrupt(void) ProcessIncomingNotify(); } +/* + * HandleRecoveryExitInterrupt + * + * Signal handler portion of interrupt handling. Let the backend know + * that the server has exited the recovery mode. + */ +void +HandleRecoveryExitInterrupt(void) +{ + /* + * Note: this is called by a SIGNAL HANDLER. You must be very wary what + * you do here. + */ + + /* signal that work needs to be done */ + recoveryExitInterruptPending = true; + + /* make sure the event is processed in due course */ + SetLatch(MyLatch); +} + +/* + * ProcessRecoveryExitInterrupt + * + * This is called just after waiting for a frontend command. If a + * interrupt arrives (via HandleRecoveryExitInterrupt()) while reading, + * the read will be interrupted via the process's latch, and this routine + * will get called. +*/ +void +ProcessRecoveryExitInterrupt(void) +{ + recoveryExitInterruptPending = false; + + SetConfigOption("in_recovery", + "off", + PGC_INTERNAL, PGC_S_OVERRIDE); + + /* + * Flush output buffer so that clients receive the ParameterStatus message + * as soon as possible. + */ + pq_flush(); +} /* * Read all pending notifications from the queue, and deliver appropriate diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 010cc061c8..a5f3568d09 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -2972,6 +2972,36 @@ CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared) return true; /* timed out, still conflicts */ } +/* + * SendSignalToAllBackends --- send a signal to all backends. + */ +void +SendSignalToAllBackends(ProcSignalReason reason) +{ + ProcArrayStruct *arrayP = procArray; + int index; + pid_t pid = 0; + + LWLockAcquire(ProcArrayLock, LW_SHARED); + + for (index = 0; index < arrayP->numProcs; index++) + { + int pgprocno = arrayP->pgprocnos[index]; + volatile PGPROC *proc = &allProcs[pgprocno]; + VirtualTransactionId procvxid; + + GET_VXID_FROM_PGPROC(procvxid, *proc); + + pid = proc->pid; + if (pid != 0) + { + (void) SendProcSignal(pid, reason, procvxid.backendId); + } + } + + LWLockRelease(ProcArrayLock); +} + /* * ProcArraySetReplicationSlotXmin * diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c index 7605b2c367..e4548dc323 100644 --- a/src/backend/storage/ipc/procsignal.c +++ b/src/backend/storage/ipc/procsignal.c @@ -292,6 +292,9 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN)) RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + if (CheckProcSignal(PROCSIG_RECOVERY_EXIT)) + HandleRecoveryExitInterrupt(); + SetLatch(MyLatch); latch_sigusr1_handler(); diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index cd56dca3ae..65f6ec7ca2 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -138,6 +138,15 @@ ShutdownRecoveryTransactionEnvironment(void) VirtualXactLockTableCleanup(); } +/* + * SendRecoveryExitSignal + * Signal backends that the server has exited recovery mode. + */ +void +SendRecoveryExitSignal(void) +{ + SendSignalToAllBackends(PROCSIG_RECOVERY_EXIT); +} /* * ----------------------------------------------------- diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index f9ce3d8f22..d8d13f5a75 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -543,6 +543,10 @@ ProcessClientReadInterrupt(bool blocked) /* Process notify interrupts, if any */ if (notifyInterruptPending) ProcessNotifyInterrupt(); + + /* Process recovery exit interrupts that happened while reading */ + if (recoveryExitInterruptPending) + ProcessRecoveryExitInterrupt(); } else if (ProcDiePending) { diff --git a/src/backend/utils/misc/check_guc b/src/backend/utils/misc/check_guc index d228bbed68..1c0f51ac8b 100755 --- a/src/backend/utils/misc/check_guc +++ b/src/backend/utils/misc/check_guc @@ -21,7 +21,7 @@ is_superuser lc_collate lc_ctype lc_messages lc_monetary lc_numeric lc_time \ pre_auth_delay role seed server_encoding server_version server_version_int \ session_authorization trace_lock_oidmin trace_lock_table trace_locks trace_lwlocks \ trace_notify trace_userlocks transaction_isolation transaction_read_only \ -zero_damaged_pages" +zero_damaged_pages in_recovery" ### What options are listed in postgresql.conf.sample, but don't appear ### in guc.c? diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index d464be2c5b..98795126d0 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -578,7 +578,7 @@ static char *recovery_target_xid_string; static char *recovery_target_time_string; static char *recovery_target_name_string; static char *recovery_target_lsn_string; - +static bool in_recovery; /* should be static, but commands/variable.c needs to get at this */ char *role_string; @@ -1728,6 +1728,21 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + /* + * Not for general use --- used to indicate whether the instance is + * recovery mode + */ + {"in_recovery", PGC_INTERNAL, UNGROUPED, + gettext_noop("Shows whether the instance is in recovery mode."), + NULL, + GUC_REPORT | GUC_NO_SHOW_ALL | GUC_NO_RESET_ALL | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE + }, + &in_recovery, + false, + NULL, NULL, NULL + }, + { {"allow_system_table_mods", PGC_POSTMASTER, DEVELOPER_OPTIONS, gettext_noop("Allows modifications of the structure of system tables."), diff --git a/src/include/commands/async.h b/src/include/commands/async.h index cfea78e039..7c0bbd3897 100644 --- a/src/include/commands/async.h +++ b/src/include/commands/async.h @@ -24,6 +24,7 @@ extern bool Trace_notify; extern volatile sig_atomic_t notifyInterruptPending; +extern volatile sig_atomic_t recoveryExitInterruptPending; extern Size AsyncShmemSize(void); extern void AsyncShmemInit(void); @@ -54,4 +55,8 @@ extern void HandleNotifyInterrupt(void); /* process interrupts */ extern void ProcessNotifyInterrupt(void); +/* recovery exit interrupt handling functions */ +extern void HandleRecoveryExitInterrupt(void); +extern void ProcessRecoveryExitInterrupt(void); + #endif /* ASYNC_H */ diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index bd24850989..296d606e51 100644 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ -113,6 +113,7 @@ extern void CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conf extern int CountUserBackends(Oid roleid); extern bool CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared); +extern void SendSignalToAllBackends(ProcSignalReason reason); extern void XidCacheRemoveRunningXids(TransactionId xid, int nxids, const TransactionId *xids, diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h index 9f2f965d5c..722357c829 100644 --- a/src/include/storage/procsignal.h +++ b/src/include/storage/procsignal.h @@ -42,6 +42,8 @@ typedef enum PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK, + PROCSIG_RECOVERY_EXIT, /* recovery exit interrupt */ + NUM_PROCSIGNALS /* Must be last! */ } ProcSignalReason; diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h index 2361243514..3bb9023a6b 100644 --- a/src/include/storage/standby.h +++ b/src/include/storage/standby.h @@ -26,6 +26,7 @@ extern int max_standby_streaming_delay; extern void InitRecoveryTransactionEnvironment(void); extern void ShutdownRecoveryTransactionEnvironment(void); +extern void SendRecoveryExitSignal(void); extern void ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid, RelFileNode node); diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c index 82b80385d1..15ac9866bd 100644 --- a/src/interfaces/libpq/fe-connect.c +++ b/src/interfaces/libpq/fe-connect.c @@ -2077,6 +2077,49 @@ reject_checked_write_connection(PGconn *conn) conn->try_next_host = true; } +static void +reject_checked_recovery_connection(PGconn *conn) +{ + /* Not a requested type; fail this connection. */ + const char *displayed_host; + const char *displayed_port; + + /* Append error report to conn->errorMessage. */ + if (conn->connhost[conn->whichhost].type == CHT_HOST_ADDRESS) + displayed_host = conn->connhost[conn->whichhost].hostaddr; + else + displayed_host = conn->connhost[conn->whichhost].host; + displayed_port = conn->connhost[conn->whichhost].port; + if (displayed_port == NULL || displayed_port[0] == '\0') + displayed_port = DEF_PGPORT_STR; + + if (conn->requested_session_type == SESSION_TYPE_PRIMARY) + appendPQExpBuffer(&conn->errorMessage, + libpq_gettext("server is in recovery mode " + "\"%s:%s\"\n"), + displayed_host, displayed_port); + else + appendPQExpBuffer(&conn->errorMessage, + libpq_gettext("server is not in recovery mode " + "\"%s:%s\"\n"), + displayed_host, displayed_port); + + /* Close connection politely. */ + conn->status = CONNECTION_OK; + sendTerminateConn(conn); + + /* Record primary host index */ + if (conn->requested_session_type == SESSION_TYPE_PREFER_STANDBY && + conn->read_write_or_primary_host_index == -1) + conn->read_write_or_primary_host_index = conn->whichhost; + + /* + * Try next host if any, but we don't want to consider additional + * addresses for this host. + */ + conn->try_next_host = true; +} + /* ---------------- * PQconnectPoll * @@ -3374,27 +3417,52 @@ keep_going: /* We will come back to here until there is conn->requested_session_type == SESSION_TYPE_PREFER_STANDBY || conn->requested_session_type == SESSION_TYPE_STANDBY))) { - /* - * Save existing error messages across the PQsendQuery - * attempt. This is necessary because PQsendQuery is - * going to reset conn->errorMessage, so we would lose - * error messages related to previous hosts we have tried - * and failed to connect to. - */ - if (!saveErrorMessage(conn, &savedMessage)) - goto error_return; - conn->status = CONNECTION_OK; - if (!PQsendQuery(conn, "SELECT pg_is_in_recovery()")) + if (conn->sversion < 120000) { + /* + * Save existing error messages across the PQsendQuery + * attempt. This is necessary because PQsendQuery is + * going to reset conn->errorMessage, so we would lose + * error messages related to previous hosts we have + * tried and failed to connect to. + */ + if (!saveErrorMessage(conn, &savedMessage)) + goto error_return; + + conn->status = CONNECTION_OK; + if (!PQsendQuery(conn, "SELECT pg_is_in_recovery()")) + { + restoreErrorMessage(conn, &savedMessage); + goto error_return; + } + + conn->status = CONNECTION_CHECK_RECOVERY; + restoreErrorMessage(conn, &savedMessage); - goto error_return; + return PGRES_POLLING_READING; } + else if ((conn->in_recovery && + conn->requested_session_type == SESSION_TYPE_PRIMARY) || + (!conn->in_recovery && + (conn->requested_session_type == SESSION_TYPE_PREFER_STANDBY || + conn->requested_session_type == SESSION_TYPE_STANDBY))) + { + /* + * The following scenario is possible only for the + * prefer-standby mode for the next pass of the list + * of connections as it couldn't find any servers that + * are in recovery. + */ + if (conn->read_write_or_primary_host_index == -2) + goto consume_checked_target_connection; - conn->status = CONNECTION_CHECK_RECOVERY; + reject_checked_recovery_connection(conn); + goto keep_going; + } - restoreErrorMessage(conn, &savedMessage); - return PGRES_POLLING_READING; + /* obtained the requested type, consume it */ + goto consume_checked_target_connection; } /* @@ -3643,40 +3711,7 @@ keep_going: /* We will come back to here until there is PQclear(res); restoreErrorMessage(conn, &savedMessage); - /* Append error report to conn->errorMessage. */ - if (conn->connhost[conn->whichhost].type == CHT_HOST_ADDRESS) - displayed_host = conn->connhost[conn->whichhost].hostaddr; - else - displayed_host = conn->connhost[conn->whichhost].host; - displayed_port = conn->connhost[conn->whichhost].port; - if (displayed_port == NULL || displayed_port[0] == '\0') - displayed_port = DEF_PGPORT_STR; - - if (conn->requested_session_type == SESSION_TYPE_PRIMARY) - appendPQExpBuffer(&conn->errorMessage, - libpq_gettext("server is in recovery mode " - "\"%s:%s\"\n"), - displayed_host, displayed_port); - else - appendPQExpBuffer(&conn->errorMessage, - libpq_gettext("server is not in recovery mode " - "\"%s:%s\"\n"), - displayed_host, displayed_port); - - /* Close connection politely. */ - conn->status = CONNECTION_OK; - sendTerminateConn(conn); - - /* Record primary host index */ - if (conn->requested_session_type == SESSION_TYPE_PREFER_STANDBY && - conn->read_write_or_primary_host_index == -1) - conn->read_write_or_primary_host_index = conn->whichhost; - - /* - * Try next host if any, but we don't want to consider - * additional addresses for this host. - */ - conn->try_next_host = true; + reject_checked_recovery_connection(conn); goto keep_going; } diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c index d2658efba5..38b8abcc44 100644 --- a/src/interfaces/libpq/fe-exec.c +++ b/src/interfaces/libpq/fe-exec.c @@ -1117,6 +1117,10 @@ pqSaveParameterStatus(PGconn *conn, const char *name, const char *value) { conn->transaction_read_only = (strcmp(value, "on") == 0); } + else if (strcmp(name, "in_recovery") == 0) + { + conn->in_recovery = (strcmp(value, "on") == 0); + } } diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h index d60ee385a6..d135f3d9a3 100644 --- a/src/interfaces/libpq/libpq-int.h +++ b/src/interfaces/libpq/libpq-int.h @@ -442,6 +442,7 @@ struct pg_conn int client_encoding; /* encoding id */ bool std_strings; /* standard_conforming_strings */ bool transaction_read_only; /* transaction_read_only */ + bool in_recovery; /* in_recovery */ PGVerbosity verbosity; /* error/notice message verbosity */ PGContextVisibility show_context; /* whether to show CONTEXT field */ PGlobjfuncs *lobjfuncs; /* private state for large-object access fns */ -- 2.20.1.windows.1