From 53b3f3252bdbfb777f1c058a6ff93d78df491dc6 Mon Sep 17 00:00:00 2001 From: Nitin Date: Tue, 3 Aug 2021 11:48:28 +0530 Subject: [PATCH] Shows the progress of the operations performed during startup process. The interval between each progress update is configurable and it should be mentioned in seconds --- src/backend/access/transam/xlog.c | 97 +++++++++++++++++++++++++++ src/backend/postmaster/startup.c | 1 + src/backend/storage/file/fd.c | 18 +++++ src/backend/storage/file/reinit.c | 20 ++++++ src/backend/utils/init/postinit.c | 1 + src/backend/utils/misc/guc.c | 12 ++++ src/backend/utils/misc/postgresql.conf.sample | 6 ++ src/include/access/xlog.h | 17 +++++ src/include/utils/timeout.h | 1 + 9 files changed, 173 insertions(+) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 8b39a2f..190c95f 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -79,6 +79,7 @@ #include "utils/relmapper.h" #include "utils/pg_rusage.h" #include "utils/snapmgr.h" +#include "utils/timeout.h" #include "utils/timestamp.h" extern uint32 bootstrap_data_checksum_version; @@ -378,6 +379,23 @@ static bool doRequestWalReceiverReply; */ static XLogRecPtr RedoStartLSN = InvalidXLogRecPtr; +/* + * Start timestamp of the operation that occur during startup process. + */ +static TimestampTz startupProcessOpStartTime; + +/* + * Indicates whether the startup progress interval mentioned by the user is + * elapsed or not. TRUE if timeout occurred, FALSE otherwise. + */ +static bool logStartupProgressTimeout; + +/* + * The interval between each progress update of the operations that occur + * during startup process. + */ +int log_startup_progress_interval; + /*---------- * Shared-memory data structures for XLOG control * @@ -7353,6 +7371,7 @@ StartupXLOG(void) ereport(LOG, (errmsg("redo starts at %X/%X", LSN_FORMAT_ARGS(ReadRecPtr)))); + InitStartupProgress(); /* * main redo apply loop @@ -7361,6 +7380,10 @@ StartupXLOG(void) { bool switchedTLI = false; + ereport_startup_progress(false, + "redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%X", + LSN_FORMAT_ARGS(ReadRecPtr)); + #ifdef WAL_DEBUG if (XLOG_DEBUG || (rmid == RM_XACT_ID && trace_recovery_messages <= DEBUG2) || @@ -12984,3 +13007,77 @@ XLogRequestWalReceiverReply(void) { doRequestWalReceiverReply = true; } + +/* + * Schedule a wakeup call for logging the progress of startup process. + */ +void +LogStartupProgressTimeoutHandler(void) +{ + logStartupProgressTimeout = true; +} + +/* + * Sets the start timestamp of the current operation and also enables the + * timeout for logging the progress of startup process. + */ +void +InitStartupProgress(void) +{ + startupProcessOpStartTime = GetCurrentTimestamp(); + enable_timeout_after(LOG_STARTUP_PROGRESS_TIMEOUT, + (log_startup_progress_interval * 1000)); +} + +/* + * Decides whether to log the startup progress or not based on whether the + * timer is expired or not. It returns TRUE if the startup progress has to be + * reported, otherwise returns FALSE. It calculates the elapsed time and sets + * the respective out parameters secs and usecs. If the force parameter was set + * to TRUE, disables the timer and returns TRUE, otherwise it checks whether + * the timer is expired or not. If not expired, it returns FALSE, otherwise + * enables the timer for next log message and returns TRUE. + */ +bool +LogStartupProgressTimerExpired(bool force, long *secs, int *usecs) +{ + long seconds; + int useconds; + int elapsed_ms; + int interval_in_ms; + + /* If the feature is disabled, then no need to proceed further. */ + if (log_startup_progress_interval < 0) + return false; + + /* Calculate the elapsed time. */ + TimestampDifference(startupProcessOpStartTime, + GetCurrentTimestamp(), + &seconds, &useconds); + + *secs = seconds; + *usecs = useconds; + + /* Indicates the end of progress reporting. Disables the timer. */ + if (force) + { + disable_timeout(LOG_STARTUP_PROGRESS_TIMEOUT, false); + return true; + } + + /* If the timeout has not occurred, then no need to log the details. */ + if (!logStartupProgressTimeout) + return false; + + /* + * Enable the timer for the next log message based on the time that current + * log message timer was supposed to fire. + */ + elapsed_ms = (seconds * 1000) + (useconds / 1000); + interval_in_ms = log_startup_progress_interval * 1000; + enable_timeout_after(LOG_STARTUP_PROGRESS_TIMEOUT, + (interval_in_ms - (elapsed_ms % interval_in_ms))); + + logStartupProgressTimeout = false; + return true; +} diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c index 0f4f00d..502481f 100644 --- a/src/backend/postmaster/startup.c +++ b/src/backend/postmaster/startup.c @@ -233,6 +233,7 @@ StartupProcessMain(void) RegisterTimeout(STANDBY_DEADLOCK_TIMEOUT, StandbyDeadLockHandler); RegisterTimeout(STANDBY_TIMEOUT, StandbyTimeoutHandler); RegisterTimeout(STANDBY_LOCK_TIMEOUT, StandbyLockTimeoutHandler); + RegisterTimeout(LOG_STARTUP_PROGRESS_TIMEOUT, LogStartupProgressTimeoutHandler); /* * Unblock signals (they were blocked when the postmaster forked us) diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 5d5e8ae..7faf30f 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -3334,6 +3334,10 @@ do_syncfs(const char *path) { int fd; + ereport_startup_progress(false, + "syncing data directory (syncfs), elapsed time: %ld.%02d s, current path: %s", + path); + fd = OpenTransientFile(path, O_RDONLY); if (fd < 0) { @@ -3410,6 +3414,8 @@ SyncDataDirectory(void) DIR *dir; struct dirent *de; + InitStartupProgress(); + /* * On Linux, we don't have to open every single file one by one. We * can use syncfs() to sync whole filesystems. We only expect @@ -3436,6 +3442,9 @@ SyncDataDirectory(void) /* If pg_wal is a symlink, process that too. */ if (xlog_is_symlink) do_syncfs("pg_wal"); + + ereport_startup_progress(true, + "data directory sync (syncfs) complete after %ld.%02d s"); return; } #endif /* !HAVE_SYNCFS */ @@ -3452,6 +3461,8 @@ SyncDataDirectory(void) walkdir("pg_tblspc", pre_sync_fname, true, DEBUG1); #endif + InitStartupProgress(); + /* * Now we do the fsync()s in the same order. * @@ -3465,6 +3476,9 @@ SyncDataDirectory(void) if (xlog_is_symlink) walkdir("pg_wal", datadir_fsync_fname, false, LOG); walkdir("pg_tblspc", datadir_fsync_fname, true, LOG); + + ereport_startup_progress(true, + "data directory sync (fsync) complete after %ld.%02d s"); } /* @@ -3583,6 +3597,10 @@ pre_sync_fname(const char *fname, bool isdir, int elevel) static void datadir_fsync_fname(const char *fname, bool isdir, int elevel) { + ereport_startup_progress(false, + "syncing data directory (fsync), elapsed time: %ld.%02d s, current path: %s", + fname); + /* * We want to silently ignoring errors about unreadable files. Pass that * desire on to fsync_fname_ext(). diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c index 40c758d..b4c1182 100644 --- a/src/backend/storage/file/reinit.c +++ b/src/backend/storage/file/reinit.c @@ -16,6 +16,7 @@ #include +#include "access/xlog.h" #include "common/relpath.h" #include "storage/copydir.h" #include "storage/fd.h" @@ -65,6 +66,8 @@ ResetUnloggedRelations(int op) ALLOCSET_DEFAULT_SIZES); oldctx = MemoryContextSwitchTo(tmpctx); + InitStartupProgress(); + /* * First process unlogged files in pg_default ($PGDATA/base) */ @@ -86,6 +89,13 @@ ResetUnloggedRelations(int op) ResetUnloggedRelationsInTablespaceDir(temp_path, op); } + if (op & UNLOGGED_RELATION_INIT) + ereport_startup_progress(true, + "resetting unlogged relations (init) complete after %ld.%02d s"); + else if (op & UNLOGGED_RELATION_CLEANUP) + ereport_startup_progress(true, + "resetting unlogged relations (cleanup) complete after %ld.%02d s"); + FreeDir(spc_dir); /* @@ -136,6 +146,16 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op) snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s", tsdirname, de->d_name); + + if (op & UNLOGGED_RELATION_INIT) + ereport_startup_progress(false, + "resetting unlogged relations (init), elapsed time: %ld.%02d s, current path: %s", + dbspace_path); + else if (op & UNLOGGED_RELATION_CLEANUP) + ereport_startup_progress(false, + "resetting unlogged relations (cleanup), elapsed time: %ld.%02d s, current path: %s", + dbspace_path); + ResetUnloggedRelationsInDbspaceDir(dbspace_path, op); } diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 51d1bbe..6b1dd4b 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -622,6 +622,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, IdleInTransactionSessionTimeoutHandler); RegisterTimeout(IDLE_SESSION_TIMEOUT, IdleSessionTimeoutHandler); RegisterTimeout(CLIENT_CONNECTION_CHECK_TIMEOUT, ClientCheckTimeoutHandler); + RegisterTimeout(LOG_STARTUP_PROGRESS_TIMEOUT, LogStartupProgressTimeoutHandler); } /* diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index a2e0f8d..82bbc88 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -3546,6 +3546,18 @@ static struct config_int ConfigureNamesInt[] = check_client_connection_check_interval, NULL, NULL }, + { + {"log_startup_progress_interval", PGC_SIGHUP, LOGGING_WHEN, + gettext_noop("Sets the time interval between each progress update " + "of the startup process."), + gettext_noop("0 logs all messages. -1 turns this feature off."), + GUC_UNIT_S, + }, + &log_startup_progress_interval, + -1, -1, INT_MAX, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index b242a7f..901e4ba 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -523,6 +523,12 @@ # are logged regardless of their duration; 1.0 logs all # statements from all transactions, 0.0 never logs +log_startup_progress_interval = 10 # Sets the time interval between each + # progress update of the startup process. + # -1 disables the feature, 0 logs all + # messages, > 0 indicates the interval in + # seconds. + # - What to Log - #debug_print_parse = off diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 0a8ede7..f71f4e4 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -90,6 +90,7 @@ extern char *PrimaryConnInfo; extern char *PrimarySlotName; extern bool wal_receiver_create_temp_slot; extern bool track_wal_io_timing; +extern int log_startup_progress_interval; /* indirectly set via GUC system */ extern TransactionId recoveryTargetXid; @@ -214,6 +215,18 @@ extern bool XLOG_DEBUG; #define XLOG_MARK_UNIMPORTANT 0x02 /* record not important for durability */ #define XLOG_INCLUDE_XID 0x04 /* include XID of top-level xact */ +/* + * Logs the startup progress message. The FALSE value of force parameter + * indicates the operation is in progress and TRUE indicates the operation is + * completed. + */ +#define ereport_startup_progress(force, msg, ...) \ + do { \ + long secs; \ + int usecs; \ + if (LogStartupProgressTimerExpired(force, &secs, &usecs)) \ + ereport(LOG, errmsg(msg, secs, (usecs / 10000), ##__VA_ARGS__ )); \ + } while(0) /* Checkpoint statistics */ typedef struct CheckpointStatsData @@ -357,6 +370,10 @@ extern void do_pg_abort_backup(int code, Datum arg); extern void register_persistent_abort_backup_handler(void); extern SessionBackupState get_backup_status(void); +extern void InitStartupProgress(void); +extern void LogStartupProgressTimeoutHandler(void); +bool LogStartupProgressTimerExpired(bool force, long *secs, int *usecs); + /* File path names (all relative to $PGDATA) */ #define RECOVERY_SIGNAL_FILE "recovery.signal" #define STANDBY_SIGNAL_FILE "standby.signal" diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h index 93e6a69..ef892dd 100644 --- a/src/include/utils/timeout.h +++ b/src/include/utils/timeout.h @@ -33,6 +33,7 @@ typedef enum TimeoutId IDLE_IN_TRANSACTION_SESSION_TIMEOUT, IDLE_SESSION_TIMEOUT, CLIENT_CONNECTION_CHECK_TIMEOUT, + LOG_STARTUP_PROGRESS_TIMEOUT, /* First user-definable timeout reason */ USER_TIMEOUT, /* Maximum number of timeout reasons */ -- 1.8.3.1