From 9443005040be52225498d58678b5faa1668bd2ad Mon Sep 17 00:00:00 2001 From: Nitin Date: Fri, 23 Jul 2021 15:46:56 +0530 Subject: [PATCH 1/2] Shows the progress of the operations performed during startup process. The interval between each progress update is configurable and it should be mentioned in seconds --- src/backend/access/transam/xlog.c | 178 ++++++++++++++++++ src/backend/postmaster/startup.c | 1 + src/backend/storage/file/fd.c | 13 ++ src/backend/storage/file/reinit.c | 18 ++ src/backend/utils/misc/guc.c | 13 +- src/backend/utils/misc/postgresql.conf.sample | 6 + src/include/access/xlog.h | 18 ++ src/include/utils/timeout.h | 1 + 8 files changed, 247 insertions(+), 1 deletion(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 3479402272..4f052050f3 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -79,6 +79,7 @@ #include "utils/relmapper.h" #include "utils/pg_rusage.h" #include "utils/snapmgr.h" +#include "utils/timeout.h" #include "utils/timestamp.h" extern uint32 bootstrap_data_checksum_version; @@ -397,6 +398,23 @@ static bool doRequestWalReceiverReply; */ static XLogRecPtr RedoStartLSN = InvalidXLogRecPtr; +/* + * Start timestamp of the operation that occur during startup process. + */ +static TimestampTz startupProcessOpStartTime; + +/* + * Indicates whether the startup progress interval mentioned by the user is + * elapsed or not. TRUE if timeout occurred, FALSE otherwise. + */ +static bool logStartupProgressTimeout; + +/* + * The interval between each progress update of the operations that occur + * during startup process. + */ +int log_startup_progress_interval; + /*---------- * Shared-memory data structures for XLOG control * @@ -7351,6 +7369,8 @@ StartupXLOG(void) (errmsg("redo starts at %X/%X", LSN_FORMAT_ARGS(ReadRecPtr)))); + InitStartupProgress(); + /* * main redo apply loop */ @@ -7358,6 +7378,8 @@ StartupXLOG(void) { bool switchedTLI = false; + LogStartupProgress(STARTUP_PROCESS_OP_REDO, NULL); + #ifdef WAL_DEBUG if (XLOG_DEBUG || (rmid == RM_XACT_ID && trace_recovery_messages <= DEBUG2) || @@ -13021,3 +13043,159 @@ XLogRequestWalReceiverReply(void) { doRequestWalReceiverReply = true; } + +/* + * Schedule a wakeup call for logging the progress of startup process. + */ +void +LogStartupProgressTimeoutHandler(void) +{ + logStartupProgressTimeout = true; +} + +/* + * Sets the start timestamp of the current operation and also enables the + * timeout for logging the progress of startup process. + */ +void +InitStartupProgress(void) +{ + /* + * If not called from startup process then just return without + * initialization. + */ + if (!AmStartupProcess()) + return; + + startupProcessOpStartTime = GetCurrentTimestamp(); + enable_timeout_after(LOG_STARTUP_PROGRESS_TIMEOUT, + (log_startup_progress_interval * 1000)); +} + +/* + * Logs the progress of the operations performed during startup process. + */ +void +LogStartupProgress(StartupProcessOp operation, const char *path) +{ + long secs; + int usecs; + int elapsed_ms; + int interval_in_ms; + + /* If not called from startup process then this feature is of no use. */ + if (!AmStartupProcess()) + return; + + /* If the feature is disabled, then no need to proceed further. */ + if (log_startup_progress_interval < 0) + return; + + /* If the timeout has not occurred, then no need to log the details. */ + if (!logStartupProgressTimeout) + return; + + /* Timeout has occurred. */ + TimestampDifference(startupProcessOpStartTime, + GetCurrentTimestamp(), + &secs, &usecs); + + /* + * Enable the timer for the next log message based on the time that current + * log message timer was supposed to fire. + */ + elapsed_ms = (secs * 1000) + (usecs / 1000); + interval_in_ms = log_startup_progress_interval * 1000; + enable_timeout_after(LOG_STARTUP_PROGRESS_TIMEOUT, + (interval_in_ms - (elapsed_ms % interval_in_ms))); + + switch(operation) + { + case STARTUP_PROCESS_OP_SYNCFS: + ereport(LOG, + (errmsg("syncing data directory (syncfs), elapsed time: %ld.%02d s, current path: %s", + secs, (usecs / 10000), path))); + break; + case STARTUP_PROCESS_OP_FSYNC: + ereport(LOG, + (errmsg("syncing data directory (fsync), elapsed time: %ld.%02d s, current path: %s", + secs, (usecs / 10000), path))); + break; + case STARTUP_PROCESS_OP_REDO: + ereport(LOG, + (errmsg("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%X", + secs, (usecs / 10000), LSN_FORMAT_ARGS(ReadRecPtr)))); + break; + case STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_INIT: + ereport(LOG, + (errmsg("resetting unlogged relations (init), elapsed time: %ld.%02d s, current path: %s", + secs, (usecs / 10000), path))); + break; + case STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_CLEANUP: + ereport(LOG, + (errmsg("resetting unlogged relations (cleanup), elapsed time: %ld.%02d s, current path: %s", + secs, (usecs / 10000), path))); + break; + default: + ereport(ERROR, + (errmsg("unrecognized operation (%d) in startup progress update", + operation))); + break; + } + + logStartupProgressTimeout = false; +} + +/* + * Logs the completion of the operation performed during startup process and + * disables the timeout used for logging the progress. + */ +void +LogStartupProgressComplete(StartupProcessOp operation) +{ + long secs; + int usecs; + + /* If not called from startup process then this feature is of no use. */ + if (!AmStartupProcess()) + return; + + /* If the feature is disabled, then no need to proceed further. */ + if (log_startup_progress_interval < 0) + return; + + TimestampDifference(startupProcessOpStartTime, + GetCurrentTimestamp(), + &secs, &usecs); + + switch(operation) + { + case STARTUP_PROCESS_OP_SYNCFS: + ereport(LOG, + (errmsg("data directory sync (syncfs) complete after %ld.%02d s", + secs, (usecs / 10000)))); + break; + case STARTUP_PROCESS_OP_FSYNC: + ereport(LOG, + (errmsg("data directory sync (fsync) complete after %ld.%02d s", + secs, (usecs / 10000)))); + break; + case STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_INIT: + ereport(LOG, + (errmsg("resetting unlogged relations (init) complete after %ld.%02d s", + secs, (usecs / 10000)))); + break; + case STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_CLEANUP: + ereport(LOG, + (errmsg("resetting unlogged relations (cleanup) complete after %ld.%02d s", + secs, (usecs / 10000)))); + break; + default: + ereport(ERROR, + (errmsg("unrecognized operation (%d) in startup progress update", + operation))); + break; + } + + disable_timeout(LOG_STARTUP_PROGRESS_TIMEOUT, false); +} diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c index 69077bd207..08c271cad6 100644 --- a/src/backend/postmaster/startup.c +++ b/src/backend/postmaster/startup.c @@ -232,6 +232,7 @@ StartupProcessMain(void) RegisterTimeout(STANDBY_DEADLOCK_TIMEOUT, StandbyDeadLockHandler); RegisterTimeout(STANDBY_TIMEOUT, StandbyTimeoutHandler); RegisterTimeout(STANDBY_LOCK_TIMEOUT, StandbyLockTimeoutHandler); + RegisterTimeout(LOG_STARTUP_PROGRESS_TIMEOUT, LogStartupProgressTimeoutHandler); /* * Unblock signals (they were blocked when the postmaster forked us) diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 5d5e8ae94e..49ecbbd3b9 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -100,6 +100,7 @@ #include "storage/ipc.h" #include "utils/guc.h" #include "utils/resowner_private.h" +#include "utils/timeout.h" /* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */ #if defined(HAVE_SYNC_FILE_RANGE) @@ -3334,6 +3335,8 @@ do_syncfs(const char *path) { int fd; + LogStartupProgress(STARTUP_PROCESS_OP_SYNCFS, path); + fd = OpenTransientFile(path, O_RDONLY); if (fd < 0) { @@ -3410,6 +3413,8 @@ SyncDataDirectory(void) DIR *dir; struct dirent *de; + InitStartupProgress(); + /* * On Linux, we don't have to open every single file one by one. We * can use syncfs() to sync whole filesystems. We only expect @@ -3436,6 +3441,8 @@ SyncDataDirectory(void) /* If pg_wal is a symlink, process that too. */ if (xlog_is_symlink) do_syncfs("pg_wal"); + + LogStartupProgressComplete(STARTUP_PROCESS_OP_SYNCFS); return; } #endif /* !HAVE_SYNCFS */ @@ -3452,6 +3459,8 @@ SyncDataDirectory(void) walkdir("pg_tblspc", pre_sync_fname, true, DEBUG1); #endif + InitStartupProgress(); + /* * Now we do the fsync()s in the same order. * @@ -3465,6 +3474,8 @@ SyncDataDirectory(void) if (xlog_is_symlink) walkdir("pg_wal", datadir_fsync_fname, false, LOG); walkdir("pg_tblspc", datadir_fsync_fname, true, LOG); + + LogStartupProgressComplete(STARTUP_PROCESS_OP_FSYNC); } /* @@ -3505,6 +3516,8 @@ walkdir(const char *path, snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name); + LogStartupProgress(STARTUP_PROCESS_OP_FSYNC, path); + switch (get_dirent_type(subpath, de, process_symlinks, elevel)) { case PGFILETYPE_REG: diff --git a/src/backend/storage/file/reinit.c b/src/backend/storage/file/reinit.c index 40c758d789..ba18996d98 100644 --- a/src/backend/storage/file/reinit.c +++ b/src/backend/storage/file/reinit.c @@ -16,12 +16,14 @@ #include +#include "access/xlog.h" #include "common/relpath.h" #include "storage/copydir.h" #include "storage/fd.h" #include "storage/reinit.h" #include "utils/hsearch.h" #include "utils/memutils.h" +#include "utils/timeout.h" static void ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op); @@ -65,6 +67,8 @@ ResetUnloggedRelations(int op) ALLOCSET_DEFAULT_SIZES); oldctx = MemoryContextSwitchTo(tmpctx); + InitStartupProgress(); + /* * First process unlogged files in pg_default ($PGDATA/base) */ @@ -75,6 +79,7 @@ ResetUnloggedRelations(int op) */ spc_dir = AllocateDir("pg_tblspc"); + while ((spc_de = ReadDir(spc_dir, "pg_tblspc")) != NULL) { if (strcmp(spc_de->d_name, ".") == 0 || @@ -86,6 +91,11 @@ ResetUnloggedRelations(int op) ResetUnloggedRelationsInTablespaceDir(temp_path, op); } + if (op & UNLOGGED_RELATION_INIT) + LogStartupProgressComplete(STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_INIT); + else if (op & UNLOGGED_RELATION_CLEANUP) + LogStartupProgressComplete(STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_CLEANUP); + FreeDir(spc_dir); /* @@ -136,6 +146,14 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op) snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s", tsdirname, de->d_name); + + if (op & UNLOGGED_RELATION_INIT) + LogStartupProgress(STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_INIT, + dbspace_path); + else if (op & UNLOGGED_RELATION_CLEANUP) + LogStartupProgress(STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_CLEANUP, + dbspace_path); + ResetUnloggedRelationsInDbspaceDir(dbspace_path, op); } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index a2e0f8de7e..d0ea7de8ff 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -679,7 +679,6 @@ static char *recovery_target_lsn_string; /* should be static, but commands/variable.c needs to get at this */ char *role_string; - /* * Displayable names for context types (enum GucContext) * @@ -3546,6 +3545,18 @@ static struct config_int ConfigureNamesInt[] = check_client_connection_check_interval, NULL, NULL }, + { + {"log_startup_progress_interval", PGC_SIGHUP, LOGGING_WHEN, + gettext_noop("Sets the time interval between each progress update " + "of the startup process."), + gettext_noop("0 logs all messages. -1 turns this feature off."), + GUC_UNIT_S, + }, + &log_startup_progress_interval, + -1, -1, INT_MAX, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index b242a7fc8b..901e4ba461 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -523,6 +523,12 @@ # are logged regardless of their duration; 1.0 logs all # statements from all transactions, 0.0 never logs +log_startup_progress_interval = 10 # Sets the time interval between each + # progress update of the startup process. + # -1 disables the feature, 0 logs all + # messages, > 0 indicates the interval in + # seconds. + # - What to Log - #debug_print_parse = off diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index ccfcf43d62..f17d91bf9c 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -132,6 +132,7 @@ extern char *PrimaryConnInfo; extern char *PrimarySlotName; extern bool wal_receiver_create_temp_slot; extern bool track_wal_io_timing; +extern int log_startup_progress_interval; /* indirectly set via GUC system */ extern TransactionId recoveryTargetXid; @@ -295,6 +296,18 @@ typedef enum WALAvailability WALAVAIL_REMOVED /* WAL segment has been removed */ } WALAvailability; +/* + * Codes of the operations performed during startup process + */ +typedef enum StartupProcessOp +{ + STARTUP_PROCESS_OP_SYNCFS, + STARTUP_PROCESS_OP_FSYNC, + STARTUP_PROCESS_OP_REDO, + STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_INIT, + STARTUP_PROCESS_OP_RESET_UNLOGGED_REL_CLEANUP +} StartupProcessOp; + struct XLogRecData; extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata, @@ -399,6 +412,11 @@ extern void do_pg_abort_backup(int code, Datum arg); extern void register_persistent_abort_backup_handler(void); extern SessionBackupState get_backup_status(void); +extern void InitStartupProgress(void); +extern void LogStartupProgress(StartupProcessOp operation, const char *path); +extern void LogStartupProgressComplete(StartupProcessOp operation); +extern void LogStartupProgressTimeoutHandler(void); + /* File path names (all relative to $PGDATA) */ #define RECOVERY_SIGNAL_FILE "recovery.signal" #define STANDBY_SIGNAL_FILE "standby.signal" diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h index 93e6a691b3..ef892dd647 100644 --- a/src/include/utils/timeout.h +++ b/src/include/utils/timeout.h @@ -33,6 +33,7 @@ typedef enum TimeoutId IDLE_IN_TRANSACTION_SESSION_TIMEOUT, IDLE_SESSION_TIMEOUT, CLIENT_CONNECTION_CHECK_TIMEOUT, + LOG_STARTUP_PROGRESS_TIMEOUT, /* First user-definable timeout reason */ USER_TIMEOUT, /* Maximum number of timeout reasons */ -- 2.17.0