From 2678b810ad212ae36a28d481ace180ca05d498bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Herrera?= Date: Mon, 28 Jul 2025 20:56:20 +0200 Subject: [PATCH 2/3] LogChildExit / HandleChildCrash support (Didn't actually test that bgworkers are doing the expected thing!) Signed-off-by: Jonathan Gonzalez V. --- src/backend/postmaster/postmaster.c | 121 +++++++++--------- src/test/perl/PostgreSQL/Test/Cluster.pm | 2 +- .../postmaster/t/002_connection_limits.pl | 2 +- 3 files changed, 63 insertions(+), 62 deletions(-) diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index e1d643b013d..83cea73fb6c 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -428,8 +428,8 @@ static void process_pm_reload_request(void); static void process_pm_shutdown_request(void); static void dummy_handler(SIGNAL_ARGS); static void CleanupBackend(PMChild *bp, int exitstatus); -static void HandleChildCrash(int pid, int exitstatus, const char *procname); -static void LogChildExit(int lev, const char *procname, +static void HandleChildCrash(int pid, int exitstatus, BackendType proctype, const char *addtype); +static void LogChildExit(int lev, BackendType proctype, const char *addtype, int pid, int exitstatus); static void PostmasterStateMachine(void); static void UpdatePMState(PMState newState); @@ -2288,8 +2288,7 @@ process_pm_child_exit(void) StartupStatus != STARTUP_SIGNALED && !EXIT_STATUS_0(exitstatus)) { - LogChildExit(LOG, _("startup process"), - pid, exitstatus); + LogChildExit(LOG, B_STARTUP, NULL, pid, exitstatus); ereport(LOG, (errmsg("aborting startup due to startup process failure"))); ExitPostmaster(1); @@ -2323,8 +2322,7 @@ process_pm_child_exit(void) } else StartupStatus = STARTUP_CRASHED; - HandleChildCrash(pid, exitstatus, - _("startup process")); + HandleChildCrash(pid, exitstatus, B_STARTUP, NULL); continue; } @@ -2368,8 +2366,7 @@ process_pm_child_exit(void) 
ReleasePostmasterChildSlot(BgWriterPMChild); BgWriterPMChild = NULL; if (!EXIT_STATUS_0(exitstatus)) - HandleChildCrash(pid, exitstatus, - _("background writer process")); + HandleChildCrash(pid, exitstatus, B_BG_WRITER, NULL); continue; } @@ -2401,8 +2398,7 @@ process_pm_child_exit(void) * Any unexpected exit of the checkpointer (including FATAL * exit) is treated as a crash. */ - HandleChildCrash(pid, exitstatus, - _("checkpointer process")); + HandleChildCrash(pid, exitstatus, B_CHECKPOINTER, NULL); } continue; @@ -2418,8 +2414,7 @@ process_pm_child_exit(void) ReleasePostmasterChildSlot(WalWriterPMChild); WalWriterPMChild = NULL; if (!EXIT_STATUS_0(exitstatus)) - HandleChildCrash(pid, exitstatus, - _("WAL writer process")); + HandleChildCrash(pid, exitstatus, B_WAL_WRITER, NULL); continue; } @@ -2434,8 +2429,7 @@ process_pm_child_exit(void) ReleasePostmasterChildSlot(WalReceiverPMChild); WalReceiverPMChild = NULL; if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) - HandleChildCrash(pid, exitstatus, - _("WAL receiver process")); + HandleChildCrash(pid, exitstatus, B_WAL_RECEIVER, NULL); continue; } @@ -2449,8 +2443,7 @@ process_pm_child_exit(void) ReleasePostmasterChildSlot(WalSummarizerPMChild); WalSummarizerPMChild = NULL; if (!EXIT_STATUS_0(exitstatus)) - HandleChildCrash(pid, exitstatus, - _("WAL summarizer process")); + HandleChildCrash(pid, exitstatus, B_WAL_SUMMARIZER, NULL); continue; } @@ -2465,8 +2458,7 @@ process_pm_child_exit(void) ReleasePostmasterChildSlot(AutoVacLauncherPMChild); AutoVacLauncherPMChild = NULL; if (!EXIT_STATUS_0(exitstatus)) - HandleChildCrash(pid, exitstatus, - _("autovacuum launcher process")); + HandleChildCrash(pid, exitstatus, B_AUTOVAC_LAUNCHER, NULL); continue; } @@ -2481,8 +2473,7 @@ process_pm_child_exit(void) ReleasePostmasterChildSlot(PgArchPMChild); PgArchPMChild = NULL; if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) - HandleChildCrash(pid, exitstatus, - _("archiver process")); + 
HandleChildCrash(pid, exitstatus, B_ARCHIVER, NULL); continue; } @@ -2497,8 +2488,7 @@ process_pm_child_exit(void) StartSysLogger(); if (!EXIT_STATUS_0(exitstatus)) - LogChildExit(LOG, _("system logger process"), - pid, exitstatus); + LogChildExit(LOG, B_LOGGER, NULL, pid, exitstatus); continue; } @@ -2514,8 +2504,7 @@ process_pm_child_exit(void) ReleasePostmasterChildSlot(SlotSyncWorkerPMChild); SlotSyncWorkerPMChild = NULL; if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) - HandleChildCrash(pid, exitstatus, - _("slot sync worker process")); + HandleChildCrash(pid, exitstatus, B_SLOTSYNC_WORKER, NULL); continue; } @@ -2523,7 +2512,7 @@ process_pm_child_exit(void) if (maybe_reap_io_worker(pid)) { if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) - HandleChildCrash(pid, exitstatus, _("io worker")); + HandleChildCrash(pid, exitstatus, B_IO_WORKER, NULL); maybe_adjust_io_workers(); continue; @@ -2545,9 +2534,9 @@ process_pm_child_exit(void) else { if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) - HandleChildCrash(pid, exitstatus, _("untracked child process")); + HandleChildCrash(pid, exitstatus, B_INVALID, NULL); else - LogChildExit(LOG, _("untracked child process"), pid, exitstatus); + LogChildExit(LOG, B_INVALID, NULL, pid, exitstatus); } } /* loop over pending child-death reports */ @@ -2568,8 +2557,8 @@ static void CleanupBackend(PMChild *bp, int exitstatus) /* child's exit status. 
*/ { - char namebuf[MAXPGPATH]; - const char *procname; + char namebuf[MAXPGPATH]; + char *procname; bool crashed = false; bool logged = false; pid_t bp_pid; @@ -2578,14 +2567,13 @@ CleanupBackend(PMChild *bp, RegisteredBgWorker *rw; /* Construct a process name for the log message */ - if (bp->bkend_type == B_BG_WORKER) + if (bp && bp->bkend_type == B_BG_WORKER && bp->rw) { - snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""), - bp->rw->rw_worker.bgw_type); + strlcpy(namebuf, bp->rw->rw_worker.bgw_type, MAXPGPATH); procname = namebuf; } else - procname = _(GetBackendTypeDesc(bp->bkend_type)); + procname = NULL; /* * If a backend dies in an ugly way then we must signal all other backends @@ -2607,7 +2595,7 @@ CleanupBackend(PMChild *bp, */ if (exitstatus == ERROR_WAIT_NO_CHILDREN) { - LogChildExit(LOG, procname, bp->pid, exitstatus); + LogChildExit(LOG, bp->bkend_type, procname, bp->pid, exitstatus); logged = true; crashed = false; } @@ -2642,7 +2630,7 @@ CleanupBackend(PMChild *bp, */ if (crashed) { - HandleChildCrash(bp_pid, exitstatus, procname); + HandleChildCrash(bp_pid, exitstatus, bp_bkend_type, procname); return; } @@ -2680,7 +2668,7 @@ CleanupBackend(PMChild *bp, if (!logged) { LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG, - procname, bp_pid, exitstatus); + bp_bkend_type, procname, bp_pid, exitstatus); logged = true; } @@ -2689,7 +2677,7 @@ CleanupBackend(PMChild *bp, } if (!logged) - LogChildExit(DEBUG2, procname, bp_pid, exitstatus); + LogChildExit(DEBUG2, bp_bkend_type, procname, bp_pid, exitstatus); } /* @@ -2781,8 +2769,8 @@ HandleFatalError(QuitSignalReason reason, bool consider_sigabrt) } /* - * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer, - * walwriter, autovacuum, archiver, slot sync worker, or background worker. + * HandleChildCrash -- cleanup after failed backend or certain auxiliary + * processes. 
* * The objectives here are to clean up our local state about the child * process, and to signal all other remaining children to quickdie. @@ -2790,7 +2778,7 @@ HandleFatalError(QuitSignalReason reason, bool consider_sigabrt) * The caller has already released its PMChild slot. */ static void -HandleChildCrash(int pid, int exitstatus, const char *procname) +HandleChildCrash(int pid, int exitstatus, BackendType proctype, const char *addtype) { /* * We only log messages and send signals if this is the first process @@ -2802,7 +2790,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) if (FatalError || Shutdown == ImmediateShutdown) return; - LogChildExit(LOG, procname, pid, exitstatus); + LogChildExit(LOG, proctype, addtype, pid, exitstatus); ereport(LOG, (errmsg("terminating any other active server processes"))); @@ -2815,9 +2803,13 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) /* * Log the death of a child process. + * + * 'addtype' is an additional word or short phrase that describes the process, + * such as a background worker 'type'. */ static void -LogChildExit(int lev, const char *procname, int pid, int exitstatus) +LogChildExit(int lev, BackendType proctype, const char *addtype, int pid, + int exitstatus) { /* * size of activity_buffer is arbitrary, but set equal to default @@ -2832,14 +2824,13 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus) sizeof(activity_buffer)); if (WIFEXITED(exitstatus)) - ereport(lev, + ereport(lev, addtype ? 
+ errmsg("\"%s\" process of type \"%s\" (PID %d) exited with exit code %d", + GetBackendTypeDesc(proctype), addtype, pid, WEXITSTATUS(exitstatus)) : + errmsg("process of type \"%s\" (PID %d) exited with exit code %d", + GetBackendTypeDesc(proctype), pid, WEXITSTATUS(exitstatus)), - /*------ - translator: %s is a noun phrase describing a child process, such as - "server process" */ - (errmsg("%s (PID %d) exited with exit code %d", - procname, pid, WEXITSTATUS(exitstatus)), - activity ? errdetail("Failed process was running: %s", activity) : 0)); + activity ? errdetail("Failed process was running: %s", activity) : 0); else if (WIFSIGNALED(exitstatus)) { #if defined(WIN32) @@ -2848,20 +2839,27 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus) /*------ translator: %s is a noun phrase describing a child process, such as "server process" */ - (errmsg("%s (PID %d) was terminated by exception 0x%X", - procname, pid, WTERMSIG(exitstatus)), - errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."), - activity ? errdetail("Failed process was running: %s", activity) : 0)); + addtype ? + errmsg("\"%s\" process of type \"%s\" (PID %d) was terminated by exception 0x%X", + GetBackendTypeDesc(proctype), addtype, pid, WTERMSIG(exitstatus)) : + errmsg("\"%s\" process (PID %d) was terminated by exception 0x%X", + GetBackendTypeDesc(proctype), pid, WTERMSIG(exitstatus)), + errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."), + activity ? errdetail("Failed process was running: %s", activity) : 0); #else ereport(lev, /*------ translator: %s is a noun phrase describing a child process, such as "server process" */ - (errmsg("%s (PID %d) was terminated by signal %d: %s", - procname, pid, WTERMSIG(exitstatus), - pg_strsignal(WTERMSIG(exitstatus))), - activity ? errdetail("Failed process was running: %s", activity) : 0)); + addtype ? 
+ errmsg("\"%s\" process of type \"%s\" (PID %d) was terminated by signal %d: %s", + GetBackendTypeDesc(proctype), addtype, pid, WTERMSIG(exitstatus), + pg_strsignal(WTERMSIG(exitstatus))) : + errmsg("\"%s\" process (PID %d) was terminated by signal %d: %s", + GetBackendTypeDesc(proctype), pid, WTERMSIG(exitstatus), + pg_strsignal(WTERMSIG(exitstatus))), + activity ? errdetail("Failed process was running: %s", activity) : 0); #endif } else @@ -2870,9 +2868,12 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus) /*------ translator: %s is a noun phrase describing a child process, such as "server process" */ - (errmsg("%s (PID %d) exited with unrecognized status %d", - procname, pid, exitstatus), - activity ? errdetail("Failed process was running: %s", activity) : 0)); + addtype ? + errmsg("\"%s\" process of type \"%s\" (PID %d) exited with unrecognized status %d", + GetBackendTypeDesc(proctype), addtype, pid, exitstatus) : + errmsg("\"%s\" process (PID %d) exited with unrecognized status %d", + GetBackendTypeDesc(proctype), pid, exitstatus), + activity ? 
errdetail("Failed process was running: %s", activity) : 0); } /* diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm index 35413f14019..92c5b06868f 100644 --- a/src/test/perl/PostgreSQL/Test/Cluster.pm +++ b/src/test/perl/PostgreSQL/Test/Cluster.pm @@ -2696,7 +2696,7 @@ sub connect_fails if (defined($params{log_like}) or defined($params{log_unlike})) { $self->wait_for_log( - qr/DEBUG: (?:00000: )?forked new client backend, pid=(\d+) socket.*DEBUG: (?:00000: )?client backend \(PID \1\) exited with exit code \d/s, + qr/DEBUG: (?:00000: )?forked new client backend, pid=(\d+) socket.*DEBUG: (?:00000: )?process of type \"client backend\" \(PID \1\) exited with exit code \d/s, $log_location); $self->log_check($test_name, $log_location, %params); diff --git a/src/test/postmaster/t/002_connection_limits.pl b/src/test/postmaster/t/002_connection_limits.pl index 4a7fb16261f..a67ae77d59a 100644 --- a/src/test/postmaster/t/002_connection_limits.pl +++ b/src/test/postmaster/t/002_connection_limits.pl @@ -69,7 +69,7 @@ sub connect_fails_wait $node->connect_fails($connstr, $test_name, %params); $node->wait_for_log( - qr/DEBUG: (00000: )?client backend.*exited with exit code 1/, + qr/DEBUG: (00000: )?process of type "client backend".*exited with exit code 1/, $log_location); ok(1, "$test_name: client backend process exited"); } -- 2.48.1