From deee881ab6651dde633d0d53c9bf81d67135ac04 Mon Sep 17 00:00:00 2001 From: Kuntal Ghosh Date: Wed, 5 Apr 2017 14:10:14 +0530 Subject: [PATCH] Fix parallel worker counts after a crash The number of terminated parallel workers must never exceed the number of registered parallel workers. When ForgetBackgroundWorker is called for a bgworker that crashed previously, shared memory has already been initialized again, so we must not increase the terminated parallel worker count in that case. --- src/backend/postmaster/bgworker.c | 16 +++++++++++++--- src/backend/postmaster/postmaster.c | 6 +++--- src/include/postmaster/bgworker_internals.h | 2 +- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c index 0823317..59b13fc 100644 --- a/src/backend/postmaster/bgworker.c +++ b/src/backend/postmaster/bgworker.c @@ -399,10 +399,12 @@ BackgroundWorkerStateChange(void) * points to it. This convention allows deletion of workers during * searches of the worker list, and saves having to search the list again. * + * wasCrashed indicates whether the worker crashed previously. + * * This function must be invoked only in the postmaster. */ void -ForgetBackgroundWorker(slist_mutable_iter *cur) +ForgetBackgroundWorker(slist_mutable_iter *cur, bool wasCrashed) { RegisteredBgWorker *rw; BackgroundWorkerSlot *slot; @@ -412,7 +414,15 @@ ForgetBackgroundWorker(slist_mutable_iter *cur) Assert(rw->rw_shmem_slot < max_worker_processes); slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot]; if ((rw->rw_worker.bgw_flags & BGWORKER_CLASS_PARALLEL) != 0) - BackgroundWorkerData->parallel_terminate_count++; + { + /* + * If the worker crashed previously, shared memory must have been + * initialized. Hence, we don't increase the terminate count in + * that case. 
+ */ + if (!wasCrashed) + BackgroundWorkerData->parallel_terminate_count++; + } slot->in_use = false; @@ -471,7 +481,7 @@ ReportBackgroundWorkerExit(slist_mutable_iter *cur) */ if (rw->rw_terminate || rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART) - ForgetBackgroundWorker(cur); + ForgetBackgroundWorker(cur, false); if (notify_pid != 0) kill(notify_pid, SIGUSR1); diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 6831342..aa7ccf3 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -1601,7 +1601,7 @@ DetermineSleepTime(struct timeval * timeout) if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART || rw->rw_terminate) { - ForgetBackgroundWorker(&siter); + ForgetBackgroundWorker(&siter, false); continue; } @@ -5716,7 +5716,7 @@ maybe_start_bgworker(void) /* marked for death? */ if (rw->rw_terminate) { - ForgetBackgroundWorker(&iter); + ForgetBackgroundWorker(&iter, false); continue; } @@ -5731,7 +5731,7 @@ maybe_start_bgworker(void) { if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART) { - ForgetBackgroundWorker(&iter); + ForgetBackgroundWorker(&iter, true); continue; } diff --git a/src/include/postmaster/bgworker_internals.h b/src/include/postmaster/bgworker_internals.h index 9a2de4f..f50b2b1 100644 --- a/src/include/postmaster/bgworker_internals.h +++ b/src/include/postmaster/bgworker_internals.h @@ -40,7 +40,7 @@ extern slist_head BackgroundWorkerList; extern Size BackgroundWorkerShmemSize(void); extern void BackgroundWorkerShmemInit(void); extern void BackgroundWorkerStateChange(void); -extern void ForgetBackgroundWorker(slist_mutable_iter *cur); +extern void ForgetBackgroundWorker(slist_mutable_iter *cur, bool wasCrashed); extern void ReportBackgroundWorkerPID(RegisteredBgWorker *); extern void ReportBackgroundWorkerExit(slist_mutable_iter *cur); extern void BackgroundWorkerStopNotifications(pid_t pid); -- 1.8.3.1