Re: [HACKERS] Unportable implementation of background worker start - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Re: [HACKERS] Unportable implementation of background worker start |
Date | |
Msg-id | 4905.1492813727@sss.pgh.pa.us Whole thread Raw |
In response to | Re: [HACKERS] Unportable implementation of background worker start (Alvaro Herrera <alvherre@2ndquadrant.com>) |
List | pgsql-hackers |
Alvaro Herrera <alvherre@2ndquadrant.com> writes: > Tom Lane wrote: >> It also appears to me that do_start_bgworker's treatment of fork >> failure is completely brain dead. Did anyone really think about >> that case? > Hmm, I probably modelled it on autovacuum without giving that case much > additional consideration. Attached is a proposed patch that should make fork failure behave sanely, ie it works much the same as a worker crash immediately after launch. I also refactored things a bit to make do_start_bgworker fully responsible for updating the RegisteredBgWorker's state, rather than doing just some of it as before. I tested this by hot-wiring the fork_process call to fail some of the time, which showed that the postmaster now seems to recover OK, but parallel.c's logic is completely innocent of the idea that worker-startup failure is possible. The leader backend just freezes, and nothing short of kill -9 on that backend will get you out of it. Fixing that seems like material for a separate patch though. regards, tom lane diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index c382345..8461c24 100644 *** a/src/backend/postmaster/postmaster.c --- b/src/backend/postmaster/postmaster.c *************** static void TerminateChildren(int signal *** 420,425 **** --- 420,426 ---- #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL) static int CountChildren(int target); + static bool assign_backendlist_entry(RegisteredBgWorker *rw); static void maybe_start_bgworker(void); static bool CreateOptsFile(int argc, char *argv[], char *fullprogname); static pid_t StartChildProcess(AuxProcType type); *************** bgworker_forkexec(int shmem_slot) *** 5531,5543 **** * Start a new bgworker. * Starting time conditions must have been checked already. * * This code is heavily based on autovacuum.c, q.v. */ ! static void do_start_bgworker(RegisteredBgWorker *rw) { pid_t worker_pid; ereport(DEBUG1, (errmsg("starting background worker process \"%s\"", rw->rw_worker.bgw_name))); --- 5532,5564 ---- * Start a new bgworker. * Starting time conditions must have been checked already. * + * Returns true on success, false on failure. + * In either case, update the RegisteredBgWorker's state appropriately. + * * This code is heavily based on autovacuum.c, q.v. */ ! static bool do_start_bgworker(RegisteredBgWorker *rw) { pid_t worker_pid; + Assert(rw->rw_pid == 0); + + /* + * Allocate and assign the Backend element. Note we must do this before + * forking, so that we can handle out of memory properly. + * + * Treat failure as though the worker had crashed. That way, the + * postmaster will wait a bit before attempting to start it again; if it + * tried again right away, most likely it'd find itself repeating the + * out-of-memory or fork failure condition. + */ + if (!assign_backendlist_entry(rw)) + { + rw->rw_crashed_at = GetCurrentTimestamp(); + return false; + } + ereport(DEBUG1, (errmsg("starting background worker process \"%s\"", rw->rw_worker.bgw_name))); *************** do_start_bgworker(RegisteredBgWorker *rw *** 5549,5557 **** #endif { case -1: ereport(LOG, (errmsg("could not fork worker process: %m"))); ! return; #ifndef EXEC_BACKEND case 0: --- 5570,5586 ---- #endif { case -1: + /* in postmaster, fork failed ... */ ereport(LOG, (errmsg("could not fork worker process: %m"))); ! /* undo what assign_backendlist_entry did */ ! ReleasePostmasterChildSlot(rw->rw_child_slot); ! rw->rw_child_slot = 0; ! free(rw->rw_backend); ! rw->rw_backend = NULL; ! /* mark entry as crashed, so we'll try again later */ ! rw->rw_crashed_at = GetCurrentTimestamp(); ! break; #ifndef EXEC_BACKEND case 0: *************** do_start_bgworker(RegisteredBgWorker *rw *** 5575,5588 **** PostmasterContext = NULL; StartBackgroundWorker(); break; #endif default: rw->rw_pid = worker_pid; rw->rw_backend->pid = rw->rw_pid; ReportBackgroundWorkerPID(rw); ! break; } } /* --- 5604,5627 ---- PostmasterContext = NULL; StartBackgroundWorker(); + + exit(1); /* should not get here */ break; #endif default: + /* in postmaster, fork successful ... */ rw->rw_pid = worker_pid; rw->rw_backend->pid = rw->rw_pid; ReportBackgroundWorkerPID(rw); ! /* add new worker to lists of backends */ ! dlist_push_head(&BackendList, &rw->rw_backend->elem); ! #ifdef EXEC_BACKEND ! ShmemBackendArrayAdd(rw->rw_backend); ! #endif ! return true; } + + return false; } /* *************** bgworker_should_start_now(BgWorkerStartT *** 5629,5634 **** --- 5668,5675 ---- * Allocate the Backend struct for a connected background worker, but don't * add it to the list of backends just yet. * + * On failure, return false without changing any worker state. + * * Some info from the Backend is copied into the passed rw. */ static bool *************** assign_backendlist_entry(RegisteredBgWor *** 5647,5654 **** ereport(LOG, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("could not generate random cancel key"))); - - rw->rw_crashed_at = GetCurrentTimestamp(); return false; } --- 5688,5693 ---- *************** assign_backendlist_entry(RegisteredBgWor *** 5658,5671 **** ereport(LOG, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); - - /* - * The worker didn't really crash, but setting this nonzero makes - * postmaster wait a bit before attempting to start it again; if it - * tried again right away, most likely it'd find itself under the same - * memory pressure. - */ - rw->rw_crashed_at = GetCurrentTimestamp(); return false; } --- 5697,5702 ---- *************** assign_backendlist_entry(RegisteredBgWor *** 5687,5692 **** --- 5718,5728 ---- * As a side effect, the bgworker control variables are set or reset whenever * there are more workers to start after this one, and whenever the overall * system state requires it. + * + * The reason we start at most one worker per call is to avoid consuming the + * postmaster's attention for too long when many such requests are pending. + * As long as StartWorkerNeeded is true, ServerLoop will not block and will + * call this function again after dealing with any other issues. */ static void maybe_start_bgworker(void) *************** maybe_start_bgworker(void) *** 5694,5706 **** slist_mutable_iter iter; TimestampTz now = 0; if (FatalError) { StartWorkerNeeded = false; HaveCrashedWorker = false; ! return; /* not yet */ } HaveCrashedWorker = false; slist_foreach_modify(iter, &BackgroundWorkerList) --- 5730,5748 ---- slist_mutable_iter iter; TimestampTz now = 0; + /* + * During crash recovery, we have no need to be called until the state + * transition out of recovery. + */ if (FatalError) { StartWorkerNeeded = false; HaveCrashedWorker = false; ! return; } + /* Don't need to be called again unless we find a reason for it below */ + StartWorkerNeeded = false; HaveCrashedWorker = false; slist_foreach_modify(iter, &BackgroundWorkerList) *************** maybe_start_bgworker(void) *** 5709,5719 **** rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur); ! /* already running? */ if (rw->rw_pid != 0) continue; ! /* marked for death? */ if (rw->rw_terminate) { ForgetBackgroundWorker(&iter); --- 5751,5761 ---- rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur); ! /* ignore if already running */ if (rw->rw_pid != 0) continue; ! /* if marked for death, clean up and remove from list */ if (rw->rw_terminate) { ForgetBackgroundWorker(&iter); *************** maybe_start_bgworker(void) *** 5735,5746 **** --- 5777,5790 ---- continue; } + /* read system time only when needed */ if (now == 0) now = GetCurrentTimestamp(); if (!TimestampDifferenceExceeds(rw->rw_crashed_at, now, rw->rw_worker.bgw_restart_time * 1000)) { + /* Set flag to remember that we have workers to start later */ HaveCrashedWorker = true; continue; } *************** maybe_start_bgworker(void) *** 5748,5782 **** if (bgworker_should_start_now(rw->rw_worker.bgw_start_time)) { ! /* reset crash time before calling assign_backendlist_entry */ rw->rw_crashed_at = 0; /* ! * Allocate and assign the Backend element. Note we must do this ! * before forking, so that we can handle out of memory properly. */ ! if (!assign_backendlist_entry(rw)) return; ! ! do_start_bgworker(rw); /* sets rw->rw_pid */ ! ! dlist_push_head(&BackendList, &rw->rw_backend->elem); ! #ifdef EXEC_BACKEND ! ShmemBackendArrayAdd(rw->rw_backend); ! #endif /* ! * Have ServerLoop call us again. Note that there might not ! * actually *be* another runnable worker, but we don't care all ! * that much; we will find out the next time we run. */ StartWorkerNeeded = true; return; } } - - /* no runnable worker found */ - StartWorkerNeeded = false; } /* --- 5792,5826 ---- if (bgworker_should_start_now(rw->rw_worker.bgw_start_time)) { ! /* reset crash time before trying to start worker */ rw->rw_crashed_at = 0; /* ! * Try to start the worker. ! * ! * On failure, give up processing workers for now, but set ! * StartWorkerNeeded so we'll come back here on the next iteration ! * of ServerLoop to try again. (We don't want to wait, because ! * there might be additional ready-to-run workers.) We could set ! * HaveCrashedWorker as well, since this worker is now marked ! * crashed, but there's no need because the next run of this ! * function will do that. */ ! if (!do_start_bgworker(rw)) ! { ! StartWorkerNeeded = true; return; ! } /* ! * Quit, but have ServerLoop call us again to look for additional ! * ready-to-run workers. There might not be any, but we'll find ! * out the next time we run. */ StartWorkerNeeded = true; return; } } } /* -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
pgsql-hackers by date: