Thread: PL/Perl backend crashed during spi_exec_query
Hi, One of our customers is running 8.2.14 and uses a couple of pl/perl and pl/perlu functions written by CMD. Everything worked normally until they tried to call one particular pl/perl function from pl/perl via spi. It appears that a die call inside the callee just crashes the backend. Here is the simple example: CREATE OR REPLACE FUNCTION caller() RETURNS VOID AS $$my $res = spi_exec_query('select callee()'); $$ LANGUAGE plperlu; CREATE OR REPLACE FUNCTION callee() RETURNS VOID AS $$die "callee died"; $$ LANGUAGE plperl; On my system (8.2.14 running OS X 10.6.1, perl 5.8.9 installed from macports, relevant flags from perl -V: usethreads=define use5005threads=undef useithreads=define usemultiplicity=define ): postgres=# select callee(); ERROR: error from Perl function: callee died at line 2. postgres=# select caller(); server closed the connection unexpectedly This probably means the server terminated abnormally before or while processing the request. The connection to the server was lost. Attempting reset: Failed. and in the server log: ERROR: error from Perl function: callee died at line 2. STATEMENT: select callee(); error from Perl function: callee died at line 2.. LOG: server process (PID 36132) exited with exit code 255 LOG: terminating any other active server processes FATAL: the database system is in recovery mode LOG: all server processes terminated; reinitializing Here is gdb output with a backtrace of the process just before it exits. It seems to terminate on croak call. The edata structure seems to be valid, so I suspect there is something with the interpreter that is executing croak. 
Breakpoint 1, plperl_spi_exec (query=0x10053d650 "select callee()", limit=0) at plperl.c:1814 1814 SPI_restore_connection(); (gdb) n 1817 croak("%s", edata->message); (gdb) p edata $1 = (ErrorData *) 0x100949290 (gdb) p *edata $2 = { elevel = 20, output_to_server = 1 '\001', output_to_client = 1 '\001', show_funcname = 0 '\0', filename = 0x100684b08"plperl.c", lineno = 1131, funcname = 0x1006860af "plperl_call_perl_func", sqlerrcode = 2600, message = 0x100949238"error from Perl function: callee died at line 2.", detail = 0x0, hint = 0x0, context = 0x100949328 "SQL statement \"select callee()\"", cursorpos = 0, internalpos= 0, internalquery = 0x0, saved_errno = 0 } (gdb) bt #0 plperl_spi_exec (query=0x10053d650 "select callee()", limit=0) at plperl.c:1817 #1 0x00000001006831c1 in XS__spi_exec_query (my_perl=<value temporarily unavailable, due to optimizations>, cv=<value temporarily unavailable, due to optimizations>) at SPI.xs:118 #2 0x00000001007056d2 in Perl_pp_entersub () #3 0x00000001006fdbba in Perl_runops_standard () #4 0x00000001006f7c8d in Perl_call_sv () #5 0x000000010067cec4 in plperl_call_perl_func (desc=0x10093f600, fcinfo=0x7fff5fbfda20) at plperl.c:1110 #6 0x000000010067fdc9 in plperl_func_handler [inlined] () at /private/ tmp/postgresql-8.2.14/src/pl/plperl/plperl.c:1240 #7 0x000000010067fdc9 in plperl_call_handler (fcinfo=0x7fff5fbfda20) at plperl.c:858 #8 0x00000001000f8f0e in ExecMakeFunctionResult (fcache=0x1008ecab0, econtext=0x1008ec980, isNull=0x1008f06a8 "", isDone=0x1008f06c8) at execQual.c:1340 #9 0x00000001000f6c2f in ExecTargetList [inlined] () at /private/tmp/ postgresql-8.2.14/src/backend/executor/execQual.c:4190 #10 0x00000001000f6c2f in ExecProject (projInfo=<value temporarily unavailable, due to optimizations>, isDone=0x7fff5fbfde9c) at execQual.c:4391 #11 0x000000010010a2c3 in ExecResult (node=0x1008ec868) at nodeResult.c:157 #12 0x00000001000f63b2 in ExecProcNode (node=0x1008ec868) at execProcnode.c:334 #13 0x00000001000f51fc in 
ExecutePlan [inlined] () at /private/tmp/ postgresql-8.2.14/src/backend/executor/execMain.c:1172 #14 0x00000001000f51fc in ExecutorRun (queryDesc=<value temporarily unavailable, due to optimizations>, direction=ForwardScanDirection, count=0) at execMain.c:244 #15 0x0000000100195099 in PortalRunSelect (portal=0x1008cc438, forward=<value temporarily unavailable, due to optimizations>, count=0, dest=0x1008b39b0) at pquery.c:831 #16 0x0000000100196d8f in PortalRun (portal=0x1008cc438, count=9223372036854775807, dest=0x1008b39b0, altdest=0x1008b39b0, completionTag=0x7fff5fbfe260 "") at pquery.c:656 #17 0x0000000100191a95 in exec_simple_query (query_string=0x1008b3238 "select caller();") at postgres.c:957 #18 0x0000000100192c19 in PostgresMain (argc=4, argv=0x100836300, username=0x10081bdf8 "alexk") at postgres.c:3472 #19 0x0000000100164254 in ServerLoop () at postmaster.c:2934 #20 0x00000001001650c9 in PostmasterMain (argc=3, argv=0x100500470) at postmaster.c:966 #21 0x0000000100119f89 in main (argc=3, argv=0x100500470) at main.c:188 (gdb) s Program exited with code 0377. -- Alexey Klyukin http://www.CommandPrompt.com/ The PostgreSQL Company - Command Prompt, Inc
Alexey Klyukin <alexk@commandprompt.com> writes: > One of our customers is running 8.2.14 and use a couple of pl/perl and > pl/perlu functions written by CMD. Everything worked normally until > they tried to call one particular pl/perl function from pl/perl via > spi. It appears that a die call inside the callee just crashes the > backend. I think the critical point is actually that you're calling plperl from plperlu, and we're being careless about restoring the former interpreter selection on error exit. The attached patch moves the responsibility for that into plperl_call_handler, which already has a suitable PG_TRY block. regards, tom lane Index: plperl.c =================================================================== RCS file: /cvsroot/pgsql/src/pl/plperl/plperl.c,v retrieving revision 1.152 diff -c -r1.152 plperl.c *** plperl.c 28 Sep 2009 17:31:12 -0000 1.152 --- plperl.c 31 Oct 2009 17:27:14 -0000 *************** *** 380,390 **** } } ! static void restore_context(bool old_context) { ! if (trusted_context != old_context) { if (old_context) PERL_SET_CONTEXT(plperl_trusted_interp); --- 380,392 ---- } } ! /* ! * Restore previous interpreter selection, if two are active ! */ static void restore_context(bool old_context) { ! if (interp_state == INTERP_BOTH && trusted_context != old_context) { if (old_context) PERL_SET_CONTEXT(plperl_trusted_interp); *************** *** 870,878 **** plperl_call_handler(PG_FUNCTION_ARGS) { Datum retval; ! plperl_call_data *save_call_data; - save_call_data = current_call_data; PG_TRY(); { if (CALLED_AS_TRIGGER(fcinfo)) --- 872,880 ---- plperl_call_handler(PG_FUNCTION_ARGS) { Datum retval; ! plperl_call_data *save_call_data = current_call_data; ! 
bool oldcontext = trusted_context; PG_TRY(); { if (CALLED_AS_TRIGGER(fcinfo)) *************** *** 883,893 **** --- 885,897 ---- PG_CATCH(); { current_call_data = save_call_data; + restore_context(oldcontext); PG_RE_THROW(); } PG_END_TRY(); current_call_data = save_call_data; + restore_context(oldcontext); return retval; } *************** *** 1226,1232 **** Datum retval; ReturnSetInfo *rsi; SV *array_ret = NULL; - bool oldcontext = trusted_context; ErrorContextCallback pl_error_context; /* --- 1230,1235 ---- *************** *** 1376,1384 **** if (array_ret == NULL) SvREFCNT_dec(perlret); - current_call_data = NULL; - restore_context(oldcontext); - return retval; } --- 1379,1384 ---- *************** *** 1391,1397 **** Datum retval; SV *svTD; HV *hvTD; - bool oldcontext = trusted_context; ErrorContextCallback pl_error_context; /* --- 1391,1396 ---- *************** *** 1491,1498 **** if (perlret) SvREFCNT_dec(perlret); - current_call_data = NULL; - restore_context(oldcontext); return retval; } --- 1490,1495 ----
On Oct 31, 2009, at 7:30 PM, Tom Lane wrote: > Alexey Klyukin <alexk@commandprompt.com> writes: >> One of our customers is running 8.2.14 and use a couple of pl/perl >> and >> pl/perlu functions written by CMD. Everything worked normally until >> they tried to call one particular pl/perl function from pl/perl via >> spi. It appears that a die call inside the callee just crashes the >> backend. > > I think the critical point is actually that you're calling plperl from > plperlu, and we're being careless about restoring the former > interpreter > selection on error exit. The attached patch moves the responsibility > for that into plperl_call_handler, which already has a suitable > PG_TRY block. The patch solves the problem, thank you! > > regards, tom lane > > Index: plperl.c > =================================================================== > RCS file: /cvsroot/pgsql/src/pl/plperl/plperl.c,v > retrieving revision 1.152 > diff -c -r1.152 plperl.c > *** plperl.c 28 Sep 2009 17:31:12 -0000 1.152 > --- plperl.c 31 Oct 2009 17:27:14 -0000 > *************** > *** 380,390 **** > } > } > > ! > static void > restore_context(bool old_context) > { > ! if (trusted_context != old_context) > { > if (old_context) > PERL_SET_CONTEXT(plperl_trusted_interp); > --- 380,392 ---- > } > } > > ! /* > ! * Restore previous interpreter selection, if two are active > ! */ > static void > restore_context(bool old_context) > { > ! if (interp_state == INTERP_BOTH && trusted_context != old_context) > { > if (old_context) > PERL_SET_CONTEXT(plperl_trusted_interp); > *************** > *** 870,878 **** > plperl_call_handler(PG_FUNCTION_ARGS) > { > Datum retval; > ! plperl_call_data *save_call_data; > > - save_call_data = current_call_data; > PG_TRY(); > { > if (CALLED_AS_TRIGGER(fcinfo)) > --- 872,880 ---- > plperl_call_handler(PG_FUNCTION_ARGS) > { > Datum retval; > ! plperl_call_data *save_call_data = current_call_data; > ! 
bool oldcontext = trusted_context; > > PG_TRY(); > { > if (CALLED_AS_TRIGGER(fcinfo)) > *************** > *** 883,893 **** > --- 885,897 ---- > PG_CATCH(); > { > current_call_data = save_call_data; > + restore_context(oldcontext); > PG_RE_THROW(); > } > PG_END_TRY(); > > current_call_data = save_call_data; > + restore_context(oldcontext); > return retval; > } > > *************** > *** 1226,1232 **** > Datum retval; > ReturnSetInfo *rsi; > SV *array_ret = NULL; > - bool oldcontext = trusted_context; > ErrorContextCallback pl_error_context; > > /* > --- 1230,1235 ---- > *************** > *** 1376,1384 **** > if (array_ret == NULL) > SvREFCNT_dec(perlret); > > - current_call_data = NULL; > - restore_context(oldcontext); > - > return retval; > } > > --- 1379,1384 ---- > *************** > *** 1391,1397 **** > Datum retval; > SV *svTD; > HV *hvTD; > - bool oldcontext = trusted_context; > ErrorContextCallback pl_error_context; > > /* > --- 1391,1396 ---- > *************** > *** 1491,1498 **** > if (perlret) > SvREFCNT_dec(perlret); > > - current_call_data = NULL; > - restore_context(oldcontext); > return retval; > } > > --- 1490,1495 ----