From 77c214c9fb2fa7f9a003b96db0e5ec6506217e38 Mon Sep 17 00:00:00 2001 From: Julien Rouhaud Date: Fri, 6 Mar 2020 13:40:56 +0100 Subject: [PATCH v2 1/2] Add an optional cancel on clause to isolationtester steps. Some sanity checks can require a command to wait on a lock and eventually be cancelled. The only way to do that was to rely on calls to pg_cancel_backend() filtering pg_stat_activity view, but this isn't a satisfactory solution as there's no way to guarantee that only the wanted backend will be canceled. Instead, add a new optional "cancel on " clause to the step definition. When this clause is specified, isolationtester will actively wait on that command, and issue a cancel when the query is waiting on the given wait event. Author: Julien Rouhaud Reviewed-by: Discussion: https://postgr.es/m/20200305035354.GQ2593%40paquier.xyz --- src/backend/utils/adt/lockfuncs.c | 54 ++++++++++++++++++++++++++-- src/include/catalog/pg_proc.dat | 5 ++- src/test/isolation/README | 12 +++++-- src/test/isolation/isolationtester.c | 43 +++++++++++++++++----- src/test/isolation/isolationtester.h | 8 +++++ src/test/isolation/specparse.y | 20 +++++++++-- src/test/isolation/specscanner.l | 2 ++ 7 files changed, 127 insertions(+), 17 deletions(-) diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c index ecb1bf92ff..cc2bc94d0d 100644 --- a/src/backend/utils/adt/lockfuncs.c +++ b/src/backend/utils/adt/lockfuncs.c @@ -17,7 +17,9 @@ #include "catalog/pg_type.h" #include "funcapi.h" #include "miscadmin.h" +#include "pgstat.h" #include "storage/predicate_internals.h" +#include "storage/procarray.h" #include "utils/array.h" #include "utils/builtins.h" @@ -578,17 +580,28 @@ pg_safe_snapshot_blocking_pids(PG_FUNCTION_ARGS) Datum pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS) { + TupleDesc tupdesc; int blocked_pid = PG_GETARG_INT32(0); ArrayType *interesting_pids_a = PG_GETARG_ARRAYTYPE_P(1); ArrayType *blocking_pids_a; int32 *interesting_pids; int32 *blocking_pids; + Datum values[3]; + bool nulls[3]; + PGPROC *proc; + uint32 raw_wait_event = 0; + const char *wait_event_type = NULL; + const char *wait_event = NULL; int num_interesting_pids; int num_blocking_pids; int dummy; int i, j; + /* Initialise values and NULL flags arrays */ + MemSet(values, 0, sizeof(values)); + MemSet(nulls, 0, sizeof(nulls)); + /* Validate the passed-in array */ Assert(ARR_ELEMTYPE(interesting_pids_a) == INT4OID); if (array_contains_nulls(interesting_pids_a)) @@ -597,6 +610,34 @@ pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS) num_interesting_pids = ArrayGetNItems(ARR_NDIM(interesting_pids_a), ARR_DIMS(interesting_pids_a)); + /* Initialise attributes information in the tuple descriptor */ + tupdesc = CreateTemplateTupleDesc(3); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "blocked", + BOOLOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "wait_event_type", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "wait_even", + TEXTOID, -1, 0); + + BlessTupleDesc(tupdesc); + + proc = BackendPidGetProc(blocked_pid); + if (proc) + { +#define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var)))) + raw_wait_event = UINT32_ACCESS_ONCE(proc->wait_event_info); + wait_event_type = pgstat_get_wait_event_type(raw_wait_event); + wait_event = pgstat_get_wait_event(raw_wait_event); + + if (wait_event_type != NULL) + values[1] = CStringGetTextDatum(wait_event_type); + if (wait_event != NULL) + values[2] = CStringGetTextDatum(wait_event); + } + + nulls[1] = (wait_event_type == NULL); + nulls[2] = (wait_event == NULL); + /* * Get the PIDs of all sessions blocking the given session's attempt to * acquire heavyweight locks. @@ -623,7 +664,11 @@ pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS) for (j = 0; j < num_interesting_pids; j++) { if (blocking_pids[i] == interesting_pids[j]) - PG_RETURN_BOOL(true); + { + values[0] = BoolGetDatum(true); + PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, + values, nulls))); + } } /* @@ -636,9 +681,12 @@ pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS) * buffer and check if the number of safe snapshot blockers is non-zero. */ if (GetSafeSnapshotBlockingPids(blocked_pid, &dummy, 1) > 0) - PG_RETURN_BOOL(true); + values[0] = BoolGetDatum(true); + + values[0] = BoolGetDatum(false); - PG_RETURN_BOOL(false); + PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, + values, nulls))); } diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 7fb574f9dc..77529e181d 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5869,7 +5869,10 @@ prosrc => 'pg_safe_snapshot_blocking_pids' }, { oid => '3378', descr => 'isolationtester support function', proname => 'pg_isolation_test_session_is_blocked', provolatile => 'v', - prorettype => 'bool', proargtypes => 'int4 _int4', + prorettype => 'record', proargtypes => 'int4 _int4', + proallargtypes => '{int4,_int4,bool,text,text}', + proargmodes => '{i,i,o,o,o}', + proargnames => '{blocking_pid,interesting_pids,blocked,wait_event_type,wait_event}', prosrc => 'pg_isolation_test_session_is_blocked' }, { oid => '1065', descr => 'view two-phase transactions', proname => 'pg_prepared_xact', prorows => '1000', proretset => 't', diff --git a/src/test/isolation/README b/src/test/isolation/README index 217953d183..04aed5cd17 100644 --- a/src/test/isolation/README +++ b/src/test/isolation/README @@ -86,10 +86,12 @@ session "" Each step has the syntax - step "" { } + step "" { } [ cancel on "" "" ] - where is a name identifying this step, and SQL is a SQL statement - (or statements, separated by semicolons) that is executed in the step. + where is a name identifying this step, SQL is a SQL statement + (or statements, separated by semicolons) that is executed in the step and + and a wait event specification for which + isolationtester will cancel the query if it's blocked on it. Step names must be unique across the whole spec file. permutation "" ... @@ -125,6 +127,10 @@ after PGISOLATIONTIMEOUT seconds. If the cancel doesn't work, isolationtester will exit uncleanly after a total of twice PGISOLATIONTIMEOUT. Testing invalid permutations should be avoided because they can make the isolation tests take a very long time to run, and they serve no useful testing purpose. +If a test specified the optionnal cancel on specification, then isolationtester +will actively wait for the step commands completion rather than continuing with +the permutation next step, and send a cancel once the given wait event is +blocking the query. Note that isolationtester recognizes that a command has blocked by looking to see if it is shown as waiting in the pg_locks view; therefore, only diff --git a/src/test/isolation/isolationtester.c b/src/test/isolation/isolationtester.c index f80261c022..f530d9923d 100644 --- a/src/test/isolation/isolationtester.c +++ b/src/test/isolation/isolationtester.c @@ -43,6 +43,7 @@ static void run_permutation(TestSpec *testspec, int nsteps, Step **steps); #define STEP_NONBLOCK 0x1 /* return 0 as soon as cmd waits for a lock */ #define STEP_RETRY 0x2 /* this is a retry of a previously-waiting cmd */ +#define STEP_WAIT 0x4 /* active wait for a given wait event */ static bool try_complete_step(TestSpec *testspec, Step *step, int flags); static int step_qsort_cmp(const void *a, const void *b); @@ -212,7 +213,7 @@ main(int argc, char **argv) */ initPQExpBuffer(&wait_query); appendPQExpBufferStr(&wait_query, - "SELECT pg_catalog.pg_isolation_test_session_is_blocked($1, '{"); + "SELECT * FROM pg_catalog.pg_isolation_test_session_is_blocked($1, '{"); /* The spec syntax requires at least one session; assume that here. */ appendPQExpBufferStr(&wait_query, backend_pid_strs[1]); for (i = 2; i < nconns; i++) @@ -587,8 +588,14 @@ run_permutation(TestSpec *testspec, int nsteps, Step **steps) exit(1); } - /* Try to complete this step without blocking. */ - mustwait = try_complete_step(testspec, step, STEP_NONBLOCK); + /* + * Try to complete this step without blocking, unless the step has a + * cancel on clause. + */ + mustwait = try_complete_step(testspec, step, + (step->waitinfo ? STEP_WAIT : STEP_NONBLOCK)); + if (step->waitinfo) + report_error_message(step); /* Check for completion of any steps that were previously waiting. */ w = 0; @@ -720,10 +727,12 @@ try_complete_step(TestSpec *testspec, Step *step, int flags) else if (ret == 0) /* select() timeout: check for lock wait */ { struct timeval current_time; + char *wait_event_type = ""; + char *wait_event = ""; int64 td; /* If it's OK for the step to block, check whether it has. */ - if (flags & STEP_NONBLOCK) + if (flags & (STEP_NONBLOCK | STEP_WAIT)) { bool waiting; @@ -738,9 +747,17 @@ try_complete_step(TestSpec *testspec, Step *step, int flags) exit(1); } waiting = ((PQgetvalue(res, 0, 0))[0] == 't'); + if (waiting && (flags & STEP_WAIT)) + { + wait_event_type = pg_strdup(PQgetvalue(res, 0, 1)); + Assert(wait_event_type != NULL); + + wait_event = pg_strdup(PQgetvalue(res, 0, 2)); + Assert(wait_event != NULL); + } PQclear(res); - if (waiting) /* waiting to acquire a lock */ + if (waiting && (flags & STEP_NONBLOCK)) /* waiting to acquire a lock */ { /* * Since it takes time to perform the lock-check query, @@ -787,7 +804,11 @@ try_complete_step(TestSpec *testspec, Step *step, int flags) * failing, but remaining permutations and tests should still be * OK. */ - if (td > max_step_wait && !canceled) + if ((td > max_step_wait || + (step->waitinfo + && strcmp(step->waitinfo->wait_event_type, wait_event_type) == 0 + && strcmp(step->waitinfo->wait_event, wait_event) == 0)) + && !canceled) { PGcancel *cancel = PQgetCancel(conn); @@ -801,8 +822,14 @@ try_complete_step(TestSpec *testspec, Step *step, int flags) * print to stdout not stderr, as this should appear * in the test case's results */ - printf("isolationtester: canceling step %s after %d seconds\n", - step->name, (int) (td / USECS_PER_SEC)); + if (!step->waitinfo) + printf("isolationtester: canceling step %s after %d seconds\n", + step->name, (int) (td / USECS_PER_SEC)); + else + printf("isolationtester: canceling step %s on wait event %s/%s\n", + step->name, + step->waitinfo->wait_event_type, + step->waitinfo->wait_event); canceled = true; } else diff --git a/src/test/isolation/isolationtester.h b/src/test/isolation/isolationtester.h index 9cf5012416..5df540485c 100644 --- a/src/test/isolation/isolationtester.h +++ b/src/test/isolation/isolationtester.h @@ -26,6 +26,12 @@ struct Session int nsteps; }; +typedef struct WaitInfo +{ + char *wait_event_type; + char *wait_event; +} WaitInfo; + struct Step { int session; @@ -33,6 +39,8 @@ struct Step char *name; char *sql; char *errormsg; + WaitInfo *waitinfo; + struct timeval start_time; }; typedef struct diff --git a/src/test/isolation/specparse.y b/src/test/isolation/specparse.y index 5e007e1bf0..c8e72f4316 100644 --- a/src/test/isolation/specparse.y +++ b/src/test/isolation/specparse.y @@ -27,6 +27,7 @@ TestSpec parseresult; /* result of parsing is left here */ char *str; Session *session; Step *step; + WaitInfo *waitinfo; Permutation *permutation; struct { @@ -43,9 +44,11 @@ TestSpec parseresult; /* result of parsing is left here */ %type session %type step %type permutation +%type opt_cancel %token sqlblock string_literal -%token PERMUTATION SESSION SETUP STEP TEARDOWN TEST +%token LITERAL +%token CANCEL ON PERMUTATION SESSION SETUP STEP TEARDOWN TEST %% @@ -140,16 +143,29 @@ step_list: step: - STEP string_literal sqlblock + STEP string_literal sqlblock opt_cancel { $$ = pg_malloc(sizeof(Step)); $$->name = $2; $$->sql = $3; $$->used = false; $$->errormsg = NULL; + $$->waitinfo = $4; } ; +opt_cancel: + CANCEL ON string_literal string_literal + { + $$ = pg_malloc(sizeof(WaitInfo)); + $$->wait_event_type = $3; + $$->wait_event = $4; + } + | /* EMPTY */ + { + $$ = NULL; + } + ; opt_permutation_list: permutation_list diff --git a/src/test/isolation/specscanner.l b/src/test/isolation/specscanner.l index 410f17727e..672e5fa2fc 100644 --- a/src/test/isolation/specscanner.l +++ b/src/test/isolation/specscanner.l @@ -48,6 +48,8 @@ comment ("#"{non_newline}*) litbufsize = LITBUF_INIT; %} +cancel { return CANCEL; } +on { return ON; } permutation { return PERMUTATION; } session { return SESSION; } setup { return SETUP; } -- 2.25.1