diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile index 39b368b..49bb462 100644 --- a/contrib/pg_stat_statements/Makefile +++ b/contrib/pg_stat_statements/Makefile @@ -4,7 +4,8 @@ MODULE_big = pg_stat_statements OBJS = pg_stat_statements.o $(WIN32RES) EXTENSION = pg_stat_statements -DATA = pg_stat_statements--1.4.sql pg_stat_statements--1.4--1.5.sql \ +DATA = pg_stat_statements--1.4.sql \ + pg_stat_statements--1.5--1.6.sql pg_stat_statements--1.4--1.5.sql \ pg_stat_statements--1.3--1.4.sql pg_stat_statements--1.2--1.3.sql \ pg_stat_statements--1.1--1.2.sql pg_stat_statements--1.0--1.1.sql \ pg_stat_statements--unpackaged--1.0.sql diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out index 5318c35..e6725ed 100644 --- a/contrib/pg_stat_statements/expected/pg_stat_statements.out +++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out @@ -395,4 +395,33 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; SELECT pg_stat_statements_reset() | 1 | 1 (8 rows) +-- test to see if any plans have been recorded. +SELECT + CASE WHEN plan_time > 0 THEN 1 ELSE 0 END, + CASE WHEN plan_timestamp >= timestamp '1970-01-01 00:00:00' THEN 1 ELSE 0 END +FROM pg_stat_statements_plans ORDER BY plan_timestamp; + case | case +------+------ + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 +(6 rows) + +-- test if there is some text in the recorded plans. +SELECT substr(plan, 0, 11) FROM pg_stat_statements_plans ORDER BY plan_timestamp; + substr +------------ + Query Text + Query Text + Query Text + Query Text + Query Text + Query Text + Query Text + Query Text +(8 rows) + DROP EXTENSION pg_stat_statements; diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.5--1.6.sql b/contrib/pg_stat_statements/pg_stat_statements--1.5--1.6.sql new file mode 100644 index 0000000..907c84a --- /dev/null +++ b/contrib/pg_stat_statements/pg_stat_statements--1.5--1.6.sql @@ -0,0 +1,86 @@ +/* contrib/pg_stat_statements/pg_stat_statements--1.5--1.6.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION pg_stat_statements UPDATE TO '1.6'" to load this file. \quit + +/* First we have to remove them from the extension */ +ALTER EXTENSION pg_stat_statements DROP VIEW pg_stat_statements; +ALTER EXTENSION pg_stat_statements DROP FUNCTION pg_stat_statements(boolean); +ALTER EXTENSION pg_stat_statements DROP FUNCTION pg_stat_statements_reset(); + +/* Then we can drop them */ +DROP VIEW pg_stat_statements; +DROP FUNCTION pg_stat_statements(boolean); +DROP FUNCTION pg_stat_statements_reset(); + +-- Register functions. +CREATE FUNCTION pg_stat_statements_reset() +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C PARALLEL SAFE; + +CREATE FUNCTION pg_stat_statements_plan_reset(IN queryid bigint) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C PARALLEL SAFE; + +CREATE FUNCTION pg_stat_statements_plan_reset(IN queryid bigint, IN plantype cstring) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C PARALLEL SAFE; + +CREATE FUNCTION pg_stat_statements(IN showtext boolean, + OUT userid oid, + OUT dbid oid, + OUT queryid bigint, + OUT query text, + OUT calls int8, + OUT total_time float8, + OUT min_time float8, + OUT max_time float8, + OUT mean_time float8, + OUT stddev_time float8, + OUT rows int8, + OUT shared_blks_hit int8, + OUT shared_blks_read int8, + OUT shared_blks_dirtied int8, + OUT shared_blks_written int8, + OUT local_blks_hit int8, + OUT local_blks_read int8, + OUT local_blks_dirtied int8, + OUT local_blks_written int8, + OUT temp_blks_read int8, + OUT temp_blks_written int8, + OUT blk_read_time float8, + OUT blk_write_time float8 +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'pg_stat_statements_1_6' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +CREATE FUNCTION pg_stat_statements_plans(IN showtext boolean, + OUT queryid bigint, + OUT plan text, + OUT plan_time float8, + OUT plan_timestamp timestamp +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'pg_stat_statements_plans_1_6' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +-- Register a view on the function for ease of use. +CREATE VIEW pg_stat_statements AS + SELECT * FROM pg_stat_statements(true); + +CREATE VIEW pg_stat_statements_plans AS + SELECT * FROM pg_stat_statements_plans(true); + +GRANT SELECT ON pg_stat_statements TO PUBLIC; +GRANT SELECT ON pg_stat_statements_plans TO PUBLIC; + +-- Don't want this to be available to non-superusers. +REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC; +REVOKE ALL ON FUNCTION pg_stat_statements_plan_reset(bigint) FROM PUBLIC; +REVOKE ALL ON FUNCTION pg_stat_statements_plan_reset(bigint, cstring) FROM PUBLIC; + +GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO pg_read_all_stats; diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 9286734..7dc6f81 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -63,6 +63,7 @@ #include "access/hash.h" #include "catalog/pg_authid.h" +#include "commands/explain.h" #include "executor/instrument.h" #include "funcapi.h" #include "mb/pg_wchar.h" @@ -78,6 +79,8 @@ #include "tcop/utility.h" #include "utils/builtins.h" #include "utils/memutils.h" +#include "utils/timestamp.h" +#include "utils/varlena.h" PG_MODULE_MAGIC; @@ -119,7 +122,8 @@ typedef enum pgssVersion PGSS_V1_0 = 0, PGSS_V1_1, PGSS_V1_2, - PGSS_V1_3 + PGSS_V1_3, + PGSS_V1_6 } pgssVersion; /* @@ -165,6 +169,16 @@ typedef struct Counters double usage; /* usage factor */ } Counters; +typedef struct pgssPlan +{ + Size offset; + int len; + double time; /* execution time in msec when the latest plan was updated */ + TimestampTz timestamp; + int64 outliers; + int64 updates; +} pgssPlan; + /* * Statistics per statement * @@ -178,6 +192,8 @@ typedef struct pgssEntry Counters counters; /* the statistics for this query */ Size query_offset; /* query text offset in external file */ int query_len; /* # of valid bytes in query string, or -1 */ + pgssPlan good_plan; + pgssPlan bad_plan; int encoding; /* query text encoding */ slock_t mutex; /* protects the counters only */ } pgssEntry; @@ -265,11 +281,26 @@ static const struct config_enum_entry track_options[] = {NULL, 0, false} }; +static const struct config_enum_entry format_options[] = +{ + {"text", EXPLAIN_FORMAT_TEXT, false}, + {"xml", EXPLAIN_FORMAT_XML, false}, + {"json", EXPLAIN_FORMAT_JSON, false}, + {"yaml", EXPLAIN_FORMAT_YAML, false}, + {NULL, 0, false} +}; + static int pgss_max; /* max # statements to track */ static int pgss_track; /* tracking level */ static bool pgss_track_utility; /* whether to track utility commands */ static bool pgss_save; /* whether to save stats across shutdown */ - +static bool pgss_good_plan_enable; /* whether to save good_plans */ +static bool pgss_bad_plan_enable; /* whether to save bad_plans */ +static bool pgss_good_plan_enable_temp; /* used to store state between plan_track_check and _assign */ +static bool pgss_bad_plan_enable_temp; /* used to store state between plan_track_check and _assign */ +static int pgss_plan_format; /* id which sets the output format */ +static double pgss_plan_fence_factor; /* can be used to scale the fences for plan updates */ +static char *pgss_plan_track_str; /* String holding the arguments provided to the pgss_plan_track GUC */ #define pgss_enabled() \ (pgss_track == PGSS_TRACK_ALL || \ @@ -291,7 +322,10 @@ void _PG_fini(void); PG_FUNCTION_INFO_V1(pg_stat_statements_reset); PG_FUNCTION_INFO_V1(pg_stat_statements_1_2); PG_FUNCTION_INFO_V1(pg_stat_statements_1_3); +PG_FUNCTION_INFO_V1(pg_stat_statements_1_6); PG_FUNCTION_INFO_V1(pg_stat_statements); +PG_FUNCTION_INFO_V1(pg_stat_statements_plans_1_6); +PG_FUNCTION_INFO_V1(pg_stat_statements_plan_reset); static void pgss_shmem_startup(void); static void pgss_shmem_shutdown(int code, Datum arg); @@ -309,12 +343,19 @@ static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, static uint64 pgss_hash_string(const char *str, int len); static void pgss_store(const char *query, uint64 queryId, int query_location, int query_len, + QueryDesc *queryDesc, double total_time, uint64 rows, const BufferUsage *bufusage, pgssJumbleState *jstate); +Datum pg_stat_statements_plan_reset(PG_FUNCTION_ARGS); static void pg_stat_statements_internal(FunctionCallInfo fcinfo, pgssVersion api_version, bool showtext); +static void pg_stat_statements_plans_internal(FunctionCallInfo fcinfo, + pgssVersion api_version, + bool showtext); +static int fill_plan_str(Datum values[], bool nulls[], int i, pgssPlan *plan, + pgssEntry *entry, char *qbuffer, Size qbuffer_size); static Size pgss_memsize(void); static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding, bool sticky); @@ -338,7 +379,8 @@ static char *generate_normalized_query(pgssJumbleState *jstate, const char *quer static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query, int query_loc); static int comp_location(const void *a, const void *b); - +static bool plan_track_check_hook(char **newvalue, void **extra, GucSource source); +static void plan_track_assign_hook(const char *newvalue, void *extra); /* * Module load callback @@ -407,6 +449,42 @@ _PG_init(void) NULL, NULL); + DefineCustomStringVariable("pg_stat_statements.plan_track", + "Enable plan tracking", + "Specify the types of plans that shall be tracked: all, none, good, or bad. Support for multiple individual types (curently only good and bad) is available.", + &pgss_plan_track_str, + "all", + PGC_SUSET, + GUC_LIST_INPUT, + plan_track_check_hook, + plan_track_assign_hook, + NULL); + + DefineCustomEnumVariable("pg_stat_statements.plan_format", + "Sets the output format for the plans.", + "Notice that the plan format can not be changed after a plan is saved. Valid values are text, json, xml and yaml.", + &pgss_plan_format, + EXPLAIN_FORMAT_TEXT, + format_options, + PGC_SUSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomRealVariable("pg_stat_statements.plan_fence_factor", + "Sets the factor that scales the fences that are used to determine whether a plan should be updated.", + NULL, + &pgss_plan_fence_factor, + 1.5, + 0.0, + +INFINITY, + PGC_SUSET, + 0, + NULL, + NULL, + NULL); + EmitWarningsOnPlaceholders("pg_stat_statements"); /* @@ -470,7 +548,11 @@ pgss_shmem_startup(void) int32 pgver; int32 i; int buffer_size; + int good_plan_buffer_size; + int bad_plan_buffer_size; char *buffer = NULL; + char *good_plan_buffer = NULL; + char *bad_plan_buffer = NULL; if (prev_shmem_startup_hook) prev_shmem_startup_hook(); @@ -561,7 +643,11 @@ pgss_shmem_startup(void) } buffer_size = 2048; + good_plan_buffer_size = 2048; + bad_plan_buffer_size = 2048; buffer = (char *) palloc(buffer_size); + good_plan_buffer = (char *) palloc(good_plan_buffer_size); + bad_plan_buffer = (char *) palloc(bad_plan_buffer_size); if (fread(&header, sizeof(uint32), 1, file) != 1 || fread(&pgver, sizeof(uint32), 1, file) != 1 || @@ -595,8 +681,34 @@ pgss_shmem_startup(void) if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1) goto read_error; + /* Resize good plan_buffer and read into it */ + if (temp.good_plan.len >= good_plan_buffer_size) + { + good_plan_buffer_size = + Max(good_plan_buffer_size * 2, temp.good_plan.len + 1); + good_plan_buffer= repalloc(good_plan_buffer, good_plan_buffer_size); + } + + if (fread(good_plan_buffer, 1, temp.good_plan.len + 1, file) + != temp.good_plan.len + 1) + goto read_error; + + /* Resize bad plan_buffer and read into it */ + if (temp.bad_plan.len >= bad_plan_buffer_size) + { + bad_plan_buffer_size = + Max(bad_plan_buffer_size * 2, temp.bad_plan.len + 1); + bad_plan_buffer= repalloc(bad_plan_buffer, bad_plan_buffer_size); + } + + if (fread(bad_plan_buffer, 1, temp.bad_plan.len + 1, file) + != temp.bad_plan.len + 1) + goto read_error; + /* Should have a trailing null, but let's make sure */ buffer[temp.query_len] = '\0'; + good_plan_buffer[temp.good_plan.len] = '\0'; + bad_plan_buffer[temp.bad_plan.len] = '\0'; /* Skip loading "sticky" entries */ if (temp.counters.calls == 0) @@ -608,6 +720,20 @@ pgss_shmem_startup(void) goto write_error; pgss->extent += temp.query_len + 1; + /* Store the good plan text*/ + temp.good_plan.offset = pgss->extent; + if (fwrite(good_plan_buffer, 1, temp.good_plan.len + 1, qfile) + != temp.good_plan.len + 1) + goto write_error; + pgss->extent += temp.good_plan.len + 1; + + /* Store the bad plan text*/ + temp.bad_plan.offset = pgss->extent; + if (fwrite(bad_plan_buffer, 1, temp.bad_plan.len + 1, qfile) + != temp.bad_plan.len + 1) + goto write_error; + pgss->extent += temp.bad_plan.len + 1; + /* make the hashtable entry (discards old entries if too many) */ entry = entry_alloc(&temp.key, query_offset, temp.query_len, temp.encoding, @@ -615,9 +741,14 @@ pgss_shmem_startup(void) /* copy in the actual stats */ entry->counters = temp.counters; + /* copy in the plans */ + entry->good_plan = temp.good_plan; + entry->bad_plan = temp.bad_plan; } pfree(buffer); + pfree(good_plan_buffer); + pfree(bad_plan_buffer); FreeFile(file); FreeFile(qfile); @@ -658,6 +789,10 @@ write_error: fail: if (buffer) pfree(buffer); + if (good_plan_buffer) + pfree(good_plan_buffer); + if (bad_plan_buffer) + pfree(bad_plan_buffer); if (file) FreeFile(file); if (qfile) @@ -723,14 +858,52 @@ pgss_shmem_shutdown(int code, Datum arg) while ((entry = hash_seq_search(&hash_seq)) != NULL) { int len = entry->query_len; + int good_plan_len = entry->good_plan.len; + int bad_plan_len = entry->bad_plan.len; char *qstr = qtext_fetch(entry->query_offset, len, qbuffer, qbuffer_size); + char *good_plan_str; + char *bad_plan_str; + if (good_plan_len > 0) + { + /* A good plan is available */ + good_plan_str = qtext_fetch(entry->good_plan.offset, good_plan_len, + qbuffer, qbuffer_size); + } + else + { + /* + * There is no good plan available. This can be caused by a query + * which has no plan (for example insert or delete queries) or + * because good plans are disabled. In this case we have to store + * an empty string instead of null. + */ + good_plan_str = ""; + } + if (bad_plan_len > 0) + { + /* A bad plan is available */ + bad_plan_str = qtext_fetch(entry->bad_plan.offset, bad_plan_len, + qbuffer, qbuffer_size); + } + else + { + /* + * There is no bad plan available. This can be caused by a query + * which has no plan (for example insert or delete queries) or + * because bad plans are disabled. In this case we have to store + * an empty string instead of null. + */ + bad_plan_str = ""; + } if (qstr == NULL) continue; /* Ignore any entries with bogus texts */ - + /* Write entries, queries and plans serialized to the dump file */ if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 || - fwrite(qstr, 1, len + 1, file) != len + 1) + fwrite(qstr, 1, len + 1, file) != len + 1 || + fwrite(good_plan_str, 1, good_plan_len + 1, file) != good_plan_len + 1 || + fwrite(bad_plan_str, 1, bad_plan_len + 1, file) != bad_plan_len + 1) { /* note: we assume hash_seq_term won't change errno */ hash_seq_term(&hash_seq); @@ -835,6 +1008,7 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query) query->queryId, query->stmt_location, query->stmt_len, + NULL, 0, 0, NULL, @@ -942,6 +1116,7 @@ pgss_ExecutorEnd(QueryDesc *queryDesc) queryId, queryDesc->plannedstmt->stmt_location, queryDesc->plannedstmt->stmt_len, + queryDesc, /* queryDesc itself is needed to retrieve the plan */ queryDesc->totaltime->total * 1000.0, /* convert to msec */ queryDesc->estate->es_processed, &queryDesc->totaltime->bufusage, @@ -1053,6 +1228,7 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, 0, /* signal that it's a utility stmt */ pstmt->stmt_location, pstmt->stmt_len, + NULL, INSTR_TIME_GET_MILLISEC(duration), rows, &bufusage, @@ -1083,6 +1259,26 @@ pgss_hash_string(const char *str, int len) len, 0)); } +/* + * Update a plan in the text file. + * We can re-use qtext_store to save the plan. + */ +static void +update_plan(volatile pgssPlan* pgssPlan_ptr, const char* plan_str, int plan_len, + double total_time) +{ + if (plan_len > 0) + { + Size plan_offset; + qtext_store(plan_str, plan_len, &plan_offset, NULL); + pgssPlan_ptr->offset = plan_offset; + pgssPlan_ptr->len = plan_len; + pgssPlan_ptr->time = total_time; + pgssPlan_ptr->timestamp = GetCurrentTimestamp(); + } + return; +} + /* * Store some statistics for a statement. * @@ -1096,6 +1292,7 @@ pgss_hash_string(const char *str, int len) static void pgss_store(const char *query, uint64 queryId, int query_location, int query_len, + QueryDesc *queryDesc, double total_time, uint64 rows, const BufferUsage *bufusage, pgssJumbleState *jstate) @@ -1134,6 +1331,7 @@ pgss_store(const char *query, uint64 queryId, query_len = strlen(query); } + /* * Discard leading and trailing whitespace, too. Use scanner_isspace() * not libc's isspace(), because we want to match the lexer's behavior. @@ -1231,9 +1429,136 @@ pgss_store(const char *query, uint64 queryId, * locking rules at the head of the file) */ volatile pgssEntry *e = (volatile pgssEntry *) entry; + bool good_plan_needs_updating = false; + bool bad_plan_needs_updating = false; + int plan_len = 0; + char* plan; SpinLockAcquire(&e->mutex); + if (queryDesc && (pgss_good_plan_enable || pgss_bad_plan_enable)) + { + /* Avoid division by zero errors. */ + if (e->counters.calls > 0) + { + /* + * The quantiles for a normal distributions can be estimated through + * X = expected value + standard deviation * Z-score + * For the 25% and 75% quantiles (=lower and upper quartile), + * the z-scores -0.6754 and 0.6745 are used. + */ + double z_score = 0.6745; + /* + * Since we're already making a bunch of statistical assumptions, + * let's use the fact that the expected value can be approximated + * by the mean of a dataset (see "law of large numbers"). + */ + + double std_dev = sqrt(e->counters.sum_var_time / e->counters.calls); + double exp_val = e->counters.mean_time; + + double lower_quartile = exp_val - std_dev * z_score; + double upper_quartile = exp_val + std_dev * z_score; + + double interquartile_dist = upper_quartile - lower_quartile; + + /* + * We calculate a set of inner and outer fences, similar to the + * definition of whiskers in boxplots. + * By default, pgss_plan_fence_factor is set to 1.5 . + */ + double lower_fence = lower_quartile - pgss_plan_fence_factor * interquartile_dist; + double upper_fence = upper_quartile + pgss_plan_fence_factor * interquartile_dist; + + if (pgss_good_plan_enable) + { + /* + * Only update bad_plan if execution time was slower than the lower fence. + * The use of statistics to estimate wether the change in execution time + * was big enough to warrant a plan update is debatable, however it is + * probably the fastest way to identify changes in the planner + * without actually comparing the plans step by step. + */ + if (total_time < lower_fence) + { + e->good_plan.outliers++; + /* Only update plan if execution time was slower than that of the old plan*/ + if (total_time < e->good_plan.time) + { + good_plan_needs_updating = true; + e->good_plan.updates++; + } + } + } + + if (pgss_bad_plan_enable) + { + /* Only update good_plan if execution time was faster than the upperer fence.*/ + if (total_time > upper_fence) + { + e->bad_plan.outliers++; + if (total_time > e->bad_plan.time) + { + bad_plan_needs_updating = true; + e->bad_plan.updates++; + } + } + } + } + + /* + * Plans should be updated whenever no previous plan was stored. + */ + if (pgss_good_plan_enable && e->good_plan.time==0) + good_plan_needs_updating = true; + if (pgss_bad_plan_enable && e->bad_plan.time==0) + bad_plan_needs_updating = true; + + if (good_plan_needs_updating || bad_plan_needs_updating) + { + /* *es is used to retrieve the plan associated with the statement*/ + ExplainState *es = NewExplainState(); + + es->analyze = (queryDesc->instrument_options && false); + es->verbose = false; + es->buffers = (es->analyze && false); + es->timing = (es->analyze && true); + es->format = pgss_plan_format; + + ExplainBeginOutput(es); + ExplainQueryText(es, queryDesc); + ExplainPrintPlan(es, queryDesc); + ExplainEndOutput(es); + + /* Remove last line break */ + if (es->str->len > 0 && es->str->data[es->str->len - 1] == '\n') + es->str->data[--es->str->len] = '\0'; + + /* Fix JSON to output an object */ + if (pgss_plan_format == EXPLAIN_FORMAT_JSON) + { + es->str->data[0] = '{'; + es->str->data[es->str->len - 1] = '}'; + } + + plan = es->str->data; + plan_len = strlen(plan); + + if (good_plan_needs_updating) + { + update_plan(&e->good_plan, plan, plan_len, total_time); + } + + if (bad_plan_needs_updating) + { + update_plan(&e->bad_plan, plan, plan_len, total_time); + } + + pfree(es->str->data); + pfree(es); + } + } + /* "Unstick" entry if it was previously sticky */ if (e->counters.calls == 0) e->counters.usage = USAGE_INIT; @@ -1305,13 +1630,80 @@ pg_stat_statements_reset(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +Datum +pg_stat_statements_plan_reset(PG_FUNCTION_ARGS) +{ + pgssEntry *entry; + pgssHashKey key; + + uint64 query_id = PG_GETARG_INT64(0); + + bool good_plan_reset = false; + bool bad_plan_reset = false; + + if (PG_NARGS() == 2) + { + char *plan_type_str = PG_GETARG_CSTRING(1); + if (pg_strcasecmp(plan_type_str, "good") == 0) + good_plan_reset = true; + else if (pg_strcasecmp(plan_type_str, "bad") == 0) + bad_plan_reset = true; + else + elog(LOG, "If a second argument is provided to pg_stat_statements_plan_reset, it must be of a valid plan type ('good' or 'bad')."); + } + else + { + good_plan_reset = true; + bad_plan_reset = true; + } + + if (!pgss || !pgss_hash) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("pg_stat_statements must be loaded via shared_preload_libraries"))); + + /* Set up key for hashtable search */ + key.userid = GetUserId(); + key.dbid = MyDatabaseId; + key.queryid = query_id; + + /* Lookup the hash table entry with shared lock. */ + LWLockAcquire(pgss->lock, LW_SHARED); + + entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL); + + if (entry) + { + if (good_plan_reset) + { + entry->good_plan.offset = -1; + entry->good_plan.len = 0; + entry->good_plan.time = 0; + entry->good_plan.timestamp = 0; + } + if (bad_plan_reset) + { + entry->bad_plan.offset = -1; + entry->bad_plan.len = 0; + entry->bad_plan.time = 0; + entry->bad_plan.timestamp = 0; + } + } + LWLockRelease(pgss->lock); + PG_RETURN_VOID(); +} + /* Number of output arguments (columns) for various API versions */ #define PG_STAT_STATEMENTS_COLS_V1_0 14 #define PG_STAT_STATEMENTS_COLS_V1_1 18 #define PG_STAT_STATEMENTS_COLS_V1_2 19 #define PG_STAT_STATEMENTS_COLS_V1_3 23 +#define PG_STAT_STATEMENTS_COLS_V1_6 23 #define PG_STAT_STATEMENTS_COLS 23 /* maximum of above */ +#define PG_STAT_STATEMENTS_PLANS_COLS_V1_6 4 +#define PG_STAT_STATEMENTS_PLANS_COLS 4 /* maximum of above */ + /* * Retrieve statement statistics. * @@ -1322,6 +1714,16 @@ pg_stat_statements_reset(PG_FUNCTION_ARGS) * expected API version is identified by embedding it in the C name of the * function. Unfortunately we weren't bright enough to do that for 1.1. */ +Datum +pg_stat_statements_1_6(PG_FUNCTION_ARGS) +{ + bool showtext = PG_GETARG_BOOL(0); + + pg_stat_statements_internal(fcinfo, PGSS_V1_6, showtext); + + return (Datum) 0; +} + Datum pg_stat_statements_1_3(PG_FUNCTION_ARGS) { @@ -1424,10 +1826,14 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo, if (api_version != PGSS_V1_2) elog(ERROR, "incorrect number of output arguments"); break; - case PG_STAT_STATEMENTS_COLS_V1_3: - if (api_version != PGSS_V1_3) + case PG_STAT_STATEMENTS_COLS_V1_3: //same as V1_6... + if (api_version != PGSS_V1_3 && api_version != PGSS_V1_6) elog(ERROR, "incorrect number of output arguments"); break; + //case PG_STAT_STATEMENTS_COLS_V1_6: + // if (api_version != PGSS_V1_6) + // elog(ERROR, "incorrect number of output arguments"); + // break; default: elog(ERROR, "incorrect number of output arguments"); } @@ -1629,6 +2035,7 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo, api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 : api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 : api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 : + api_version == PGSS_V1_6 ? PG_STAT_STATEMENTS_COLS_V1_6 : -1 /* fail if you forget to update this assert */ )); tuplestore_putvalues(tupstore, tupdesc, values, nulls); @@ -1643,6 +2050,271 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo, tuplestore_donestoring(tupstore); } +Datum +pg_stat_statements_plans_1_6(PG_FUNCTION_ARGS) +{ + bool showtext = PG_GETARG_BOOL(0); + + pg_stat_statements_plans_internal(fcinfo, PGSS_V1_6, showtext); + + return (Datum) 0; +} + +static void +pg_stat_statements_plans_internal(FunctionCallInfo fcinfo, + pgssVersion api_version, + bool showtext) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupdesc; + Tuplestorestate *tupstore; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + Oid userid = GetUserId(); + bool is_allowed_role = false; + char *qbuffer = NULL; + Size qbuffer_size = 0; + Size extent = 0; + int gc_count = 0; + HASH_SEQ_STATUS hash_seq; + pgssEntry *entry; + + /* Superusers or members of pg_read_all_stats members are allowed */ + is_allowed_role = is_member_of_role(GetUserId(), DEFAULT_ROLE_READ_ALL_STATS); + + /* hash table must exist already */ + if (!pgss || !pgss_hash) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("pg_stat_statements must be loaded via shared_preload_libraries"))); + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not " \ + "allowed in this context"))); + + /* Switch into long-lived context to construct returned data structures */ + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + /* + * Check we have the expected number of output arguments. Aside from + * being a good safety check, we need a kluge here to detect API version + * 1.1, which was wedged into the code in an ill-considered way. + */ + switch (tupdesc->natts) + { + case PG_STAT_STATEMENTS_PLANS_COLS_V1_6: + if (api_version != PGSS_V1_6) + elog(ERROR, "incorrect number of output arguments"); + break; + default: + elog(ERROR, "incorrect number of output arguments"); + } + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupdesc; + + MemoryContextSwitchTo(oldcontext); + + /* + * We'd like to load the query text file (if needed) while not holding any + * lock on pgss->lock. In the worst case we'll have to do this again + * after we have the lock, but it's unlikely enough to make this a win + * despite occasional duplicated work. We need to reload if anybody + * writes to the file (either a retail qtext_store(), or a garbage + * collection) between this point and where we've gotten shared lock. If + * a qtext_store is actually in progress when we look, we might as well + * skip the speculative load entirely. + */ + if (showtext) + { + int n_writers; + + /* Take the mutex so we can examine variables */ + { + volatile pgssSharedState *s = (volatile pgssSharedState *) pgss; + + SpinLockAcquire(&s->mutex); + extent = s->extent; + n_writers = s->n_writers; + gc_count = s->gc_count; + SpinLockRelease(&s->mutex); + } + + /* No point in loading file now if there are active writers */ + if (n_writers == 0) + qbuffer = qtext_load_file(&qbuffer_size); + } + + /* + * Get shared lock, load or reload the query text file if we must, and + * iterate over the hashtable entries. + * + * With a large hash table, we might be holding the lock rather longer + * than one could wish. However, this only blocks creation of new hash + * table entries, and the larger the hash table the less likely that is to + * be needed. So we can hope this is okay. Perhaps someday we'll decide + * we need to partition the hash table to limit the time spent holding any + * one lock. + */ + LWLockAcquire(pgss->lock, LW_SHARED); + + if (showtext) + { + /* + * Here it is safe to examine extent and gc_count without taking the + * mutex. Note that although other processes might change + * pgss->extent just after we look at it, the strings they then write + * into the file cannot yet be referenced in the hashtable, so we + * don't care whether we see them or not. + * + * If qtext_load_file fails, we just press on; we'll return NULL for + * every query text. + */ + if (qbuffer == NULL || + pgss->extent != extent || + pgss->gc_count != gc_count) + { + if (qbuffer) + free(qbuffer); + qbuffer = qtext_load_file(&qbuffer_size); + } + } + + hash_seq_init(&hash_seq, pgss_hash); + while ((entry = hash_seq_search(&hash_seq)) != NULL) + { + int numPlans = 2; + pgssPlan *planArray[numPlans]; + planArray[0] = &entry->good_plan; + planArray[1] = &entry->bad_plan; + + /** Iterate through the array of plans. */ + for(int j = 0; j < numPlans; j++) + { + Datum values[PG_STAT_STATEMENTS_PLANS_COLS]; + bool nulls[PG_STAT_STATEMENTS_PLANS_COLS]; + int i = 0; + int64 calls = 0; + pgssPlan planCopy; + int64 queryid = entry->key.queryid; + + memset(values, 0, sizeof(values)); + memset(nulls, 0, sizeof(nulls)); + + /* Make a local copy of the plan */ + SpinLockAcquire(&entry->mutex); + calls = entry->counters.calls; + planCopy = (pgssPlan) **planArray; + SpinLockRelease(&entry->mutex); + /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */ + if(calls == 0 || planCopy.time == 0) + continue; + + if (is_allowed_role || entry->key.userid == userid) + { + if (api_version >= PGSS_V1_2) + values[i++] = Int64GetDatumFast(queryid); + + if (showtext && api_version >= PGSS_V1_6) + { + i = fill_plan_str(values, nulls, i, &planCopy, + entry, qbuffer, qbuffer_size); + } + else + { + nulls[i++] = true; + } + } + else + { + /* Don't show queryid */ + if (api_version >= PGSS_V1_2) + nulls[i++] = true; + + /* + * Don't show plan text, but hint as to the reason for not doing + * so if it was requested + */ + if (showtext) + values[i++] = CStringGetTextDatum(""); + else + nulls[i++] = true; + } + + if (api_version >= PGSS_V1_6) + { + /* Set plan times and timestamps */ + values[i++] = Float8GetDatumFast(planCopy.time); + + /* if there is nothing in the timestamp field, we are not interested in it */ + if (planCopy.timestamp) + values[i++] = TimestampTzGetDatum(planCopy.timestamp); + else + nulls[i++] = true; + } + + Assert(i == (api_version == PGSS_V1_6 ? PG_STAT_STATEMENTS_PLANS_COLS_V1_6 : + -1 /* fail if you forget to update this assert */ )); + + tuplestore_putvalues(tupstore, tupdesc, values, nulls); + } + } + + /* clean up and return the tuplestore */ + LWLockRelease(pgss->lock); + + if (qbuffer) + free(qbuffer); + + tuplestore_donestoring(tupstore); +} + +/* Fill the plan string into the values array. */ +static int +fill_plan_str(Datum values[], bool nulls[], int i, pgssPlan *plan, + pgssEntry *entry, char *qbuffer, Size qbuffer_size) +{ + if (plan && plan->len > 0) + { + char *pstr = qtext_fetch(plan->offset, plan->len, qbuffer, qbuffer_size); + if (pstr) + { + char *enc; + enc = pg_any_to_server(pstr, plan->len, entry->encoding); + values[i++] = CStringGetTextDatum(enc); + + if (enc != pstr) + pfree(enc); + } + else + { + /* failed to get the string of the plan */ + nulls[i++] = true; + } + } + else + { + /* no plan available or plan_len not greater than 0 */ + nulls[i++] = true; + } + return i; +} + + /* * Estimate shared memory space needed. */ @@ -1694,6 +2366,8 @@ entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding, /* reset the statistics */ memset(&entry->counters, 0, sizeof(Counters)); + memset(&entry->good_plan, 0, sizeof(pgssPlan)); + memset(&entry->bad_plan, 0, sizeof(pgssPlan)); /* set the appropriate initial usage count */ entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT; /* re-initialize the mutex each time ... we assume no one using it */ @@ -2104,6 +2778,18 @@ gc_qtexts(void) qbuffer, qbuffer_size); + int good_plan_len = entry->good_plan.len; + char *good_plan = qtext_fetch(entry->good_plan.offset, + good_plan_len, + qbuffer, + qbuffer_size); + + int bad_plan_len = entry->bad_plan.len; + char *bad_plan = qtext_fetch(entry->bad_plan.offset, + bad_plan_len, + qbuffer, + qbuffer_size); + if (qry == NULL) { /* Trouble ... drop the text */ @@ -2125,6 +2811,53 @@ gc_qtexts(void) entry->query_offset = extent; extent += query_len + 1; + + if (good_plan == NULL || good_plan_len <= 0) + { + /* There was an error while loading the good_plan or there was simply never a + * good_plan recorded, so we make sure that this entry knows this. */ + entry->good_plan.offset = 0; + entry->good_plan.len = -1; + } + else + { + /* Save the good plan */ + if (fwrite(good_plan, 1, good_plan_len + 1, qfile) != good_plan_len + 1) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not write pg_stat_statement file \"%s\": %m", + PGSS_TEXT_FILE))); + hash_seq_term(&hash_seq); + goto gc_fail; + } + entry->good_plan.offset = extent; + extent += good_plan_len + 1; + } + + if (bad_plan == NULL || bad_plan_len <= 0) + { + /* There was an error while loading the bad_plan or there was simply never a + * bad_plan recorded, so we make sure that this entry knows this. */ + entry->bad_plan.offset = 0; + entry->bad_plan.len = -1; + } + else + { + /* Save the bad plan */ + if (fwrite(bad_plan, 1, bad_plan_len + 1, qfile) != bad_plan_len + 1) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not write pg_stat_statement file \"%s\": %m", + PGSS_TEXT_FILE))); + hash_seq_term(&hash_seq); + goto gc_fail; + } + entry->bad_plan.offset = extent; + extent += bad_plan_len + 1; + } + nentries++; } @@ -3163,3 +3896,81 @@ comp_location(const void *a, const void *b) else return 0; } + +/* + * Check the list of plans that shall be tracked and set the + * appropriate temporary bools. + */ +static bool +plan_track_check_hook(char **newvalue, void **extra, GucSource source) +{ + char *rawstring; + List *elemlist; + ListCell *l; + + /* Set all temp bools to false */ + pgss_good_plan_enable_temp = false; + pgss_bad_plan_enable_temp = false; + + if (pg_strcasecmp(*newvalue, "all") == 0) + { + pgss_good_plan_enable_temp = true; + pgss_bad_plan_enable_temp = true; + } + else if (pg_strcasecmp(*newvalue, "none") == 0) + return true; + else + { + /* Need a modifiable copy of string */ + rawstring = pstrdup(*newvalue); + + /* Parse string into list of identifiers */ + if (!SplitIdentifierString(rawstring, ',', &elemlist)) + { + /* syntax error in list */ + GUC_check_errdetail("List syntax is invalid."); + pfree(rawstring); + list_free(elemlist); + return false; + } + + foreach(l, elemlist) + { + char *tok = (char *) lfirst(l); + + if (pg_strcasecmp(tok, "good") == 0) + pgss_good_plan_enable_temp = true; + else if (pg_strcasecmp(tok, "bad") == 0) + pgss_bad_plan_enable_temp = true; + else if (pg_strcasecmp(tok, "all") == 0 || pg_strcasecmp(tok, "none") == 0) + { + GUC_check_errdetail("Key word \"%s\" cannot be combined with other key words.", tok); + pfree(rawstring); + list_free(elemlist); + return false; + } + else + { + GUC_check_errdetail("Unrecognized key word: \"%s\".", tok); + pfree(rawstring); + list_free(elemlist); + return false; + } + } + pfree(rawstring); + list_free(elemlist); + } + + return true; +} + +/* + * The bools enabling and disabling tracking of certain plans + * are set to the values previously set in the temporary variables. + */ +static void +plan_track_assign_hook(const char *newvalue, void *extra) +{ + pgss_good_plan_enable = pgss_good_plan_enable_temp; + pgss_bad_plan_enable = pgss_bad_plan_enable_temp; +} \ No newline at end of file diff --git a/contrib/pg_stat_statements/pg_stat_statements.control b/contrib/pg_stat_statements/pg_stat_statements.control index 193fcdf..617038b 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.control +++ b/contrib/pg_stat_statements/pg_stat_statements.control @@ -1,5 +1,5 @@ # pg_stat_statements extension comment = 'track execution statistics of all SQL statements executed' -default_version = '1.5' +default_version = '1.6' module_pathname = '$libdir/pg_stat_statements' relocatable = true diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql index a8361fd..3f98f4b 100644 --- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql +++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql @@ -195,4 +195,13 @@ DROP FUNCTION PLUS_TWO(INTEGER); SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; +-- test to see if any plans have been recorded. +SELECT + CASE WHEN plan_time > 0 THEN 1 ELSE 0 END, + CASE WHEN plan_timestamp >= timestamp '1970-01-01 00:00:00' THEN 1 ELSE 0 END +FROM pg_stat_statements_plans ORDER BY plan_timestamp; + +-- test if there is some text in the recorded plans. +SELECT substr(plan, 0, 11) FROM pg_stat_statements_plans ORDER BY plan_timestamp; + DROP EXTENSION pg_stat_statements;