From ffaa79144ff34eeaa89b55cbced2a182310ea522 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Fri, 20 Jan 2023 13:27:29 +0900 Subject: [PATCH v4 4/4] Add GUC utility_query_id This GUC has two modes to control the computation method of query IDs for utilities: - 'string', the default, to hash the string query. - 'jumble', to use the parsed tree. --- src/include/nodes/queryjumble.h | 7 ++ src/backend/nodes/queryjumblefuncs.c | 81 ++++++++++++++----- src/backend/utils/misc/guc_tables.c | 16 ++++ src/backend/utils/misc/postgresql.conf.sample | 1 + doc/src/sgml/config.sgml | 18 +++++ .../expected/pg_stat_statements.out | 31 +++++++ .../sql/pg_stat_statements.sql | 17 ++++ 7 files changed, 151 insertions(+), 20 deletions(-) diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h index 204b8f74fd..261aea6bcf 100644 --- a/src/include/nodes/queryjumble.h +++ b/src/include/nodes/queryjumble.h @@ -59,8 +59,15 @@ enum ComputeQueryIdType COMPUTE_QUERY_ID_REGRESS }; +enum UtilityQueryIdType +{ + UTILITY_QUERY_ID_STRING, + UTILITY_QUERY_ID_JUMBLE +}; + /* GUC parameters */ extern PGDLLIMPORT int compute_query_id; +extern PGDLLIMPORT int utility_query_id; extern const char *CleanQuerytext(const char *query, int *location, int *len); diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index 278150fba0..dd9ab8f353 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -41,12 +41,15 @@ /* GUC parameters */ int compute_query_id = COMPUTE_QUERY_ID_AUTO; +int utility_query_id = UTILITY_QUERY_ID_STRING; /* True when compute_query_id is ON, or AUTO and a module requests them */ bool query_id_enabled = false; static void AppendJumble(JumbleState *jstate, const unsigned char *item, Size size); +static uint64 compute_utility_query_id(const char *query_text, + int query_location, int query_len); static void RecordConstLocation(JumbleState *jstate, int location); static void 
_jumbleNode(JumbleState *jstate, Node *node); static void _jumbleList(JumbleState *jstate, Node *node); @@ -102,29 +105,39 @@ JumbleQuery(Query *query, const char *querytext) Assert(IsQueryIdEnabled()); - jstate = (JumbleState *) palloc(sizeof(JumbleState)); + if (query->utilityStmt && + utility_query_id == UTILITY_QUERY_ID_STRING) + { + query->queryId = compute_utility_query_id(querytext, + query->stmt_location, + query->stmt_len); + } + else + { + jstate = (JumbleState *) palloc(sizeof(JumbleState)); - /* Set up workspace for query jumbling */ - jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE); - jstate->jumble_len = 0; - jstate->clocations_buf_size = 32; - jstate->clocations = (LocationLen *) - palloc(jstate->clocations_buf_size * sizeof(LocationLen)); - jstate->clocations_count = 0; - jstate->highest_extern_param_id = 0; + /* Set up workspace for query jumbling */ + jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE); + jstate->jumble_len = 0; + jstate->clocations_buf_size = 32; + jstate->clocations = (LocationLen *) + palloc(jstate->clocations_buf_size * sizeof(LocationLen)); + jstate->clocations_count = 0; + jstate->highest_extern_param_id = 0; - /* Compute query ID and mark the Query node with it */ - _jumbleNode(jstate, (Node *) query); - query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble, - jstate->jumble_len, - 0)); + /* Compute query ID and mark the Query node with it */ + _jumbleNode(jstate, (Node *) query); + query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble, + jstate->jumble_len, + 0)); - /* - * If we are unlucky enough to get a hash of zero, use 1 instead, to - * prevent confusion with the utility-statement case. - */ - if (query->queryId == UINT64CONST(0)) - query->queryId = UINT64CONST(1); + /* + * If we are unlucky enough to get a hash of zero, use 1 instead, to + * prevent confusion with the utility-statement case.
+ */ + if (query->queryId == UINT64CONST(0)) + query->queryId = UINT64CONST(1); + } return jstate; } @@ -142,6 +155,34 @@ EnableQueryId(void) query_id_enabled = true; } +/* + * Compute a query identifier for the given utility query string. + */ +static uint64 +compute_utility_query_id(const char *query_text, int query_location, int query_len) +{ + uint64 queryId; + const char *sql; + + /* + * Confine our attention to the relevant part of the string, if the query + * is a portion of a multi-statement source string. + */ + sql = CleanQuerytext(query_text, &query_location, &query_len); + + queryId = DatumGetUInt64(hash_any_extended((const unsigned char *) sql, + query_len, 0)); + + /* + * If we are unlucky enough to get a hash of zero (invalid), use queryID + * 2 instead; queryID 1 is already in use for normal statements. + */ + if (queryId == UINT64CONST(0)) + queryId = UINT64CONST(2); + + return queryId; +} + /* * AppendJumble: Append a value that is substantive in a given query to * the current jumble. diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index f9bfbbbd95..869e8a3a6f 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -294,6 +294,12 @@ static const struct config_enum_entry compute_query_id_options[] = { {NULL, 0, false} }; +static const struct config_enum_entry utility_query_id_options[] = { + {"string", UTILITY_QUERY_ID_STRING, false}, + {"jumble", UTILITY_QUERY_ID_JUMBLE, false}, + {NULL, 0, false} +}; + /* * Although only "on", "off", and "partition" are documented, we * accept all the likely variants of "on" and "off".
@@ -4563,6 +4569,16 @@ struct config_enum ConfigureNamesEnum[] = NULL, NULL, NULL }, + { + {"utility_query_id", PGC_SUSET, STATS_MONITORING, + gettext_noop("Controls method computing query ID for utilities."), + NULL + }, + &utility_query_id, + UTILITY_QUERY_ID_STRING, utility_query_id_options, + NULL, NULL, NULL + }, + { {"constraint_exclusion", PGC_USERSET, QUERY_TUNING_OTHER, gettext_noop("Enables the planner to use constraints to optimize queries."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 4cceda4162..4d43c9d3c4 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -622,6 +622,7 @@ # - Monitoring - #compute_query_id = auto +#utility_query_id = string # string, jumble #log_statement_stats = off #log_parser_stats = off #log_planner_stats = off diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 89d53f2a64..70c55f1a79 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8203,6 +8203,24 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; + + utility_query_id (enum) + + utility_query_id configuration parameter + + + + + Controls the method used to compute the query identifier of a utility + query. Valid values are string to use a hash of the + query string and jumble to compute the query + identifier depending on the parsed tree of the utility query (less + performant, but allows for more parameterization of the queries + involved). The default is string. 
+ + + + log_statement_stats (boolean) diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out index 9ac5c87c3a..8bdf8beec3 100644 --- a/contrib/pg_stat_statements/expected/pg_stat_statements.out +++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out @@ -554,6 +554,7 @@ DROP TABLE pgss_a, pgss_b CASCADE; -- utility commands -- SET pg_stat_statements.track_utility = TRUE; +SET utility_query_id = 'string'; SELECT pg_stat_statements_reset(); pg_stat_statements_reset -------------------------- @@ -592,6 +593,36 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 | 0 (9 rows) +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SET utility_query_id = 'jumble'; +-- These queries have a different string, but the same parsing +-- representation. +Begin; +Create Table test_utility_query (a int); +Drop Table test_utility_query; +Commit; +BEGIN; +CREATE TABLE test_utility_query (a int); +DROP TABLE test_utility_query; +COMMIT; +SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls | rows +------------------------------------------------------------------------------+-------+------ + Begin | 2 | 0 + Commit | 2 | 0 + Create Table test_utility_query (a int) | 2 | 0 + Drop Table test_utility_query | 2 | 0 + SELECT pg_stat_statements_reset() | 1 | 1 + SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 | 0 + SET utility_query_id = 'jumble' | 1 | 0 +(7 rows) + +RESET utility_query_id; -- -- Track the total number of rows retrieved or affected by the utility -- commands of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED VIEW, diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql index 
8f5c866225..81d663f81c 100644 --- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql +++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql @@ -258,6 +258,7 @@ DROP TABLE pgss_a, pgss_b CASCADE; -- utility commands -- SET pg_stat_statements.track_utility = TRUE; +SET utility_query_id = 'string'; SELECT pg_stat_statements_reset(); SELECT 1; @@ -272,6 +273,22 @@ DROP FUNCTION PLUS_TWO(INTEGER); SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; +SELECT pg_stat_statements_reset(); +SET utility_query_id = 'jumble'; +-- These queries have a different string, but the same parsing +-- representation. +Begin; +Create Table test_utility_query (a int); +Drop Table test_utility_query; +Commit; +BEGIN; +CREATE TABLE test_utility_query (a int); +DROP TABLE test_utility_query; +COMMIT; + +SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; +RESET utility_query_id; + -- -- Track the total number of rows retrieved or affected by the utility -- commands of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED VIEW, -- 2.39.0