From 6fe42d23f1ba3d648184852b32caf7db71020b71 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Tue, 24 Jan 2023 15:52:37 +0900 Subject: [PATCH v6 4/4] Add GUC utility_query_id This GUC has two modes to control the computation method of query IDs for utilities: - 'string', the default, to hash the query string. - 'jumble', to use the parsed tree. --- src/include/nodes/queryjumble.h | 7 ++ src/backend/nodes/queryjumblefuncs.c | 81 ++++++++++++++----- src/backend/utils/misc/guc_tables.c | 16 ++++ src/backend/utils/misc/postgresql.conf.sample | 1 + doc/src/sgml/config.sgml | 31 +++++++ .../expected/pg_stat_statements.out | 31 +++++++ .../sql/pg_stat_statements.sql | 17 ++++ 7 files changed, 164 insertions(+), 20 deletions(-) diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h index 204b8f74fd..261aea6bcf 100644 --- a/src/include/nodes/queryjumble.h +++ b/src/include/nodes/queryjumble.h @@ -59,8 +59,15 @@ enum ComputeQueryIdType COMPUTE_QUERY_ID_REGRESS }; +enum UtilityQueryIdType +{ + UTILITY_QUERY_ID_STRING, + UTILITY_QUERY_ID_JUMBLE +}; + /* GUC parameters */ extern PGDLLIMPORT int compute_query_id; +extern PGDLLIMPORT int utility_query_id; extern const char *CleanQuerytext(const char *query, int *location, int *len); diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index 16fdf7164a..b8738fba08 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -41,12 +41,15 @@ /* GUC parameters */ int compute_query_id = COMPUTE_QUERY_ID_AUTO; +int utility_query_id = UTILITY_QUERY_ID_STRING; /* True when compute_query_id is ON, or AUTO and a module requests them */ bool query_id_enabled = false; static void AppendJumble(JumbleState *jstate, const unsigned char *item, Size size); +static uint64 compute_utility_query_id(const char *query_text, + int query_location, int query_len); static void RecordConstLocation(JumbleState *jstate, int location); static void
_jumbleNode(JumbleState *jstate, Node *node); static void _jumbleList(JumbleState *jstate, Node *node); @@ -102,29 +105,39 @@ JumbleQuery(Query *query, const char *querytext) Assert(IsQueryIdEnabled()); - jstate = (JumbleState *) palloc(sizeof(JumbleState)); + if (query->utilityStmt && + utility_query_id == UTILITY_QUERY_ID_STRING) + { + query->queryId = compute_utility_query_id(querytext, + query->stmt_location, + query->stmt_len); + } + else + { + jstate = (JumbleState *) palloc(sizeof(JumbleState)); - /* Set up workspace for query jumbling */ - jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE); - jstate->jumble_len = 0; - jstate->clocations_buf_size = 32; - jstate->clocations = (LocationLen *) - palloc(jstate->clocations_buf_size * sizeof(LocationLen)); - jstate->clocations_count = 0; - jstate->highest_extern_param_id = 0; + /* Set up workspace for query jumbling */ + jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE); + jstate->jumble_len = 0; + jstate->clocations_buf_size = 32; + jstate->clocations = (LocationLen *) + palloc(jstate->clocations_buf_size * sizeof(LocationLen)); + jstate->clocations_count = 0; + jstate->highest_extern_param_id = 0; - /* Compute query ID and mark the Query node with it */ - _jumbleNode(jstate, (Node *) query); - query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble, - jstate->jumble_len, - 0)); + /* Compute query ID and mark the Query node with it */ + _jumbleNode(jstate, (Node *) query); + query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble, + jstate->jumble_len, + 0)); - /* - * If we are unlucky enough to get a hash of zero, use 1 instead, to - * prevent confusion with the utility-statement case. - */ - if (query->queryId == UINT64CONST(0)) - query->queryId = UINT64CONST(1); + /* + * If we are unlucky enough to get a hash of zero, use 1 instead, to + * prevent confusion with the utility-statement case.
+ */ + if (query->queryId == UINT64CONST(0)) + query->queryId = UINT64CONST(1); + } return jstate; } @@ -142,6 +155,34 @@ EnableQueryId(void) query_id_enabled = true; } +/* + * Compute a query identifier for the given utility query string. + */ +static uint64 +compute_utility_query_id(const char *query_text, int query_location, int query_len) +{ + uint64 queryId; + const char *sql; + + /* + * Confine our attention to the relevant part of the string, if the query + * is a portion of a multi-statement source string. + */ + sql = CleanQuerytext(query_text, &query_location, &query_len); + + queryId = DatumGetUInt64(hash_any_extended((const unsigned char *) sql, + query_len, 0)); + + /* + * If we are unlucky enough to get a hash of zero (invalid), use 2 as the + * queryId instead; queryId 1 is already in use for normal statements. + */ + if (queryId == UINT64CONST(0)) + queryId = UINT64CONST(2); + + return queryId; +} + /* * AppendJumble: Append a value that is substantive in a given query to * the current jumble. diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 4ac808ed22..97619c4e1d 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -294,6 +294,12 @@ static const struct config_enum_entry compute_query_id_options[] = { {NULL, 0, false} }; +static const struct config_enum_entry utility_query_id_options[] = { + {"string", UTILITY_QUERY_ID_STRING, false}, + {"jumble", UTILITY_QUERY_ID_JUMBLE, false}, + {NULL, 0, false} +}; + /* * Although only "on", "off", and "partition" are documented, we * accept all the likely variants of "on" and "off".
@@ -4574,6 +4580,16 @@ struct config_enum ConfigureNamesEnum[] = NULL, NULL, NULL }, + { + {"utility_query_id", PGC_SUSET, STATS_MONITORING, + gettext_noop("Controls method computing query ID for utilities."), + NULL + }, + &utility_query_id, + UTILITY_QUERY_ID_STRING, utility_query_id_options, + NULL, NULL, NULL + }, + { {"constraint_exclusion", PGC_USERSET, QUERY_TUNING_OTHER, gettext_noop("Enables the planner to use constraints to optimize queries."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index d06074b86f..bbf95af59d 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -623,6 +623,7 @@ # - Monitoring - #compute_query_id = auto +#utility_query_id = string # string, jumble #log_statement_stats = off #log_parser_stats = off #log_planner_stats = off diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index f985afc009..4ccd148471 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8241,6 +8241,37 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; + + utility_query_id (enum) + + utility_query_id configuration parameter + + + + + Controls the method used to compute the query identifier of a utility + query. Valid values are string to use a hash of the + query string and jumble to compute the query + identifier depending on the parsed tree of the utility query. + The default is string. + + + jumble is more costly than string + as the computation of the query identifier walks through the + post-parse-analysis representation of the queries for utility queries. + However, jumble is able to apply normalization + to the queries computed, meaning that queries written differently + but having the same query representation may be able to use the same + identifier. + For example, BEGIN; and begin; + will have the same query identifier under jumble as + both queries have the same query representation. 
The query identifier + would be different under string, because the query + strings are different. + + + + log_statement_stats (boolean) diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out index 9ac5c87c3a..8bdf8beec3 100644 --- a/contrib/pg_stat_statements/expected/pg_stat_statements.out +++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out @@ -554,6 +554,7 @@ DROP TABLE pgss_a, pgss_b CASCADE; -- utility commands -- SET pg_stat_statements.track_utility = TRUE; +SET utility_query_id = 'string'; SELECT pg_stat_statements_reset(); pg_stat_statements_reset -------------------------- @@ -592,6 +593,36 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 | 0 (9 rows) +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SET utility_query_id = 'jumble'; +-- These queries have a different string, but the same parsing +-- representation. 
+Begin; +Create Table test_utility_query (a int); +Drop Table test_utility_query; +Commit; +BEGIN; +CREATE TABLE test_utility_query (a int); +DROP TABLE test_utility_query; +COMMIT; +SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls | rows +------------------------------------------------------------------------------+-------+------ + Begin | 2 | 0 + Commit | 2 | 0 + Create Table test_utility_query (a int) | 2 | 0 + Drop Table test_utility_query | 2 | 0 + SELECT pg_stat_statements_reset() | 1 | 1 + SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 | 0 + SET utility_query_id = 'jumble' | 1 | 0 +(7 rows) + +RESET utility_query_id; -- -- Track the total number of rows retrieved or affected by the utility -- commands of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED VIEW, diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql index 8f5c866225..81d663f81c 100644 --- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql +++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql @@ -258,6 +258,7 @@ DROP TABLE pgss_a, pgss_b CASCADE; -- utility commands -- SET pg_stat_statements.track_utility = TRUE; +SET utility_query_id = 'string'; SELECT pg_stat_statements_reset(); SELECT 1; @@ -272,6 +273,22 @@ DROP FUNCTION PLUS_TWO(INTEGER); SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; +SELECT pg_stat_statements_reset(); +SET utility_query_id = 'jumble'; +-- These queries have a different string, but the same parsing +-- representation. 
+Begin; +Create Table test_utility_query (a int); +Drop Table test_utility_query; +Commit; +BEGIN; +CREATE TABLE test_utility_query (a int); +DROP TABLE test_utility_query; +COMMIT; + +SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; +RESET utility_query_id; + -- -- Track the total number of rows retrieved or affected by the utility -- commands of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED VIEW, -- 2.39.0