From ea023f4cd892d65e36ac24450fa57003047bd592 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Fri, 21 Mar 2025 14:09:39 +0900 Subject: [PATCH 1/2] Add more entropy to query jumbling NULL nodes now have some arbitrary data added to the jumble, with a rule enforcing that all nodes participate in the query jumble. This rule also applies to custom jumble functions. --- src/include/nodes/queryjumble.h | 15 +++- src/backend/nodes/queryjumblefuncs.c | 27 ++++++ .../pg_stat_statements/expected/select.out | 87 ++++++++++++++++++- contrib/pg_stat_statements/sql/select.sql | 20 +++++ 4 files changed, 147 insertions(+), 2 deletions(-) diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h index 905f66bc0bd4..e13fd49942ce 100644 --- a/src/include/nodes/queryjumble.h +++ b/src/include/nodes/queryjumble.h @@ -40,9 +40,22 @@ typedef struct JumbleState /* Jumble of current query tree */ unsigned char *jumble; - /* Number of bytes used in jumble[] */ + /* Number of bytes used in jumble[], capped at JUMBLE_SIZE */ Size jumble_len; + /* + * Total number of bytes used in a query jumble, used for + * sanity checks and debugging. + */ + Size jumble_total_len; + + /* + * Extra counter provided for the case of NULL entries. This counter is + * incremented each time a NULL node or string is found, providing some + * data then appended to a query jumble. 
+ */ + int null_count; + /* Array of locations of constants that should be removed */ LocationLen *clocations; diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index 189bfda610aa..cc3c73117f6c 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -129,6 +129,8 @@ JumbleQuery(Query *query) /* Set up workspace for query jumbling */ jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE); jstate->jumble_len = 0; + jstate->jumble_total_len = 0; + jstate->null_count = 0; jstate->clocations_buf_size = 32; jstate->clocations = (LocationLen *) palloc(jstate->clocations_buf_size * sizeof(LocationLen)); @@ -179,6 +181,8 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size) unsigned char *jumble = jstate->jumble; Size jumble_len = jstate->jumble_len; + Assert(size > 0); + /* * Whenever the jumble buffer is full, we hash the current contents and * reset the buffer to contain just that hash value, thus relying on the @@ -202,6 +206,7 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size) jumble_len += part_size; item += part_size; size -= part_size; + jstate->jumble_total_len += part_size; } jstate->jumble_len = jumble_len; } @@ -328,10 +333,17 @@ IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr) AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item)) #define JUMBLE_FIELD_SINGLE(item) \ AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item)) + +/* Append string to the jumble, including NULL values */ #define JUMBLE_STRING(str) \ do { \ if (expr->str) \ AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \ + else \ + { \ + jstate->null_count++; \ + JUMBLE_FIELD_SINGLE(jstate->null_count); \ + } \ } while(0) #include "queryjumblefuncs.funcs.c" @@ -379,9 +391,18 @@ static void _jumbleNode(JumbleState *jstate, Node *node) { Node *expr = node; + Size initial_total_len = 
jstate->jumble_total_len; if (expr == NULL) + { + /* + * Increment the NULL counter, and add it to the jumble to force + * more entropy to the computation. + */ + jstate->null_count++; + JUMBLE_FIELD_SINGLE(jstate->null_count); return; + } /* Guard against stack overflow due to overly complex expressions */ check_stack_depth(); @@ -429,6 +450,12 @@ _jumbleNode(JumbleState *jstate, Node *node) default: break; } + + /* + * All nodes must participate in the computation, even nodes with + * custom functions. + */ + Assert(initial_total_len < jstate->jumble_total_len); } static void diff --git a/contrib/pg_stat_statements/expected/select.out b/contrib/pg_stat_statements/expected/select.out index 37a30af034a6..1587d2cafb3a 100644 --- a/contrib/pg_stat_statements/expected/select.out +++ b/contrib/pg_stat_statements/expected/select.out @@ -19,6 +19,86 @@ SELECT 1 AS "int"; 1 (1 row) +-- LIMIT and OFFSET patterns +-- These require more entropy with parsing node offsets. +SELECT 1 AS "int" LIMIT 1; + int +----- + 1 +(1 row) + +SELECT 1 AS "int" LIMIT 2; + int +----- + 1 +(1 row) + +SELECT 1 AS "int" OFFSET 1; + int +----- +(0 rows) + +SELECT 1 AS "int" OFFSET 2; + int +----- +(0 rows) + +SELECT 1 AS "int" OFFSET 1 LIMIT 1; + int +----- +(0 rows) + +SELECT 1 AS "int" OFFSET 2 LIMIT 2; + int +----- +(0 rows) + +SELECT 1 AS "int" LIMIT 1 OFFSET 1; + int +----- +(0 rows) + +SELECT 1 AS "int" LIMIT 3 OFFSET 3; + int +----- +(0 rows) + +SELECT 1 AS "int" OFFSET 1 FETCH FIRST 2 ROW ONLY; + int +----- +(0 rows) + +SELECT 1 AS "int" OFFSET 2 FETCH FIRST 3 ROW ONLY; + int +----- +(0 rows) + +-- DISTINCT and ORDER BY patterns +-- These require more entropy with parsing node offsets. 
+SELECT DISTINCT 1 AS "int"; + int +----- + 1 +(1 row) + +SELECT DISTINCT 2 AS "int"; + int +----- + 2 +(1 row) + +SELECT 1 AS "int" ORDER BY 1; + int +----- + 1 +(1 row) + +SELECT 2 AS "int" ORDER BY 1; + int +----- + 2 +(1 row) + /* this comment should not appear in the output */ SELECT 'hello' -- but this one will appear @@ -135,9 +215,14 @@ SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C"; 3 | 3 | SELECT $1 + $2 + $3 AS "add" 1 | 1 | SELECT $1 AS "float" 2 | 2 | SELECT $1 AS "int" + 2 | 2 | SELECT $1 AS "int" LIMIT $2 + 2 | 0 | SELECT $1 AS "int" OFFSET $2 + 6 | 0 | SELECT $1 AS "int" OFFSET $2 LIMIT $3 + 2 | 2 | SELECT $1 AS "int" ORDER BY 1 1 | 2 | SELECT $1 AS i UNION SELECT $2 ORDER BY i 1 | 1 | SELECT $1 || $2 1 | 1 | SELECT $1, $2 LIMIT $3 + 2 | 2 | SELECT DISTINCT $1 AS "int" 0 | 0 | SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C" 1 | 1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t 1 | 2 | WITH t(f) AS ( + @@ -145,7 +230,7 @@ SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C"; | | ) + | | SELECT f FROM t ORDER BY f 1 | 1 | select $1::jsonb ? $2 -(12 rows) +(17 rows) SELECT pg_stat_statements_reset() IS NOT NULL AS t; t diff --git a/contrib/pg_stat_statements/sql/select.sql b/contrib/pg_stat_statements/sql/select.sql index e0be58d5e24b..4dcfa8ef74dc 100644 --- a/contrib/pg_stat_statements/sql/select.sql +++ b/contrib/pg_stat_statements/sql/select.sql @@ -12,6 +12,26 @@ SELECT pg_stat_statements_reset() IS NOT NULL AS t; -- SELECT 1 AS "int"; +-- LIMIT and OFFSET patterns +-- These require more entropy with parsing node offsets. 
+SELECT 1 AS "int" LIMIT 1; +SELECT 1 AS "int" LIMIT 2; +SELECT 1 AS "int" OFFSET 1; +SELECT 1 AS "int" OFFSET 2; +SELECT 1 AS "int" OFFSET 1 LIMIT 1; +SELECT 1 AS "int" OFFSET 2 LIMIT 2; +SELECT 1 AS "int" LIMIT 1 OFFSET 1; +SELECT 1 AS "int" LIMIT 3 OFFSET 3; +SELECT 1 AS "int" OFFSET 1 FETCH FIRST 2 ROW ONLY; +SELECT 1 AS "int" OFFSET 2 FETCH FIRST 3 ROW ONLY; + +-- DISTINCT and ORDER BY patterns +-- These require more entropy with parsing node offsets. +SELECT DISTINCT 1 AS "int"; +SELECT DISTINCT 2 AS "int"; +SELECT 1 AS "int" ORDER BY 1; +SELECT 2 AS "int" ORDER BY 1; + /* this comment should not appear in the output */ SELECT 'hello' -- but this one will appear -- 2.49.0