From 371865589fe3e456114caff5e1109e273d18c25f Mon Sep 17 00:00:00 2001
From: Nikhil Kumar Veldanda
Date: Mon, 14 Apr 2025 21:50:07 +0000
Subject: [PATCH v11 4/7] Dependency tracking mechanism to track compressed
 datum leaks to unrelated tables

---
 src/backend/catalog/dependency.c           | 117 +++++++++++++++++++++
 src/backend/catalog/pg_zstd_dictionaries.c |  92 +++++++++++++++++
 src/backend/commands/createas.c            |  16 +---
 src/backend/commands/prepare.c             |  10 +-
 src/backend/executor/nodeModifyTable.c     |   5 +-
 src/include/catalog/dependency.h           |   2 +
 src/include/catalog/pg_proc.dat            |   5 +
 src/include/commands/createas.h            |  12 +++
 8 files changed, 245 insertions(+), 14 deletions(-)

diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index 0ea61ed1dae..c9a647d62ca 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -2838,3 +2838,120 @@ DeleteInitPrivs(const ObjectAddress *object)
 
 	table_close(relation, RowExclusiveLock);
 }
+
+/*
+ * inheritZstdDictionaryDependencies
+ *		Propagate zstd dictionary dependencies to a destination table.
+ *
+ * relationOids is a list of relation OIDs and destRelid is the destination
+ * table.  pg_depend is scanned for every listed relation (and for destRelid
+ * itself); each zstd dictionary referenced by a source relation but not yet
+ * by destRelid gets a new DEPENDENCY_NORMAL entry with destRelid as the
+ * depender.
+ *
+ * Nothing is done when destRelid is the only relation involved, or in
+ * binary-upgrade mode, where dictionaries and their dependencies are copied
+ * explicitly.
+ */
+void
+inheritZstdDictionaryDependencies(List *relationOids, Oid destRelid)
+{
+	List	   *relids;
+	List	   *src_dictids = NIL;
+	List	   *dest_dictids = NIL;
+	List	   *new_dictids;
+	ListCell   *lc;
+	Relation	depRel;
+	ScanKeyData skey[2];
+	SysScanDesc scan;
+	HeapTuple	tup;
+
+	/*
+	 * During pg_upgrade, dictionaries and their dependencies are copied
+	 * explicitly, so there is nothing to inherit.
+	 */
+	if (IsBinaryUpgrade)
+		return;
+
+	/* Build the sorted, deduplicated relation list, destRelid included */
+	relids = list_copy(relationOids);
+	relids = lappend_oid(relids, destRelid);
+	list_sort(relids, list_oid_cmp);
+	list_deduplicate_oid(relids);
+
+	/* Early exit if only the destination relation is present */
+	if (list_length(relids) == 1)
+	{
+		list_free(relids);
+		return;
+	}
+
+	depRel = table_open(DependRelationId, AccessShareLock);
+
+	/* Collect source and destination dictionaries from pg_depend */
+	foreach(lc, relids)
+	{
+		Oid			relOid = lfirst_oid(lc);
+
+		/* Look up the rows whose depender is this relation */
+		ScanKeyInit(&skey[0], Anum_pg_depend_classid,
+					BTEqualStrategyNumber, F_OIDEQ,
+					ObjectIdGetDatum(RelationRelationId));
+		ScanKeyInit(&skey[1], Anum_pg_depend_objid,
+					BTEqualStrategyNumber, F_OIDEQ,
+					ObjectIdGetDatum(relOid));
+
+		scan = systable_beginscan(depRel, DependDependerIndexId, true,
+								  NULL, 2, skey);
+
+		while (HeapTupleIsValid(tup = systable_getnext(scan)))
+		{
+			Form_pg_depend dep = (Form_pg_depend) GETSTRUCT(tup);
+
+			/* Interested only in Zstd dictionary dependencies */
+			if (dep->refclassid != ZstdDictionariesRelationId)
+				continue;
+
+			if (dep->objid == destRelid)
+				dest_dictids = list_append_unique_oid(dest_dictids, dep->refobjid);
+			else
+				src_dictids = list_append_unique_oid(src_dictids, dep->refobjid);
+		}
+
+		systable_endscan(scan);
+	}
+
+	table_close(depRel, AccessShareLock);
+
+	/*
+	 * Identify dictionaries from sources not already referenced by
+	 * destination
+	 */
+	new_dictids = list_difference_oid(src_dictids, dest_dictids);
+
+	/* Add new dictionary dependencies to the destination table if necessary */
+	if (new_dictids != NIL)
+	{
+		ObjectAddress depender;
+		ObjectAddresses *referenced = new_object_addresses();
+
+		ObjectAddressSet(depender, RelationRelationId, destRelid);
+
+		foreach(lc, new_dictids)
+		{
+			ObjectAddress dictObj;
+
+			ObjectAddressSet(dictObj, ZstdDictionariesRelationId, lfirst_oid(lc));
+			add_exact_object_address(&dictObj, referenced);
+		}
+
+		record_object_address_dependencies(&depender, referenced, DEPENDENCY_NORMAL);
+		free_object_addresses(referenced);
+	}
+
+	/* Clean up temporary lists */
+	list_free(relids);
+	list_free(src_dictids);
+	list_free(dest_dictids);
+	list_free(new_dictids);
+}
diff --git a/src/backend/catalog/pg_zstd_dictionaries.c b/src/backend/catalog/pg_zstd_dictionaries.c
index 58964a600a3..5ae8ed71e48 100644
--- a/src/backend/catalog/pg_zstd_dictionaries.c
+++ b/src/backend/catalog/pg_zstd_dictionaries.c
@@ -507,6 +507,98 @@ build_zstd_dict_for_attribute(PG_FUNCTION_ARGS)
 #endif
 }
 
+/*
+ * cleanup_unused_zstd_dictionaries_internal
+ *		Drop every zstd dictionary that no pg_depend entry references.
+ *
+ * A dictionary is considered used when at least one pg_depend row has it as
+ * the referenced object.  Unused dictionaries are collected while scanning
+ * pg_zstd_dictionaries and dropped with DROP_RESTRICT once the scan has
+ * ended.  Returns the number of dictionaries dropped.
+ *
+ * Defined ahead of its SQL-callable wrapper so that this static function is
+ * declared before its first use.
+ */
+static int
+cleanup_unused_zstd_dictionaries_internal(void)
+{
+	Relation	dictRel;
+	Relation	depRel;
+	SysScanDesc scan;
+	ScanKeyData depKey;
+	HeapTuple	tuple;
+	List	   *used_dictids = NIL;
+	ObjectAddresses *unused;
+	int			dropped_count = 0;
+
+	/* Open necessary catalog relations */
+	dictRel = table_open(ZstdDictionariesRelationId, ShareRowExclusiveLock);
+	depRel = table_open(DependRelationId, AccessShareLock);
+
+	/* Find dictionary OIDs with dependencies */
+	ScanKeyInit(&depKey,
+				Anum_pg_depend_refclassid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(ZstdDictionariesRelationId));
+
+	scan = systable_beginscan(depRel, DependReferenceIndexId, true,
+							  NULL, 1, &depKey);
+	while (HeapTupleIsValid(tuple = systable_getnext(scan)))
+	{
+		Form_pg_depend dep = (Form_pg_depend) GETSTRUCT(tuple);
+
+		used_dictids = list_append_unique_oid(used_dictids, dep->refobjid);
+	}
+	systable_endscan(scan);
+
+	/*
+	 * Collect the unreferenced dictionaries first and drop them only after
+	 * the scan has ended, so that pg_zstd_dictionaries is not modified while
+	 * it is being scanned.
+	 */
+	unused = new_object_addresses();
+	scan = systable_beginscan(dictRel, InvalidOid, false, NULL, 0, NULL);
+	while (HeapTupleIsValid(tuple = systable_getnext(scan)))
+	{
+		Oid			dictid = ((Form_pg_zstd_dictionaries) GETSTRUCT(tuple))->dictid;
+
+		if (!list_member_oid(used_dictids, dictid))
+		{
+			ObjectAddress dictAddr;
+
+			ObjectAddressSet(dictAddr, ZstdDictionariesRelationId, dictid);
+			add_exact_object_address(&dictAddr, unused);
+			dropped_count++;
+		}
+	}
+	systable_endscan(scan);
+
+	/* Drop unused dictionaries */
+	performMultipleDeletions(unused, DROP_RESTRICT, 0);
+
+	/* Clean up temporary lists */
+	free_object_addresses(unused);
+	list_free(used_dictids);
+
+	/* Close catalog relations, keeping their locks (NoLock) */
+	table_close(depRel, NoLock);
+	table_close(dictRel, NoLock);
+
+	return dropped_count;
+}
+
+/*
+ * cleanup_unused_zstd_dictionaries
+ *		SQL-callable wrapper around
+ *		cleanup_unused_zstd_dictionaries_internal(); returns the number of
+ *		dictionaries dropped as int4.
+ */
+Datum
+cleanup_unused_zstd_dictionaries(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_INT32(cleanup_unused_zstd_dictionaries_internal());
+}
+
 /*
  * 
get_zstd_dict - Fetches the ZSTD dictionary from the catalog * diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 0a4155773eb..2361ad77db4 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -47,18 +47,7 @@ #include "utils/lsyscache.h" #include "utils/rls.h" #include "utils/snapmgr.h" - -typedef struct -{ - DestReceiver pub; /* publicly-known function pointers */ - IntoClause *into; /* target relation specification */ - /* These fields are filled by intorel_startup: */ - Relation rel; /* relation to write to */ - ObjectAddress reladdr; /* address of rel, for ExecCreateTableAs */ - CommandId output_cid; /* cmin to insert in output tuples */ - int ti_options; /* table_tuple_insert performance options */ - BulkInsertState bistate; /* bulk insert state */ -} DR_intorel; +#include "catalog/dependency.h" /* utility functions for CTAS definition creation */ static ObjectAddress create_ctas_internal(List *attrList, IntoClause *into); @@ -352,6 +341,9 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, /* get object address that intorel_startup saved for us */ address = ((DR_intorel *) dest)->reladdr; + /* Inherit zstd dictionary dependencies */ + inheritZstdDictionaryDependencies(plan->relationOids, address.objectId); + /* and clean up */ ExecutorFinish(queryDesc); ExecutorEnd(queryDesc); diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c index bf7d2b2309f..a867f4d7711 100644 --- a/src/backend/commands/prepare.c +++ b/src/backend/commands/prepare.c @@ -36,7 +36,7 @@ #include "utils/builtins.h" #include "utils/snapmgr.h" #include "utils/timestamp.h" - +#include "catalog/dependency.h" /* * The hash table in which prepared queries are stored. 
This is @@ -161,6 +161,7 @@ ExecuteQuery(ParseState *pstate, char *query_string; int eflags; long count; + List *relationOids = NIL; /* Look it up in the hash table */ entry = FetchPreparedStatement(stmt->name, true); @@ -242,7 +243,10 @@ ExecuteQuery(ParseState *pstate, if (intoClause->skipData) count = 0; else + { count = FETCH_ALL; + relationOids = pstmt->relationOids; + } } else { @@ -258,6 +262,10 @@ ExecuteQuery(ParseState *pstate, (void) PortalRun(portal, count, false, dest, dest, qc); + /* Inherit zstd dictionary dependencies */ + if (intoClause && count != 0) + inheritZstdDictionaryDependencies(relationOids, ((DR_intorel *) dest)->reladdr.objectId); + PortalDrop(portal, false); if (estate) diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 333cbf78343..e6847fd31c7 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -69,7 +69,7 @@ #include "utils/datum.h" #include "utils/rel.h" #include "utils/snapmgr.h" - +#include "catalog/dependency.h" typedef struct MTTargetRelLookup { @@ -4416,6 +4416,9 @@ ExecModifyTable(PlanState *pstate) if (estate->es_insert_pending_result_relations != NIL) ExecPendingInserts(estate); + /* Inherit zstd dictionary dependencies */ + inheritZstdDictionaryDependencies(estate->es_plannedstmt->relationOids, RelationGetRelid(resultRelInfo->ri_RelationDesc)); + /* * We're done, but fire AFTER STATEMENT triggers before exiting. 
*/
diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h
index 0ea7ccf5243..a1e91f2c8f1 100644
--- a/src/include/catalog/dependency.h
+++ b/src/include/catalog/dependency.h
@@ -225,4 +225,6 @@ extern void shdepDropOwned(List *roleids, DropBehavior behavior);
 
 extern void shdepReassignOwned(List *roleids, Oid newrole);
 
+extern void inheritZstdDictionaryDependencies(List *relationOids, Oid destRelid);
+
 #endif							/* DEPENDENCY_H */
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 7d2286850dc..c98e9dca653 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -12592,6 +12592,11 @@
   proargtypes => 'text int4', proparallel => 'u',
   prosrc => 'build_zstd_dict_for_attribute' },
 
+{ oid => '9246', descr => 'cleanup unused dictionaries.',
+  proname => 'cleanup_unused_zstd_dictionaries', provolatile => 'v', prorettype => 'int4',
+  proargtypes => '', proparallel => 'u',
+  prosrc => 'cleanup_unused_zstd_dictionaries' },
+
 { oid => '9247', descr => 'ZSTD standard sampling for jsonb',
   proname => 'std_zstd_sampling_for_jsonb', provolatile => 'v', prorettype => 'bool',
   proargtypes => 'internal internal',
diff --git a/src/include/commands/createas.h b/src/include/commands/createas.h
index 90612ebbb0e..2ee78652f28 100644
--- a/src/include/commands/createas.h
+++ b/src/include/commands/createas.h
@@ -19,7 +19,26 @@
 #include "parser/parse_node.h"
 #include "tcop/dest.h"
 #include "utils/queryenvironment.h"
+#include "access/heapam.h"
 
+/*
+ * DR_intorel - DestReceiver state used when executing CREATE TABLE AS.
+ *
+ * Declared in this header rather than privately in createas.c so that other
+ * callers (such as ExecuteQuery in prepare.c) can read reladdr once the
+ * query has run.
+ */
+typedef struct DR_intorel
+{
+	DestReceiver pub;			/* publicly-known function pointers */
+	IntoClause *into;			/* target relation specification */
+	/* These fields are filled by intorel_startup: */
+	Relation	rel;			/* relation to write to */
+	ObjectAddress reladdr;		/* address of rel, for ExecCreateTableAs */
+	CommandId	output_cid;		/* cmin to insert in output tuples */
+	int			ti_options;		/* table_tuple_insert performance options */
+	BulkInsertState bistate;	/* bulk insert state */
+} DR_intorel;
 
 extern ObjectAddress ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt,
 									   ParamListInfo params, QueryEnvironment *queryEnv,
-- 
2.47.1