From d1772e45e937b9b7019a29b5a76583cdf52066de Mon Sep 17 00:00:00 2001 From: "dgrowley@gmail.com" Date: Sun, 2 Dec 2018 12:25:56 +1300 Subject: [PATCH v3] Allow lock acquisitions for partitions to be delayed Normally during query execution, AcquireExecutorLocks will obtain locks on all RangeTblEntry objects. When many RangeTblEntrys are table partitions then a large number of locks may be required. This can slow down execution of such plans. Now that we have run-time partition pruning we may end up not scanning some of these partitions and if so we have no need to obtain a lock on them. Here we modify things so that locks are only obtained on partitions the first time they are accessed in the executor, instead of at the start of execution. --- src/backend/catalog/dependency.c | 1 + src/backend/commands/createas.c | 1 + src/backend/executor/execUtils.c | 20 +++++++++++--------- src/backend/nodes/copyfuncs.c | 1 + src/backend/nodes/equalfuncs.c | 1 + src/backend/nodes/outfuncs.c | 1 + src/backend/nodes/readfuncs.c | 1 + src/backend/optimizer/plan/planner.c | 2 ++ src/backend/optimizer/util/inherit.c | 11 +++++++++++ src/backend/parser/parse_relation.c | 2 ++ src/backend/replication/logical/worker.c | 1 + src/backend/rewrite/rewriteHandler.c | 1 + src/backend/utils/adt/ri_triggers.c | 2 ++ src/backend/utils/adt/ruleutils.c | 3 +++ src/backend/utils/cache/plancache.c | 15 +++++++++++---- src/include/nodes/parsenodes.h | 2 ++ 16 files changed, 52 insertions(+), 13 deletions(-) diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 2c54895831..e1e7e98075 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -1478,6 +1478,7 @@ recordDependencyOnSingleRelExpr(const ObjectAddress *depender, rte.rtekind = RTE_RELATION; rte.relid = relId; rte.relkind = RELKIND_RELATION; /* no need for exactness here */ + rte.delaylock = false; rte.rellockmode = AccessShareLock; context.rtables = list_make1(list_make1(&rte)); 
diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 2bc8f928ea..41a4a2d063 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -516,6 +516,7 @@ intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo) rte->rtekind = RTE_RELATION; rte->relid = intoRelationAddr.objectId; rte->relkind = relkind; + rte->delaylock = false; rte->rellockmode = RowExclusiveLock; rte->requiredPerms = ACL_INSERT; diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 312a0dc805..250461dc11 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -771,14 +771,15 @@ ExecGetRangeTableRelation(EState *estate, Index rti) Assert(rte->rtekind == RTE_RELATION); - if (!IsParallelWorker()) + if (!rte->delaylock && !IsParallelWorker()) { /* - * In a normal query, we should already have the appropriate lock, - * but verify that through an Assert. Since there's already an - * Assert inside table_open that insists on holding some lock, it - * seems sufficient to check this only when rellockmode is higher - * than the minimum. + * In a normal query, unless the planner set the delaylock flag, + * we should already have the appropriate lock, but verify that + * through an Assert. Since there's already an Assert inside + * table_open that insists on holding some lock, it seems + * sufficient to check this only when rellockmode is higher than + * the minimum. */ rel = table_open(rte->relid, NoLock); Assert(rte->rellockmode == AccessShareLock || @@ -787,9 +788,10 @@ ExecGetRangeTableRelation(EState *estate, Index rti) else { /* - * If we are a parallel worker, we need to obtain our own local - * lock on the relation. This ensures sane behavior in case the - * parent process exits before we do. + * If we are a parallel worker or delaylock is set, we need to + * obtain a lock on the relation. 
For parallel workers, this + * ensures sane behavior in case the parent process exits before + * we do. */ rel = table_open(rte->relid, rte->rellockmode); } diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 3eb7e95d64..3b553a5856 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -2353,6 +2353,7 @@ _copyRangeTblEntry(const RangeTblEntry *from) COPY_SCALAR_FIELD(rtekind); COPY_SCALAR_FIELD(relid); COPY_SCALAR_FIELD(relkind); + COPY_SCALAR_FIELD(delaylock); COPY_SCALAR_FIELD(rellockmode); COPY_NODE_FIELD(tablesample); COPY_NODE_FIELD(subquery); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 5c4fa7d077..fea26bbfdc 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -2630,6 +2630,7 @@ _equalRangeTblEntry(const RangeTblEntry *a, const RangeTblEntry *b) COMPARE_SCALAR_FIELD(rtekind); COMPARE_SCALAR_FIELD(relid); COMPARE_SCALAR_FIELD(relkind); + COMPARE_SCALAR_FIELD(delaylock); COMPARE_SCALAR_FIELD(rellockmode); COMPARE_NODE_FIELD(tablesample); COMPARE_NODE_FIELD(subquery); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 0fde876c77..2180c59547 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -3020,6 +3020,7 @@ _outRangeTblEntry(StringInfo str, const RangeTblEntry *node) case RTE_RELATION: WRITE_OID_FIELD(relid); WRITE_CHAR_FIELD(relkind); + WRITE_BOOL_FIELD(delaylock); WRITE_INT_FIELD(rellockmode); WRITE_NODE_FIELD(tablesample); break; diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index ec6f2569ab..ec4aafdf0b 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1362,6 +1362,7 @@ _readRangeTblEntry(void) case RTE_RELATION: READ_OID_FIELD(relid); READ_CHAR_FIELD(relkind); + READ_BOOL_FIELD(delaylock); READ_INT_FIELD(rellockmode); READ_NODE_FIELD(tablesample); break; diff --git a/src/backend/optimizer/plan/planner.c 
b/src/backend/optimizer/plan/planner.c index 4465f002c8..baa805334e 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -6057,6 +6057,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid) rte->rtekind = RTE_RELATION; rte->relid = tableOid; rte->relkind = RELKIND_RELATION; /* Don't be too picky. */ + rte->delaylock = false; rte->rellockmode = AccessShareLock; rte->lateral = false; rte->inh = false; @@ -6180,6 +6181,7 @@ plan_create_index_workers(Oid tableOid, Oid indexOid) rte->rtekind = RTE_RELATION; rte->relid = tableOid; rte->relkind = RELKIND_RELATION; /* Don't be too picky. */ + rte->delaylock = false; rte->rellockmode = AccessShareLock; rte->lateral = false; rte->inh = true; diff --git a/src/backend/optimizer/util/inherit.c b/src/backend/optimizer/util/inherit.c index eaf788e578..d3f32bd4c6 100644 --- a/src/backend/optimizer/util/inherit.c +++ b/src/backend/optimizer/util/inherit.c @@ -365,6 +365,17 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte, *childrte_p = childrte; childrte->relid = childOID; childrte->relkind = childrel->rd_rel->relkind; + + /* + * For leaf partitions, we've no need to obtain the lock on the relation + * during query execution until the partition is first required. This can + * drastically reduce the number of partitions we must lock when many + * partitions are run-time pruned. + */ + childrte->delaylock = (childOID != parentOID && + parentrte->relkind == RELKIND_PARTITIONED_TABLE && + childrte->relkind != RELKIND_PARTITIONED_TABLE); + /* A partitioned child will need to be expanded further. 
*/ if (childOID != parentOID && childrte->relkind == RELKIND_PARTITIONED_TABLE) diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c index 09fbb588af..e1fa771da1 100644 --- a/src/backend/parser/parse_relation.c +++ b/src/backend/parser/parse_relation.c @@ -1226,6 +1226,7 @@ addRangeTableEntry(ParseState *pstate, rel = parserOpenTable(pstate, relation, lockmode); rte->relid = RelationGetRelid(rel); rte->relkind = rel->rd_rel->relkind; + rte->delaylock = false; rte->rellockmode = lockmode; /* @@ -1304,6 +1305,7 @@ addRangeTableEntryForRelation(ParseState *pstate, rte->alias = alias; rte->relid = RelationGetRelid(rel); rte->relkind = rel->rd_rel->relkind; + rte->delaylock = false; rte->rellockmode = lockmode; /* diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 2c49c711e3..d86945e1b1 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -199,6 +199,7 @@ create_estate_for_relation(LogicalRepRelMapEntry *rel) rte->rtekind = RTE_RELATION; rte->relid = RelationGetRelid(rel->localrel); rte->relkind = rel->localrel->rd_rel->relkind; + rte->delaylock = false; rte->rellockmode = AccessShareLock; ExecInitRangeTable(estate, list_make1(rte)); diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index 1eca69873b..658513d69c 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -1597,6 +1597,7 @@ ApplyRetrieveRule(Query *parsetree, /* Clear fields that should not be set in a subquery RTE */ rte->relid = InvalidOid; rte->relkind = 0; + rte->delaylock = false; rte->rellockmode = 0; rte->tablesample = NULL; rte->inh = false; /* must not be set for a subquery */ diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index e1aa3d0044..f3301bd61f 100644 --- a/src/backend/utils/adt/ri_triggers.c +++ b/src/backend/utils/adt/ri_triggers.c @@ 
-1731,6 +1731,7 @@ RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel) pkrte->rtekind = RTE_RELATION; pkrte->relid = RelationGetRelid(pk_rel); pkrte->relkind = pk_rel->rd_rel->relkind; + pkrte->delaylock = false; pkrte->rellockmode = AccessShareLock; pkrte->requiredPerms = ACL_SELECT; @@ -1738,6 +1739,7 @@ RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel) fkrte->rtekind = RTE_RELATION; fkrte->relid = RelationGetRelid(fk_rel); fkrte->relkind = fk_rel->rd_rel->relkind; + fkrte->delaylock = false; fkrte->rellockmode = AccessShareLock; fkrte->requiredPerms = ACL_SELECT; diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 302df16b4a..2c85f7e286 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -1003,6 +1003,7 @@ pg_get_triggerdef_worker(Oid trigid, bool pretty) oldrte->rtekind = RTE_RELATION; oldrte->relid = trigrec->tgrelid; oldrte->relkind = relkind; + oldrte->delaylock = false; oldrte->rellockmode = AccessShareLock; oldrte->alias = makeAlias("old", NIL); oldrte->eref = oldrte->alias; @@ -1014,6 +1015,7 @@ pg_get_triggerdef_worker(Oid trigid, bool pretty) newrte->rtekind = RTE_RELATION; newrte->relid = trigrec->tgrelid; newrte->relkind = relkind; + newrte->delaylock = false; newrte->rellockmode = AccessShareLock; newrte->alias = makeAlias("new", NIL); newrte->eref = newrte->alias; @@ -3210,6 +3212,7 @@ deparse_context_for(const char *aliasname, Oid relid) rte->rtekind = RTE_RELATION; rte->relid = relid; rte->relkind = RELKIND_RELATION; /* no need for exactness here */ + rte->delaylock = false; rte->rellockmode = AccessShareLock; rte->alias = makeAlias(aliasname, NIL); rte->eref = rte->alias; diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index 3f46b5dfb2..0cfa8baca5 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -1593,10 +1593,17 @@ AcquireExecutorLocks(List *stmt_list, 
bool acquire) continue; /* - * Acquire the appropriate type of lock on each relation OID. Note - * that we don't actually try to open the rel, and hence will not - * fail if it's been dropped entirely --- we'll just transiently - * acquire a non-conflicting lock. + * delaylock relations will be locked only when they are going + * to be accessed for the first time. + */ + if (rte->delaylock) + continue; + + /* + * Otherwise, acquire the appropriate type of lock on the + * relation's OID. Note that we don't actually try to open the + * rel, and hence will not fail if it's been dropped entirely --- + * we'll just transiently acquire a non-conflicting lock. */ if (acquire) LockRelationOid(rte->relid, rte->rellockmode); diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index addc2c2ec7..d3aebff942 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -986,6 +986,8 @@ typedef struct RangeTblEntry */ Oid relid; /* OID of the relation */ char relkind; /* relation kind (see pg_class.relkind) */ + bool delaylock; /* delay locking until executor needs to + * access this relation */ int rellockmode; /* lock level that query requires on the rel */ struct TableSampleClause *tablesample; /* sampling info, or NULL */ -- 2.16.2.windows.1