From f55a622383c90c3f300dede0d04247f7cf2d9e77 Mon Sep 17 00:00:00 2001 From: amitlan Date: Wed, 22 Dec 2021 16:55:17 +0900 Subject: [PATCH v12] Optimize AcquireExecutorLocks() to skip pruned partitions --- src/backend/commands/copyto.c | 2 +- src/backend/commands/createas.c | 2 +- src/backend/commands/explain.c | 7 +- src/backend/commands/extension.c | 2 +- src/backend/commands/matview.c | 2 +- src/backend/commands/prepare.c | 13 +- src/backend/executor/README | 28 +++ src/backend/executor/execMain.c | 46 +++++ src/backend/executor/execParallel.c | 28 ++- src/backend/executor/execPartition.c | 238 ++++++++++++++++++++---- src/backend/executor/execUtils.c | 1 + src/backend/executor/functions.c | 2 +- src/backend/executor/nodeAppend.c | 16 +- src/backend/executor/nodeMergeAppend.c | 9 +- src/backend/executor/spi.c | 10 +- src/backend/nodes/copyfuncs.c | 33 +++- src/backend/nodes/outfuncs.c | 36 +++- src/backend/nodes/readfuncs.c | 56 +++++- src/backend/optimizer/plan/createplan.c | 20 +- src/backend/optimizer/plan/planner.c | 3 + src/backend/optimizer/plan/setrefs.c | 104 ++++++++--- src/backend/partitioning/partprune.c | 41 +++- src/backend/tcop/pquery.c | 28 ++- src/backend/utils/cache/plancache.c | 236 ++++++++++++++++++++--- src/include/commands/explain.h | 3 +- src/include/executor/execPartition.h | 12 +- src/include/executor/execdesc.h | 3 + src/include/executor/executor.h | 2 + src/include/nodes/execnodes.h | 15 ++ src/include/nodes/nodes.h | 4 + src/include/nodes/pathnodes.h | 15 ++ src/include/nodes/plannodes.h | 39 +++- src/include/utils/plancache.h | 7 + 33 files changed, 919 insertions(+), 144 deletions(-) diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index 55c38b04c4..d403eb2309 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -542,7 +542,7 @@ BeginCopyTo(ParseState *pstate, ((DR_copy *) dest)->cstate = cstate; /* Create a QueryDesc requesting no output */ - cstate->queryDesc = 
CreateQueryDesc(plan, pstate->p_sourcetext, + cstate->queryDesc = CreateQueryDesc(plan, NULL, pstate->p_sourcetext, GetActiveSnapshot(), InvalidSnapshot, dest, NULL, NULL, 0); diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 9abbb6b555..f6607f2454 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -325,7 +325,7 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, UpdateActiveSnapshotCommandId(); /* Create a QueryDesc, redirecting output to our tuple receiver */ - queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext, + queryDesc = CreateQueryDesc(plan, NULL, pstate->p_sourcetext, GetActiveSnapshot(), InvalidSnapshot, dest, params, queryEnv, 0); diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 1e5701b8eb..7ba9852e51 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -407,7 +407,7 @@ ExplainOneQuery(Query *query, int cursorOptions, } /* run it (if needed) and produce output */ - ExplainOnePlan(plan, into, es, queryString, params, queryEnv, + ExplainOnePlan(plan, NULL, into, es, queryString, params, queryEnv, &planduration, (es->buffers ? &bufusage : NULL)); } } @@ -515,7 +515,8 @@ ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es, * to call it. 
*/ void -ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, +ExplainOnePlan(PlannedStmt *plannedstmt, PartitionPruneResult *part_prune_result, + IntoClause *into, ExplainState *es, const char *queryString, ParamListInfo params, QueryEnvironment *queryEnv, const instr_time *planduration, const BufferUsage *bufusage) @@ -563,7 +564,7 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, dest = None_Receiver; /* Create a QueryDesc for the query */ - queryDesc = CreateQueryDesc(plannedstmt, queryString, + queryDesc = CreateQueryDesc(plannedstmt, part_prune_result, queryString, GetActiveSnapshot(), InvalidSnapshot, dest, params, queryEnv, instrument_option); diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c index 1013790dbb..54734a3a93 100644 --- a/src/backend/commands/extension.c +++ b/src/backend/commands/extension.c @@ -776,7 +776,7 @@ execute_sql_string(const char *sql) { QueryDesc *qdesc; - qdesc = CreateQueryDesc(stmt, + qdesc = CreateQueryDesc(stmt, NULL, sql, GetActiveSnapshot(), NULL, dest, NULL, NULL, 0); diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index 9ab248d25e..2be1782bc4 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -416,7 +416,7 @@ refresh_matview_datafill(DestReceiver *dest, Query *query, UpdateActiveSnapshotCommandId(); /* Create a QueryDesc, redirecting output to our tuple receiver */ - queryDesc = CreateQueryDesc(plan, queryString, + queryDesc = CreateQueryDesc(plan, NULL, queryString, GetActiveSnapshot(), InvalidSnapshot, dest, NULL, NULL, 0); diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c index 80738547ed..45039e64be 100644 --- a/src/backend/commands/prepare.c +++ b/src/backend/commands/prepare.c @@ -576,7 +576,9 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es, const char *query_string; CachedPlan *cplan; List *plan_list; - ListCell 
*p; + List *plan_part_prune_result_list; + ListCell *p, + *pp; ParamListInfo paramLI = NULL; EState *estate = NULL; instr_time planstart; @@ -632,15 +634,18 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es, } plan_list = cplan->stmt_list; + plan_part_prune_result_list = cplan->part_prune_result_list; /* Explain each query */ - foreach(p, plan_list) + forboth(p, plan_list, pp, plan_part_prune_result_list) { PlannedStmt *pstmt = lfirst_node(PlannedStmt, p); + PartitionPruneResult *part_prune_result = lfirst_node(PartitionPruneResult, pp); if (pstmt->commandType != CMD_UTILITY) - ExplainOnePlan(pstmt, into, es, query_string, paramLI, queryEnv, - &planduration, (es->buffers ? &bufusage : NULL)); + ExplainOnePlan(pstmt, part_prune_result, into, es, query_string, + paramLI, queryEnv, &planduration, + (es->buffers ? &bufusage : NULL)); else ExplainOneUtility(pstmt->utilityStmt, into, es, query_string, paramLI, queryEnv); diff --git a/src/backend/executor/README b/src/backend/executor/README index 0b5183fc4a..8418e758da 100644 --- a/src/backend/executor/README +++ b/src/backend/executor/README @@ -65,6 +65,30 @@ found there. This currently only occurs for Append and MergeAppend nodes. In this case the non-required subplans are ignored and the executor state's subnode array will become out of sequence to the plan's subplan list. +Actually, the so-called execution time pruning may also occur even before the +execution has started. One case where that occurs is when a cached generic +plan is being validated for execution by plancache.c: GetCachedPlan(), which +proceeds by locking all the relations that will be scanned by that plan. 
If +the generic plan has nodes that contain so-called initial pruning steps (a +subset of execution pruning steps that do not depend on full-fledged execution +having started), they are performed at this point to figure out the minimal +set of child subplans that satisfy those pruning instructions and the result +of performing that pruning is saved in a data structure that gets passed to +the executor alongside the plan tree. Relations scanned by only those +surviving subplans are then locked while those scanned by the pruned subplans +are not, even though the pruned subplans themselves are not removed from the +plan tree. So, it is imperative that the executor and any third party code +invoked by it that gets passed the plan tree look at the initial pruning result +made available via the aforementioned data structure to determine whether or +not a particular subplan is valid. The data structure basically consists of +a PartitionPruneResult node passed through the QueryDesc (subsequently added +to EState) containing a list of bitmapsets with one element for every +PartitionPruneInfo found in PlannedStmt.partPruneInfos. The list is indexed +with part_prune_index of the individual PartitionPruneInfos that's stored in +the parent plan nodes to which a given PartitionPruneInfo belongs. Each +bitmapset consists of the indexes of the child subplans of the given parent plan +node that survive initial partition pruning. + Each Plan node may have expression trees associated with it, to represent its target list, qualification conditions, etc. 
These trees are also read-only to the executor, but the executor state for expression evaluation @@ -286,6 +310,10 @@ Query Processing Control Flow This is a sketch of control flow for full query processing: + [ ExecutorDoInitialPruning ] --- an optional step to perform initial + partition pruning on the plan tree the result of which is passed + to the executor via QueryDesc + CreateQueryDesc ExecutorStart diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index ef2fd46092..05cc99df8f 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -49,11 +49,13 @@ #include "commands/matview.h" #include "commands/trigger.h" #include "executor/execdebug.h" +#include "executor/execPartition.h" #include "executor/nodeSubplan.h" #include "foreign/fdwapi.h" #include "jit/jit.h" #include "mb/pg_wchar.h" #include "miscadmin.h" +#include "nodes/nodeFuncs.h" #include "parser/parsetree.h" #include "storage/bufmgr.h" #include "storage/lmgr.h" @@ -104,6 +106,47 @@ static void EvalPlanQualStart(EPQState *epqstate, Plan *planTree); /* end of local decls */ +/* ---------------------------------------------------------------- + * ExecutorDoInitialPruning + * + * Performs initial partition pruning to figure out the minimal set of + * subplans to be executed and the set of RT indexes of the corresponding + * leaf partitions + * + * Returned PartitionPruneResult must be subsequently passed to the executor + * so that it can reuse the result of pruning. It's important that the + * executor has the same view of which partitions are initially pruned (by not doing + * the pruning again itself) or otherwise it risks initializing subplans whose + * partitions would not have been locked. + * + * Note: Partitioned tables mentioned in PartitionedRelPruneInfo nodes that + * drive the pruning will be locked before doing the pruning. 
+ */ +PartitionPruneResult * +ExecutorDoInitialPruning(PlannedStmt *plannedstmt, ParamListInfo params) +{ + PartitionPruneResult *result; + ListCell *lc; + + /* Only get here if there is any pruning to do. */ + Assert(plannedstmt->containsInitialPruning); + + result = makeNode(PartitionPruneResult); + foreach(lc, plannedstmt->partPruneInfos) + { + PartitionPruneInfo *pruneinfo = lfirst(lc); + Bitmapset *valid_subplan_offs; + + valid_subplan_offs = + ExecPartitionDoInitialPruning(plannedstmt, params, pruneinfo, + &result->scan_leafpart_rtis); + result->valid_subplan_offs_list = + lappend(result->valid_subplan_offs_list, + valid_subplan_offs); + } + + return result; +} /* ---------------------------------------------------------------- * ExecutorStart @@ -806,6 +849,7 @@ InitPlan(QueryDesc *queryDesc, int eflags) { CmdType operation = queryDesc->operation; PlannedStmt *plannedstmt = queryDesc->plannedstmt; + PartitionPruneResult *part_prune_result = queryDesc->part_prune_result; Plan *plan = plannedstmt->planTree; List *rangeTable = plannedstmt->rtable; EState *estate = queryDesc->estate; @@ -825,6 +869,8 @@ InitPlan(QueryDesc *queryDesc, int eflags) ExecInitRangeTable(estate, rangeTable); estate->es_plannedstmt = plannedstmt; + estate->es_part_prune_infos = plannedstmt->partPruneInfos; + estate->es_part_prune_result = part_prune_result; /* * Next, build the ExecRowMark array from the PlanRowMark(s), if any. 
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index 9a0d5d59ef..805f86c503 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -66,6 +66,7 @@ #define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xE000000000000008) #define PARALLEL_KEY_JIT_INSTRUMENTATION UINT64CONST(0xE000000000000009) #define PARALLEL_KEY_WAL_USAGE UINT64CONST(0xE00000000000000A) +#define PARALLEL_KEY_PARTITIONPRUNERESULT UINT64CONST(0xE00000000000000B) #define PARALLEL_TUPLE_QUEUE_SIZE 65536 @@ -182,7 +183,9 @@ ExecSerializePlan(Plan *plan, EState *estate) pstmt->transientPlan = false; pstmt->dependsOnRole = false; pstmt->parallelModeNeeded = false; + pstmt->containsInitialPruning = false; pstmt->planTree = plan; + pstmt->partPruneInfos = estate->es_part_prune_infos; pstmt->rtable = estate->es_range_table; pstmt->resultRelations = NIL; pstmt->appendRelations = NIL; @@ -596,12 +599,15 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, FixedParallelExecutorState *fpes; char *pstmt_data; char *pstmt_space; + char *part_prune_result_data; + char *part_prune_result_space; char *paramlistinfo_space; BufferUsage *bufusage_space; WalUsage *walusage_space; SharedExecutorInstrumentation *instrumentation = NULL; SharedJitInstrumentation *jit_instrumentation = NULL; int pstmt_len; + int part_prune_result_len; int paramlistinfo_len; int instrumentation_len = 0; int jit_instrumentation_len = 0; @@ -630,6 +636,7 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, /* Fix up and serialize plan to be sent to workers. */ pstmt_data = ExecSerializePlan(planstate->plan, estate); + part_prune_result_data = nodeToString(estate->es_part_prune_result); /* Create a parallel context. 
*/ pcxt = CreateParallelContext("postgres", "ParallelQueryMain", nworkers); @@ -656,6 +663,11 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, shm_toc_estimate_chunk(&pcxt->estimator, pstmt_len); shm_toc_estimate_keys(&pcxt->estimator, 1); + /* Estimate space for serialized PartitionPruneResult. */ + part_prune_result_len = strlen(part_prune_result_data) + 1; + shm_toc_estimate_chunk(&pcxt->estimator, part_prune_result_len); + shm_toc_estimate_keys(&pcxt->estimator, 1); + /* Estimate space for serialized ParamListInfo. */ paramlistinfo_len = EstimateParamListSpace(estate->es_param_list_info); shm_toc_estimate_chunk(&pcxt->estimator, paramlistinfo_len); @@ -750,6 +762,12 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, memcpy(pstmt_space, pstmt_data, pstmt_len); shm_toc_insert(pcxt->toc, PARALLEL_KEY_PLANNEDSTMT, pstmt_space); + /* Store serialized PartitionPruneResult */ + part_prune_result_space = shm_toc_allocate(pcxt->toc, part_prune_result_len); + memcpy(part_prune_result_space, part_prune_result_data, part_prune_result_len); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_PARTITIONPRUNERESULT, + part_prune_result_space); + /* Store serialized ParamListInfo. */ paramlistinfo_space = shm_toc_allocate(pcxt->toc, paramlistinfo_len); shm_toc_insert(pcxt->toc, PARALLEL_KEY_PARAMLISTINFO, paramlistinfo_space); @@ -1231,8 +1249,10 @@ ExecParallelGetQueryDesc(shm_toc *toc, DestReceiver *receiver, int instrument_options) { char *pstmtspace; + char *part_prune_result_space; char *paramspace; PlannedStmt *pstmt; + PartitionPruneResult *part_prune_result; ParamListInfo paramLI; char *queryString; @@ -1243,12 +1263,18 @@ ExecParallelGetQueryDesc(shm_toc *toc, DestReceiver *receiver, pstmtspace = shm_toc_lookup(toc, PARALLEL_KEY_PLANNEDSTMT, false); pstmt = (PlannedStmt *) stringToNode(pstmtspace); + /* Reconstruct leader-supplied PartitionPruneResult. 
*/ + part_prune_result_space = + shm_toc_lookup(toc, PARALLEL_KEY_PARTITIONPRUNERESULT, false); + part_prune_result = (PartitionPruneResult *) + stringToNode(part_prune_result_space); + /* Reconstruct ParamListInfo. */ paramspace = shm_toc_lookup(toc, PARALLEL_KEY_PARAMLISTINFO, false); paramLI = RestoreParamList(&paramspace); /* Create a QueryDesc for the query. */ - return CreateQueryDesc(pstmt, + return CreateQueryDesc(pstmt, part_prune_result, queryString, GetActiveSnapshot(), InvalidSnapshot, receiver, paramLI, NULL, instrument_options); diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 615bd80973..3037742b8d 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -25,6 +25,7 @@ #include "mb/pg_wchar.h" #include "miscadmin.h" #include "nodes/makefuncs.h" +#include "parser/parsetree.h" #include "partitioning/partbounds.h" #include "partitioning/partdesc.h" #include "partitioning/partprune.h" @@ -185,7 +186,11 @@ static char *ExecBuildSlotPartitionKeyDescription(Relation rel, static List *adjust_partition_colnos(List *colnos, ResultRelInfo *leaf_part_rri); static List *adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap); static PartitionPruneState *CreatePartitionPruneState(PlanState *planstate, - PartitionPruneInfo *pruneinfo); + PartitionPruneInfo *pruneinfo, + bool consider_initial_steps, + bool consider_exec_steps, + List *rtable, ExprContext *econtext, + PartitionDirectory partdir); static void InitPartitionPruneContext(PartitionPruneContext *context, List *pruning_steps, PartitionDesc partdesc, @@ -198,7 +203,8 @@ static void PartitionPruneFixSubPlanMap(PartitionPruneState *prunestate, static void find_matching_subplans_recurse(PartitionPruningData *prunedata, PartitionedRelPruningData *pprune, bool initial_prune, - Bitmapset **validsubplans); + Bitmapset **validsubplans, + Bitmapset **scan_leafpart_rtis); /* @@ -1587,8 +1593,10 @@ 
adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap) * considered to be a stable expression, it can change value from one plan * node scan to the next during query execution. Stable comparison * expressions that don't involve such Params allow partition pruning to be - * done once during executor startup. Expressions that do involve such Params - * require us to prune separately for each scan of the parent plan node. + * done once during executor startup or during ExecutorDoInitialPruning() that + * runs as part of performing AcquireExecutorLocks() on a given plan tree. + * Expressions that do involve such Params require us to prune separately for + * each scan of the parent plan node. * * Note that pruning away unneeded subplans during executor startup has the * added benefit of not having to initialize the unneeded subplans at all. @@ -1605,6 +1613,13 @@ adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap) * account for initial pruning possibly having eliminated some of the * subplans. * + * ExecPartitionDoInitialPruning: + * Do initial pruning with the information contained in a given + * PartitionPruneInfo to determine the minimal set of child subplans + * to be executed of the parent plan node to which the PartitionPruneInfo + * belongs and also the set of the RT indexes of leaf partitions that will + * be scanned with those subplans. + * * ExecFindMatchingSubPlans: * Returns indexes of matching subplans after evaluating the expressions * that are safe to evaluate at a given point. This function is first @@ -1622,8 +1637,9 @@ adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap) * * On return, *initially_valid_subplans is assigned the set of indexes of * child subplans that must be initialized along with the parent plan node. - * Initial pruning is performed here if needed and in that case only the - * surviving subplans' indexes are added. 
+ * Initial pruning is performed here if needed (unless it has already been done + * by ExecutorDoInitialPruning()), and in that case only the surviving subplans' + * indexes are added. * * If subplans are indeed pruned, subplan_map arrays contained in the returned * PartitionPruneState are re-sequenced to not count those, though only if the @@ -1632,23 +1648,59 @@ adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap) PartitionPruneState * ExecInitPartitionPruning(PlanState *planstate, int n_total_subplans, - PartitionPruneInfo *pruneinfo, + int part_prune_index, Bitmapset **initially_valid_subplans) { - PartitionPruneState *prunestate; EState *estate = planstate->state; + PartitionPruneInfo *pruneinfo = list_nth(estate->es_part_prune_infos, + part_prune_index); + PartitionPruneResult *pruneresult = estate->es_part_prune_result; + PartitionPruneState *prunestate; + bool do_pruning = (pruneinfo->needs_init_pruning || + pruneinfo->needs_exec_pruning); - /* We may need an expression context to evaluate partition exprs */ - ExecAssignExprContext(estate, planstate); + /* + * No need to do initial pruning if it was done already by + * ExecutorDoInitialPruning(), which it would be if es_part_prune_result + * has been set. + */ + if (pruneresult) + do_pruning = pruneinfo->needs_exec_pruning; + + prunestate = NULL; + if (do_pruning) + { + /* We may need an expression context to evaluate partition exprs */ + ExecAssignExprContext(estate, planstate); - /* Create the working data structure for pruning */ - prunestate = CreatePartitionPruneState(planstate, pruneinfo); + /* For data reading, executor always omits detached partitions */ + if (estate->es_partition_directory == NULL) + estate->es_partition_directory = + CreatePartitionDirectory(estate->es_query_cxt, false); + + /* + * Create the working data structure for pruning. No need to consider + * initial pruning steps if we have a PartitionPruneResult. 
+ */ + prunestate = CreatePartitionPruneState(planstate, pruneinfo, + pruneresult == NULL, true, + NIL, planstate->ps_ExprContext, + estate->es_partition_directory); + } /* * Perform an initial partition prune pass, if required. */ - if (prunestate->do_initial_prune) - *initially_valid_subplans = ExecFindMatchingSubPlans(prunestate, true); + if (pruneresult) + { + *initially_valid_subplans = + list_nth(pruneresult->valid_subplan_offs_list, part_prune_index); + } + else if (prunestate && prunestate->do_initial_prune) + { + *initially_valid_subplans = ExecFindMatchingSubPlans(prunestate, true, + NULL); + } else { /* No pruning, so we'll need to initialize all subplans */ @@ -1669,7 +1721,7 @@ ExecInitPartitionPruning(PlanState *planstate, * leaves invalid data in prunestate, because that data won't be * consulted again (cf initial Assert in ExecFindMatchingSubPlans). */ - if (prunestate->do_exec_prune) + if (prunestate && prunestate->do_exec_prune) PartitionPruneFixSubPlanMap(prunestate, *initially_valid_subplans, n_total_subplans); @@ -1678,11 +1730,72 @@ ExecInitPartitionPruning(PlanState *planstate, return prunestate; } +/* + * ExecPartitionDoInitialPruning + * Perform initial pruning using given PartitionPruneInfo to determine + * the minimal set of child subplans to be executed of the parent plan + * node to which the PartitionPruneInfo belongs and also the set of RT + * indexes of leaf partitions that will be scanned with those subplans. + */ +Bitmapset * +ExecPartitionDoInitialPruning(PlannedStmt *plannedstmt, ParamListInfo params, + PartitionPruneInfo *pruneinfo, + Bitmapset **scan_leafpart_rtis) +{ + List *rtable = plannedstmt->rtable; + ExprContext *econtext; + PartitionDirectory pdir; + MemoryContext oldcontext, + tmpcontext; + PartitionPruneState *prunestate; + Bitmapset *valid_subplan_offs; + + /* + * A temporary context to allocate stuff needed to run the pruning steps. 
+ */ + tmpcontext = AllocSetContextCreate(CurrentMemoryContext, + "initial pruning working data", + ALLOCSET_DEFAULT_SIZES); + oldcontext = MemoryContextSwitchTo(tmpcontext); + + /* + * PartitionDirectory to look up partition descriptors, which omits + * detached partitions, just like in the executor proper. + */ + pdir = CreatePartitionDirectory(CurrentMemoryContext, false); + + /* + * We don't yet have a PlanState for the parent plan node, so must create + * a standalone ExprContext to evaluate pruning expressions, equipped with + * the information about the EXTERN parameters that the caller passed us. + * Note that that's okay because the initial pruning steps do not contain + * anything that requires the execution to have started. + */ + econtext = CreateStandaloneExprContext(); + econtext->ecxt_param_list_info = params; + prunestate = CreatePartitionPruneState(NULL, pruneinfo, true, false, + rtable, econtext, pdir); + MemoryContextSwitchTo(oldcontext); + + /* Do the initial pruning. */ + valid_subplan_offs = ExecFindMatchingSubPlans(prunestate, true, + scan_leafpart_rtis); + + FreeExprContext(econtext, true); + DestroyPartitionDirectory(pdir); + MemoryContextDelete(tmpcontext); + + return valid_subplan_offs; +} + /* * CreatePartitionPruneState * Build the data structure required for calling ExecFindMatchingSubPlans * - * 'planstate' is the parent plan node's execution state. + * 'planstate', if not NULL, is the parent plan node's execution state. It + * can be NULL if being called before ExecutorStart(), in which case, + * 'rtable' (range table), 'econtext', and 'partdir' must be explicitly + * provided. * * 'pruneinfo' is a PartitionPruneInfo as generated by * make_partition_pruneinfo. Here we build a PartitionPruneState containing a @@ -1696,19 +1809,21 @@ ExecInitPartitionPruning(PlanState *planstate, * PartitionedRelPruneInfo. 
*/ static PartitionPruneState * -CreatePartitionPruneState(PlanState *planstate, PartitionPruneInfo *pruneinfo) +CreatePartitionPruneState(PlanState *planstate, + PartitionPruneInfo *pruneinfo, + bool consider_initial_steps, + bool consider_exec_steps, + List *rtable, ExprContext *econtext, + PartitionDirectory partdir) { - EState *estate = planstate->state; + EState *estate = planstate ? planstate->state : NULL; PartitionPruneState *prunestate; int n_part_hierarchies; ListCell *lc; int i; - ExprContext *econtext = planstate->ps_ExprContext; - /* For data reading, executor always omits detached partitions */ - if (estate->es_partition_directory == NULL) - estate->es_partition_directory = - CreatePartitionDirectory(estate->es_query_cxt, false); + Assert((estate != NULL) || + (partdir != NULL && econtext != NULL && rtable != NIL)); n_part_hierarchies = list_length(pruneinfo->prune_infos); Assert(n_part_hierarchies > 0); @@ -1759,19 +1874,48 @@ CreatePartitionPruneState(PlanState *planstate, PartitionPruneInfo *pruneinfo) PartitionedRelPruneInfo *pinfo = lfirst_node(PartitionedRelPruneInfo, lc2); PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j]; Relation partrel; + bool close_partrel = false; PartitionDesc partdesc; PartitionKey partkey; /* - * We can rely on the copies of the partitioned table's partition - * key and partition descriptor appearing in its relcache entry, - * because that entry will be held open and locked for the - * duration of this executor run. + * Must open the relation by ourselves when called before the + * execution has started, such as, when called during + * ExecutorDoInitialPruning() on a cached plan. In that case, + * sub-partitions must be locked, because AcquirePlannerLocks() + * would not have seen them. (1st relation in a partrelpruneinfos + * list is always the root partitioned table appearing in the + * query, which AcquirePlannerLocks() would have locked; the + * Assert in relation_open() guards that assumption.) 
+ */ + if (estate == NULL) + { + RangeTblEntry *rte = rt_fetch(pinfo->rtindex, rtable); + int lockmode = (j == 0) ? NoLock : rte->rellockmode; + + partrel = table_open(rte->relid, lockmode); + close_partrel = true; + } + else + partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex); + + /* + * We can rely on the copy of the partitioned table's partition + * key from its relcache entry, because it can't change (or + * get destroyed) as long as the relation is locked. Partition + * descriptor is taken from the PartitionDirectory associated with + * the table that is held open long enough for the descriptor to + * remain valid while it's used to perform the pruning steps. */ - partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex); partkey = RelationGetPartitionKey(partrel); - partdesc = PartitionDirectoryLookup(estate->es_partition_directory, - partrel); + partdesc = PartitionDirectoryLookup(partdir, partrel); + + /* + * Must close partrel, keeping the lock taken, if we're not using + * EState's entry. + */ + if (close_partrel) + table_close(partrel, NoLock); /* * Initialize the subplan_map and subpart_map. 
@@ -1785,6 +1929,7 @@ CreatePartitionPruneState(PlanState *planstate, PartitionPruneInfo *pruneinfo) Assert(partdesc->nparts >= pinfo->nparts); pprune->nparts = partdesc->nparts; pprune->subplan_map = palloc(sizeof(int) * partdesc->nparts); + pprune->rti_map = palloc(sizeof(Index) * partdesc->nparts); if (partdesc->nparts == pinfo->nparts) { /* @@ -1795,6 +1940,8 @@ CreatePartitionPruneState(PlanState *planstate, PartitionPruneInfo *pruneinfo) pprune->subpart_map = pinfo->subpart_map; memcpy(pprune->subplan_map, pinfo->subplan_map, sizeof(int) * pinfo->nparts); + memcpy(pprune->rti_map, pinfo->rti_map, + sizeof(int) * pinfo->nparts); /* * Double-check that the list of unpruned relations has not @@ -1845,6 +1992,8 @@ CreatePartitionPruneState(PlanState *planstate, PartitionPruneInfo *pruneinfo) pinfo->subplan_map[pd_idx]; pprune->subpart_map[pp_idx] = pinfo->subpart_map[pd_idx]; + pprune->rti_map[pp_idx] = + pinfo->rti_map[pd_idx]; pd_idx++; } else @@ -1852,6 +2001,7 @@ CreatePartitionPruneState(PlanState *planstate, PartitionPruneInfo *pruneinfo) /* this partdesc entry is not in the plan */ pprune->subplan_map[pp_idx] = -1; pprune->subpart_map[pp_idx] = -1; + pprune->rti_map[pp_idx] = 0; } } @@ -1873,7 +2023,7 @@ CreatePartitionPruneState(PlanState *planstate, PartitionPruneInfo *pruneinfo) * Initialize pruning contexts as needed. 
*/ pprune->initial_pruning_steps = pinfo->initial_pruning_steps; - if (pinfo->initial_pruning_steps) + if (consider_initial_steps && pinfo->initial_pruning_steps) { InitPartitionPruneContext(&pprune->initial_context, pinfo->initial_pruning_steps, @@ -1883,7 +2033,7 @@ CreatePartitionPruneState(PlanState *planstate, PartitionPruneInfo *pruneinfo) prunestate->do_initial_prune = true; } pprune->exec_pruning_steps = pinfo->exec_pruning_steps; - if (pinfo->exec_pruning_steps) + if (consider_exec_steps && pinfo->exec_pruning_steps) { InitPartitionPruneContext(&pprune->exec_context, pinfo->exec_pruning_steps, @@ -2111,10 +2261,14 @@ PartitionPruneFixSubPlanMap(PartitionPruneState *prunestate, * Pass initial_prune if PARAM_EXEC Params cannot yet be evaluated. This * differentiates the initial executor-time pruning step from later * runtime pruning. + * + * RT indexes of leaf partitions scanned by the chosen subplans are added to + * *scan_leafpart_rtis if the pointer is non-NULL. */ Bitmapset * ExecFindMatchingSubPlans(PartitionPruneState *prunestate, - bool initial_prune) + bool initial_prune, + Bitmapset **scan_leafpart_rtis) { Bitmapset *result = NULL; MemoryContext oldcontext; @@ -2149,7 +2303,7 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate, */ pprune = &prunedata->partrelprunedata[0]; find_matching_subplans_recurse(prunedata, pprune, initial_prune, - &result); + &result, scan_leafpart_rtis); /* Expression eval may have used space in ExprContext too */ if (pprune->exec_pruning_steps) @@ -2163,6 +2317,8 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate, /* Copy result out of the temp context before we reset it */ result = bms_copy(result); + if (scan_leafpart_rtis) + *scan_leafpart_rtis = bms_copy(*scan_leafpart_rtis); MemoryContextReset(prunestate->prune_context); @@ -2173,13 +2329,15 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate, * find_matching_subplans_recurse * Recursive worker function for ExecFindMatchingSubPlans * - * Adds 
valid (non-prunable) subplan IDs to *validsubplans + * Adds valid (non-prunable) subplan IDs to *validsubplans and RT indexes of + * the corresponding leaf partitions to *scan_leafpart_rtis (if asked for). */ static void find_matching_subplans_recurse(PartitionPruningData *prunedata, PartitionedRelPruningData *pprune, bool initial_prune, - Bitmapset **validsubplans) + Bitmapset **validsubplans, + Bitmapset **scan_leafpart_rtis) { Bitmapset *partset; int i; @@ -2206,8 +2364,13 @@ find_matching_subplans_recurse(PartitionPruningData *prunedata, while ((i = bms_next_member(partset, i)) >= 0) { if (pprune->subplan_map[i] >= 0) + { *validsubplans = bms_add_member(*validsubplans, pprune->subplan_map[i]); + if (scan_leafpart_rtis && pprune->rti_map[i] > 0) + *scan_leafpart_rtis = bms_add_member(*scan_leafpart_rtis, + pprune->rti_map[i]); + } else { int partidx = pprune->subpart_map[i]; @@ -2215,7 +2378,8 @@ find_matching_subplans_recurse(PartitionPruningData *prunedata, if (partidx >= 0) find_matching_subplans_recurse(prunedata, &prunedata->partrelprunedata[partidx], - initial_prune, validsubplans); + initial_prune, validsubplans, + scan_leafpart_rtis); else { /* diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 9df1f81ea8..639145abe9 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -119,6 +119,7 @@ CreateExecutorState(void) estate->es_relations = NULL; estate->es_rowmarks = NULL; estate->es_plannedstmt = NULL; + estate->es_part_prune_result = NULL; estate->es_junkFilter = NULL; diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c index f9460ae506..a2182a6b1f 100644 --- a/src/backend/executor/functions.c +++ b/src/backend/executor/functions.c @@ -844,7 +844,7 @@ postquel_start(execution_state *es, SQLFunctionCachePtr fcache) else dest = None_Receiver; - es->qd = CreateQueryDesc(es->stmt, + es->qd = CreateQueryDesc(es->stmt, NULL, fcache->src, GetActiveSnapshot(), 
InvalidSnapshot, diff --git a/src/backend/executor/nodeAppend.c b/src/backend/executor/nodeAppend.c index 357e10a1d7..09f26658e2 100644 --- a/src/backend/executor/nodeAppend.c +++ b/src/backend/executor/nodeAppend.c @@ -94,6 +94,7 @@ static bool ExecAppendAsyncRequest(AppendState *node, TupleTableSlot **result); static void ExecAppendAsyncEventWait(AppendState *node); static void classify_matching_subplans(AppendState *node); + /* ---------------------------------------------------------------- * ExecInitAppend * @@ -134,7 +135,7 @@ ExecInitAppend(Append *node, EState *estate, int eflags) appendstate->as_begun = false; /* If run-time partition pruning is enabled, then set that up now */ - if (node->part_prune_info != NULL) + if (node->part_prune_index >= 0) { PartitionPruneState *prunestate; @@ -145,7 +146,7 @@ ExecInitAppend(Append *node, EState *estate, int eflags) */ prunestate = ExecInitPartitionPruning(&appendstate->ps, list_length(node->appendplans), - node->part_prune_info, + node->part_prune_index, &validsubplans); appendstate->as_prune_state = prunestate; nplans = bms_num_members(validsubplans); @@ -155,7 +156,8 @@ ExecInitAppend(Append *node, EState *estate, int eflags) * subplan, we can fill as_valid_subplans immediately, preventing * later calls to ExecFindMatchingSubPlans. 
*/ - if (!prunestate->do_exec_prune && nplans > 0) + if (appendstate->as_prune_state == NULL || + (!appendstate->as_prune_state->do_exec_prune && nplans > 0)) appendstate->as_valid_subplans = bms_add_range(NULL, 0, nplans - 1); } else @@ -577,7 +579,7 @@ choose_next_subplan_locally(AppendState *node) } else if (node->as_valid_subplans == NULL) node->as_valid_subplans = - ExecFindMatchingSubPlans(node->as_prune_state, false); + ExecFindMatchingSubPlans(node->as_prune_state, false, NULL); whichplan = -1; } @@ -642,7 +644,7 @@ choose_next_subplan_for_leader(AppendState *node) if (node->as_valid_subplans == NULL) { node->as_valid_subplans = - ExecFindMatchingSubPlans(node->as_prune_state, false); + ExecFindMatchingSubPlans(node->as_prune_state, false, NULL); /* * Mark each invalid plan as finished to allow the loop below to @@ -717,7 +719,7 @@ choose_next_subplan_for_worker(AppendState *node) else if (node->as_valid_subplans == NULL) { node->as_valid_subplans = - ExecFindMatchingSubPlans(node->as_prune_state, false); + ExecFindMatchingSubPlans(node->as_prune_state, false, NULL); mark_invalid_subplans_as_finished(node); } @@ -868,7 +870,7 @@ ExecAppendAsyncBegin(AppendState *node) if (node->as_valid_subplans == NULL) { node->as_valid_subplans = - ExecFindMatchingSubPlans(node->as_prune_state, false); + ExecFindMatchingSubPlans(node->as_prune_state, false, NULL); classify_matching_subplans(node); } diff --git a/src/backend/executor/nodeMergeAppend.c b/src/backend/executor/nodeMergeAppend.c index ecf9052e03..7708cfffda 100644 --- a/src/backend/executor/nodeMergeAppend.c +++ b/src/backend/executor/nodeMergeAppend.c @@ -82,7 +82,7 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags) mergestate->ps.ExecProcNode = ExecMergeAppend; /* If run-time partition pruning is enabled, then set that up now */ - if (node->part_prune_info != NULL) + if (node->part_prune_index >= 0) { PartitionPruneState *prunestate; @@ -93,7 +93,7 @@ ExecInitMergeAppend(MergeAppend *node, 
EState *estate, int eflags) */ prunestate = ExecInitPartitionPruning(&mergestate->ps, list_length(node->mergeplans), - node->part_prune_info, + node->part_prune_index, &validsubplans); mergestate->ms_prune_state = prunestate; nplans = bms_num_members(validsubplans); @@ -103,7 +103,8 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags) * subplan, we can fill as_valid_subplans immediately, preventing * later calls to ExecFindMatchingSubPlans. */ - if (!prunestate->do_exec_prune && nplans > 0) + if (mergestate->ms_prune_state == NULL || + (!mergestate->ms_prune_state->do_exec_prune && nplans > 0)) mergestate->ms_valid_subplans = bms_add_range(NULL, 0, nplans - 1); } else @@ -218,7 +219,7 @@ ExecMergeAppend(PlanState *pstate) */ if (node->ms_valid_subplans == NULL) node->ms_valid_subplans = - ExecFindMatchingSubPlans(node->ms_prune_state, false); + ExecFindMatchingSubPlans(node->ms_prune_state, false, NULL); /* * First time through: pull the first tuple from each valid subplan, diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index 042a5f8b0a..05db2e9de1 100644 --- a/src/backend/executor/spi.c +++ b/src/backend/executor/spi.c @@ -2473,7 +2473,9 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options, { CachedPlanSource *plansource = (CachedPlanSource *) lfirst(lc1); List *stmt_list; - ListCell *lc2; + List *part_prune_result_list; + ListCell *lc2, + *lc3; spicallbackarg.query = plansource->query_string; @@ -2552,6 +2554,7 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options, plan_owner, _SPI_current->queryEnv); stmt_list = cplan->stmt_list; + part_prune_result_list = cplan->part_prune_result_list; /* * If we weren't given a specific snapshot to use, and the statement @@ -2589,9 +2592,10 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options, } } - foreach(lc2, stmt_list) + forboth(lc2, stmt_list, lc3, part_prune_result_list) { PlannedStmt *stmt = lfirst_node(PlannedStmt, lc2); + 
PartitionPruneResult *part_prune_result = lfirst_node(PartitionPruneResult, lc3); bool canSetTag = stmt->canSetTag; DestReceiver *dest; @@ -2663,7 +2667,7 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options, else snap = InvalidSnapshot; - qdesc = CreateQueryDesc(stmt, + qdesc = CreateQueryDesc(stmt, part_prune_result, plansource->query_string, snap, crosscheck_snapshot, dest, diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 46a1943d97..c5c70593de 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -96,7 +96,10 @@ _copyPlannedStmt(const PlannedStmt *from) COPY_SCALAR_FIELD(parallelModeNeeded); COPY_SCALAR_FIELD(jitFlags); COPY_NODE_FIELD(planTree); + COPY_NODE_FIELD(partPruneInfos); + COPY_SCALAR_FIELD(containsInitialPruning); COPY_NODE_FIELD(rtable); + COPY_BITMAPSET_FIELD(minLockRelids); COPY_NODE_FIELD(resultRelations); COPY_NODE_FIELD(appendRelations); COPY_NODE_FIELD(subplans); @@ -253,7 +256,7 @@ _copyAppend(const Append *from) COPY_NODE_FIELD(appendplans); COPY_SCALAR_FIELD(nasyncplans); COPY_SCALAR_FIELD(first_partial_plan); - COPY_NODE_FIELD(part_prune_info); + COPY_SCALAR_FIELD(part_prune_index); return newnode; } @@ -281,7 +284,7 @@ _copyMergeAppend(const MergeAppend *from) COPY_POINTER_FIELD(sortOperators, from->numCols * sizeof(Oid)); COPY_POINTER_FIELD(collations, from->numCols * sizeof(Oid)); COPY_POINTER_FIELD(nullsFirst, from->numCols * sizeof(bool)); - COPY_NODE_FIELD(part_prune_info); + COPY_SCALAR_FIELD(part_prune_index); return newnode; } @@ -1280,6 +1283,8 @@ _copyPartitionPruneInfo(const PartitionPruneInfo *from) PartitionPruneInfo *newnode = makeNode(PartitionPruneInfo); COPY_NODE_FIELD(prune_infos); + COPY_SCALAR_FIELD(needs_init_pruning); + COPY_SCALAR_FIELD(needs_exec_pruning); COPY_BITMAPSET_FIELD(other_subplans); return newnode; @@ -1296,6 +1301,7 @@ _copyPartitionedRelPruneInfo(const PartitionedRelPruneInfo *from) COPY_POINTER_FIELD(subplan_map, 
from->nparts * sizeof(int)); COPY_POINTER_FIELD(subpart_map, from->nparts * sizeof(int)); COPY_POINTER_FIELD(relid_map, from->nparts * sizeof(Oid)); + COPY_POINTER_FIELD(rti_map, from->nparts * sizeof(Index)); COPY_NODE_FIELD(initial_pruning_steps); COPY_NODE_FIELD(exec_pruning_steps); COPY_BITMAPSET_FIELD(execparamids); @@ -5469,6 +5475,21 @@ _copyExtensibleNode(const ExtensibleNode *from) return newnode; } +/* **************************************************************** + * execnodes.h copy functions + * **************************************************************** + */ +static PartitionPruneResult * +_copyPartitionPruneResult(const PartitionPruneResult *from) +{ + PartitionPruneResult *newnode = makeNode(PartitionPruneResult); + + COPY_BITMAPSET_FIELD(scan_leafpart_rtis); + COPY_NODE_FIELD(valid_subplan_offs_list); + + return newnode; +} + /* **************************************************************** * value.h copy functions * **************************************************************** @@ -5523,7 +5544,6 @@ _copyBitString(const BitString *from) return newnode; } - static ForeignKeyCacheInfo * _copyForeignKeyCacheInfo(const ForeignKeyCacheInfo *from) { @@ -6565,6 +6585,13 @@ copyObjectImpl(const void *from) retval = _copyPublicationTable(from); break; + /* + * EXECUTION NODES + */ + case T_PartitionPruneResult: + retval = _copyPartitionPruneResult(from); + break; + /* * MISCELLANEOUS NODES */ diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 13e1643530..ca54022fee 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -314,7 +314,10 @@ _outPlannedStmt(StringInfo str, const PlannedStmt *node) WRITE_BOOL_FIELD(parallelModeNeeded); WRITE_INT_FIELD(jitFlags); WRITE_NODE_FIELD(planTree); + WRITE_NODE_FIELD(partPruneInfos); + WRITE_BOOL_FIELD(containsInitialPruning); WRITE_NODE_FIELD(rtable); + WRITE_BITMAPSET_FIELD(minLockRelids); WRITE_NODE_FIELD(resultRelations); 
WRITE_NODE_FIELD(appendRelations); WRITE_NODE_FIELD(subplans); @@ -443,7 +446,7 @@ _outAppend(StringInfo str, const Append *node) WRITE_NODE_FIELD(appendplans); WRITE_INT_FIELD(nasyncplans); WRITE_INT_FIELD(first_partial_plan); - WRITE_NODE_FIELD(part_prune_info); + WRITE_INT_FIELD(part_prune_index); } static void @@ -460,7 +463,7 @@ _outMergeAppend(StringInfo str, const MergeAppend *node) WRITE_OID_ARRAY(sortOperators, node->numCols); WRITE_OID_ARRAY(collations, node->numCols); WRITE_BOOL_ARRAY(nullsFirst, node->numCols); - WRITE_NODE_FIELD(part_prune_info); + WRITE_INT_FIELD(part_prune_index); } static void @@ -1006,6 +1009,8 @@ _outPartitionPruneInfo(StringInfo str, const PartitionPruneInfo *node) WRITE_NODE_TYPE("PARTITIONPRUNEINFO"); WRITE_NODE_FIELD(prune_infos); + WRITE_BOOL_FIELD(needs_init_pruning); + WRITE_BOOL_FIELD(needs_exec_pruning); WRITE_BITMAPSET_FIELD(other_subplans); } @@ -1020,6 +1025,7 @@ _outPartitionedRelPruneInfo(StringInfo str, const PartitionedRelPruneInfo *node) WRITE_INT_ARRAY(subplan_map, node->nparts); WRITE_INT_ARRAY(subpart_map, node->nparts); WRITE_OID_ARRAY(relid_map, node->nparts); + WRITE_INDEX_ARRAY(rti_map, node->nparts); WRITE_NODE_FIELD(initial_pruning_steps); WRITE_NODE_FIELD(exec_pruning_steps); WRITE_BITMAPSET_FIELD(execparamids); @@ -2420,6 +2426,9 @@ _outPlannerGlobal(StringInfo str, const PlannerGlobal *node) WRITE_NODE_FIELD(finalrowmarks); WRITE_NODE_FIELD(resultRelations); WRITE_NODE_FIELD(appendRelations); + WRITE_NODE_FIELD(partPruneInfos); + WRITE_BOOL_FIELD(containsInitialPruning); + WRITE_BITMAPSET_FIELD(minLockRelids); WRITE_NODE_FIELD(relationOids); WRITE_NODE_FIELD(invalItems); WRITE_NODE_FIELD(paramExecTypes); @@ -2487,6 +2496,7 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node) WRITE_BITMAPSET_FIELD(curOuterRels); WRITE_NODE_FIELD(curOuterParams); WRITE_BOOL_FIELD(partColsUpdated); + WRITE_NODE_FIELD(partPruneInfos); } static void @@ -2840,6 +2850,21 @@ _outExtensibleNode(StringInfo str, const 
ExtensibleNode *node) methods->nodeOut(str, node); } +/***************************************************************************** + * + * Stuff from execnodes.h + * + *****************************************************************************/ + +static void +_outPartitionPruneResult(StringInfo str, const PartitionPruneResult *node) +{ + WRITE_NODE_TYPE("PARTITIONPRUNERESULT"); + + WRITE_BITMAPSET_FIELD(scan_leafpart_rtis); + WRITE_NODE_FIELD(valid_subplan_offs_list); +} + /***************************************************************************** * * Stuff from parsenodes.h. @@ -4748,6 +4773,13 @@ outNode(StringInfo str, const void *obj) _outJsonTableSibling(str, obj); break; + /* + * EXECUTION NODES + */ + case T_PartitionPruneResult: + _outPartitionPruneResult(str, obj); + break; + default: /* diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 48f7216c9e..acce5e29cc 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -164,6 +164,11 @@ token = pg_strtok(&length); /* skip :fldname */ \ local_node->fldname = readIntCols(len) +/* Read an Index array */ +#define READ_INDEX_ARRAY(fldname, len) \ + token = pg_strtok(&length); /* skip :fldname */ \ + local_node->fldname = readIndexCols(len) + /* Read a bool array */ #define READ_BOOL_ARRAY(fldname, len) \ token = pg_strtok(&length); /* skip :fldname */ \ @@ -1814,7 +1819,10 @@ _readPlannedStmt(void) READ_BOOL_FIELD(parallelModeNeeded); READ_INT_FIELD(jitFlags); READ_NODE_FIELD(planTree); + READ_NODE_FIELD(partPruneInfos); + READ_BOOL_FIELD(containsInitialPruning); READ_NODE_FIELD(rtable); + READ_BITMAPSET_FIELD(minLockRelids); READ_NODE_FIELD(resultRelations); READ_NODE_FIELD(appendRelations); READ_NODE_FIELD(subplans); @@ -1946,7 +1954,7 @@ _readAppend(void) READ_NODE_FIELD(appendplans); READ_INT_FIELD(nasyncplans); READ_INT_FIELD(first_partial_plan); - READ_NODE_FIELD(part_prune_info); + READ_INT_FIELD(part_prune_index); READ_DONE(); } @@ -1968,7 
+1976,7 @@ _readMergeAppend(void) READ_OID_ARRAY(sortOperators, local_node->numCols); READ_OID_ARRAY(collations, local_node->numCols); READ_BOOL_ARRAY(nullsFirst, local_node->numCols); - READ_NODE_FIELD(part_prune_info); + READ_INT_FIELD(part_prune_index); READ_DONE(); } @@ -2763,6 +2771,8 @@ _readPartitionPruneInfo(void) READ_LOCALS(PartitionPruneInfo); READ_NODE_FIELD(prune_infos); + READ_BOOL_FIELD(needs_init_pruning); + READ_BOOL_FIELD(needs_exec_pruning); READ_BITMAPSET_FIELD(other_subplans); READ_DONE(); @@ -2779,6 +2789,7 @@ _readPartitionedRelPruneInfo(void) READ_INT_ARRAY(subplan_map, local_node->nparts); READ_INT_ARRAY(subpart_map, local_node->nparts); READ_OID_ARRAY(relid_map, local_node->nparts); + READ_INDEX_ARRAY(rti_map, local_node->nparts); READ_NODE_FIELD(initial_pruning_steps); READ_NODE_FIELD(exec_pruning_steps); READ_BITMAPSET_FIELD(execparamids); @@ -2932,6 +2943,21 @@ _readPartitionRangeDatum(void) READ_DONE(); } + +/* + * _readPartitionPruneResult + */ +static PartitionPruneResult * +_readPartitionPruneResult(void) +{ + READ_LOCALS(PartitionPruneResult); + + READ_BITMAPSET_FIELD(scan_leafpart_rtis); + READ_NODE_FIELD(valid_subplan_offs_list); + + READ_DONE(); +} + /* * parseNodeString * @@ -3229,6 +3255,8 @@ parseNodeString(void) return_value = _readJsonTableParent(); else if (MATCH("JSONTABSNODE", 12)) return_value = _readJsonTableSibling(); + else if (MATCH("PARTITIONPRUNERESULT", 20)) + return_value = _readPartitionPruneResult(); else { elog(ERROR, "badly formatted node string \"%.32s\"...", token); @@ -3372,6 +3400,30 @@ readIntCols(int numCols) return int_vals; } +/* + * readIndexCols + */ +Index * +readIndexCols(int numCols) +{ + int tokenLength, + i; + const char *token; + Index *index_vals; + + if (numCols <= 0) + return NULL; + + index_vals = (Index *) palloc(numCols * sizeof(Index)); + for (i = 0; i < numCols; i++) + { + token = pg_strtok(&tokenLength); + index_vals[i] = atoui(token); + } + + return index_vals; +} + /* * 
readBoolCols */ diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 51591bb812..453f720759 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -1366,7 +1366,15 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) plan->appendplans = subplans; plan->nasyncplans = nasyncplans; plan->first_partial_plan = best_path->first_partial_path; - plan->part_prune_info = partpruneinfo; + + if (partpruneinfo) + { + root->partPruneInfos = lappend(root->partPruneInfos, partpruneinfo); + /* Will be updated later in set_plan_references(). */ + plan->part_prune_index = list_length(root->partPruneInfos) - 1; + } + else + plan->part_prune_index = -1; copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -1528,7 +1536,15 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, } node->mergeplans = subplans; - node->part_prune_info = partpruneinfo; + + if (partpruneinfo) + { + root->partPruneInfos = lappend(root->partPruneInfos, partpruneinfo); + /* Will be updated later in set_plan_references(). 
*/ + node->part_prune_index = list_length(root->partPruneInfos) - 1; + } + else + node->part_prune_index = -1; /* * If prepare_sort_from_pathkeys added sort columns, but we were told to diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index b2569c5d0c..2aa051d862 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -518,7 +518,10 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, result->dependsOnRole = glob->dependsOnRole; result->parallelModeNeeded = glob->parallelModeNeeded; result->planTree = top_plan; + result->partPruneInfos = glob->partPruneInfos; + result->containsInitialPruning = glob->containsInitialPruning; result->rtable = glob->finalrtable; + result->minLockRelids = glob->minLockRelids; result->resultRelations = glob->resultRelations; result->appendRelations = glob->appendRelations; result->subplans = glob->subplans; diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 7519723081..fc66986e1c 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -251,7 +251,7 @@ set_plan_references(PlannerInfo *root, Plan *plan) Plan *result; PlannerGlobal *glob = root->glob; int rtoffset = list_length(glob->finalrtable); - ListCell *lc; + ListCell *lc; /* * Add all the query's RTEs to the flattened rangetable. The live ones @@ -260,6 +260,16 @@ set_plan_references(PlannerInfo *root, Plan *plan) */ add_rtes_to_flat_rtable(root, false); + /* + * Add the query's adjusted range of RT indexes to glob->minLockRelids. + * The adjusted RT indexes of prunable relations will be deleted from the + * set below where PartitionPruneInfos are processed. 
+ */ + glob->minLockRelids = + bms_add_range(glob->minLockRelids, + rtoffset + 1, + rtoffset + list_length(root->parse->rtable)); + /* * Adjust RT indexes of PlanRowMarks and add to final rowmarks list */ @@ -338,6 +348,56 @@ set_plan_references(PlannerInfo *root, Plan *plan) } } + /* Also fix up the information in PartitionPruneInfos. */ + foreach (lc, root->partPruneInfos) + { + PartitionPruneInfo *pruneinfo = lfirst(lc); + Bitmapset *leafpart_rtis = NULL; + ListCell *l; + + foreach(l, pruneinfo->prune_infos) + { + List *prune_infos = lfirst(l); + ListCell *l2; + + foreach(l2, prune_infos) + { + PartitionedRelPruneInfo *pinfo = lfirst(l2); + int i; + + /* RT index of the partitioned table. */ + pinfo->rtindex += rtoffset; + + /* And also those of the leaf partitions. */ + for (i = 0; i < pinfo->nparts; i++) + { + if (pinfo->rti_map[i] > 0) + { + pinfo->rti_map[i] += rtoffset; + leafpart_rtis = bms_add_member(leafpart_rtis, + pinfo->rti_map[i]); + } + } + } + } + + if (pruneinfo->needs_init_pruning) + { + glob->containsInitialPruning = true; + + /* + * Delete the leaf partition RTIs from the global set of relations + * to be locked before executing the plan. AcquireExecutorLocks() + * will find the ones to add to the set after performing initial + * pruning. + */ + glob->minLockRelids = bms_del_members(glob->minLockRelids, + leafpart_rtis); + } + + glob->partPruneInfos = lappend(glob->partPruneInfos, pruneinfo); + } + return result; } @@ -1610,21 +1670,12 @@ set_append_references(PlannerInfo *root, aplan->apprelids = offset_relid_set(aplan->apprelids, rtoffset); - if (aplan->part_prune_info) - { - foreach(l, aplan->part_prune_info->prune_infos) - { - List *prune_infos = lfirst(l); - ListCell *l2; - - foreach(l2, prune_infos) - { - PartitionedRelPruneInfo *pinfo = lfirst(l2); - - pinfo->rtindex += rtoffset; - } - } - } + /* + * PartitionPruneInfos will be added to a list in PlannerGlobal, so update + * the index. 
+ */ + if (aplan->part_prune_index >= 0) + aplan->part_prune_index += list_length(root->glob->partPruneInfos); /* We don't need to recurse to lefttree or righttree ... */ Assert(aplan->plan.lefttree == NULL); @@ -1682,21 +1733,12 @@ set_mergeappend_references(PlannerInfo *root, mplan->apprelids = offset_relid_set(mplan->apprelids, rtoffset); - if (mplan->part_prune_info) - { - foreach(l, mplan->part_prune_info->prune_infos) - { - List *prune_infos = lfirst(l); - ListCell *l2; - - foreach(l2, prune_infos) - { - PartitionedRelPruneInfo *pinfo = lfirst(l2); - - pinfo->rtindex += rtoffset; - } - } - } + /* + * PartitionPruneInfos will be added to a list in PlannerGlobal, so update + * the index. + */ + if (mplan->part_prune_index >= 0) + mplan->part_prune_index += list_length(root->glob->partPruneInfos); /* We don't need to recurse to lefttree or righttree ... */ Assert(mplan->plan.lefttree == NULL); diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index 9d3c05aed3..0eaff15ed0 100644 --- a/src/backend/partitioning/partprune.c +++ b/src/backend/partitioning/partprune.c @@ -144,7 +144,9 @@ static List *make_partitionedrel_pruneinfo(PlannerInfo *root, List *prunequal, Bitmapset *partrelids, int *relid_subplan_map, - Bitmapset **matchedsubplans); + Bitmapset **matchedsubplans, + bool *needs_init_pruning, + bool *needs_exec_pruning); static void gen_partprune_steps(RelOptInfo *rel, List *clauses, PartClauseTarget target, GeneratePruningStepsContext *context); @@ -230,6 +232,8 @@ make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, int *relid_subplan_map; ListCell *lc; int i; + bool needs_init_pruning = false; + bool needs_exec_pruning = false; /* * Scan the subpaths to see which ones are scans of partition child @@ -309,12 +313,16 @@ make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, Bitmapset *partrelids = (Bitmapset *) lfirst(lc); List *pinfolist; Bitmapset *matchedsubplans = NULL; + bool 
partrel_needs_init_pruning; + bool partrel_needs_exec_pruning; pinfolist = make_partitionedrel_pruneinfo(root, parentrel, prunequal, partrelids, relid_subplan_map, - &matchedsubplans); + &matchedsubplans, + &partrel_needs_init_pruning, + &partrel_needs_exec_pruning); /* When pruning is possible, record the matched subplans */ if (pinfolist != NIL) @@ -323,6 +331,10 @@ make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, allmatchedsubplans = bms_join(matchedsubplans, allmatchedsubplans); } + if (!needs_init_pruning) + needs_init_pruning = partrel_needs_init_pruning; + if (!needs_exec_pruning) + needs_exec_pruning = partrel_needs_exec_pruning; } pfree(relid_subplan_map); @@ -337,6 +349,8 @@ make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, /* Else build the result data structure */ pruneinfo = makeNode(PartitionPruneInfo); pruneinfo->prune_infos = prunerelinfos; + pruneinfo->needs_init_pruning = needs_init_pruning; + pruneinfo->needs_exec_pruning = needs_exec_pruning; /* * Some subplans may not belong to any of the identified partitioned rels. @@ -435,13 +449,18 @@ add_part_relids(List *allpartrelids, Bitmapset *partrelids) * If we cannot find any useful run-time pruning steps, return NIL. * However, on success, each rel identified in partrelids will have * an element in the result list, even if some of them are useless. + * *needs_init_pruning and *needs_exec_pruning are set to indicate that the + * returned PartitionedRelPruneInfos contains pruning steps that can be + * performed before and after execution begins, respectively. 
*/ static List * make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, List *prunequal, Bitmapset *partrelids, int *relid_subplan_map, - Bitmapset **matchedsubplans) + Bitmapset **matchedsubplans, + bool *needs_init_pruning, + bool *needs_exec_pruning) { RelOptInfo *targetpart = NULL; List *pinfolist = NIL; @@ -452,6 +471,10 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, int rti; int i; + /* Will find out below. */ + *needs_init_pruning = false; + *needs_exec_pruning = false; + /* * Examine each partitioned rel, constructing a temporary array to map * from planner relids to index of the partitioned rel, and building a @@ -539,6 +562,9 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, * executor per-scan pruning steps. This first pass creates startup * pruning steps and detects whether there's any possibly-useful quals * that would require per-scan pruning. + * + * In the first pass, we note whether the 2nd pass is necessary by + * noting the presence of EXEC parameters. */ gen_partprune_steps(subpart, partprunequal, PARTTARGET_INITIAL, &context); @@ -613,6 +639,11 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, pinfo->execparamids = execparamids; /* Remaining fields will be filled in the next loop */ + if (!*needs_init_pruning) + *needs_init_pruning = (initial_pruning_steps != NIL); + if (!*needs_exec_pruning) + *needs_exec_pruning = (exec_pruning_steps != NIL); + pinfolist = lappend(pinfolist, pinfo); } @@ -640,6 +671,7 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, int *subplan_map; int *subpart_map; Oid *relid_map; + Index *rti_map; /* * Construct the subplan and subpart maps for this partitioning level. 
@@ -652,6 +684,7 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, subpart_map = (int *) palloc(nparts * sizeof(int)); memset(subpart_map, -1, nparts * sizeof(int)); relid_map = (Oid *) palloc0(nparts * sizeof(Oid)); + rti_map = (Index *) palloc0(nparts * sizeof(Index)); present_parts = NULL; i = -1; @@ -666,6 +699,7 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, subplan_map[i] = subplanidx = relid_subplan_map[partrel->relid] - 1; subpart_map[i] = subpartidx = relid_subpart_map[partrel->relid] - 1; relid_map[i] = planner_rt_fetch(partrel->relid, root)->relid; + rti_map[i] = partrel->relid; if (subplanidx >= 0) { present_parts = bms_add_member(present_parts, i); @@ -690,6 +724,7 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, pinfo->subplan_map = subplan_map; pinfo->subpart_map = subpart_map; pinfo->relid_map = relid_map; + pinfo->rti_map = rti_map; } pfree(relid_subpart_map); diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index 5aa5a350f3..163ba956c4 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -35,7 +35,7 @@ Portal ActivePortal = NULL; -static void ProcessQuery(PlannedStmt *plan, +static void ProcessQuery(PlannedStmt *plan, PartitionPruneResult *part_prune_result, const char *sourceText, ParamListInfo params, QueryEnvironment *queryEnv, @@ -65,6 +65,7 @@ static void DoPortalRewind(Portal portal); */ QueryDesc * CreateQueryDesc(PlannedStmt *plannedstmt, + PartitionPruneResult *part_prune_result, const char *sourceText, Snapshot snapshot, Snapshot crosscheck_snapshot, @@ -77,6 +78,8 @@ CreateQueryDesc(PlannedStmt *plannedstmt, qd->operation = plannedstmt->commandType; /* operation */ qd->plannedstmt = plannedstmt; /* plan */ + qd->part_prune_result = part_prune_result; /* ExecutorDoInitialPruning() + * output for plan */ qd->sourceText = sourceText; /* query text */ qd->snapshot = RegisterSnapshot(snapshot); /* snapshot */ /* RI check 
snapshot */ @@ -122,6 +125,7 @@ FreeQueryDesc(QueryDesc *qdesc) * PORTAL_ONE_RETURNING, or PORTAL_ONE_MOD_WITH portal * * plan: the plan tree for the query + * part_prune_result: ExecutorDoInitialPruning() output for the plan tree * sourceText: the source text of the query * params: any parameters needed * dest: where to send results @@ -134,6 +138,7 @@ FreeQueryDesc(QueryDesc *qdesc) */ static void ProcessQuery(PlannedStmt *plan, + PartitionPruneResult *part_prune_result, const char *sourceText, ParamListInfo params, QueryEnvironment *queryEnv, @@ -145,7 +150,7 @@ ProcessQuery(PlannedStmt *plan, /* * Create the QueryDesc object */ - queryDesc = CreateQueryDesc(plan, sourceText, + queryDesc = CreateQueryDesc(plan, part_prune_result, sourceText, GetActiveSnapshot(), InvalidSnapshot, dest, params, queryEnv, 0); @@ -491,8 +496,14 @@ PortalStart(Portal portal, ParamListInfo params, /* * Create QueryDesc in portal's context; for the moment, set * the destination to DestNone. + * + * There is no PartitionPruneResult unless the PlannedStmt is + * from a CachedPlan. */ queryDesc = CreateQueryDesc(linitial_node(PlannedStmt, portal->stmts), + portal->cplan == NULL ? NULL : + linitial_node(PartitionPruneResult, + portal->cplan->part_prune_result_list), portal->sourceText, GetActiveSnapshot(), InvalidSnapshot, @@ -1194,6 +1205,9 @@ PortalRunMulti(Portal portal, { bool active_snapshot_set = false; ListCell *stmtlist_item; + int i; + List *part_prune_results = portal->cplan == NULL ? NIL: + portal->cplan->part_prune_result_list; /* * If the destination is DestRemoteExecute, change to DestNone. The @@ -1214,9 +1228,15 @@ PortalRunMulti(Portal portal, * Loop to handle the individual queries generated from a single parsetree * by analysis and rewrite. */ + i = 0; foreach(stmtlist_item, portal->stmts) { PlannedStmt *pstmt = lfirst_node(PlannedStmt, stmtlist_item); + PartitionPruneResult *part_prune_result = part_prune_results ? 
+ list_nth(part_prune_results, i) : + NULL; + + i++; /* * If we got a cancel signal in prior command, quit @@ -1274,7 +1294,7 @@ PortalRunMulti(Portal portal, if (pstmt->canSetTag) { /* statement can set tag string */ - ProcessQuery(pstmt, + ProcessQuery(pstmt, part_prune_result, portal->sourceText, portal->portalParams, portal->queryEnv, @@ -1283,7 +1303,7 @@ PortalRunMulti(Portal portal, else { /* stmt added by rewrite cannot set tag */ - ProcessQuery(pstmt, + ProcessQuery(pstmt, part_prune_result, portal->sourceText, portal->portalParams, portal->queryEnv, diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index 4cf6db504f..216401bcfb 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -99,14 +99,16 @@ static dlist_head cached_expression_list = DLIST_STATIC_INIT(cached_expression_l static void ReleaseGenericPlan(CachedPlanSource *plansource); static List *RevalidateCachedQuery(CachedPlanSource *plansource, QueryEnvironment *queryEnv); -static bool CheckCachedPlan(CachedPlanSource *plansource); +static bool CheckCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams); static CachedPlan *BuildCachedPlan(CachedPlanSource *plansource, List *qlist, ParamListInfo boundParams, QueryEnvironment *queryEnv); +static void CachedPlanSavePartitionPruneResults(CachedPlan *plan, List *part_prune_result_list); static bool choose_custom_plan(CachedPlanSource *plansource, ParamListInfo boundParams); static double cached_plan_cost(CachedPlan *plan, bool include_planner); static Query *QueryListGetPrimaryStmt(List *stmts); -static void AcquireExecutorLocks(List *stmt_list, bool acquire); +static List *AcquireExecutorLocks(List *stmt_list, ParamListInfo boundParams); +static void ReleaseExecutorLocks(List *stmt_list, List *part_prune_result_list); static void AcquirePlannerLocks(List *stmt_list, bool acquire); static void ScanQueryForLocks(Query *parsetree, bool acquire); static bool 
ScanQueryWalker(Node *node, bool *acquire); @@ -790,9 +792,21 @@ RevalidateCachedQuery(CachedPlanSource *plansource, * * On a "true" return, we have acquired the locks needed to run the plan. * (We must do this for the "true" result to be race-condition-free.) + * + * If the CachedPlan is valid, this may in some cases call + * ExecutorDoInitialPruning() on each PlannedStmt contained in it to determine + * the set of relations to be locked by AcquireExecutorLocks(), instead of just + * scanning its range table, which is done to prune away any nodes in the tree + * that need not be executed based on the result of initial partition pruning. + * The result of pruning, a List of PartitionPruneResult nodes (one for each + * PlannedStmt), is copied into a child context of the context containing the + * plan itself and saved in plan->part_prune_result_list. The previous contents + * of the list from the last invocation on the same CachedPlan are deleted, + * because they would no longer be valid given the fresh set of parameter + * values which may be used as pruning parameters. */ static bool -CheckCachedPlan(CachedPlanSource *plansource) +CheckCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams) { CachedPlan *plan = plansource->gplan; @@ -820,13 +834,24 @@ CheckCachedPlan(CachedPlanSource *plansource) */ if (plan->is_valid) { + List *part_prune_result_list; + /* * Plan must have positive refcount because it is referenced by * plansource; so no need to fear it disappears under us here. */ Assert(plan->refcount > 0); - AcquireExecutorLocks(plan->stmt_list, true); + /* + * Lock relations scanned by the plan.
If ExecutorDoInitialPruning() + * asked to omit some relations because the plan nodes that scan them + * were found to be pruned, the executor will be informed of the + * omission of the plan nodes themselves via part_prune_result_list + * that is passed to it along with the list of PlannedStmts, so that + * it doesn't accidentally try to execute those nodes. + */ + part_prune_result_list = AcquireExecutorLocks(plan->stmt_list, + boundParams); /* * If plan was transient, check to see if TransactionXmin has @@ -844,11 +869,14 @@ CheckCachedPlan(CachedPlanSource *plansource) if (plan->is_valid) { /* Successfully revalidated and locked the query. */ + + /* Remember pruning results in the CachedPlan. */ + CachedPlanSavePartitionPruneResults(plan, part_prune_result_list); return true; } /* Oops, the race case happened. Release useless locks. */ - AcquireExecutorLocks(plan->stmt_list, false); + ReleaseExecutorLocks(plan->stmt_list, part_prune_result_list); } /* @@ -880,10 +908,12 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist, ParamListInfo boundParams, QueryEnvironment *queryEnv) { CachedPlan *plan; - List *plist; + List *plist, + *dummy_part_prune_result_list; bool snapshot_set; bool is_transient; - MemoryContext plan_context; + MemoryContext plan_context, + part_prune_result_context; MemoryContext oldcxt = CurrentMemoryContext; ListCell *lc; @@ -962,6 +992,16 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist, else plan_context = CurrentMemoryContext; + /* + * Also create a dedicated context for part_prune_result_list, making it + * a child of plan_context. + */ + part_prune_result_context = AllocSetContextCreate(CurrentMemoryContext, + "CachedPlan part_prune_results list", + ALLOCSET_START_SMALL_SIZES); + MemoryContextSetParent(part_prune_result_context, plan_context); + MemoryContextSetIdentifier(part_prune_result_context, plan_context->ident); + /* * Create and fill the CachedPlan struct within the new context. 
*/ @@ -977,10 +1017,20 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist, plan->planRoleId = GetUserId(); plan->dependsOnRole = plansource->dependsOnRLS; is_transient = false; + dummy_part_prune_result_list = NIL; foreach(lc, plist) { PlannedStmt *plannedstmt = lfirst_node(PlannedStmt, lc); + /* + * Real values will be added during subsequent CheckCachedPlan() calls + * on this plan, but we must add "something" for now, because users of + * CachedPlan expect stmt_list and part_prune_result_list to have + * the same number of elements. + */ + dummy_part_prune_result_list = lappend(dummy_part_prune_result_list, + NULL); + if (plannedstmt->commandType == CMD_UTILITY) continue; /* Ignore utility statements */ @@ -1002,6 +1052,13 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist, plan->is_saved = false; plan->is_valid = true; + /* + * While still dummy, save the list so that it is discarded on next use of + * the CachedPlan. + */ + plan->part_prune_result_context = part_prune_result_context; + CachedPlanSavePartitionPruneResults(plan, dummy_part_prune_result_list); + /* assign generation number to new plan */ plan->generation = ++(plansource->generation); @@ -1160,7 +1217,7 @@ GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, if (!customplan) { - if (CheckCachedPlan(plansource)) + if (CheckCachedPlan(plansource, boundParams)) { /* We want a generic plan, and we already have a valid one */ plan = plansource->gplan; @@ -1586,6 +1643,36 @@ CopyCachedPlan(CachedPlanSource *plansource) return newsource; } +/* + * CachedPlanSavePartitionPruneResults + * Save the list containing PartitionPruneResult nodes into the given + * CachedPlan + * + * They must be kept around for the duration of a given execution of the + * CachedPlan.
The provided list is copied into a dedicated context that is + * a child of plan->context after dropping the existing contents of the list, + * because any PartitionPruneResult contained therein would no longer be + * valid for the current execution. + */ +static void +CachedPlanSavePartitionPruneResults(CachedPlan *plan, + List *part_prune_result_list) +{ + MemoryContext part_prune_result_context = plan->part_prune_result_context, + oldcontext = CurrentMemoryContext; + List *part_prune_result_list_copy; + + /* First clear the existing contents of the list. */ + Assert(MemoryContextIsValid(part_prune_result_context)); + MemoryContextReset(part_prune_result_context); + + MemoryContextSwitchTo(part_prune_result_context); + part_prune_result_list_copy = copyObject(part_prune_result_list); + MemoryContextSwitchTo(oldcontext); + + plan->part_prune_result_list = part_prune_result_list_copy; +} + /* * CachedPlanIsValid: test whether the rewritten querytree within a * CachedPlanSource is currently valid (that is, not marked as being in need @@ -1737,17 +1824,21 @@ QueryListGetPrimaryStmt(List *stmts) /* * AcquireExecutorLocks: acquire locks needed for execution of a cached plan; - * or release them if acquire is false. + * + * Returns a list of PartitionPruneResult nodes containing one element for each + * PlannedStmt in stmt_list or NULL if the latter is utility statement or its + * containsInitialPruning is false. 
*/ -static void -AcquireExecutorLocks(List *stmt_list, bool acquire) +static List * +AcquireExecutorLocks(List *stmt_list, ParamListInfo boundParams) { ListCell *lc1; + List *part_prune_result_list = NIL; foreach(lc1, stmt_list) { PlannedStmt *plannedstmt = lfirst_node(PlannedStmt, lc1); - ListCell *lc2; + PartitionPruneResult *part_prune_result = NULL; if (plannedstmt->commandType == CMD_UTILITY) { @@ -1761,27 +1852,122 @@ AcquireExecutorLocks(List *stmt_list, bool acquire) Query *query = UtilityContainsQuery(plannedstmt->utilityStmt); if (query) - ScanQueryForLocks(query, acquire); - continue; + ScanQueryForLocks(query, true); } - - foreach(lc2, plannedstmt->rtable) + else { - RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc2); - - if (rte->rtekind != RTE_RELATION) - continue; + Bitmapset *lockRelids; + int rti; /* - * Acquire the appropriate type of lock on each relation OID. Note - * that we don't actually try to open the rel, and hence will not - * fail if it's been dropped entirely --- we'll just transiently - * acquire a non-conflicting lock. + * Figure out the set of relations that would need to be locked + * before executing the plan. */ - if (acquire) + if (plannedstmt->containsInitialPruning) + { + /* + * Obtain the set of partitions to be locked by performing + * initial partition pruning. Assign (do not redeclare) the + * outer part_prune_result so that it reaches the result list. + */ + part_prune_result = + ExecutorDoInitialPruning(plannedstmt, boundParams); + + lockRelids = bms_union(plannedstmt->minLockRelids, + part_prune_result->scan_leafpart_rtis); + } + else + lockRelids = plannedstmt->minLockRelids; + + rti = -1; + while ((rti = bms_next_member(lockRelids, rti)) >= 0) + { + RangeTblEntry *rte = rt_fetch(rti, plannedstmt->rtable); + + if (rte->rtekind != RTE_RELATION) + continue; + + /* + * Acquire the appropriate type of lock on each relation OID.
+ * Note that we don't actually try to open the rel, and hence + * will not fail if it's been dropped entirely --- we'll just + * transiently acquire a non-conflicting lock. + */ LockRelationOid(rte->relid, rte->rellockmode); + } + } + + /* + * Remember PartitionPruneResult for later adding to the QueryDesc that + * will be passed to the executor when executing this plan. May be + * NULL, but must keep the list the same length as stmt_list. + */ + part_prune_result_list = lappend(part_prune_result_list, + part_prune_result); + } + + return part_prune_result_list; +} + +/* + * ReleaseExecutorLocks + * Release locks that would've been acquired by an earlier call to + * AcquireExecutorLocks() + */ +static void +ReleaseExecutorLocks(List *stmt_list, List *part_prune_result_list) +{ + ListCell *lc1, + *lc2; + + forboth(lc1, stmt_list, lc2, part_prune_result_list) + { + PlannedStmt *plannedstmt = lfirst_node(PlannedStmt, lc1); + PartitionPruneResult *part_prune_result = lfirst_node(PartitionPruneResult, lc2); + + if (plannedstmt->commandType == CMD_UTILITY) + { + /* + * Ignore utility statements, except those (such as EXPLAIN) that + * contain a parsed-but-not-planned query. Note: it's okay to use + * ScanQueryForLocks, even though the query hasn't been through + * rule rewriting, because rewriting doesn't change the query + * representation.
+ */ + Query *query = UtilityContainsQuery(plannedstmt->utilityStmt); + + if (query) + ScanQueryForLocks(query, false); + } + else + { + Bitmapset *lockRelids; + int rti; + + if (part_prune_result == NULL) + { + Assert(!plannedstmt->containsInitialPruning); + lockRelids = plannedstmt->minLockRelids; + } else + { + Assert(plannedstmt->containsInitialPruning); + lockRelids = bms_union(plannedstmt->minLockRelids, + part_prune_result->scan_leafpart_rtis); + } + + rti = -1; + while ((rti = bms_next_member(lockRelids, rti)) >= 0) + { + RangeTblEntry *rte = rt_fetch(rti, plannedstmt->rtable); + + if (rte->rtekind != RTE_RELATION) + continue; + + /* See the comment in AcquireExecutorLocks(). */ UnlockRelationOid(rte->relid, rte->rellockmode); + } + } } } diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h index 666977fb1f..34975c69ee 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h @@ -87,7 +87,8 @@ extern void ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es, const char *queryString, ParamListInfo params, QueryEnvironment *queryEnv); -extern void ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, +extern void ExplainOnePlan(PlannedStmt *plannedstmt, PartitionPruneResult *part_prune_result, + IntoClause *into, ExplainState *es, const char *queryString, ParamListInfo params, QueryEnvironment *queryEnv, const instr_time *planduration, diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h index 708435e952..bd8776402e 100644 --- a/src/include/executor/execPartition.h +++ b/src/include/executor/execPartition.h @@ -45,6 +45,7 @@ extern void ExecCleanupTupleRouting(ModifyTableState *mtstate, * nparts Length of subplan_map[] and subpart_map[]. * subplan_map Subplan index by partition index, or -1. * subpart_map Subpart index by partition index, or -1. + * rti_map Range table index by partition index, or 0.
* present_parts A Bitmapset of the partition indexes that we * have subplans or subparts for. * initial_pruning_steps List of PartitionPruneSteps used to @@ -61,6 +62,7 @@ typedef struct PartitionedRelPruningData int nparts; int *subplan_map; int *subpart_map; + Index *rti_map; Bitmapset *present_parts; List *initial_pruning_steps; List *exec_pruning_steps; @@ -123,9 +125,13 @@ typedef struct PartitionPruneState extern PartitionPruneState *ExecInitPartitionPruning(PlanState *planstate, int n_total_subplans, - PartitionPruneInfo *pruneinfo, + int part_prune_index, Bitmapset **initially_valid_subplans); extern Bitmapset *ExecFindMatchingSubPlans(PartitionPruneState *prunestate, - bool initial_prune); - + bool initial_prune, + Bitmapset **scan_leafpart_rtis); +extern Bitmapset *ExecPartitionDoInitialPruning(PlannedStmt *plannedstmt, + ParamListInfo params, + PartitionPruneInfo *pruneinfo, + Bitmapset **scan_leafpart_rtis); #endif /* EXECPARTITION_H */ diff --git a/src/include/executor/execdesc.h b/src/include/executor/execdesc.h index e79e2c001f..60d5644908 100644 --- a/src/include/executor/execdesc.h +++ b/src/include/executor/execdesc.h @@ -35,6 +35,8 @@ typedef struct QueryDesc /* These fields are provided by CreateQueryDesc */ CmdType operation; /* CMD_SELECT, CMD_UPDATE, etc. 
*/ PlannedStmt *plannedstmt; /* planner's output (could be utility, too) */ + PartitionPruneResult *part_prune_result; /* ExecutorDoInitialPruning()'s + * output for plannedstmt */ const char *sourceText; /* source text of the query */ Snapshot snapshot; /* snapshot to use for query */ Snapshot crosscheck_snapshot; /* crosscheck for RI update/delete */ @@ -57,6 +59,7 @@ typedef struct QueryDesc /* in pquery.c */ extern QueryDesc *CreateQueryDesc(PlannedStmt *plannedstmt, + PartitionPruneResult *part_prune_result, const char *sourceText, Snapshot snapshot, Snapshot crosscheck_snapshot, diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 873772f188..57dc0e8077 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -185,6 +185,8 @@ ExecGetJunkAttribute(TupleTableSlot *slot, AttrNumber attno, bool *isNull) /* * prototypes from functions in execMain.c */ +extern PartitionPruneResult *ExecutorDoInitialPruning(PlannedStmt *plannedstmt, + ParamListInfo params); extern void ExecutorStart(QueryDesc *queryDesc, int eflags); extern void standard_ExecutorStart(QueryDesc *queryDesc, int eflags); extern void ExecutorRun(QueryDesc *queryDesc, diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index cbbcff81d2..b5a7fd7e16 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -596,6 +596,8 @@ typedef struct EState struct ExecRowMark **es_rowmarks; /* Array of per-range-table-entry * ExecRowMarks, or NULL if none */ PlannedStmt *es_plannedstmt; /* link to top of plan tree */ + List *es_part_prune_infos; /* PlannedStmt.partPruneInfos */ + struct PartitionPruneResult *es_part_prune_result; /* QueryDesc.part_prune_result */ const char *es_sourceText; /* Source text from QueryDesc */ JunkFilter *es_junkFilter; /* top-level junk filter, if any */ @@ -984,6 +986,19 @@ typedef struct DomainConstraintState */ typedef TupleTableSlot *(*ExecProcNodeMtd) (struct PlanState 
*pstate); +/*---------------- + * PartitionPruneResult + * + * Result of ExecutorDoInitialPruning() invocation on a given plan. + */ +typedef struct PartitionPruneResult +{ + NodeTag type; + + Bitmapset *scan_leafpart_rtis; + List *valid_subplan_offs_list; +} PartitionPruneResult; + /* ---------------- * PlanState node * diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 300824258e..de312b9215 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -97,6 +97,9 @@ typedef enum NodeTag T_PartitionPruneStepCombine, T_PlanInvalItem, + /* TAGS FOR EXECUTOR PREP NODES (execnodes.h) */ + T_PartitionPruneResult, + /* * TAGS FOR PLAN STATE NODES (execnodes.h) * @@ -673,6 +676,7 @@ extern struct Bitmapset *readBitmapset(void); extern uintptr_t readDatum(bool typbyval); extern bool *readBoolCols(int numCols); extern int *readIntCols(int numCols); +extern Index *readIndexCols(int numCols); extern Oid *readOidCols(int numCols); extern int16 *readAttrNumberCols(int numCols); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 6cbcb67bdf..f2039071c9 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -107,6 +107,18 @@ typedef struct PlannerGlobal List *appendRelations; /* "flat" list of AppendRelInfos */ + List *partPruneInfos; /* List of PartitionPruneInfo contained in + * the plan */ + + bool containsInitialPruning; /* Do any of those PartitionPruneInfos + * have initial (pre-exec) pruning + * steps in them? */ + + Bitmapset *minLockRelids; /* RT indexes of RTE_RELATION entries that + * must always be locked to execute the plan; + * those scanned by initial-prunable plan + * nodes are not included */ + List *relationOids; /* OIDs of relations the plan depends on */ List *invalItems; /* other dependencies, as PlanInvalItems */ @@ -377,6 +389,9 @@ struct PlannerInfo /* Does this query modify any partition key columns? 
*/ bool partColsUpdated; + + /* PartitionPruneInfos added in this query's plan. */ + List *partPruneInfos; }; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 10dd35f011..ecdc950fde 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -64,8 +64,19 @@ typedef struct PlannedStmt struct Plan *planTree; /* tree of Plan nodes */ + List *partPruneInfos; /* List of PartitionPruneInfo contained in + * the plan */ + + bool containsInitialPruning; /* Do any of those PartitionPruneInfos + * have initial (pre-exec) pruning + * steps in them? */ + List *rtable; /* list of RangeTblEntry nodes */ + Bitmapset *minLockRelids; /* RT indexes of RTE_RELATION entries that + * must be locked, except those scanned by + * initial-prunable plan nodes */ + /* rtable indexes of target relations for INSERT/UPDATE/DELETE */ List *resultRelations; /* integer list of RT indexes, or NIL */ @@ -262,8 +273,12 @@ typedef struct Append */ int first_partial_plan; - /* Info for run-time subplan pruning; NULL if we're not doing that */ - struct PartitionPruneInfo *part_prune_info; + /* + * Index of this plan's PartitionPruneInfo in PlannedStmt.partPruneInfos + * to be used for run-time subplan pruning; -1 if run-time pruning is + * not needed. + */ + int part_prune_index; } Append; /* ---------------- @@ -282,8 +297,13 @@ typedef struct MergeAppend Oid *sortOperators; /* OIDs of operators to sort them by */ Oid *collations; /* OIDs of collations */ bool *nullsFirst; /* NULLS FIRST/LAST directions */ - /* Info for run-time subplan pruning; NULL if we're not doing that */ - struct PartitionPruneInfo *part_prune_info; + + /* + * Index of this plan's PartitionPruneInfo in PlannedStmt.partPruneInfos + * to be used for run-time subplan pruning; -1 if run-time pruning is + * not needed.
+ */ + int part_prune_index; } MergeAppend; /* ---------------- @@ -1187,6 +1207,13 @@ typedef struct PlanRowMark * prune_infos List of Lists containing PartitionedRelPruneInfo nodes, * one sublist per run-time-prunable partition hierarchy * appearing in the parent plan node's subplans. + * + * needs_init_pruning Does any of the PartitionedRelPruneInfos in + * prune_infos have its initial_pruning_steps set? + * + * needs_exec_pruning Does any of the PartitionedRelPruneInfos in + * prune_infos have its exec_pruning_steps set? + * * other_subplans Indexes of any subplans that are not accounted for * by any of the PartitionedRelPruneInfo nodes in * "prune_infos". These subplans must not be pruned. @@ -1195,6 +1222,9 @@ typedef struct PartitionPruneInfo { NodeTag type; List *prune_infos; + Bitmapset *leafpart_rtis; + bool needs_init_pruning; + bool needs_exec_pruning; Bitmapset *other_subplans; } PartitionPruneInfo; @@ -1225,6 +1255,7 @@ typedef struct PartitionedRelPruneInfo int *subplan_map; /* subplan index by partition index, or -1 */ int *subpart_map; /* subpart index by partition index, or -1 */ Oid *relid_map; /* relation OID by partition index, or 0 */ + Index *rti_map; /* range table index by partition index, or 0 */ /* * initial_pruning_steps shows how to prune during executor startup (i.e., diff --git a/src/include/utils/plancache.h b/src/include/utils/plancache.h index 95b99e3d25..fd7f129aea 100644 --- a/src/include/utils/plancache.h +++ b/src/include/utils/plancache.h @@ -148,6 +148,9 @@ typedef struct CachedPlan { int magic; /* should equal CACHEDPLAN_MAGIC */ List *stmt_list; /* list of PlannedStmts */ + List *part_prune_result_list; /* list of PartitionPruneResult with + * one element for each of stmt_list; + * NIL if not a generic plan */ bool is_oneshot; /* is it a "oneshot" plan? */ bool is_saved; /* is CachedPlan in a long-lived context? */ bool is_valid; /* is the stmt_list currently valid?
*/ @@ -158,6 +161,10 @@ typedef struct CachedPlan int generation; /* parent's generation number for this plan */ int refcount; /* count of live references to this struct */ MemoryContext context; /* context containing this CachedPlan */ + MemoryContext part_prune_result_context; /* context containing + * part_prune_result_list, + * a child of the above + * context */ } CachedPlan; /* -- 2.24.1