From b6413e324c5be9273a3c33aa026c06cdfb710da7 Mon Sep 17 00:00:00 2001 From: Amit Langote Date: Tue, 4 Jul 2023 22:36:31 +0900 Subject: [PATCH v42 1/4] Add field to store parent relids to Append/MergeAppend There's no way currently in the executor to tell if the child subplans of Append/MergeAppend are scanning partitions, and if they indeed do, what the RT indexes of their parent/ancestor tables are. Executor doesn't need to see their RT indexes except for run-time pruning, in which case they can be found in the PartitionPruneInfo, but a future commit will create a need for them to be available at all times for the purpose of locking those parent/ancestor tables when executing a cached plan. The code to look up partitioned parent relids for a given list of partition scan subpaths of an Append/MergeAppend is already present in make_partition_pruneinfo() but it's local to partprune.c. This commit refactors that code into its own function called add_append_subpath_partrelids() defined in appendinfo.c and generalizes it to consider child join and aggregate paths. To facilitate looking up of parent rels of child grouping rels in add_append_subpath_partrelids(), parent links are now also set in the RelOptInfos of child grouping rels too, like they are in those of child base and join rels. 
Discussion: https://postgr.es/m/CA+HiwqFGkMSge6TgC9KQzde0ohpAycLQuV7ooitEEpbKB0O_mg@mail.gmail.com --- src/backend/optimizer/plan/createplan.c | 41 ++++++-- src/backend/optimizer/plan/planner.c | 3 + src/backend/optimizer/plan/setrefs.c | 4 + src/backend/optimizer/util/appendinfo.c | 134 ++++++++++++++++++++++++ src/backend/partitioning/partprune.c | 124 +++------------------- src/include/nodes/plannodes.h | 14 +++ src/include/optimizer/appendinfo.h | 3 + src/include/partitioning/partprune.h | 3 +- 8 files changed, 203 insertions(+), 123 deletions(-) diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index af48109058..8ac1d3909b 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -25,6 +25,7 @@ #include "nodes/extensible.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "optimizer/appendinfo.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/optimizer.h" @@ -1210,6 +1211,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) Oid *nodeCollations = NULL; bool *nodeNullsFirst = NULL; bool consider_async = false; + List *allpartrelids = NIL; /* * The subpaths list could be empty, if every child was proven empty by @@ -1351,15 +1353,23 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) ++nasyncplans; } + /* + * Find partitioned parent rel(s) of the subpath's rel(s). + */ + allpartrelids = add_append_subpath_partrelids(root, subpath, rel, + allpartrelids); + subplans = lappend(subplans, subplan); } + plan->allpartrelids = allpartrelids; + /* - * If any quals exist, they may be useful to perform further partition - * pruning during execution. Gather information needed by the executor to - * do partition pruning. + * If scanning partitions, check if there are quals that may be useful to + * perform further partition pruning during execution. 
Gather information + * needed by the executor to do partition pruning. */ - if (enable_partition_pruning) + if (enable_partition_pruning && allpartrelids != NIL) { List *prunequal; @@ -1380,7 +1390,8 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) partpruneinfo = make_partition_pruneinfo(root, rel, best_path->subpaths, - prunequal); + prunequal, + allpartrelids); } plan->appendplans = subplans; @@ -1426,6 +1437,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, ListCell *subpaths; RelOptInfo *rel = best_path->path.parent; PartitionPruneInfo *partpruneinfo = NULL; + List *allpartrelids = NIL; /* * We don't have the actual creation of the MergeAppend node split out @@ -1515,15 +1527,23 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, subplan = (Plan *) sort; } + /* + * Find partitioned parent rel(s) of the subpath's rel(s). + */ + allpartrelids = add_append_subpath_partrelids(root, subpath, rel, + allpartrelids); + subplans = lappend(subplans, subplan); } + node->allpartrelids = allpartrelids; + /* - * If any quals exist, they may be useful to perform further partition - * pruning during execution. Gather information needed by the executor to - * do partition pruning. + * If scanning partitions, check if there are quals that may be useful to + * perform further partition pruning during execution. Gather information + * needed by the executor to do partition pruning. 
*/ - if (enable_partition_pruning) + if (enable_partition_pruning && allpartrelids != NIL) { List *prunequal; @@ -1535,7 +1555,8 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, if (prunequal != NIL) partpruneinfo = make_partition_pruneinfo(root, rel, best_path->subpaths, - prunequal); + prunequal, + allpartrelids); } node->mergeplans = subplans; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 44efb1f4eb..f97bc09113 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -7855,8 +7855,11 @@ create_partitionwise_grouping_paths(PlannerInfo *root, agg_costs, gd, &child_extra, &child_partially_grouped_rel); + /* Mark as child of grouped_rel. */ + child_grouped_rel->parent = grouped_rel; if (child_partially_grouped_rel) { + child_partially_grouped_rel->parent = grouped_rel; partially_grouped_live_children = lappend(partially_grouped_live_children, child_partially_grouped_rel); diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 97fa561e4e..854dd7c8af 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -1766,6 +1766,8 @@ set_append_references(PlannerInfo *root, set_dummy_tlist_references((Plan *) aplan, rtoffset); aplan->apprelids = offset_relid_set(aplan->apprelids, rtoffset); + foreach(l, aplan->allpartrelids) + lfirst(l) = offset_relid_set((Relids) lfirst(l), rtoffset); if (aplan->part_prune_info) { @@ -1842,6 +1844,8 @@ set_mergeappend_references(PlannerInfo *root, set_dummy_tlist_references((Plan *) mplan, rtoffset); mplan->apprelids = offset_relid_set(mplan->apprelids, rtoffset); + foreach(l, mplan->allpartrelids) + lfirst(l) = offset_relid_set((Relids) lfirst(l), rtoffset); if (mplan->part_prune_info) { diff --git a/src/backend/optimizer/util/appendinfo.c b/src/backend/optimizer/util/appendinfo.c index f456b3b0a4..5bd8e82b9b 100644 --- 
a/src/backend/optimizer/util/appendinfo.c +++ b/src/backend/optimizer/util/appendinfo.c @@ -41,6 +41,7 @@ static void make_inh_translation_list(Relation oldrelation, AppendRelInfo *appinfo); static Node *adjust_appendrel_attrs_mutator(Node *node, adjust_appendrel_attrs_context *context); +static List *add_part_relids(List *allpartrelids, Bitmapset *partrelids); /* @@ -1035,3 +1036,136 @@ distribute_row_identity_vars(PlannerInfo *root) } } } + +/* + * add_append_subpath_partrelids + * Look up a child subpath's rel's partitioned parent relids up to + * parentrel and add the bitmapset containing those into + * 'allpartrelids' + */ +List * +add_append_subpath_partrelids(PlannerInfo *root, Path *subpath, + RelOptInfo *parentrel, + List *allpartrelids) +{ + RelOptInfo *prel = subpath->parent; + Relids partrelids = NULL; + + /* Nothing to do if there's no parent to begin with. */ + if (!IS_OTHER_REL(prel)) + return allpartrelids; + + /* + * Traverse up to the pathrel's topmost partitioned parent, collecting + * parent relids as we go; but stop if we reach parentrel. (Normally, a + * pathrel's topmost partitioned parent is either parentrel or a UNION ALL + * appendrel child of parentrel. But when handling partitionwise joins of + * multi-level partitioning trees, we can see an append path whose + * parentrel is an intermediate partitioned table.) + */ + do + { + Relids parent_relids = NULL; + + /* + * For simple child rels, we can simply set the parent_relids to + * prel->parent->relids. But for partitionwise join and aggregate + * child rels, while we can use prel->parent to move up the tree, + * parent_relids must be found the hard way through AppendRelInfos, + * because 1) a joinrel's relids may point to RTE_JOIN entries, + * 2) topmost parent grouping rel's relids field is NULL. + */ + if (IS_SIMPLE_REL(prel)) + { + prel = prel->parent; + /* Stop once we reach the root partitioned rel. 
*/ + if (!IS_PARTITIONED_REL(prel)) + break; + parent_relids = bms_add_members(parent_relids, prel->relids); + } + else + { + AppendRelInfo **appinfos; + int nappinfos, + i; + + appinfos = find_appinfos_by_relids(root, prel->relids, + &nappinfos); + for (i = 0; i < nappinfos; i++) + { + AppendRelInfo *appinfo = appinfos[i]; + + parent_relids = bms_add_member(parent_relids, + appinfo->parent_relid); + } + pfree(appinfos); + prel = prel->parent; + } + /* accept this level as an interesting parent */ + partrelids = bms_add_members(partrelids, parent_relids); + if (prel == parentrel) + break; /* don't traverse above parentrel */ + } while (IS_OTHER_REL(prel)); + + if (partrelids == NULL) + return allpartrelids; + + return add_part_relids(allpartrelids, partrelids); +} + +/* + * add_part_relids + * Add new info to a list of Bitmapsets of partitioned relids. + * + * Within 'allpartrelids', there is one Bitmapset for each topmost parent + * partitioned rel. Each Bitmapset contains the RT indexes of the topmost + * parent as well as its relevant non-leaf child partitions. Since (by + * construction of the rangetable list) parent partitions must have lower + * RT indexes than their children, we can distinguish the topmost parent + * as being the lowest set bit in the Bitmapset. + * + * 'partrelids' contains the RT indexes of a parent partitioned rel, and + * possibly some non-leaf children, that are newly identified as parents of + * some subpath rel passed to make_partition_pruneinfo(). These are added + * to an appropriate member of 'allpartrelids'. + * + * Note that the list contains only RT indexes of partitioned tables that + * are parents of some scan-level relation appearing in the 'subpaths' that + * make_partition_pruneinfo() is dealing with. Also, "topmost" parents are + * not allowed to be higher than the 'parentrel' associated with the append + * path. 
In this way, we avoid expending cycles on partitioned rels that + * can't contribute useful pruning information for the problem at hand. + * (It is possible for 'parentrel' to be a child partitioned table, and it + * is also possible for scan-level relations to be child partitioned tables + * rather than leaf partitions. Hence we must construct this relation set + * with reference to the particular append path we're dealing with, rather + * than looking at the full partitioning structure represented in the + * RelOptInfos.) + */ +static List * +add_part_relids(List *allpartrelids, Bitmapset *partrelids) +{ + Index targetpart; + ListCell *lc; + + /* We can easily get the lowest set bit this way: */ + targetpart = bms_next_member(partrelids, -1); + Assert(targetpart > 0); + + /* Look for a matching topmost parent */ + foreach(lc, allpartrelids) + { + Bitmapset *currpartrelids = (Bitmapset *) lfirst(lc); + Index currtarget = bms_next_member(currpartrelids, -1); + + if (targetpart == currtarget) + { + /* Found a match, so add any new RT indexes to this hierarchy */ + currpartrelids = bms_add_members(currpartrelids, partrelids); + lfirst(lc) = currpartrelids; + return allpartrelids; + } + } + /* No match, so add the new partition hierarchy to the list */ + return lappend(allpartrelids, partrelids); +} diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index 7179b22a05..213512a5f4 100644 --- a/src/backend/partitioning/partprune.c +++ b/src/backend/partitioning/partprune.c @@ -138,7 +138,6 @@ typedef struct PruneStepResult } PruneStepResult; -static List *add_part_relids(List *allpartrelids, Bitmapset *partrelids); static List *make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, List *prunequal, @@ -218,33 +217,32 @@ static void partkey_datum_from_expr(PartitionPruneContext *context, * of scan paths for its child rels. 
* 'prunequal' is a list of potential pruning quals (i.e., restriction * clauses that are applicable to the appendrel). + * 'allpartrelids' contains Bitmapsets of RT indexes of partitioned parents + * whose partitions' Paths are in 'subpaths'; there's one Bitmapset for every + * partition tree involved. */ PartitionPruneInfo * make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, List *subpaths, - List *prunequal) + List *prunequal, + List *allpartrelids) { PartitionPruneInfo *pruneinfo; Bitmapset *allmatchedsubplans = NULL; - List *allpartrelids; List *prunerelinfos; int *relid_subplan_map; ListCell *lc; int i; + Assert(list_length(allpartrelids) > 0); + /* - * Scan the subpaths to see which ones are scans of partition child - * relations, and identify their parent partitioned rels. (Note: we must - * restrict the parent partitioned rels to be parentrel or children of - * parentrel, otherwise we couldn't translate prunequal to match.) - * - * Also construct a temporary array to map from partition-child-relation - * relid to the index in 'subpaths' of the scan plan for that partition. + * Construct a temporary array to map from partition-child-relation relid + * to the index in 'subpaths' of the scan plan for that partition. * (Use of "subplan" rather than "subpath" is a bit of a misnomer, but * we'll let it stand.) For convenience, we use 1-based indexes here, so * that zero can represent an un-filled array entry. 
*/ - allpartrelids = NIL; relid_subplan_map = palloc0(sizeof(int) * root->simple_rel_array_size); i = 1; @@ -253,50 +251,9 @@ make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, Path *path = (Path *) lfirst(lc); RelOptInfo *pathrel = path->parent; - /* We don't consider partitioned joins here */ - if (pathrel->reloptkind == RELOPT_OTHER_MEMBER_REL) - { - RelOptInfo *prel = pathrel; - Bitmapset *partrelids = NULL; - - /* - * Traverse up to the pathrel's topmost partitioned parent, - * collecting parent relids as we go; but stop if we reach - * parentrel. (Normally, a pathrel's topmost partitioned parent - * is either parentrel or a UNION ALL appendrel child of - * parentrel. But when handling partitionwise joins of - * multi-level partitioning trees, we can see an append path whose - * parentrel is an intermediate partitioned table.) - */ - do - { - AppendRelInfo *appinfo; - - Assert(prel->relid < root->simple_rel_array_size); - appinfo = root->append_rel_array[prel->relid]; - prel = find_base_rel(root, appinfo->parent_relid); - if (!IS_PARTITIONED_REL(prel)) - break; /* reached a non-partitioned parent */ - /* accept this level as an interesting parent */ - partrelids = bms_add_member(partrelids, prel->relid); - if (prel == parentrel) - break; /* don't traverse above parentrel */ - } while (prel->reloptkind == RELOPT_OTHER_MEMBER_REL); - - if (partrelids) - { - /* - * Found some relevant parent partitions, which may or may not - * overlap with partition trees we already found. Add new - * information to the allpartrelids list. 
- */ - allpartrelids = add_part_relids(allpartrelids, partrelids); - /* Also record the subplan in relid_subplan_map[] */ - /* No duplicates please */ - Assert(relid_subplan_map[pathrel->relid] == 0); - relid_subplan_map[pathrel->relid] = i; - } - } + /* No duplicates please */ + Assert(relid_subplan_map[pathrel->relid] == 0); + relid_subplan_map[pathrel->relid] = i; i++; } @@ -362,63 +319,6 @@ make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, return pruneinfo; } -/* - * add_part_relids - * Add new info to a list of Bitmapsets of partitioned relids. - * - * Within 'allpartrelids', there is one Bitmapset for each topmost parent - * partitioned rel. Each Bitmapset contains the RT indexes of the topmost - * parent as well as its relevant non-leaf child partitions. Since (by - * construction of the rangetable list) parent partitions must have lower - * RT indexes than their children, we can distinguish the topmost parent - * as being the lowest set bit in the Bitmapset. - * - * 'partrelids' contains the RT indexes of a parent partitioned rel, and - * possibly some non-leaf children, that are newly identified as parents of - * some subpath rel passed to make_partition_pruneinfo(). These are added - * to an appropriate member of 'allpartrelids'. - * - * Note that the list contains only RT indexes of partitioned tables that - * are parents of some scan-level relation appearing in the 'subpaths' that - * make_partition_pruneinfo() is dealing with. Also, "topmost" parents are - * not allowed to be higher than the 'parentrel' associated with the append - * path. In this way, we avoid expending cycles on partitioned rels that - * can't contribute useful pruning information for the problem at hand. - * (It is possible for 'parentrel' to be a child partitioned table, and it - * is also possible for scan-level relations to be child partitioned tables - * rather than leaf partitions. 
Hence we must construct this relation set - * with reference to the particular append path we're dealing with, rather - * than looking at the full partitioning structure represented in the - * RelOptInfos.) - */ -static List * -add_part_relids(List *allpartrelids, Bitmapset *partrelids) -{ - Index targetpart; - ListCell *lc; - - /* We can easily get the lowest set bit this way: */ - targetpart = bms_next_member(partrelids, -1); - Assert(targetpart > 0); - - /* Look for a matching topmost parent */ - foreach(lc, allpartrelids) - { - Bitmapset *currpartrelids = (Bitmapset *) lfirst(lc); - Index currtarget = bms_next_member(currpartrelids, -1); - - if (targetpart == currtarget) - { - /* Found a match, so add any new RT indexes to this hierarchy */ - currpartrelids = bms_add_members(currpartrelids, partrelids); - lfirst(lc) = currpartrelids; - return allpartrelids; - } - } - /* No match, so add the new partition hierarchy to the list */ - return lappend(allpartrelids, partrelids); -} - /* * make_partitionedrel_pruneinfo * Build a List of PartitionedRelPruneInfos, one for each interesting diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 1b787fe031..7a5f3ba625 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -267,6 +267,13 @@ typedef struct Append List *appendplans; int nasyncplans; /* # of asynchronous plans */ + /* + * RTIs of all partitioned tables whose children are scanned by + * appendplans. The list contains a bitmapset for every partition tree + * covered by this Append. + */ + List *allpartrelids; + /* * All 'appendplans' preceding this index are non-partial plans. All * 'appendplans' from this index onwards are partial plans. @@ -291,6 +298,13 @@ typedef struct MergeAppend List *mergeplans; + /* + * RTIs of all partitioned tables whose children are scanned by + * mergeplans. The list contains a bitmapset for every partition tree + * covered by this MergeAppend. 
+ */ + List *allpartrelids; + /* these fields are just like the sort-key info in struct Sort: */ /* number of sort-key columns */ diff --git a/src/include/optimizer/appendinfo.h b/src/include/optimizer/appendinfo.h index a05f91f77d..1621a7319a 100644 --- a/src/include/optimizer/appendinfo.h +++ b/src/include/optimizer/appendinfo.h @@ -46,5 +46,8 @@ extern void add_row_identity_columns(PlannerInfo *root, Index rtindex, RangeTblEntry *target_rte, Relation target_relation); extern void distribute_row_identity_vars(PlannerInfo *root); +extern List *add_append_subpath_partrelids(PlannerInfo *root, Path *subpath, + RelOptInfo *parentrel, + List *allpartrelids); #endif /* APPENDINFO_H */ diff --git a/src/include/partitioning/partprune.h b/src/include/partitioning/partprune.h index 8636e04e37..caa774a111 100644 --- a/src/include/partitioning/partprune.h +++ b/src/include/partitioning/partprune.h @@ -73,7 +73,8 @@ typedef struct PartitionPruneContext extern PartitionPruneInfo *make_partition_pruneinfo(struct PlannerInfo *root, struct RelOptInfo *parentrel, List *subpaths, - List *prunequal); + List *prunequal, + List *allpartrelids); extern Bitmapset *prune_append_rel_partitions(struct RelOptInfo *rel); extern Bitmapset *get_matching_partitions(PartitionPruneContext *context, List *pruning_steps); -- 2.35.3