From 3b0f425cd220303c66548372960b011899cff6a2 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Wed, 28 Feb 2024 10:03:41 +0800 Subject: [PATCH v4 6/9] Build grouped relations out of base relations This commit builds grouped relations for each base relation if possible, and generates aggregation paths for the grouped base relations. --- src/backend/optimizer/path/allpaths.c | 91 +++++++++++++++++++++++ src/backend/optimizer/util/relnode.c | 101 ++++++++++++++++++++++++++ src/include/optimizer/pathnode.h | 4 + 3 files changed, 196 insertions(+) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 633b5b0af1..b21f21589a 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -93,6 +93,7 @@ join_search_hook_type join_search_hook = NULL; static void set_base_rel_consider_startup(PlannerInfo *root); static void set_base_rel_sizes(PlannerInfo *root); +static void setup_base_grouped_rels(PlannerInfo *root); static void set_base_rel_pathlists(PlannerInfo *root); static void set_rel_size(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); @@ -117,6 +118,7 @@ static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); +static void set_grouped_rel_pathlist(PlannerInfo *root, RelOptInfo *rel); static void generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel, List *live_childrels, List *all_child_pathkeys); @@ -185,6 +187,11 @@ make_one_rel(PlannerInfo *root, List *joinlist) */ set_base_rel_sizes(root); + /* + * Build grouped base relations for each base rel if possible. 
+ */ + setup_base_grouped_rels(root); + /* * We should now have size estimates for every actual table involved in * the query, and we also know which if any have been deleted from the @@ -326,6 +333,59 @@ set_base_rel_sizes(PlannerInfo *root) } } +/* + * setup_base_grouped_rels + * If eager aggregation is possible, build a grouped relation for each simple + * rel that can produce grouped paths and register it via add_grouped_rel(). + */ +static void +setup_base_grouped_rels(PlannerInfo *root) +{ + Index rti; + + /* + * If there are no aggregate expressions or grouping expressions, eager + * aggregation is not possible. + */ + if (root->agg_clause_list == NIL || + root->group_expr_list == NIL) + return; + + /* + * Eager aggregation only makes sense if there are multiple base rels in + * the query. + */ + if (bms_membership(root->all_baserels) != BMS_MULTIPLE) + return; + + for (rti = 1; rti < root->simple_rel_array_size; rti++) + { + RelOptInfo *rel = root->simple_rel_array[rti]; + RelOptInfo *rel_grouped; + RelAggInfo *agg_info; /* valid only if rel_grouped is non-NULL */ + + /* there may be empty slots corresponding to non-baserel RTEs */ + if (rel == NULL) + continue; + + Assert(rel->relid == rti); /* sanity check on array */ + + /* + * Ignore RTEs that are not simple rels. Note that we need to consider + * "other rels" here. + */ + if (!IS_SIMPLE_REL(rel)) + continue; + + rel_grouped = build_simple_grouped_rel(root, rel->relid, &agg_info); + if (rel_grouped) + { + /* Make the grouped relation available for joining. */ + add_grouped_rel(root, rel_grouped, agg_info); + } + } +} + /* * set_base_rel_pathlists * Finds all paths available for scanning each base-relation entry. @@ -562,6 +622,15 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, /* Now find the cheapest of the paths for this rel */ set_cheapest(rel); + /* + * If a grouped relation for this rel exists, build partial aggregation + * paths for it. 
+ * + * Note that this can only happen after we've called set_cheapest() for + * this base rel, because we need its cheapest paths. + */ + set_grouped_rel_pathlist(root, rel); + #ifdef OPTIMIZER_DEBUG pprint(rel); #endif @@ -1289,6 +1358,28 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, add_paths_to_append_rel(root, rel, live_childrels); } +/* + * set_grouped_rel_pathlist + * If a grouped relation for the given 'rel' exists, build partial + * aggregation paths for it and then select its cheapest paths. + */ +static void +set_grouped_rel_pathlist(PlannerInfo *root, RelOptInfo *rel) +{ + RelOptInfo *rel_grouped; + RelAggInfo *agg_info; + + /* Look up the grouped base relation and add paths to it if one exists. */ + rel_grouped = find_grouped_rel(root, rel->relids, + &agg_info); + if (rel_grouped) + { + generate_grouped_paths(root, rel_grouped, rel, + agg_info); + set_cheapest(rel_grouped); + } +} + /* * add_paths_to_append_rel diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 79288fb2d3..0b11ba15ef 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -16,6 +16,7 @@ #include +#include "catalog/pg_constraint.h" #include "miscadmin.h" #include "nodes/nodeFuncs.h" #include "optimizer/appendinfo.h" @@ -27,12 +28,15 @@ #include "optimizer/paths.h" #include "optimizer/placeholder.h" #include "optimizer/plancat.h" +#include "optimizer/planner.h" #include "optimizer/restrictinfo.h" #include "optimizer/tlist.h" +#include "parser/parse_oper.h" #include "parser/parse_relation.h" #include "rewrite/rewriteManip.h" #include "utils/hsearch.h" #include "utils/lsyscache.h" +#include "utils/selfuncs.h" /* @@ -411,6 +415,103 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) return rel; } +/* + * build_simple_grouped_rel + * Construct a new RelOptInfo for a grouped base relation out of an existing + * non-grouped base relation. 
+ * + * On success, returns the new RelOptInfo and stores its RelAggInfo in + * *agg_info_p; otherwise returns NULL and leaves *agg_info_p untouched. + */ +RelOptInfo * +build_simple_grouped_rel(PlannerInfo *root, int relid, + RelAggInfo **agg_info_p) +{ + RelOptInfo *rel_plain; + RelOptInfo *rel_grouped; + RelAggInfo *agg_info; + + /* + * We should have available aggregate expressions and grouping expressions, + * otherwise we cannot reach here. + */ + Assert(root->agg_clause_list != NIL); + Assert(root->group_expr_list != NIL); + + rel_plain = root->simple_rel_array[relid]; + Assert(rel_plain != NULL); + Assert(IS_SIMPLE_REL(rel_plain)); + + /* nothing to do for dummy rel */ + if (IS_DUMMY_REL(rel_plain)) + return NULL; + + /* + * Prepare the information we need to create grouped paths for this base + * relation. A NULL result means we give up on building a grouped rel. + */ + agg_info = create_rel_agg_info(root, rel_plain); + if (agg_info == NULL) + return NULL; + + /* copy the plain rel, then install the grouped target and row estimate */ + rel_grouped = build_grouped_rel(root, rel_plain); + rel_grouped->reltarget = agg_info->target; + rel_grouped->rows = agg_info->grouped_rows; + + /* hand the RelAggInfo structure back to the caller */ + *agg_info_p = agg_info; + + return rel_grouped; +} + +/* + * build_grouped_rel + * Build a grouped relation by flat copying a plain relation and resetting + * the necessary fields. 
+ */ +RelOptInfo * +build_grouped_rel(PlannerInfo *root, RelOptInfo *rel_plain) +{ + RelOptInfo *rel_grouped; + + rel_grouped = makeNode(RelOptInfo); + memcpy(rel_grouped, rel_plain, sizeof(RelOptInfo)); /* flat copy; pointers are shared */ + + /* + * clear path info: the grouped rel starts without any paths of its own + */ + rel_grouped->pathlist = NIL; + rel_grouped->ppilist = NIL; + rel_grouped->partial_pathlist = NIL; + rel_grouped->cheapest_startup_path = NULL; + rel_grouped->cheapest_total_path = NULL; + rel_grouped->cheapest_unique_path = NULL; + rel_grouped->cheapest_parameterized_paths = NIL; + + /* + * clear partition info so that none is inherited from the plain rel + */ + rel_grouped->part_scheme = NULL; + rel_grouped->nparts = -1; + rel_grouped->boundinfo = NULL; + rel_grouped->partbounds_merged = false; + rel_grouped->partition_qual = NIL; + rel_grouped->part_rels = NULL; + rel_grouped->live_parts = NULL; + rel_grouped->all_partrels = NULL; + rel_grouped->partexprs = NULL; + rel_grouped->nullable_partexprs = NULL; + rel_grouped->consider_partitionwise_join = false; + + /* + * clear size estimates: reset the row count copied from the plain rel + */ + rel_grouped->rows = 0; + + return rel_grouped; +} + /* * find_base_rel * Find a base or otherrel relation entry, which must already exist. diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 91ce637f9e..41818c5189 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -308,6 +308,10 @@ extern void setup_simple_rel_arrays(PlannerInfo *root); extern void expand_planner_arrays(PlannerInfo *root, int add_size); extern RelOptInfo *build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent); +extern RelOptInfo *build_simple_grouped_rel(PlannerInfo *root, int relid, + RelAggInfo **agg_info_p); +extern RelOptInfo *build_grouped_rel(PlannerInfo *root, + RelOptInfo *rel_plain); extern RelOptInfo *find_base_rel(PlannerInfo *root, int relid); extern RelOptInfo *find_base_rel_noerr(PlannerInfo *root, int relid); extern RelOptInfo *find_base_rel_ignore_join(PlannerInfo *root, int relid); -- 2.31.0