From 65cb86a4ec954786c9d6533c13ea71ba76224372 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Fri, 23 Feb 2024 14:19:39 +0800 Subject: [PATCH v6 5/9] Implement functions that generate paths for grouped relations This commit implements the functions that generate paths for grouped relations by adding sorted and hashed partial aggregation paths on top of paths of the plain base or join relations. --- src/backend/optimizer/path/allpaths.c | 307 ++++++++++++++++++++++++++ src/backend/optimizer/util/pathnode.c | 12 +- src/include/optimizer/paths.h | 4 + 3 files changed, 315 insertions(+), 8 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 586c0e07c0..3f3dbc486e 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -40,6 +40,7 @@ #include "optimizer/paths.h" #include "optimizer/plancat.h" #include "optimizer/planner.h" +#include "optimizer/prep.h" #include "optimizer/tlist.h" #include "parser/parse_clause.h" #include "parser/parsetree.h" @@ -47,6 +48,7 @@ #include "port/pg_bitutils.h" #include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" +#include "utils/selfuncs.h" /* Bitmask flags for pushdown_safety_info.unsafeFlags */ @@ -3308,6 +3310,311 @@ generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_r } } +/* + * generate_grouped_paths + * Generate paths for a grouped relation by adding sorted and hashed + * partial aggregation paths on top of paths of the plain base or join + * relation. + * + * The information needed are provided by the RelAggInfo structure. + */ +void +generate_grouped_paths(PlannerInfo *root, RelOptInfo *rel_grouped, + RelOptInfo *rel_plain, RelAggInfo *agg_info) +{ + AggClauseCosts agg_costs; + bool can_hash; + bool can_sort; + Path *cheapest_total_path = NULL; + Path *cheapest_partial_path = NULL; + double dNumGroups = 0; + double dNumPartialGroups = 0; + + if (IS_DUMMY_REL(rel_plain)) + { + mark_dummy_rel(rel_grouped); + return; + } + + MemSet(&agg_costs, 0, sizeof(AggClauseCosts)); + get_agg_clause_costs(root, AGGSPLIT_INITIAL_SERIAL, &agg_costs); + + /* + * Determine whether it's possible to perform sort-based implementations of + * grouping. + */ + can_sort = grouping_is_sortable(agg_info->group_clauses); + + /* + * Determine whether we should consider hash-based implementations of + * grouping. + */ + Assert(root->numOrderedAggs == 0); + can_hash = (agg_info->group_clauses != NIL && + grouping_is_hashable(agg_info->group_clauses)); + + /* + * Consider whether we should generate partially aggregated non-partial + * paths. We can only do this if we have a non-partial path. + */ + if (rel_plain->pathlist != NIL) + { + cheapest_total_path = rel_plain->cheapest_total_path; + Assert(cheapest_total_path != NULL); + } + + /* + * If parallelism is possible for rel_grouped, then we should consider + * generating partially-grouped partial paths. However, if the plain rel + * has no partial paths, then we can't. + */ + if (rel_grouped->consider_parallel && rel_plain->partial_pathlist != NIL) + { + cheapest_partial_path = linitial(rel_plain->partial_pathlist); + Assert(cheapest_partial_path != NULL); + } + + /* Estimate number of partial groups. */ + if (cheapest_total_path != NULL) + dNumGroups = estimate_num_groups(root, + agg_info->group_exprs, + cheapest_total_path->rows, + NULL, NULL); + if (cheapest_partial_path != NULL) + dNumPartialGroups = estimate_num_groups(root, + agg_info->group_exprs, + cheapest_partial_path->rows, + NULL, NULL); + + if (can_sort && cheapest_total_path != NULL) + { + ListCell *lc; + + /* + * Use any available suitably-sorted path as input, and also consider + * sorting the cheapest-total path. + */ + foreach(lc, rel_plain->pathlist) + { + Path *input_path = (Path *) lfirst(lc); + Path *path; + bool is_sorted; + int presorted_keys; + + /* + * Since the path originates from the non-grouped relation which is + * not aware of eager aggregation, we must ensure that it provides + * the correct input for the partial aggregation. + */ + path = (Path *) create_projection_path(root, + rel_grouped, + input_path, + agg_info->agg_input); + + is_sorted = pathkeys_count_contained_in(agg_info->group_pathkeys, + path->pathkeys, + &presorted_keys); + if (!is_sorted) + { + /* + * Try at least sorting the cheapest path and also try + * incrementally sorting any path which is partially sorted + * already (no need to deal with paths which have presorted + * keys when incremental sort is disabled unless it's the + * cheapest input path). + */ + if (input_path != cheapest_total_path && + (presorted_keys == 0 || !enable_incremental_sort)) + continue; + + /* + * We've no need to consider both a sort and incremental sort. + * We'll just do a sort if there are no presorted keys and an + * incremental sort when there are presorted keys. + */ + if (presorted_keys == 0 || !enable_incremental_sort) + path = (Path *) create_sort_path(root, + rel_grouped, + path, + agg_info->group_pathkeys, + -1.0); + else + path = (Path *) create_incremental_sort_path(root, + rel_grouped, + path, + agg_info->group_pathkeys, + presorted_keys, + -1.0); + } + + /* + * qual is NIL because the HAVING clause cannot be evaluated until the + * final value of the aggregate is known. + */ + path = (Path *) create_agg_path(root, + rel_grouped, + path, + agg_info->target, + AGG_SORTED, + AGGSPLIT_INITIAL_SERIAL, + agg_info->group_clauses, + NIL, + &agg_costs, + dNumGroups); + + add_path(rel_grouped, path); + } + } + + if (can_sort && cheapest_partial_path != NULL) + { + ListCell *lc; + + /* Similar to above logic, but for partial paths. */ + foreach(lc, rel_plain->partial_pathlist) + { + Path *input_path = (Path *) lfirst(lc); + Path *path; + bool is_sorted; + int presorted_keys; + + /* + * Since the path originates from the non-grouped relation which is + * not aware of eager aggregation, we must ensure that it provides + * the correct input for the partial aggregation. + */ + path = (Path *) create_projection_path(root, + rel_grouped, + input_path, + agg_info->agg_input); + + is_sorted = pathkeys_count_contained_in(agg_info->group_pathkeys, + path->pathkeys, + &presorted_keys); + + if (!is_sorted) + { + /* + * Try at least sorting the cheapest path and also try + * incrementally sorting any path which is partially sorted + * already (no need to deal with paths which have presorted + * keys when incremental sort is disabled unless it's the + * cheapest input path). + */ + if (input_path != cheapest_partial_path && + (presorted_keys == 0 || !enable_incremental_sort)) + continue; + + /* + * We've no need to consider both a sort and incremental sort. + * We'll just do a sort if there are no presorted keys and an + * incremental sort when there are presorted keys. + */ + if (presorted_keys == 0 || !enable_incremental_sort) + path = (Path *) create_sort_path(root, + rel_grouped, + path, + agg_info->group_pathkeys, + -1.0); + else + path = (Path *) create_incremental_sort_path(root, + rel_grouped, + path, + agg_info->group_pathkeys, + presorted_keys, + -1.0); + } + + /* + * qual is NIL because the HAVING clause cannot be evaluated until the + * final value of the aggregate is known. + */ + path = (Path *) create_agg_path(root, + rel_grouped, + path, + agg_info->target, + AGG_SORTED, + AGGSPLIT_INITIAL_SERIAL, + agg_info->group_clauses, + NIL, + &agg_costs, + dNumPartialGroups); + + add_partial_path(rel_grouped, path); + } + } + + /* + * Add a partially-grouped HashAgg Path where possible + */ + if (can_hash && cheapest_total_path != NULL) + { + Path *path; + + /* + * Since the path originates from the non-grouped relation which is + * not aware of eager aggregation, we must ensure that it provides + * the correct input for the partial aggregation. + */ + path = (Path *) create_projection_path(root, + rel_grouped, + cheapest_total_path, + agg_info->agg_input); + + /* + * qual is NIL because the HAVING clause cannot be evaluated until + * the final value of the aggregate is known. + */ + path = (Path *) create_agg_path(root, + rel_grouped, + path, + agg_info->target, + AGG_HASHED, + AGGSPLIT_INITIAL_SERIAL, + agg_info->group_clauses, + NIL, + &agg_costs, + dNumGroups); + + add_path(rel_grouped, path); + } + + /* + * Now add a partially-grouped HashAgg partial Path where possible + */ + if (can_hash && cheapest_partial_path != NULL) + { + Path *path; + + /* + * Since the path originates from the non-grouped relation which is + * not aware of eager aggregation, we must ensure that it provides + * the correct input for the partial aggregation. + */ + path = (Path *) create_projection_path(root, + rel_grouped, + cheapest_partial_path, + agg_info->agg_input); + + /* + * qual is NIL because the HAVING clause cannot be evaluated until + * the final value of the aggregate is known. + */ + path = (Path *) create_agg_path(root, + rel_grouped, + path, + agg_info->target, + AGG_HASHED, + AGGSPLIT_INITIAL_SERIAL, + agg_info->group_clauses, + NIL, + &agg_costs, + dNumPartialGroups); + + add_partial_path(rel_grouped, path); + } +} + /* * make_rel_from_joinlist * Build access paths using a "joinlist" to guide the join path search. diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 3cf1dac087..70fa25a67b 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -2709,8 +2709,7 @@ create_projection_path(PlannerInfo *root, pathnode->path.pathtype = T_Result; pathnode->path.parent = rel; pathnode->path.pathtarget = target; - /* For now, assume we are above any joins, so no parameterization */ - pathnode->path.param_info = NULL; + pathnode->path.param_info = subpath->param_info; pathnode->path.parallel_aware = false; pathnode->path.parallel_safe = rel->consider_parallel && subpath->parallel_safe && @@ -2962,8 +2961,7 @@ create_incremental_sort_path(PlannerInfo *root, pathnode->path.parent = rel; /* Sort doesn't project, so use source path's pathtarget */ pathnode->path.pathtarget = subpath->pathtarget; - /* For now, assume we are above any joins, so no parameterization */ - pathnode->path.param_info = NULL; + pathnode->path.param_info = subpath->param_info; pathnode->path.parallel_aware = false; pathnode->path.parallel_safe = rel->consider_parallel && subpath->parallel_safe; @@ -3009,8 +3007,7 @@ create_sort_path(PlannerInfo *root, pathnode->path.parent = rel; /* Sort doesn't project, so use source path's pathtarget */ pathnode->path.pathtarget = subpath->pathtarget; - /* For now, assume we are above any joins, so no parameterization */ - pathnode->path.param_info = NULL; + pathnode->path.param_info = subpath->param_info; pathnode->path.parallel_aware = false; pathnode->path.parallel_safe = rel->consider_parallel && subpath->parallel_safe; @@ -3168,8 +3165,7 @@ create_agg_path(PlannerInfo *root, pathnode->path.pathtype = T_Agg; pathnode->path.parent = rel; pathnode->path.pathtarget = target; - /* For now, assume we are above any joins, so no parameterization */ - pathnode->path.param_info = NULL; + pathnode->path.param_info = subpath->param_info; pathnode->path.parallel_aware = false; pathnode->path.parallel_safe = rel->consider_parallel && subpath->parallel_safe; diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 31eed6b6a8..947f814f4f 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -58,6 +58,10 @@ extern void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows); extern void generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows); +extern void generate_grouped_paths(PlannerInfo *root, + RelOptInfo *rel_grouped, + RelOptInfo *rel_plain, + RelAggInfo *agg_info); extern int compute_parallel_worker(RelOptInfo *rel, double heap_pages, double index_pages, int max_workers); extern void create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel, -- 2.31.0