From fec5fc5a1e0e8a6c6c800e948ecab053fa67faf2 Mon Sep 17 00:00:00 2001 From: erthalion <9erthalion6@gmail.com> Date: Fri, 29 Mar 2019 15:19:48 +0100 Subject: [PATCH v12 2/3] Reorder to match ORDER BY or index --- src/backend/optimizer/path/equivclass.c | 13 ++++- src/backend/optimizer/path/pathkeys.c | 91 ++++++++++++++++++++++++++++++++ src/backend/optimizer/plan/planner.c | 85 +++++++++++++++++++++-------- src/include/optimizer/paths.h | 3 ++ src/test/regress/expected/aggregates.out | 44 ++++++++------- 5 files changed, 189 insertions(+), 47 deletions(-) diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 61b5b119b0..7ffee2c165 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -686,7 +686,18 @@ get_eclass_for_sort_expr(PlannerInfo *root, if (opcintype == cur_em->em_datatype && equal(expr, cur_em->em_expr)) - return cur_ec; /* Match! */ + { + /* + * Match! + * + * Copy sortref if it wasn't set yet, it's possible if ec was + * constructed from WHERE clause, ie it doesn't have target + * reference at all + */ + if (cur_ec->ec_sortref == 0 && sortref > 0) + cur_ec->ec_sortref = sortref; + return cur_ec; + } } } diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 68b2204b3b..65e53ef854 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -27,6 +27,7 @@ #include "optimizer/paths.h" #include "partitioning/partbounds.h" #include "utils/lsyscache.h" +#include "utils/selfuncs.h" static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); @@ -331,6 +332,58 @@ pathkeys_contained_in(List *keys1, List *keys2) return false; } +/* + * Reorder GROUP BY pathkeys and clauses to match order of pathkeys. Function + * returns new lists, original GROUP BY lists stay untouched. + */ +int +group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys, + List **group_clauses) +{ + List *new_group_pathkeys= NIL, + *new_group_clauses = NIL; + ListCell *key; + int n; + + if (pathkeys == NIL || *group_pathkeys == NIL) + return 0; + + /* + * For each pathkey it tries to find corresponding GROUP BY pathkey and + * clause. + */ + foreach(key, pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(key); + SortGroupClause *sgc; + + /* + * Pathkey should use the same allocated struct, so, equiality of + * pointers is enough + */ + if (!list_member_ptr(*group_pathkeys, pathkey)) + break; + + new_group_pathkeys = lappend(new_group_pathkeys, pathkey); + + sgc = get_sortgroupref_clause(pathkey->pk_eclass->ec_sortref, + *group_clauses); + new_group_clauses = lappend(new_group_clauses, sgc); + } + + n = list_length(new_group_pathkeys); + + /* + * Just append the rest of pathkeys and clauses + */ + *group_pathkeys = list_concat_unique_ptr(new_group_pathkeys, + *group_pathkeys); + *group_clauses = list_concat_unique_ptr(new_group_clauses, + *group_clauses); + + return n; +} + /* * get_cheapest_path_for_pathkeys * Find the cheapest path (according to the specified criterion) that @@ -1774,6 +1827,39 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys) return 0; /* path ordering not useful */ } +/* + * pathkeys_useful_for_grouping + * Count the number of pathkeys that are useful for grouping (instead of + * explicit sort) + * + * Group pathkeys could be reordered, so we don't bother about actual order in + * pathkeys + */ +static int +pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys) +{ + ListCell *key; + int n = 0; + + if (root->group_pathkeys == NIL) + return 0; /* no special ordering requested */ + + if (pathkeys == NIL) + return 0; /* unordered path */ + + foreach(key, pathkeys) + { + PathKey *pathkey = (PathKey *) lfirst(key); + + if (!list_member_ptr(root->group_pathkeys, pathkey)) + break; + + n++; + } + + return n; +} + /* * truncate_useless_pathkeys * Shorten the given pathkey list to just the useful pathkeys. @@ -1788,6 +1874,9 @@ truncate_useless_pathkeys(PlannerInfo *root, nuseful = pathkeys_useful_for_merging(root, rel, pathkeys); nuseful2 = pathkeys_useful_for_ordering(root, pathkeys); + if (nuseful2 > nuseful) + nuseful = nuseful2; + nuseful2 = pathkeys_useful_for_grouping(root, pathkeys); if (nuseful2 > nuseful) nuseful = nuseful2; @@ -1823,6 +1912,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel) { if (rel->joininfo != NIL || rel->has_eclass_joins) return true; /* might be able to use pathkeys for merging */ + if (root->group_pathkeys != NIL) + return true; /* might be able to use pathkeys for grouping */ if (root->query_pathkeys != NIL) return true; /* might be able to use them for ordering */ return false; /* definitely useless */ diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 0a6710c73b..247e39d6ff 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -6387,9 +6387,28 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, { Path *path = (Path *) lfirst(lc); bool is_sorted; + List *group_pathkeys = root->group_pathkeys, + *group_clauses = parse->groupClause; + int n_preordered_groups; + + if (parse->groupingSets) + { + /* + * prevent group pathkey rreordering, just check the same + * order paths pathkeys and group pathkeys + */ + is_sorted = pathkeys_contained_in(group_pathkeys, + path->pathkeys); + } + else + { + n_preordered_groups = + group_keys_reorder_by_pathkeys(path->pathkeys, + &group_pathkeys, + &group_clauses); + is_sorted = (n_preordered_groups == list_length(group_pathkeys)); + } - is_sorted = pathkeys_contained_in(root->group_pathkeys, - path->pathkeys); if (path == cheapest_path || is_sorted) { /* Sort the cheapest-total path if it isn't already sorted */ @@ -6397,7 +6416,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, path = (Path *) create_sort_path(root, grouped_rel, path, - root->group_pathkeys, + group_pathkeys, -1.0); /* Now decide what to stick atop it */ @@ -6418,9 +6437,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, grouped_rel, path, grouped_rel->reltarget, - parse->groupClause ? AGG_SORTED : AGG_PLAIN, + group_clauses ? AGG_SORTED : AGG_PLAIN, AGGSPLIT_SIMPLE, - parse->groupClause, + group_clauses, havingQual, agg_costs, dNumGroups)); @@ -6435,7 +6454,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, create_group_path(root, grouped_rel, path, - parse->groupClause, + group_clauses, havingQual, dNumGroups)); } @@ -6456,19 +6475,26 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, foreach(lc, partially_grouped_rel->pathlist) { Path *path = (Path *) lfirst(lc); + List *group_pathkeys = root->group_pathkeys, + *group_clauses = parse->groupClause; + int n_preordered_groups; + + n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys, + &group_pathkeys, + &group_clauses); /* * Insert a Sort node, if required. But there's no point in * sorting anything but the cheapest path. */ - if (!pathkeys_contained_in(root->group_pathkeys, path->pathkeys)) + if (n_preordered_groups != list_length(group_pathkeys)) { if (path != partially_grouped_rel->cheapest_total_path) continue; path = (Path *) create_sort_path(root, grouped_rel, path, - root->group_pathkeys, + group_pathkeys, -1.0); } @@ -6478,9 +6504,9 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, grouped_rel, path, grouped_rel->reltarget, - parse->groupClause ? AGG_SORTED : AGG_PLAIN, + group_clauses ? AGG_SORTED : AGG_PLAIN, AGGSPLIT_FINAL_DESERIAL, - parse->groupClause, + group_clauses, havingQual, agg_final_costs, dNumGroups)); @@ -6489,7 +6515,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, create_group_path(root, grouped_rel, path, - parse->groupClause, + group_clauses, havingQual, dNumGroups)); } @@ -6726,9 +6752,15 @@ create_partial_grouping_paths(PlannerInfo *root, { Path *path = (Path *) lfirst(lc); bool is_sorted; + List *group_pathkeys = root->group_pathkeys, + *group_clauses = parse->groupClause; + int n_preordered_groups; + + n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys, + &group_pathkeys, + &group_clauses); + is_sorted = (n_preordered_groups == list_length(group_pathkeys)); - is_sorted = pathkeys_contained_in(root->group_pathkeys, - path->pathkeys); if (path == cheapest_total_path || is_sorted) { /* Sort the cheapest partial path, if it isn't already */ @@ -6736,7 +6768,7 @@ create_partial_grouping_paths(PlannerInfo *root, path = (Path *) create_sort_path(root, partially_grouped_rel, path, - root->group_pathkeys, + group_pathkeys, -1.0); if (parse->hasAggs) @@ -6745,9 +6777,9 @@ create_partial_grouping_paths(PlannerInfo *root, partially_grouped_rel, path, partially_grouped_rel->reltarget, - parse->groupClause ? AGG_SORTED : AGG_PLAIN, + group_clauses ? AGG_SORTED : AGG_PLAIN, AGGSPLIT_INITIAL_SERIAL, - parse->groupClause, + group_clauses, NIL, agg_partial_costs, dNumPartialGroups)); @@ -6756,7 +6788,7 @@ create_partial_grouping_paths(PlannerInfo *root, create_group_path(root, partially_grouped_rel, path, - parse->groupClause, + group_clauses, NIL, dNumPartialGroups)); } @@ -6770,17 +6802,24 @@ create_partial_grouping_paths(PlannerInfo *root, { Path *path = (Path *) lfirst(lc); bool is_sorted; + List *group_pathkeys = root->group_pathkeys, + *group_clauses = parse->groupClause; + int n_preordered_groups; + + n_preordered_groups = group_keys_reorder_by_pathkeys(path->pathkeys, + &group_pathkeys, + &group_clauses); + is_sorted = (n_preordered_groups == list_length(group_pathkeys)); - is_sorted = pathkeys_contained_in(root->group_pathkeys, - path->pathkeys); if (path == cheapest_partial_path || is_sorted) { + /* Sort the cheapest partial path, if it isn't already */ if (!is_sorted) path = (Path *) create_sort_path(root, partially_grouped_rel, path, - root->group_pathkeys, + group_pathkeys, -1.0); if (parse->hasAggs) @@ -6789,9 +6828,9 @@ create_partial_grouping_paths(PlannerInfo *root, partially_grouped_rel, path, partially_grouped_rel->reltarget, - parse->groupClause ? AGG_SORTED : AGG_PLAIN, + group_clauses ? AGG_SORTED : AGG_PLAIN, AGGSPLIT_INITIAL_SERIAL, - parse->groupClause, + group_clauses, NIL, agg_partial_costs, dNumPartialPartialGroups)); @@ -6800,7 +6839,7 @@ create_partial_grouping_paths(PlannerInfo *root, create_group_path(root, partially_grouped_rel, path, - parse->groupClause, + group_clauses, NIL, dNumPartialPartialGroups)); } diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 0e858097c8..faf6449f4d 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -183,6 +183,9 @@ typedef enum extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2); extern bool pathkeys_contained_in(List *keys1, List *keys2); +extern int group_keys_reorder_by_pathkeys(List *pathkeys, + List **group_pathkeys, + List **group_clauses); extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion, diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index a20c69469f..265c996d5e 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -2360,14 +2360,12 @@ SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v; EXPLAIN (COSTS off) SELECT count(*) FROM btg GROUP BY v, p; - QUERY PLAN ------------------------------------------------------- + QUERY PLAN +------------------------------------------------ GroupAggregate - Group Key: v, p - -> Sort - Sort Key: v, p - -> Index Only Scan using btg_p_v_idx on btg -(5 rows) + Group Key: p, v + -> Index Only Scan using btg_p_v_idx on btg +(3 rows) EXPLAIN (COSTS off) SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v; @@ -2380,46 +2378,46 @@ SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v; EXPLAIN (COSTS off) SELECT count(*) FROM btg GROUP BY v, p, c; - QUERY PLAN ------------------------------ + QUERY PLAN +------------------------------------------------- GroupAggregate - Group Key: v, p, c + Group Key: p, v, c -> Sort - Sort Key: v, p, c - -> Seq Scan on btg + Sort Key: p, v, c + -> Index Scan using btg_p_v_idx on btg (5 rows) EXPLAIN (COSTS off) SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v; - QUERY PLAN ------------------------------ + QUERY PLAN +------------------------------------------------- GroupAggregate Group Key: p, v, c -> Sort Sort Key: p, v, c - -> Seq Scan on btg + -> Index Scan using btg_p_v_idx on btg (5 rows) EXPLAIN (COSTS off) SELECT count(*) FROM btg GROUP BY v, c, p, d; - QUERY PLAN ------------------------------- + QUERY PLAN +------------------------------------------------- GroupAggregate - Group Key: v, c, p, d + Group Key: p, v, c, d -> Sort - Sort Key: v, c, p, d - -> Seq Scan on btg + Sort Key: p, v, c, d + -> Index Scan using btg_p_v_idx on btg (5 rows) EXPLAIN (COSTS off) SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v; - QUERY PLAN ------------------------------- + QUERY PLAN +------------------------------------------------- GroupAggregate Group Key: p, v, c, d -> Sort Sort Key: p, v, c, d - -> Seq Scan on btg + -> Index Scan using btg_p_v_idx on btg (5 rows) RESET enable_hashagg; -- 2.16.4