Get more from indices. - Mailing list pgsql-hackers
From | Kyotaro HORIGUCHI |
---|---|
Subject | Get more from indices. |
Date | |
Msg-id | 20131031.194310.212107585.horiguchi.kyotaro@lab.ntt.co.jp Whole thread Raw |
Responses |
Re: Get more from indices.
Re: Get more from indices. |
List | pgsql-hackers |
Hello, This is the last episode of the 'dance with indices'? series. Unique indexes can sort the tuples in corresponding tables prefectly. So this query might can use index. > uniquetest=# create table t (a int, b int, c int, d text); > uniquetest=# create unique index i_t_pkey on t(a, b); > uniquetest=# insert into t > (select a % 10, a / 10, a, 't' from generate_series(0, 100000) a); > uniquetest=# analyze; > > uniquetest=# explain (costs off, analyze on) select distinct * from t; > QUERY PLAN > --------------------------------------------------------------------------- > Unique (actual time=149.625..245.978 rows=100001 loops=1) > -> Sort (actual time=149.624..199.806 rows=100001 loops=1) > Sort Key: a, b, c, d > Sort Method: external merge Disk: 2328kB > -> Seq Scan on t (actual time=0.016..15.597 rows=100001 loops=1) > Total runtime: 251.272 ms With this patch, the plan for this query becomes as follows, > uniquetest=# explain (costs off, analyze on) select distinct * from t; > QUERY PLAN > --------------------------------------------------------------------- > Index Scan using i_t_pkey on t > (actual time=0.019..32.457 rows=100001 loops=1) > Total runtime: 37.488 ms This only seems not so valuable to archive, but with my previous MergeAppend for UNION patch, this will have more value. > uniquetest=# create table pu1 (a int, b int, c int, d text); > uniquetest=# create unique index pu1_pkey_idx on pu1 (a, b); > uniquetest=# create table cu11 (like pu1 including all) inherits (pu1); > uniquetest=# create table cu12 (like pu1 including all) inherits (pu1); > uniquetest=# create table pu2 (like pu1 including all); > uniquetest=# create table cu21 (like pu1 including all) inherits (pu2); > uniquetest=# create table cu22 (like pu1 including all) inherits (pu2); > uniquetest=# insert into cu11 (select a / 5, 4 - (a % 5), a, 'cu11' > from generate_series(000000, 099999) a); > uniquetest=# insert into cu12 (select a / 5, 4 - (a % 5), a, 'cu12' > from generate_series(100000, 199999) a); > uniquetest=# insert into cu21 (select a / 5, 4 - (a % 5), a, 'cu21' > from generate_series(150000, 249999) a); > uniquetest=# insert into cu22 (select a / 5, 4 - (a % 5), a, 'cu22' > from generate_series(250000, 349999) a); > uniquetest=# explain (analyze on, costs off) > select * from pu1 union select * from pu2 order by a, b limit 100; > QUERY PLAN > -------------------------------------------------------------------------- > Limit (actual time=0.226..0.591 rows=100 loops=1) > -> Unique (actual time=0.223..0.561 rows=100 loops=1) > -> Merge Append (actual time=0.223..0.470 rows=100 loops=1) > Sort Key: pu1.a, pu1.b, pu1.c, pu1.d > -> Limit (actual time=0.125..0.326 rows=100 loops=1) > -> Unique (actual time=0.125..0.303 rows=100 loops=1) > -> Merge Append (actual time=0.123..0.220 rows=100 loops=1) > Sort Key: pu1.a, pu1.b, pu1.c, pu1.d > -> Index Scan using..on pu1 (actual time=0.005..0.005 rows=0 loops=1) > -> Index Scan using..on cu11(actual time=0.071..0.128 rows=100 loops=1) > -> Index Scan using..on cu12 (actual time=0.045..0.045 rows=1 loops=1) > -> Limit (actual time=0.096..0.096 rows=1 loops=1) > -> Unique (actual time=0.096..0.096 rows=1 loops=1) > -> Merge Append (actual time=0.094..0.094 rows=1 loops=1) > Sort Key: pu2.a, pu2.b, pu2.c, pu2.d > -> Index Scan using..on pu2 (actual time=0.002..0.002 rows=0 loops=1) > -> Index Scan using..on cu21 (actual time=0.047..0.047 rows=1 loops=1) > -> Index Scan using..on cu22 (actual time=0.043..0.043 rows=1 loops=1) > Total runtime: 1.987 ms On the other hand, what we will get from the unpatched PostgreSQL is, > uniquetest=# explain (analyze on, costs off) > select * from pu1 union select * from pu2 order by a, b limit 100; > QUERY PLAN > ------------------------------------------------------------------------- > Limit (actual time=535.620..535.706 rows=100 loops=1) > -> Unique (actual time=535.618..535.695 rows=100 loops=1) > -> Sort (actual time=535.617..535.637 rows=100 loops=1) > Sort Key: pu1.a, pu1.b, pu1.c, pu1.d > Sort Method: external sort Disk: 10568kB > -> Append (actual time=0.012..144.144 rows=400000 loops=1) > -> Append (actual time=0.012..49.570 rows=200000 loops=1) > -> Seq Scan on pu1 (actual time=0.001..0.001 rows=0 loops=1) > -> Seq Scan on cu11 (actual time=0.011..16.202 rows=100000 loops=1) > -> Seq Scan on cu12 (actual time=0.008..16.334 rows=100000 loops=1) > -> Append (actual time=0.010..54.052 rows=200000 loops=1) > -> Seq Scan on pu2 (actual time=0.000..0.000 rows=0 loops=1) > -> Seq Scan on cu21 (actual time=0.009..16.444 rows=100000 loops=1) > -> Seq Scan on cu22 (actual time=0.021..18.923 rows=100000 loops=1) > Total runtime: 537.765 ms What do you think about this? This could be realized by following modifications, - Consider a query pathekeys is useful for ordering when a index pathkey is a subset of that when the index is UNIQUE. (build_index_paths, pathkeys_useful_for_ordering, truncate_useless_pathkeys, grouping_planner) - Judge a path is orderd or not counting the uniqueness of the path. - Regard IndexPath on UNIQUE index is reagarded uniquely ordered. - Regard MergeAppendPath of which all children is uniquely orderd as (not necessarily uniquely) ordered. (path_is_ordered) - Judge the necessity of sorting on above premises. (grouping_planner) Needless to say, theses patches have no effect on any other set operations than UNION(ALL). They are, INTERSECT(ALL), EXCEPT(ALL). regards, -- Kyotaro Horiguchi NTT Open Source Software Center diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 606734a..bc1ab9a 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -953,7 +953,8 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, index_pathkeys = build_index_pathkeys(root,index, ForwardScanDirection); useful_pathkeys= truncate_useless_pathkeys(root, rel, - index_pathkeys); + index_pathkeys, + index->unique); orderbyclauses = NIL; orderbyclausecols= NIL; } @@ -1015,7 +1016,8 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, index_pathkeys = build_index_pathkeys(root,index, BackwardScanDirection); useful_pathkeys= truncate_useless_pathkeys(root, rel, - index_pathkeys); + index_pathkeys, + index->unique); if (useful_pathkeys != NIL) { ipath = create_index_path(root, index, diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 6724996..87070e7 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -363,6 +363,68 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,}/* + * path_is_uniquely_ordered + * Return true if the path is apparently uniquely ordered on the pathkeys. + */ +bool +path_is_uniquely_ordered(Path *path, List *pathkeys) +{ + if (pathkeys && path->pathkeys && + IsA(path, IndexPath) && + ((IndexPath*)path)->indexinfo->unique && + (pathkeys_contained_in(path->pathkeys, pathkeys))) + { + return true; + } + + return false; +} + + +/* + * path_is_ordered + * Return true if the path is apparently ordered on the pathkeys. The given + * path might be modified so that it will be recognized later as sufficiently + * ordered. + */ +bool +path_is_ordered(Path *path, List *pathkeys) +{ + if (pathkeys_contained_in(pathkeys, path->pathkeys)) + return true; + + /* path is obviously ordered when uniquely ordered */ + if (path_is_uniquely_ordered(path, pathkeys)) + return true; + + /* + * MergeAppendPath's pathkeys can be replaced by arbitrary pathkeys when + * all subpaths are uniquely ordered on the target pathkeys. + */ + if (IsA(path, MergeAppendPath)) + { + ListCell *lc; + MergeAppendPath *mpath = (MergeAppendPath *)path; + + foreach (lc, mpath->subpaths) + { + Path *subpath = (Path *) lfirst(lc); + if (!path_is_uniquely_ordered(subpath, pathkeys)) break; + } + if (!lc) + { + /* + * This MergeAppendPath is sufficiently ordered on the target + * pathkeys. Reflect that on this path. + */ + mpath->path.pathkeys = pathkeys; + return true; + } + } + return false; +} + +/* * get_cheapest_fractional_path_for_pathkeys * Find the cheapest path (for retrieving a specified fraction of all* the tuples) that satisfies the given pathkeys and parameterization. @@ -397,7 +459,7 @@ get_cheapest_fractional_path_for_pathkeys(List *paths, compare_fractional_path_costs(matched_path,path, fraction) <= 0) continue; - if (pathkeys_contained_in(pathkeys, path->pathkeys) && + if (path_is_ordered(path, pathkeys) && bms_is_subset(PATH_REQ_OUTER(path), required_outer)) matched_path = path; } @@ -733,7 +795,7 @@ build_join_pathkeys(PlannerInfo *root, * contain pathkeys that were useful for forming this joinrelbut are * uninteresting to higher levels. */ - return truncate_useless_pathkeys(root, joinrel, outer_pathkeys); + return truncate_useless_pathkeys(root, joinrel, outer_pathkeys, false);}/**************************************************************************** @@ -1378,9 +1440,12 @@ right_merge_direction(PlannerInfo *root, PathKey *pathkey) * Unlike merge pathkeys, this is an all-or-nothingaffair: it does us * no good to order by just the first key(s) of the requested ordering. * So the result isalways either 0 or list_length(root->query_pathkeys). + * pk_uniq can be true when pathkeys are unique key itself. Tuples are + * sufficiently ordered when the pathkeys are the subset of the + * query_pathkeys for the case. */static int -pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys) +pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys, bool pk_uniq){ if (root->query_pathkeys == NIL) return 0; /* no special ordering requested */ @@ -1394,23 +1459,34 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys) return list_length(root->query_pathkeys); } + /* + * Given that the pathkeys compose an unique key, they are useful for + * ordering when they are a sublist of query_pathkeys. + */ + if (pk_uniq && pathkeys_contained_in(pathkeys, root->query_pathkeys)) + return list_length(pathkeys); + return 0; /* path ordering not useful */}/* * truncate_useless_pathkeys * Shorten the givenpathkey list to just the useful pathkeys. + * + * When pathkeys_unique is true, reconsider counting the uniqueness even if + * the pathkeys are once judged as useless by the general criteria. */List *truncate_useless_pathkeys(PlannerInfo *root, RelOptInfo *rel, - List *pathkeys) + List *pathkeys, + bool pathkeys_unique){ int nuseful; int nuseful2; nuseful = pathkeys_useful_for_merging(root,rel, pathkeys); - nuseful2 = pathkeys_useful_for_ordering(root, pathkeys); + nuseful2 = pathkeys_useful_for_ordering(root, pathkeys, pathkeys_unique); if (nuseful2 > nuseful) nuseful= nuseful2; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 9b9eb2f..7b1490f 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -809,7 +809,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) numsortkeys* sizeof(bool)) == 0); /* Now, insert a Sort node if subplan isn't sufficiently ordered */ - if (!pathkeys_contained_in(pathkeys, subpath->pathkeys)) + if (!path_is_ordered(subpath, pathkeys)) subplan = (Plan *) make_sort(root, subplan, numsortkeys, sortColIdx, sortOperators, collations, nullsFirst, diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 86abdf6..1ce04ac 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -258,6 +258,31 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) return result;} +/* + * Check if a path is sufficiently ordered on target pathkeys. This function + * is intended to be used as a replace for pathkeys_contained_in() when the + * focused path might be uniquely ordered on the current_pathkeys. + */ +static bool +sort_needed(List *current_pathkeys, bool path_is_unique, List *target_pathkeys) +{ + /* + * If target pathkeys are a sublist of current pathkeys, the path is + * ordered also on target pathkeys + */ + if (pathkeys_contained_in(target_pathkeys, current_pathkeys)) + return false; + + /* + * If a plan is uniquely ordered on current_pathkeys, it is sufficiently + * ordered on any pathkeys containing it. + */ + if (path_is_unique && current_pathkeys && + pathkeys_contained_in(current_pathkeys, target_pathkeys)) + return false; + + return true; +}/*-------------------- * subquery_planner @@ -1043,6 +1068,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) double dNumGroups = 0; bool use_hashed_distinct = false; bool tested_hashed_distinct = false; + bool result_plan_is_unique = false; /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */ if(parse->limitCount || parse->limitOffset) @@ -1451,18 +1477,24 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) result_plan = create_plan(root,best_path); current_pathkeys = best_path->pathkeys; + result_plan_is_unique = + path_is_uniquely_ordered(best_path, root->query_pathkeys); /* Detect if we'll need an explicitsort for grouping */ - if (parse->groupClause && !use_hashed_grouping && - !pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) + if (parse->groupClause && !use_hashed_grouping) { - need_sort_for_grouping = true; + if (path_is_ordered(best_path, root->group_pathkeys)) + current_pathkeys = root->group_pathkeys; + else + { + need_sort_for_grouping = true; - /* - * Always override create_plan's tlist, so that we don't sort - * useless data from a "physical" tlist. - */ - need_tlist_eval = true; + /* + * Always override create_plan's tlist, so that we don't + * sort useless data from a "physical" tlist. + */ + need_tlist_eval = true; + } } /* @@ -1575,6 +1607,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) extract_grouping_ops(parse->groupClause), numGroups, result_plan); + + /* Aggregation might break the tuple uniqueness. */ + result_plan_is_unique = false; } else if (parse->groupClause) { @@ -1603,7 +1638,11 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) extract_grouping_ops(parse->groupClause), dNumGroups, result_plan); - /* The Group node won't change sort ordering */ + /* + * The Group node won't change sort ordering, however might + * break the uniqueness. + */ + result_plan_is_unique = false; } else if (root->hasHavingQual) { @@ -1713,8 +1752,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) result_plan, window_pathkeys, -1.0); - if (!pathkeys_contained_in(window_pathkeys, - current_pathkeys)) + + if (sort_needed(current_pathkeys, result_plan_is_unique, + window_pathkeys)) { /* we do indeed need tosort */ result_plan = (Plan *) sort_plan; @@ -1770,6 +1810,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) wc->startOffset, wc->endOffset, result_plan); + /* Aggregation might break the tuple uniqueness. */ + result_plan_is_unique = false; } } } /* end of if (setOperations)*/ @@ -1855,8 +1897,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) needed_pathkeys = root->sort_pathkeys; else needed_pathkeys = root->distinct_pathkeys; - - if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys)) + + if (sort_needed(current_pathkeys, result_plan_is_unique, + needed_pathkeys)) { if (list_length(root->distinct_pathkeys) >= list_length(root->sort_pathkeys)) @@ -1875,8 +1918,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) -1.0); } - result_plan = (Plan *) make_unique(result_plan, - parse->distinctClause); + if (!result_plan_is_unique) + result_plan = (Plan *) make_unique(result_plan, + parse->distinctClause); result_plan->plan_rows = dNumDistinctRows; /* The Unique node won't change sort ordering */ } @@ -1888,7 +1932,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) */ if (parse->sortClause) { - if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) + if (sort_needed(current_pathkeys, result_plan_is_unique, + root->sort_pathkeys)) { result_plan = (Plan *) make_sort_from_pathkeys(root, result_plan, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 9ef93c7..8e6d0e7 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -156,6 +156,8 @@ typedef enumextern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);extern bool pathkeys_contained_in(List*keys1, List *keys2); +extern bool path_is_ordered(Path *path, List *pathkeys); +extern bool path_is_uniquely_ordered(Path *path, List *pathkeys);extern Path *get_cheapest_path_for_pathkeys(List *paths,List *pathkeys, Relids required_outer, CostSelector cost_criterion); @@ -190,7 +192,8 @@ extern List *make_inner_pathkeys_for_merge(PlannerInfo *root, List *outer_pathkeys);externList *truncate_useless_pathkeys(PlannerInfo *root, RelOptInfo *rel, - List *pathkeys); + List *pathkeys, + bool pathkeys_unique);extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel);#endif /* PATHS_H */
pgsql-hackers by date: