Re: Get more from indices. - Mailing list pgsql-hackers
From | Kyotaro HORIGUCHI |
---|---|
Subject | Re: Get more from indices. |
Date | |
Msg-id | 20131119.203516.251520490.horiguchi.kyotaro@lab.ntt.co.jp Whole thread Raw |
In response to | Re: Get more from indices. (Kyotaro HORIGUCHI <horiguchi.kyotaro@lab.ntt.co.jp>) |
Responses |
Re: Get more from indices.
Re: Get more from indices. |
List | pgsql-hackers |
Hello, I've totally refactored the series of patches and cut out the appropriate portion as 'unique (and non-nullable) index stuff'. As the discussion before, it got rid of path distinctness. This patch works only on index 'full-orederedness', i.e., unique index on non-nullable columns. This patch itself does not so much. Will have power applied with 'Using indices for UNION' patch. === Making test table create table t (a int not null, b int not null, c int, d text); create unique index i_t_ab on t (a, b); insert into t (select a / 5, 4 - (a % 5), a, 't' from generate_series(000000, 099999) a); === Example 1. not-patched=# explain select * from t order by a, b ,c limit 1; > QUERY PLAN > ---------------------------------------------------------------------- > Limit (cost=2041.00..2041.00 rows=1 width=14) > -> Sort (cost=2041.00..2291.00 rows=100000 width=14) > Sort Key: a, b, c > -> Seq Scan on t (cost=0.00..1541.00 rows=100000 width=14) patched=# explain select * from t order by a, b ,c limit 1; > QUERY PLAN > ----------------------------------------------------------------------------- > Limit (cost=0.29..0.33 rows=1 width=14) > -> Index Scan using i_t_ab on t (cost=0.29..3857.04 rows=100000 width=14) === Example 2. not-patched=# explain select distinct * from t order by a limit 1; > QUERY PLAN > --------------------------------------------------------------------------- > Limit (cost=1820.46..1820.47 rows=1 width=44) > -> Sort (cost=1820.46..1835.34 rows=5951 width=44) > Sort Key: a > -> HashAggregate (cost=1731.20..1790.71 rows=5951 width=44) > -> Seq Scan on t (cost=0.00..1136.10 rows=59510 width=44) patched=# explain select distinct * from t order by a limit 1; > QUERY PLAN > ------------------------------------------------------------------------------------ > Limit (cost=0.29..1.09 rows=1 width=44) > -> Unique (cost=0.29..4756.04 rows=5951 width=44) > -> Index Scan using i_t_ab on t (cost=0.29..4160.94 rows=59510 width=44) The unique node could be removed technically but it requires to care the distinctness of path/plan. So it has been put out to "Using indeces for UNION" patch. Any comments? regards, -- Kyotaro Horiguchi NTT Open Source Software Center diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 606734a..43be0a5 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -953,7 +953,8 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, index_pathkeys = build_index_pathkeys(root,index, ForwardScanDirection); useful_pathkeys= truncate_useless_pathkeys(root, rel, - index_pathkeys); + index_pathkeys, + index->full_ordered); orderbyclauses = NIL; orderbyclausecols= NIL; } @@ -1015,7 +1016,8 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, index_pathkeys = build_index_pathkeys(root,index, BackwardScanDirection); useful_pathkeys= truncate_useless_pathkeys(root, rel, - index_pathkeys); + index_pathkeys, + index->full_ordered); if (useful_pathkeys != NIL) { ipath = create_index_path(root, index, diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 032b2cd..5d8ee04 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -369,6 +369,29 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,}/* + * path_is_ordered + * Return true if the path is apparently ordered on the pathkeys. + */ +bool +path_is_ordered(Path *path, List *pathkeys) +{ + if (pathkeys_contained_in(pathkeys, path->pathkeys)) + return true; + + /* + * If IndexPath is fully ordered, it is sufficiently ordered if index + * pathkeys is a subset of target pathkeys + */ + if (pathkeys && path->pathkeys && + IsA(path, IndexPath) && + ((IndexPath*)path)->indexinfo->full_ordered && + (pathkeys_contained_in(path->pathkeys, pathkeys))) + return true; + + return false; +} + +/* * get_cheapest_fractional_path_for_pathkeys * Find the cheapest path (for retrieving a specified fraction of all* the tuples) that satisfies the given pathkeys and parameterization. @@ -381,6 +404,8 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, * 'pathkeys' represents a required ordering(in canonical form!) * 'required_outer' denotes allowable outer relations for parameterized paths * 'fraction' isthe fraction of the total tuples expected to be retrieved + * paths->pathkeys might be replaced with pathkeys so as to declare that the + * path is ordered on it. */Path *get_cheapest_fractional_path_for_pathkeys(List *paths, @@ -403,9 +428,17 @@ get_cheapest_fractional_path_for_pathkeys(List *paths, compare_fractional_path_costs(matched_path,path, fraction) <= 0) continue; - if (pathkeys_contained_in(pathkeys, path->pathkeys) && + if (path_is_ordered(path, pathkeys) && bms_is_subset(PATH_REQ_OUTER(path), required_outer)) + { + /* + * Set required pathkeys as the path's pathkeys so as to declare + * that this path is ordred on the pathkeys. + */ + if (list_length(path->pathkeys) < list_length(pathkeys)) + path->pathkeys = pathkeys; matched_path = path; + } } return matched_path;} @@ -747,7 +780,7 @@ build_join_pathkeys(PlannerInfo *root, * contain pathkeys that were useful for forming this joinrelbut are * uninteresting to higher levels. */ - return truncate_useless_pathkeys(root, joinrel, outer_pathkeys); + return truncate_useless_pathkeys(root, joinrel, outer_pathkeys, false);}/**************************************************************************** @@ -1404,7 +1437,8 @@ right_merge_direction(PlannerInfo *root, PathKey *pathkey) * So the result is always either 0 or list_length(root->query_pathkeys).*/static int -pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys) +pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys, + bool pk_full_ordered){ if (root->query_pathkeys == NIL) return 0; /* no special ordering requested */ @@ -1418,23 +1452,35 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys) return list_length(root->query_pathkeys); } + /* + * Given that the pathkeys compose an unique key, they are useful for + * ordering when they are a sublist of query_pathkeys. + */ + if (pk_full_ordered && + pathkeys_contained_in(pathkeys, root->query_pathkeys)) + return list_length(pathkeys); + return 0; /* path ordering not useful */}/* * truncate_useless_pathkeys * Shorten the givenpathkey list to just the useful pathkeys. + * + * When pk_full_ordered is true, reconsider counting the uniqueness even if + * the pathkeys are once judged as useless by the general criteria. */List *truncate_useless_pathkeys(PlannerInfo *root, RelOptInfo *rel, - List *pathkeys) + List *pathkeys, + bool pk_full_ordered){ int nuseful; int nuseful2; nuseful = pathkeys_useful_for_merging(root,rel, pathkeys); - nuseful2 = pathkeys_useful_for_ordering(root, pathkeys); + nuseful2 = pathkeys_useful_for_ordering(root, pathkeys, pk_full_ordered); if (nuseful2 > nuseful) nuseful= nuseful2; diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 954666c..ee92545 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -231,6 +231,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, */ if (info->relam == BTREE_AM_OID) { + bool has_nullable_keycol = false; + /* * If it's a btree index, we can use its opfamily OIDs * directly as thesort ordering opfamily OIDs. @@ -244,10 +246,25 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, for (i =0; i < ncolumns; i++) { int16 opt = indexRelation->rd_indoption[i]; + int16 keyattno = index->indkey.values[i]; info->reverse_sort[i] = (opt & INDOPTION_DESC)!= 0; info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0; - } + + if (keyattno > 0) + { + has_nullable_keycol |= + !relation->rd_att->attrs[keyattno - 1]->attnotnull; + } + else if (keyattno != ObjectIdAttributeNumber) + has_nullable_keycol = true; + } + + /* Check to see if it is a full-ordered index */ + if (IndexIsValid(index) && + index->indisunique && index->indimmediate && + !has_nullable_keycol) + info->full_ordered = true; } else if (indexRelation->rd_am->amcanorder) { diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 6d7b594..b38c667 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -524,6 +524,7 @@ typedef struct IndexOptInfo bool predOK; /* true if predicate matches query */ bool unique; /* true if a unique index */ bool immediate; /* is uniqueness enforcedimmediately? */ + bool full_ordered; /* is ordered rows not duplicate ? */ bool hypothetical; /* true if indexdoesn't really exist */ bool canreturn; /* can index return IndexTuples? */ bool amcanorderbyop;/* does AM support order by operator result? */ diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 96ffdb1..9e53b42 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -160,6 +160,7 @@ extern bool pathkeys_contained_in(List *keys1, List *keys2);extern Path *get_cheapest_path_for_pathkeys(List*paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion); +extern bool path_is_ordered(Path *path, List *pathkeys);extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, @@ -191,7 +192,8 @@ extern List *make_inner_pathkeys_for_merge(PlannerInfo *root, List *outer_pathkeys);externList *truncate_useless_pathkeys(PlannerInfo *root, RelOptInfo *rel, - List *pathkeys); + List *pathkeys, + bool pk_full_ordered);extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel);#endif /* PATHS_H */ diff --git a/src/test/isolation/expected/drop-index-concurrently-1.out b/src/test/isolation/expected/drop-index-concurrently-1.out index 75dff56..ab96fa0 100644 --- a/src/test/isolation/expected/drop-index-concurrently-1.out +++ b/src/test/isolation/expected/drop-index-concurrently-1.out @@ -19,10 +19,8 @@ Sortstep explains: EXPLAIN (COSTS OFF) EXECUTE getrow_seq;QUERY PLAN -Sort - Sort Key: id, data - -> Seq Scan on test_dc - Filter: ((data)::text = '34'::text) +Index Scan using test_dc_pkey on test_dc + Filter: ((data)::text = '34'::text)step select2: SELECT * FROM test_dc WHERE data=34 ORDER BY id,data;id data
pgsql-hackers by date: