Home > mailing lists
Re: Get more from indices. - Mailing list pgsql-hackers

From	Kyotaro HORIGUCHI
Subject	Re: Get more from indices.
Date	November 19, 2013 11:36:32
Msg-id	20131119.203516.251520490.horiguchi.kyotaro@lab.ntt.co.jp Whole thread Raw
In response to	Re: Get more from indices. (Kyotaro HORIGUCHI <horiguchi.kyotaro@lab.ntt.co.jp>)
Responses	Re: Get more from indices. Re: Get more from indices.
List	pgsql-hackers
Tree view
Hello, I've totally refactored the series of patches and cut out
the appropriate portion as 'unique (and non-nullable) index
stuff'.

As the discussion before, it got rid of path distinctness. This
patch works only on index 'full-orederedness', i.e., unique index
on non-nullable columns.

This patch itself does not so much. Will have power applied with
'Using indices for UNION' patch.


=== Making test table

create table t (a int not null, b int not null, c int, d text);
create unique index i_t_ab on t (a, b);
insert into t (select a / 5, 4 - (a % 5), a, 't' from generate_series(000000, 099999) a);


=== Example 1.

not-patched=# explain select * from t order by a, b ,c limit 1;
>                               QUERY PLAN                              
> ----------------------------------------------------------------------
>  Limit  (cost=2041.00..2041.00 rows=1 width=14)
>    ->  Sort  (cost=2041.00..2291.00 rows=100000 width=14)
>          Sort Key: a, b, c
>          ->  Seq Scan on t  (cost=0.00..1541.00 rows=100000 width=14)

patched=# explain select * from t order by a, b ,c limit 1;
>                                   QUERY PLAN                             
> -----------------------------------------------------------------------------
>  Limit  (cost=0.29..0.33 rows=1 width=14)
>    ->  Index Scan using i_t_ab on t  (cost=0.29..3857.04 rows=100000 width=14)


=== Example 2.

not-patched=# explain select distinct * from t order by a limit 1;
>                                 QUERY PLAN                                 
> ---------------------------------------------------------------------------
>  Limit  (cost=1820.46..1820.47 rows=1 width=44)
>    ->  Sort  (cost=1820.46..1835.34 rows=5951 width=44)
>          Sort Key: a
>          ->  HashAggregate  (cost=1731.20..1790.71 rows=5951 width=44)
>                ->  Seq Scan on t  (cost=0.00..1136.10 rows=59510 width=44)

patched=# explain select distinct * from t order by a limit 1;
>                                      QUERY PLAN                                     
> ------------------------------------------------------------------------------------
>  Limit  (cost=0.29..1.09 rows=1 width=44)
>    ->  Unique  (cost=0.29..4756.04 rows=5951 width=44)
>          ->  Index Scan using i_t_ab on t  (cost=0.29..4160.94 rows=59510 width=44)

The unique node could be removed technically but it requires to
care the distinctness of path/plan. So it has been put out to
"Using indeces for UNION" patch.


Any comments?

regards,

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 606734a..43be0a5 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -953,7 +953,8 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,        index_pathkeys =
build_index_pathkeys(root,index,                                              ForwardScanDirection);
useful_pathkeys= truncate_useless_pathkeys(root, rel,
 
-                                                    index_pathkeys);
+                                                    index_pathkeys,
+                                                    index->full_ordered);        orderbyclauses = NIL;
orderbyclausecols= NIL;    }
 
@@ -1015,7 +1016,8 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,        index_pathkeys =
build_index_pathkeys(root,index,                                              BackwardScanDirection);
useful_pathkeys= truncate_useless_pathkeys(root, rel,
 
-                                                    index_pathkeys);
+                                                    index_pathkeys,
+                                                    index->full_ordered);        if (useful_pathkeys != NIL)        {
         ipath = create_index_path(root, index,
 
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 032b2cd..5d8ee04 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -369,6 +369,29 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,}/*
+ * path_is_ordered
+ * Return true if the path is apparently ordered on the pathkeys.
+ */
+bool
+path_is_ordered(Path *path, List *pathkeys)
+{
+    if (pathkeys_contained_in(pathkeys, path->pathkeys))
+        return true;
+
+    /*
+     * If IndexPath is fully ordered, it is sufficiently ordered if index
+     * pathkeys is a subset of target pathkeys
+     */
+    if (pathkeys && path->pathkeys &&
+        IsA(path, IndexPath) &&
+        ((IndexPath*)path)->indexinfo->full_ordered &&
+        (pathkeys_contained_in(path->pathkeys, pathkeys)))
+        return true;
+
+    return false;
+}
+
+/* * get_cheapest_fractional_path_for_pathkeys *      Find the cheapest path (for retrieving a specified fraction of
all*      the tuples) that satisfies the given pathkeys and parameterization.
 
@@ -381,6 +404,8 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, * 'pathkeys' represents a required
ordering(in canonical form!) * 'required_outer' denotes allowable outer relations for parameterized paths * 'fraction'
isthe fraction of the total tuples expected to be retrieved
 
+ * paths->pathkeys might be replaced with pathkeys so as to declare that the
+ * path is ordered on it. */Path *get_cheapest_fractional_path_for_pathkeys(List *paths,
@@ -403,9 +428,17 @@ get_cheapest_fractional_path_for_pathkeys(List *paths,
compare_fractional_path_costs(matched_path,path, fraction) <= 0)            continue;
 
-        if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
+        if (path_is_ordered(path, pathkeys) &&            bms_is_subset(PATH_REQ_OUTER(path), required_outer))
+        {
+            /*
+             * Set required pathkeys as the path's pathkeys so as to declare
+             * that this path is ordred on the pathkeys.
+             */
+            if (list_length(path->pathkeys) < list_length(pathkeys))
+                path->pathkeys = pathkeys;            matched_path = path;
+        }    }    return matched_path;}
@@ -747,7 +780,7 @@ build_join_pathkeys(PlannerInfo *root,     * contain pathkeys that were useful for forming this
joinrelbut are     * uninteresting to higher levels.     */
 
-    return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
+    return truncate_useless_pathkeys(root, joinrel, outer_pathkeys,
false);}/****************************************************************************
@@ -1404,7 +1437,8 @@ right_merge_direction(PlannerInfo *root, PathKey *pathkey) * So the result is always either 0 or
list_length(root->query_pathkeys).*/static int
 
-pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
+pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys,
+                             bool pk_full_ordered){    if (root->query_pathkeys == NIL)        return 0;
/* no special ordering requested */
 
@@ -1418,23 +1452,35 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)        return
list_length(root->query_pathkeys);   }
 
+    /*
+     * Given that the pathkeys compose an unique key, they are useful for
+     * ordering when they are a sublist of query_pathkeys.
+     */
+    if (pk_full_ordered &&
+        pathkeys_contained_in(pathkeys, root->query_pathkeys))
+        return list_length(pathkeys);
+    return 0;                    /* path ordering not useful */}/* * truncate_useless_pathkeys *        Shorten the
givenpathkey list to just the useful pathkeys.
 
+ *
+ * When pk_full_ordered is true, reconsider counting the uniqueness even if
+ * the pathkeys are once judged as useless by the general criteria. */List *truncate_useless_pathkeys(PlannerInfo
*root,                         RelOptInfo *rel,
 
-                          List *pathkeys)
+                          List *pathkeys,
+                          bool pk_full_ordered){    int            nuseful;    int            nuseful2;    nuseful =
pathkeys_useful_for_merging(root,rel, pathkeys);
 
-    nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+    nuseful2 = pathkeys_useful_for_ordering(root, pathkeys, pk_full_ordered);    if (nuseful2 > nuseful)
nuseful= nuseful2;
 
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 954666c..ee92545 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -231,6 +231,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,             */
if (info->relam == BTREE_AM_OID)            {
 
+                bool has_nullable_keycol = false;
+                /*                 * If it's a btree index, we can use its opfamily OIDs                 * directly as
thesort ordering opfamily OIDs.
 
@@ -244,10 +246,25 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,                for (i
=0; i < ncolumns; i++)                {                    int16        opt = indexRelation->rd_indoption[i];
 
+                    int16        keyattno = index->indkey.values[i];                    info->reverse_sort[i] = (opt &
INDOPTION_DESC)!= 0;                    info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
 
-                }
+
+                    if (keyattno > 0)
+                    {
+                        has_nullable_keycol |= 
+                            !relation->rd_att->attrs[keyattno - 1]->attnotnull;
+                    }
+                    else if (keyattno != ObjectIdAttributeNumber)
+                        has_nullable_keycol = true;
+                 }
+
+                /* Check to see if it is a full-ordered index */
+                if (IndexIsValid(index) &&
+                    index->indisunique && index->indimmediate &&
+                    !has_nullable_keycol)
+                    info->full_ordered = true;            }            else if (indexRelation->rd_am->amcanorder)
     {
 
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 6d7b594..b38c667 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -524,6 +524,7 @@ typedef struct IndexOptInfo    bool        predOK;            /* true if predicate matches query */
  bool        unique;            /* true if a unique index */    bool        immediate;        /* is uniqueness
enforcedimmediately? */
 
+    bool        full_ordered;    /* is ordered rows not duplicate ? */    bool        hypothetical;    /* true if
indexdoesn't really exist */    bool        canreturn;        /* can index return IndexTuples? */    bool
amcanorderbyop;/* does AM support order by operator result? */
 
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 96ffdb1..9e53b42 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -160,6 +160,7 @@ extern bool pathkeys_contained_in(List *keys1, List *keys2);extern Path
*get_cheapest_path_for_pathkeys(List*paths, List *pathkeys,                               Relids required_outer,
                      CostSelector cost_criterion);
 
+extern bool path_is_ordered(Path *path, List *pathkeys);extern Path *get_cheapest_fractional_path_for_pathkeys(List
*paths,                                         List *pathkeys,                                          Relids
required_outer,
@@ -191,7 +192,8 @@ extern List *make_inner_pathkeys_for_merge(PlannerInfo *root,                              List
*outer_pathkeys);externList *truncate_useless_pathkeys(PlannerInfo *root,                          RelOptInfo *rel,
 
-                          List *pathkeys);
+                          List *pathkeys,
+                          bool pk_full_ordered);extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo
*rel);#endif  /* PATHS_H */
 
diff --git a/src/test/isolation/expected/drop-index-concurrently-1.out
b/src/test/isolation/expected/drop-index-concurrently-1.out
index 75dff56..ab96fa0 100644
--- a/src/test/isolation/expected/drop-index-concurrently-1.out
+++ b/src/test/isolation/expected/drop-index-concurrently-1.out
@@ -19,10 +19,8 @@ Sortstep explains: EXPLAIN (COSTS OFF) EXECUTE getrow_seq;QUERY PLAN     
-Sort           
-  Sort Key: id, data
-  ->  Seq Scan on test_dc
-        Filter: ((data)::text = '34'::text)
+Index Scan using test_dc_pkey on test_dc
+  Filter: ((data)::text = '34'::text)step select2: SELECT * FROM test_dc WHERE data=34 ORDER BY id,data;id
data
pgsql-hackers by date:
From: Wim Dumon
Date: 19 November 2013, 10:55:38
Subject: Re: Windows build patch
From: Kyotaro HORIGUCHI
Date: 19 November 2013, 11:42:19
Subject: Re: Using indices for UNION.
Re: Get more from indices. - Mailing list pgsql-hackers

Previous

Next