diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c index 6b2b9e3742..74ed15bfeb 100644 --- a/contrib/bloom/blutils.c +++ b/contrib/bloom/blutils.c @@ -129,6 +129,7 @@ blhandler(PG_FUNCTION_ARGS) amroutine->ambulkdelete = blbulkdelete; amroutine->amvacuumcleanup = blvacuumcleanup; amroutine->amcanreturn = NULL; + amroutine->amskip = NULL; amroutine->amcostestimate = blcostestimate; amroutine->amoptions = bloptions; amroutine->amproperty = NULL; diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index e1073ac6d3..8c79fc33ba 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -3725,6 +3725,22 @@ ANY num_sync ( + enable_indexskipscan (boolean) + + enable_indexskipscan configuration parameter + + + + + Enables or disables the query planner's use of index-skip-scan plan + types (see ). This parameter requires + that enable_indexonlyscan is on. + The default is on. + + + + enable_material (boolean) diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml index beb99d1831..ccbb44288d 100644 --- a/doc/src/sgml/indexam.sgml +++ b/doc/src/sgml/indexam.sgml @@ -135,6 +135,7 @@ typedef struct IndexAmRoutine amendscan_function amendscan; ammarkpos_function ammarkpos; /* can be NULL */ amrestrpos_function amrestrpos; /* can be NULL */ + amskip_function amskip; /* can be NULL */ /* interface functions to support parallel index scans */ amestimateparallelscan_function amestimateparallelscan; /* can be NULL */ @@ -665,6 +666,14 @@ amrestrpos (IndexScanDesc scan); +bool +amskip (IndexScanDesc scan, ScanDirection direction, int prefix); + + TODO + + + + Size amestimateparallelscan (void); diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml index df7d16ff68..a5b1835e72 100644 --- a/doc/src/sgml/indices.sgml +++ b/doc/src/sgml/indices.sgml @@ -1319,6 +1319,22 @@ SELECT target FROM tests WHERE subject = 'some-subject' AND success; such cases and allow index-only scans to be generated, but older versions will not. + + + Index Skip Scans + + + index + index-skip scans + + + index-skip scan + + + + TODO + + diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index e95fbbcea7..85d6571c6d 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -106,6 +106,7 @@ brinhandler(PG_FUNCTION_ARGS) amroutine->ambulkdelete = brinbulkdelete; amroutine->amvacuumcleanup = brinvacuumcleanup; amroutine->amcanreturn = NULL; + amroutine->amskip = NULL; amroutine->amcostestimate = brincostestimate; amroutine->amoptions = brinoptions; amroutine->amproperty = NULL; diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index 0a32182dd7..162639090d 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -61,6 +61,7 @@ ginhandler(PG_FUNCTION_ARGS) amroutine->ambulkdelete = ginbulkdelete; amroutine->amvacuumcleanup = ginvacuumcleanup; amroutine->amcanreturn = NULL; + amroutine->amskip = NULL; amroutine->amcostestimate = gincostestimate; amroutine->amoptions = ginoptions; amroutine->amproperty = NULL; diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 8a42effdf7..ecd4af49d8 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -83,6 +83,7 @@ gisthandler(PG_FUNCTION_ARGS) amroutine->ambulkdelete = gistbulkdelete; amroutine->amvacuumcleanup = gistvacuumcleanup; amroutine->amcanreturn = gistcanreturn; + amroutine->amskip = NULL; amroutine->amcostestimate = gistcostestimate; amroutine->amoptions = gistoptions; amroutine->amproperty = gistproperty; diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 0002df30c0..7120950868 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -79,6 +79,7 @@ hashhandler(PG_FUNCTION_ARGS) amroutine->ambulkdelete = hashbulkdelete; amroutine->amvacuumcleanup = hashvacuumcleanup; amroutine->amcanreturn = NULL; + amroutine->amskip = NULL; amroutine->amcostestimate = hashcostestimate; amroutine->amoptions = hashoptions; amroutine->amproperty = NULL; diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index eade540ef5..7d04388b18 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -33,6 +33,7 @@ * index_can_return - does index support index-only scans? * index_getprocid - get a support procedure OID * index_getprocinfo - get a support procedure's lookup info + * index_skip - advance past duplicate key values in a scan * * NOTES * This file contains the index_ routines which used @@ -792,6 +793,21 @@ index_can_return(Relation indexRelation, int attno) return indexRelation->rd_amroutine->amcanreturn(indexRelation, attno); } +/* ---------------- + * index_skip + * + * Skip past all tuples where the first 'prefix' columns have the + * same value as the last tuple returned in the current scan. + * ---------------- + */ +bool +index_skip(IndexScanDesc scan, ScanDirection direction, int prefix) +{ + SCAN_CHECKS; + + return scan->indexRelation->rd_amroutine->amskip(scan, direction, prefix); +} + /* ---------------- * index_getprocid * diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index e8725fbbe1..3d02a96dad 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -130,6 +130,7 @@ bthandler(PG_FUNCTION_ARGS) amroutine->ambulkdelete = btbulkdelete; amroutine->amvacuumcleanup = btvacuumcleanup; amroutine->amcanreturn = btcanreturn; + amroutine->amskip = btskip; amroutine->amcostestimate = btcostestimate; amroutine->amoptions = btoptions; amroutine->amproperty = btproperty; @@ -378,6 +379,8 @@ btbeginscan(Relation rel, int nkeys, int norderbys) */ so->currTuples = so->markTuples = NULL; + so->skipScanKey = NULL; + scan->xs_itupdesc = RelationGetDescr(rel); scan->opaque = so; @@ -445,6 +448,15 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, _bt_preprocess_array_keys(scan); } +/* + * btskip() -- skip to the beginning of the next key prefix + */ +bool +btskip(IndexScanDesc scan, ScanDirection direction, int prefix) +{ + return _bt_skip(scan, direction, prefix); +} + /* * btendscan() -- close down a scan */ diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index d3700bd082..52c24c7541 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -1193,6 +1193,121 @@ _bt_next(IndexScanDesc scan, ScanDirection dir) return true; } +/* + * _bt_skip() -- Skip items that have the same prefix as the most recently + * fetched index tuple. The current position is set so that a subsequent call + * to _bt_next will fetch the first tuple that differs in the leading 'prefix' + * keys. + */ +bool +_bt_skip(IndexScanDesc scan, ScanDirection dir, int prefix) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + BTStack stack; + Buffer buf; + OffsetNumber offnum; + BTScanPosItem *currItem; + + /* We want to return tuples, and we need a starting point */ + Assert(scan->xs_want_itup); + Assert(scan->xs_itup); + + /* Release the current associated buffer */ + if (BTScanPosIsValid(so->currPos)) + { + ReleaseBuffer(so->currPos.buf); + so->currPos.buf = InvalidBuffer; + } + + /* + * If skipScanKey is NULL then we initialize it with _bt_mkscankey, + * otherwise we will just update the sk_flags / sk_argument elements + * in order to eliminate repeated free/realloc. + */ + if (so->skipScanKey == NULL) + { + so->skipScanKey = _bt_mkscankey(scan->indexRelation, scan->xs_itup); + } + else + { + Relation rel; + TupleDesc itupdesc; + int indnkeyatts; + int i; + + rel = scan->indexRelation; + itupdesc = RelationGetDescr(rel); + indnkeyatts = IndexRelationGetNumberOfKeyAttributes(rel); + for (i = 0; i < indnkeyatts; i++) + { + Datum datum; + bool null; + int flags; + + datum = index_getattr(scan->xs_itup, i + 1, itupdesc, &null); + flags = (null ? SK_ISNULL : 0) | (rel->rd_indoption[i] << SK_BT_INDOPTION_SHIFT); + so->skipScanKey[i].sk_flags = flags; + so->skipScanKey[i].sk_argument = datum; + } + } + + /* Use _bt_search and _bt_binsrch to get the buffer and offset number */ + stack =_bt_search(scan->indexRelation, prefix, so->skipScanKey, + ScanDirectionIsForward(dir), &buf, BT_READ, + scan->xs_snapshot); + _bt_freestack(stack); + so->currPos.buf = buf; + offnum = _bt_binsrch(scan->indexRelation, buf, prefix, so->skipScanKey, + ScanDirectionIsForward(dir)); + + /* Lock the page for SERIALIZABLE transactions */ + PredicateLockPage(scan->indexRelation, BufferGetBlockNumber(buf), + scan->xs_snapshot); + + /* We know in which direction to look */ + if (ScanDirectionIsForward(dir)) + { + so->currPos.moreLeft = false; + so->currPos.moreRight = true; + + /* Move back for _bt_next */ + offnum = OffsetNumberPrev(offnum); + } + else + { + so->currPos.moreLeft = true; + so->currPos.moreRight = false; + } + + /* Now read the data */ + if (!_bt_readpage(scan, dir, offnum)) + { + /* + * There's no actually-matching data on this page. Try to advance to + * the next page. Return false if there's no matching data at all. + */ + LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK); + if (!_bt_steppage(scan, dir)) + { + _bt_freeskey(so->skipScanKey); + so->skipScanKey = NULL; + return false; + } + } + else + { + /* Drop the lock, and maybe the pin, on the current page */ + _bt_drop_lock_and_maybe_pin(scan, &so->currPos); + } + + /* And set IndexTuple */ + currItem = &so->currPos.items[so->currPos.itemIndex]; + scan->xs_ctup.t_self = currItem->heapTid; + scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset); + + return true; +} + /* * _bt_readpage() -- Load data from current index page into so->currPos * diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c index 9919e6f0d7..0b77998886 100644 --- a/src/backend/access/spgist/spgutils.c +++ b/src/backend/access/spgist/spgutils.c @@ -68,6 +68,7 @@ spghandler(PG_FUNCTION_ARGS) amroutine->ambulkdelete = spgbulkdelete; amroutine->amvacuumcleanup = spgvacuumcleanup; amroutine->amcanreturn = spgcanreturn; + amroutine->amskip = NULL; amroutine->amcostestimate = spgcostestimate; amroutine->amoptions = spgoptions; amroutine->amproperty = spgproperty; diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 16a80a0ea1..88ef72bc62 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -1222,6 +1222,14 @@ ExplainNode(PlanState *planstate, List *ancestors, { IndexOnlyScan *indexonlyscan = (IndexOnlyScan *) plan; + if (indexonlyscan->distinctPrefix > 0) + { + if (es->format != EXPLAIN_FORMAT_TEXT) + ExplainPropertyInteger("Distinct Prefix", NULL, + indexonlyscan->distinctPrefix, + es); + } + ExplainIndexScanDetails(indexonlyscan->indexid, indexonlyscan->indexorderdir, es); @@ -1444,6 +1452,10 @@ ExplainNode(PlanState *planstate, List *ancestors, planstate, es); break; case T_IndexOnlyScan: + if (((IndexOnlyScan *) plan)->distinctPrefix > 0) + { + ExplainPropertyText("Scan mode", "Skip scan", es); + } show_scan_qual(((IndexOnlyScan *) plan)->indexqual, "Index Cond", planstate, ancestors, es); if (((IndexOnlyScan *) plan)->indexqual) diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index 8c32a74d39..b2acc22ef9 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -112,6 +112,21 @@ IndexOnlyNext(IndexOnlyScanState *node) node->ioss_NumOrderByKeys); } + /* + * Check if we need to skip to the next key prefix, because we've been + * asked to implement DISTINCT. + */ + if (node->ioss_NumDistinctKeys > 0 && node->ioss_FirstTupleEmitted + && node->ioss_NumOfSkips < 10 * node->ioss_PlanRows) + { + node->ioss_NumOfSkips += 1; + if (!index_skip(scandesc, direction, node->ioss_NumDistinctKeys)) + { + /* Reached end of index. */ + return ExecClearTuple(slot); + } + } + /* * OK, now that we have what we need, fetch the next tuple. */ @@ -247,6 +262,8 @@ IndexOnlyNext(IndexOnlyScanState *node) ItemPointerGetBlockNumber(tid), estate->es_snapshot); + node->ioss_FirstTupleEmitted = true; + return slot; } @@ -509,6 +526,10 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) indexstate->ss.ps.plan = (Plan *) node; indexstate->ss.ps.state = estate; indexstate->ss.ps.ExecProcNode = ExecIndexOnlyScan; + indexstate->ioss_NumDistinctKeys = node->distinctPrefix; + indexstate->ioss_FirstTupleEmitted = false; + indexstate->ioss_NumOfSkips = 0; + indexstate->ioss_PlanRows = node->scan.plan.plan_rows; /* * Miscellaneous initialization diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 7c8220cf65..012f61d1ad 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -517,6 +517,7 @@ _copyIndexOnlyScan(const IndexOnlyScan *from) COPY_NODE_FIELD(indexorderby); COPY_NODE_FIELD(indextlist); COPY_SCALAR_FIELD(indexorderdir); + COPY_SCALAR_FIELD(distinctPrefix); return newnode; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 93f1e2c4eb..d28ef70db0 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -586,6 +586,7 @@ _outIndexOnlyScan(StringInfo str, const IndexOnlyScan *node) WRITE_NODE_FIELD(indexorderby); WRITE_NODE_FIELD(indextlist); WRITE_ENUM_FIELD(indexorderdir, ScanDirection); + WRITE_INT_FIELD(distinctPrefix); } static void diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 519deab63a..ddf565d92d 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1800,6 +1800,7 @@ _readIndexOnlyScan(void) READ_NODE_FIELD(indexorderby); READ_NODE_FIELD(indextlist); READ_ENUM_FIELD(indexorderdir, ScanDirection); + READ_INT_FIELD(distinctPrefix); READ_DONE(); } diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 7bf67a0529..b4c4edd276 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -122,6 +122,7 @@ int max_parallel_workers_per_gather = 2; bool enable_seqscan = true; bool enable_indexscan = true; bool enable_indexonlyscan = true; +bool enable_indexskipscan = true; bool enable_bitmapscan = true; bool enable_tidscan = true; bool enable_sort = true; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index ae41c9efa0..9569f45745 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -175,7 +175,8 @@ static IndexOnlyScan *make_indexonlyscan(List *qptlist, List *qpqual, Index scanrelid, Oid indexid, List *indexqual, List *indexorderby, List *indextlist, - ScanDirection indexscandir); + ScanDirection indexscandir, + int skipprefix); static BitmapIndexScan *make_bitmap_indexscan(Index scanrelid, Oid indexid, List *indexqual, List *indexqualorig); @@ -2706,7 +2707,8 @@ create_indexscan_plan(PlannerInfo *root, fixed_indexquals, fixed_indexorderbys, best_path->indexinfo->indextlist, - best_path->indexscandir); + best_path->indexscandir, + best_path->indexskipprefix); else scan_plan = (Scan *) make_indexscan(tlist, qpqual, @@ -5115,7 +5117,8 @@ make_indexonlyscan(List *qptlist, List *indexqual, List *indexorderby, List *indextlist, - ScanDirection indexscandir) + ScanDirection indexscandir, + int skipprefix) { IndexOnlyScan *node = makeNode(IndexOnlyScan); Plan *plan = &node->scan.plan; @@ -5130,6 +5133,7 @@ make_indexonlyscan(List *qptlist, node->indexorderby = indexorderby; node->indextlist = indextlist; node->indexorderdir = indexscandir; + node->distinctPrefix = skipprefix; return node; } diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 22c010c19e..e28f98e4ce 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -4731,6 +4731,25 @@ create_distinct_paths(PlannerInfo *root, path, list_length(root->distinct_pathkeys), numDistinctRows)); + + /* Also consider a skip scan, if possible. */ + if (IsA(path, IndexPath) && + path->pathtype == T_IndexOnlyScan && + enable_indexskipscan && + ((IndexPath *) path)->indexinfo->amcanskip) + { + Path *subpath = (Path *) + create_skipscan_unique_path(root, + distinct_rel, + path, + list_length(root->distinct_pathkeys), + numDistinctRows); + add_path(distinct_rel, (Path *) + create_upper_unique_path(root, distinct_rel, + subpath, + list_length(root->distinct_pathkeys), + numDistinctRows)); + } } } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index c5aaaf5c22..b9e7baa5d4 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -2768,6 +2768,45 @@ create_upper_unique_path(PlannerInfo *root, return pathnode; } +/* + * create_skipscan_unique_path + * Creates a pathnode the same as an existing IndexPath except based on + * skipping duplicate values. This may or may not be cheaper than using + * create_upper_unique_path. + * + * The input path must be an IndexPath for an index that supports amskip. + */ +IndexPath * +create_skipscan_unique_path(PlannerInfo *root, + RelOptInfo *rel, + Path *subpath, + int numCols, + double numGroups) +{ + IndexPath *pathnode = makeNode(IndexPath); + + Assert(IsA(subpath, IndexPath)); + + /* We don't want to modify subpath, so make a copy. */ + memcpy(pathnode, subpath, sizeof(IndexPath)); + + /* The size of the prefix we'll use for skipping. */ + Assert(pathnode->indexinfo->amcanskip); + Assert(numCols > 0); + pathnode->indexskipprefix = numCols; + + /* + * The cost to skip to each distinct value should be roughly the same as + * the cost of finding the first key times the number of distinct values + * we expect to find. + */ + pathnode->path.startup_cost = subpath->startup_cost; + pathnode->path.total_cost = subpath->startup_cost * numGroups; + pathnode->path.rows = numGroups; + + return pathnode; +} + /* * create_agg_path * Creates a pathnode that represents performing aggregation/grouping diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 8369e3ad62..f07aba1c9c 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -270,6 +270,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, info->amoptionalkey = amroutine->amoptionalkey; info->amsearcharray = amroutine->amsearcharray; info->amsearchnulls = amroutine->amsearchnulls; + info->amcanskip = (amroutine->amskip != NULL); info->amcanparallel = amroutine->amcanparallel; info->amhasgettuple = (amroutine->amgettuple != NULL); info->amhasgetbitmap = (amroutine->amgetbitmap != NULL); diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 77662aff7f..2bae6e06b1 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -852,6 +852,15 @@ static struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { + {"enable_indexskipscan", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enables the planner's use of index-skip-scan plans."), + NULL + }, + &enable_indexskipscan, + true, + NULL, NULL, NULL + }, { {"enable_bitmapscan", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of bitmap-scan plans."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 4e61bc6521..87ff031a85 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -297,6 +297,7 @@ #enable_hashjoin = on #enable_indexscan = on #enable_indexonlyscan = on +#enable_indexskipscan = on #enable_material = on #enable_mergejoin = on #enable_nestloop = on diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h index 14526a6bb2..81e1ea5d5f 100644 --- a/src/include/access/amapi.h +++ b/src/include/access/amapi.h @@ -127,6 +127,10 @@ typedef void (*amrescan_function) (IndexScanDesc scan, typedef bool (*amgettuple_function) (IndexScanDesc scan, ScanDirection direction); +/* skip past duplicates in a given prefix */ +typedef bool (*amskip_function) (IndexScanDesc scan, + ScanDirection dir, int prefix); + /* fetch all valid tuples */ typedef int64 (*amgetbitmap_function) (IndexScanDesc scan, TIDBitmap *tbm); @@ -221,6 +225,7 @@ typedef struct IndexAmRoutine amendscan_function amendscan; ammarkpos_function ammarkpos; /* can be NULL */ amrestrpos_function amrestrpos; /* can be NULL */ + amskip_function amskip; /* can be NULL */ /* interface functions to support parallel index scans */ amestimateparallelscan_function amestimateparallelscan; /* can be NULL */ diff --git a/src/include/access/genam.h b/src/include/access/genam.h index 534fac7bf2..ab973f0b5f 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -170,6 +170,7 @@ extern IndexBulkDeleteResult *index_bulk_delete(IndexVacuumInfo *info, extern IndexBulkDeleteResult *index_vacuum_cleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats); extern bool index_can_return(Relation indexRelation, int attno); +extern bool index_skip(IndexScanDesc scan, ScanDirection direction, int prefix); extern RegProcedure index_getprocid(Relation irel, AttrNumber attnum, uint16 procnum); extern FmgrInfo *index_getprocinfo(Relation irel, AttrNumber attnum, diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 04ecb4cbc0..6009edb22d 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -471,6 +471,9 @@ typedef struct BTScanOpaqueData */ int markItemIndex; /* itemIndex, or -1 if not valid */ + /* Work space for _bt_skip */ + ScanKey skipScanKey; /* used to control skipping */ + /* keep these last in struct for efficiency */ BTScanPosData currPos; /* current position data */ BTScanPosData markPos; /* marked position, if any */ @@ -571,6 +574,7 @@ extern int32 _bt_compare(Relation rel, int keysz, ScanKey scankey, Page page, OffsetNumber offnum); extern bool _bt_first(IndexScanDesc scan, ScanDirection dir); extern bool _bt_next(IndexScanDesc scan, ScanDirection dir); +extern bool _bt_skip(IndexScanDesc scan, ScanDirection dir, int prefix); extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost, Snapshot snapshot); @@ -598,6 +602,7 @@ extern void _bt_end_vacuum_callback(int code, Datum arg); extern Size BTreeShmemSize(void); extern void BTreeShmemInit(void); extern bytea *btoptions(Datum reloptions, bool validate); +extern bool btskip(IndexScanDesc scan, ScanDirection dir, int prefix); extern bool btproperty(Oid index_oid, int attno, IndexAMProperty prop, const char *propname, bool *res, bool *isnull); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 687d7cd2f4..319d3ba342 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1323,6 +1323,8 @@ typedef struct IndexScanState * RelationDesc index relation descriptor * ScanDesc index scan descriptor * VMBuffer buffer in use for visibility map testing, if any + * NumDistinctKeys number of keys for skip-based DISTINCT + * FirstTupleEmitted has the first tuple been emitted * ioss_PscanLen Size of parallel index-only scan descriptor * ---------------- */ @@ -1341,7 +1343,11 @@ typedef struct IndexOnlyScanState Relation ioss_RelationDesc; IndexScanDesc ioss_ScanDesc; Buffer ioss_VMBuffer; + int ioss_NumDistinctKeys; + bool ioss_FirstTupleEmitted; Size ioss_PscanLen; + int ioss_PlanRows; + int ioss_NumOfSkips; } IndexOnlyScanState; /* ---------------- diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 7c2abbd03a..1e572853d8 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -438,6 +438,7 @@ typedef struct IndexOnlyScan List *indexorderby; /* list of index ORDER BY exprs */ List *indextlist; /* TargetEntry list describing index's cols */ ScanDirection indexorderdir; /* forward or backward or don't care */ + int distinctPrefix; /* the size of the prefix for distinct scans */ } IndexOnlyScan; /* ---------------- diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index adb4265047..27adafd6a6 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -811,6 +811,7 @@ typedef struct IndexOptInfo bool amsearchnulls; /* can AM search for NULL/NOT NULL entries? */ bool amhasgettuple; /* does AM have amgettuple interface? */ bool amhasgetbitmap; /* does AM have amgetbitmap interface? */ + bool amcanskip; /* can AM skip duplicate values? */ bool amcanparallel; /* does AM support parallel scan? */ /* Rather than include amapi.h here, we declare amcostestimate like this */ void (*amcostestimate) (); /* AM's cost estimator */ @@ -1161,6 +1162,9 @@ typedef struct Path * we need not recompute them when considering using the same index in a * bitmap index/heap scan (see BitmapHeapPath). The costs of the IndexPath * itself represent the costs of an IndexScan or IndexOnlyScan plan type. + * + * 'indexskipprefix' represents the number of columns to consider for skip + * scans. *---------- */ typedef struct IndexPath @@ -1175,6 +1179,7 @@ typedef struct IndexPath ScanDirection indexscandir; Cost indextotalcost; Selectivity indexselectivity; + int indexskipprefix; } IndexPath; /* diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 77ca7ff837..1fb3de6fa6 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -58,6 +58,7 @@ extern PGDLLIMPORT int max_parallel_workers_per_gather; extern PGDLLIMPORT bool enable_seqscan; extern PGDLLIMPORT bool enable_indexscan; extern PGDLLIMPORT bool enable_indexonlyscan; +extern PGDLLIMPORT bool enable_indexskipscan; extern PGDLLIMPORT bool enable_bitmapscan; extern PGDLLIMPORT bool enable_tidscan; extern PGDLLIMPORT bool enable_sort; diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 7c5ff22650..349b062ee2 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -186,6 +186,11 @@ extern UpperUniquePath *create_upper_unique_path(PlannerInfo *root, Path *subpath, int numCols, double numGroups); +extern IndexPath *create_skipscan_unique_path(PlannerInfo *root, + RelOptInfo *rel, + Path *subpath, + int numCols, + double numGroups); extern AggPath *create_agg_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index 0065e325c2..61053f0fb2 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -19,6 +19,7 @@ CREATE INDEX tenk1_unique1 ON tenk1 USING btree(unique1 int4_ops); CREATE INDEX tenk1_unique2 ON tenk1 USING btree(unique2 int4_ops); CREATE INDEX tenk1_hundred ON tenk1 USING btree(hundred int4_ops); CREATE INDEX tenk1_thous_tenthous ON tenk1 (thousand, tenthous); +CREATE INDEX tenk1_four ON tenk1 (four); CREATE INDEX tenk2_unique1 ON tenk2 USING btree(unique1 int4_ops); CREATE INDEX tenk2_unique2 ON tenk2 USING btree(unique2 int4_ops); CREATE INDEX tenk2_hundred ON tenk2 USING btree(hundred int4_ops); diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out index f3696c6d1d..485454fcfa 100644 --- a/src/test/regress/expected/select_distinct.out +++ b/src/test/regress/expected/select_distinct.out @@ -244,3 +244,24 @@ SELECT null IS NOT DISTINCT FROM null as "yes"; t (1 row) +-- index skip scan +SELECT DISTINCT four FROM tenk1; + four +------ + 0 + 1 + 2 + 3 +(4 rows) + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT DISTINCT four FROM tenk1; + QUERY PLAN +-------------------------------------------------------- + Unique + Output: four + -> Index Only Scan using tenk1_four on public.tenk1 + Output: four + Scan mode: Skip scan +(5 rows) + diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index a1c90eb905..bd3b373515 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -78,6 +78,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_hashjoin | on enable_indexonlyscan | on enable_indexscan | on + enable_indexskipscan | on enable_material | on enable_mergejoin | on enable_nestloop | on @@ -89,7 +90,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_seqscan | on enable_sort | on enable_tidscan | on -(17 rows) +(18 rows) -- Test that the pg_timezone_names and pg_timezone_abbrevs views are -- more-or-less working. We can't test their contents in any great detail diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql index be7f261871..3c73198179 100644 --- a/src/test/regress/sql/create_index.sql +++ b/src/test/regress/sql/create_index.sql @@ -26,6 +26,8 @@ CREATE INDEX tenk1_hundred ON tenk1 USING btree(hundred int4_ops); CREATE INDEX tenk1_thous_tenthous ON tenk1 (thousand, tenthous); +CREATE INDEX tenk1_four ON tenk1 (four); + CREATE INDEX tenk2_unique1 ON tenk2 USING btree(unique1 int4_ops); CREATE INDEX tenk2_unique2 ON tenk2 USING btree(unique2 int4_ops); diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql index a605e86449..db222fa510 100644 --- a/src/test/regress/sql/select_distinct.sql +++ b/src/test/regress/sql/select_distinct.sql @@ -73,3 +73,9 @@ SELECT 1 IS NOT DISTINCT FROM 2 as "no"; SELECT 2 IS NOT DISTINCT FROM 2 as "yes"; SELECT 2 IS NOT DISTINCT FROM null as "no"; SELECT null IS NOT DISTINCT FROM null as "yes"; + +-- index skip scan +SELECT DISTINCT four FROM tenk1; + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT DISTINCT four FROM tenk1;