diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index 54b71cb..bdd99f2 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -29,6 +29,7 @@
  *		index_can_return	- does index support index-only scans?
  *		index_getprocid - get a support procedure OID
  *		index_getprocinfo - get a support procedure's lookup info
+ *		index_skip		- advance past duplicate key values in a scan
  *
  * NOTES
  *		This file contains the index_ routines which used
@@ -666,6 +667,28 @@ index_can_return(Relation indexRelation, int attno)
 }
 
 /* ----------------
+ *		index_skip
+ *
+ *		Skip past all tuples where the first 'prefix' columns have the
+ *		same value as the last tuple returned in the current scan.
+ *
+ *		Returns the result of the access method's amskip routine.  amskip
+ *		is optional (it can be NULL), so report an error instead of calling
+ *		through a NULL pointer when the access method does not provide it.
+ * ----------------
+ */
+bool
+index_skip(IndexScanDesc scan, ScanDirection direction, int prefix)
+{
+	SCAN_CHECKS;
+
+	if (scan->indexRelation->rd_amroutine->amskip == NULL)
+		elog(ERROR, "index access method does not support skipping");
+
+	return scan->indexRelation->rd_amroutine->amskip(scan, direction, prefix);
+}
+
+/* ----------------
  *		index_getprocid
  *
  *		Index access methods typically require support routines that are
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 128744c..1a1cc6e 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -106,6 +106,7 @@ bthandler(PG_FUNCTION_ARGS)
 	amroutine->ambulkdelete = btbulkdelete;
 	amroutine->amvacuumcleanup = btvacuumcleanup;
 	amroutine->amcanreturn = btcanreturn;
+	amroutine->amskip = btskip;
 	amroutine->amcostestimate = btcostestimate;
 	amroutine->amoptions = btoptions;
 	amroutine->amproperty = btproperty;
@@ -454,6 +455,8 @@ btbeginscan(Relation rel, int nkeys, int norderbys)
 	 */
 	so->currTuples = so->markTuples = NULL;
 
+	so->skipScanKey = NULL;
+
 	scan->xs_itupdesc = RelationGetDescr(rel);
 
 	scan->opaque = so;
@@ -521,6 +524,17 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
 }
 
 /*
+ *	btskip() -- skip to the beginning of the next key prefix
+ *
+ *	Simply forwards to _bt_skip() and returns its result.
+ */
+bool
+btskip(IndexScanDesc scan, ScanDirection dir, int prefix)
+{
+	return _bt_skip(scan, dir, prefix);
+}
+
+/*
  *	btendscan() -- close down a scan
  */
 void
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index ee46023..9cd58ae 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -1120,6 +1120,112 @@ _bt_next(IndexScanDesc scan, ScanDirection dir)
 }
 
 /*
+ * _bt_skip() -- Skip items that have the same prefix as the most recently
+ * fetched index tuple.  The current position is set so that a subsequent call
+ * to _bt_next will fetch the first tuple that differs in the leading 'prefix'
+ * keys.
+ *
+ * The scan must be returning index tuples (xs_want_itup) and must already
+ * have fetched one (xs_itup); an error is raised otherwise.  Returns false
+ * if _bt_readpage finds nothing on the located page and _bt_steppage then
+ * fails as well; returns true otherwise.
+ *
+ * TODO:TM The 'step back one tuple' behaviour is only necessary because I
+ * wanted the nodeIndexonlyscan code to be able to use its existing
+ * tuple-fetch-visibility-check loop without a special case for the first
+ * iteration.  An alternative would be for skip to actually fetch the desired
+ * tuple immediately (ie without a subsequent call to _bt_next).
+ */
+bool
+_bt_skip(IndexScanDesc scan, ScanDirection dir, int prefix)
+{
+	/*
+	 * TODO:TM For now, we use _bt_search to search from the root; in theory
+	 * we should be able to do a local traversal ie from the current page, but
+	 * I don't know if it would actually be better in general.
+	 */
+
+	BTScanOpaque so = (BTScanOpaque) scan->opaque;
+	BTStack		stack;
+	Buffer		buf;
+	OffsetNumber offnum;
+	BTScanPosItem *currItem;
+
+	if (!scan->xs_want_itup)
+		elog(ERROR, "_bt_skip cannot skip if not returning tuples");
+	if (!scan->xs_itup)
+		elog(ERROR, "_bt_skip cannot skip if a tuple has not been fetched yet");
+
+	if (BTScanPosIsValid(so->currPos))
+	{
+		ReleaseBuffer(so->currPos.buf);
+		so->currPos.buf = InvalidBuffer;
+	}
+
+	/*
+	 * TODO:TM lazily call _bt_mkscankey the first time and then just update
+	 * the values in so->skipScanKey each time after that, instead of the
+	 * repeated free/realloc
+	 */
+	if (so->skipScanKey != NULL)
+		_bt_freeskey(so->skipScanKey);
+	so->skipScanKey = _bt_mkscankey(scan->indexRelation, scan->xs_itup);
+
+	/* TODO:TM share some of the code below with _bt_search? */
+	stack = _bt_search(scan->indexRelation, prefix, so->skipScanKey,
+					   ScanDirectionIsForward(dir), &buf, BT_READ,
+					   scan->xs_snapshot);
+	_bt_freestack(stack);
+	so->currPos.buf = buf;
+	offnum = _bt_binsrch(scan->indexRelation, buf, prefix, so->skipScanKey,
+						 ScanDirectionIsForward(dir));
+	PredicateLockPage(scan->indexRelation, BufferGetBlockNumber(buf),
+					  scan->xs_snapshot);
+
+	if (ScanDirectionIsForward(dir))
+	{
+		so->currPos.moreLeft = false;
+		so->currPos.moreRight = true;
+
+		/*
+		 * Step back one tuple, so that the caller's next _bt_next call
+		 * returns the first tuple of the new prefix.
+		 */
+		offnum = OffsetNumberPrev(offnum);
+	}
+	else
+	{
+		so->currPos.moreLeft = true;
+		so->currPos.moreRight = false;
+	}
+
+	if (!_bt_readpage(scan, dir, offnum))
+	{
+		/*
+		 * Nothing to return from this page.  _bt_steppage expects to be
+		 * called without the page lock held and does its own unlocking of
+		 * whatever page it ends up on, so release our lock before calling
+		 * it and do not unlock again afterwards.
+		 */
+		LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
+		if (!_bt_steppage(scan, dir))
+			return false;
+	}
+	else
+	{
+		/* Drop the lock, but keep the pin, on the page just read. */
+		LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
+	}
+
+	/* itemIndex now says which item the scan is positioned on. */
+	currItem = &so->currPos.items[so->currPos.itemIndex];
+	scan->xs_ctup.t_self = currItem->heapTid;
+	/* xs_want_itup was verified on entry, so always expose the tuple. */
+	scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
+	return true;
+}
+
+/*
  *	_bt_readpage() -- Load data from current index page into so->currPos
  *
  * Caller must have pinned and read-locked so->currPos.buf; the buffer's state
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 5d18206..824c418 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -239,6 +239,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 			info->amoptionalkey = amroutine->amoptionalkey;
 			info->amsearcharray = amroutine->amsearcharray;
 			info->amsearchnulls = amroutine->amsearchnulls;
+			info->amcanskip = (amroutine->amskip != NULL);
 			info->amhasgettuple = (amroutine->amgettuple != NULL);
 			info->amhasgetbitmap = (amroutine->amgetbitmap != NULL);
 			info->amcostestimate = amroutine->amcostestimate;
diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h
index 1036cca..09e08b7 100644
--- a/src/include/access/amapi.h
+++ b/src/include/access/amapi.h
@@ -124,6 +124,10 @@ typedef void (*amrescan_function) (IndexScanDesc scan,
 typedef bool (*amgettuple_function) (IndexScanDesc scan,
 												 ScanDirection direction);
 
+/* skip past duplicates in a given prefix */
+typedef bool (*amskip_function) (IndexScanDesc scan,
+								 ScanDirection dir, int prefix);
+
 /* fetch all valid tuples */
 typedef int64 (*amgetbitmap_function) (IndexScanDesc scan,
 												   TIDBitmap *tbm);
@@ -196,6 +200,7 @@ typedef struct IndexAmRoutine
 	amendscan_function amendscan;
 	ammarkpos_function ammarkpos;		/* can be NULL */
 	amrestrpos_function amrestrpos;		/* can be NULL */
+	amskip_function amskip;		/* can be NULL */
 } IndexAmRoutine;
 
 
diff --git a/src/include/access/genam.h b/src/include/access/genam.h
index 81907d5..09f9aba 100644
--- a/src/include/access/genam.h
+++ b/src/include/access/genam.h
@@ -157,6 +157,7 @@ extern IndexBulkDeleteResult *index_bulk_delete(IndexVacuumInfo *info,
 extern IndexBulkDeleteResult *index_vacuum_cleanup(IndexVacuumInfo *info,
 					 IndexBulkDeleteResult *stats);
 extern bool index_can_return(Relation indexRelation, int attno);
+extern bool index_skip(IndexScanDesc scan, ScanDirection direction, int prefix);
 extern RegProcedure index_getprocid(Relation irel, AttrNumber attnum,
 				uint16 procnum);
 extern FmgrInfo *index_getprocinfo(Relation irel, AttrNumber attnum,
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index c580f51..836f8de 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -636,6 +636,9 @@ typedef struct BTScanOpaqueData
+	/* workspace for _bt_skip */
+	ScanKey		skipScanKey;	/* used to control skipping */
+
 	/* keep these last in struct for efficiency */
 	BTScanPosData currPos;		/* current position data */
 	BTScanPosData markPos;		/* marked position, if any */
 } BTScanOpaqueData;
 
 typedef BTScanOpaqueData *BTScanOpaque;
@@ -721,6 +724,7 @@ extern int32 _bt_compare(Relation rel, int keysz, ScanKey scankey,
 			Page page, OffsetNumber offnum);
 extern bool _bt_first(IndexScanDesc scan, ScanDirection dir);
 extern bool _bt_next(IndexScanDesc scan, ScanDirection dir);
+extern bool _bt_skip(IndexScanDesc scan, ScanDirection dir, int prefix);
 extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost,
 				 Snapshot snapshot);
 
@@ -748,6 +752,7 @@ extern void _bt_end_vacuum_callback(int code, Datum arg);
 extern Size BTreeShmemSize(void);
 extern void BTreeShmemInit(void);
 extern bytea *btoptions(Datum reloptions, bool validate);
+extern bool btskip(IndexScanDesc scan, ScanDirection dir, int prefix);
 extern bool btproperty(Oid index_oid, int attno,
 		   IndexAMProperty prop, const char *propname,
 		   bool *res, bool *isnull);
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 3a1255a..0e1c8d0 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -622,6 +622,7 @@ typedef struct IndexOptInfo
 	bool		amsearchnulls;	/* can AM search for NULL/NOT NULL entries? */
 	bool		amhasgettuple;	/* does AM have amgettuple interface? */
 	bool		amhasgetbitmap; /* does AM have amgetbitmap interface? */
+	bool		amcanskip;		/* can AM skip duplicate values? */
 	/* Rather than include amapi.h here, we declare amcostestimate like this */
 	void		(*amcostestimate) ();	/* AM's cost estimator */
 } IndexOptInfo;