From a4140e87691f235b9ac0d9755b214f98ea3b1b05 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Mon, 12 Nov 2018 13:11:21 -0800 Subject: [PATCH v9 5/7] Add high key "continuescan" optimization. Teach B-Tree forward index scans to check the high key before moving to the next page, in the hope of finding that moving to the next page isn't actually necessary. For the same reason, we already opportunistically force a key check of the last item on leaf pages, even when it's clear that the item cannot be returned to the scan because it is dead-to-all. Since forcing the last item to be key checked no longer makes any difference for forward scans, the existing extra key check is now only used for backward scans. Like the existing check, the new check won't always work out, but that seems like an acceptable price to pay. The new approach is more effective than just checking non-pivot tuples, especially with composite indexes and non-unique indexes. The high key represents an upper bound on all values that can appear on the page, which is often greater than whatever tuple happens to appear last at the time of the check. Also, suffix truncation's new logic for picking a split point will often result in high keys that are relatively dissimilar to the other (non-pivot) tuples on the page, and that are therefore more likely to indicate that the scan need not proceed to the next page. Note that even pre-pg_upgrade'd v3 indexes make use of this optimization. 
--- src/backend/access/nbtree/nbtsearch.c | 23 +++++++--- src/backend/access/nbtree/nbtutils.c | 60 +++++++++++++++++++++------ 2 files changed, 65 insertions(+), 18 deletions(-) diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 5d9cf856f8..c1a483e8d1 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -1428,7 +1428,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum) OffsetNumber maxoff; int itemIndex; IndexTuple itup; - bool continuescan; + bool continuescan = true; /* * We must have the buffer pinned and locked, but the usual macro can't be @@ -1496,16 +1496,29 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum) _bt_saveitem(so, itemIndex, offnum, itup); itemIndex++; } + /* When !continuescan, there can't be any more matches, so stop */ if (!continuescan) - { - /* there can't be any more matches, so stop */ - so->currPos.moreRight = false; break; - } offnum = OffsetNumberNext(offnum); } + /* + * Forward scans need not visit page to the right when high key + * indicates no more matches will be found there. + * + * Checking the high key like this works out more often than you'd + * think. Leaf page splits pick a split point between the two most + * dissimilar tuples within a range of acceptable split points. There + * is often natural locality around what ends up on each leaf page, + * which is worth taking advantage of here. 
+ */ + if (!P_RIGHTMOST(opaque) && continuescan) + (void) _bt_checkkeys(scan, page, P_HIKEY, dir, &continuescan); + + if (!continuescan) + so->currPos.moreRight = false; + Assert(itemIndex <= MaxIndexTuplesPerPage); so->currPos.firstItem = 0; so->currPos.lastItem = itemIndex - 1; diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 2c1be82acb..a4964dc22c 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -48,7 +48,7 @@ static bool _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op, static bool _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption); static void _bt_mark_scankey_required(ScanKey skey); static bool _bt_check_rowcompare(ScanKey skey, - IndexTuple tuple, TupleDesc tupdesc, + IndexTuple tuple, int tupnatts, TupleDesc tupdesc, ScanDirection dir, bool *continuescan); static int _bt_leave_natts(Relation rel, IndexTuple lastleft, IndexTuple firstright, bool build); @@ -1398,7 +1398,10 @@ _bt_mark_scankey_required(ScanKey skey) * dir: direction we are scanning in * continuescan: output parameter (will be set correctly in all cases) * - * Caller must hold pin and lock on the index page. + * Caller must hold pin and lock on the index page. Caller can pass a high + * key offnum in the hopes of discovering that the scan need not continue on + * to a page to the right. We don't currently bother limiting high key + * comparisons to SK_BT_REQFWD scan keys. */ IndexTuple _bt_checkkeys(IndexScanDesc scan, @@ -1408,6 +1411,7 @@ _bt_checkkeys(IndexScanDesc scan, ItemId iid = PageGetItemId(page, offnum); bool tuple_alive; IndexTuple tuple; + int tupnatts; TupleDesc tupdesc; BTScanOpaque so; int keysz; @@ -1421,21 +1425,24 @@ _bt_checkkeys(IndexScanDesc scan, * killed tuple as not passing the qual. Most of the time, it's a win to * not bother examining the tuple's index keys, but just return * immediately with continuescan = true to proceed to the next tuple. 
- * However, if this is the last tuple on the page, we should check the - * index keys to prevent uselessly advancing to the next page. + * However, if this is the first tuple on the page, and we're doing a + * backward scan, we should check the index keys to prevent uselessly + * advancing to the page to the left. This is similar to the high key + * optimization used by forward scan callers. */ if (scan->ignore_killed_tuples && ItemIdIsDead(iid)) { - /* return immediately if there are more tuples on the page */ + BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); + + Assert(offnum != P_HIKEY || P_RIGHTMOST(opaque)); if (ScanDirectionIsForward(dir)) { - if (offnum < PageGetMaxOffsetNumber(page)) - return NULL; + /* forward scan callers check high key instead */ + return NULL; } else { - BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); - + /* return immediately if there are more tuples on the page */ if (offnum > P_FIRSTDATAKEY(opaque)) return NULL; } @@ -1450,6 +1457,7 @@ _bt_checkkeys(IndexScanDesc scan, tuple_alive = true; tuple = (IndexTuple) PageGetItem(page, iid); + tupnatts = BTreeTupleGetNAtts(tuple, scan->indexRelation); tupdesc = RelationGetDescr(scan->indexRelation); so = (BTScanOpaque) scan->opaque; @@ -1461,11 +1469,24 @@ _bt_checkkeys(IndexScanDesc scan, bool isNull; Datum test; - Assert(key->sk_attno <= BTreeTupleGetNAtts(tuple, scan->indexRelation)); + /* + * Assume that truncated attribute (from high key) passes the qual. + * The value of a truncated attribute for the first tuple on the right + * page could be any possible value, so we may have to visit the next + * page. 
+ */ + if (key->sk_attno > tupnatts) + { + Assert(offnum == P_HIKEY); + Assert(ScanDirectionIsForward(dir)); + continue; + } + /* row-comparison keys need special processing */ if (key->sk_flags & SK_ROW_HEADER) { - if (_bt_check_rowcompare(key, tuple, tupdesc, dir, continuescan)) + if (_bt_check_rowcompare(key, tuple, tupnatts, tupdesc, dir, + continuescan)) continue; return NULL; } @@ -1596,8 +1617,8 @@ _bt_checkkeys(IndexScanDesc scan, * This is a subroutine for _bt_checkkeys, which see for more info. */ static bool -_bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc, - ScanDirection dir, bool *continuescan) +_bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, + TupleDesc tupdesc, ScanDirection dir, bool *continuescan) { ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument); int32 cmpresult = 0; @@ -1614,6 +1635,19 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc, Assert(subkey->sk_flags & SK_ROW_MEMBER); + /* + * Assume that truncated attribute (from high key) passes the qual. + * The value of a truncated attribute for the first tuple on the right + * page could be any possible value, so we may have to visit the next + * page. + */ + if (subkey->sk_attno > tupnatts) + { + Assert(ScanDirectionIsForward(dir)); + cmpresult = 0; + continue; + } + datum = index_getattr(tuple, subkey->sk_attno, tupdesc, -- 2.17.1