commit f433c2cc817eb5b6b11e4974127ec3e6904bcec0 Author: Alexander Korotkov Date: Thu Dec 6 01:04:33 2018 +0300 Improve checking for missing parent links Instead of collecting lossy bitmap, traverse from one downlink to subsequent using rightlinks. Intermediate pages are candidates to have lost parent downlinks. diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index 591e0a3e46a..f2b08d5dbdb 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -92,6 +92,8 @@ typedef struct BtreeCheckState BlockNumber targetblock; /* Target page's LSN */ XLogRecPtr targetlsn; + /* Previous downlink block number */ + BlockNumber prevdownlink; /* * Mutable state, for optional heapallindexed verification: @@ -99,8 +101,6 @@ typedef struct BtreeCheckState /* Bloom filter fingerprints B-Tree index */ bloom_filter *filter; - /* Bloom filter fingerprints downlink blocks within tree */ - bloom_filter *downlinkfilter; /* Right half of incomplete split marker */ bool rightsplit; /* Debug counter */ @@ -137,7 +137,10 @@ static void bt_target_page_check(BtreeCheckState *state); static BTScanInsert bt_right_page_check_scankey(BtreeCheckState *state); static void bt_downlink_check(BtreeCheckState *state, BTScanInsert targetkey, BlockNumber childblock); -static void bt_downlink_missing_check(BtreeCheckState *state); +static void bt_downlink_connectivity_check(BtreeCheckState *state, + BlockNumber nextdownlink, uint32 parent_level); +static void bt_downlink_missing_check(BtreeCheckState *state, bool rightsplit, + BlockNumber targetblock, Page target); static void bt_tuple_present_callback(Relation index, HeapTuple htup, Datum *values, bool *isnull, bool tupleIsAlive, void *checkstate); @@ -420,23 +423,6 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, errmsg("index \"%s\" cannot be verified using transaction snapshot", RelationGetRelationName(rel)))); } - else - { - int64 total_pages; - - /* - * Extra readonly downlink check. - * - * In readonly case, we know that there cannot be a concurrent - * page split or a concurrent page deletion, which gives us the - * opportunity to verify that every non-ignorable page had a - * downlink one level up. We must be tolerant of interrupted page - * splits and page deletions, though. This is taken care of in - * bt_downlink_missing_check(). - */ - total_pages = (int64) state->rel->rd_rel->relpages; - state->downlinkfilter = bloom_create(total_pages, work_mem, seed); - } } Assert(!state->rootdescend || state->readonly); @@ -514,16 +500,6 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, IndexInfo *indexinfo = BuildIndexInfo(state->rel); TableScanDesc scan; - /* Report on extra downlink checks performed in readonly case */ - if (state->readonly) - { - ereport(DEBUG1, - (errmsg_internal("finished verifying presence of downlink blocks within index \"%s\" with bitset %.2f%% set", - RelationGetRelationName(rel), - 100.0 * bloom_prop_bits_set(state->downlinkfilter)))); - bloom_free(state->downlinkfilter); - } - /* * Create our own scan for table_index_build_scan(), rather than * getting it to do so for us. This is required so that we can @@ -626,6 +602,8 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level) level.istruerootlevel ? " (true root level)" : level.level == 0 ? " (leaf level)" : ""); + state->prevdownlink = InvalidBlockNumber; + do { /* Don't rely on CHECK_FOR_INTERRUPTS() calls at lower level */ @@ -923,14 +901,14 @@ bt_target_page_check(BtreeCheckState *state) (uint32) state->targetlsn))); } - /* Fingerprint downlink blocks in heapallindexed + readonly case */ - if (state->heapallindexed && state->readonly && !P_ISLEAF(topaque)) + /* + * Check connectivity of downlinks. + */ + if (!P_ISLEAF(topaque) && state->readonly) { BlockNumber childblock = BTreeInnerTupleGetDownLink(itup); - bloom_add_element(state->downlinkfilter, - (unsigned char *) &childblock, - sizeof(BlockNumber)); + bt_downlink_connectivity_check(state, childblock, topaque->btpo.level); } /* @@ -1219,13 +1197,10 @@ bt_target_page_check(BtreeCheckState *state) } } - /* - * * Check if page has a downlink in parent * - * - * This can only be checked in heapallindexed + readonly case. - */ - if (state->heapallindexed && state->readonly) - bt_downlink_missing_check(state); + if (!P_ISLEAF(topaque) && P_RIGHTMOST(topaque) && state->readonly) + { + bt_downlink_connectivity_check(state, P_NONE, topaque->btpo.level); + } } /* @@ -1441,6 +1416,83 @@ bt_right_page_check_scankey(BtreeCheckState *state) return bt_mkscankey_pivotsearch(state->rel, firstitup); } +/* + * Check connectivity of downlinks. Traverse rightlinks from previous downlink + * to the current one. Check that there are no intermediate pages with missing + * downlinks. + */ +static void +bt_downlink_connectivity_check(BtreeCheckState *state, + BlockNumber nextdownlink, + uint32 parent_level) +{ + BlockNumber blkno = state->prevdownlink; + Page page; + BTPageOpaque opaque; + bool rightsplit = true; + bool first = true; + + /* + * If no previos downlink is memorized, just save our downlink to future + * usage. + */ + if (!BlockNumberIsValid(blkno)) + { + state->prevdownlink = nextdownlink; + return; + } + + while (true) + { + /* Did we traverse the whole tree level and don't find next downlink? */ + if (blkno == P_NONE) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("can't traverse from downlink %u to downlink %u of index \"%s\"", + state->prevdownlink, nextdownlink, + RelationGetRelationName(state->rel)))); + + page = palloc_btree_page(state, blkno); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + + /* Check level for non-ignorable page */ + if (!P_IGNORE(opaque) && opaque->btpo.level != parent_level - 1) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("block found while traversing rightlinks from downlink of index \"%s\" has invalid level", + RelationGetRelationName(state->rel)), + errdetail_internal("Block pointed to=%u expected level=%u level in pointed to block=%u.", + blkno, parent_level - 1, opaque->btpo.level))); + + /* Try to detect circular links */ + if ((!first && blkno == state->prevdownlink) || blkno == opaque->btpo_prev) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("circular link chain found in block %u of index \"%s\"", + blkno, RelationGetRelationName(state->rel)))); + + if (!first && !P_IGNORE(opaque)) + { + /* blkno probably has missing parent downlink */ + bt_downlink_missing_check(state, rightsplit, blkno, page); + } + + /* Exit if we already found next downlink */ + if (opaque->btpo_next == nextdownlink) + { + state->prevdownlink = nextdownlink; + pfree(page); + return; + } + + /* Traverse to the next page using rightlink */ + blkno = opaque->btpo_next; + rightsplit = P_INCOMPLETE_SPLIT(opaque); + pfree(page); + first = false; + } +} + /* * Checks one of target's downlink against its child page. * @@ -1604,23 +1656,27 @@ bt_downlink_check(BtreeCheckState *state, BTScanInsert targetkey, * be concerned about concurrent page splits or page deletions. */ static void -bt_downlink_missing_check(BtreeCheckState *state) +bt_downlink_missing_check(BtreeCheckState *state, bool rightsplit, + BlockNumber targetblock, Page target) { - BTPageOpaque topaque = (BTPageOpaque) PageGetSpecialPointer(state->target); + BTPageOpaque topaque = (BTPageOpaque) PageGetSpecialPointer(target); ItemId itemid; IndexTuple itup; Page child; BTPageOpaque copaque; uint32 level; BlockNumber childblk; + XLogRecPtr targetlsn; - Assert(state->heapallindexed && state->readonly); + Assert(state->readonly); Assert(!P_IGNORE(topaque)); /* No next level up with downlinks to fingerprint from the true root */ if (P_ISROOT(topaque)) return; + targetlsn = PageGetLSN(target); + /* * Incomplete (interrupted) page splits can account for the lack of a * downlink. Some inserting transaction should eventually complete the @@ -1642,26 +1698,20 @@ bt_downlink_missing_check(BtreeCheckState *state) * by design, so it shouldn't be taken to indicate corruption. See * _bt_pagedel() for full details. */ - if (state->rightsplit) + if (rightsplit) { ereport(DEBUG1, (errcode(ERRCODE_NO_DATA), errmsg("harmless interrupted page split detected in index %s", RelationGetRelationName(state->rel)), errdetail_internal("Block=%u level=%u left sibling=%u page lsn=%X/%X.", - state->targetblock, topaque->btpo.level, + targetblock, topaque->btpo.level, topaque->btpo_prev, - (uint32) (state->targetlsn >> 32), - (uint32) state->targetlsn))); + (uint32) (targetlsn >> 32), + (uint32) targetlsn))); return; } - /* Target's downlink is typically present in parent/fingerprinted */ - if (!bloom_lacks_element(state->downlinkfilter, - (unsigned char *) &state->targetblock, - sizeof(BlockNumber))) - return; - /* * Target is probably the "top parent" of a multi-level page deletion. * We'll need to descend the subtree to make sure that descendant pages @@ -1678,17 +1728,17 @@ bt_downlink_missing_check(BtreeCheckState *state) errmsg("leaf index block lacks downlink in index \"%s\"", RelationGetRelationName(state->rel)), errdetail_internal("Block=%u page lsn=%X/%X.", - state->targetblock, - (uint32) (state->targetlsn >> 32), - (uint32) state->targetlsn))); + targetblock, + (uint32) (targetlsn >> 32), + (uint32) targetlsn))); /* Descend from the target page, which is an internal page */ elog(DEBUG1, "checking for interrupted multi-level deletion due to missing downlink in index \"%s\"", RelationGetRelationName(state->rel)); level = topaque->btpo.level; - itemid = PageGetItemId(state->target, P_FIRSTDATAKEY(topaque)); - itup = (IndexTuple) PageGetItem(state->target, itemid); + itemid = PageGetItemId(target, P_FIRSTDATAKEY(topaque)); + itup = (IndexTuple) PageGetItem(target, itemid); childblk = BTreeInnerTupleGetDownLink(itup); for (;;) { @@ -1707,7 +1757,7 @@ bt_downlink_missing_check(BtreeCheckState *state) errmsg_internal("downlink points to block in index \"%s\" whose level is not one level down", RelationGetRelationName(state->rel)), errdetail_internal("Top parent/target block=%u block pointed to=%u expected level=%u level in pointed to block=%u.", - state->targetblock, childblk, + targetblock, childblk, level - 1, copaque->btpo.level))); level = copaque->btpo.level; @@ -1744,9 +1794,9 @@ bt_downlink_missing_check(BtreeCheckState *state) errmsg_internal("downlink to deleted leaf page found in index \"%s\"", RelationGetRelationName(state->rel)), errdetail_internal("Top parent/target block=%u leaf block=%u top parent/target lsn=%X/%X.", - state->targetblock, childblk, - (uint32) (state->targetlsn >> 32), - (uint32) state->targetlsn))); + targetblock, childblk, + (uint32) (targetlsn >> 32), + (uint32) targetlsn))); /* * Iff leaf page is half-dead, its high key top parent link should point @@ -1762,7 +1812,7 @@ bt_downlink_missing_check(BtreeCheckState *state) { itemid = PageGetItemId(child, P_HIKEY); itup = (IndexTuple) PageGetItem(child, itemid); - if (BTreeTupleGetTopParent(itup) == state->targetblock) + if (BTreeTupleGetTopParent(itup) == targetblock) return; } @@ -1771,9 +1821,9 @@ bt_downlink_missing_check(BtreeCheckState *state) errmsg("internal index block lacks downlink in index \"%s\"", RelationGetRelationName(state->rel)), errdetail_internal("Block=%u level=%u page lsn=%X/%X.", - state->targetblock, topaque->btpo.level, - (uint32) (state->targetlsn >> 32), - (uint32) state->targetlsn))); + targetblock, topaque->btpo.level, + (uint32) (targetlsn >> 32), + (uint32) targetlsn))); } /*