From af86f97a04ca6ef40147a7c5f67e442cbd231ee9 Mon Sep 17 00:00:00 2001 From: Andrey Date: Thu, 28 Mar 2019 14:46:55 +0500 Subject: [PATCH] GiST amcheck from Andrey --- contrib/amcheck/Makefile | 9 +- contrib/amcheck/amcheck--1.2--1.3.sql | 14 + contrib/amcheck/amcheck.c | 160 ++++++++++ contrib/amcheck/amcheck.control | 2 +- contrib/amcheck/amcheck.h | 23 ++ contrib/amcheck/expected/check_gist.out | 18 ++ contrib/amcheck/sql/check_gist.sql | 9 + contrib/amcheck/verify_gist.c | 374 ++++++++++++++++++++++++ contrib/amcheck/verify_nbtree.c | 176 ++--------- doc/src/sgml/amcheck.sgml | 19 ++ 10 files changed, 653 insertions(+), 151 deletions(-) create mode 100644 contrib/amcheck/amcheck--1.2--1.3.sql create mode 100644 contrib/amcheck/amcheck.c create mode 100644 contrib/amcheck/amcheck.h create mode 100644 contrib/amcheck/expected/check_gist.out create mode 100644 contrib/amcheck/sql/check_gist.sql create mode 100644 contrib/amcheck/verify_gist.c diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile index dcec3b8520..c08d2a582b 100644 --- a/contrib/amcheck/Makefile +++ b/contrib/amcheck/Makefile @@ -1,13 +1,16 @@ # contrib/amcheck/Makefile MODULE_big = amcheck -OBJS = verify_nbtree.o $(WIN32RES) +OBJS = amcheck.o verify_gist.o verify_nbtree.o $(WIN32RES) EXTENSION = amcheck -DATA = amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql +DATA = amcheck--1.2--1.3.sql \ + amcheck--1.1--1.2.sql \ + amcheck--1.0--1.1.sql \ + amcheck--1.0.sql PGFILEDESC = "amcheck - function for verifying relation integrity" -REGRESS = check check_btree +REGRESS = check check_btree check_gist ifdef USE_PGXS PG_CONFIG = pg_config diff --git a/contrib/amcheck/amcheck--1.2--1.3.sql b/contrib/amcheck/amcheck--1.2--1.3.sql new file mode 100644 index 0000000000..44b88a40a0 --- /dev/null +++ b/contrib/amcheck/amcheck--1.2--1.3.sql @@ -0,0 +1,14 @@ +/* contrib/amcheck/amcheck--1.2--1.3.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION amcheck UPDATE TO '1.3'" to load this file. \quit + +-- +-- gist_index_parent_check() +-- +CREATE FUNCTION gist_index_parent_check(index regclass) +RETURNS VOID +AS 'MODULE_PATHNAME', 'gist_index_parent_check' +LANGUAGE C STRICT; + +REVOKE ALL ON FUNCTION gist_index_parent_check(regclass) FROM PUBLIC; diff --git a/contrib/amcheck/amcheck.c b/contrib/amcheck/amcheck.c new file mode 100644 index 0000000000..5bc3f831bc --- /dev/null +++ b/contrib/amcheck/amcheck.c @@ -0,0 +1,160 @@ +/*------------------------------------------------------------------------- + * + * amcheck.c + * Utility functions common to all access methods. + * + * Copyright (c) 2017-2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/amcheck/amcheck.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/table.h" +#include "access/tableam.h" +#include "amcheck.h" +#include "catalog/index.h" +#include "commands/tablecmds.h" + + +/* + * Lock acquisition reused across different am types + */ +void +amcheck_lock_relation(Oid indrelid, Relation *indrel, + Relation *heaprel, LOCKMODE lockmode) +{ + Oid heapid; + + /* + * We must lock table before index to avoid deadlocks. However, if the + * passed indrelid isn't an index then IndexGetRelation() will fail. + * Rather than emitting a not-very-helpful error message, postpone + * complaining, expecting that the is-it-an-index test below will fail. + * + * In hot standby mode this will raise an error when lockmode is + * AccessExclusiveLock. + */ + heapid = IndexGetRelation(indrelid, true); + if (OidIsValid(heapid)) + *heaprel = table_open(heapid, lockmode); + else + *heaprel = NULL; + + /* + * Open the target index relations separately (like relation_openrv(), but + * with heap relation locked first to prevent deadlocking). In hot + * standby mode this will raise an error when lockmode is + * AccessExclusiveLock. + * + * There is no need for the usual indcheckxmin usability horizon test + * here, even in the nbtree heapallindexed case, because index undergoing + * verification only needs to have entries for a new transaction snapshot. + * (If caller is about to do nbtree parentcheck verification, there is no + * question about committed or recently dead heap tuples lacking index + * entries due to concurrent activity.) + */ + *indrel = index_open(indrelid, lockmode); + + /* + * Since we did the IndexGetRelation call above without any lock, it's + * barely possible that a race against an index drop/recreation could have + * netted us the wrong table. + */ + if (*heaprel == NULL || heapid != IndexGetRelation(indrelid, false)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_TABLE), + errmsg("could not open parent table of index %s", + RelationGetRelationName(*indrel)))); +} + +/* + * Unlock index and heap relations early for amcheck_lock_relation() caller. + * + * This is ok because nothing in the called routines will trigger shared cache + * invalidations to be sent, so we can relax the usual pattern of only + * releasing locks after commit. + */ +void +amcheck_unlock_relation(Oid indrelid, Relation indrel, Relation heaprel, + LOCKMODE lockmode) +{ + index_close(indrel, lockmode); + if (heaprel) + table_close(heaprel, lockmode); +} + +/* + * Check if index relation should have a file for its main relation + * fork. Verification uses this to skip unlogged indexes when in hot standby + * mode, where there is simply nothing to verify. + * + * NB: Caller should call btree_index_checkable() or gist_index_checkable() + * before calling here. + */ +bool +amcheck_index_mainfork_expected(Relation rel) +{ + if (rel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED || + !RecoveryInProgress()) + return true; + + ereport(NOTICE, + (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION), + errmsg("cannot verify unlogged index \"%s\" during recovery, skipping", + RelationGetRelationName(rel)))); + + return false; +} + +/* + * PageGetItemId() wrapper that validates returned line pointer. + * + * Buffer page/page item access macros generally trust that line pointers are + * not corrupt, which might cause problems for verification itself. For + * example, there is no bounds checking in PageGetItem(). Passing it a + * corrupt line pointer can cause it to return a tuple/pointer that is unsafe + * to dereference. + * + * Validating line pointers before tuples avoids undefined behavior and + * assertion failures with corrupt indexes, making the verification process + * more robust and predictable. + */ +ItemId +PageGetItemIdCareful(Relation rel, BlockNumber block, Page page, + OffsetNumber offset, size_t opaquesize) +{ + ItemId itemid = PageGetItemId(page, offset); + + if (ItemIdGetOffset(itemid) + ItemIdGetLength(itemid) > + BLCKSZ - opaquesize) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("line pointer points past end of tuple space in index \"%s\"", + RelationGetRelationName(rel)), + errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.", + block, offset, ItemIdGetOffset(itemid), + ItemIdGetLength(itemid), + ItemIdGetFlags(itemid)))); + + /* + * Verify that line pointer isn't LP_REDIRECT or LP_UNUSED, since nbtree and gist + * never uses either. Verify that line pointer has storage, too, since + * even LP_DEAD items should. + */ + if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid) || + ItemIdGetLength(itemid) == 0) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("invalid line pointer storage in index \"%s\"", + RelationGetRelationName(rel)), + errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.", + block, offset, ItemIdGetOffset(itemid), + ItemIdGetLength(itemid), + ItemIdGetFlags(itemid)))); + + return itemid; +} \ No newline at end of file diff --git a/contrib/amcheck/amcheck.control b/contrib/amcheck/amcheck.control index c6e310046d..ab50931f75 100644 --- a/contrib/amcheck/amcheck.control +++ b/contrib/amcheck/amcheck.control @@ -1,5 +1,5 @@ # amcheck extension comment = 'functions for verifying relation integrity' -default_version = '1.2' +default_version = '1.3' module_pathname = '$libdir/amcheck' relocatable = true diff --git a/contrib/amcheck/amcheck.h b/contrib/amcheck/amcheck.h new file mode 100644 index 0000000000..b19e617177 --- /dev/null +++ b/contrib/amcheck/amcheck.h @@ -0,0 +1,23 @@ +/*------------------------------------------------------------------------- + * + * amcheck.h + * Shared routines for amcheck verifications. + * + * Copyright (c) 2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/amcheck/amcheck.h + * + *------------------------------------------------------------------------- + */ +#include "storage/lockdefs.h" +#include "utils/relcache.h" + +extern void amcheck_lock_relation(Oid indrelid, Relation *indrel, + Relation *heaprel, LOCKMODE lockmode); +extern void amcheck_unlock_relation(Oid indrelid, Relation indrel, + Relation heaprel, LOCKMODE lockmode); +extern bool amcheck_index_mainfork_expected(Relation rel); + +extern ItemId PageGetItemIdCareful(Relation rel, BlockNumber block, + Page page, OffsetNumber offset, size_t opaquesize); \ No newline at end of file diff --git a/contrib/amcheck/expected/check_gist.out b/contrib/amcheck/expected/check_gist.out new file mode 100644 index 0000000000..8f3ec20946 --- /dev/null +++ b/contrib/amcheck/expected/check_gist.out @@ -0,0 +1,18 @@ +-- minimal test, basically just verifying that amcheck works with GiST +SELECT setseed(1); + setseed +--------- + +(1 row) + +CREATE TABLE gist_check AS SELECT point(random(),s) c FROM generate_series(1,10000) s; +INSERT INTO gist_check SELECT point(random(),s) c FROM generate_series(1,100000) s; +CREATE INDEX gist_check_idx ON gist_check USING gist(c); +SELECT gist_index_parent_check('gist_check_idx'); + gist_index_parent_check +------------------------- + +(1 row) + +-- cleanup +DROP TABLE gist_check; diff --git a/contrib/amcheck/sql/check_gist.sql b/contrib/amcheck/sql/check_gist.sql new file mode 100644 index 0000000000..0ee2e943c9 --- /dev/null +++ b/contrib/amcheck/sql/check_gist.sql @@ -0,0 +1,9 @@ +-- minimal test, basically just verifying that amcheck works with GiST +SELECT setseed(1); +CREATE TABLE gist_check AS SELECT point(random(),s) c FROM generate_series(1,10000) s; +INSERT INTO gist_check SELECT point(random(),s) c FROM generate_series(1,100000) s; +CREATE INDEX gist_check_idx ON gist_check USING gist(c); +SELECT gist_index_parent_check('gist_check_idx'); + +-- cleanup +DROP TABLE gist_check; diff --git a/contrib/amcheck/verify_gist.c b/contrib/amcheck/verify_gist.c new file mode 100644 index 0000000000..3c741071f0 --- /dev/null +++ b/contrib/amcheck/verify_gist.c @@ -0,0 +1,374 @@ +/*------------------------------------------------------------------------- + * + * verify_gist.c + * Verifies the integrity of GiST indexes based on invariants. + * + * Verification checks that all paths in GiST graph contain + * consistent keys: tuples on parent pages consistently include tuples + * from children pages. Also, verification checks graph invariants: + * internal page must have at least one downlinks, internal page can + * reference either only leaf pages or only internal pages. + * + * + * Copyright (c) 2017-2019, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/amcheck/verify_gist.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/gist_private.h" +#include "amcheck.h" +#include "catalog/pg_am.h" +#include "miscadmin.h" +#include "utils/memutils.h" +#include "utils/rel.h" + +/* + * GistScanItem represents one item of depth-first scan of GiST index. + */ +typedef struct GistScanItem +{ + int depth; + IndexTuple parenttup; + BlockNumber parentblk; + XLogRecPtr parentlsn; + BlockNumber blkno; + struct GistScanItem *next; +} GistScanItem; + +PG_FUNCTION_INFO_V1(gist_index_parent_check); + +static void gist_index_checkable(Relation rel); +static void gist_check_parent_keys_consistency(Relation rel); +static void check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo); +static IndexTuple gist_refind_parent(Relation rel, BlockNumber parentblkno, + BlockNumber childblkno, + BufferAccessStrategy strategy); + +/* + * gist_index_parent_check(index regclass) + * + * Verify integrity of GiST index. + * + * Acquires AccessShareLock on heap & index relations. + */ +Datum gist_index_parent_check(PG_FUNCTION_ARGS) +{ + Oid indrelid = PG_GETARG_OID(0); + Relation indrel; + Relation heaprel; + LOCKMODE lockmode = AccessShareLock; + + /* lock table and index with neccesary level */ + amcheck_lock_relation(indrelid, &indrel, &heaprel, lockmode); + + /* verify that this is GiST eligible for check */ + gist_index_checkable(indrel); + + if (amcheck_index_mainfork_expected(indrel)) + gist_check_parent_keys_consistency(indrel); + + /* Unlock index and table */ + amcheck_unlock_relation(indrelid, indrel, heaprel, lockmode); + + PG_RETURN_VOID(); +} + +/* + * Check that relation is eligible for GiST verification + */ +static void +gist_index_checkable(Relation rel) +{ + if (rel->rd_rel->relkind != RELKIND_INDEX || + rel->rd_rel->relam != GIST_AM_OID) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only GiST indexes are supported as targets for this verification"), + errdetail("Relation \"%s\" is not a GiST index.", + RelationGetRelationName(rel)))); + + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary tables of other sessions"), + errdetail("Index \"%s\" is associated with temporary relation.", + RelationGetRelationName(rel)))); + + if (!rel->rd_index->indisvalid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot check index \"%s\"", + RelationGetRelationName(rel)), + errdetail("Index is not valid"))); +} + +/* + * Main entry point for GiST check. Allocates memory context and scans through + * GiST graph. This function verifies that tuples of internal pages cover all + * the key space of each tuple on leaf page. To do this we invoke + * gist_check_internal_page() for every internal page. + * + * gist_check_internal_page() in it's turn takes every tuple and tries to + * adjust it by tuples on referenced child page. Parent gist tuple should + * never require any adjustments. + */ +static void +gist_check_parent_keys_consistency(Relation rel) +{ + BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD); + GistScanItem *stack; + MemoryContext mctx; + MemoryContext oldcontext; + GISTSTATE *state; + int leafdepth; + + mctx = AllocSetContextCreate(CurrentMemoryContext, + "amcheck context", + ALLOCSET_DEFAULT_SIZES); + oldcontext = MemoryContextSwitchTo(mctx); + + state = initGISTstate(rel); + + /* + * We don't know the height of the tree yet, but as soon as we encounter a + * leaf page, we will set 'leafdepth' to its depth. + */ + leafdepth = -1; + + /* Start the scan at the root page */ + stack = (GistScanItem *) palloc0(sizeof(GistScanItem)); + stack->depth = 0; + stack->parenttup = NULL; + stack->parentblk = InvalidBlockNumber; + stack->parentlsn = InvalidXLogRecPtr; + stack->blkno = GIST_ROOT_BLKNO; + + while (stack) + { + GistScanItem *stack_next; + Buffer buffer; + Page page; + OffsetNumber i, maxoff; + XLogRecPtr lsn; + + CHECK_FOR_INTERRUPTS(); + + buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno, + RBM_NORMAL, strategy); + LockBuffer(buffer, GIST_SHARE); + page = (Page) BufferGetPage(buffer); + lsn = BufferGetLSNAtomic(buffer); + + /* Do basic sanity checks on the page headers */ + check_index_page(rel, buffer, stack->blkno); + + /* + * It's possible that the page was split since we looked at the + * parent, so that we didn't missed the downlink of the right sibling + * when we scanned the parent. If so, add the right sibling to the + * stack now. + */ + if (GistFollowRight(page) || stack->parentlsn < GistPageGetNSN(page)) + { + /* split page detected, install right link to the stack */ + GistScanItem *ptr = (GistScanItem *) palloc(sizeof(GistScanItem)); + + ptr->depth = stack->depth; + ptr->parenttup = CopyIndexTuple(stack->parenttup); + ptr->parentblk = stack->parentblk; + ptr->parentlsn = stack->parentlsn; + ptr->blkno = GistPageGetOpaque(page)->rightlink; + ptr->next = stack->next; + stack->next = ptr; + } + + /* Check that the tree has the same height in all branches */ + if (GistPageIsLeaf(page)) + { + if (leafdepth == -1) + leafdepth = stack->depth; + else if (stack->depth != leafdepth) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u", + RelationGetRelationName(rel), stack->blkno))); + } + + /* + * Check that each tuple looks valid, and is consistent with the + * downlink we followed when we stepped on this page. + */ + maxoff = PageGetMaxOffsetNumber(page); + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + ItemId iid = PageGetItemIdCareful(rel, stack->blkno, page, i, sizeof(GISTPageOpaqueData)); + IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid); + + /* + * Check that it's not a leftover invalid tuple from pre-9.1 See + * also gistdoinsert() and gistbulkdelete() handling of such + * tuples. We do consider it error here. + */ + if (GistTupleIsInvalid(idxtuple)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("index \"%s\" contains an inner tuple marked as invalid, block %u, offset %u", + RelationGetRelationName(rel), stack->blkno, i), + errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."), + errhint("Please REINDEX it."))); + + if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple))) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has inconsistent tuple sizes, block %u, offset %u", + RelationGetRelationName(rel), stack->blkno, i))); + + /* + * Check if this tuple is consistent with the downlink in the + * parent. + */ + if (stack->parenttup && + gistgetadjusted(rel, stack->parenttup, idxtuple, state)) + { + /* + * There was a discrepancy between parent and child tuples. + * We need to verify it is not a result of concurrent call of + * gistplacetopage(). So, lock parent and try to find downlink + * for current page. It may be missing due to concurrent page + * split, this is OK. + */ + pfree(stack->parenttup); + stack->parenttup = gist_refind_parent(rel, stack->parentblk, + stack->blkno, strategy); + + /* We found it - make a final check before failing */ + if (!stack->parenttup) + elog(NOTICE, "Unable to find parent tuple for block %u on block %u due to concurrent split", + stack->blkno, stack->parentblk); + else if (gistgetadjusted(rel, stack->parenttup, idxtuple, state)) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has inconsistent records on page %u offset %u", + RelationGetRelationName(rel), stack->blkno, i))); + else + { + /* + * But now it is properly adjusted - nothing to do here. + */ + } + } + + /* If this is an internal page, recurse into the child */ + if (!GistPageIsLeaf(page)) + { + GistScanItem *ptr; + + ptr = (GistScanItem *) palloc(sizeof(GistScanItem)); + ptr->depth = stack->depth + 1; + ptr->parenttup = CopyIndexTuple(idxtuple); + ptr->parentblk = stack->blkno; + ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); + ptr->parentlsn = lsn; + ptr->next = stack->next; + stack->next = ptr; + } + } + + LockBuffer(buffer, GIST_UNLOCK); + ReleaseBuffer(buffer); + + /* Step to next item in the queue */ + stack_next = stack->next; + if (stack->parenttup) + pfree(stack->parenttup); + pfree(stack); + stack = stack_next; + } + + MemoryContextSwitchTo(oldcontext); + MemoryContextDelete(mctx); +} + +static void +check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo) +{ + Page page = BufferGetPage(buffer); + + gistcheckpage(rel, buffer); + + if (GistPageGetOpaque(page)->gist_page_id != GIST_PAGE_ID) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has corrupted page %d", + RelationGetRelationName(rel), blockNo))); + + if (GistPageIsDeleted(page)) + { + if (!GistPageIsLeaf(page)) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has deleted internal page %d", + RelationGetRelationName(rel), blockNo))); + if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has deleted page %d with tuples", + RelationGetRelationName(rel), blockNo))); + } + else if (PageGetMaxOffsetNumber(page) > MaxIndexTuplesPerPage) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" has page %d with exceeding count of tuples", + RelationGetRelationName(rel), blockNo))); +} + +/* + * Try to re-find downlink pointing to 'blkno', in 'parentblkno'. + * + * If found, returns a palloc'd copy of the downlink tuple. Otherwise, + * returns NULL. + */ +static IndexTuple +gist_refind_parent(Relation rel, BlockNumber parentblkno, + BlockNumber childblkno, BufferAccessStrategy strategy) +{ + Buffer parentbuf; + Page parentpage; + OffsetNumber o, + parent_maxoff; + IndexTuple result = NULL; + + parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, parentblkno, RBM_NORMAL, + strategy); + + LockBuffer(parentbuf, GIST_SHARE); + parentpage = BufferGetPage(parentbuf); + + if (GistPageIsLeaf(parentpage)) + { + UnlockReleaseBuffer(parentbuf); + return result; + } + + parent_maxoff = PageGetMaxOffsetNumber(parentpage); + for (o = FirstOffsetNumber; o <= parent_maxoff; o = OffsetNumberNext(o)) + { + ItemId p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, o, sizeof(GISTPageOpaqueData)); + IndexTuple itup = (IndexTuple) PageGetItem(parentpage, p_iid); + + if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno) + { + /* Found it! Make copy and return it */ + result = CopyIndexTuple(itup); + break; + } + } + + UnlockReleaseBuffer(parentbuf); + + return result; +} diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index 05e7d678ed..6f8b5714a3 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -25,16 +25,14 @@ #include "access/htup_details.h" #include "access/nbtree.h" -#include "access/table.h" #include "access/tableam.h" #include "access/transam.h" #include "access/xact.h" +#include "amcheck.h" #include "catalog/index.h" #include "catalog/pg_am.h" -#include "commands/tablecmds.h" #include "lib/bloomfilter.h" #include "miscadmin.h" -#include "storage/lmgr.h" #include "storage/smgr.h" #include "utils/memutils.h" #include "utils/snapmgr.h" @@ -129,7 +127,6 @@ PG_FUNCTION_INFO_V1(bt_index_parent_check); static void bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed, bool rootdescend); static inline void btree_index_checkable(Relation rel); -static inline bool btree_index_mainfork_expected(Relation rel); static void bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, bool readonly, bool heapallindexed, bool rootdescend); @@ -163,8 +160,6 @@ static inline bool invariant_l_nontarget_offset(BtreeCheckState *state, static Page palloc_btree_page(BtreeCheckState *state, BlockNumber blocknum); static inline BTScanInsert bt_mkscankey_pivotsearch(Relation rel, IndexTuple itup); -static ItemId PageGetItemIdCareful(BtreeCheckState *state, BlockNumber block, - Page page, OffsetNumber offset); static inline ItemPointer BTreeTupleGetHeapTIDCareful(BtreeCheckState *state, IndexTuple itup, bool nonpivot); @@ -224,7 +219,6 @@ static void bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed, bool rootdescend) { - Oid heapid; Relation indrel; Relation heaprel; LOCKMODE lockmode; @@ -234,51 +228,15 @@ bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed, else lockmode = AccessShareLock; - /* - * We must lock table before index to avoid deadlocks. However, if the - * passed indrelid isn't an index then IndexGetRelation() will fail. - * Rather than emitting a not-very-helpful error message, postpone - * complaining, expecting that the is-it-an-index test below will fail. - * - * In hot standby mode this will raise an error when parentcheck is true. - */ - heapid = IndexGetRelation(indrelid, true); - if (OidIsValid(heapid)) - heaprel = table_open(heapid, lockmode); - else - heaprel = NULL; - - /* - * Open the target index relations separately (like relation_openrv(), but - * with heap relation locked first to prevent deadlocking). In hot - * standby mode this will raise an error when parentcheck is true. - * - * There is no need for the usual indcheckxmin usability horizon test - * here, even in the heapallindexed case, because index undergoing - * verification only needs to have entries for a new transaction snapshot. - * (If this is a parentcheck verification, there is no question about - * committed or recently dead heap tuples lacking index entries due to - * concurrent activity.) - */ - indrel = index_open(indrelid, lockmode); - - /* - * Since we did the IndexGetRelation call above without any lock, it's - * barely possible that a race against an index drop/recreation could have - * netted us the wrong table. - */ - if (heaprel == NULL || heapid != IndexGetRelation(indrelid, false)) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_TABLE), - errmsg("could not open parent table of index %s", - RelationGetRelationName(indrel)))); + /* lock table and index with neccesary level */ + amcheck_lock_relation(indrelid, &indrel, &heaprel, lockmode); /* Relation suitable for checking as B-Tree? */ btree_index_checkable(indrel); - if (btree_index_mainfork_expected(indrel)) + if (amcheck_index_mainfork_expected(indrel)) { - bool heapkeyspace; + bool heapkeyspace; RelationOpenSmgr(indrel); if (!smgrexists(indrel->rd_smgr, MAIN_FORKNUM)) @@ -293,14 +251,8 @@ bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed, heapallindexed, rootdescend); } - /* - * Release locks early. That's ok here because nothing in the called - * routines will trigger shared cache invalidations to be sent, so we can - * relax the usual pattern of only releasing locks after commit. - */ - index_close(indrel, lockmode); - if (heaprel) - table_close(heaprel, lockmode); + /* Unlock index and table */ + amcheck_unlock_relation(indrelid, indrel, heaprel, lockmode); } /* @@ -337,28 +289,6 @@ btree_index_checkable(Relation rel) errdetail("Index is not valid."))); } -/* - * Check if B-Tree index relation should have a file for its main relation - * fork. Verification uses this to skip unlogged indexes when in hot standby - * mode, where there is simply nothing to verify. - * - * NB: Caller should call btree_index_checkable() before calling here. - */ -static inline bool -btree_index_mainfork_expected(Relation rel) -{ - if (rel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED || - !RecoveryInProgress()) - return true; - - ereport(NOTICE, - (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION), - errmsg("cannot verify unlogged index \"%s\" during recovery, skipping", - RelationGetRelationName(rel)))); - - return false; -} - /* * Main entry point for B-Tree SQL-callable functions. Walks the B-Tree in * logical order, verifying invariants as it goes. Optionally, verification @@ -754,9 +684,9 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level) ItemId itemid; /* Internal page -- downlink gets leftmost on next level */ - itemid = PageGetItemIdCareful(state, state->targetblock, + itemid = PageGetItemIdCareful(state->rel, state->targetblock, state->target, - P_FIRSTDATAKEY(opaque)); + P_FIRSTDATAKEY(opaque), sizeof(BTPageOpaqueData)); itup = (IndexTuple) PageGetItem(state->target, itemid); nextleveldown.leftmost = BTreeInnerTupleGetDownLink(itup); nextleveldown.level = opaque->btpo.level - 1; @@ -891,8 +821,8 @@ bt_target_page_check(BtreeCheckState *state) IndexTuple itup; /* Verify line pointer before checking tuple */ - itemid = PageGetItemIdCareful(state, state->targetblock, - state->target, P_HIKEY); + itemid = PageGetItemIdCareful(state->rel, state->targetblock, + state->target, P_HIKEY, sizeof(BTPageOpaqueData)); if (!_bt_check_natts(state->rel, state->heapkeyspace, state->target, P_HIKEY)) { @@ -927,8 +857,8 @@ bt_target_page_check(BtreeCheckState *state) CHECK_FOR_INTERRUPTS(); - itemid = PageGetItemIdCareful(state, state->targetblock, - state->target, offset); + itemid = PageGetItemIdCareful(state->rel, state->targetblock, + state->target, offset, sizeof(BTPageOpaqueData)); itup = (IndexTuple) PageGetItem(state->target, itemid); tupsize = IndexTupleSize(itup); @@ -1173,9 +1103,9 @@ bt_target_page_check(BtreeCheckState *state) OffsetNumberNext(offset)); /* Reuse itup to get pointed-to heap location of second item */ - itemid = PageGetItemIdCareful(state, state->targetblock, + itemid = PageGetItemIdCareful(state->rel, state->targetblock, state->target, - OffsetNumberNext(offset)); + OffsetNumberNext(offset), sizeof(BTPageOpaqueData)); itup = (IndexTuple) PageGetItem(state->target, itemid); nhtid = psprintf("(%u,%u)", ItemPointerGetBlockNumberNoCheck(&(itup->t_tid)), @@ -1460,8 +1390,8 @@ bt_right_page_check_scankey(BtreeCheckState *state) if (P_ISLEAF(opaque) && nline >= P_FIRSTDATAKEY(opaque)) { /* Return first data item (if any) */ - rightitem = PageGetItemIdCareful(state, targetnext, rightpage, - P_FIRSTDATAKEY(opaque)); + rightitem = PageGetItemIdCareful(state->rel, targetnext, rightpage, + P_FIRSTDATAKEY(opaque), sizeof(BTPageOpaqueData)); } else if (!P_ISLEAF(opaque) && nline >= OffsetNumberNext(P_FIRSTDATAKEY(opaque))) @@ -1470,8 +1400,9 @@ bt_right_page_check_scankey(BtreeCheckState *state) * Return first item after the internal page's "negative infinity" * item */ - rightitem = PageGetItemIdCareful(state, targetnext, rightpage, - OffsetNumberNext(P_FIRSTDATAKEY(opaque))); + rightitem = PageGetItemIdCareful(state->rel, targetnext, rightpage, + OffsetNumberNext(P_FIRSTDATAKEY(opaque)), + sizeof(BTPageOpaqueData)); } else { @@ -1743,8 +1674,8 @@ bt_downlink_missing_check(BtreeCheckState *state) RelationGetRelationName(state->rel)); level = topaque->btpo.level; - itemid = PageGetItemIdCareful(state, state->targetblock, state->target, - P_FIRSTDATAKEY(topaque)); + itemid = PageGetItemIdCareful(state->rel, state->targetblock, state->target, + P_FIRSTDATAKEY(topaque), sizeof(BTPageOpaqueData)); itup = (IndexTuple) PageGetItem(state->target, itemid); childblk = BTreeInnerTupleGetDownLink(itup); for (;;) @@ -1768,8 +1699,8 @@ bt_downlink_missing_check(BtreeCheckState *state) level - 1, copaque->btpo.level))); level = copaque->btpo.level; - itemid = PageGetItemIdCareful(state, childblk, child, - P_FIRSTDATAKEY(copaque)); + itemid = PageGetItemIdCareful(state->rel, childblk, child, + P_FIRSTDATAKEY(copaque), sizeof(BTPageOpaqueData)); itup = (IndexTuple) PageGetItem(child, itemid); childblk = BTreeInnerTupleGetDownLink(itup); /* Be slightly more pro-active in freeing this memory, just in case */ @@ -1818,7 +1749,7 @@ bt_downlink_missing_check(BtreeCheckState *state) */ if (P_ISHALFDEAD(copaque) && !P_RIGHTMOST(copaque)) { - itemid = PageGetItemIdCareful(state, childblk, child, P_HIKEY); + itemid = PageGetItemIdCareful(state->rel, childblk, child, P_HIKEY, sizeof(BTPageOpaqueData)); itup = (IndexTuple) PageGetItem(child, itemid); if (BTreeTupleGetTopParent(itup) == state->targetblock) return; @@ -2161,8 +2092,8 @@ invariant_l_offset(BtreeCheckState *state, BTScanInsert key, Assert(key->pivotsearch); /* Verify line pointer before checking tuple */ - itemid = PageGetItemIdCareful(state, state->targetblock, state->target, - upperbound); + itemid = PageGetItemIdCareful(state->rel, state->targetblock, state->target, + upperbound, sizeof(BTPageOpaqueData)); /* pg_upgrade'd indexes may legally have equal sibling tuples */ if (!key->heapkeyspace) return invariant_leq_offset(state, key, upperbound); @@ -2284,8 +2215,8 @@ invariant_l_nontarget_offset(BtreeCheckState *state, BTScanInsert key, Assert(key->pivotsearch); /* Verify line pointer before checking tuple */ - itemid = PageGetItemIdCareful(state, nontargetblock, nontarget, - upperbound); + itemid = PageGetItemIdCareful(state->rel, nontargetblock, nontarget, + upperbound, sizeof(BTPageOpaqueData)); cmp = _bt_compare(state->rel, key, nontarget, upperbound); /* pg_upgrade'd indexes may legally have equal sibling tuples */ @@ -2498,55 +2429,6 @@ bt_mkscankey_pivotsearch(Relation rel, IndexTuple itup) return skey; } -/* - * PageGetItemId() wrapper that validates returned line pointer. - * - * Buffer page/page item access macros generally trust that line pointers are - * not corrupt, which might cause problems for verification itself. For - * example, there is no bounds checking in PageGetItem(). Passing it a - * corrupt line pointer can cause it to return a tuple/pointer that is unsafe - * to dereference. - * - * Validating line pointers before tuples avoids undefined behavior and - * assertion failures with corrupt indexes, making the verification process - * more robust and predictable. - */ -static ItemId -PageGetItemIdCareful(BtreeCheckState *state, BlockNumber block, Page page, - OffsetNumber offset) -{ - ItemId itemid = PageGetItemId(page, offset); - - if (ItemIdGetOffset(itemid) + ItemIdGetLength(itemid) > - BLCKSZ - sizeof(BTPageOpaqueData)) - ereport(ERROR, - (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("line pointer points past end of tuple space in index \"%s\"", - RelationGetRelationName(state->rel)), - errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.", - block, offset, ItemIdGetOffset(itemid), - ItemIdGetLength(itemid), - ItemIdGetFlags(itemid)))); - - /* - * Verify that line pointer isn't LP_REDIRECT or LP_UNUSED, since nbtree - * never uses either. Verify that line pointer has storage, too, since - * even LP_DEAD items should within nbtree. - */ - if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid) || - ItemIdGetLength(itemid) == 0) - ereport(ERROR, - (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("invalid line pointer storage in index \"%s\"", - RelationGetRelationName(state->rel)), - errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.", - block, offset, ItemIdGetOffset(itemid), - ItemIdGetLength(itemid), - ItemIdGetFlags(itemid)))); - - return itemid; -} - /* * BTreeTupleGetHeapTID() wrapper that lets caller enforce that a heap TID must * be present in cases where that is mandatory. diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml index 627651d8d4..6a02e288b2 100644 --- a/doc/src/sgml/amcheck.sgml +++ b/doc/src/sgml/amcheck.sgml @@ -165,6 +165,25 @@ ORDER BY c.relpages DESC LIMIT 10; + + + + gist_index_parent_check(index regclass) returns void + + gist_index_parent_check + + + + + + gist_index_parent_check tests that its target GiST + has consistent parent-child tuples relations (no parent tuples + require tuple adjustement) and page graph respects balanced-tree + invariants (internal pages reference only leaf page or only internal + pages). + + + -- 2.17.1