From 3a783b0e62c6f93eba808e6a3c6be3c479484a5d Mon Sep 17 00:00:00 2001 From: kommih Date: Fri, 28 Sep 2018 11:25:07 +1000 Subject: [PATCH 3/3] validate index scan hook addition Slotifying validate_index is problematic because it accesses the buffer stored in the scan descriptor, so the scan is moved into the table AM and a callback hands control back to the caller. This may need a further visit, as the callback may need further abstraction. --- src/backend/access/heap/heapam_handler.c | 243 ++++++++++++++++- src/backend/catalog/index.c | 318 +++-------------------- src/include/access/tableam.h | 27 ++ src/include/catalog/index.h | 48 ++++ 4 files changed, 352 insertions(+), 284 deletions(-) diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 2d5074734b..ee8a658c6d 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -1120,6 +1120,246 @@ IndexBuildHeapRangeScan(Relation heapRelation, return reltuples; } +/* + * validate_index_heapscan - second table scan for concurrent index build + * + * This has much code in common with IndexBuildHeapScan, but it's enough + * different that it seems cleaner to have two routines not one. 
+ */ +static uint64 +validate_index_heapscan(Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + Snapshot snapshot, + Tuplesortstate *tuplesort, + IndexValidateCallback callback, + void *callback_state) +{ + TableScanDesc sscan; + HeapScanDesc scan; + HeapTuple heapTuple; + Datum values[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + ExprState *predicate; + TupleTableSlot *slot; + EState *estate; + ExprContext *econtext; + BlockNumber root_blkno = InvalidBlockNumber; + OffsetNumber root_offsets[MaxHeapTuplesPerPage]; + bool in_index[MaxHeapTuplesPerPage]; + + /* state variables for the merge */ + ItemPointer indexcursor = NULL; + ItemPointerData decoded; + bool tuplesort_empty = false; + uint64 nhtups = 0; + + /* + * sanity checks + */ + Assert(OidIsValid(indexRelation->rd_rel->relam)); + + /* + * Need an EState for evaluation of index expressions and partial-index + * predicates. Also a slot to hold the current tuple. + */ + estate = CreateExecutorState(); + econtext = GetPerTupleExprContext(estate); + slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation), + &TTSOpsHeapTuple); + + /* Arrange for econtext's scan tuple to be the tuple under test */ + econtext->ecxt_scantuple = slot; + + /* Set up execution state for predicate, if any. */ + predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate); + + /* + * Prepare for scan of the base relation. We need just those tuples + * satisfying the passed-in reference snapshot. We must disable syncscan + * here, because it's critical that we read from block zero forward to + * match the sorted TIDs. + */ + sscan = heap_beginscan(heapRelation, /* relation */ + snapshot, /* snapshot */ + 0, /* number of keys */ + NULL, /* scan key */ + NULL, + true, /* buffer access strategy OK */ + false, /* syncscan not OK */ + true, + false, + false, + false); + + scan = (HeapScanDesc)sscan; + + /* + * Scan all tuples matching the snapshot. 
+ */ + while ((heapTuple = heap_getnext(sscan, ForwardScanDirection)) != NULL) + { + ItemPointer heapcursor = &heapTuple->t_self; + ItemPointerData rootTuple; + OffsetNumber root_offnum; + + CHECK_FOR_INTERRUPTS(); + + nhtups += 1; + + /* + * As commented in IndexBuildHeapScan, we should index heap-only + * tuples under the TIDs of their root tuples; so when we advance onto + * a new heap page, build a map of root item offsets on the page. + * + * This complicates merging against the tuplesort output: we will + * visit the live tuples in order by their offsets, but the root + * offsets that we need to compare against the index contents might be + * ordered differently. So we might have to "look back" within the + * tuplesort output, but only within the current page. We handle that + * by keeping a bool array in_index[] showing all the + * already-passed-over tuplesort output TIDs of the current page. We + * clear that array here, when advancing onto a new heap page. + */ + if (scan->rs_cblock != root_blkno) + { + Page page = BufferGetPage(scan->rs_cbuf); + + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + heap_get_root_tuples(page, root_offsets); + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + + memset(in_index, 0, sizeof(in_index)); + + root_blkno = scan->rs_cblock; + } + + /* Convert actual tuple TID to root TID */ + rootTuple = *heapcursor; + root_offnum = ItemPointerGetOffsetNumber(heapcursor); + + if (HeapTupleIsHeapOnly(heapTuple)) + { + root_offnum = root_offsets[root_offnum - 1]; + if (!OffsetNumberIsValid(root_offnum)) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"", + ItemPointerGetBlockNumber(heapcursor), + ItemPointerGetOffsetNumber(heapcursor), + RelationGetRelationName(heapRelation)))); + ItemPointerSetOffsetNumber(&rootTuple, root_offnum); + } + + /* + * "merge" by skipping through the index tuples until we find or pass + * the current root tuple. 
+ */ + while (!tuplesort_empty && + (!indexcursor || + ItemPointerCompare(indexcursor, &rootTuple) < 0)) + { + Datum ts_val; + bool ts_isnull; + + if (indexcursor) + { + /* + * Remember index items seen earlier on the current heap page + */ + if (ItemPointerGetBlockNumber(indexcursor) == root_blkno) + in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true; + } + + tuplesort_empty = !tuplesort_getdatum(tuplesort, true, + &ts_val, &ts_isnull, NULL); + Assert(tuplesort_empty || !ts_isnull); + if (!tuplesort_empty) + { + itemptr_decode(&decoded, DatumGetInt64(ts_val)); + indexcursor = &decoded; + + /* If int8 is pass-by-ref, free (encoded) TID Datum memory */ +#ifndef USE_FLOAT8_BYVAL + pfree(DatumGetPointer(ts_val)); +#endif + } + else + { + /* Be tidy */ + indexcursor = NULL; + } + } + + /* + * If the tuplesort has overshot *and* we didn't see a match earlier, + * then this tuple is missing from the index, so insert it. + */ + if ((tuplesort_empty || + ItemPointerCompare(indexcursor, &rootTuple) > 0) && + !in_index[root_offnum - 1]) + { + MemoryContextReset(econtext->ecxt_per_tuple_memory); + + /* Set up for predicate or expression evaluation */ + ExecStoreHeapTuple(heapTuple, slot, false); + + /* + * In a partial index, discard tuples that don't satisfy the + * predicate. + */ + if (predicate != NULL) + { + if (!ExecQual(predicate, econtext)) + continue; + } + + /* + * For the current heap tuple, extract all the attributes we use + * in this index, and note which are null. This also performs + * evaluation of any expressions needed. + */ + FormIndexDatum(indexInfo, + slot, + estate, + values, + isnull); + + /* + * You'd think we should go ahead and build the index tuple here, + * but some index AMs want to do further processing on the data + * first. So pass the values[] and isnull[] arrays, instead. 
+ */ + + /* + * If the tuple is already committed dead, you might think we + * could suppress uniqueness checking, but this is no longer true + * in the presence of HOT, because the insert is actually a proxy + * for a uniqueness check on the whole HOT-chain. That is, the + * tuple we have here could be dead because it was already + * HOT-updated, and if so the updating transaction will not have + * thought it should insert index entries. The index AM will + * check the whole HOT-chain and correctly detect a conflict if + * there is one. + */ + + callback(indexRelation, values, isnull, &rootTuple, heapRelation, + indexInfo, callback_state); + } + } + + table_endscan(sscan); + + ExecDropSingleTupleTableSlot(slot); + + FreeExecutorState(estate); + + /* These may have been pointing to the now-gone estate */ + indexInfo->ii_ExpressionsState = NIL; + indexInfo->ii_PredicateState = NULL; + + return nhtups; +} static bool heapam_scan_bitmap_pagescan(TableScanDesc sscan, @@ -1775,7 +2015,8 @@ static const TableAmRoutine heapam_methods = { .reset_index_fetch = heapam_reset_index_fetch, .end_index_fetch = heapam_end_index_fetch, - .index_build_range_scan = IndexBuildHeapRangeScan + .index_build_range_scan = IndexBuildHeapRangeScan, + .validate_index_scan = validate_index_heapscan }; const TableAmRoutine * diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 2fe66972a1..a0096e60ca 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -81,7 +81,7 @@ /* Potentially set by pg_upgrade_support functions */ Oid binary_upgrade_next_index_pg_class_oid = InvalidOid; -/* state info for validate_index bulkdelete callback */ +/* state info for validate_index bulkdelete callback */ typedef struct { Tuplesortstate *tuplesort; /* for sorting the index TIDs */ @@ -134,11 +134,13 @@ static void IndexCheckExclusion(Relation heapRelation, static inline int64 itemptr_encode(ItemPointer itemptr); static inline void itemptr_decode(ItemPointer 
itemptr, int64 encoded); static bool validate_index_callback(ItemPointer itemptr, void *opaque); -static void validate_index_heapscan(Relation heapRelation, - Relation indexRelation, - IndexInfo *indexInfo, - Snapshot snapshot, - v_i_state *state); +static void validate_index_scan_callbck(Relation indexRelation, + Datum *values, + bool *isnull, + ItemPointer rootTuple, + Relation heapRelation, + IndexInfo *indexInfo, + void *opaque); static bool ReindexIsCurrentlyProcessingIndex(Oid indexOid); static void SetReindexProcessing(Oid heapOid, Oid indexOid); static void ResetReindexProcessing(void); @@ -2638,11 +2640,13 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot) /* * Now scan the heap and "merge" it with the index */ - validate_index_heapscan(heapRelation, - indexRelation, - indexInfo, - snapshot, - &state); + state.htups = table_validate_index(heapRelation, + indexRelation, + indexInfo, + snapshot, + state.tuplesort, + validate_index_scan_callbck, + &state); /* Done with tuplesort object */ tuplesort_end(state.tuplesort); @@ -2662,45 +2666,6 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot) heap_close(heapRelation, NoLock); } -/* - * itemptr_encode - Encode ItemPointer as int64/int8 - * - * This representation must produce values encoded as int64 that sort in the - * same order as their corresponding original TID values would (using the - * default int8 opclass to produce a result equivalent to the default TID - * opclass). - * - * As noted in validate_index(), this can be significantly faster. - */ -static inline int64 -itemptr_encode(ItemPointer itemptr) -{ - BlockNumber block = ItemPointerGetBlockNumber(itemptr); - OffsetNumber offset = ItemPointerGetOffsetNumber(itemptr); - int64 encoded; - - /* - * Use the 16 least significant bits for the offset. 32 adjacent bits are - * used for the block number. Since remaining bits are unused, there - * cannot be negative encoded values (We assume a two's complement - * representation). 
- */ - encoded = ((uint64) block << 16) | (uint16) offset; - - return encoded; -} - -/* - * itemptr_decode - Decode int64/int8 representation back to ItemPointer - */ -static inline void -itemptr_decode(ItemPointer itemptr, int64 encoded) -{ - BlockNumber block = (BlockNumber) (encoded >> 16); - OffsetNumber offset = (OffsetNumber) (encoded & 0xFFFF); - - ItemPointerSet(itemptr, block, offset); -} /* * validate_index_callback - bulkdelete callback to collect the index TIDs @@ -2717,242 +2682,29 @@ validate_index_callback(ItemPointer itemptr, void *opaque) } /* - * validate_index_heapscan - second table scan for concurrent index build - * - * This has much code in common with IndexBuildHeapScan, but it's enough - * different that it seems cleaner to have two routines not one. + * validate_index_scan_callbck - callback to insert into the index */ static void -validate_index_heapscan(Relation heapRelation, - Relation indexRelation, - IndexInfo *indexInfo, - Snapshot snapshot, - v_i_state *state) +validate_index_scan_callbck(Relation indexRelation, + Datum *values, + bool *isnull, + ItemPointer rootTuple, + Relation heapRelation, + IndexInfo *indexInfo, + void *opaque) { - TableScanDesc sscan; - HeapScanDesc scan; - HeapTuple heapTuple; - Datum values[INDEX_MAX_KEYS]; - bool isnull[INDEX_MAX_KEYS]; - ExprState *predicate; - TupleTableSlot *slot; - EState *estate; - ExprContext *econtext; - BlockNumber root_blkno = InvalidBlockNumber; - OffsetNumber root_offsets[MaxHeapTuplesPerPage]; - bool in_index[MaxHeapTuplesPerPage]; - - /* state variables for the merge */ - ItemPointer indexcursor = NULL; - ItemPointerData decoded; - bool tuplesort_empty = false; - - /* - * sanity checks - */ - Assert(OidIsValid(indexRelation->rd_rel->relam)); - - /* - * Need an EState for evaluation of index expressions and partial-index - * predicates. Also a slot to hold the current tuple. 
- */ - estate = CreateExecutorState(); - econtext = GetPerTupleExprContext(estate); - slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation), - &TTSOpsHeapTuple); - - /* Arrange for econtext's scan tuple to be the tuple under test */ - econtext->ecxt_scantuple = slot; - - /* Set up execution state for predicate, if any. */ - predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate); - - /* - * Prepare for scan of the base relation. We need just those tuples - * satisfying the passed-in reference snapshot. We must disable syncscan - * here, because it's critical that we read from block zero forward to - * match the sorted TIDs. - */ - sscan = table_beginscan_strat(heapRelation, /* relation */ - snapshot, /* snapshot */ - 0, /* number of keys */ - NULL, /* scan key */ - true, /* buffer access strategy OK */ - false); /* syncscan not OK */ - scan = (HeapScanDesc) sscan; - - /* - * Scan all tuples matching the snapshot. - */ - // PBORKED: slotify - while ((heapTuple = heap_scan_getnext(sscan, ForwardScanDirection)) != NULL) - { - ItemPointer heapcursor = &heapTuple->t_self; - ItemPointerData rootTuple; - OffsetNumber root_offnum; - - CHECK_FOR_INTERRUPTS(); - - state->htups += 1; - - /* - * As commented in IndexBuildHeapScan, we should index heap-only - * tuples under the TIDs of their root tuples; so when we advance onto - * a new heap page, build a map of root item offsets on the page. - * - * This complicates merging against the tuplesort output: we will - * visit the live tuples in order by their offsets, but the root - * offsets that we need to compare against the index contents might be - * ordered differently. So we might have to "look back" within the - * tuplesort output, but only within the current page. We handle that - * by keeping a bool array in_index[] showing all the - * already-passed-over tuplesort output TIDs of the current page. We - * clear that array here, when advancing onto a new heap page. 
- */ - if (scan->rs_cblock != root_blkno) - { - Page page = BufferGetPage(scan->rs_cbuf); - - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); - heap_get_root_tuples(page, root_offsets); - LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); - - memset(in_index, 0, sizeof(in_index)); - - root_blkno = scan->rs_cblock; - } - - /* Convert actual tuple TID to root TID */ - rootTuple = *heapcursor; - root_offnum = ItemPointerGetOffsetNumber(heapcursor); - - if (HeapTupleIsHeapOnly(heapTuple)) - { - root_offnum = root_offsets[root_offnum - 1]; - if (!OffsetNumberIsValid(root_offnum)) - ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"", - ItemPointerGetBlockNumber(heapcursor), - ItemPointerGetOffsetNumber(heapcursor), - RelationGetRelationName(heapRelation)))); - ItemPointerSetOffsetNumber(&rootTuple, root_offnum); - } - - /* - * "merge" by skipping through the index tuples until we find or pass - * the current root tuple. - */ - while (!tuplesort_empty && - (!indexcursor || - ItemPointerCompare(indexcursor, &rootTuple) < 0)) - { - Datum ts_val; - bool ts_isnull; - - if (indexcursor) - { - /* - * Remember index items seen earlier on the current heap page - */ - if (ItemPointerGetBlockNumber(indexcursor) == root_blkno) - in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true; - } - - tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true, - &ts_val, &ts_isnull, NULL); - Assert(tuplesort_empty || !ts_isnull); - if (!tuplesort_empty) - { - itemptr_decode(&decoded, DatumGetInt64(ts_val)); - indexcursor = &decoded; - - /* If int8 is pass-by-ref, free (encoded) TID Datum memory */ -#ifndef USE_FLOAT8_BYVAL - pfree(DatumGetPointer(ts_val)); -#endif - } - else - { - /* Be tidy */ - indexcursor = NULL; - } - } - - /* - * If the tuplesort has overshot *and* we didn't see a match earlier, - * then this tuple is missing from the index, so insert it. 
- */ - if ((tuplesort_empty || - ItemPointerCompare(indexcursor, &rootTuple) > 0) && - !in_index[root_offnum - 1]) - { - MemoryContextReset(econtext->ecxt_per_tuple_memory); - - /* Set up for predicate or expression evaluation */ - ExecStoreHeapTuple(heapTuple, slot, false); - - /* - * In a partial index, discard tuples that don't satisfy the - * predicate. - */ - if (predicate != NULL) - { - if (!ExecQual(predicate, econtext)) - continue; - } - - /* - * For the current heap tuple, extract all the attributes we use - * in this index, and note which are null. This also performs - * evaluation of any expressions needed. - */ - FormIndexDatum(indexInfo, - slot, - estate, - values, - isnull); - - /* - * You'd think we should go ahead and build the index tuple here, - * but some index AMs want to do further processing on the data - * first. So pass the values[] and isnull[] arrays, instead. - */ - - /* - * If the tuple is already committed dead, you might think we - * could suppress uniqueness checking, but this is no longer true - * in the presence of HOT, because the insert is actually a proxy - * for a uniqueness check on the whole HOT-chain. That is, the - * tuple we have here could be dead because it was already - * HOT-updated, and if so the updating transaction will not have - * thought it should insert index entries. The index AM will - * check the whole HOT-chain and correctly detect a conflict if - * there is one. - */ - - index_insert(indexRelation, - values, - isnull, - &rootTuple, - heapRelation, - indexInfo->ii_Unique ? 
- UNIQUE_CHECK_YES : UNIQUE_CHECK_NO, - indexInfo); - - state->tups_inserted += 1; - } - } - - table_endscan(sscan); - - ExecDropSingleTupleTableSlot(slot); - - FreeExecutorState(estate); - - /* These may have been pointing to the now-gone estate */ - indexInfo->ii_ExpressionsState = NIL; - indexInfo->ii_PredicateState = NULL; + v_i_state *state = (v_i_state *)opaque; + + index_insert(indexRelation, + values, + isnull, + rootTuple, + heapRelation, + indexInfo->ii_Unique ? + UNIQUE_CHECK_YES : UNIQUE_CHECK_NO, + indexInfo); + + state->tups_inserted += 1; } diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 691f687ade..27bf57a486 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -173,6 +173,14 @@ typedef double (*IndexBuildRangeScan_function)(Relation heapRelation, void *callback_state, TableScanDesc scan); +typedef uint64 (*ValidateIndexscan_function)(Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + Snapshot snapshot, + Tuplesortstate *tuplesort, + IndexValidateCallback callback, + void *callback_state); + typedef bool (*BitmapPagescan_function)(TableScanDesc scan, TBMIterateResult *tbmres); @@ -236,6 +244,7 @@ typedef struct TableAmRoutine IndexBuildRangeScan_function index_build_range_scan; + ValidateIndexscan_function validate_index_scan; } TableAmRoutine; static inline const TupleTableSlotOps* @@ -691,6 +700,24 @@ table_index_build_range_scan(Relation heapRelation, scan); } +static inline uint64 +table_validate_index(Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + Snapshot snapshot, + Tuplesortstate *tuplesort, + IndexValidateCallback callback, + void *callback_state) +{ + return heapRelation->rd_tableamroutine->validate_index_scan(heapRelation, + indexRelation, + indexInfo, + snapshot, + tuplesort, + callback, + callback_state); +} + extern BlockNumber table_parallelscan_nextpage(TableScanDesc scan); extern void 
table_parallelscan_startblock_init(TableScanDesc scan); extern Size table_parallelscan_estimate(Snapshot snapshot); diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index 376907b616..874e956c8e 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -28,6 +28,15 @@ typedef void (*IndexBuildCallback) (Relation index, bool tupleIsAlive, void *state); +/* Typedef for callback function for table_validate_index */ +typedef void (*IndexValidateCallback) (Relation indexRelation, + Datum *values, + bool *isnull, + ItemPointer heap_t_ctid, + Relation heapRelation, + IndexInfo *indexInfo, + void *state); + /* Action code for index_set_state_flags */ typedef enum { @@ -37,6 +46,45 @@ typedef enum INDEX_DROP_SET_DEAD } IndexStateFlagsAction; +/* + * itemptr_encode - Encode ItemPointer as int64/int8 + * + * This representation must produce values encoded as int64 that sort in the + * same order as their corresponding original TID values would (using the + * default int8 opclass to produce a result equivalent to the default TID + * opclass). + * + * As noted in validate_index(), this can be significantly faster. + */ +static inline int64 +itemptr_encode(ItemPointer itemptr) +{ + BlockNumber block = ItemPointerGetBlockNumber(itemptr); + OffsetNumber offset = ItemPointerGetOffsetNumber(itemptr); + int64 encoded; + + /* + * Use the 16 least significant bits for the offset. 32 adjacent bits are + * used for the block number. Since remaining bits are unused, there + * cannot be negative encoded values (We assume a two's complement + * representation). 
+ */ + encoded = ((uint64) block << 16) | (uint16) offset; + + return encoded; +} + +/* + * itemptr_decode - Decode int64/int8 representation back to ItemPointer + */ +static inline void +itemptr_decode(ItemPointer itemptr, int64 encoded) +{ + BlockNumber block = (BlockNumber) (encoded >> 16); + OffsetNumber offset = (OffsetNumber) (encoded & 0xFFFF); + + ItemPointerSet(itemptr, block, offset); +} extern void index_check_primary_key(Relation heapRel, IndexInfo *indexInfo, -- 2.18.0.windows.1