From c45d6550629bfec7e5f263f484c29b1571aba7ea Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Sun, 19 Sep 2021 16:14:43 -0700 Subject: [PATCH v1] Fix "single value strategy" index deletion bug. --- src/include/access/nbtree.h | 2 +- src/backend/access/nbtree/nbtdedup.c | 31 ++++++++++++++------------- src/backend/access/nbtree/nbtinsert.c | 2 +- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 42c66fac5..16480f93f 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -1155,7 +1155,7 @@ extern void _bt_parallel_advance_array_keys(IndexScanDesc scan); */ extern void _bt_dedup_pass(Relation rel, Buffer buf, Relation heapRel, IndexTuple newitem, Size newitemsz, - bool checkingunique); + bool bottomupfail); extern bool _bt_bottomupdel_pass(Relation rel, Buffer buf, Relation heapRel, Size newitemsz); extern void _bt_dedup_start_pending(BTDedupState state, IndexTuple base, diff --git a/src/backend/access/nbtree/nbtdedup.c b/src/backend/access/nbtree/nbtdedup.c index 271994b08..b6e890816 100644 --- a/src/backend/access/nbtree/nbtdedup.c +++ b/src/backend/access/nbtree/nbtdedup.c @@ -34,14 +34,18 @@ static bool _bt_posting_valid(IndexTuple posting); * * The general approach taken here is to perform as much deduplication as * possible to free as much space as possible. Note, however, that "single - * value" strategy is sometimes used for !checkingunique callers, in which - * case deduplication will leave a few tuples untouched at the end of the - * page. The general idea is to prepare the page for an anticipated page - * split that uses nbtsplitloc.c's "single value" strategy to determine a - * split point. 
(There is no reason to deduplicate items that will end up on - * the right half of the page after the anticipated page split; better to - * handle those if and when the anticipated right half page gets its own - * deduplication pass, following further inserts of duplicates.) + * value" strategy is used for !bottomupfail callers when the page is full of + * tuples of a single value. This strategy makes deduplication leave behind a + * few untouched tuples at the end of the page, preparing the page for an + * anticipated page split that uses nbtsplitloc.c's own single value strategy. + * We delay merging the untouched tuples until _after_ the page splits. + * + * We only consider applying single value strategy when a page split already + * appears to be inevitable, which is why we exclude bottomupfail calls. + * _bt_bottomupdel_pass() expects to be able to trigger "version-orientated" + * deduplication passes by reporting "failure" to its caller. This is usually + * just a failure to free a significant amount of free space all together at + * once, which is no reason to give up and split the page. 
* * The page will have to be split if we cannot successfully free at least * newitemsz (we also need space for newitem's line pointer, which isn't @@ -52,7 +56,7 @@ static bool _bt_posting_valid(IndexTuple posting); */ void _bt_dedup_pass(Relation rel, Buffer buf, Relation heapRel, IndexTuple newitem, - Size newitemsz, bool checkingunique) + Size newitemsz, bool bottomupfail) { OffsetNumber offnum, minoff, @@ -98,7 +102,7 @@ _bt_dedup_pass(Relation rel, Buffer buf, Relation heapRel, IndexTuple newitem, maxoff = PageGetMaxOffsetNumber(page); /* Determine if "single value" strategy should be used */ - if (!checkingunique) + if (!bottomupfail) singlevalstrat = _bt_do_singleval(rel, page, state, minoff, newitem); /* @@ -767,11 +771,8 @@ _bt_bottomupdel_finish_pending(Page page, BTDedupState state, * * Note: We deliberately don't bother checking if the high key is a distinct * value (prior to the TID tiebreaker column) before proceeding, unlike - * nbtsplitloc.c. Its single value strategy only gets applied on the - * rightmost page of duplicates of the same value (other leaf pages full of - * duplicates will get a simple 50:50 page split instead of splitting towards - * the end of the page). There is little point in making the same distinction - * here. + * nbtsplitloc.c. It seems better to err in the direction of not applying the + * strategy. 
*/ static bool _bt_do_singleval(Relation rel, Page page, BTDedupState state, diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 6ac205c98..7355e1dba 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -2748,7 +2748,7 @@ _bt_delete_or_dedup_one_page(Relation rel, Relation heapRel, /* Perform deduplication pass (when enabled and index-is-allequalimage) */ if (BTGetDeduplicateItems(rel) && itup_key->allequalimage) _bt_dedup_pass(rel, buffer, heapRel, insertstate->itup, - insertstate->itemsz, checkingunique); + insertstate->itemsz, (indexUnchanged || uniquedup)); } /* -- 2.27.0