From cc11b7e5d4cc950a0adfa83e98335e1252187ec2 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Tue, 18 Sep 2018 18:25:45 -0700 Subject: [PATCH v5 3/3] Allow nbtree to use ASC heap TID attribute order. When the macro BTREE_ASC_HEAP_TID is defined (uncommented), the patch will change the implementation to use ASC sort order rather than DESC sort order. This may be useful to reviewers. --- src/backend/access/nbtree/nbtinsert.c | 4 ++++ src/backend/access/nbtree/nbtsearch.c | 11 +++++++++++ src/backend/access/nbtree/nbtsort.c | 4 ++++ src/backend/access/nbtree/nbtutils.c | 12 ++++++++++++ src/backend/utils/sort/tuplesort.c | 10 ++++++++++ src/include/access/nbtree.h | 22 ++++++++++++++++++++++ 6 files changed, 63 insertions(+) diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 598e702bf1..507ed00373 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -2222,7 +2222,11 @@ _bt_perfect_firstdiff(Relation rel, Page page, OffsetNumber newitemoff, */ if (identical) { +#ifndef BTREE_ASC_HEAP_TID if (P_FIRSTDATAKEY(opaque) == newitemoff) +#else + if (maxoff < newitemoff) +#endif *secondmode = SPLIT_SINGLE_VALUE; else { diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index c229b7eed2..6c149113c8 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -604,8 +604,12 @@ _bt_tuple_compare(Relation rel, if (!heapTid) return 1; +#ifndef BTREE_ASC_HEAP_TID /* Deliberately invert the order, since TIDs "sort DESC" */ return ItemPointerCompare(heapTid, scantid); +#else + return ItemPointerCompare(scantid, heapTid); +#endif } /* @@ -1053,9 +1057,16 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) { scantid = &minscantid; +#ifndef BTREE_ASC_HEAP_TID /* Heap TID attribute uses DESC ordering */ ItemPointerSetBlockNumber(scantid, InvalidBlockNumber); ItemPointerSetOffsetNumber(scantid, InvalidOffsetNumber); 
+#else + /* Lowest possible block is 0 */ + ItemPointerSetBlockNumber(scantid, 0); + /* InvalidOffsetNumber less than any real offset */ + ItemPointerSetOffsetNumber(scantid, InvalidOffsetNumber); +#endif } /*---------- diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index e8f506cc09..1a62683ee8 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -1175,8 +1175,12 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2) */ if (compare == 0) { +#ifndef BTREE_ASC_HEAP_TID /* Deliberately invert the order, since TIDs "sort DESC" */ compare = ItemPointerCompare(&itup2->t_tid, &itup->t_tid); +#else + compare = ItemPointerCompare(&itup->t_tid, &itup2->t_tid); +#endif Assert(compare != 0); if (compare > 0) load1 = false; diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index f9f3ec7914..8c8fdd62f4 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -2242,7 +2242,14 @@ _bt_suffix_truncate(Relation rel, IndexTuple lastleft, IndexTuple firstright) */ pivotheaptid = (ItemPointer) ((char *) pivot + newsize - MAXALIGN(sizeof(ItemPointerData))); +#ifndef BTREE_ASC_HEAP_TID ItemPointerCopy(&lastleft->t_tid, pivotheaptid); +#else + /* Manufacture TID that's less than right TID, but only minimally */ + ItemPointerCopy(&firstright->t_tid, pivotheaptid); + ItemPointerSetOffsetNumber(pivotheaptid, + OffsetNumberPrev(ItemPointerGetOffsetNumber(pivotheaptid))); +#endif /* * Lehman and Yao require that the downlink to the right page, which is to @@ -2252,9 +2259,14 @@ _bt_suffix_truncate(Relation rel, IndexTuple lastleft, IndexTuple firstright) * split). 
*/ +#ifndef BTREE_ASC_HEAP_TID /* Deliberately invert the order, since TIDs "sort DESC" */ Assert(ItemPointerCompare(&lastleft->t_tid, pivotheaptid) >= 0); Assert(ItemPointerCompare(&firstright->t_tid, pivotheaptid) < 0); +#else + Assert(ItemPointerCompare(pivotheaptid, &lastleft->t_tid) >= 0); + Assert(ItemPointerCompare(pivotheaptid, &firstright->t_tid) < 0); +#endif BTreeTupleSetNAtts(pivot, nkeyatts); BTreeTupleSetAltHeapTID(pivot); diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index 5211cf5b98..1d9a2602d9 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -4066,17 +4066,27 @@ comparetup_index_btree(const SortTuple *a, const SortTuple *b, BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid); BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid); +#ifndef BTREE_ASC_HEAP_TID /* Deliberately invert the order, since TIDs "sort DESC" */ if (blk1 != blk2) return (blk1 < blk2) ? 1 : -1; +#else + if (blk1 != blk2) + return (blk1 < blk2) ? -1 : 1; +#endif } { OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid); OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid); +#ifndef BTREE_ASC_HEAP_TID /* Deliberately invert the order, since TIDs "sort DESC" */ if (pos1 != pos2) return (pos1 < pos2) ? 1 : -1; +#else + if (pos1 != pos2) + return (pos1 < pos2) ? -1 : 1; +#endif } return 0; diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 12f57773e7..c686cb7e47 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -117,6 +117,24 @@ typedef struct BTMetaPageData #define BTREE_VERSION 4 /* current version number */ #define BTREE_MIN_VERSION 4 /* minimal supported version number */ +/* + * Heap TID behaves as a final key value within nbtree as of + * BTREE_VERSION 4. This ensures that all entries' keys are unique + * and relocatable. By default, heap TIDs are sorted in DESC sort + * order within nbtree indexes. 
ASC heap TID ordering may be + * useful during testing. + * + * DESC order was chosen because it allowed BTREE_VERSION 4 to + * maintain compatibility with unspecified BTREE_VERSION 2 + 3 + * behavior that dependency management nevertheless relied on. + * However, DESC order also seems like it might be slightly better + * on its own merits, since continually splitting the same leaf + * page may cut down on the total number of FPIs generated when + * continually inserting tuples with the same user-visible + * attribute values. +#define BTREE_ASC_HEAP_TID + */ + /* * Maximum size of a btree index entry, including its tuple header. * @@ -151,7 +169,11 @@ typedef struct BTMetaPageData #define BTREE_MIN_FILLFACTOR 10 #define BTREE_DEFAULT_FILLFACTOR 90 #define BTREE_NONLEAF_FILLFACTOR 70 +#ifndef BTREE_ASC_HEAP_TID #define BTREE_SINGLEVAL_FILLFACTOR 1 +#else +#define BTREE_SINGLEVAL_FILLFACTOR 99 +#endif /* * In general, the btree code tries to localize its knowledge about -- 2.17.1