From f9be0044ee6e35dd44bceca59d733ba8cdf5373e Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Mon, 17 Apr 2023 18:01:46 +0900 Subject: [PATCH v32 14/18] tidstore: Miscellaneous updates. comment updates, fix typos, etc. --- src/backend/access/common/tidstore.c | 78 +++++++++++-------- .../modules/test_tidstore/test_tidstore.c | 1 + 2 files changed, 47 insertions(+), 32 deletions(-) diff --git a/src/backend/access/common/tidstore.c b/src/backend/access/common/tidstore.c index 15b77b5bcb..9360520482 100644 --- a/src/backend/access/common/tidstore.c +++ b/src/backend/access/common/tidstore.c @@ -3,18 +3,19 @@ * tidstore.c * Tid (ItemPointerData) storage implementation. * - * This module provides a in-memory data structure to store Tids (ItemPointer). - * Internally, a tid is encoded as a pair of 64-bit key and 64-bit value, and - * stored in the radix tree. + * TidStore is an in-memory data structure to store tids (ItemPointerData). + * Internally, a tid is encoded as a pair of 64-bit key and 64-bit value, + * and stored in the radix tree. * * TidStore can be shared among parallel worker processes by passing DSA area * to TidStoreCreate(). Other backends can attach to the shared TidStore by * TidStoreAttach(). * - * Regarding the concurrency, it basically relies on the concurrency support in - * the radix tree, but we acquires the lock on a TidStore in some cases, for - * example, when to reset the store and when to access the number tids in the - * store (num_tids). + * Regarding the concurrency support, we use a single LWLock for the TidStore. + * The TidStore is exclusively locked when inserting encoded tids to the + * radix tree or when resetting itself. When searching on the TidStore or + * doing the iteration, it is not locked but the underlying radix tree is + * locked in shared mode. 
* * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -34,16 +35,18 @@ #include "utils/memutils.h" /* - * For encoding purposes, tids are represented as a pair of 64-bit key and - * 64-bit value. First, we construct 64-bit unsigned integer by combining - * the block number and the offset number. The number of bits used for the - * offset number is specified by max_offsets in tidstore_create(). We are - * frugal with the bits, because smaller keys could help keeping the radix - * tree shallow. + * For encoding purposes, a tid is represented as a pair of 64-bit key and + * 64-bit value. * - * For example, a tid of heap with 8kB blocks uses the lowest 9 bits for - * the offset number and uses the next 32 bits for the block number. That - * is, only 41 bits are used: + * First, we construct a 64-bit unsigned integer by combining the block + * number and the offset number. The number of bits used for the offset number + * is specified by max_off in TidStoreCreate(). We are frugal with the bits, + * because smaller keys could help keeping the radix tree shallow. + * + * For example, a tid of heap on an 8kB block uses the lowest 9 bits for + * the offset number and uses the next 32 bits for the block number. 9 bits + * are enough for the offset number, because MaxHeapTuplesPerPage < 2^9 + * on 8kB blocks. That is, only 41 bits are used: * * uuuuuuuY YYYYYYYY YYYYYYYY YYYYYYYY YYYYYYYX XXXXXXXX * @@ -52,25 +55,27 @@ * u = unused bit * (high on the left, low on the right) * - * 9 bits are enough for the offset number, because MaxHeapTuplesPerPage < 2^9 - * on 8kB blocks. 
- * - * The 64-bit value is the bitmap representation of the lowest 6 bits - * (TIDSTORE_VALUE_NBITS) of the integer, and the rest 35 bits are used - * as the key: + * Then, the 64-bit value is the bitmap representation of the lowest 6 bits + * (LOWER_OFFSET_NBITS) of the integer, and the 64-bit key consists of the + * upper 3 bits of the offset number and the block number, 35 bits in + * total: * * uuuuuuuY YYYYYYYY YYYYYYYY YYYYYYYY YYYYYYYX XXXXXXXX * |----| value - * |---------------------------------------------| key + * |--------------------------------------| key * * The maximum height of the radix tree is 5 in this case. + * + * If the number of bits required for offset numbers fits in LOWER_OFFSET_NBITS, + * the 64-bit value is the bitmap representation of the offset number, and the + * 64-bit key is the block number. */ typedef uint64 tidkey; typedef uint64 offsetbm; #define LOWER_OFFSET_NBITS 6 /* log(sizeof(offsetbm), 2) */ #define LOWER_OFFSET_MASK ((1 << LOWER_OFFSET_NBITS) - 1) -/* A magic value used to identify our TidStores. */ +/* A magic value used to identify our TidStore. */ #define TIDSTORE_MAGIC 0x826f6a10 #define RT_PREFIX local_rt @@ -152,8 +157,10 @@ typedef struct TidStoreIter tidkey next_tidkey; offsetbm next_off_bitmap; - /* output for the caller */ - TidStoreIterResult result; + /* + * output for the caller. Must be last because variable-size. 
+ */ + TidStoreIterResult output; } TidStoreIter; static void iter_decode_key_off(TidStoreIter *iter, tidkey key, offsetbm off_bitmap); @@ -205,7 +212,7 @@ TidStoreCreate(size_t max_bytes, int max_off, dsa_area *area) dp = dsa_allocate0(area, sizeof(TidStoreControl)); ts->control = (TidStoreControl *) dsa_get_address(area, dp); - ts->control->max_bytes = (uint64) (max_bytes * ratio); + ts->control->max_bytes = (size_t) (max_bytes * ratio); ts->area = area; ts->control->magic = TIDSTORE_MAGIC; @@ -353,7 +360,11 @@ TidStoreReset(TidStore *ts) } } -/* Add Tids on a block to TidStore */ +/* + * Set the given tids on the blkno to TidStore. + * + * NB: the offset numbers in offsets must be sorted in ascending order. + */ void TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets, int num_offsets) @@ -564,7 +575,7 @@ TidStoreEndIterate(TidStoreIter *iter) int64 TidStoreNumTids(TidStore *ts) { - uint64 num_tids; + int64 num_tids; Assert(!TidStoreIsShared(ts) || ts->control->magic == TIDSTORE_MAGIC); @@ -624,11 +635,14 @@ TidStoreGetHandle(TidStore *ts) return ts->control->handle; } -/* Extract tids from the given key-value pair */ +/* + * Decode the key and offset bitmap to tids and store them to the iteration + * result. 
+ */ static void iter_decode_key_off(TidStoreIter *iter, tidkey key, offsetbm off_bitmap) { - TidStoreIterResult *result = (&iter->result); + TidStoreIterResult *output = (&iter->output); while (off_bitmap) { @@ -661,7 +675,7 @@ key_get_blkno(TidStore *ts, tidkey key) static inline tidkey encode_tid(TidStore *ts, ItemPointer tid, offsetbm *off_bit) { - uint32 offset = ItemPointerGetOffsetNumber(tid); + OffsetNumber offset = ItemPointerGetOffsetNumber(tid); BlockNumber block = ItemPointerGetBlockNumber(tid); return encode_blk_off(ts, block, offset, off_bit); diff --git a/src/test/modules/test_tidstore/test_tidstore.c b/src/test/modules/test_tidstore/test_tidstore.c index 12d3027624..8659e6780e 100644 --- a/src/test/modules/test_tidstore/test_tidstore.c +++ b/src/test/modules/test_tidstore/test_tidstore.c @@ -222,6 +222,7 @@ test_tidstore(PG_FUNCTION_ARGS) elog(NOTICE, "testing basic operations"); test_basic(MaxHeapTuplesPerPage); test_basic(10); + test_basic(MaxHeapTuplesPerPage * 2); PG_RETURN_VOID(); } -- 2.31.1