From 88c6a83543ff0ba1a788217983543163169f7432 Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Thu, 21 Mar 2024 17:11:16 +0900 Subject: [PATCH v77 3/3] PoC: Make shared TidStore create its own DSA area. --- src/backend/access/common/tidstore.c | 85 +++++++++++++++---- src/backend/access/heap/vacuumlazy.c | 5 +- src/backend/commands/vacuumparallel.c | 38 +++------ src/include/access/tidstore.h | 7 +- .../modules/test_tidstore/test_tidstore.c | 14 +-- 5 files changed, 90 insertions(+), 59 deletions(-) diff --git a/src/backend/access/common/tidstore.c b/src/backend/access/common/tidstore.c index 745393806d..8f5df7d89f 100644 --- a/src/backend/access/common/tidstore.c +++ b/src/backend/access/common/tidstore.c @@ -7,9 +7,9 @@ * Internally it uses a radix tree as the storage for TIDs. The key is the * BlockNumber and the value is a bitmap of offsets, BlocktableEntry. * - * TidStore can be shared among parallel worker processes by passing DSA area - * to TidStoreCreate(). Other backends can attach to the shared TidStore by - * TidStoreAttach(). + * TidStore can be shared among parallel worker processes by using + * TidStoreCreateShared(). Other backends can attach to the shared TidStore + * by TidStoreAttach(). * * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -105,9 +105,25 @@ struct TidStoreIter TidStoreIterResult output; }; +static TidStore * tidstore_create_internal(size_t max_bytes, bool shared, + int tranche_id); static void tidstore_iter_extract_tids(TidStoreIter *iter, BlockNumber blkno, BlocktableEntry *page); +/* Public APIs to create local or shared TidStore */ + +TidStore * +TidStoreCreateLocal(size_t max_bytes) +{ + return tidstore_create_internal(max_bytes, false, 0); +} + +TidStore * +TidStoreCreateShared(size_t max_bytes, int tranche_id) +{ + return tidstore_create_internal(max_bytes, true, tranche_id); +} + /* * Create a TidStore. 
The TidStore will live in the memory context that is * CurrentMemoryContext at the time of this call. The TID storage, backed @@ -118,8 +134,8 @@ static void tidstore_iter_extract_tids(TidStoreIter *iter, BlockNumber blkno, * * The returned object is allocated in backend-local memory. */ -TidStore * -TidStoreCreate(size_t max_bytes, dsa_area *area, int tranche_id) +static TidStore * +tidstore_create_internal(size_t max_bytes, bool shared, int tranche_id) { TidStore *ts; size_t initBlockSize = ALLOCSET_DEFAULT_INITSIZE; @@ -143,8 +159,27 @@ TidStoreCreate(size_t max_bytes, dsa_area *area, int tranche_id) initBlockSize, maxBlockSize); - if (area != NULL) + if (shared) { + dsa_area *area; + size_t dsa_init_size = DSA_INITIAL_SEGMENT_SIZE; + size_t dsa_max_size = DSA_MAX_SEGMENT_SIZE; + + /* + * Choose the DSA initial and max segment sizes to be no larger than + * 1/16 and 1/8 of max_bytes, respectively, but not below the minimum. + */ + while (16 * dsa_init_size > max_bytes * 1024L) + dsa_init_size >>= 1; + while (8 * dsa_max_size > max_bytes * 1024L) + dsa_max_size >>= 1; + + if (dsa_init_size < DSA_MIN_SEGMENT_SIZE) + dsa_init_size = DSA_MIN_SEGMENT_SIZE; + if (dsa_max_size < DSA_MIN_SEGMENT_SIZE) + dsa_max_size = DSA_MIN_SEGMENT_SIZE; + + area = dsa_create_ext(tranche_id, dsa_init_size, dsa_max_size); ts->tree.shared = shared_ts_create(ts->rt_context, area, tranche_id); ts->area = area; @@ -156,20 +191,25 @@ TidStoreCreate(size_t max_bytes, dsa_area *area, int tranche_id) } /* - * Attach to the shared TidStore using the given handle. The returned object - * is allocated in backend-local memory using the CurrentMemoryContext. + * Attach to the shared TidStore. 'area_handle' is the DSA handle where + * the TidStore is created. 'handle' is the dsa_pointer returned by + * TidStoreGetHandle(). The returned object is allocated in backend-local + * memory using the CurrentMemoryContext. 
*/ TidStore * -TidStoreAttach(dsa_area *area, dsa_pointer handle) +TidStoreAttach(dsa_handle area_handle, dsa_pointer handle) { TidStore *ts; + dsa_area *area; - Assert(area != NULL); + Assert(area_handle != DSA_HANDLE_INVALID); Assert(DsaPointerIsValid(handle)); /* create per-backend state */ ts = palloc0(sizeof(TidStore)); + area = dsa_attach(area_handle); + /* Find the shared the shared radix tree */ ts->tree.shared = shared_ts_attach(area, handle); ts->area = area; @@ -178,10 +218,8 @@ TidStoreAttach(dsa_area *area, dsa_pointer handle) } /* - * Detach from a TidStore. This detaches from radix tree and frees the - * backend-local resources. The radix tree will continue to exist until - * it is either explicitly destroyed, or the area that backs it is returned - * to the operating system. + * Detach from a TidStore. This also detaches from radix tree and frees + * the backend-local resources. */ void TidStoreDetach(TidStore *ts) @@ -189,6 +227,8 @@ TidStoreDetach(TidStore *ts) Assert(TidStoreIsShared(ts)); shared_ts_detach(ts->tree.shared); + dsa_detach(ts->area); + pfree(ts); } @@ -232,9 +272,13 @@ TidStoreUnlock(TidStore *ts) void TidStoreDestroy(TidStore *ts) { - /* Destroy underlying radix tree */ if (TidStoreIsShared(ts)) + { + /* Destroy underlying radix tree */ shared_ts_free(ts->tree.shared); + + dsa_detach(ts->area); + } else local_ts_free(ts->tree.local); @@ -420,6 +464,17 @@ TidStoreMemoryUsage(TidStore *ts) return local_ts_memory_usage(ts->tree.local); } +/* + * Return the DSA area where the TidStore lives. 
+ */ +dsa_area * +TidStoreGetDSA(TidStore *ts) +{ + Assert(TidStoreIsShared(ts)); + + return ts->area; +} + dsa_pointer TidStoreGetHandle(TidStore *ts) { diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index f6c09c8da1..7bb2a95a82 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -3155,7 +3155,7 @@ dead_items_alloc(LVRelState *vacrel, int nworkers) } /* Serial VACUUM case */ - vacrel->dead_items = TidStoreCreate(vac_work_mem, NULL, 0); + vacrel->dead_items = TidStoreCreateLocal(vac_work_mem); dead_items_info = (VacDeadItemsInfo *) palloc(sizeof(VacDeadItemsInfo)); dead_items_info->max_bytes = vac_work_mem * 1024L; @@ -3196,8 +3196,7 @@ dead_items_reset(LVRelState *vacrel) /* Recreate the tidstore with the same max_bytes limitation */ TidStoreDestroy(dead_items); - vacrel->dead_items = TidStoreCreate(vacrel->dead_items_info->max_bytes, - NULL, 0); + vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes); /* Reset the counter */ vacrel->dead_items_info->num_items = 0; diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c index 4bd0df3b5e..6e45fa4b95 100644 --- a/src/backend/commands/vacuumparallel.c +++ b/src/backend/commands/vacuumparallel.c @@ -45,7 +45,7 @@ * use small integers. 
*/ #define PARALLEL_VACUUM_KEY_SHARED 1 -#define PARALLEL_VACUUM_KEY_DEAD_ITEMS 2 +/* 2 was PARALLEL_VACUUM_KEY_DEAD_ITEMS */ #define PARALLEL_VACUUM_KEY_QUERY_TEXT 3 #define PARALLEL_VACUUM_KEY_BUFFER_USAGE 4 #define PARALLEL_VACUUM_KEY_WAL_USAGE 5 @@ -111,6 +111,9 @@ typedef struct PVShared /* Counter for vacuuming and cleanup */ pg_atomic_uint32 idx; + /* DSA handle where the TidStore lives */ + dsa_handle dead_items_dsa_handle; + /* DSA pointer to the shared TidStore */ dsa_pointer dead_items_handle; @@ -183,7 +186,6 @@ struct ParallelVacuumState /* Shared dead items space among parallel vacuum workers */ TidStore *dead_items; - dsa_area *dead_items_area; /* Points to buffer usage area in DSM */ BufferUsage *buffer_usage; @@ -249,12 +251,9 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, PVIndStats *indstats; BufferUsage *buffer_usage; WalUsage *wal_usage; - void *area_space; - dsa_area *dead_items_dsa; bool *will_parallel_vacuum; Size est_indstats_len; Size est_shared_len; - Size dsa_minsize = dsa_minimum_size(); int nindexes_mwm = 0; int parallel_workers = 0; int querylen; @@ -303,10 +302,6 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, shm_toc_estimate_chunk(&pcxt->estimator, est_shared_len); shm_toc_estimate_keys(&pcxt->estimator, 1); - /* Initial size of DSA for dead tuples -- PARALLEL_VACUUM_KEY_DEAD_ITEMS */ - shm_toc_estimate_chunk(&pcxt->estimator, dsa_minsize); - shm_toc_estimate_keys(&pcxt->estimator, 1); - /* * Estimate space for BufferUsage and WalUsage -- * PARALLEL_VACUUM_KEY_BUFFER_USAGE and PARALLEL_VACUUM_KEY_WAL_USAGE. 
@@ -371,15 +366,8 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, pvs->indstats = indstats; /* Prepare DSA space for dead items */ - area_space = shm_toc_allocate(pcxt->toc, dsa_minsize); - shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_DEAD_ITEMS, area_space); - dead_items_dsa = dsa_create_in_place(area_space, dsa_minsize, - LWTRANCHE_PARALLEL_VACUUM_DSA, - pcxt->seg); - dead_items = TidStoreCreate(vac_work_mem, dead_items_dsa, - LWTRANCHE_PARALLEL_VACUUM_DSA); + dead_items = TidStoreCreateShared(vac_work_mem, LWTRANCHE_PARALLEL_VACUUM_DSA); pvs->dead_items = dead_items; - pvs->dead_items_area = dead_items_dsa; /* Prepare shared information */ shared = (PVShared *) shm_toc_allocate(pcxt->toc, est_shared_len); @@ -390,6 +378,7 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, (nindexes_mwm > 0) ? maintenance_work_mem / Min(parallel_workers, nindexes_mwm) : maintenance_work_mem; + shared->dead_items_dsa_handle = dsa_get_handle(TidStoreGetDSA(dead_items)); shared->dead_items_handle = TidStoreGetHandle(dead_items); shared->dead_items_info.max_bytes = vac_work_mem * 1024L; @@ -461,7 +450,6 @@ parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats) } TidStoreDestroy(pvs->dead_items); - dsa_detach(pvs->dead_items_area); DestroyParallelContext(pvs->pcxt); ExitParallelMode(); @@ -493,11 +481,11 @@ parallel_vacuum_reset_dead_items(ParallelVacuumState *pvs) * limitation we just used. 
*/ TidStoreDestroy(dead_items); - dsa_trim(pvs->dead_items_area); - pvs->dead_items = TidStoreCreate(dead_items_info->max_bytes, pvs->dead_items_area, - LWTRANCHE_PARALLEL_VACUUM_DSA); + pvs->dead_items = TidStoreCreateShared(dead_items_info->max_bytes, + LWTRANCHE_PARALLEL_VACUUM_DSA); /* Update the DSA pointer for dead_items to the new one */ - pvs->shared->dead_items_handle = TidStoreGetHandle(dead_items); + pvs->shared->dead_items_dsa_handle = dsa_get_handle(TidStoreGetDSA(pvs->dead_items)); + pvs->shared->dead_items_handle = TidStoreGetHandle(pvs->dead_items); /* Reset the counter */ @@ -1005,8 +993,6 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc) PVIndStats *indstats; PVShared *shared; TidStore *dead_items; - void *area_space; - dsa_area *dead_items_area; BufferUsage *buffer_usage; WalUsage *wal_usage; int nindexes; @@ -1051,9 +1037,8 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc) false); /* Set dead items */ - area_space = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_DEAD_ITEMS, false); - dead_items_area = dsa_attach_in_place(area_space, seg); - dead_items = TidStoreAttach(dead_items_area, shared->dead_items_handle); + dead_items = TidStoreAttach(shared->dead_items_dsa_handle, + shared->dead_items_handle); /* Set cost-based vacuum delay */ VacuumUpdateCosts(); @@ -1102,7 +1087,6 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc) &wal_usage[ParallelWorkerNumber]); TidStoreDetach(dead_items); - dsa_detach(dead_items_area); /* Pop the error context stack */ error_context_stack = errcallback.previous; diff --git a/src/include/access/tidstore.h b/src/include/access/tidstore.h index 8cf4e94f12..1cc695f90a 100644 --- a/src/include/access/tidstore.h +++ b/src/include/access/tidstore.h @@ -29,9 +29,9 @@ typedef struct TidStoreIterResult OffsetNumber *offsets; } TidStoreIterResult; -extern TidStore *TidStoreCreate(size_t max_bytes, dsa_area *dsa, - int tranche_id); -extern TidStore *TidStoreAttach(dsa_area *dsa, dsa_pointer rt_dp); +extern TidStore *TidStoreCreateLocal(size_t max_bytes); +extern TidStore 
*TidStoreCreateShared(size_t max_bytes, int tranche_id); +extern TidStore *TidStoreAttach(dsa_handle area_handle, dsa_pointer handle); extern void TidStoreDetach(TidStore *ts); extern void TidStoreLockExclusive(TidStore *ts); extern void TidStoreLockShare(TidStore *ts); @@ -45,5 +45,6 @@ extern TidStoreIterResult *TidStoreIterateNext(TidStoreIter *iter); extern void TidStoreEndIterate(TidStoreIter *iter); extern size_t TidStoreMemoryUsage(TidStore *ts); extern dsa_pointer TidStoreGetHandle(TidStore *ts); +extern dsa_area *TidStoreGetDSA(TidStore *ts); #endif /* TIDSTORE_H */ diff --git a/src/test/modules/test_tidstore/test_tidstore.c b/src/test/modules/test_tidstore/test_tidstore.c index c74ad2cf8b..3d4af77dda 100644 --- a/src/test/modules/test_tidstore/test_tidstore.c +++ b/src/test/modules/test_tidstore/test_tidstore.c @@ -34,7 +34,6 @@ PG_FUNCTION_INFO_V1(test_is_full); PG_FUNCTION_INFO_V1(test_destroy); static TidStore *tidstore = NULL; -static dsa_area *dsa = NULL; static size_t tidstore_empty_size; /* array for verification of some tests */ @@ -94,7 +93,6 @@ test_create(PG_FUNCTION_ARGS) size_t array_init_size = 1024; Assert(tidstore == NULL); - Assert(dsa == NULL); /* * Create the TidStore on TopMemoryContext so that the same process use it @@ -109,18 +107,16 @@ test_create(PG_FUNCTION_ARGS) tranche_id = LWLockNewTrancheId(); LWLockRegisterTranche(tranche_id, "test_tidstore"); - dsa = dsa_create(tranche_id); + tidstore = TidStoreCreateShared(tidstore_max_size, tranche_id); /* * Remain attached until end of backend or explicitly detached so that * the same process use the tidstore for subsequent tests. 
*/ - dsa_pin_mapping(dsa); - - tidstore = TidStoreCreate(tidstore_max_size, dsa, tranche_id); + dsa_pin_mapping(TidStoreGetDSA(tidstore)); } else - tidstore = TidStoreCreate(tidstore_max_size, NULL, 0); + tidstore = TidStoreCreateLocal(tidstore_max_size); tidstore_empty_size = TidStoreMemoryUsage(tidstore); @@ -309,9 +305,5 @@ test_destroy(PG_FUNCTION_ARGS) pfree(items.lookup_tids); pfree(items.iter_tids); - if (dsa) - dsa_detach(dsa); - dsa = NULL; - PG_RETURN_VOID(); } -- 2.39.3