From 5f60aad7a5412398d9b0fa46b8115bb105e53ead Mon Sep 17 00:00:00 2001 From: David Rowley Date: Wed, 16 Feb 2022 10:23:32 +1300 Subject: [PATCH v3 1/2] Allow generation memory context block sizes to increase The standard allocator allows initial, minimum and maximum block sizes, but when the generation context type was added, it only allowed fixed-sized blocks. The problem with fixed-sized blocks is that it's difficult to choose how large to make blocks. If the chosen size is too small then we'd end up with a large number of blocks and a large number of malloc calls. If the block size is made too large, then memory is wasted. Here we allow generation contexts to grow the block size up until the maximum size, just like the standard allocator does. This also adds support for "keeper" blocks. This is a special block that is allocated along with the context itself but is never freed. Instead, when the last chunk in the keeper block is freed, we simply mark the block as empty to allow new allocations to make use of it. Additionally, here we change the logic around when generation blocks are freed. Previously a block would always be freed when the final chunk in a block is pfreed. Here we alter this logic so that if we pfree a chunk that's in the current block that we're filling and that chunk is the only remaining chunk in the block, we keep that block around as it may be useful for future allocations. This can save thrashing free/malloc when we continually palloc/pfree a chunk. This happens to be the case for an upcoming patch which makes use of generation contexts to store tuples in tuplesort.c. We can easily hit this case in bounded sorts when we continually store tuples that we throw away directly due to them sorting after the already stored tuples after having reached the bound. Author: Tomas Vondra, David Rowley Discussion: https://postgr.es/m/d987fd54-01f8-0f73-af6c-519f799a0ab8@enterprisedb.com --- src/backend/access/gist/gistvacuum.c | 6 + .../replication/logical/reorderbuffer.c | 7 + src/backend/utils/mmgr/generation.c | 288 ++++++++++++++---- src/include/utils/memutils.h | 4 +- 4 files changed, 242 insertions(+), 63 deletions(-) diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index aac4afab8f..f190decdff 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c @@ -158,9 +158,15 @@ gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, * pages in page_set_context. Internally, the integer set will remember * this context so that the subsequent allocations for these integer sets * will be done from the same context. + * + * XXX the allocation sizes used below pre-date generation context's block + * growing code. These values should likely be benchmarked and set to + * more suitable values. */ vstate.page_set_context = GenerationContextCreate(CurrentMemoryContext, "GiST VACUUM page set context", + 16 * 1024, + 16 * 1024, 16 * 1024); oldctx = MemoryContextSwitchTo(vstate.page_set_context); vstate.internal_page_set = intset_create(); diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index c2d9be81fa..4702750a2e 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -370,8 +370,15 @@ ReorderBufferAllocate(void) SLAB_DEFAULT_BLOCK_SIZE, sizeof(ReorderBufferTXN)); + /* + * XXX the allocation sizes used below pre-date generation context's block + * growing code. These values should likely be benchmarked and set to + * more suitable values. + */ buffer->tup_context = GenerationContextCreate(new_ctx, "Tuples", + SLAB_LARGE_BLOCK_SIZE, + SLAB_LARGE_BLOCK_SIZE, SLAB_LARGE_BLOCK_SIZE); hash_ctl.keysize = sizeof(TransactionId); diff --git a/src/backend/utils/mmgr/generation.c b/src/backend/utils/mmgr/generation.c index 95c006f689..ebbd322bf2 100644 --- a/src/backend/utils/mmgr/generation.c +++ b/src/backend/utils/mmgr/generation.c @@ -20,18 +20,18 @@ * * The memory context uses a very simple approach to free space management. * Instead of a complex global freelist, each block tracks a number - * of allocated and freed chunks. Freed chunks are not reused, and once all - * chunks in a block are freed, the whole block is thrown away. When the - * chunks allocated in the same block have similar lifespan, this works + * of allocated and freed chunks. Generally, freed chunks are not reused, and + * once all chunks in a block are freed, the whole block is thrown away. When + * the chunks allocated in the same block have similar lifespan, this works * very well and is very cheap. * - * The current implementation only uses a fixed block size - maybe it should - * adapt a min/max block size range, and grow the blocks automatically. - * It already uses dedicated blocks for oversized chunks. - * - * XXX It might be possible to improve this by keeping a small freelist for - * only a small number of recent blocks, but it's not clear it's worth the - * additional complexity. + * An exception to the above is that we do not free the currently used block + * when the final chunk is freed from it. Instead we keep the empty block + * around in the hope that a future allocation can make use of this block. + * This can save a round of needless free/malloc calls. However, we must be + * careful to ensure that we do free this block when a future allocation does + * not fit inside it. Failing to do that would result in leaving a + * permanently empty block. * *------------------------------------------------------------------------- */ @@ -39,6 +39,7 @@ #include "postgres.h" #include "lib/ilist.h" +#include "port/pg_bitutils.h" #include "utils/memdebug.h" #include "utils/memutils.h" @@ -46,6 +47,8 @@ #define Generation_BLOCKHDRSZ MAXALIGN(sizeof(GenerationBlock)) #define Generation_CHUNKHDRSZ sizeof(GenerationChunk) +#define Generation_CHUNK_FRACTION 8 + typedef struct GenerationBlock GenerationBlock; /* forward reference */ typedef struct GenerationChunk GenerationChunk; @@ -60,20 +63,26 @@ typedef struct GenerationContext MemoryContextData header; /* Standard memory-context fields */ /* Generational context parameters */ - Size blockSize; /* standard block size */ - - GenerationBlock *block; /* current (most recently allocated) block */ + Size initBlockSize; /* initial block size */ + Size maxBlockSize; /* maximum block size */ + Size nextBlockSize; /* next block size to allocate */ + Size allocChunkLimit; /* effective chunk size limit */ + + GenerationBlock *block; /* current (most recently allocated) block, or + * NULL if we've just freed the most recent + * block */ + GenerationBlock *keeper; /* keep this block over resets */ dlist_head blocks; /* list of blocks */ } GenerationContext; /* * GenerationBlock * GenerationBlock is the unit of memory that is obtained by generation.c - * from malloc(). It contains one or more GenerationChunks, which are + * from malloc(). It contains zero or more GenerationChunks, which are * the units requested by palloc() and freed by pfree(). GenerationChunks * cannot be returned to malloc() individually, instead pfree() * updates the free counter of the block and when all chunks in a block - * are free the whole block is returned to malloc(). + * are free the whole block can be returned to malloc(). * * GenerationBlock is the header data for a block --- the usable space * within the block begins at the next alignment boundary. @@ -143,6 +152,12 @@ struct GenerationChunk #define GenerationChunkGetPointer(chk) \ ((GenerationPointer *)(((char *)(chk)) + Generation_CHUNKHDRSZ)) +/* Inlined helper functions */ +static inline void GenerationBlockInit(GenerationBlock *block, Size blksize); +static inline void GenerationBlockMarkEmpty(GenerationBlock *block); +static inline void GenerationBlockFree(GenerationContext *context, + GenerationBlock *block); + /* * These functions implement the MemoryContext API for Generation contexts. */ @@ -196,9 +211,14 @@ static const MemoryContextMethods GenerationMethods = { MemoryContext GenerationContextCreate(MemoryContext parent, const char *name, - Size blockSize) + Size minContextSize, + Size initBlockSize, + Size maxBlockSize) { + Size firstBlockSize; + Size allocSize; GenerationContext *set; + GenerationBlock *block; /* Assert we padded GenerationChunk properly */ StaticAssertStmt(Generation_CHUNKHDRSZ == MAXALIGN(Generation_CHUNKHDRSZ), @@ -208,16 +228,28 @@ GenerationContextCreate(MemoryContext parent, "padding calculation in GenerationChunk is wrong"); /* - * First, validate allocation parameters. (If we're going to throw an - * error, we should do so before the context is created, not after.) We - * somewhat arbitrarily enforce a minimum 1K block size, mostly because - * that's what AllocSet does. + * First, validate allocation parameters. Once these were regular runtime + * test and elog's, but in practice Asserts seem sufficient because nobody + * varies their parameters at runtime. We somewhat arbitrarily enforce a + * minimum 1K block size. */ - if (blockSize != MAXALIGN(blockSize) || - blockSize < 1024 || - !AllocHugeSizeIsValid(blockSize)) - elog(ERROR, "invalid blockSize for memory context: %zu", - blockSize); + Assert(initBlockSize == MAXALIGN(initBlockSize) && + initBlockSize >= 1024); + Assert(maxBlockSize == MAXALIGN(maxBlockSize) && + maxBlockSize >= initBlockSize && + AllocHugeSizeIsValid(maxBlockSize)); /* must be safe to double */ + Assert(minContextSize == 0 || + (minContextSize == MAXALIGN(minContextSize) && + minContextSize >= 1024 && + minContextSize <= maxBlockSize)); + + /* Determine size of initial block */ + allocSize = MAXALIGN(sizeof(GenerationContext)) + + Generation_BLOCKHDRSZ + Generation_CHUNKHDRSZ; + if (minContextSize != 0) + allocSize = Max(allocSize, minContextSize); + else + allocSize = Max(allocSize, initBlockSize); /* * Allocate the context header. Unlike aset.c, we never try to combine @@ -225,7 +257,7 @@ GenerationContextCreate(MemoryContext parent, * freeing the first generation of allocations. */ - set = (GenerationContext *) malloc(MAXALIGN(sizeof(GenerationContext))); + set = (GenerationContext *) malloc(allocSize); if (set == NULL) { MemoryContextStats(TopMemoryContext); @@ -240,11 +272,37 @@ GenerationContextCreate(MemoryContext parent, * Avoid writing code that can fail between here and MemoryContextCreate; * we'd leak the header if we ereport in this stretch. */ + dlist_init(&set->blocks); + + /* Fill in the initial block's block header */ + block = (GenerationBlock *) (((char *) set) + MAXALIGN(sizeof(GenerationContext))); + /* determine the block size and initialize it */ + firstBlockSize = allocSize - MAXALIGN(sizeof(GenerationContext)); + GenerationBlockInit(block, firstBlockSize); + + /* add it to the doubly-linked list of blocks */ + dlist_push_head(&set->blocks, &block->node); + + /* use it as the current allocation block */ + set->block = block; + + /* Mark block as not to be released at reset time */ + set->keeper = block; /* Fill in GenerationContext-specific header fields */ - set->blockSize = blockSize; - set->block = NULL; - dlist_init(&set->blocks); + set->initBlockSize = initBlockSize; + set->maxBlockSize = maxBlockSize; + set->nextBlockSize = initBlockSize; + + /* + * Compute the allocation chunk size limit for this context. + * + * Follows similar ideas as AllocSet, see aset.c for details ... + */ + set->allocChunkLimit = maxBlockSize; + while ((Size) (set->allocChunkLimit + Generation_CHUNKHDRSZ) > + (Size) ((Size) (maxBlockSize - Generation_BLOCKHDRSZ) / Generation_CHUNK_FRACTION)) + set->allocChunkLimit >>= 1; /* Finally, do the type-independent part of context creation */ MemoryContextCreate((MemoryContext) set, @@ -253,6 +311,8 @@ GenerationContextCreate(MemoryContext parent, parent, name); + ((MemoryContext)set)->mem_allocated = firstBlockSize; + return (MemoryContext) set; } @@ -280,20 +340,21 @@ GenerationReset(MemoryContext context) { GenerationBlock *block = dlist_container(GenerationBlock, node, miter.cur); - dlist_delete(miter.cur); - - context->mem_allocated -= block->blksize; - -#ifdef CLOBBER_FREED_MEMORY - wipe_mem(block, block->blksize); -#endif - - free(block); + if (block == set->keeper) + GenerationBlockMarkEmpty(block); + else + GenerationBlockFree(set, block); } - set->block = NULL; + /* set it so new allocations to make use of the keeper block */ + set->block = set->keeper; + + /* Reset block size allocation sequence, too */ + set->nextBlockSize = set->initBlockSize; - Assert(dlist_is_empty(&set->blocks)); + /* Ensure there is only 1 item in the dlist */ + Assert(!dlist_is_empty(&set->blocks)); + Assert(!dlist_has_next(&set->blocks, dlist_head_node(&set->blocks))); } /* @@ -331,7 +392,7 @@ GenerationAlloc(MemoryContext context, Size size) Size chunk_size = MAXALIGN(size); /* is it an over-sized chunk? if yes, allocate special block */ - if (chunk_size > set->blockSize / 8) + if (chunk_size > set->allocChunkLimit) { Size blksize = chunk_size + Generation_BLOCKHDRSZ + Generation_CHUNKHDRSZ; @@ -384,10 +445,33 @@ GenerationAlloc(MemoryContext context, Size size) */ block = set->block; - if ((block == NULL) || + if (block == NULL || (block->endptr - block->freeptr) < Generation_CHUNKHDRSZ + chunk_size) { - Size blksize = set->blockSize; + Size blksize; + Size required_size; + + /* + * If the current block is empty and this allocation just does not fit + * then we must free the current block, otherwise, because we must + * create a new block, the current block would be left forever empty. + */ + if (block != NULL && block->nchunks == 0 && block != set->keeper) + GenerationBlockFree(set, block); + + /* + * The first such block has size initBlockSize, and we double the + * space in each succeeding block, but not more than maxBlockSize. + */ + blksize = set->nextBlockSize; + set->nextBlockSize <<= 1; + if (set->nextBlockSize > set->maxBlockSize) + set->nextBlockSize = set->maxBlockSize; + + /* round the size up to the next power of 2 */ + required_size = chunk_size + Generation_BLOCKHDRSZ + Generation_CHUNKHDRSZ; + if (blksize < required_size) + blksize = pg_nextpower2_size_t(required_size); block = (GenerationBlock *) malloc(blksize); @@ -396,16 +480,8 @@ GenerationAlloc(MemoryContext context, Size size) context->mem_allocated += blksize; - block->blksize = blksize; - block->nchunks = 0; - block->nfree = 0; - - block->freeptr = ((char *) block) + Generation_BLOCKHDRSZ; - block->endptr = ((char *) block) + blksize; - - /* Mark unallocated space NOACCESS. */ - VALGRIND_MAKE_MEM_NOACCESS(block->freeptr, - blksize - Generation_BLOCKHDRSZ); + /* initialize the new block */ + GenerationBlockInit(block, blksize); /* add it to the doubly-linked list of blocks */ dlist_push_head(&set->blocks, &block->node); @@ -453,6 +529,73 @@ GenerationAlloc(MemoryContext context, Size size) return GenerationChunkGetPointer(chunk); } +/* + * GenerationBlockInit + * Initializes 'block' assuming 'blksize'. Does not update the context's + * mem_allocated field. + */ +static inline void +GenerationBlockInit(GenerationBlock *block, Size blksize) +{ + block->blksize = blksize; + block->nchunks = 0; + block->nfree = 0; + + block->freeptr = ((char *) block) + Generation_BLOCKHDRSZ; + block->endptr = ((char *) block) + blksize; + + /* Mark unallocated space NOACCESS. */ + VALGRIND_MAKE_MEM_NOACCESS(block->freeptr, + blksize - Generation_BLOCKHDRSZ); +} + +/* + * GenerationBlockMarkEmpty + * Set a block as empty. Does not free the block. + */ +static inline void +GenerationBlockMarkEmpty(GenerationBlock *block) +{ +#if defined(USE_VALGRIND) || defined(CLOBBER_FREED_MEMORY) + char *datastart = ((char *) block) + Generation_BLOCKHDRSZ; +#endif + +#ifdef CLOBBER_FREED_MEMORY + wipe_mem(datastart, block->freeptr - datastart); +#else + /* wipe_mem() would have done this */ + VALGRIND_MAKE_MEM_NOACCESS(datastart, block->freeptr - datastart); +#endif + + /* Reset the block, but don't return it to malloc */ + block->nchunks = 0; + block->nfree = 0; + block->freeptr = ((char *) block) + Generation_BLOCKHDRSZ; + +} + +/* + * GenerationBlockFree + * Remove 'block' from 'context' and release the memory consumed by it. + */ +static inline void +GenerationBlockFree(GenerationContext *context, GenerationBlock *block) +{ + /* Make sure nobody tries to free the keeper block */ + Assert(block != context->keeper); + + /* release the block from the list of blocks */ + dlist_delete(&block->node); + + ((MemoryContext) context)->mem_allocated -= block->blksize; + +#ifdef CLOBBER_FREED_MEMORY + wipe_mem(block, block->blksize); +#endif + + free(block); +} + /* * GenerationFree * Update number of chunks in the block, and if all chunks in the block @@ -500,15 +643,27 @@ GenerationFree(MemoryContext context, void *pointer) return; /* - * The block is empty, so let's get rid of it. First remove it from the - * list of blocks, then return it to malloc(). - */ - dlist_delete(&block->node); + * Don't try to free the keeper block, just mark it empty. Additionally + * we don't free the current block as some memory access patterns may + * continually palloc/pfree, which could cause malloc/free thrashing if + * that happens to be the only chunk in the block. + */ + if (block == set->keeper || block == set->block) + { + GenerationBlockMarkEmpty(block); + return; + } /* Also make sure the block is not marked as the current block. */ if (set->block == block) set->block = NULL; + /* + * The block is empty, so let's get rid of it. First remove it from the + * list of blocks, then return it to malloc(). + */ + dlist_delete(&block->node); + context->mem_allocated -= block->blksize; free(block); } @@ -655,8 +810,17 @@ static bool GenerationIsEmpty(MemoryContext context) { GenerationContext *set = (GenerationContext *) context; + dlist_iter iter; + + dlist_foreach(iter, &set->blocks) + { + GenerationBlock *block = dlist_container(GenerationBlock, node, iter.cur); + + if (block->nchunks > 0) + return false; + } - return dlist_is_empty(&set->blocks); + return true; } /* @@ -745,13 +909,13 @@ GenerationCheck(MemoryContext context) nchunks; char *ptr; - total_allocated += block->blksize; + total_allocated += block->endptr - ((char *) block); /* - * nfree > nchunks is surely wrong, and we don't expect to see - * equality either, because such a block should have gotten freed. + * nfree > nchunks is surely wrong. equality is allowed as we don't + * free blocks when the block to be freed is the set->block. */ - if (block->nfree >= block->nchunks) + if (block->nfree > block->nchunks) elog(WARNING, "problem in Generation %s: number of free chunks %d in block %p exceeds %d allocated", name, block->nfree, block, block->nchunks); diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h index 019700247a..495d1af201 100644 --- a/src/include/utils/memutils.h +++ b/src/include/utils/memutils.h @@ -183,7 +183,9 @@ extern MemoryContext SlabContextCreate(MemoryContext parent, /* generation.c */ extern MemoryContext GenerationContextCreate(MemoryContext parent, const char *name, - Size blockSize); + Size minContextSize, + Size initBlockSize, + Size maxBlockSize); /* * Recommended default alloc parameters, suitable for "ordinary" contexts -- 2.32.0