From 942b69a0876b0e83303e6704da54c4c002a5a2d8 Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthalion6@gmail.com> Date: Tue, 17 Jun 2025 11:22:02 +0200 Subject: [PATCH 07/16] Introduce multiple shmem segments for shared buffers Add more shmem segments to split shared buffers into the following chunks: * BUFFERS_SHMEM_SEGMENT: contains buffer blocks * BUFFER_DESCRIPTORS_SHMEM_SEGMENT: contains buffer descriptors * BUFFER_IOCV_SHMEM_SEGMENT: contains condition variables for buffers * CHECKPOINT_BUFFERS_SHMEM_SEGMENT: contains checkpoint buffer ids * STRATEGY_SHMEM_SEGMENT: contains buffer strategy status The size of the corresponding shared data directly depends on NBuffers, meaning that if we would like to change NBuffers, they have to be resized correspondingly. Placing each of them in a separate shmem segment makes that possible. Some assumptions are made about the upper size limit of each shmem segment. The buffer blocks segment gets the largest limit, while the rest claim less extra room for resizing. Ideally those limits should be deduced from the maximum allowed shared memory. --- src/backend/port/sysv_shmem.c | 24 +++++++- src/backend/storage/buffer/buf_init.c | 79 +++++++++++++++++--------- src/backend/storage/buffer/buf_table.c | 6 +- src/backend/storage/buffer/freelist.c | 5 +- src/backend/storage/ipc/ipci.c | 2 +- src/include/storage/bufmgr.h | 2 +- src/include/storage/pg_shmem.h | 24 +++++++- 7 files changed, 105 insertions(+), 37 deletions(-) diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c index 363ddfd1fca..dac011b766b 100644 --- a/src/backend/port/sysv_shmem.c +++ b/src/backend/port/sysv_shmem.c @@ -139,10 +139,18 @@ static int next_free_segment = 0; * * The reserved space for each segment is calculated as a fraction of the total * reserved space (MaxAvailableMemory), as specified in the SHMEM_RESIZE_RATIO - * array. + * array. E.g. 
we allow BUFFERS_SHMEM_SEGMENT to take up to 60% of the whole + * space when resizing, based on the fact that it most likely will be the main + * consumer of this memory. Those numbers are pulled out of thin air for now; + * it makes sense to evaluate them more precisely. */ -static double SHMEM_RESIZE_RATIO[1] = { - 1.0, /* MAIN_SHMEM_SLOT */ +static double SHMEM_RESIZE_RATIO[6] = { + 0.1, /* MAIN_SHMEM_SEGMENT */ + 0.6, /* BUFFERS_SHMEM_SEGMENT */ + 0.1, /* BUFFER_DESCRIPTORS_SHMEM_SEGMENT */ + 0.1, /* BUFFER_IOCV_SHMEM_SEGMENT */ + 0.05, /* CHECKPOINT_BUFFERS_SHMEM_SEGMENT */ + 0.05, /* STRATEGY_SHMEM_SEGMENT */ }; /* @@ -167,6 +175,16 @@ MappingName(int shmem_segment) { case MAIN_SHMEM_SEGMENT: return "main"; + case BUFFERS_SHMEM_SEGMENT: + return "buffers"; + case BUFFER_DESCRIPTORS_SHMEM_SEGMENT: + return "descriptors"; + case BUFFER_IOCV_SHMEM_SEGMENT: + return "iocv"; + case CHECKPOINT_BUFFERS_SHMEM_SEGMENT: + return "checkpoint"; + case STRATEGY_SHMEM_SEGMENT: + return "strategy"; default: return "unknown"; } diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c index 6fd3a6bbac5..5383442e213 100644 --- a/src/backend/storage/buffer/buf_init.c +++ b/src/backend/storage/buffer/buf_init.c @@ -62,7 +62,10 @@ CkptSortItem *CkptBufferIds; * Initialize shared buffer pool * * This is called once during shared-memory initialization (either in the - * postmaster, or in a standalone backend). + * postmaster, or in a standalone backend). The size of the data structures initialized + * here depends on NBuffers, and to be able to change NBuffers without a + * restart we store each structure in a separate shared memory segment, which + * can be resized on demand. */ void BufferManagerShmemInit(void) @@ -74,22 +77,22 @@ BufferManagerShmemInit(void) /* Align descriptors to a cacheline boundary. 
*/ BufferDescriptors = (BufferDescPadded *) - ShmemInitStruct("Buffer Descriptors", + ShmemInitStructInSegment("Buffer Descriptors", NBuffers * sizeof(BufferDescPadded), - &foundDescs); + &foundDescs, BUFFER_DESCRIPTORS_SHMEM_SEGMENT); /* Align buffer pool on IO page size boundary. */ BufferBlocks = (char *) TYPEALIGN(PG_IO_ALIGN_SIZE, - ShmemInitStruct("Buffer Blocks", + ShmemInitStructInSegment("Buffer Blocks", NBuffers * (Size) BLCKSZ + PG_IO_ALIGN_SIZE, - &foundBufs)); + &foundBufs, BUFFERS_SHMEM_SEGMENT)); /* Align condition variables to cacheline boundary. */ BufferIOCVArray = (ConditionVariableMinimallyPadded *) - ShmemInitStruct("Buffer IO Condition Variables", + ShmemInitStructInSegment("Buffer IO Condition Variables", NBuffers * sizeof(ConditionVariableMinimallyPadded), - &foundIOCV); + &foundIOCV, BUFFER_IOCV_SHMEM_SEGMENT); /* * The array used to sort to-be-checkpointed buffer ids is located in @@ -99,8 +102,9 @@ BufferManagerShmemInit(void) * painful. */ CkptBufferIds = (CkptSortItem *) - ShmemInitStruct("Checkpoint BufferIds", - NBuffers * sizeof(CkptSortItem), &foundBufCkpt); + ShmemInitStructInSegment("Checkpoint BufferIds", + NBuffers * sizeof(CkptSortItem), &foundBufCkpt, + CHECKPOINT_BUFFERS_SHMEM_SEGMENT); if (foundDescs || foundBufs || foundIOCV || foundBufCkpt) { @@ -147,33 +151,54 @@ BufferManagerShmemInit(void) * BufferManagerShmemSize * * compute the size of shared memory for the buffer pool including - * data pages, buffer descriptors, hash tables, etc. + * data pages, buffer descriptors, hash tables, etc. based on the + * shared memory segment. The main segment must not allocate anything + * related to buffers, every other segment will receive part of the + * data. 
*/ Size -BufferManagerShmemSize(void) +BufferManagerShmemSize(int shmem_segment) { Size size = 0; - /* size of buffer descriptors */ - size = add_size(size, mul_size(NBuffers, sizeof(BufferDescPadded))); - /* to allow aligning buffer descriptors */ - size = add_size(size, PG_CACHE_LINE_SIZE); + if (shmem_segment == MAIN_SHMEM_SEGMENT) + return size; - /* size of data pages, plus alignment padding */ - size = add_size(size, PG_IO_ALIGN_SIZE); - size = add_size(size, mul_size(NBuffers, BLCKSZ)); + if (shmem_segment == BUFFER_DESCRIPTORS_SHMEM_SEGMENT) + { + /* size of buffer descriptors */ + size = add_size(size, mul_size(NBuffers, sizeof(BufferDescPadded))); + /* to allow aligning buffer descriptors */ + size = add_size(size, PG_CACHE_LINE_SIZE); + } - /* size of stuff controlled by freelist.c */ - size = add_size(size, StrategyShmemSize()); + if (shmem_segment == BUFFERS_SHMEM_SEGMENT) + { + /* size of data pages, plus alignment padding */ + size = add_size(size, PG_IO_ALIGN_SIZE); + size = add_size(size, mul_size(NBuffers, BLCKSZ)); + } - /* size of I/O condition variables */ - size = add_size(size, mul_size(NBuffers, - sizeof(ConditionVariableMinimallyPadded))); - /* to allow aligning the above */ - size = add_size(size, PG_CACHE_LINE_SIZE); + if (shmem_segment == STRATEGY_SHMEM_SEGMENT) + { + /* size of stuff controlled by freelist.c */ + size = add_size(size, StrategyShmemSize()); + } - /* size of checkpoint sort array in bufmgr.c */ - size = add_size(size, mul_size(NBuffers, sizeof(CkptSortItem))); + if (shmem_segment == BUFFER_IOCV_SHMEM_SEGMENT) + { + /* size of I/O condition variables */ + size = add_size(size, mul_size(NBuffers, + sizeof(ConditionVariableMinimallyPadded))); + /* to allow aligning the above */ + size = add_size(size, PG_CACHE_LINE_SIZE); + } + + if (shmem_segment == CHECKPOINT_BUFFERS_SHMEM_SEGMENT) + { + /* size of checkpoint sort array in bufmgr.c */ + size = add_size(size, mul_size(NBuffers, sizeof(CkptSortItem))); + } return size; } 
diff --git a/src/backend/storage/buffer/buf_table.c b/src/backend/storage/buffer/buf_table.c index 1f6e215a2ca..18a78967138 100644 --- a/src/backend/storage/buffer/buf_table.c +++ b/src/backend/storage/buffer/buf_table.c @@ -25,6 +25,7 @@ #include "funcapi.h" #include "storage/buf_internals.h" #include "storage/lwlock.h" +#include "storage/pg_shmem.h" #include "utils/rel.h" #include "utils/builtins.h" @@ -64,10 +65,11 @@ InitBufTable(int size) info.entrysize = sizeof(BufferLookupEnt); info.num_partitions = NUM_BUFFER_PARTITIONS; - SharedBufHash = ShmemInitHash("Shared Buffer Lookup Table", + SharedBufHash = ShmemInitHashInSegment("Shared Buffer Lookup Table", size, size, &info, - HASH_ELEM | HASH_BLOBS | HASH_PARTITION | HASH_FIXED_SIZE); + HASH_ELEM | HASH_BLOBS | HASH_PARTITION | HASH_FIXED_SIZE, + STRATEGY_SHMEM_SEGMENT); } /* diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index 7d59a92bd1a..0bfbbb096d6 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -19,6 +19,7 @@ #include "port/atomics.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" +#include "storage/pg_shmem.h" #include "storage/proc.h" #define INT_ACCESS_ONCE(var) ((int)(*((volatile int *)&(var)))) @@ -381,9 +382,9 @@ StrategyInitialize(bool init) * Get or create the shared strategy control block */ StrategyControl = (BufferStrategyControl *) - ShmemInitStruct("Buffer Strategy Status", + ShmemInitStructInSegment("Buffer Strategy Status", sizeof(BufferStrategyControl), - &found); + &found, STRATEGY_SHMEM_SEGMENT); if (!found) { diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index b60f7ef9ce2..2dbd81afc87 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -113,7 +113,7 @@ CalculateShmemSize(int *num_semaphores, int shmem_segment) sizeof(ShmemIndexEnt))); size = add_size(size, dsm_estimate_size()); size = add_size(size, 
DSMRegistryShmemSize()); - size = add_size(size, BufferManagerShmemSize()); + size = add_size(size, BufferManagerShmemSize(shmem_segment)); size = add_size(size, LockManagerShmemSize()); size = add_size(size, PredicateLockShmemSize()); size = add_size(size, ProcGlobalShmemSize()); diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 47360a3d3d8..f8d34513c7f 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -318,7 +318,7 @@ extern void EvictRelUnpinnedBuffers(Relation rel, /* in buf_init.c */ extern void BufferManagerShmemInit(void); -extern Size BufferManagerShmemSize(void); +extern Size BufferManagerShmemSize(int); /* in localbuf.c */ extern void AtProcExit_LocalBuffers(void); diff --git a/src/include/storage/pg_shmem.h b/src/include/storage/pg_shmem.h index 79b0b1ef9eb..a7b275b4db9 100644 --- a/src/include/storage/pg_shmem.h +++ b/src/include/storage/pg_shmem.h @@ -52,7 +52,7 @@ typedef struct ShmemSegment } ShmemSegment; /* Number of available segments for anonymous memory mappings */ -#define ANON_MAPPINGS 1 +#define ANON_MAPPINGS 6 extern PGDLLIMPORT ShmemSegment Segments[ANON_MAPPINGS]; @@ -109,7 +109,29 @@ extern void GetHugePageSize(Size *hugepagesize, int *mmap_flags, int *memfd_flags); void PrepareHugePages(void); +/* + * To be able to dynamically resize the largest parts of the data stored in shared + * memory, we split it into multiple shared memory mapping segments. Each + * segment contains only a certain part of the data, whose size depends on + * NBuffers. + */ + /* The main segment, contains everything except buffer blocks and related data. 
*/ #define MAIN_SHMEM_SEGMENT 0 +/* Buffer blocks */ +#define BUFFERS_SHMEM_SEGMENT 1 + +/* Buffer descriptors */ +#define BUFFER_DESCRIPTORS_SHMEM_SEGMENT 2 + +/* Condition variables for buffers */ +#define BUFFER_IOCV_SHMEM_SEGMENT 3 + +/* Checkpoint BufferIds */ +#define CHECKPOINT_BUFFERS_SHMEM_SEGMENT 4 + +/* Buffer strategy status */ +#define STRATEGY_SHMEM_SEGMENT 5 + #endif /* PG_SHMEM_H */ -- 2.34.1