From 5b5469d7dcd8e98bfcaf14227e67356bbc1f5fe8 Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy Date: Thu, 2 Nov 2023 15:10:51 +0000 Subject: [PATCH v14] Track oldest initialized WAL buffer page --- src/backend/access/transam/xlog.c | 170 ++++++++++++++++++++++++++++++ src/include/access/xlog.h | 1 + 2 files changed, 171 insertions(+) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index b541be8eec..fdf2ef310b 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -504,6 +504,45 @@ typedef struct XLogCtlData XLogRecPtr *xlblocks; /* 1st byte ptr-s + XLOG_BLCKSZ */ int XLogCacheBlck; /* highest allocated xlog buffer index */ + /* + * Start address of oldest initialized page in XLog buffers. + * + * We mainly track oldest initialized page explicitly to quickly tell if a + * given WAL record is available in XLog buffers. It also can be used for + * other purposes, see notes below. + * + * OldestInitializedPage gives XLog buffers following properties: + * + * 1) At any given point of time, pages in XLog buffers array are sorted + * in an ascending order from OldestInitializedPage till InitializedUpTo. + * Note that we verify this property for assert-only builds, see + * IsXLogBuffersArraySorted() for more details. + * + * 2) OldestInitializedPage is monotonically increasing (by virtue of how + * postgres generates WAL records), that is, its value never decreases. + * This property lets someone read its value without a lock. There's no + * problem even if its value is slightly stale i.e. concurrently being + * updated. One can still use it for finding if a given WAL record is + * available in XLog buffers. At worst, one might get false positives + * (i.e. OldestInitializedPage may tell that the WAL record is available + * in XLog buffers, but when one actually looks at it, it isn't really + * available). This is more efficient and performant than acquiring a lock + * for reading. 
Note that we may not need a lock to read + * OldestInitializedPage but we need to update it holding + * WALBufMappingLock. + * + * 3) One can start traversing XLog buffers from OldestInitializedPage + * till InitializedUpTo to list out all valid WAL records and stats, and + * expose them via SQL-callable functions to users. + * + * 4) XLog buffers array is inherently organized as a circular, sorted and + * rotated array with OldestInitializedPage as pivot with the property + * where LSN of previous buffer page (if valid) is greater than + * OldestInitializedPage and LSN of next buffer page (if valid) is greater + * than OldestInitializedPage. + */ + XLogRecPtr OldestInitializedPage; + /* * InsertTimeLineID is the timeline into which new WAL is being inserted * and flushed. It is zero during recovery, and does not change once set. @@ -590,6 +629,10 @@ static ControlFileData *ControlFile = NULL; #define NextBufIdx(idx) \ (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1)) +/* Macro to retreat to previous buffer index. */ +#define PreviousBufIdx(idx) \ + (((idx) == 0) ? XLogCtl->XLogCacheBlck : ((idx) - 1)) + /* * XLogRecPtrToBufIdx returns the index of the WAL buffer that holds, or * would hold if it was in cache, the page containing 'recptr'. @@ -708,6 +751,10 @@ static void WALInsertLockAcquireExclusive(void); static void WALInsertLockRelease(void); static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt); +#ifdef USE_ASSERT_CHECKING +static bool IsXLogBuffersArraySorted(void); +#endif + /* * Insert an XLOG record represented by an already-constructed chain of data * chunks. This is a low-level routine; to construct the WAL record header @@ -1992,6 +2039,52 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic) XLogCtl->InitializedUpTo = NewPageEndPtr; npages++; + + /* + * Try updating oldest initialized XLog buffer page. 
+ * + * Update it if we are initializing an XLog buffer page for the first + * time or if XLog buffers are full and we are wrapping around. + */ + if (XLogRecPtrIsInvalid(XLogCtl->OldestInitializedPage) || + XLogRecPtrToBufIdx(XLogCtl->OldestInitializedPage) == nextidx) + { + Assert(XLogCtl->OldestInitializedPage < NewPageBeginPtr); + + XLogCtl->OldestInitializedPage = NewPageBeginPtr; + } + + /* + * Check some properties about XLog buffers array. We essentially + * perform these checks as asserts to avoid extra costs. + * + * XXX: Perhaps these extra checks are too much for an assert build, + * so placing them under WAL_DEBUG might be worth trying. + */ + + /* OldestInitializedPage must have already been initialized. */ + Assert(!XLogRecPtrIsInvalid(XLogCtl->OldestInitializedPage)); + + /* + * OldestInitializedPage is always a starting address of XLog buffer + * page. + */ + Assert((XLogCtl->OldestInitializedPage % XLOG_BLCKSZ) == 0); + + /* + * OldestInitializedPage and InitializedUpTo are always starting and + * ending addresses of (same or different) XLog buffer page + * respectively. Hence, they can never be same even if there's only + * one initialized page in XLog buffers. + */ + Assert(XLogCtl->OldestInitializedPage != XLogCtl->InitializedUpTo); + + /* + * At any given point of time, pages in XLog buffers array are sorted + * in an ascending order from OldestInitializedPage till + * InitializedUpTo. 
+ */ + Assert(IsXLogBuffersArraySorted()); } LWLockRelease(WALBufMappingLock); @@ -4711,6 +4804,7 @@ XLOGShmemInit(void) XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH; XLogCtl->InstallXLogFileSegmentActive = false; XLogCtl->WalWriterSleeping = false; + XLogCtl->OldestInitializedPage = InvalidXLogRecPtr; SpinLockInit(&XLogCtl->Insert.insertpos_lck); SpinLockInit(&XLogCtl->info_lck); @@ -5717,6 +5811,14 @@ StartupXLOG(void) XLogCtl->xlblocks[firstIdx] = endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ; XLogCtl->InitializedUpTo = endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ; + XLogCtl->OldestInitializedPage = endOfRecoveryInfo->lastPageBeginPtr; + + /* + * OldestInitializedPage is always a starting address of XLog buffer + * page. + */ + Assert(!XLogRecPtrIsInvalid(XLogCtl->OldestInitializedPage)); + Assert((XLogCtl->OldestInitializedPage % XLOG_BLCKSZ) == 0); } else { @@ -9109,3 +9211,71 @@ SetWalWriterSleeping(bool sleeping) XLogCtl->WalWriterSleeping = sleeping; SpinLockRelease(&XLogCtl->info_lck); } + +#ifdef USE_ASSERT_CHECKING +/* + * Returns whether or not XLog buffers array is sorted. + * + * XXX: Perhaps this function is too much for an assert build, so placing it + * under WAL_DEBUG might be worth trying. 
+ */
+static bool
+IsXLogBuffersArraySorted(void)
+{
+	int			start;
+	int			end;
+	int			current;
+
+	/* Buffer slots of the oldest and the newest initialized pages. */
+	start = XLogRecPtrToBufIdx(XLogCtl->OldestInitializedPage);
+	end = XLogRecPtrToBufIdx(XLogCtl->InitializedUpTo - XLOG_BLCKSZ);
+
+	/*
+	 * Walk the ring from 'start' to 'end', comparing every slot with its
+	 * successor.  The loop body never runs when both map to the same slot,
+	 * in which case there is nothing to compare.
+	 */
+	for (current = start; current != end; current = NextBufIdx(current))
+	{
+		XLogRecPtr	CurrentPage = XLogCtl->xlblocks[current];
+		XLogRecPtr	NextPage = XLogCtl->xlblocks[NextBufIdx(current)];
+
+		/* An invalid (unset) successor cannot violate the ordering. */
+		if (!XLogRecPtrIsInvalid(NextPage) && CurrentPage > NextPage)
+			return false;
+	}
+
+	return true;
+}
+#endif
+
+/*
+ * Returns whether 'lsn' falls within the range of WAL buffer pages that are
+ * currently tracked as initialized, that is, whether
+ * OldestInitializedPage <= lsn < InitializedUpTo.
+ *
+ * No lock is taken to read the two bounds, so a "true" result is only a
+ * hint and may be a false positive; see the description near the definition
+ * of OldestInitializedPage for details.  Returns false if 'lsn' is invalid
+ * or if no page has been initialized yet.
+ *
+ * NOTE(review): this assumes a plain read of an XLogRecPtr cannot be torn;
+ * confirm that for platforms without atomic 8-byte loads.
+ *
+ * Note that caller needs to pass in an LSN known to the server, not a future
+ * or unwritten or unflushed LSN.
+ */
+bool
+IsWALRecordAvailableInXLogBuffers(XLogRecPtr lsn)
+{
+	/*
+	 * Read each shared bound once so that both tests see the same value.
+	 */
+	XLogRecPtr	oldest = XLogCtl->OldestInitializedPage;
+	XLogRecPtr	upto = XLogCtl->InitializedUpTo;
+
+	if (XLogRecPtrIsInvalid(lsn) || XLogRecPtrIsInvalid(oldest))
+		return false;
+
+	return lsn >= oldest && lsn < upto;
+}
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index a14126d164..35235010e6 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -261,6 +261,7 @@ extern void ReachedEndOfBackup(XLogRecPtr EndRecPtr, TimeLineID tli);
 extern void SetInstallXLogFileSegmentActive(void);
 extern bool IsInstallXLogFileSegmentActive(void);
 extern void XLogShutdownWalRcv(void);
+extern bool IsWALRecordAvailableInXLogBuffers(XLogRecPtr lsn);
 
 /*
  * Routines to start, stop, and get status of a base backup.
-- 
2.34.1