From 6654b0cd3073c4bed6c0cbf7a3b0e9fc7a4b9e2d Mon Sep 17 00:00:00 2001 From: Jehan-Guillaume de Rorthais Date: Wed, 1 Apr 2020 16:36:06 +0200 Subject: [PATCH 1/2] Fix WAL retention during production crash recovery During crash recovery of a production cluster with archive_mode=on, XLogArchiveCheckDone() was considering the cluster as inRecovery without archive_mode=always. Because of this non-arcived WAL and related .ready files were recycled or removed. --- src/backend/access/transam/xlog.c | 48 +++++++++++++++++++----- src/backend/access/transam/xlogarchive.c | 21 +++++++---- src/include/access/xlog.h | 21 +++++++++++ 3 files changed, 74 insertions(+), 16 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 11e32733c4..26920c7f3e 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -221,8 +221,8 @@ static TimeLineID receiveTLI = 0; static bool lastFullPageWrites; /* - * Local copy of SharedRecoveryInProgress variable. True actually means "not - * known, need to check the shared state". + * This is false when SharedRecoveryState is RECOVERT_STATE_NONE. + * True actually means "not known, need to check the shared state". */ static bool LocalRecoveryInProgress = true; @@ -653,10 +653,10 @@ typedef struct XLogCtlData TimeLineID PrevTimeLineID; /* - * SharedRecoveryInProgress indicates if we're still in crash or archive + * SharedRecoveryState indicates if we're either in crash or archive * recovery. Protected by info_lck. */ - bool SharedRecoveryInProgress; + RecoveryState SharedRecoveryState; /* * SharedHotStandbyActive indicates if we allow hot standby queries to be @@ -4434,6 +4434,16 @@ ReadRecord(XLogReaderState *xlogreader, int emode, updateMinRecoveryPoint = true; UpdateControlFile(); + + /* + * We update SharedRecoveryState while holding the lock on + * ControlFileLock so both states are consistent in shared + * memory. + */ + SpinLockAcquire(&XLogCtl->info_lck); + XLogCtl->SharedRecoveryState = RECOVERT_STATE_ARCHIVE; + SpinLockRelease(&XLogCtl->info_lck); + LWLockRelease(ControlFileLock); CheckRecoveryConsistency(); @@ -5166,10 +5176,10 @@ XLOGShmemInit(void) * in additional info.) */ XLogCtl->XLogCacheBlck = XLOGbuffers - 1; - XLogCtl->SharedRecoveryInProgress = true; XLogCtl->SharedHotStandbyActive = false; XLogCtl->SharedPromoteIsTriggered = false; XLogCtl->WalWriterSleeping = false; + XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH; SpinLockInit(&XLogCtl->Insert.insertpos_lck); SpinLockInit(&XLogCtl->info_lck); @@ -7911,7 +7921,7 @@ StartupXLOG(void) ControlFile->time = (pg_time_t) time(NULL); SpinLockAcquire(&XLogCtl->info_lck); - XLogCtl->SharedRecoveryInProgress = false; + XLogCtl->SharedRecoveryState = RECOVERY_STATE_NONE; SpinLockRelease(&XLogCtl->info_lck); UpdateControlFile(); @@ -8057,7 +8067,7 @@ RecoveryInProgress(void) */ volatile XLogCtlData *xlogctl = XLogCtl; - LocalRecoveryInProgress = xlogctl->SharedRecoveryInProgress; + LocalRecoveryInProgress = (xlogctl->SharedRecoveryState != RECOVERY_STATE_NONE); /* * Initialize TimeLineID and RedoRecPtr when we discover that recovery @@ -8069,8 +8079,8 @@ RecoveryInProgress(void) { /* * If we just exited recovery, make sure we read TimeLineID and - * RedoRecPtr after SharedRecoveryInProgress (for machines with - * weak memory ordering). + * RedoRecPtr after SharedRecoveryState (for machines with weak + * memory ordering). */ pg_memory_barrier(); InitXLOGAccess(); @@ -8086,6 +8096,26 @@ RecoveryInProgress(void) } } +/* + * Returns current recovery state from shared memory. + * + * This returned state is accurate after StartupXLOG() + * finished. + * + * See details about RecoveryState in xlog.h + */ +RecoveryState +GetRecoveryState(void) +{ + RecoveryState retval; + + SpinLockAcquire(&XLogCtl->info_lck); + retval = XLogCtl->SharedRecoveryState; + SpinLockRelease(&XLogCtl->info_lck); + + return retval; +} + /* * Is HotStandby active yet? This is only important in special backends * since normal backends won't ever be able to connect until this returns diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c index d62c12310a..d2ffc7e1a2 100644 --- a/src/backend/access/transam/xlogarchive.c +++ b/src/backend/access/transam/xlogarchive.c @@ -572,18 +572,25 @@ XLogArchiveCheckDone(const char *xlog) { char archiveStatusPath[MAXPGPATH]; struct stat stat_buf; - bool inRecovery = RecoveryInProgress(); + RecoveryState recoveryState = GetRecoveryState(); + + /* The file is always deletable if archive_mode is "off". */ + if (!XLogArchivingActive()) + return true; /* - * The file is always deletable if archive_mode is "off". On standbys - * archiving is disabled if archive_mode is "on", and enabled with - * "always". On a primary, archiving is enabled if archive_mode is "on" - * or "always". + * On standbys, the file is deletable if archive_mode is not + * "always". */ - if (!((XLogArchivingActive() && !inRecovery) || - (XLogArchivingAlways() && inRecovery))) + if (recoveryState == RECOVERT_STATE_ARCHIVE && !XLogArchivingAlways()) return true; + /* + * We are either: + * * a primary with archive_mode set to "on" or "always" + * * a standby with archive_mode set to "always". + */ + /* First check for .done --- this means archiver is done with it */ StatusFilePath(archiveStatusPath, xlog, ".done"); if (stat(archiveStatusPath, &stat_buf) == 0) diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index f60ed2d36c..157a3fb104 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -166,6 +166,26 @@ typedef enum WalLevel WAL_LEVEL_LOGICAL } WalLevel; +/* + * Recovery state. + * + * There's two different reasons to recover WAL: when standby mode is requested + * or after a crash to catchup with consistency. + * + * RECOVERY_STATE_CRASH is set when recovering after a crash. + * RECOVERT_STATE_ARCHIVE is set when archive recovery or standby is requested. + * RECOVERY_STATE_NONE means the cluster is in production. + * + * Note that even if archive recovery or standby is requested, recovery state + * starts first with RECOVERY_STATE_CRASH before switching to ARCHIVE_RECOVERING + */ +typedef enum RecoveryState +{ + RECOVERY_STATE_NONE = 0, /* currently in production */ + RECOVERY_STATE_CRASH, /* recovering from a crash */ + RECOVERT_STATE_ARCHIVE /* recovering archives as requested */ +} RecoveryState; + extern PGDLLIMPORT int wal_level; /* Is WAL archiving enabled (always or only while server is running normally)? */ @@ -291,6 +311,7 @@ extern const char *xlog_identify(uint8 info); extern void issue_xlog_fsync(int fd, XLogSegNo segno); extern bool RecoveryInProgress(void); +extern RecoveryState GetRecoveryState(void); extern bool HotStandbyActive(void); extern bool HotStandbyActiveInReplay(void); extern bool XLogInsertAllowed(void); -- 2.20.1