diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index f60ed2d36c..2cce871dc9 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -166,6 +166,14 @@ typedef enum WalLevel WAL_LEVEL_LOGICAL } WalLevel; +/* Recovery states */ +typedef enum RecoveryState +{ + RECOVERY_STATE_CRASH = 0, /* crash recovery */ + RECOVERY_STATE_ARCHIVE, /* archive recovery */ + RECOVERY_STATE_NONE /* currently in production */ +} RecoveryState; + extern PGDLLIMPORT int wal_level; /* Is WAL archiving enabled (always or only while server is running normally)? */ @@ -291,6 +299,7 @@ extern const char *xlog_identify(uint8 info); extern void issue_xlog_fsync(int fd, XLogSegNo segno); extern bool RecoveryInProgress(void); +extern RecoveryState GetRecoveryState(void); extern bool HotStandbyActive(void); extern bool HotStandbyActiveInReplay(void); extern bool XLogInsertAllowed(void); diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 11e32733c4..a1372b08f6 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -221,8 +221,9 @@ static TimeLineID receiveTLI = 0; static bool lastFullPageWrites; /* - * Local copy of SharedRecoveryInProgress variable. True actually means "not - * known, need to check the shared state". + * Local copy of the state tracked by SharedRecoveryState in shared memory. + * It is false if SharedRecoveryState is RECOVERY_STATE_NONE. True actually + * means "not known, need to check the shared state". */ static bool LocalRecoveryInProgress = true; @@ -653,10 +654,10 @@ typedef struct XLogCtlData TimeLineID PrevTimeLineID; /* - * SharedRecoveryInProgress indicates if we're still in crash or archive + * SharedRecoveryState indicates if we're either in crash or archive * recovery. Protected by info_lck. 
*/ - bool SharedRecoveryInProgress; + RecoveryState SharedRecoveryState; /* * SharedHotStandbyActive indicates if we allow hot standby queries to be @@ -4434,6 +4435,16 @@ ReadRecord(XLogReaderState *xlogreader, int emode, updateMinRecoveryPoint = true; UpdateControlFile(); + + /* + * We update SharedRecoveryState while holding the lock on + * ControlFileLock so both states are consistent in shared + * memory. + */ + SpinLockAcquire(&XLogCtl->info_lck); + XLogCtl->SharedRecoveryState = RECOVERY_STATE_ARCHIVE; + SpinLockRelease(&XLogCtl->info_lck); + LWLockRelease(ControlFileLock); CheckRecoveryConsistency(); @@ -5166,10 +5177,10 @@ XLOGShmemInit(void) * in additional info.) */ XLogCtl->XLogCacheBlck = XLOGbuffers - 1; - XLogCtl->SharedRecoveryInProgress = true; XLogCtl->SharedHotStandbyActive = false; XLogCtl->SharedPromoteIsTriggered = false; XLogCtl->WalWriterSleeping = false; + XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH; SpinLockInit(&XLogCtl->Insert.insertpos_lck); SpinLockInit(&XLogCtl->info_lck); @@ -7911,7 +7922,7 @@ StartupXLOG(void) ControlFile->time = (pg_time_t) time(NULL); SpinLockAcquire(&XLogCtl->info_lck); - XLogCtl->SharedRecoveryInProgress = false; + XLogCtl->SharedRecoveryState = RECOVERY_STATE_NONE; SpinLockRelease(&XLogCtl->info_lck); UpdateControlFile(); @@ -8057,7 +8068,7 @@ RecoveryInProgress(void) */ volatile XLogCtlData *xlogctl = XLogCtl; - LocalRecoveryInProgress = xlogctl->SharedRecoveryInProgress; + LocalRecoveryInProgress = (xlogctl->SharedRecoveryState != RECOVERY_STATE_NONE); /* * Initialize TimeLineID and RedoRecPtr when we discover that recovery @@ -8069,8 +8080,8 @@ RecoveryInProgress(void) { /* * If we just exited recovery, make sure we read TimeLineID and - * RedoRecPtr after SharedRecoveryInProgress (for machines with - * weak memory ordering). + * RedoRecPtr after SharedRecoveryState (for machines with weak + * memory ordering). 
*/ pg_memory_barrier(); InitXLOGAccess(); @@ -8086,6 +8097,24 @@ RecoveryInProgress(void) } } +/* + * Returns current recovery state from shared memory. + * + * This returned state is accurate after StartupXLOG() finished. See + * details about the possible values of RecoveryState in xlog.h. + */ +RecoveryState +GetRecoveryState(void) +{ + RecoveryState retval; + + SpinLockAcquire(&XLogCtl->info_lck); + retval = XLogCtl->SharedRecoveryState; + SpinLockRelease(&XLogCtl->info_lck); + + return retval; +} + /* * Is HotStandby active yet? This is only important in special backends * since normal backends won't ever be able to connect until this returns diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c index d62c12310a..ce0127c3d0 100644 --- a/src/backend/access/transam/xlogarchive.c +++ b/src/backend/access/transam/xlogarchive.c @@ -572,17 +572,21 @@ XLogArchiveCheckDone(const char *xlog) { char archiveStatusPath[MAXPGPATH]; struct stat stat_buf; - bool inRecovery = RecoveryInProgress(); + + /* The file is always deletable if archive_mode is "off". */ + if (!XLogArchivingActive()) + return true; + + /* On standbys, the file is deletable if archive_mode is not "always". */ + if (!XLogArchivingAlways() && + GetRecoveryState() == RECOVERY_STATE_ARCHIVE) + return true; /* - * The file is always deletable if archive_mode is "off". On standbys - * archiving is disabled if archive_mode is "on", and enabled with - * "always". On a primary, archiving is enabled if archive_mode is "on" - * or "always". + * At this point of the logic, note that we are either a primary with + * archive_mode set to "on" or "always", or a standby with + * archive_mode set to "always". 
*/ - if (!((XLogArchivingActive() && !inRecovery) || - (XLogArchivingAlways() && inRecovery))) - return true; /* First check for .done --- this means archiver is done with it */ StatusFilePath(archiveStatusPath, xlog, ".done"); diff --git a/src/test/recovery/t/020_archive_status.pl b/src/test/recovery/t/020_archive_status.pl new file mode 100644 index 0000000000..a5f4fd821a --- /dev/null +++ b/src/test/recovery/t/020_archive_status.pl @@ -0,0 +1,193 @@ +# +# Tests relating to WAL archiving and cleanup +# +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; +use Config; + +my ($node, $standby1, $standby2); +my ($node_data, $standby1_data, $standby2_data); + +if ($Config{osname} eq 'MSWin32') +{ + + # some Windows Perls at least don't like IPC::Run's start/kill_kill regime. + plan skip_all => "Test fails on Windows perl"; +} +else +{ + plan tests => 16; +} + +$node = get_new_node('master'); +$node->init( + has_archiving => 1, + allows_streaming => 1 +); +$node->start; +$node_data = $node->data_dir; + +# temporarily make archive_command fail for future tests +$node->safe_psql('postgres', q{ + ALTER SYSTEM SET archive_command TO 'false'; + SELECT pg_reload_conf(); +}); + +$node->safe_psql('postgres', q{ + CREATE TABLE mine AS SELECT generate_series(1,10) AS x; + SELECT pg_switch_wal(); + CHECKPOINT; +}); + +# wait for archive failure +$node->poll_query_until('postgres', + q{SELECT failed_count > 0 FROM pg_stat_archiver}, + 't') or die "Timed out while waiting for archiver to fail"; + +ok( -f "$node_data/pg_wal/archive_status/000000010000000000000001.ready", + "ready file exists for WAL waiting to be archived"); + +is($node->safe_psql('postgres', q{ + SELECT archived_count, last_failed_wal + FROM pg_stat_archiver + }), + '0|000000010000000000000001', 'pg_stat_archiver reports archive failure'); + +# We need to crash the cluster because the next test checks that the crash +# recovery step does not remove non-archived WAL. 
+$node->stop('immediate'); + +# The standby recovery tests check the recovery behavior when restoring a +# backup taken using e.g. a snapshot with no pg_start/stop_backup. +# In this situation, the recovered standby should first enter crash +# recovery, then switch to regular archive recovery. +$node->backup_fs_cold('backup'); + +$node->start; + +ok( -f "$node_data/pg_wal/archive_status/000000010000000000000001.ready", + "WAL segment still ready to archive after crash recovery on primary"); + +# Allow WAL archiving again +$node->safe_psql('postgres', q{ + ALTER SYSTEM RESET archive_command; + SELECT pg_reload_conf(); +}); + +# wait for archive success +$node->poll_query_until('postgres', + q{SELECT archived_count FROM pg_stat_archiver}, + '1') or die "Timed out while waiting for archiver to success"; + +ok( ! -f "$node_data/pg_wal/archive_status/000000010000000000000001.ready", + "ready file for archived WAL removed"); + +ok( -f "$node_data/pg_wal/archive_status/000000010000000000000001.done", + "done file for archived WAL exists"); + +is($node->safe_psql('postgres', + q{ SELECT last_archived_wal FROM pg_stat_archiver }), + '000000010000000000000001', + 'Archive success reported in pg_stat_archiver'); + +# Create some WAL activity and a new checkpoint so future standbys can create +# a restartpoint. +# As standbys start in crash recovery because of the backup method, they need +# a clean restartpoint to deal with existing status files. 
+$node->safe_psql('postgres', q{ + INSERT INTO mine SELECT generate_series(10,20) AS x; + SELECT pg_switch_wal(); + CHECKPOINT; +}); + +$node->poll_query_until('postgres', + q{ SELECT last_archived_wal FROM pg_stat_archiver }, + '000000010000000000000002') or die "Timed out while waiting for archiver to succeed"; + +# Test standby with archive_mode=on +$standby1 = get_new_node('standby'); +$standby1->init_from_backup($node, 'backup', has_restoring => 1); +$standby1->append_conf('postgresql.conf', "archive_mode = on"); +$standby1_data = $standby1->data_dir; +$standby1->start; +$standby1->safe_psql('postgres', q{CHECKPOINT}); + +# Recovery with archive_mode=on does not keep .ready signal files inherited from backup. +# 000000010000000000000001.ready existed in the backup. +ok( ! 
-f "$standby1_data/pg_wal/archive_status/000000010000000000000001.ready", + ".ready signal file existing in backup removed with archive_mode=on on standby" ); + +# Recovery with archive_mode=on should not create .ready signal files. +# 000000010000000000000002.ready did not exist in the backup. +ok( ! -f "$standby1_data/pg_wal/archive_status/000000010000000000000002.ready", + "standby doesn't create .ready signal file when archive_mode=on" ); + +# Recovery with archive_mode=on creates .done signal files +ok( -f "$standby1_data/pg_wal/archive_status/000000010000000000000002.done", + "standby creates .done signal file when archive_mode=on" ); + +# Test that recovery with archive_mode=always keeps .ready WALs; archive_command is forced to fail on standby2 so segments stay pending +$standby2 = get_new_node('standby2'); +$standby2->init_from_backup($node, 'backup', has_restoring => 1); +$standby2->append_conf('postgresql.conf', 'archive_mode = always'); +$standby2->append_conf('postgresql.auto.conf', "archive_command = false"); +$standby2_data = $standby2->data_dir; +$standby2->start; + +$standby2->safe_psql('postgres', q{CHECKPOINT}); + +ok( -f "$standby2_data/pg_wal/archive_status/000000010000000000000001.ready", + ".ready signal file existing in backup are kept with archive_mode=always on standby" ); + +ok( -f "$standby2_data/pg_wal/archive_status/000000010000000000000002.ready", + ".ready signal file are created with archive_mode=always on standby" ); + +# Reset the archiver statistics before checking them +$standby2->safe_psql('postgres', q{SELECT pg_stat_reset_shared('archiver')}); + +is($standby2->safe_psql('postgres', + q{ SELECT failed_count, archived_count FROM pg_stat_archiver }), + '0|0', + 'statistics reset from pg_stat_archiver succeed'); + +# We need to crash the cluster because the next test checks that the crash +# recovery step does not remove non-archived WAL on a standby. 
+$standby2->stop('immediate'); +$standby2->start; + +ok( -f "$standby2_data/pg_wal/archive_status/000000010000000000000001.ready", + "WAL segment still ready to archive after crash recovery on standby with archive_mode=always"); + +# Allow WAL archiving again on the standby by resetting archive_command +$standby2->safe_psql('postgres', q{ + ALTER SYSTEM RESET archive_command; + SELECT pg_reload_conf(); +}); + +# Wait until the standby reports the second segment as the last archived one +$standby2->poll_query_until('postgres', + q{SELECT last_archived_wal FROM pg_stat_archiver}, + '000000010000000000000002') or die "Timed out while waiting for archiver to success"; + +is($standby2->safe_psql('postgres', + q{SELECT archived_count FROM pg_stat_archiver}), + '2', 'WAL segment archived from standby'); + +ok( ! -f "$standby2_data/pg_wal/archive_status/000000010000000000000001.ready" + && ! -f "$standby2_data/pg_wal/archive_status/000000010000000000000002.ready", + ".ready signal file removed after archiving with archive_mode=always on standby" ); + +ok( -f "$standby2_data/pg_wal/archive_status/000000010000000000000001.done" + and -f "$standby2_data/pg_wal/archive_status/000000010000000000000002.done", + ".done signal file created after archiving with archive_mode=always on standby" ); + +#ok(0); +# NOTE(review): this commented-out block looks like leftover debugging code; consider dropping it before commit +## wait for archive success +#$standby2->poll_query_until('postgres', +# q{SELECT 1}, +# '0') or die "Timed out while waiting for archiver to success"; +