diff --git a/doc/src/sgml/recovery-config.sgml b/doc/src/sgml/recovery-config.sgml index a91864b..9053bb0 100644 --- a/doc/src/sgml/recovery-config.sgml +++ b/doc/src/sgml/recovery-config.sgml @@ -349,6 +349,48 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows + + recovery_target_incomplete (enum) + + recovery_target_incomplete recovery parameter + + + + + Specifies what action the server should take if recovery reaches the + end of the available WAL without reaching the recovery target. The default is promote, which means recovery will + finish and the server will start to accept connections. pause means the recovery process will be paused. + Finally, shutdown will stop the server when recovery cannot proceed any further. + + + The intended use of the pause setting is to allow queries + to be executed against the database to check if the point that was reached + is an acceptable stopping point for recovery. + The paused state can be resumed by + using pg_wal_replay_resume() (see + <xref linkend="functions-recovery-control-table">), which then + causes recovery to end. If this is not the + desired stopping point, then shut down the server, make the missing WAL files available or change the + recovery target settings to an earlier target, and restart to + continue recovery. + + + The shutdown setting is useful to have the instance ready + at the exact replay point desired. The instance will still be able to + replay more WAL records (and in fact will have to replay WAL records + since the last checkpoint next time it is started). + + + Note that because recovery.conf will not be renamed when + recovery_target_incomplete is set to shutdown, + any subsequent start will end with immediate shutdown unless the + configuration is changed or the recovery.conf file is + removed manually. 
+ + + + diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 8973583..7257a49 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -259,6 +259,7 @@ static char *archiveCleanupCommand = NULL; static RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET; static bool recoveryTargetInclusive = true; static RecoveryTargetAction recoveryTargetAction = RECOVERY_TARGET_ACTION_PAUSE; +static RecoveryTargetIncomplete recoveryTargetIncomplete = RECOVERY_TARGET_INCOMPLETE_PROMOTE; static TransactionId recoveryTargetXid; static TimestampTz recoveryTargetTime; static char *recoveryTargetName; @@ -838,6 +839,7 @@ static void exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog); static bool recoveryStopsBefore(XLogReaderState *record); static bool recoveryStopsAfter(XLogReaderState *record); static void recoveryPausesHere(void); +static void IncompleteRecoveryPause(void); static bool recoveryApplyDelay(XLogReaderState *record); static void SetLatestXTime(TimestampTz xtime); static void SetCurrentChunkStartTime(TimestampTz xtime); @@ -5163,6 +5165,26 @@ readRecoveryCommandFile(void) recoveryTargetActionSet = true; } + else if (strcmp(item->name, "recovery_target_incomplete") == 0) + { + if (strcmp(item->value, "pause") == 0) + recoveryTargetIncomplete = RECOVERY_TARGET_INCOMPLETE_PAUSE; + else if (strcmp(item->value, "promote") == 0) + recoveryTargetIncomplete = RECOVERY_TARGET_INCOMPLETE_PROMOTE; + else if (strcmp(item->value, "shutdown") == 0) + recoveryTargetIncomplete = RECOVERY_TARGET_INCOMPLETE_SHUTDOWN; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid value for recovery parameter \"%s\": \"%s\"", + "recovery_target_incomplete", + item->value), + errhint("Valid values are \"pause\", \"promote\", and \"shutdown\"."))); + + ereport(DEBUG2, + (errmsg_internal("recovery_target_incomplete = '%s'", + item->value))); + } else if (strcmp(item->name, 
"recovery_target_timeline") == 0) { rtliGiven = true; @@ -5889,6 +5911,21 @@ SetRecoveryPause(bool recoveryPause) SpinLockRelease(&XLogCtl->info_lck); } +static void +IncompleteRecoveryPause(void) +{ + /* Pause at end of WAL when the recovery target was not reached: log a hint, then poll once per second, calling HandleStartupProcInterrupts() each time, until RecoveryIsPaused() is false. NOTE(review): nothing here checks that clients can connect to issue the resume call -- confirm this cannot wait forever. */ + ereport(LOG, + (errmsg("recovery has paused"), + errhint("Execute pg_wal_replay_resume() to continue."))); + + while (RecoveryIsPaused()) + { + pg_usleep(1000000L); /* 1000 ms */ + HandleStartupProcInterrupts(); + } +} + /* * When recovery_min_apply_delay is set, we wait long enough to make sure * certain record types are applied at least that interval behind the master. @@ -7205,6 +7242,46 @@ StartupXLOG(void) break; } } + else if (recoveryTarget != RECOVERY_TARGET_UNSET) + { + ereport(LOG, + (errmsg("recovery has reached end-of-the-wal and has not reached the recovery target yet"), + errhint("This could be due to corrupt or missing WAL files.\n" + "All the WAL files needed for the recovery must be available to proceed to the recovery target, " + "or you might need to choose an earlier recovery target."))); + + /* + * A recovery target was requested but WAL ran out before it was + * reached (recovery without any target never gets here). Apply + * the configured recovery_target_incomplete action: shutdown, pause or promote. + */ + switch (recoveryTargetIncomplete) + { + + case RECOVERY_TARGET_INCOMPLETE_SHUTDOWN: + + /* + * Log the reason, then exit the startup process to stop the + * server. NOTE(review): confirm the postmaster treats exit + * code 2 as a shutdown request rather than a startup failure. 
+ */ + + ereport(LOG, + (errmsg("shutdown at end-of-the-wal"))); + proc_exit(2); + + case RECOVERY_TARGET_INCOMPLETE_PAUSE: + + SetRecoveryPause(true); + IncompleteRecoveryPause(); + + /* once resumed, fall through and promote */ + + case RECOVERY_TARGET_INCOMPLETE_PROMOTE: + break; + } + + } /* Allow resource managers to do any required cleanup. 
*/ for (rmid = 0; rmid <= RM_MAX_ID; rmid++) diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 578bff5..b9d707c 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -256,6 +256,17 @@ typedef enum } RecoveryTargetAction; /* + * Action to take when recovery ends without reaching the recovery target. + */ + +typedef enum +{ + RECOVERY_TARGET_INCOMPLETE_PAUSE, + RECOVERY_TARGET_INCOMPLETE_PROMOTE, + RECOVERY_TARGET_INCOMPLETE_SHUTDOWN +} RecoveryTargetIncomplete; + +/* * Method table for resource managers. * * This struct must be kept in sync with the PG_RMGR definition in diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm index 4018f0a..119d43d 100644 --- a/src/test/perl/PostgresNode.pm +++ b/src/test/perl/PostgresNode.pm @@ -601,6 +601,11 @@ Restoring WAL segments from archives using restore_command can be enabled by passing the keyword parameter has_restoring => 1. This is disabled by default. +Restoring WAL segments from archives using restore_command to perform PITR +can be enabled by passing the keyword parameter has_restoring_pitr => 1. This +is disabled by default. By enabling this parameter, the standby database will +not operate in standby mode. + The backup is copied, leaving the original unmodified. pg_hba.conf is unconditionally set to enable replication connections. 
@@ -618,6 +623,7 @@ sub init_from_backup $params{hba_permit_replication} = 1 unless defined $params{hba_permit_replication}; $params{has_restoring} = 0 unless defined $params{has_restoring}; + $params{has_restoring_pitr} = 0 unless defined $params{has_restoring_pitr}; print "# Initializing node \"$node_name\" from backup \"$backup_name\" of node \"$root_name\"\n"; @@ -641,6 +647,7 @@ port = $port $self->set_replication_conf if $params{hba_permit_replication}; $self->enable_streaming($root_node) if $params{has_streaming}; $self->enable_restoring($root_node) if $params{has_restoring}; + $self->enable_restoring_pitr($root_node) if $params{has_restoring_pitr}; } =pod @@ -673,6 +680,49 @@ sub start $self->_update_pid; } +=item $node->start_pitr() + +Wrapper for pg_ctl start, for nodes performing point-in-time recovery. + +Start the node. If pg_ctl fails and the server log contains "shutdown at end-of-the-wal", report a passing test and exit the script; on any other failure, bail out. + +=cut + +sub start_pitr +{ + my ($self) = @_; + my $port = $self->port; + my $pgdata = $self->data_dir; + my $name = $self->name; + my $log = $self->logfile; + print("### Starting node \"$name\"\n"); + my $ret = TestLib::system_log('pg_ctl', '-D', $self->data_dir, '-l', + $self->logfile, 'start'); + + if ($ret != 0) + { + my $s = "shutdown at end-of-the-wal"; + # Search the log in Perl instead of shelling out to grep: the message contains spaces, which an unquoted shell command would split into a pattern plus bogus file names. + my @rs = grep { /\Q$s\E/ } split(/\n/, TestLib::slurp_file($log)); + + if (@rs) + { + print "Database was shutdown at end-of-the-wal. 
Test Passed\n"; + isnt($ret,0, "check the PITR node for successful SHUTDOWN at incomplete recovery"); + exit 0; + + } + else + { + print "# pg_ctl failed; logfile:\n"; + print TestLib::slurp_file($self->logfile); + BAIL_OUT("pg_ctl failed"); + } + } + + $self->_update_pid; +} + =pod =item $node->stop(mode) @@ -797,6 +847,32 @@ standby_mode = on )); } +# Internal routine that appends a restore_command (and nothing else) to recovery.conf so the node performs PITR from the root node's archive +sub enable_restoring_pitr +{ + my ($self, $root_node) = @_; + my $path = $root_node->archive_dir; + my $name = $self->name; + + print "### Enabling WAL restore for node \"$name\"\n"; + + # On Windows, the path specified in the restore command needs to use + # double back-slashes to work properly and to be able to detect properly + # the file targeted by the copy command, so the directory value used + # in this routine, using only one back-slash, needs to be adjusted + # first. Paths also need to be double-quoted to prevent failures where + # the path contains spaces. + $path =~ s{\\}{\\\\}g if ($TestLib::windows_os); + my $copy_command = + $TestLib::windows_os + ? qq{copy "$path\\\\%f" "%p"} + : qq{cp "$path/%f" "%p"}; + $self->append_conf( + 'recovery.conf', qq( +restore_command = '$copy_command' +)); +} + # Internal routine to enable archiving sub enable_archiving { diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl index b7b0caa..e8e9d14 100644 --- a/src/test/recovery/t/003_recovery_targets.pl +++ b/src/test/recovery/t/003_recovery_targets.pl @@ -3,7 +3,7 @@ use strict; use warnings; use PostgresNode; use TestLib; -use Test::More tests => 9; +use Test::More tests => 12; # Create and test a standby from given backup, with a certain # recovery target. 
@@ -27,9 +27,7 @@ sub test_recovery_standby qq($param_item )); } - $node_standby->start; - # Wait until standby has replayed enough data my $caughtup_query = "SELECT '$until_lsn'::pg_lsn <= pg_last_wal_replay_location()"; @@ -41,6 +39,44 @@ sub test_recovery_standby $node_standby->safe_psql('postgres', "SELECT count(*) FROM tab_int"); is($result, qq($num_rows), "check standby content for $test_name"); + print "Rows in PITR : $result\n"; + print "Rows in master: $num_rows\n"; + + # Stop standby node + $node_standby->teardown_node; +} + + +# Create a node from the 'my_pitr' backup, apply the given recovery.conf parameters, start it with start_pitr, and check that its row count differs from $num_rows (i.e. recovery stopped short of the master's state). + +sub test_recovery_pitr +{ + my $test_name = shift; + my $node_name = shift; + my $node_master = shift; + my $recovery_params = shift; + my $num_rows = shift; + my $until_lsn = shift; + + my $node_standby = get_new_node($node_name); + $node_standby->init_from_backup($node_master, 'my_pitr', + has_restoring_pitr => 1); + + foreach my $param_item (@$recovery_params) + { + $node_standby->append_conf( + 'recovery.conf', + qq($param_item +)); + } + $node_standby->start_pitr; + # Count the rows visible on the PITR node; isnt() passes only when that count is not $num_rows + my $result = + $node_standby->safe_psql('postgres', "SELECT count(*) FROM tab_int"); + isnt($result, qq($num_rows), "check the PITR node for successful $test_name"); + print "Rows in PITR : $result\n"; + print "Rows in master: $num_rows\n"; + # Stop standby node $node_standby->teardown_node; } @@ -144,3 +180,72 @@ test_recovery_standby('XID + time + name', "recovery_target_lsn = '$recovery_lsn'",); test_recovery_standby('XID + time + name + LSN', 'standby_9', $node_master, \@recovery_params, "5000", $lsn5); + +# Test incomplete recovery (recovery target not reached) +# Initialize a fresh master node with archiving enabled + 
+$node_master = get_new_node('master1'); +$node_master->init(has_archiving => 1, allows_streaming => 1); + +# Start it +$node_master->start; + +# Create the initial 1000 rows before taking the base backup + +$node_master->safe_psql('postgres', + "CREATE TABLE tab_int AS SELECT 
generate_series(1,1000) AS a"); + +# Take backup from which all operations will be run +$node_master->backup('my_pitr'); + +# Generate more data, switching WAL segments so that part of it is archived. + +$node_master->safe_psql('postgres', + "INSERT INTO tab_int VALUES (generate_series(1001,10000))"); +# Force archiving of WAL file +$node_master->safe_psql('postgres', "SELECT pg_switch_wal()"); + +$node_master->safe_psql('postgres', + "INSERT INTO tab_int VALUES (generate_series(10001,20000))"); +# Force archiving of WAL file +$node_master->safe_psql('postgres', "SELECT pg_switch_wal()"); + +$node_master->safe_psql('postgres', + "INSERT INTO tab_int VALUES (generate_series(20001,30000))"); + +$node_master->safe_psql('postgres', + "INSERT INTO tab_int VALUES (generate_series(30001,40000))"); + +$node_master->safe_psql('postgres', + "INSERT INTO tab_int VALUES (generate_series(40001,60000))"); + +# Current WAL position and a fresh XID on the master; no WAL switch follows, so presumably this XID's WAL is never archived and the target below is unreachable from the archive +$ret = $node_master->safe_psql('postgres', + "SELECT pg_current_wal_location(), txid_current();"); +my ($lsn6, $recovery_xid) = split /\|/, $ret; + +# Test that the server is promoted when recovery fails to reach the recovery target. +my $recovery_target_incomplete='promote'; + +@recovery_params = ( + "recovery_target_xid = '$recovery_xid'", + "recovery_target_incomplete = '$recovery_target_incomplete'"); +test_recovery_pitr('PROMOTION at incomplete recovery','pitr_1',$node_master, \@recovery_params, "60000", $lsn6); + +# Test that the server pauses when recovery stops mid-way without reaching the recovery target. 
+ +$recovery_target_incomplete='pause'; + +@recovery_params = ( + "recovery_target_xid = '$recovery_xid'", + "recovery_target_incomplete = '$recovery_target_incomplete'"); +test_recovery_pitr('PAUSE at incomplete recovery','pitr_2',$node_master, \@recovery_params, "60000", $lsn6); + +# Test that the server shuts down when recovery stops mid-way without reaching the recovery target. 
+ +$recovery_target_incomplete='shutdown'; + +@recovery_params = ( + "recovery_target_xid = '$recovery_xid'", + "recovery_target_incomplete = '$recovery_target_incomplete'"); +test_recovery_pitr('SHUTDOWN at incomplete recovery','pitr_3',$node_master, \@recovery_params, "60000", $lsn6);