diff --git a/src/test/modules/test_checksums/t/006_concurrent_pgbench.pl b/src/test/modules/test_checksums/t/006_concurrent_pgbench.pl index b33ca6e0c26..5cee6d4a6b5 100644 --- a/src/test/modules/test_checksums/t/006_concurrent_pgbench.pl +++ b/src/test/modules/test_checksums/t/006_concurrent_pgbench.pl @@ -55,7 +55,7 @@ if ($ENV{enable_injection_points} ne 'yes') # whether to turn things off during testing. sub cointoss { - return int(rand(2) == 1); + return int(rand() < 0.5); } # Helper for injecting random sleeps here and there in the testrun. The sleep @@ -74,7 +74,7 @@ sub background_ro_pgbench my ($port, $stdin, $stdout, $stderr) = @_; my $pgbench_primary = IPC::Run::start( - [ 'pgbench', '-p', $port, '-S', '-T', '600', '-c', '10', 'postgres' ], + [ 'pgbench', '-n', '-p', $port, '-S', '-T', '600', '-c', '10', 'postgres' ], '<' => \$stdin, '>' => \$stdout, '2>' => \$stderr, @@ -224,6 +224,9 @@ background_rw_pgbench( $node_primary->port, $pgb_primary_stdin, $pgb_primary_stdout, $pgb_primary_stderr); +my $primary_shutdown_clean = 0; +my $standby_shutdown_clean = 0; + # Main test suite. This loop will start a pgbench run on the cluster and while # that's running flip the state of data checksums concurrently. 
It will then # randomly restart thec cluster (in fast or immediate) mode and then check for @@ -246,9 +249,11 @@ for (my $i = 0; $i < $TEST_ITERATIONS; $i++) $node_primary_loglocation = -s $node_primary->logfile; # If data checksums are enabled, take the opportunity to verify them - # while the cluster is offline + # while the cluster is offline (but only if stopped in a clean way, + # not after immediate shutdown) $node_primary->checksum_verify_offline() - unless $data_checksum_state eq 'off'; + unless $data_checksum_state eq 'off' or !$primary_shutdown_clean; + random_sleep(); $node_primary->start; # Start a pgbench in the background against the primary @@ -270,9 +275,11 @@ for (my $i = 0; $i < $TEST_ITERATIONS; $i++) $node_standby_1_loglocation = -s $node_standby_1->logfile; # If data checksums are enabled, take the opportunity to verify them - # while the cluster is offline + # while the cluster is offline (but only if stopped in a clean way, + # not after immediate shutdown) $node_standby_1->checksum_verify_offline() - unless $data_checksum_state eq 'off'; + unless $data_checksum_state eq 'off' or !$standby_shutdown_clean; + random_sleep(); $node_standby_1->start; # Start a select-only pgbench in the background on the standby @@ -287,13 +294,41 @@ for (my $i = 0; $i < $TEST_ITERATIONS; $i++) my $result = $node_primary->safe_psql('postgres', "SELECT count(*) FROM t WHERE a > 1"); is($result, '100000', 'ensure data pages can be read back on primary'); + random_sleep(); + $node_primary->wait_for_catchup($node_standby_1, 'write'); - # Potentially powercycle the cluster - $node_primary->stop($stop_modes[ int(rand(100)) ]) if cointoss(); random_sleep(); - $node_standby_1->stop($stop_modes[ int(rand(100)) ]) if cointoss(); + + # Potentially powercycle the cluster (the nodes independently) + # XXX should maybe try stopping nodes in the opposite order too? 
+ if (cointoss()) + { + my $mode = $stop_modes[ int(rand(100)) ]; + $node_primary->stop($mode); + $primary_shutdown_clean = ($mode eq 'fast'); + } + + random_sleep(); + + if (cointoss()) + { + my $mode = $stop_modes[ int(rand(100)) ]; + $node_standby_1->stop($mode); + $standby_shutdown_clean = ($mode eq 'fast'); + } +} + +# Make sure both nodes are running again before the final checks +if (!$node_primary->is_alive) +{ + $node_primary->start; +} + +if (!$node_standby_1->is_alive) +{ + $node_standby_1->start; } # Testrun is over, ensure that data reads back as expected and perform a final