From 018451f369a1931b1afe0ff5140058232ea52096 Mon Sep 17 00:00:00 2001
From: Hou Zhijie
Date: Tue, 2 Apr 2024 08:41:02 +0800
Subject: [PATCH v6 2/2] test the data loss case

---
 .../t/040_standby_failover_slots_sync.pl | 76 ++++++++++++++++++-
 1 file changed, 73 insertions(+), 3 deletions(-)

diff --git a/src/test/recovery/t/040_standby_failover_slots_sync.pl b/src/test/recovery/t/040_standby_failover_slots_sync.pl
index 0818c3c068..1566de668c 100644
--- a/src/test/recovery/t/040_standby_failover_slots_sync.pl
+++ b/src/test/recovery/t/040_standby_failover_slots_sync.pl
@@ -365,6 +365,67 @@ ok( $stderr =~
 
 $cascading_standby->stop;
 
+##################################################
+# Create a failover slot and advance the restart_lsn to a position where a
+# running transaction exists. This setup is for testing the synced slot's
+# capability to reach the consistent point from the restart_lsn after
+# promotion, guaranteeing no data loss from changes before reaching a
+# consistent point.
+##################################################
+
+$primary->safe_psql('postgres',
+	"SELECT pg_create_logical_replication_slot('snap_test_slot', 'test_decoding', false, false, true);"
+);
+
+$standby1->safe_psql('postgres', "SELECT pg_sync_replication_slots();");
+
+# Two xl_running_xacts logs are generated here. When decoding the first log, it
+# only serializes the snapshot, without advancing the restart_lsn to the latest
+# position. This is because if a transaction is running, the restart_lsn can
+# only move to a position before that transaction. Hence, the second
+# xl_running_xacts log is needed, decoding the second log allows the
+# restart_lsn to advance to the last serialized snapshot's position (the first
+# log).
+$primary->safe_psql(
+	'postgres', qq(
+	BEGIN;
+	SELECT txid_current();
+	SELECT pg_log_standby_snapshot();
+	COMMIT;
+	BEGIN;
+	SELECT txid_current();
+	SELECT pg_log_standby_snapshot();
+	COMMIT;
+));
+
+$primary->wait_for_replay_catchup($standby1);
+
+# Advance the restart_lsn to the position of the first xl_running_xacts log
+# generated above. Note that there might be concurrent xl_running_xacts logs
+# written by the bgwriter, which could cause the position to be advanced to an
+# unexpected point, but this is a rare scenario and acceptable.
+$primary->safe_psql('postgres',
+	"SELECT pg_replication_slot_advance('snap_test_slot', pg_current_wal_lsn());"
+);
+
+# Log a message that will be consumed on the standby after promotion using the
+# synced slot.
+$primary->safe_psql('postgres',
+	"SELECT pg_logical_emit_message(false, 'test', 'test');"
+);
+
+# Get the confirmed_flush_lsn for the logical slot snap_test_slot on the primary
+my $confirmed_flush_lsn = $primary->safe_psql('postgres',
+	"SELECT confirmed_flush_lsn from pg_replication_slots WHERE slot_name = 'snap_test_slot';");
+
+$standby1->safe_psql('postgres', "SELECT pg_sync_replication_slots();");
+
+# Confirm that confirmed_flush_lsn of snap_test_slot is synced to the standby
+ok( $standby1->poll_query_until(
+		'postgres',
+		"SELECT '$confirmed_flush_lsn' = confirmed_flush_lsn from pg_replication_slots WHERE slot_name = 'snap_test_slot' AND synced AND NOT temporary;"),
+	'confirmed_flush_lsn of slot snap_test_slot synced to standby');
+
 ##################################################
 # Test to confirm that the slot synchronization is protected from malicious
 # users.
@@ -739,8 +800,9 @@ $primary->reload;
 
 ##################################################
 # Promote the standby1 to primary. Confirm that:
-# a) the slot 'lsub1_slot' is retained on the new primary
+# a) the slots 'lsub1_slot' and 'snap_test_slot' are retained on the new primary
 # b) logical replication for regress_mysub1 is resumed successfully after failover
+# c) changes can be consumed from the synced slot 'snap_test_slot'
 ##################################################
 $standby1->start;
 $primary->wait_for_replay_catchup($standby1);
@@ -754,8 +816,9 @@ $subscriber1->safe_psql('postgres',
 
 # Confirm the synced slot 'lsub1_slot' is retained on the new primary
 is($standby1->safe_psql('postgres',
-	q{SELECT slot_name FROM pg_replication_slots WHERE slot_name = 'lsub1_slot' AND synced AND NOT temporary;}),
-	'lsub1_slot',
+	q{SELECT slot_name FROM pg_replication_slots WHERE slot_name IN ('lsub1_slot', 'snap_test_slot') AND synced AND NOT temporary ORDER BY slot_name;}),
+	'lsub1_slot
+snap_test_slot',
 	'synced slot retained on the new primary');
 
 # Insert data on the new primary
@@ -768,4 +831,11 @@ is( $subscriber1->safe_psql('postgres', q{SELECT count(*) FROM tab_int;}),
 	"20",
 	'data replicated from the new primary');
 
+# Confirm the message logged before promotion is decodable via snap_test_slot
+$result = $standby1->safe_psql('postgres',
+	"SELECT count(*) FROM pg_logical_slot_get_changes('snap_test_slot', NULL, NULL) WHERE data ~ '^message:';"
+);
+
+is($result, '1', "data can be consumed using snap_test_slot");
+
 done_testing();
-- 
2.30.0.windows.2