From a6ded0068211e0a304eb803763f632665831a419 Mon Sep 17 00:00:00 2001 From: Hayato Kuroda Date: Wed, 26 Mar 2025 14:19:50 +0900 Subject: [PATCH v3] Stabilize 035_standby_logical_decoding.pl by using the injection_points. This test tries to invalidate slots on standby server, by running VACUUM on primary and discarding needed tuples for slots. The problem is that xl_running_xacts records are sotimetimes generated while testing, it advances the catalog_xmin so that the invalidation might not happen in some cases. The fix is to skip generating the record when the instance attached to a new injection point. This failure can happen since logical decoding is allowed on the standby server. But the interface of injection_points we used exists only on master, so we do not backpatch. --- src/backend/storage/ipc/standby.c | 16 +++++++ .../t/035_standby_logical_decoding.pl | 45 +++++++++++++------ 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 5acb4508f85..0e621e9996a 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -31,6 +31,7 @@ #include "storage/sinvaladt.h" #include "storage/standby.h" #include "utils/hsearch.h" +#include "utils/injection_point.h" #include "utils/ps_status.h" #include "utils/timeout.h" #include "utils/timestamp.h" @@ -1287,6 +1288,21 @@ LogStandbySnapshot(void) Assert(XLogStandbyInfoActive()); + /* For testing slot invalidation */ +#ifdef USE_INJECTION_POINTS + if (IS_INJECTION_POINT_ATTACHED("log-running-xacts")) + { + /* + * RUNNING_XACTS could move slots's xmin forward and cause random + * failures in some tests. Skip generating to avoid it. + * + * XXX What value should we return here? Originally this returns the + * inserted location of RUNNING_XACT record. Based on that, here + * returns the latest insert location for now. + */ + return GetInsertRecPtr(); + } +#endif /* * Get details of any AccessExclusiveLocks being held at the moment. */ diff --git a/src/test/recovery/t/035_standby_logical_decoding.pl b/src/test/recovery/t/035_standby_logical_decoding.pl index c31cab06f1c..93b11a4de1e 100644 --- a/src/test/recovery/t/035_standby_logical_decoding.pl +++ b/src/test/recovery/t/035_standby_logical_decoding.pl @@ -10,6 +10,11 @@ use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; use Test::More; +if ($ENV{enable_injection_points} ne 'yes') +{ + plan skip_all => 'Injection points not supported by this build'; +} + my ($stdout, $stderr, $cascading_stdout, $cascading_stderr, $handle); my $node_primary = PostgreSQL::Test::Cluster->new('primary'); @@ -241,16 +246,19 @@ sub check_for_invalidation # VACUUM command, $sql the sql to launch before triggering the vacuum and # $to_vac the relation to vacuum. # -# Note that pg_current_snapshot() is used to get the horizon. It does -# not generate a Transaction/COMMIT WAL record, decreasing the risk of -# seeing a xl_running_xacts that would advance an active replication slot's -# catalog_xmin. Advancing the active replication slot's catalog_xmin -# would break some tests that expect the active slot to conflict with -# the catalog xmin horizon. +# Note that injection_point is used to avoid the seeing the xl_running_xacts +# that would advance an active replication slot's catalog_xmin. Advancing +# the active replication slot's catalog_xmin would break some tests that +# expect the active slot to conflict with the catalog xmin horizon. sub wait_until_vacuum_can_remove { my ($vac_option, $sql, $to_vac) = @_; + # Note that from this point the checkpointer and bgwriter will skip writing + # xl_running_xacts record. + $node_primary->safe_psql('testdb', + "SELECT injection_points_attach('log-running-xacts', 'error');"); + # Get the current xid horizon, my $xid_horizon = $node_primary->safe_psql('testdb', qq[select pg_snapshot_xmin(pg_current_snapshot());]); @@ -268,6 +276,12 @@ sub wait_until_vacuum_can_remove $node_primary->safe_psql( 'testdb', qq[VACUUM $vac_option verbose $to_vac; INSERT INTO flush_wal DEFAULT VALUES;]); + + $node_primary->wait_for_replay_catchup($node_standby); + + # Resume generating the xl_running_xacts record + $node_primary->safe_psql('testdb', + "SELECT injection_points_detach('log-running-xacts');"); } ######################## @@ -285,6 +299,14 @@ autovacuum = off $node_primary->dump_info; $node_primary->start; +# Check if the extension injection_points is available, as it may be +# possible that this script is run with installcheck, where the module +# would not be installed by default. +if (!$node_primary->check_extension('injection_points')) +{ + plan skip_all => 'Extension injection_points not installed'; +} + $node_primary->psql('postgres', q[CREATE DATABASE testdb]); $node_primary->safe_psql('testdb', @@ -528,6 +550,9 @@ is($result, qq(10), 'check replicated inserts after subscription on standby'); $node_subscriber->safe_psql('postgres', "DROP SUBSCRIPTION tap_sub"); $node_subscriber->stop; +# Create the injection_points extension +$node_primary->safe_psql('testdb', 'CREATE EXTENSION injection_points;'); + ################################################## # Recovery conflict: Invalidate conflicting slots, including in-use slots # Scenario 1: hot_standby_feedback off and vacuum FULL @@ -557,8 +582,6 @@ wait_until_vacuum_can_remove( 'full', 'CREATE TABLE conflict_test(x integer, y text); DROP TABLE conflict_test;', 'pg_class'); -$node_primary->wait_for_replay_catchup($node_standby); - # Check invalidation in the logfile and in pg_stat_database_conflicts check_for_invalidation('vacuum_full_', 1, 'with vacuum FULL on pg_class'); @@ -656,8 +679,6 @@ wait_until_vacuum_can_remove( '', 'CREATE TABLE conflict_test(x integer, y text); DROP TABLE conflict_test;', 'pg_class'); -$node_primary->wait_for_replay_catchup($node_standby); - # Check invalidation in the logfile and in pg_stat_database_conflicts check_for_invalidation('row_removal_', $logstart, 'with vacuum on pg_class'); @@ -690,8 +711,6 @@ wait_until_vacuum_can_remove( '', 'CREATE ROLE create_trash; DROP ROLE create_trash;', 'pg_authid'); -$node_primary->wait_for_replay_catchup($node_standby); - # Check invalidation in the logfile and in pg_stat_database_conflicts check_for_invalidation('shared_row_removal_', $logstart, 'with vacuum on pg_authid'); @@ -724,8 +743,6 @@ wait_until_vacuum_can_remove( INSERT INTO conflict_test(x,y) SELECT s, s::text FROM generate_series(1,4) s; UPDATE conflict_test set x=1, y=1;', 'conflict_test'); -$node_primary->wait_for_replay_catchup($node_standby); - # message should not be issued ok( !$node_standby->log_contains( "invalidating obsolete slot \"no_conflict_inactiveslot\"", $logstart), -- 2.43.5