From 8c61b892587c417f092869b87268beb522e94305 Mon Sep 17 00:00:00 2001 From: Jasper Smit Date: Fri, 12 Dec 2025 13:29:45 +0100 Subject: [PATCH 1/2] Add repro for heap locking visibility bug as a tap --- src/backend/access/heap/heapam.c | 2 + src/test/modules/injection_points/Makefile | 3 +- .../expected/heap_lock_update.out | 33 +++++++++ src/test/modules/injection_points/meson.build | 1 + .../specs/heap_lock_update.spec | 72 +++++++++++++++++++ 5 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 src/test/modules/injection_points/expected/heap_lock_update.out create mode 100644 src/test/modules/injection_points/specs/heap_lock_update.spec diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 225f9829f22..a69df8bd431 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -6069,6 +6069,8 @@ static TM_Result heap_lock_updated_tuple(Relation rel, HeapTuple tuple, const ItemPointerData *ctid, TransactionId xid, LockTupleMode mode) { + INJECTION_POINT("heap_lock_updated_tuple", NULL); + /* * If the tuple has not been updated, or has moved into another partition * (effectively a delete) stop here. diff --git a/src/test/modules/injection_points/Makefile b/src/test/modules/injection_points/Makefile index c85034eb8cc..a9851b4a097 100644 --- a/src/test/modules/injection_points/Makefile +++ b/src/test/modules/injection_points/Makefile @@ -19,7 +19,8 @@ ISOLATION = basic \ index-concurrently-upsert-predicate \ reindex-concurrently-upsert \ reindex-concurrently-upsert-on-constraint \ - reindex-concurrently-upsert-partitioned + reindex-concurrently-upsert-partitioned \ + heap_lock_update # The injection points are cluster-wide, so disable installcheck NO_INSTALLCHECK = 1 diff --git a/src/test/modules/injection_points/expected/heap_lock_update.out b/src/test/modules/injection_points/expected/heap_lock_update.out new file mode 100644 index 00000000000..b6308dcd316 --- /dev/null +++ b/src/test/modules/injection_points/expected/heap_lock_update.out @@ -0,0 +1,33 @@ +Parsed test spec with 2 sessions + +starting permutation: s1begin s1update s2lock s1abort vacuum reinsert wake +step s1begin: begin; +step s1update: update t set id = 1000 where id = 1; +step s2lock: select * from t where id = 1 for update; +step s1abort: abort; +step vacuum: VACUUM (TRUNCATE off); +step reinsert: + insert into t values (453) returning ctid; -- Should be (2,1) + update t set id = 454 where id = 453 returning ctid; + +ctid +----- +(2,1) +(1 row) + +ctid +----- +(2,2) +(1 row) + +step wake: + SELECT FROM injection_points_detach('heap_lock_updated_tuple'); + SELECT FROM injection_points_wakeup('heap_lock_updated_tuple'); + +step s2lock: <... completed> +id +-- + 1 +(1 row) + +step wake: <... completed> diff --git a/src/test/modules/injection_points/meson.build b/src/test/modules/injection_points/meson.build index 8d6f662040d..e92e7348a80 100644 --- a/src/test/modules/injection_points/meson.build +++ b/src/test/modules/injection_points/meson.build @@ -51,6 +51,7 @@ tests += { 'reindex-concurrently-upsert', 'reindex-concurrently-upsert-on-constraint', 'reindex-concurrently-upsert-partitioned', + 'heap_lock_update', ], 'runningcheck': false, # see syscache-update-pruned # Some tests wait for all snapshots, so avoid parallel execution diff --git a/src/test/modules/injection_points/specs/heap_lock_update.spec b/src/test/modules/injection_points/specs/heap_lock_update.spec new file mode 100644 index 00000000000..f85b11532e9 --- /dev/null +++ b/src/test/modules/injection_points/specs/heap_lock_update.spec @@ -0,0 +1,72 @@ +# XXX +# Test race conditions involving: +# - s1: heap_update($FROM_SYSCACHE), without a snapshot or pin +# - s2: ALTER TABLE making $FROM_SYSCACHE a dead tuple +# - s3: "VACUUM pg_class" making $FROM_SYSCACHE become LP_UNUSED + +# This is a derivative work of inplace.spec, which exercises the corresponding +# race condition for inplace updates. + +# Despite local injection points, this is incompatible with runningcheck. +# First, removable_cutoff() could move backward, per its header comment. +# Second, other activity could trigger sinval queue overflow, negating our +# efforts to delay inval. Third, this deadlock emerges: +# +# - step at2 waits at an injection point, with interrupts held +# - an unrelated backend waits for at2 to do PROCSIGNAL_BARRIER_SMGRRELEASE +# - step waitprunable4 waits for the unrelated backend to release its xmin + +# The alternative expected output is for -DCATCACHE_FORCE_RELEASE, a setting +# that thwarts testing the race conditions this spec seeks. + + +# XXX Need s2 to make a non-HOT update. Otherwise, "VACUUM pg_class" would leave +# an LP_REDIRECT that persists. To get non-HOT, make rels so the pg_class row +# for vactest.orig50 is on a filled page (assuming BLCKSZ=8192). Just to save +# on filesystem syscalls, use relkind=c for every other rel. +setup +{ + CREATE EXTENSION injection_points; + + create table t (id int); + insert into t (select generate_series(1, 452)); +} +teardown +{ + drop table t; + DROP EXTENSION injection_points; +} + +# Wait during GRANT. Disable debug_discard_caches, since we're here to +# exercise an outcome that happens under permissible cache staleness. +session s1 +step s1begin { begin; } +step s1update { update t set id = 1000 where id = 1; } +step s1abort { abort; } +step vacuum { VACUUM (TRUNCATE off); } +step reinsert { + insert into t values (453) returning ctid; -- Should be (2,1) + update t set id = 454 where id = 453 returning ctid; +} + +step wake { + SELECT FROM injection_points_detach('heap_lock_updated_tuple'); + SELECT FROM injection_points_wakeup('heap_lock_updated_tuple'); +} + +session s2 +setup { + SET debug_discard_caches = 0; + SELECT FROM injection_points_set_local(); + SELECT FROM injection_points_attach('heap_lock_updated_tuple', 'wait'); +} +step s2lock { select * from t where id = 1 for update; } + +permutation + s1begin + s1update + s2lock + s1abort + vacuum + reinsert + wake(s2lock) -- 2.39.5