From b1fccb275bd3619ead77ab93e0292ee9068627c9 Mon Sep 17 00:00:00 2001 From: xueyi Date: Mon, 10 Jul 2023 05:50:09 +0000 Subject: [PATCH 3/3] Check invalid pages at the end of recovery. Primary doesn't check invalid pages during the crash recovery, but standby does at the end of crash recovery. And an aborted transaction across checkpoint could cause invalid pages during crash recovery. Check invalid pages at the end of recovery, so the primary can find any unexpected invalid pages. Flush the abort WAL record before truncating any files, so the invalid pages can be dropped by the abort WAL record. --- src/backend/access/transam/xact.c | 12 +++++++++++- src/backend/access/transam/xlogrecovery.c | 6 ++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 8daaa535ed..3c1553570f 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -1784,8 +1784,18 @@ RecordTransactionAbort(bool isSubXact) * find that large aborts leave us with a long backlog for when commits * occur after the abort, increasing our window of data loss should * problems occur at that point. + * + * This abort should be flushed before deleting any self-created + * relations. Otherwise, it's possible to miss flushing this abort + * transaction's WAL after deleting self-created relations. Then, it + * would leave some forgotten relation files behind forever during crash + * recovery. If this aborted transaction spans the last checkpoint, + * it may also cause some invalid pages when the startup process tries to + * read blocks from those truncated relation files. 
*/ - if (!isSubXact) + if (nrels > 0) + XLogFlush(XactLastRecEnd); + else if (!isSubXact) XLogSetAsyncXactLSN(XactLastRecEnd); /* diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index becc2bda62..3779c07cb2 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -1795,6 +1795,12 @@ PerformWalRecovery(void) RmgrCleanup(); + /* + * Check to see if the XLOG sequence contained any unresolved + * references to uninitialized pages at the end of recovery. + */ + XLogCheckInvalidPages(); + ereport(LOG, (errmsg("redo done at %X/%X system usage: %s", LSN_FORMAT_ARGS(xlogreader->ReadRecPtr), -- 2.19.1.6.gb485710b