From a67bb9034800fcc2c7790a1547a2ddd8d4ec18ae Mon Sep 17 00:00:00 2001 From: "suyu.cmj" Date: Mon, 17 Jul 2023 05:52:59 +0000 Subject: [PATCH] Fix the bug where a 2PC transaction may be recovered twice During recovery, a two-phase transaction should be restored either from the disk file or from the WAL. However, a two-phase transaction may be restored in both ways if we crashed after writing a 2PC file to disk, but before the redo LSN has been updated in pg_control during a checkpoint. We check if the 2PC file of the transaction being recovered is already in pg_twophase/ when adding an entry from a WAL record. If a consistent point has not been reached by recovery and we find a file on disk, then do nothing because we know restoreTwoPhaseData() has done that at the beginning of recovery. If a consistent point has been reached in recovery and we find a file on disk while replaying a WAL record for the same 2PC file, then fail. If there is no file in pg_twophase/ for the record replayed, then add it to the array. --- src/backend/access/transam/twophase.c | 47 +++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 068e59bec0..0be105bbbc 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -86,6 +86,7 @@ #include "access/xlog.h" #include "access/xloginsert.h" #include "access/xlogreader.h" +#include "access/xlogrecovery.h" #include "access/xlogutils.h" #include "catalog/pg_type.h" #include "catalog/storage.h" @@ -2477,6 +2478,52 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, * that it got added in the redo phase */ + /* + * During recovery, the two-phase data can be restored in two ways: + * 1) restored from disk file when its xid < checkPoint.nextxid; + * 2) restored from the WAL record when its prepare_start_lsn > checkPoint.redo. 
+ * A two-phase transaction can be restored from either disk file or WAL, + * but can't both. However, a 2PC may satisfy above two conditions if + * we crashed after writing a 2PC file to disk, but before the redo LSN + * has been updated to pg_control during doing a checkpoint. + * + * We check if the 2PC file of the transaction being recovery is already + * in pg_twophase/ when adding an entry from a WAL record, if a consistent + * point has not been reached by recovery and we find a file on disk, then + * do nothing because we know restoreTwoPhaseData() has done that at the + * beginning of recovery. If a consistent point has been reached in recovery + * and we find a file on disk while replaying a WAL record for the same 2PC + * file, then fail. If there is no file in pg_twophase/ for the record + * replayed, then add it to the array. + */ + if (!XLogRecPtrIsInvalid(start_lsn)) + { + char path[MAXPGPATH]; + struct stat stat_buf; +#ifdef USE_ASSERT_CHECKING + int i; +#endif + + TwoPhaseFilePath(path, hdr->xid); + + if (stat(path, &stat_buf) == 0) + { + Assert(S_ISREG(stat_buf.st_mode)); + ereport((reachedConsistency ? FATAL : WARNING), + (errmsg("could not recover two-phase state file for transaction %u", hdr->xid), + errdetail("Two-phase state file has been found in WAL record %X/%X," + " but this transaction has already been restored from disk.", + LSN_FORMAT_ARGS(start_lsn)))); + return; + } + +#ifdef USE_ASSERT_CHECKING + /* cross-check for duplicates in array */ + for (i = 0; i < TwoPhaseState->numPrepXacts; i++) + Assert(TwoPhaseState->prepXacts[i]->xid != hdr->xid); +#endif + } + /* Get a free gxact from the freelist */ if (TwoPhaseState->freeGXacts == NULL) ereport(ERROR, -- 2.19.1.6.gb485710b