diff --git a/src/backend/access/undo/undoinsert.c b/src/backend/access/undo/undoinsert.c index 68e6adbfc6..14b9f3c59c 100644 --- a/src/backend/access/undo/undoinsert.c +++ b/src/backend/access/undo/undoinsert.c @@ -20,32 +20,39 @@ * this is entirely maintained and used by undo record layer. See * undorecord.h for detailed information about undo record header. * - * Handling multi log: + * Multiple logs: * - * It is possible that the undo record of a transaction can be spread across - * multiple undo log. And, we need some special handling while inserting the - * undo for discard and rollback to work sanely. + * It is possible that the undo records for a transaction span across + * multiple undo logs. We need some special handling while inserting them to + * ensure that discard and rollbacks can work sanely. * - * If the undorecord goes to next log then we insert a transaction header for - * the first record in the new log and update the transaction header with this - * new log's location. This will allow us to connect transactions across logs - * when the same transaction span across log (for this we keep track of the - * previous logno in undo log meta) which is required to find the latest undo - * record pointer of the aborted transaction for executing the undo actions - * before discard. If the next log get processed first in that case we - * don't need to trace back the actual start pointer of the transaction, - * in such case we can only execute the undo actions from the current log - * because the undo pointer in the slot will be rewound and that will be enough - * to avoid executing same actions. However, there is possibility that after - * executing the undo actions the undo pointer got discarded, now in later - * stage while processing the previous log it might try to fetch the undo - * record in the discarded log while chasing the transaction header chain. 
- * To avoid this situation we first check if the next_urec of the transaction - * is already discarded then no need to access that and start executing from + * When the undorecord for a transaction gets inserted in the next log then we + * insert a transaction header for the first record in the new log and update + * the transaction header with this new log's location. We will also keep + * a back pointer to the last undo record of previous log in the first record + * of new log, so that we can traverse the previous record during rollback. + * In case this is not the first record in new log (i.e. new log already + * contains some other transaction's data), we also update that transaction's + * next start header with this new undo record's location. This will allow us + * to connect transaction's undo records across logs when the same transaction + * spans across logs. + * + * There is some difference in the way the rollbacks work when the undo for + * same transaction spans across multiple logs depending on which log is + * processed first by the discard worker. If it processes the first log which + * contains the transaction's first record, then it can get the last record + * of that transaction even if it is in a different log and then processes all + * the undo records from last to first. OTOH, if the next log gets processed + * first, we don't need to trace back the actual start pointer of the + * transaction, rather we only execute the undo actions from the current log + * and avoid re-executing them next time. There is a possibility that after + * executing the undo actions, the undo got discarded, now in later stage while + * processing the previous log, it might try to fetch the undo record in the + * discarded log while chasing the transaction header chain which can cause + * trouble. We avoid this situation by first checking if the next_urec of + * the transaction is already discarded and if so, we start executing from * the last undo record in the current log. 
* - * We only connect to next log if the same transaction spread to next log - * otherwise don't. *------------------------------------------------------------------------- */ @@ -81,12 +88,12 @@ #define MAX_PREPARED_UNDO 2 /* - * This defines the max number of previous xact info we need to update. + * This defines the max number of previous xact infos we need to update. * Usually it's 1 for updating next link of previous transaction's header - * if we are starting a new transaction. But, in some cases where the same - * transaction is spilled to the next log that time we update our own - * transaction's header in previous undo log as well as the header of the - * previous transaction in the new log. + * if we are starting a new transaction. But, in some cases where the same + * transaction is spilled to the next log, we update our own transaction's + * header in previous undo log as well as the header of the previous + * transaction in the new log. */ #define MAX_XACT_UNDO_INFO 2 @@ -529,9 +536,10 @@ resize: if (InRecovery) { /* - * During recovery we can directly identify by checking the prevlogurp - * from the MyUndoLogState which is stored in it by WAL and we - * immediately reset it. + * During recovery we can identify the log switch by checking the + * prevlogurp from the MyUndoLogState. The WAL replay action for log + * switch would have set the value and we need to clear it after + * retrieving the latest value. */ prevlogurp = UndoLogStateGetAndClearPrevLogXactUrp(); urecptr = UndoLogAllocateInRecovery(txid, size, upersistence); @@ -544,8 +552,9 @@ resize: else { /* - * Just check the current log which we are attached to, and if this - * got switched after the allocation then the undo log got switched. + * Check whether the current log is switched after allocation. We can + * determine that by simply checking to which log we are attached + * before and after allocation. 
*/ prevlogno = UndoLogAmAttachedTo(upersistence); urecptr = UndoLogAllocate(size, upersistence); @@ -608,7 +617,7 @@ resize: UndoLogAdvance(urecptr, size, upersistence); /* - * WAL log, for log switch. This is required to identify the log switch + * Write WAL for log switch. This is required to identify the log switch * during recovery. */ if (!InRecovery && log_switched && upersistence == UNDO_PERMANENT) @@ -883,13 +892,13 @@ InsertPreparedUndo(void) SetCurrentUndoLocation(urp); } - /* Update previous transaction header. */ + /* Update previously prepared transaction headers. */ if (xact_urec_info_idx > 0) { - int i = 0; + int i = 0; for (i = 0; i < xact_urec_info_idx; i++) - UndoRecordUpdateTransInfo(i); + UndoRecordUpdateTransInfo(i); } } @@ -1106,10 +1115,8 @@ UndoFetchRecord(UndoRecPtr urp, BlockNumber blkno, OffsetNumber offset, /* * Return the previous undo record pointer. * - * If prevurp is valid undo record pointer then it will directly - * return that assuming the caller has detected the undo log got - * switched during the transaction and prevurp is a valid previous - * undo record pointer of the transaction in the previous undo log. + * A valid value of prevurp indicates that the previous undo record + * pointer is in some other log and caller can directly use that. * Otherwise this will calculate the previous undo record pointer * by using current urp and the prevlen. */ diff --git a/src/include/access/undorecord.h b/src/include/access/undorecord.h index 9f090558f9..0b29334652 100644 --- a/src/include/access/undorecord.h +++ b/src/include/access/undorecord.h @@ -117,10 +117,10 @@ typedef struct UndoRecordTransaction Oid urec_dbid; /* database id */ /* - * Transaction previous undo record pointer when transaction split across - * undo log. The first undo record in the new log will stores the - * previous undo record pointer in the previous log as we can not - * calculate that directly using prevlen during rollback. 
+ * Transaction's previous undo record pointer when a transaction spans + * across undo logs. The first undo record in the new log stores the + * previous undo record pointer in the previous log as we can't calculate + * that directly using prevlen during rollback. */ uint64 urec_prevurp; uint64 urec_next; /* urec pointer of the next transaction */ @@ -175,7 +175,8 @@ typedef struct UnpackedUndoRecord OffsetNumber uur_offset; /* offset number */ Buffer uur_buffer; /* buffer in which undo record data points */ uint32 uur_xidepoch; /* epoch of the inserting transaction. */ - uint64 uur_prevurp; + uint64 uur_prevurp; /* urec pointer to the previous record in + * a different log */ uint64 uur_next; /* urec pointer of the next transaction */ Oid uur_dbid; /* database id */