diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index 6259070..a7b0403 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -38,7 +38,20 @@ int			vacuum_defer_cleanup_age;
 int			max_standby_archive_delay = 30 * 1000;
 int			max_standby_streaming_delay = 30 * 1000;
 
-static List *RecoveryLockList;
+/*
+ * Number of buckets to split RecoveryLockTable into.
+ * This must be a power of two.
+ */
+#define RECOVERYLOCKTABLE_SIZE	1024
+#define RECOVERYLOCKTABLE_MASK	(RECOVERYLOCKTABLE_SIZE - 1)
+
+/*
+ * RecoveryLockTable is a poor man's hash table that allows us to partition
+ * the stored locks. Which partition a lock is stored in is determined by the
+ * xid which the lock belongs to. Splitting into partitions in this way avoids
+ * having to look through all locks to find one specific to a given xid.
+ */
+static List **RecoveryLockTable;
 
 static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
 									   ProcSignalReason reason);
@@ -64,6 +77,10 @@ InitRecoveryTransactionEnvironment(void)
 {
 	VirtualTransactionId vxid;
 
+	/* Setup the recovery lock table */
+	RecoveryLockTable = (List **)
+		palloc0(RECOVERYLOCKTABLE_SIZE * sizeof(List *));
+
 	/*
 	 * Initialize shared invalidation management for Startup process, being
 	 * careful to register ourselves as a sendOnly process so we don't need to
@@ -109,6 +126,10 @@ ShutdownRecoveryTransactionEnvironment(void)
 
 	/* Cleanup our VirtualTransaction */
 	VirtualXactLockTableCleanup();
+
+	/* Free the RecoveryLockTable and clear the pointer so it cannot dangle */
+	pfree(RecoveryLockTable);
+	RecoveryLockTable = NULL;
 }
 
 
@@ -607,6 +628,7 @@ StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid)
 {
 	xl_standby_lock *newlock;
 	LOCKTAG		locktag;
+	size_t		pidx;
 
 	/* Already processed? */
 	if (!TransactionIdIsValid(xid) ||
@@ -624,7 +646,8 @@ StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid)
 	newlock->xid = xid;
 	newlock->dbOid = dbOid;
 	newlock->relOid = relOid;
-	RecoveryLockList = lappend(RecoveryLockList, newlock);
+	pidx = xid & RECOVERYLOCKTABLE_MASK;
+	RecoveryLockTable[pidx] = lappend(RecoveryLockTable[pidx], newlock);
 
 	SET_LOCKTAG_RELATION(locktag, newlock->dbOid, newlock->relOid);
 
@@ -637,18 +660,26 @@ StandbyReleaseLocks(TransactionId xid)
 	ListCell   *cell,
 			   *prev,
 			   *next;
+	size_t		pidx;
+
+	if (!TransactionIdIsValid(xid))
+	{
+		StandbyReleaseAllLocks();
+		return;
+	}
 
 	/*
 	 * Release all matching locks and remove them from list
 	 */
+	pidx = xid & RECOVERYLOCKTABLE_MASK;
 	prev = NULL;
-	for (cell = list_head(RecoveryLockList); cell; cell = next)
+	for (cell = list_head(RecoveryLockTable[pidx]); cell; cell = next)
 	{
 		xl_standby_lock *lock = (xl_standby_lock *) lfirst(cell);
 
 		next = lnext(cell);
 
-		if (!TransactionIdIsValid(xid) || lock->xid == xid)
+		if (lock->xid == xid)
 		{
 			LOCKTAG		locktag;
 
@@ -658,10 +689,10 @@ StandbyReleaseLocks(TransactionId xid)
 			SET_LOCKTAG_RELATION(locktag, lock->dbOid, lock->relOid);
 			if (!LockRelease(&locktag, AccessExclusiveLock, true))
 				elog(LOG,
-					 "RecoveryLockList contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u",
+					 "RecoveryLockTable contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u",
 					 lock->xid, lock->dbOid, lock->relOid);
 
-			RecoveryLockList = list_delete_cell(RecoveryLockList, cell, prev);
+			RecoveryLockTable[pidx] = list_delete_cell(RecoveryLockTable[pidx], cell, prev);
 			pfree(lock);
 		}
 		else
@@ -671,7 +702,7 @@ StandbyReleaseLocks(TransactionId xid)
 
 /*
  * Release locks for a transaction tree, starting at xid down, from
- * RecoveryLockList.
+ * RecoveryLockTable.
  *
  * Called during WAL replay of COMMIT/ROLLBACK when in hot standby mode,
  * to remove any AccessExclusiveLocks requested by a transaction.
@@ -697,26 +728,31 @@ StandbyReleaseAllLocks(void)
 			   *prev,
 			   *next;
 	LOCKTAG		locktag;
+	size_t		pidx;
 
 	elog(trace_recovery(DEBUG2), "release all standby locks");
 
-	prev = NULL;
-	for (cell = list_head(RecoveryLockList); cell; cell = next)
+	for (pidx = 0; pidx < RECOVERYLOCKTABLE_SIZE; pidx++)
 	{
-		xl_standby_lock *lock = (xl_standby_lock *) lfirst(cell);
+		prev = NULL;
+		for (cell = list_head(RecoveryLockTable[pidx]); cell; cell = next)
+		{
+			xl_standby_lock *lock = (xl_standby_lock *) lfirst(cell);
 
-		next = lnext(cell);
+			next = lnext(cell);
 
-		elog(trace_recovery(DEBUG4),
-			 "releasing recovery lock: xid %u db %u rel %u",
-			 lock->xid, lock->dbOid, lock->relOid);
-		SET_LOCKTAG_RELATION(locktag, lock->dbOid, lock->relOid);
-		if (!LockRelease(&locktag, AccessExclusiveLock, true))
-			elog(LOG,
-				 "RecoveryLockList contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u",
+			elog(trace_recovery(DEBUG4),
+				 "releasing recovery lock: xid %u db %u rel %u",
 				 lock->xid, lock->dbOid, lock->relOid);
-		RecoveryLockList = list_delete_cell(RecoveryLockList, cell, prev);
-		pfree(lock);
+			SET_LOCKTAG_RELATION(locktag, lock->dbOid, lock->relOid);
+			if (!LockRelease(&locktag, AccessExclusiveLock, true))
+				elog(LOG,
+					 "RecoveryLockTable contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u",
+					 lock->xid, lock->dbOid, lock->relOid);
+			RecoveryLockTable[pidx] =
+				list_delete_cell(RecoveryLockTable[pidx], cell, prev);
+			pfree(lock);
+		}
 	}
 }
 
@@ -732,55 +768,60 @@ StandbyReleaseOldLocks(int nxids, TransactionId *xids)
 			   *prev,
 			   *next;
 	LOCKTAG		locktag;
+	size_t		pidx;
 
-	prev = NULL;
-	for (cell = list_head(RecoveryLockList); cell; cell = next)
+	for (pidx = 0; pidx < RECOVERYLOCKTABLE_SIZE; pidx++)
 	{
-		xl_standby_lock *lock = (xl_standby_lock *) lfirst(cell);
-		bool		remove = false;
+		prev = NULL;
+		for (cell = list_head(RecoveryLockTable[pidx]); cell; cell = next)
+		{
+			xl_standby_lock *lock = (xl_standby_lock *) lfirst(cell);
+			bool		remove = false;
 
-		next = lnext(cell);
+			next = lnext(cell);
 
-		Assert(TransactionIdIsValid(lock->xid));
+			Assert(TransactionIdIsValid(lock->xid));
 
-		if (StandbyTransactionIdIsPrepared(lock->xid))
-			remove = false;
-		else
-		{
-			int			i;
-			bool		found = false;
-
-			for (i = 0; i < nxids; i++)
+			if (StandbyTransactionIdIsPrepared(lock->xid))
+				remove = false;
+			else
 			{
-				if (lock->xid == xids[i])
+				int			i;
+				bool		found = false;
+
+				for (i = 0; i < nxids; i++)
 				{
-					found = true;
-					break;
+					if (lock->xid == xids[i])
+					{
+						found = true;
+						break;
+					}
 				}
-			}
 
-			/*
-			 * If its not a running transaction, remove it.
-			 */
-			if (!found)
-				remove = true;
-		}
+				/*
+				 * If it's not a running transaction, remove it.
+				 */
+				if (!found)
+					remove = true;
+			}
 
-		if (remove)
-		{
-			elog(trace_recovery(DEBUG4),
-				 "releasing recovery lock: xid %u db %u rel %u",
-				 lock->xid, lock->dbOid, lock->relOid);
-			SET_LOCKTAG_RELATION(locktag, lock->dbOid, lock->relOid);
-			if (!LockRelease(&locktag, AccessExclusiveLock, true))
-				elog(LOG,
-					 "RecoveryLockList contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u",
+			if (remove)
+			{
+				elog(trace_recovery(DEBUG4),
+					 "releasing recovery lock: xid %u db %u rel %u",
 					 lock->xid, lock->dbOid, lock->relOid);
-			RecoveryLockList = list_delete_cell(RecoveryLockList, cell, prev);
-			pfree(lock);
+				SET_LOCKTAG_RELATION(locktag, lock->dbOid, lock->relOid);
+				if (!LockRelease(&locktag, AccessExclusiveLock, true))
+					elog(LOG,
+						 "RecoveryLockTable contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u",
+						 lock->xid, lock->dbOid, lock->relOid);
+				RecoveryLockTable[pidx] =
+					list_delete_cell(RecoveryLockTable[pidx], cell, prev);
+				pfree(lock);
+			}
+			else
+				prev = cell;
 		}
-		else
-			prev = cell;
 	}
 }
 