diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index a6fda81feb6..b03287cac4d 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -99,6 +99,7 @@ extern slock_t *ShmemLock; #define LW_FLAG_HAS_WAITERS ((uint32) 1 << 30) #define LW_FLAG_RELEASE_OK ((uint32) 1 << 29) #define LW_FLAG_LOCKED ((uint32) 1 << 28) +#define LW_FLAG_FAIR ((uint32) 1 << 27) #define LW_VAL_EXCLUSIVE ((uint32) 1 << 24) #define LW_VAL_SHARED 1 @@ -137,6 +138,7 @@ typedef struct LWLockHandle { LWLock *lock; LWLockMode mode; + bool firstShared; } LWLockHandle; static int num_held_lwlocks = 0; @@ -736,7 +738,7 @@ GetLWLockIdentifier(uint32 classId, uint16 eventId) * Returns true if the lock isn't free and we need to wait. */ static bool -LWLockAttemptLock(LWLock *lock, LWLockMode mode) +LWLockAttemptLock(LWLock *lock, LWLockMode mode, bool wakeup, bool *firstShared) { uint32 old_state; @@ -760,13 +762,24 @@ LWLockAttemptLock(LWLock *lock, LWLockMode mode) { lock_free = (old_state & LW_LOCK_MASK) == 0; if (lock_free) + { desired_state += LW_VAL_EXCLUSIVE; + desired_state &= ~LW_FLAG_FAIR; + } } else { - lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0; + if (wakeup) + lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0; + else + lock_free = ((old_state & LW_VAL_EXCLUSIVE) == 0) && + ((old_state & (LW_FLAG_HAS_WAITERS | LW_FLAG_FAIR)) != + (LW_FLAG_HAS_WAITERS | LW_FLAG_FAIR)); if (lock_free) + { desired_state += LW_VAL_SHARED; + desired_state &= ~LW_FLAG_FAIR; + } } /* @@ -789,6 +802,14 @@ LWLockAttemptLock(LWLock *lock, LWLockMode mode) if (mode == LW_EXCLUSIVE) lock->owner = MyProc; #endif + if (mode == LW_SHARED) + { + if ((old_state & LW_SHARED_MASK) == 0) + *firstShared = true; + else + *firstShared = false; + } + return false; } else @@ -978,9 +999,11 @@ LWLockWakeup(LWLock *lock) * * NB: Mode can be LW_WAIT_UNTIL_FREE here! 
*/ -static void +static bool LWLockQueueSelf(LWLock *lock, LWLockMode mode) { + bool first; + /* * If we don't have a PGPROC structure, there's no way to wait. This * should never occur, since MyProc should only be null during shared @@ -1002,9 +1025,15 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode) /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */ if (mode == LW_WAIT_UNTIL_FREE) + { + first = true; proclist_push_head(&lock->waiters, MyProc->pgprocno, lwWaitLink); + } else + { + first = proclist_is_empty(&lock->waiters); proclist_push_tail(&lock->waiters, MyProc->pgprocno, lwWaitLink); + } /* Can release the mutex now */ LWLockWaitListUnlock(lock); @@ -1013,6 +1042,7 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode) pg_atomic_fetch_add_u32(&lock->nwaiters, 1); #endif + return first; } /* @@ -1122,6 +1152,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode) { PGPROC *proc = MyProc; bool result = true; + bool firstShared = false; int extraWaits = 0; #ifdef LWLOCK_STATS lwlock_stats *lwstats; @@ -1177,13 +1208,13 @@ LWLockAcquire(LWLock *lock, LWLockMode mode) */ for (;;) { - bool mustwait; + bool mustwait, first; /* * Try to grab the lock the first time, we're not in the waitqueue * yet/anymore. 
*/ - mustwait = LWLockAttemptLock(lock, mode); + mustwait = LWLockAttemptLock(lock, mode, !result, &firstShared); if (!mustwait) { @@ -1203,10 +1234,10 @@ LWLockAcquire(LWLock *lock, LWLockMode mode) */ /* add to the queue */ - LWLockQueueSelf(lock, mode); + first = LWLockQueueSelf(lock, mode); /* we're now guaranteed to be woken up if necessary */ - mustwait = LWLockAttemptLock(lock, mode); + mustwait = LWLockAttemptLock(lock, mode, (!result) || first, &firstShared); /* ok, grabbed the lock the second time round, need to undo queueing */ if (!mustwait) { @@ -1271,6 +1302,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode) /* Add lock to list of locks held by this backend */ held_lwlocks[num_held_lwlocks].lock = lock; + held_lwlocks[num_held_lwlocks].firstShared = firstShared; held_lwlocks[num_held_lwlocks++].mode = mode; /* @@ -1292,7 +1324,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode) bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode) { - bool mustwait; + bool mustwait, firstShared = false; AssertArg(mode == LW_SHARED || mode == LW_EXCLUSIVE); @@ -1310,7 +1342,7 @@ LWLockConditionalAcquire(LWLock *lock, LWLockMode mode) HOLD_INTERRUPTS(); /* Check for the lock */ - mustwait = LWLockAttemptLock(lock, mode); + mustwait = LWLockAttemptLock(lock, mode, false, &firstShared); if (mustwait) { @@ -1324,6 +1356,7 @@ LWLockConditionalAcquire(LWLock *lock, LWLockMode mode) { /* Add lock to list of locks held by this backend */ held_lwlocks[num_held_lwlocks].lock = lock; + held_lwlocks[num_held_lwlocks].firstShared = firstShared; held_lwlocks[num_held_lwlocks++].mode = mode; TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), mode); } @@ -1348,7 +1381,8 @@ bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode) { PGPROC *proc = MyProc; - bool mustwait; + bool mustwait, + firstShared = false; int extraWaits = 0; #ifdef LWLOCK_STATS lwlock_stats *lwstats; @@ -1375,13 +1409,13 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode) * NB: We're using nearly the same twice-in-a-row 
lock acquisition * protocol as LWLockAcquire(). Check its comments for details. */ - mustwait = LWLockAttemptLock(lock, mode); + mustwait = LWLockAttemptLock(lock, mode, true, &firstShared); if (mustwait) { LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE); - mustwait = LWLockAttemptLock(lock, mode); + mustwait = LWLockAttemptLock(lock, mode, true, &firstShared); if (mustwait) { @@ -1452,6 +1486,7 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode) LOG_LWDEBUG("LWLockAcquireOrWait", lock, "succeeded"); /* Add lock to list of locks held by this backend */ held_lwlocks[num_held_lwlocks].lock = lock; + held_lwlocks[num_held_lwlocks].firstShared = firstShared; held_lwlocks[num_held_lwlocks++].mode = mode; TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), mode); } @@ -1725,8 +1760,10 @@ void LWLockRelease(LWLock *lock) { LWLockMode mode; - uint32 oldstate; - bool check_waiters; + uint32 oldstate, + sub; + bool check_waiters, + firstShared; int i; /* @@ -1735,7 +1772,10 @@ LWLockRelease(LWLock *lock) */ for (i = num_held_lwlocks; --i >= 0;) if (lock == held_lwlocks[i].lock) + { + firstShared = held_lwlocks[i].firstShared; break; + } if (i < 0) elog(ERROR, "lock %s is not held", T_NAME(lock)); @@ -1753,14 +1793,23 @@ LWLockRelease(LWLock *lock) * others, even if we still have to wakeup other waiters. */ if (mode == LW_EXCLUSIVE) - oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE); + sub = LW_VAL_EXCLUSIVE; else - oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED); + sub = LW_VAL_SHARED; + + if (firstShared) + sub -= LW_FLAG_FAIR; + + oldstate = pg_atomic_fetch_sub_u32(&lock->state, sub); + + /* If we were first shared locker, LW_FLAG_FAIR shouldn't be set */ + Assert(!(oldstate & LW_FLAG_FAIR) || !firstShared); + + oldstate -= sub; /* nobody else can have that kind of lock */ Assert(!(oldstate & LW_VAL_EXCLUSIVE)); - /* * We're still waiting for backends to get scheduled, don't wake them up * again.