diff --git a/src/backend/port/atomics.c b/src/backend/port/atomics.c
index 86b5308..533f252
*** a/src/backend/port/atomics.c
--- b/src/backend/port/atomics.c
*************** pg_atomic_fetch_add_u32_impl(volatile pg
*** 158,160 ****
--- 158,243 ----
  }
  #endif /* PG_HAVE_ATOMIC_U32_SIMULATION */
+ 
+ #if (defined(__GNUC__) || defined(__INTEL_COMPILER)) && (defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__))
+ 
+ /*
+  * Optimized implementation for Power processors.  Atomic operations on Power
+  * are implemented using optimistic locking: the 'lwarx' instruction reserves
+  * the memory location, but that reservation can be broken before the matching
+  * 'stwcx.' completes, in which case we have to retry.  Thus, each CAS
+  * operation is itself a loop, and a loop of CAS operations becomes a
+  * two-level nested loop.  Experiments on multicore Power machines show a
+  * substantial benefit from doing this operation in a single loop in assembly.
+  */
+ uint32
+ pg_atomic_fetch_mask_add_u32(volatile pg_atomic_uint32 *ptr,
+                              uint32 mask, uint32 increment)
+ {
+     uint32      result;
+ 
+     __asm__ __volatile__(
+         "0: lwarx   %0,0,%4     \n" /* read *ptr and reserve the location */
+         "   and     3,%0,%2     \n" /* calculate '*ptr & mask' */
+         "   cmpwi   3,0         \n" /* compare '*ptr & mask' with 0 */
+         "   bne-    1f          \n" /* exit if '*ptr & mask' != 0 */
+         "   add     3,%0,%3     \n" /* calculate '*ptr + increment' */
+         "   stwcx.  3,0,%4      \n" /* try to store '*ptr + increment' into *ptr */
+         "   bne-    0b          \n" /* retry if the reservation was broken */
+ #ifdef USE_PPC_LWSYNC
+         "1: lwsync              \n"
+ #else
+         "1: isync               \n"
+ #endif
+         : "=&r"(result), "+m"(*ptr)
+         : "r"(mask), "r"(increment), "r"(ptr)
+         : "memory", "cc", "r3");
+     return result;
+ }
+ 
+ #else
+ 
+ /*
+  * Generic implementation via a loop of compare & exchange.
+  */
+ uint32
+ pg_atomic_fetch_mask_add_u32(volatile pg_atomic_uint32 *ptr,
+                              uint32 mask_, uint32 add_)
+ {
+     uint32      old_value;
+ 
+     /*
+      * Read once outside the loop, later iterations will get the newer value
+      * via compare & exchange.
+      */
+     old_value = pg_atomic_read_u32(ptr);
+ 
+     /* loop until we've determined whether we could make an increment or not */
+     while (true)
+     {
+         uint32      desired_value;
+         bool        free;
+ 
+         desired_value = old_value;
+         free = (old_value & mask_) == 0;
+         if (free)
+             desired_value += add_;
+ 
+         /*
+          * Attempt to swap in the value we are expecting.  If we didn't see
+          * the masked bits as clear, that's just the old value.  If we saw
+          * them as clear, we'll attempt to make an increment.  The reason we
+          * always swap in the value is that this doubles as a memory barrier.
+          * We could try to be smarter and only swap in values if we saw the
+          * masked bits as clear, but benchmarks haven't shown that to be
+          * beneficial so far.
+          *
+          * Retry if the value changed since we last looked at it.
+          */
+         if (pg_atomic_compare_exchange_u32(ptr, &old_value, desired_value))
+             return old_value;
+     }
+     pg_unreachable();
+ }
+ 
+ #endif
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index c196bb8..ec3bbc3
*** a/src/backend/storage/lmgr/lwlock.c
--- b/src/backend/storage/lmgr/lwlock.c
*************** GetLWLockIdentifier(uint32 classId, uint
*** 727,790 ****
  static bool
  LWLockAttemptLock(LWLock *lock, LWLockMode mode)
  {
!     uint32      old_state;
  
      AssertArg(mode == LW_EXCLUSIVE || mode == LW_SHARED);
  
      /*
!      * Read once outside the loop, later iterations will get the newer value
!      * via compare & exchange.
      */
!     old_state = pg_atomic_read_u32(&lock->state);
! 
!     /* loop until we've determined whether we could acquire the lock or not */
!     while (true)
      {
!         uint32      desired_state;
!         bool        lock_free;
! 
!         desired_state = old_state;
! 
!         if (mode == LW_EXCLUSIVE)
!         {
!             lock_free = (old_state & LW_LOCK_MASK) == 0;
!             if (lock_free)
!                 desired_state += LW_VAL_EXCLUSIVE;
!         }
!         else
!         {
!             lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
!             if (lock_free)
!                 desired_state += LW_VAL_SHARED;
!         }
! 
!         /*
!          * Attempt to swap in the state we are expecting.  If we didn't see
!          * the lock as free, that's just the old value.  If we saw it as
!          * free, we'll attempt to mark it acquired.  The reason that we
!          * always swap in the value is that this doubles as a memory barrier.
!          * We could try to be smarter and only swap in values if we saw the
!          * lock as free, but benchmarks haven't shown that to be beneficial
!          * so far.
!          *
!          * Retry if the value changed since we last looked at it.
!          */
!         if (pg_atomic_compare_exchange_u32(&lock->state,
!                                            &old_state, desired_state))
!         {
!             if (lock_free)
!             {
!                 /* Great! Got the lock. */
  #ifdef LOCK_DEBUG
!                 if (mode == LW_EXCLUSIVE)
!                     lock->owner = MyProc;
  #endif
!                 return false;
!             }
!             else
!                 return true;    /* somebody else has the lock */
!         }
      }
-     pg_unreachable();
  }
  
  /*
--- 727,772 ----
  static bool
  LWLockAttemptLock(LWLock *lock, LWLockMode mode)
  {
!     uint32      old_state,
!                 mask,
!                 increment;
  
      AssertArg(mode == LW_EXCLUSIVE || mode == LW_SHARED);
  
+     if (mode == LW_EXCLUSIVE)
+     {
+         mask = LW_LOCK_MASK;
+         increment = LW_VAL_EXCLUSIVE;
+     }
+     else
+     {
+         mask = LW_VAL_EXCLUSIVE;
+         increment = LW_VAL_SHARED;
+     }
+ 
      /*
!      * Use the 'check mask then add' atomic, which does all the useful work
!      * for us.
      */
!     old_state = pg_atomic_fetch_mask_add_u32(&lock->state, mask, increment);
! 
!     /*
!      * If the state was free according to the mask, the operation was
!      * successful.
!      */
!     if ((old_state & mask) == 0)
      {
!         /* Great! Got the lock. */
  #ifdef LOCK_DEBUG
!         if (mode == LW_EXCLUSIVE)
!             lock->owner = MyProc;
  #endif
!         return false;
!     }
!     else
!     {
!         return true;            /* somebody else has the lock */
      }
  }
  
  /*
diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h
index 2e2ec27..4ec0219
*** a/src/include/port/atomics.h
--- b/src/include/port/atomics.h
*************** pg_atomic_sub_fetch_u32(volatile pg_atom
*** 415,420 ****
--- 415,433 ----
      return pg_atomic_sub_fetch_u32_impl(ptr, sub_);
  }
  
+ /*
+  * pg_atomic_fetch_mask_add_u32 - atomically check the masked bits in the
+  * variable, and if they are all clear, add to the variable.
+  *
+  * Returns the value of ptr before the atomic operation.
+  *
+  * Full barrier semantics.
+  */
+ extern uint32
+ pg_atomic_fetch_mask_add_u32(volatile pg_atomic_uint32 *ptr,
+                              uint32 mask_, uint32 add_);
+ 
+ 
  /* ----
  * The 64 bit operations have the same semantics as their 32bit counterparts
  * if they are available. Check the corresponding 32bit function for
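
For anyone who wants to experiment with the new primitive's semantics outside
the tree, here is a minimal standalone sketch of the generic (CAS-loop)
implementation. It deliberately uses GCC's __atomic builtins instead of
PostgreSQL's pg_atomic_* layer so it compiles on its own; the names
fetch_mask_add_u32, EXCLUSIVE, and LOCK_MASK are illustrative stand-ins
(loosely modeled on LW_VAL_EXCLUSIVE and LW_LOCK_MASK), not part of the patch.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Sketch of the generic pg_atomic_fetch_mask_add_u32 semantics. */
    static uint32_t
    fetch_mask_add_u32(volatile uint32_t *ptr, uint32_t mask, uint32_t add)
    {
        uint32_t    old_value = __atomic_load_n(ptr, __ATOMIC_RELAXED);

        while (true)
        {
            uint32_t    desired = old_value;

            /* only add when all masked bits are clear */
            if ((old_value & mask) == 0)
                desired += add;

            /* on failure, the builtin refreshes old_value for the retry */
            if (__atomic_compare_exchange_n(ptr, &old_value, desired, false,
                                            __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
                return old_value;
        }
    }

    int
    main(void)
    {
        /* toy lock state: low bits count shared holders, one exclusive bit */
        volatile uint32_t state = 0;
        const uint32_t EXCLUSIVE = 1u << 24;
        const uint32_t LOCK_MASK = (1u << 25) - 1;
        uint32_t    before;

        /* shared acquire: only the exclusive bit must be clear */
        before = fetch_mask_add_u32(&state, EXCLUSIVE, 1);
        printf("shared acquired: %s\n", (before & EXCLUSIVE) == 0 ? "yes" : "no");

        /* exclusive attempt now fails: the shared count is nonzero */
        before = fetch_mask_add_u32(&state, LOCK_MASK, EXCLUSIVE);
        printf("exclusive acquired: %s\n", (before & LOCK_MASK) == 0 ? "yes" : "no");

        return 0;
    }

As in the patch, the CAS swaps a value in even when the masked bits are set
(just the unchanged old value), so the call provides full barrier semantics
on both the success and failure paths.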