From 4ee7be145f977d23672b8e6066d3967895f21eef Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Wed, 26 Oct 2022 14:14:11 -0700 Subject: [PATCH v4 12/15] hio: Use ExtendBufferedRelBy() --- src/backend/access/heap/hio.c | 287 +++++++++++++++++----------------- 1 file changed, 147 insertions(+), 140 deletions(-) diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index 65886839e70..48cfcff975f 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -185,90 +185,6 @@ GetVisibilityMapPins(Relation relation, Buffer buffer1, Buffer buffer2, } } -/* - * Extend a relation by multiple blocks to avoid future contention on the - * relation extension lock. Our goal is to pre-extend the relation by an - * amount which ramps up as the degree of contention ramps up, but limiting - * the result to some sane overall value. - */ -static void -RelationAddExtraBlocks(Relation relation, BulkInsertState bistate) -{ - BlockNumber blockNum, - firstBlock = InvalidBlockNumber; - int extraBlocks; - int lockWaiters; - - /* Use the length of the lock wait queue to judge how much to extend. */ - lockWaiters = RelationExtensionLockWaiterCount(relation); - if (lockWaiters <= 0) - return; - - /* - * It might seem like multiplying the number of lock waiters by as much as - * 20 is too aggressive, but benchmarking revealed that smaller numbers - * were insufficient. 512 is just an arbitrary cap to prevent - * pathological results. - */ - extraBlocks = Min(512, lockWaiters * 20); - - do - { - Buffer buffer; - Page page; - Size freespace; - - /* - * Extend by one page. This should generally match the main-line - * extension code in RelationGetBufferForTuple, except that we hold - * the relation extension lock throughout, and we don't immediately - * initialize the page (see below). - */ - buffer = ReadBufferBI(relation, P_NEW, RBM_ZERO_AND_LOCK, bistate); - page = BufferGetPage(buffer); - - if (!PageIsNew(page)) - elog(ERROR, "page %u of relation \"%s\" should be empty but is not", - BufferGetBlockNumber(buffer), - RelationGetRelationName(relation)); - - /* - * Add the page to the FSM without initializing. If we were to - * initialize here, the page would potentially get flushed out to disk - * before we add any useful content. There's no guarantee that that'd - * happen before a potential crash, so we need to deal with - * uninitialized pages anyway, thus avoid the potential for - * unnecessary writes. - */ - - /* we'll need this info below */ - blockNum = BufferGetBlockNumber(buffer); - freespace = BufferGetPageSize(buffer) - SizeOfPageHeaderData; - - UnlockReleaseBuffer(buffer); - - /* Remember first block number thus added. */ - if (firstBlock == InvalidBlockNumber) - firstBlock = blockNum; - - /* - * Immediately update the bottom level of the FSM. This has a good - * chance of making this page visible to other concurrently inserting - * backends, and we want that to happen without delay. - */ - RecordPageWithFreeSpace(relation, blockNum, freespace); - } - while (--extraBlocks > 0); - - /* - * Updating the upper levels of the free space map is too expensive to do - * for every block, but it's worth doing once at the end to make sure that - * subsequent insertion activity sees all of those nifty free pages we - * just inserted. - */ - FreeSpaceMapVacuumRange(relation, firstBlock, blockNum + 1); -} - /* * RelationGetBufferForTuple * @@ -354,6 +270,9 @@ RelationGetBufferForTuple(Relation relation, Size len, len = MAXALIGN(len); /* be conservative */ + if (num_pages <= 0) + num_pages = 1; + /* Bulk insert is not supported for updates, only inserts. */ Assert(otherBuffer == InvalidBuffer || !bistate); @@ -558,18 +477,46 @@ loop: ReleaseBuffer(buffer); } - /* Without FSM, always fall out of the loop and extend */ - if (!use_fsm) - break; + if (bistate + && bistate->next_free != InvalidBlockNumber + && bistate->next_free <= bistate->last_free) + { + /* + * We bulk extended the relation before, and there are still some + * unused pages from that extension, so we don't need to look in + * the FSM for a new page. But do record the free space from the + * last page, somebody might insert narrower tuples later. + */ + if (use_fsm) + RecordPageWithFreeSpace(relation, targetBlock, pageFreeSpace); - /* - * Update FSM as to condition of this page, and ask for another page - * to try. - */ - targetBlock = RecordAndGetPageWithFreeSpace(relation, - targetBlock, - pageFreeSpace, - targetFreeSpace); + Assert(bistate->last_free != InvalidBlockNumber && + bistate->next_free <= bistate->last_free); + targetBlock = bistate->next_free; + if (bistate->next_free >= bistate->last_free) + { + bistate->next_free = InvalidBlockNumber; + bistate->last_free = InvalidBlockNumber; + } + else + bistate->next_free++; + } + else if (!use_fsm) + { + /* Without FSM, always fall out of the loop and extend */ + break; + } + else + { + /* + * Update FSM as to condition of this page, and ask for another + * page to try. + */ + targetBlock = RecordAndGetPageWithFreeSpace(relation, + targetBlock, + pageFreeSpace, + targetFreeSpace); + } } /* @@ -582,60 +529,120 @@ loop: */ needLock = !RELATION_IS_LOCAL(relation); - /* - * If we need the lock but are not able to acquire it immediately, we'll - * consider extending the relation by multiple blocks at a time to manage - * contention on the relation extension lock. However, this only makes - * sense if we're using the FSM; otherwise, there's no point. - */ - if (needLock) { - if (!use_fsm) - LockRelationForExtension(relation, ExclusiveLock); - else if (!ConditionalLockRelationForExtension(relation, ExclusiveLock)) +#define MAX_BUFFERS 64 + Buffer victim_buffers[MAX_BUFFERS]; + BlockNumber firstBlock = InvalidBlockNumber; + BlockNumber firstBlockFSM = InvalidBlockNumber; + BlockNumber curBlock; + uint32 extend_by_pages; + uint32 no_fsm_pages; + uint32 waitcount; + + extend_by_pages = num_pages; + + /* + * Multiply the number of pages to extend by the number of waiters. Do + * this even if we're not using the FSM, as it does relieve + * contention. Pages will be found via bistate->next_free. + */ + if (needLock) + waitcount = RelationExtensionLockWaiterCount(relation); + else + waitcount = 0; + extend_by_pages += extend_by_pages * waitcount; + + /* + * can't extend by more than MAX_BUFFERS, we need to pin them all + * concurrently. FIXME: Need an NBuffers / MaxBackends type limit + * here. + */ + extend_by_pages = Min(extend_by_pages, MAX_BUFFERS); + + /* + * How many of the extended pages not to enter into the FSM. + * + * Only enter pages that we don't need ourselves into the FSM. + * Otherwise every other backend will immediately try to use the pages + * this backend neds itself, causing unnecessary contention. + * + * Bulk extended pages are remembered in bistate->next_free_buffer. So + * without a bistate we can't directly make use of them. + * + * Never enter the page returned into the FSM, we'll immediately use + * it. + */ + if (num_pages > 1 && bistate == NULL) + no_fsm_pages = 1; + else + no_fsm_pages = num_pages; + + if (bistate && bistate->current_buf != InvalidBuffer) { - /* Couldn't get the lock immediately; wait for it. */ - LockRelationForExtension(relation, ExclusiveLock); + ReleaseBuffer(bistate->current_buf); + bistate->current_buf = InvalidBuffer; + } - /* - * Check if some other backend has extended a block for us while - * we were waiting on the lock. - */ - targetBlock = GetPageWithFreeSpace(relation, targetFreeSpace); + firstBlock = ExtendBufferedRelBy(EB_REL(relation), MAIN_FORKNUM, + bistate ? bistate->strategy : NULL, + EB_LOCK_FIRST, + extend_by_pages, + victim_buffers, + &extend_by_pages); - /* - * If some other waiter has already extended the relation, we - * don't need to do so; just use the existing freespace. - */ - if (targetBlock != InvalidBlockNumber) + /* + * Relation is now extended. Make all but the first buffer available + * to other backends. + * + * XXX: We don't necessarily need to release pin / update FSM while + * holding the extension lock. But there are some advantages. + */ + curBlock = firstBlock; + for (uint32 i = 0; i < extend_by_pages; i++, curBlock++) + { + Assert(curBlock == BufferGetBlockNumber(victim_buffers[i])); + Assert(BlockNumberIsValid(curBlock)); + + /* don't release the pin on the page returned by this function */ + if (i > 0) + ReleaseBuffer(victim_buffers[i]); + + if (i >= no_fsm_pages && use_fsm) { - UnlockRelationForExtension(relation, ExclusiveLock); - goto loop; - } + if (firstBlockFSM == InvalidBlockNumber) + firstBlockFSM = curBlock; - /* Time to bulk-extend. */ - RelationAddExtraBlocks(relation, bistate); + RecordPageWithFreeSpace(relation, + curBlock, + BufferGetPageSize(victim_buffers[i]) - SizeOfPageHeaderData); + } + } + + if (use_fsm && firstBlockFSM != InvalidBlockNumber) + FreeSpaceMapVacuumRange(relation, firstBlockFSM, firstBlock + num_pages); + + if (bistate) + { + if (extend_by_pages > 1) + { + bistate->next_free = firstBlock + 1; + bistate->last_free = firstBlock + extend_by_pages - 1; + } + else + { + bistate->next_free = InvalidBlockNumber; + bistate->last_free = InvalidBlockNumber; + } + } + + buffer = victim_buffers[0]; + if (bistate) + { + IncrBufferRefCount(buffer); + bistate->current_buf = buffer; } } - /* - * In addition to whatever extension we performed above, we always add at - * least one block to satisfy our own request. - * - * XXX This does an lseek - rather expensive - but at the moment it is the - * only way to accurately determine how many blocks are in a relation. Is - * it worth keeping an accurate file length in shared memory someplace, - * rather than relying on the kernel to do it for us? - */ - buffer = ReadBufferBI(relation, P_NEW, RBM_ZERO_AND_LOCK, bistate); - - /* - * Release the file-extension lock; it's now OK for someone else to extend - * the relation some more. - */ - if (needLock) - UnlockRelationForExtension(relation, ExclusiveLock); - /* * We need to initialize the empty new page. Double-check that it really * is empty (this should never happen, but if it does we don't want to -- 2.38.0