Skip XLogFlush() when writing out buffers that were read with flush_xlog = false.

ReadBuffer_common(), BufferAlloc() and FlushBuffer() gain a flush_xlog
argument.  ReadBufferExtended() passes RelationNeedsWAL(reln), while
ReadBufferWithoutRelcache() always passes true.  BufferAlloc() records the
value in the buffer header as BM_FLUSH_XLOG, every FlushBuffer() caller passes
the recorded bit back, and FlushBuffer() only calls XLogFlush() when it is set.

Review fix: BufferAlloc() now also clears BM_FLUSH_XLOG when it retags a
recycled buffer.  Previously the bit was only ever OR-ed in, so a buffer last
used with flush_xlog = true kept the flag after being reassigned to a page
read with flush_xlog = false.

NOTE(review): the "index" blob hashes below come from the original submission
and have not been regenerated for the amended bufmgr.c hunk.

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 56569cc..d09af92 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -85,7 +85,7 @@ static volatile BufferDesc *PinCountWaitBuf = NULL;
 static Buffer ReadBuffer_common(SMgrRelation reln,
 				  ForkNumber forkNum, BlockNumber blockNum,
 				  ReadBufferMode mode, BufferAccessStrategy strategy,
-				  bool *hit);
+				  bool flush_xlog, bool *hit);
 static bool PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy);
 static void PinBuffer_Locked(volatile BufferDesc *buf);
 static void UnpinBuffer(volatile BufferDesc *buf, bool fixOwner);
@@ -99,9 +99,10 @@ static void shared_buffer_write_error_callback(void *arg);
 static void local_buffer_write_error_callback(void *arg);
 static volatile BufferDesc *BufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
 			BlockNumber blockNum,
-			BufferAccessStrategy strategy,
+			BufferAccessStrategy strategy, bool flush_xlog,
 			bool *foundPtr);
-static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln);
+static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln,
+			bool flush_xlog);
 static void AtProcExit_Buffers(int code, Datum arg);
 
 
@@ -242,7 +243,7 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
 	 */
 	pgstat_count_buffer_read(reln);
 	buf = ReadBuffer_common(reln->rd_smgr, forkNum, blockNum,
-							mode, strategy, &hit);
+							mode, strategy, RelationNeedsWAL(reln), &hit);
 	if (hit)
 		pgstat_count_buffer_hit(reln);
 	return buf;
@@ -254,9 +255,9 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
  * a relcache entry for the relation.
  *
  * NB: At present, this function may not be used on temporary relations, which
- * is OK, because we only use it during XLOG replay.  If in the future we
- * want to use it on temporary relations, we could pass the backend ID as an
- * additional parameter.
+ * is OK, because we only use it during XLOG replay.  We are therefore entitled
+ * to assume that the backend ID is InvalidBackendId and that XLOG flush is
+ * required.
  */
 Buffer
 ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum,
@@ -267,7 +268,8 @@ ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum,
 
 	SMgrRelation smgr = smgropen(rnode, InvalidBackendId);
 
-	return ReadBuffer_common(smgr, forkNum, blockNum, mode, strategy, &hit);
+	return ReadBuffer_common(smgr, forkNum, blockNum, mode, strategy,
+							 true, &hit);
 }
 
 
@@ -279,7 +281,7 @@ ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum,
 static Buffer
 ReadBuffer_common(SMgrRelation smgr, ForkNumber forkNum,
 				  BlockNumber blockNum, ReadBufferMode mode,
-				  BufferAccessStrategy strategy, bool *hit)
+				  BufferAccessStrategy strategy, bool flush_xlog, bool *hit)
 {
 	volatile BufferDesc *bufHdr;
 	Block		bufBlock;
@@ -319,7 +321,8 @@ ReadBuffer_common(SMgrRelation smgr, ForkNumber forkNum,
 		 * lookup the buffer.  IO_IN_PROGRESS is set if the requested block is
 		 * not currently in memory.
 		 */
-		bufHdr = BufferAlloc(smgr, forkNum, blockNum, strategy, &found);
+		bufHdr = BufferAlloc(smgr, forkNum, blockNum, strategy, flush_xlog,
+							 &found);
 		if (found)
 			pgBufferUsage.shared_blks_hit++;
 		else
@@ -502,7 +505,7 @@ ReadBuffer_common(SMgrRelation smgr, ForkNumber forkNum,
 static volatile BufferDesc *
 BufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
 			BlockNumber blockNum,
-			BufferAccessStrategy strategy,
+			BufferAccessStrategy strategy, bool flush_xlog,
 			bool *foundPtr)
 {
 	BufferTag	newTag;			/* identity of requested block */
@@ -644,7 +647,7 @@ BufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
 											   smgr->smgr_rnode.node.dbNode,
 											   smgr->smgr_rnode.node.relNode);
 
-				FlushBuffer(buf, NULL);
+				FlushBuffer(buf, NULL, (oldFlags & BM_FLUSH_XLOG) != 0);
 				LWLockRelease(buf->content_lock);
 
 				TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(forkNum, blockNum,
@@ -798,7 +801,7 @@ BufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
 	 */
 	buf->tag = newTag;
-	buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_CHECKPOINT_NEEDED | BM_IO_ERROR);
-	buf->flags |= BM_TAG_VALID;
+	buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_FLUSH_XLOG);
+	buf->flags |= BM_TAG_VALID | (flush_xlog ? BM_FLUSH_XLOG : 0);
 	buf->usage_count = 1;
 
 	UnlockBufHdr(buf);
@@ -1574,6 +1577,7 @@ SyncOneBuffer(int buf_id, bool skip_recently_used)
 {
 	volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
 	int			result = 0;
+	bool		flush_xlog;
 
 	/*
 	 * Check whether buffer needs writing.
@@ -1602,6 +1606,9 @@ SyncOneBuffer(int buf_id, bool skip_recently_used)
 		return result;
 	}
 
+	/* remember flush xlog flag */
+	flush_xlog = (bufHdr->flags & BM_FLUSH_XLOG) != 0;
+
 	/*
 	 * Pin it, share-lock it, write it.  (FlushBuffer will do nothing if the
 	 * buffer is clean by the time we've locked it.)
@@ -1609,7 +1616,7 @@ SyncOneBuffer(int buf_id, bool skip_recently_used)
 	PinBuffer_Locked(bufHdr);
 	LWLockAcquire(bufHdr->content_lock, LW_SHARED);
 
-	FlushBuffer(bufHdr, NULL);
+	FlushBuffer(bufHdr, NULL, flush_xlog);
 
 	LWLockRelease(bufHdr->content_lock);
 	UnpinBuffer(bufHdr, true);
@@ -1820,7 +1827,7 @@ BufferGetTag(Buffer buffer, RelFileNode *rnode, ForkNumber *forknum,
  * as the second parameter.  If not, pass NULL.
  */
 static void
-FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
+FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln, bool flush_xlog)
 {
 	XLogRecPtr	recptr;
 	ErrorContextCallback errcontext;
@@ -1854,8 +1861,11 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
 	 * rule that log updates must hit disk before any of the data-file changes
 	 * they describe do.
 	 */
-	recptr = BufferGetLSN(buf);
-	XLogFlush(recptr);
+	if (flush_xlog)
+	{
+		recptr = BufferGetLSN(buf);
+		XLogFlush(recptr);
+	}
 
 	/*
 	 * Now it's safe to write buffer to disk. Note that no one else should
@@ -2067,6 +2077,7 @@ FlushRelationBuffers(Relation rel)
 {
 	int			i;
 	volatile BufferDesc *bufHdr;
+	bool		flush_xlog;
 
 	/* Open rel at the smgr level if not already done */
 	RelationOpenSmgr(rel);
@@ -2113,9 +2124,10 @@ FlushRelationBuffers(Relation rel)
 		if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
 			(bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
 		{
+			flush_xlog = (bufHdr->flags & BM_FLUSH_XLOG) != 0;
 			PinBuffer_Locked(bufHdr);
 			LWLockAcquire(bufHdr->content_lock, LW_SHARED);
-			FlushBuffer(bufHdr, rel->rd_smgr);
+			FlushBuffer(bufHdr, rel->rd_smgr, flush_xlog);
 			LWLockRelease(bufHdr->content_lock);
 			UnpinBuffer(bufHdr, true);
 		}
@@ -2144,6 +2156,7 @@ FlushDatabaseBuffers(Oid dbid)
 {
 	int			i;
 	volatile BufferDesc *bufHdr;
+	bool		flush_xlog;
 
 	/* Make sure we can handle the pin inside the loop */
 	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
@@ -2155,9 +2168,10 @@ FlushDatabaseBuffers(Oid dbid)
 		if (bufHdr->tag.rnode.dbNode == dbid &&
 			(bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
 		{
+			flush_xlog = (bufHdr->flags & BM_FLUSH_XLOG) != 0;
 			PinBuffer_Locked(bufHdr);
 			LWLockAcquire(bufHdr->content_lock, LW_SHARED);
-			FlushBuffer(bufHdr, NULL);
+			FlushBuffer(bufHdr, NULL, flush_xlog);
 			LWLockRelease(bufHdr->content_lock);
 			UnpinBuffer(bufHdr, true);
 		}
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 0c18fb5..b29c618 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -37,6 +37,7 @@
 #define BM_JUST_DIRTIED			(1 << 5)		/* dirtied since write started */
 #define BM_PIN_COUNT_WAITER		(1 << 6)		/* have waiter for sole pin */
 #define BM_CHECKPOINT_NEEDED	(1 << 7)		/* must write for checkpoint */
+#define BM_FLUSH_XLOG			(1 << 8)		/* must flush xlog */
 
 typedef bits16 BufFlags;
 