From 59783c7585e36043ebb2384937226c054d42c766 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Tue, 18 Mar 2025 14:40:06 -0400 Subject: [PATCH v2.10 02/28] bufmgr: Improve stats when buffer is read in concurrently Previously we would count buffers that were read in concurently as being read by the current backend, which leads to double-counting the read IOs globally. Similarly, the buffer hit in that case was never attributed to pg_stat_io and relation-level IO statistics, making cache hit ratios look worse. We also did not account for the cache hit in vacuum cost balancing. This behaviour has historically grown, but there doesn't seem to be any reason to just continue down that path. --- src/backend/storage/buffer/bufmgr.c | 41 ++++++++++++++++++----------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 79ca9d18d07..e76ff1f3224 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -1433,19 +1433,6 @@ WaitReadBuffers(ReadBuffersOperation *operation) io_object = IOOBJECT_RELATION; } - /* - * We count all these blocks as read by this backend. This is traditional - * behavior, but might turn out to be not true if we find that someone - * else has beaten us and completed the read of some of these blocks. In - * that case the system globally double-counts, but we traditionally don't - * count this as a "hit", and we don't have a separate counter for "miss, - * but another backend completed the read". - */ - if (persistence == RELPERSISTENCE_TEMP) - pgBufferUsage.local_blks_read += nblocks; - else - pgBufferUsage.shared_blks_read += nblocks; - for (int i = 0; i < nblocks; ++i) { int io_buffers_len; @@ -1463,8 +1450,13 @@ WaitReadBuffers(ReadBuffersOperation *operation) if (!WaitReadBuffersCanStartIO(buffers[i], false)) { /* - * Report this as a 'hit' for this backend, even though it must - * have started out as a miss in PinBufferForBlock(). + * Report and track this as a 'hit' for this backend, even though + * it must have started out as a miss in PinBufferForBlock(). + * + * Some of the accesses would otherwise never be counted (e.g. + * pgBufferUsage) or counted as a miss (e.g. + * pgstat_count_buffer_hit(), as we always call + * pgstat_count_buffer_read()). */ TRACE_POSTGRESQL_BUFFER_READ_DONE(forknum, blocknum + i, operation->smgr->smgr_rlocator.locator.spcOid, @@ -1472,6 +1464,20 @@ WaitReadBuffers(ReadBuffersOperation *operation) operation->smgr->smgr_rlocator.locator.relNumber, operation->smgr->smgr_rlocator.backend, true); + + if (persistence == RELPERSISTENCE_TEMP) + pgBufferUsage.local_blks_hit += 1; + else + pgBufferUsage.shared_blks_hit += 1; + + if (operation->rel) + pgstat_count_buffer_hit(operation->rel); + + pgstat_count_io_op(io_object, io_context, IOOP_HIT, 1, 0); + + if (VacuumCostActive) + VacuumCostBalance += VacuumCostPageHit; + continue; } @@ -1557,6 +1563,11 @@ WaitReadBuffers(ReadBuffersOperation *operation) false); } + if (persistence == RELPERSISTENCE_TEMP) + pgBufferUsage.local_blks_read += io_buffers_len; + else + pgBufferUsage.shared_blks_read += io_buffers_len; + if (VacuumCostActive) VacuumCostBalance += VacuumCostPageMiss * io_buffers_len; } -- 2.48.1.76.g4e746b1a31.dirty