From 7ba696105c6a45d7b9c7c08fc178d8af4f60c910 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Wed, 29 Jun 2022 18:37:42 -0400 Subject: [PATCH v25 3/4] Track IO operation statistics Introduce "IOOp", an IO operation done by a backend, and "IOPath", the location or type of IO done by a backend. For example, the checkpointer may write a shared buffer out. This would be counted as an IOOp "write" on an IOPath IOPATH_SHARED by BackendType "checkpointer". Each IOOp (alloc, extend, fsync, read, write) is counted per IOPath (local, shared, or strategy) through a call to pgstat_count_io_op(). The primary concern of these statistics is IO operations on data blocks during the course of normal database operations. IO done by, for example, the archiver or syslogger is not counted in these statistics. IOPATH_LOCAL and IOPATH_SHARED IOPaths concern operations on local and shared buffers. The IOPATH_STRATEGY IOPath concerns buffers alloc'd/extended/fsync'd/read/written as part of a BufferAccessStrategy. IOOP_ALLOC is counted for IOPATH_SHARED and IOPATH_LOCAL whenever a buffer is acquired through [Local]BufferAlloc(). IOOP_ALLOC for IOPATH_STRATEGY is counted whenever a buffer already in the strategy ring is reused. And IOOP_WRITE for IOPATH_STRATEGY is counted whenever the reused dirty buffer is written out. Stats on IOOps for all IOPaths for a backend are initially accumulated locally. Later they are flushed to shared memory and accumulated with those from all other backends, exited and live. The accumulated stats in shared memory could be extended in the future with per-backend stats -- useful for per connection IO statistics and monitoring. Some BackendTypes will not flush their pending statistics at regular intervals and explicitly call pgstat_flush_io_ops() during the course of normal operations to flush their backend-local IO Operation statistics to shared memory in a timely manner. 
Author: Melanie Plageman Reviewed-by: Justin Pryzby , Kyotaro Horiguchi Discussion: https://www.postgresql.org/message-id/flat/20200124195226.lth52iydq2n2uilq%40alap3.anarazel.de --- src/backend/postmaster/checkpointer.c | 1 + src/backend/storage/buffer/bufmgr.c | 53 ++++- src/backend/storage/buffer/freelist.c | 51 ++++- src/backend/storage/buffer/localbuf.c | 6 + src/backend/storage/sync/sync.c | 2 + src/backend/utils/activity/Makefile | 1 + src/backend/utils/activity/pgstat.c | 36 ++++ src/backend/utils/activity/pgstat_bgwriter.c | 7 +- .../utils/activity/pgstat_checkpointer.c | 7 +- src/backend/utils/activity/pgstat_io_ops.c | 192 ++++++++++++++++++ src/backend/utils/activity/pgstat_relation.c | 19 +- src/backend/utils/activity/pgstat_wal.c | 4 +- src/backend/utils/adt/pgstatfuncs.c | 4 +- src/include/miscadmin.h | 2 + src/include/pgstat.h | 58 ++++++ src/include/storage/buf_internals.h | 2 +- src/include/utils/backend_status.h | 36 ++++ src/include/utils/pgstat_internal.h | 24 +++ 18 files changed, 485 insertions(+), 20 deletions(-) create mode 100644 src/backend/utils/activity/pgstat_io_ops.c diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 5fc076fc14..a06331e1eb 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -1116,6 +1116,7 @@ ForwardSyncRequest(const FileTag *ftag, SyncRequestType type) if (!AmBackgroundWriterProcess()) CheckpointerShmem->num_backend_fsync++; LWLockRelease(CheckpointerCommLock); + pgstat_count_io_op(IOOP_FSYNC, IOPATH_SHARED); return false; } diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index c7d7abcd73..e872d7edc6 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -482,7 +482,7 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr); -static void FlushBuffer(BufferDesc *buf, SMgrRelation reln); 
+static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOPath iopath); static void FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, @@ -813,6 +813,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BufferDesc *bufHdr; Block bufBlock; bool found; + IOPath io_path; bool isExtend; bool isLocalBuf = SmgrIsTemp(smgr); @@ -978,8 +979,17 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr); + if (isLocalBuf) + io_path = IOPATH_LOCAL; + else if (strategy != NULL) + io_path = IOPATH_STRATEGY; + else + io_path = IOPATH_SHARED; + if (isExtend) { + + pgstat_count_io_op(IOOP_EXTEND, io_path); /* new buffers are zero-filled */ MemSet((char *) bufBlock, 0, BLCKSZ); /* don't set checksum for all-zero page */ @@ -1010,6 +1020,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, smgrread(smgr, forkNum, blockNum, (char *) bufBlock); + pgstat_count_io_op(IOOP_READ, io_path); + if (track_io_timing) { INSTR_TIME_SET_CURRENT(io_time); @@ -1180,6 +1192,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, /* Loop here in case we have to try another victim buffer */ for (;;) { + bool write_from_ring = false; /* * Ensure, while the spinlock's not yet held, that there's a free * refcount entry. 
@@ -1227,6 +1240,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, if (LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED)) { + IOPath iopath; /* * If using a nondefault strategy, and writing the buffer * would require a WAL flush, let the strategy decide whether @@ -1244,7 +1258,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, UnlockBufHdr(buf, buf_state); if (XLogNeedsFlush(lsn) && - StrategyRejectBuffer(strategy, buf)) + StrategyRejectBuffer(strategy, buf, &write_from_ring)) { /* Drop lock/pin and loop around for another buffer */ LWLockRelease(BufferDescriptorGetContentLock(buf)); @@ -1253,13 +1267,27 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, } } + /* + * When a strategy is in use, if the target dirty buffer is an existing + * strategy buffer being reused, count this as a strategy write for the + * purposes of IO Operations statistics tracking. + * + * All dirty shared buffers upon first being added to the ring will be + * counted as shared buffer writes. + * + * When a strategy is not in use, the write can only be a + * "regular" write of a dirty shared buffer. + */ + + iopath = write_from_ring ? 
IOPATH_STRATEGY : IOPATH_SHARED; + /* OK, do the I/O */ TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(forkNum, blockNum, smgr->smgr_rlocator.locator.spcOid, smgr->smgr_rlocator.locator.dbOid, smgr->smgr_rlocator.locator.relNumber); - FlushBuffer(buf, NULL); + FlushBuffer(buf, NULL, iopath); LWLockRelease(BufferDescriptorGetContentLock(buf)); ScheduleBufferTagForWriteback(&BackendWritebackContext, @@ -2563,7 +2591,7 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context) PinBuffer_Locked(bufHdr); LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED); - FlushBuffer(bufHdr, NULL); + FlushBuffer(bufHdr, NULL, IOPATH_SHARED); LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); @@ -2810,9 +2838,12 @@ BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, * * If the caller has an smgr reference for the buffer's relation, pass it * as the second parameter. If not, pass NULL. + * + * IOPath will always be IOPATH_SHARED except when a buffer access strategy is + * used and the buffer being flushed is a buffer from the strategy ring. 
*/ static void -FlushBuffer(BufferDesc *buf, SMgrRelation reln) +FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOPath iopath) { XLogRecPtr recptr; ErrorContextCallback errcallback; @@ -2892,6 +2923,8 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln) */ bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum); + pgstat_count_io_op(IOOP_WRITE, iopath); + if (track_io_timing) INSTR_TIME_SET_CURRENT(io_start); @@ -3539,6 +3572,8 @@ FlushRelationBuffers(Relation rel) localpage, false); + pgstat_count_io_op(IOOP_WRITE, IOPATH_LOCAL); + buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED); pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); @@ -3574,7 +3609,7 @@ FlushRelationBuffers(Relation rel) { PinBuffer_Locked(bufHdr); LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED); - FlushBuffer(bufHdr, RelationGetSmgr(rel)); + FlushBuffer(bufHdr, RelationGetSmgr(rel), IOPATH_SHARED); LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); UnpinBuffer(bufHdr, true); } @@ -3669,7 +3704,7 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels) { PinBuffer_Locked(bufHdr); LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED); - FlushBuffer(bufHdr, srelent->srel); + FlushBuffer(bufHdr, srelent->srel, IOPATH_SHARED); LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); UnpinBuffer(bufHdr, true); } @@ -3877,7 +3912,7 @@ FlushDatabaseBuffers(Oid dbid) { PinBuffer_Locked(bufHdr); LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED); - FlushBuffer(bufHdr, NULL); + FlushBuffer(bufHdr, NULL, IOPATH_SHARED); LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); UnpinBuffer(bufHdr, true); } @@ -3904,7 +3939,7 @@ FlushOneBuffer(Buffer buffer) Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr))); - FlushBuffer(bufHdr, NULL); + FlushBuffer(bufHdr, NULL, IOPATH_SHARED); } /* diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index 990e081aae..29f5cbeab6 100644 --- 
a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -15,6 +15,7 @@ */ #include "postgres.h" +#include "pgstat.h" #include "port/atomics.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" @@ -212,8 +213,20 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state) if (strategy != NULL) { buf = GetBufferFromRing(strategy, buf_state); - if (buf != NULL) + if (strategy->current_was_in_ring) + { + /* + * When a strategy is in use, reused buffers from the strategy ring will + * be counted as allocations for the purposes of IO Operation statistics + * tracking. + * + * However, even when a strategy is in use, if a new buffer must be + * allocated from shared buffers and added to the ring, this is counted + * as a IOPATH_SHARED allocation. + */ + pgstat_count_io_op(IOOP_ALLOC, IOPATH_STRATEGY); return buf; + } } /* @@ -247,6 +260,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state) * the rate of buffer consumption. Note that buffers recycled by a * strategy object are intentionally not counted here. */ + pgstat_count_io_op(IOOP_ALLOC, IOPATH_SHARED); pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1); /* @@ -682,16 +696,38 @@ AddBufferToRing(BufferAccessStrategy strategy, BufferDesc *buf) * if this buffer should be written and re-used. */ bool -StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf) +StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool *write_from_ring) { - /* We only do this in bulkread mode */ + + /* + * We only reject reusing and writing out the strategy buffer in bulkread + * mode. + */ if (strategy->btype != BAS_BULKREAD) + { + /* + * If the buffer was from the ring and we are not rejecting it, consider it + * a write of a strategy buffer. Note that this assumes that the buffer is + * dirty. 
+ */ + if (strategy->current_was_in_ring) + *write_from_ring = true; return false; + } - /* Don't muck with behavior of normal buffer-replacement strategy */ + /* + * Don't muck with behavior of normal buffer-replacement strategy. Though we + * are not rejecting this buffer, write_from_ring is false because shared + * buffers that are added to the ring, either initially or when reuse is not + * possible because all existing strategy buffers are pinned, are not + * considered strategy writes for the purposes of IO Operation statistics. + */ if (!strategy->current_was_in_ring || strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf)) + { + *write_from_ring = false; return false; + } /* * Remove the dirty buffer from the ring; necessary to prevent infinite @@ -699,5 +735,12 @@ StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf) */ strategy->buffers[strategy->current] = InvalidBuffer; + /* + * Caller should not use this flag since the buffer is being rejected (and it + * should have been initialized to false anyway) and will not be written out. + * Set the flag here anyway for clarity. 
+ */ + *write_from_ring = false; + return true; } diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 9c038851d7..edd3296dd7 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -15,6 +15,7 @@ */ #include "postgres.h" +#include "pgstat.h" #include "access/parallel.h" #include "catalog/catalog.h" #include "executor/instrument.h" @@ -123,6 +124,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, if (LocalBufHash == NULL) InitLocalBuffers(); + /* See if the desired buffer already exists */ hresult = (LocalBufferLookupEnt *) hash_search(LocalBufHash, (void *) &newTag, HASH_FIND, NULL); @@ -196,6 +198,8 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, LocalRefCount[b]++; ResourceOwnerRememberBuffer(CurrentResourceOwner, BufferDescriptorGetBuffer(bufHdr)); + + pgstat_count_io_op(IOOP_ALLOC, IOPATH_LOCAL); break; } } @@ -226,6 +230,8 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, localpage, false); + pgstat_count_io_op(IOOP_WRITE, IOPATH_LOCAL); + /* Mark not-dirty now in case we error out below */ buf_state &= ~BM_DIRTY; pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); diff --git a/src/backend/storage/sync/sync.c b/src/backend/storage/sync/sync.c index e1fb631003..20e259edef 100644 --- a/src/backend/storage/sync/sync.c +++ b/src/backend/storage/sync/sync.c @@ -432,6 +432,8 @@ ProcessSyncRequests(void) total_elapsed += elapsed; processed++; + pgstat_count_io_op(IOOP_FSYNC, IOPATH_SHARED); + if (log_checkpoints) elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f ms", processed, diff --git a/src/backend/utils/activity/Makefile b/src/backend/utils/activity/Makefile index a2e8507fd6..0098785089 100644 --- a/src/backend/utils/activity/Makefile +++ b/src/backend/utils/activity/Makefile @@ -22,6 +22,7 @@ OBJS = \ pgstat_checkpointer.o \ pgstat_database.o \ pgstat_function.o \ + 
pgstat_io_ops.o \ pgstat_relation.o \ pgstat_replslot.o \ pgstat_shmem.o \ diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index 88e5dd1b2b..3238d9ba85 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -359,6 +359,15 @@ static const PgStat_KindInfo pgstat_kind_infos[PGSTAT_NUM_KINDS] = { .snapshot_cb = pgstat_checkpointer_snapshot_cb, }, + [PGSTAT_KIND_IOOPS] = { + .name = "io_ops", + + .fixed_amount = true, + + .reset_all_cb = pgstat_io_ops_reset_all_cb, + .snapshot_cb = pgstat_io_ops_snapshot_cb, + }, + [PGSTAT_KIND_SLRU] = { .name = "slru", @@ -628,6 +637,9 @@ pgstat_report_stat(bool force) /* flush database / relation / function / ... stats */ partial_flush |= pgstat_flush_pending_entries(nowait); + /* flush IO Operations stats */ + partial_flush |= pgstat_flush_io_ops(nowait); + /* flush wal stats */ partial_flush |= pgstat_flush_wal(nowait); @@ -1312,6 +1324,12 @@ pgstat_write_statsfile(void) pgstat_build_snapshot_fixed(PGSTAT_KIND_CHECKPOINTER); write_chunk_s(fpout, &pgStatLocal.snapshot.checkpointer); + /* + * Write IO Operations stats struct + */ + pgstat_build_snapshot_fixed(PGSTAT_KIND_IOOPS); + write_chunk_s(fpout, &pgStatLocal.snapshot.io_ops); + /* * Write SLRU stats struct */ @@ -1427,8 +1445,10 @@ pgstat_read_statsfile(void) FILE *fpin; int32 format_id; bool found; + PgStat_BackendIOPathOps io_stats; const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME; PgStat_ShmemControl *shmem = pgStatLocal.shmem; + PgStatShared_BackendIOPathOps *io_stats_shmem = &shmem->io_ops; /* shouldn't be called from postmaster */ Assert(IsUnderPostmaster || !IsPostmasterEnvironment); @@ -1486,6 +1506,22 @@ pgstat_read_statsfile(void) if (!read_chunk_s(fpin, &shmem->checkpointer.stats)) goto error; + /* + * Read IO Operations stats struct + */ + if (!read_chunk_s(fpin, &io_stats)) + goto error; + + io_stats_shmem->stat_reset_timestamp = io_stats.stat_reset_timestamp; + + for (int i = 0; i < 
BACKEND_NUM_TYPES; i++) + { + PgStat_IOPathOps *stats = &io_stats.stats[i]; + PgStatShared_IOPathOps *stats_shmem = &io_stats_shmem->stats[i]; + + memcpy(stats_shmem->data, stats->data, sizeof(stats->data)); + } + /* * Read SLRU stats struct */ diff --git a/src/backend/utils/activity/pgstat_bgwriter.c b/src/backend/utils/activity/pgstat_bgwriter.c index fbb1edc527..3d7f90a1b7 100644 --- a/src/backend/utils/activity/pgstat_bgwriter.c +++ b/src/backend/utils/activity/pgstat_bgwriter.c @@ -24,7 +24,7 @@ PgStat_BgWriterStats PendingBgWriterStats = {0}; /* - * Report bgwriter statistics + * Report bgwriter and IO Operation statistics */ void pgstat_report_bgwriter(void) @@ -56,6 +56,11 @@ pgstat_report_bgwriter(void) * Clear out the statistics buffer, so it can be re-used. */ MemSet(&PendingBgWriterStats, 0, sizeof(PendingBgWriterStats)); + + /* + * Report IO Operations statistics + */ + pgstat_flush_io_ops(false); } /* diff --git a/src/backend/utils/activity/pgstat_checkpointer.c b/src/backend/utils/activity/pgstat_checkpointer.c index af8d513e7b..cfcf127210 100644 --- a/src/backend/utils/activity/pgstat_checkpointer.c +++ b/src/backend/utils/activity/pgstat_checkpointer.c @@ -24,7 +24,7 @@ PgStat_CheckpointerStats PendingCheckpointerStats = {0}; /* - * Report checkpointer statistics + * Report checkpointer and IO Operation statistics */ void pgstat_report_checkpointer(void) @@ -62,6 +62,11 @@ pgstat_report_checkpointer(void) * Clear out the statistics buffer, so it can be re-used. 
*/ MemSet(&PendingCheckpointerStats, 0, sizeof(PendingCheckpointerStats)); + + /* + * Report IO Operation statistics + */ + pgstat_flush_io_ops(false); } /* diff --git a/src/backend/utils/activity/pgstat_io_ops.c b/src/backend/utils/activity/pgstat_io_ops.c new file mode 100644 index 0000000000..6e7351660f --- /dev/null +++ b/src/backend/utils/activity/pgstat_io_ops.c @@ -0,0 +1,192 @@ +/* ------------------------------------------------------------------------- + * + * pgstat_io_ops.c + * Implementation of IO operation statistics. + * + * This file contains the implementation of IO operation statistics. It is kept + * separate from pgstat.c to enforce the line between the statistics access / + * storage implementation and the details about individual types of + * statistics. + * + * Copyright (c) 2001-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/activity/pgstat_io_ops.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "utils/pgstat_internal.h" + +static PgStat_IOPathOps pending_IOOpStats; +bool have_ioopstats = false; + + +/* + * Flush out locally pending IO Operation statistics entries + * + * If there are no locally pending IO Operation stats, this function returns + * false without flushing anything. + * + * If nowait is true, this function returns true if the lock could not be + * acquired. Otherwise return false. 
+ */ +bool +pgstat_flush_io_ops(bool nowait) +{ + PgStatShared_IOPathOps *stats_shmem; + + if (!have_ioopstats) + return false; + + stats_shmem = + &pgStatLocal.shmem->io_ops.stats[backend_type_get_idx(MyBackendType)]; + + if (!nowait) + LWLockAcquire(&stats_shmem->lock, LW_EXCLUSIVE); + else if (!LWLockConditionalAcquire(&stats_shmem->lock, LW_EXCLUSIVE)) + return true; + + + for (int i = 0; i < IOPATH_NUM_TYPES; i++) + { + PgStat_IOOpCounters *sharedent = &stats_shmem->data[i]; + PgStat_IOOpCounters *pendingent = &pending_IOOpStats.data[i]; + +#define IO_OP_ACC(fld) sharedent->fld += pendingent->fld + IO_OP_ACC(allocs); + IO_OP_ACC(extends); + IO_OP_ACC(fsyncs); + IO_OP_ACC(reads); + IO_OP_ACC(writes); +#undef IO_OP_ACC + } + + LWLockRelease(&stats_shmem->lock); + + memset(&pending_IOOpStats, 0, sizeof(pending_IOOpStats)); + + have_ioopstats = false; + + return false; +} + +void +pgstat_io_ops_snapshot_cb(void) +{ + PgStatShared_BackendIOPathOps *all_backend_stats_shmem = &pgStatLocal.shmem->io_ops; + PgStat_BackendIOPathOps *all_backend_stats_snap = &pgStatLocal.snapshot.io_ops; + + for (int i = 0; i < BACKEND_NUM_TYPES; i++) + { + PgStatShared_IOPathOps *stats_shmem = &all_backend_stats_shmem->stats[i]; + PgStat_IOPathOps *stats_snap = &all_backend_stats_snap->stats[i]; + + LWLockAcquire(&stats_shmem->lock, LW_EXCLUSIVE); + /* + * Use the lock in the first BackendType's PgStat_IOPathOps to protect the + * reset timestamp as well. 
+ */ + if (i == 0) + all_backend_stats_snap->stat_reset_timestamp = all_backend_stats_shmem->stat_reset_timestamp; + + memcpy(stats_snap->data, stats_shmem->data, sizeof(stats_shmem->data)); + LWLockRelease(&stats_shmem->lock); + } + +} + +void +pgstat_io_ops_reset_all_cb(TimestampTz ts) +{ + PgStatShared_BackendIOPathOps *all_backend_stats_shmem = &pgStatLocal.shmem->io_ops; + + for (int i = 0; i < BACKEND_NUM_TYPES; i++) + { + PgStatShared_IOPathOps *stats_shmem = &all_backend_stats_shmem->stats[i]; + + LWLockAcquire(&stats_shmem->lock, LW_EXCLUSIVE); + + /* + * Use the lock in the first BackendType's PgStat_IOPathOps to protect the + * reset timestamp as well. + */ + if (i == 0) + all_backend_stats_shmem->stat_reset_timestamp = ts; + + memset(stats_shmem->data, 0, sizeof(stats_shmem->data)); + LWLockRelease(&stats_shmem->lock); + } +} + +void +pgstat_count_io_op(IOOp io_op, IOPath io_path) +{ + PgStat_IOOpCounters *pending_counters = &pending_IOOpStats.data[io_path]; + + switch (io_op) + { + case IOOP_ALLOC: + pending_counters->allocs++; + break; + case IOOP_EXTEND: + pending_counters->extends++; + break; + case IOOP_FSYNC: + pending_counters->fsyncs++; + break; + case IOOP_READ: + pending_counters->reads++; + break; + case IOOP_WRITE: + pending_counters->writes++; + break; + } + + have_ioopstats = true; +} + +PgStat_BackendIOPathOps* +pgstat_fetch_backend_io_path_ops(void) +{ + pgstat_snapshot_fixed(PGSTAT_KIND_IOOPS); + + return &pgStatLocal.snapshot.io_ops; +} + +const char * +pgstat_io_path_desc(IOPath io_path) +{ + switch (io_path) + { + case IOPATH_LOCAL: + return "Local"; + case IOPATH_SHARED: + return "Shared"; + case IOPATH_STRATEGY: + return "Strategy"; + } + + elog(ERROR, "unrecognized IOPath value: %d", io_path); +} + +const char * +pgstat_io_op_desc(IOOp io_op) +{ + switch (io_op) + { + case IOOP_ALLOC: + return "Alloc"; + case IOOP_EXTEND: + return "Extend"; + case IOOP_FSYNC: + return "Fsync"; + case IOOP_READ: + return "Read"; + case IOOP_WRITE: 
+ return "Write"; + } + + elog(ERROR, "unrecognized IOOp value: %d", io_op); +} diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index a846d9ffb6..a17b3336db 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -205,7 +205,7 @@ pgstat_drop_relation(Relation rel) } /* - * Report that the table was just vacuumed. + * Report that the table was just vacuumed and flush IO Operation statistics. */ void pgstat_report_vacuum(Oid tableoid, bool shared, @@ -257,10 +257,18 @@ pgstat_report_vacuum(Oid tableoid, bool shared, } pgstat_unlock_entry(entry_ref); + + /* + * Flush IO Operations statistics now. pgstat_report_stat() will flush IO + * Operation stats, however this will not be called after an entire + * autovacuum cycle is done -- which will likely vacuum many relations -- or + * until the VACUUM command has processed all tables and committed. + */ + pgstat_flush_io_ops(false); } /* - * Report that the table was just analyzed. + * Report that the table was just analyzed and flush IO Operation statistics. * * Caller must provide new live- and dead-tuples estimates, as well as a * flag indicating whether to reset the changes_since_analyze counter. @@ -340,6 +348,13 @@ pgstat_report_analyze(Relation rel, } pgstat_unlock_entry(entry_ref); + + /* + * Flush IO Operations statistics explicitly for the same reason as in + * pgstat_report_vacuum(). We don't want to wait for an entire ANALYZE + * command to complete before updating stats. + */ + pgstat_flush_io_ops(false); } /* diff --git a/src/backend/utils/activity/pgstat_wal.c b/src/backend/utils/activity/pgstat_wal.c index 5a878bd115..9cac407b42 100644 --- a/src/backend/utils/activity/pgstat_wal.c +++ b/src/backend/utils/activity/pgstat_wal.c @@ -34,7 +34,7 @@ static WalUsage prevWalUsage; /* * Calculate how much WAL usage counters have increased and update - * shared statistics. 
+ * shared WAL and IO Operation statistics. * * Must be called by processes that generate WAL, that do not call * pgstat_report_stat(), like walwriter. @@ -43,6 +43,8 @@ void pgstat_report_wal(bool force) { pgstat_flush_wal(force); + + pgstat_flush_io_ops(force); } /* diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 893690dad5..6259cc4f4c 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -2104,6 +2104,8 @@ pg_stat_reset_shared(PG_FUNCTION_ARGS) pgstat_reset_of_kind(PGSTAT_KIND_BGWRITER); pgstat_reset_of_kind(PGSTAT_KIND_CHECKPOINTER); } + else if (strcmp(target, "io") == 0) + pgstat_reset_of_kind(PGSTAT_KIND_IOOPS); else if (strcmp(target, "recovery_prefetch") == 0) XLogPrefetchResetStats(); else if (strcmp(target, "wal") == 0) @@ -2112,7 +2114,7 @@ pg_stat_reset_shared(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("unrecognized reset target: \"%s\"", target), - errhint("Target must be \"archiver\", \"bgwriter\", \"recovery_prefetch\", or \"wal\"."))); + errhint("Target must be \"archiver\", \"io\", \"bgwriter\", \"recovery_prefetch\", or \"wal\"."))); PG_RETURN_VOID(); } diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 5276bf25a1..61e95135f2 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -331,6 +331,8 @@ typedef enum BackendType B_WAL_WRITER, } BackendType; +#define BACKEND_NUM_TYPES B_WAL_WRITER + extern PGDLLIMPORT BackendType MyBackendType; extern const char *GetBackendTypeDesc(BackendType backendType); diff --git a/src/include/pgstat.h b/src/include/pgstat.h index ac28f813b4..d6ed6ec864 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -14,6 +14,7 @@ #include "datatype/timestamp.h" #include "portability/instr_time.h" #include "postmaster/pgarch.h" /* for MAX_XFN_CHARS */ +#include "storage/lwlock.h" #include "utils/backend_progress.h" /* for backward compatibility */ #include 
"utils/backend_status.h" /* for backward compatibility */ #include "utils/relcache.h" @@ -48,6 +49,7 @@ typedef enum PgStat_Kind PGSTAT_KIND_ARCHIVER, PGSTAT_KIND_BGWRITER, PGSTAT_KIND_CHECKPOINTER, + PGSTAT_KIND_IOOPS, PGSTAT_KIND_SLRU, PGSTAT_KIND_WAL, } PgStat_Kind; @@ -276,6 +278,50 @@ typedef struct PgStat_CheckpointerStats PgStat_Counter buf_fsync_backend; } PgStat_CheckpointerStats; +/* + * Types related to counting IO Operations for various IO Paths + */ + +typedef enum IOOp +{ + IOOP_ALLOC, + IOOP_EXTEND, + IOOP_FSYNC, + IOOP_READ, + IOOP_WRITE, +} IOOp; + +#define IOOP_NUM_TYPES (IOOP_WRITE + 1) + +typedef enum IOPath +{ + IOPATH_LOCAL, + IOPATH_SHARED, + IOPATH_STRATEGY, +} IOPath; + +#define IOPATH_NUM_TYPES (IOPATH_STRATEGY + 1) + +typedef struct PgStat_IOOpCounters +{ + PgStat_Counter allocs; + PgStat_Counter extends; + PgStat_Counter fsyncs; + PgStat_Counter reads; + PgStat_Counter writes; +} PgStat_IOOpCounters; + +typedef struct PgStat_IOPathOps +{ + PgStat_IOOpCounters data[IOPATH_NUM_TYPES]; +} PgStat_IOPathOps; + +typedef struct PgStat_BackendIOPathOps +{ + TimestampTz stat_reset_timestamp; + PgStat_IOPathOps stats[BACKEND_NUM_TYPES]; +} PgStat_BackendIOPathOps; + typedef struct PgStat_StatDBEntry { PgStat_Counter n_xact_commit; @@ -453,6 +499,18 @@ extern void pgstat_report_checkpointer(void); extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void); +/* + * Functions in pgstat_io_ops.c + */ + +extern void pgstat_count_io_op(IOOp io_op, IOPath io_path); +extern bool pgstat_flush_io_ops(bool nowait); +extern PgStat_BackendIOPathOps *pgstat_fetch_backend_io_path_ops(void); +extern PgStat_Counter pgstat_fetch_cumulative_io_ops(IOPath io_path, IOOp io_op); +extern const char *pgstat_io_op_desc(IOOp io_op); +extern const char *pgstat_io_path_desc(IOPath io_path); + + /* * Functions in pgstat_database.c */ diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 69e45900ba..b69c5f7e3c 100644 --- 
a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -313,7 +313,7 @@ extern BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state); extern void StrategyFreeBuffer(BufferDesc *buf); extern bool StrategyRejectBuffer(BufferAccessStrategy strategy, - BufferDesc *buf); + BufferDesc *buf, bool *write_from_ring); extern int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc); extern void StrategyNotifyBgWriter(int bgwprocno); diff --git a/src/include/utils/backend_status.h b/src/include/utils/backend_status.h index 7403bca25e..d9b6d12acc 100644 --- a/src/include/utils/backend_status.h +++ b/src/include/utils/backend_status.h @@ -306,6 +306,42 @@ extern const char *pgstat_get_crashed_backend_activity(int pid, char *buffer, int buflen); extern uint64 pgstat_get_my_query_id(void); +/* Utility functions */ + +/* + * When maintaining an array of information about all valid BackendTypes, in + * order to avoid wasting the 0th spot, use this helper to convert a valid + * BackendType to a valid location in the array (given that no spot is + * maintained for B_INVALID BackendType). + */ +static inline int +backend_type_get_idx(BackendType backend_type) +{ + /* + * backend_type must be one of the valid backend types. If caller is + * maintaining backend information in an array that includes B_INVALID, + * this function is unnecessary. + */ + Assert(backend_type > B_INVALID && backend_type <= BACKEND_NUM_TYPES); + return backend_type - 1; +} + +/* + * When using a value from an array of information about all valid + * BackendTypes, add 1 to the index before using it as a BackendType to adjust + * for not maintaining a spot for B_INVALID BackendType. + */ +static inline BackendType +idx_get_backend_type(int idx) +{ + int backend_type = idx + 1; + /* + * If the array includes a spot for B_INVALID BackendType this function is + * not required. 
+ */ + Assert(backend_type > B_INVALID && backend_type <= BACKEND_NUM_TYPES); + return backend_type; +} /* ---------- * Support functions for the SQL-callable functions to diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h index 9303d05427..3151c43dfe 100644 --- a/src/include/utils/pgstat_internal.h +++ b/src/include/utils/pgstat_internal.h @@ -329,6 +329,19 @@ typedef struct PgStatShared_Checkpointer PgStat_CheckpointerStats reset_offset; } PgStatShared_Checkpointer; +typedef struct PgStatShared_IOPathOps +{ + LWLock lock; + PgStat_IOOpCounters data[IOPATH_NUM_TYPES]; +} PgStatShared_IOPathOps; + +typedef struct PgStatShared_BackendIOPathOps +{ + TimestampTz stat_reset_timestamp; + PgStatShared_IOPathOps stats[BACKEND_NUM_TYPES]; +} PgStatShared_BackendIOPathOps; + + typedef struct PgStatShared_SLRU { /* lock protects ->stats */ @@ -419,6 +432,7 @@ typedef struct PgStat_ShmemControl PgStatShared_Archiver archiver; PgStatShared_BgWriter bgwriter; PgStatShared_Checkpointer checkpointer; + PgStatShared_BackendIOPathOps io_ops; PgStatShared_SLRU slru; PgStatShared_Wal wal; } PgStat_ShmemControl; @@ -442,6 +456,8 @@ typedef struct PgStat_Snapshot PgStat_CheckpointerStats checkpointer; + PgStat_BackendIOPathOps io_ops; + PgStat_SLRUStats slru[SLRU_NUM_ELEMENTS]; PgStat_WalStats wal; @@ -549,6 +565,14 @@ extern void pgstat_database_reset_timestamp_cb(PgStatShared_Common *header, Time extern bool pgstat_function_flush_cb(PgStat_EntryRef *entry_ref, bool nowait); +/* + * Functions in pgstat_io_ops.c + */ + +extern void pgstat_io_ops_snapshot_cb(void); +extern void pgstat_io_ops_reset_all_cb(TimestampTz ts); + + /* * Functions in pgstat_relation.c */ -- 2.34.1