From 171f5a5be7b93751378a9cd4d5d8b9731401e78e Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Mon, 5 Dec 2022 19:32:42 -0500 Subject: [PATCH v40 2/4] Track IO operation statistics locally Introduce "IOOp", an IO operation done by a backend, "IOObject", the target object of the IO, and "IOContext", the context or location of the IO operations on that object. For example, the checkpointer may write a shared buffer out. This would be counted as an IOOp "written" on an IOObject IOOBJECT_RELATION in IOContext IOCONTEXT_NORMAL by BackendType "checkpointer". Each IOOp (evict, extend, fsync, read, reuse, and write) is counted per IOObject (relation, temp relation) per IOContext (normal, bulkread, bulkwrite, or vacuum) through a call to pgstat_count_io_op(). The primary concern of these statistics is IO operations on data blocks during the course of normal database operations. IO operations done by, for example, the archiver or syslogger are not counted in these statistics. WAL IO, temporary file IO, and IO done directly through smgr* functions (such as when building an index) are not yet counted but would be useful future additions. IOContext IOCONTEXT_NORMAL concerns operations on local and shared buffers. The IOCONTEXT_BULKREAD, IOCONTEXT_BULKWRITE, and IOCONTEXT_VACUUM IOContexts concern IO operations on buffers as part of a BufferAccessStrategy. IOOP_EVICT IOOps are counted in IOCONTEXT_NORMAL when a buffer is acquired or allocated through [Local]BufferAlloc() and no BufferAccessStrategy is in use. When a BufferAccessStrategy is in use, shared buffers added to the strategy ring are counted as IOOP_EVICT IOOps in the IOCONTEXT_[BULKREAD|BULKWRITE|VACUUM] IOContext. When one of these buffers is reused, it is counted as an IOOP_REUSE IOOp in the corresponding strategy IOContext. IOOP_WRITE IOOps are counted in the BufferAccessStrategy IOContexts whenever the reused dirty buffer is written out. 
IO Operations on buffers containing temporary table data are counted as operations on IOOBJECT_TEMP_RELATION IOObjects. Stats on IOOps in all IOContexts for a given backend are counted in a backend's local memory. A subsequent commit will expose functions for aggregating and viewing these stats. Author: Melanie Plageman Reviewed-by: Andres Freund Reviewed-by: Justin Pryzby Reviewed-by: Kyotaro Horiguchi Reviewed-by: Maciek Sakrejda Reviewed-by: Lukas Fittl Discussion: https://www.postgresql.org/message-id/flat/20200124195226.lth52iydq2n2uilq%40alap3.anarazel.de --- src/backend/postmaster/checkpointer.c | 13 + src/backend/storage/buffer/bufmgr.c | 93 ++++++- src/backend/storage/buffer/freelist.c | 37 ++- src/backend/storage/buffer/localbuf.c | 4 + src/backend/storage/sync/sync.c | 2 + src/backend/utils/activity/Makefile | 1 + src/backend/utils/activity/meson.build | 1 + src/backend/utils/activity/pgstat_io_ops.c | 280 +++++++++++++++++++++ src/include/pgstat.h | 119 +++++++++ src/include/storage/buf_internals.h | 2 + src/include/storage/bufmgr.h | 7 +- src/tools/pgindent/typedefs.list | 6 + 12 files changed, 554 insertions(+), 11 deletions(-) create mode 100644 src/backend/utils/activity/pgstat_io_ops.c diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 5fc076fc14..783bca52fd 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -1116,6 +1116,19 @@ ForwardSyncRequest(const FileTag *ftag, SyncRequestType type) if (!AmBackgroundWriterProcess()) CheckpointerShmem->num_backend_fsync++; LWLockRelease(CheckpointerCommLock); + + /* + * We have no way of knowing if the current IOContext is + * IOCONTEXT_NORMAL or IOCONTEXT_[BULKREAD, BULKWRITE, VACUUM] at this + * point, so count the fsync as being in the IOCONTEXT_NORMAL + * IOContext. This is probably okay, because the number of backend + * fsyncs doesn't say anything about the efficacy of the + * BufferAccessStrategy. 
And counting both fsyncs done in + * IOCONTEXT_NORMAL and IOCONTEXT_[BULKREAD, BULKWRITE, VACUUM] under + * IOCONTEXT_NORMAL is likely clearer when investigating the number of + * backend fsyncs. + */ + pgstat_count_io_op(IOOP_FSYNC, IOOBJECT_RELATION, IOCONTEXT_NORMAL); return false; } diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index fa32f24e19..f2e371e1d8 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -482,7 +482,8 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr); -static void FlushBuffer(BufferDesc *buf, SMgrRelation reln); +static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, + IOContext io_context, IOObject io_object); static void FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, @@ -823,6 +824,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BufferDesc *bufHdr; Block bufBlock; bool found; + IOContext io_context; + IOObject io_object; bool isExtend; bool isLocalBuf = SmgrIsTemp(smgr); @@ -986,10 +989,28 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, */ Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID)); /* spinlock not needed */ - bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr); + if (isLocalBuf) + { + bufBlock = LocalBufHdrGetBlock(bufHdr); + + /* + * Though a strategy object may be passed in, no strategy is employed + * when using local buffers. This could happen when doing, for + * example, CREATE TEMPORARY TABLE AS ... 
+ */ + io_context = IOCONTEXT_NORMAL; + io_object = IOOBJECT_TEMP_RELATION; + } + else + { + bufBlock = BufHdrGetBlock(bufHdr); + io_context = IOContextForStrategy(strategy); + io_object = IOOBJECT_RELATION; + } if (isExtend) { + pgstat_count_io_op(IOOP_EXTEND, io_object, io_context); /* new buffers are zero-filled */ MemSet((char *) bufBlock, 0, BLCKSZ); /* don't set checksum for all-zero page */ @@ -1015,6 +1036,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, instr_time io_start, io_time; + pgstat_count_io_op(IOOP_READ, io_object, io_context); + if (track_io_timing) INSTR_TIME_SET_CURRENT(io_start); @@ -1122,6 +1145,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, bool *foundPtr) { bool from_ring; + IOContext io_context; BufferTag newTag; /* identity of requested block */ uint32 newHash; /* hash value for newTag */ LWLock *newPartitionLock; /* buffer partition lock for it */ @@ -1188,6 +1212,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, */ LWLockRelease(newPartitionLock); + io_context = IOContextForStrategy(strategy); + /* Loop here in case we have to try another victim buffer */ for (;;) { @@ -1264,13 +1290,35 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, } } + /* + * When a strategy is in use, only flushes of dirty buffers + * already in the strategy ring are counted as strategy writes + * (IOCONTEXT [BULKREAD|BULKWRITE|VACUUM] IOOP_WRITE) for the + * purpose of IO operation statistics tracking. + * + * If a shared buffer initially added to the ring must be + * flushed before being used, this is counted as an + * IOCONTEXT_NORMAL IOOP_WRITE. 
+ * + * If a shared buffer which was added to the ring later + * because the current strategy buffer is pinned or in use or + * because all strategy buffers were dirty and rejected (for + * BAS_BULKREAD operations only) requires flushing, this is + * counted as an IOCONTEXT_NORMAL IOOP_WRITE (from_ring will + * be false). + * + * When a strategy is not in use, the write can only be a + * "regular" write of a dirty shared buffer (IOCONTEXT_NORMAL + * IOOP_WRITE). + */ + /* OK, do the I/O */ TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(forkNum, blockNum, smgr->smgr_rlocator.locator.spcOid, smgr->smgr_rlocator.locator.dbOid, smgr->smgr_rlocator.locator.relNumber); - FlushBuffer(buf, NULL); + FlushBuffer(buf, NULL, io_context, IOOBJECT_RELATION); LWLockRelease(BufferDescriptorGetContentLock(buf)); ScheduleBufferTagForWriteback(&BackendWritebackContext, @@ -1442,6 +1490,29 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, UnlockBufHdr(buf, buf_state); + if (oldFlags & BM_VALID) + { + /* + * When a BufferAccessStrategy is in use, blocks evicted from shared + * buffers are counted as IOOP_EVICT IO Operations in the + * corresponding context (e.g. IOCONTEXT_BULKWRITE). Shared buffers + * are evicted by a strategy in two cases: 1) while initially claiming + * buffers for the strategy ring 2) to replace an existing strategy + * ring buffer because it is pinned or in use and cannot be reused. + * + * Blocks evicted from buffers already in the strategy ring are + * counted as IOOP_REUSE IO Operations in the corresponding strategy + * context. + * + * At this point, we can accurately count evictions and reuses, + * because we have successfully claimed the valid buffer. Previously, + * we may have been forced to release the buffer due to concurrent + * pinners or erroring out. + */ + pgstat_count_io_op(from_ring ? 
IOOP_REUSE : IOOP_EVICT, + IOOBJECT_RELATION, io_context); + } + if (oldPartitionLock != NULL) { BufTableDelete(&oldTag, oldHash); @@ -2571,7 +2642,7 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context) PinBuffer_Locked(bufHdr); LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED); - FlushBuffer(bufHdr, NULL); + FlushBuffer(bufHdr, NULL, IOCONTEXT_NORMAL, IOOBJECT_RELATION); LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); @@ -2821,7 +2892,7 @@ BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, * as the second parameter. If not, pass NULL. */ static void -FlushBuffer(BufferDesc *buf, SMgrRelation reln) +FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOContext io_context, IOObject io_object) { XLogRecPtr recptr; ErrorContextCallback errcallback; @@ -2901,6 +2972,8 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln) */ bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum); + pgstat_count_io_op(IOOP_WRITE, IOOBJECT_RELATION, io_context); + if (track_io_timing) INSTR_TIME_SET_CURRENT(io_start); @@ -3552,6 +3625,8 @@ FlushRelationBuffers(Relation rel) localpage, false); + pgstat_count_io_op(IOOP_WRITE, IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL); + buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED); pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); @@ -3587,7 +3662,7 @@ FlushRelationBuffers(Relation rel) { PinBuffer_Locked(bufHdr); LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED); - FlushBuffer(bufHdr, RelationGetSmgr(rel)); + FlushBuffer(bufHdr, RelationGetSmgr(rel), IOCONTEXT_NORMAL, IOOBJECT_RELATION); LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); UnpinBuffer(bufHdr); } @@ -3685,7 +3760,7 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels) { PinBuffer_Locked(bufHdr); LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED); - FlushBuffer(bufHdr, srelent->srel); + FlushBuffer(bufHdr, srelent->srel, IOCONTEXT_NORMAL, IOOBJECT_RELATION); 
LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); UnpinBuffer(bufHdr); } @@ -3895,7 +3970,7 @@ FlushDatabaseBuffers(Oid dbid) { PinBuffer_Locked(bufHdr); LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED); - FlushBuffer(bufHdr, NULL); + FlushBuffer(bufHdr, NULL, IOCONTEXT_NORMAL, IOOBJECT_RELATION); LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); UnpinBuffer(bufHdr); } @@ -3922,7 +3997,7 @@ FlushOneBuffer(Buffer buffer) Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr))); - FlushBuffer(bufHdr, NULL); + FlushBuffer(bufHdr, NULL, IOCONTEXT_NORMAL, IOOBJECT_RELATION); } /* diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index 5299bb8711..d318976b9e 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -15,6 +15,7 @@ */ #include "postgres.h" +#include "pgstat.h" #include "port/atomics.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" @@ -601,7 +602,7 @@ FreeAccessStrategy(BufferAccessStrategy strategy) /* * GetBufferFromRing -- returns a buffer from the ring, or NULL if the - * ring is empty. + * ring is empty / not usable. * * The bufhdr spin lock is held on the returned buffer. */ @@ -664,6 +665,40 @@ AddBufferToRing(BufferAccessStrategy strategy, BufferDesc *buf) strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf); } +/* + * Utility function returning the IOContext of a given BufferAccessStrategy's + * strategy ring. + */ +IOContext +IOContextForStrategy(BufferAccessStrategy strategy) +{ + if (!strategy) + return IOCONTEXT_NORMAL; + + switch (strategy->btype) + { + case BAS_NORMAL: + + /* + * Currently, GetAccessStrategy() returns NULL for + * BufferAccessStrategyType BAS_NORMAL, so this case is + * unreachable. 
+ */ + pg_unreachable(); + return IOCONTEXT_NORMAL; + case BAS_BULKREAD: + return IOCONTEXT_BULKREAD; + case BAS_BULKWRITE: + return IOCONTEXT_BULKWRITE; + case BAS_VACUUM: + return IOCONTEXT_VACUUM; + } + + elog(ERROR, "unrecognized BufferAccessStrategyType: %d", strategy->btype); + + pg_unreachable(); +} + /* * StrategyRejectBuffer -- consider rejecting a dirty buffer * diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 30d67d1c40..e27d623174 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -18,6 +18,7 @@ #include "access/parallel.h" #include "catalog/catalog.h" #include "executor/instrument.h" +#include "pgstat.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" #include "utils/guc_hooks.h" @@ -226,6 +227,8 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, localpage, false); + pgstat_count_io_op(IOOP_WRITE, IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL); + /* Mark not-dirty now in case we error out below */ buf_state &= ~BM_DIRTY; pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); @@ -256,6 +259,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, ClearBufferTag(&bufHdr->tag); buf_state &= ~(BM_VALID | BM_TAG_VALID); pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); + pgstat_count_io_op(IOOP_EVICT, IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL); } hresult = (LocalBufferLookupEnt *) diff --git a/src/backend/storage/sync/sync.c b/src/backend/storage/sync/sync.c index 9d6a9e9109..684a1c3e21 100644 --- a/src/backend/storage/sync/sync.c +++ b/src/backend/storage/sync/sync.c @@ -432,6 +432,8 @@ ProcessSyncRequests(void) total_elapsed += elapsed; processed++; + pgstat_count_io_op(IOOP_FSYNC, IOOBJECT_RELATION, IOCONTEXT_NORMAL); + if (log_checkpoints) elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f ms", processed, diff --git a/src/backend/utils/activity/Makefile 
b/src/backend/utils/activity/Makefile index a2e8507fd6..0098785089 100644 --- a/src/backend/utils/activity/Makefile +++ b/src/backend/utils/activity/Makefile @@ -22,6 +22,7 @@ OBJS = \ pgstat_checkpointer.o \ pgstat_database.o \ pgstat_function.o \ + pgstat_io_ops.o \ pgstat_relation.o \ pgstat_replslot.o \ pgstat_shmem.o \ diff --git a/src/backend/utils/activity/meson.build b/src/backend/utils/activity/meson.build index 5b3b558a67..1038324c32 100644 --- a/src/backend/utils/activity/meson.build +++ b/src/backend/utils/activity/meson.build @@ -7,6 +7,7 @@ backend_sources += files( 'pgstat_checkpointer.c', 'pgstat_database.c', 'pgstat_function.c', + 'pgstat_io_ops.c', 'pgstat_relation.c', 'pgstat_replslot.c', 'pgstat_shmem.c', diff --git a/src/backend/utils/activity/pgstat_io_ops.c b/src/backend/utils/activity/pgstat_io_ops.c new file mode 100644 index 0000000000..6fbb6b185e --- /dev/null +++ b/src/backend/utils/activity/pgstat_io_ops.c @@ -0,0 +1,280 @@ +/* ------------------------------------------------------------------------- + * + * pgstat_io_ops.c + * Implementation of IO operation statistics. + * + * This file contains the implementation of IO operation statistics. It is kept + * separate from pgstat.c to enforce the line between the statistics access / + * storage implementation and the details about individual types of + * statistics. 
+ * + * Copyright (c) 2021-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/activity/pgstat_io_ops.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "utils/pgstat_internal.h" + +static PgStat_IOContextOps pending_IOOpStats; + +void +pgstat_count_io_op(IOOp io_op, IOObject io_object, IOContext io_context) +{ + PgStat_IOOpCounters *pending_counters; + + Assert(io_context < IOCONTEXT_NUM_TYPES); + Assert(io_op < IOOP_NUM_TYPES); + Assert(pgstat_expect_io_op(MyBackendType, io_context, io_object, io_op)); + + pending_counters = &pending_IOOpStats.data[io_context].data[io_object]; + + switch (io_op) + { + case IOOP_EVICT: + pending_counters->evictions++; + break; + case IOOP_EXTEND: + pending_counters->extends++; + break; + case IOOP_FSYNC: + pending_counters->fsyncs++; + break; + case IOOP_READ: + pending_counters->reads++; + break; + case IOOP_REUSE: + pending_counters->reuses++; + break; + case IOOP_WRITE: + pending_counters->writes++; + break; + } +} + +const char * +pgstat_io_context_desc(IOContext io_context) +{ + switch (io_context) + { + case IOCONTEXT_BULKREAD: + return "bulkread"; + case IOCONTEXT_BULKWRITE: + return "bulkwrite"; + case IOCONTEXT_NORMAL: + return "normal"; + case IOCONTEXT_VACUUM: + return "vacuum"; + } + + elog(ERROR, "unrecognized IOContext value: %d", io_context); + + pg_unreachable(); +} + +const char * +pgstat_io_object_desc(IOObject io_object) +{ + switch (io_object) + { + case IOOBJECT_RELATION: + return "relation"; + case IOOBJECT_TEMP_RELATION: + return "temp relation"; + } + + elog(ERROR, "unrecognized IOObject value: %d", io_object); + + pg_unreachable(); +} + +const char * +pgstat_io_op_desc(IOOp io_op) +{ + switch (io_op) + { + case IOOP_EVICT: + return "evicted"; + case IOOP_EXTEND: + return "extended"; + case IOOP_FSYNC: + return "files synced"; + case IOOP_READ: + return "read"; + case IOOP_REUSE: + return "reused"; + 
case IOOP_WRITE: + return "written"; + } + + elog(ERROR, "unrecognized IOOp value: %d", io_op); + + pg_unreachable(); +} + +/* +* IO Operation statistics are not collected for all BackendTypes. +* +* The following BackendTypes do not participate in the cumulative stats +* subsystem or do not perform IO operations on which we currently report: +* - Syslogger because it is not connected to shared memory +* - Archiver because most relevant archiving IO is delegated to a +* specialized command or module +* - WAL Receiver and WAL Writer IO is not tracked in pg_stat_io for now +* +* Function returns true if BackendType participates in the cumulative stats +* subsystem for IO Operations and false if it does not. +*/ +bool +pgstat_io_op_stats_collected(BackendType bktype) +{ + switch (bktype) + { + case B_INVALID: + case B_ARCHIVER: + case B_LOGGER: + case B_WAL_RECEIVER: + case B_WAL_WRITER: + return false; + default: + return true; + } +} + + +/* + * Some BackendTypes do not perform IO operations in certain IOContexts. Some + * IOObjects are never operated on in some IOContexts. Check that the given + * BackendType is expected to do IO in the given IOContext and that the given + * IOObject is expected to be operated on in the given IOContext. + */ +bool +pgstat_bktype_io_context_io_object_valid(BackendType bktype, + IOContext io_context, IOObject io_object) +{ + bool no_temp_rel; + + /* + * Currently, IO operations on temporary relations can only occur in the + * IOCONTEXT_NORMAL IOContext. + */ + if (io_context != IOCONTEXT_NORMAL && + io_object == IOOBJECT_TEMP_RELATION) + return false; + + /* + * In core Postgres, only regular backends and WAL Sender processes + * executing queries will use local buffers and operate on temporary + * relations. 
Parallel workers will not use local buffers (see + * InitLocalBuffers()); however, extensions leveraging background workers + * have no such limitation, so track IO Operations on + * IOOBJECT_TEMP_RELATION for BackendType B_BG_WORKER. + */ + no_temp_rel = bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER || + bktype == B_CHECKPOINTER || bktype == B_AUTOVAC_WORKER || + bktype == B_STANDALONE_BACKEND || bktype == B_STARTUP; + + if (no_temp_rel && io_context == IOCONTEXT_NORMAL && + io_object == IOOBJECT_TEMP_RELATION) + return false; + + /* + * Some BackendTypes do not currently perform any IO operations in certain + * IOContexts, and, while it may not be inherently incorrect for them to + * do so, excluding those rows from the view makes the view easier to use. + */ + if ((bktype == B_CHECKPOINTER || bktype == B_BG_WRITER) && + (io_context == IOCONTEXT_BULKREAD || + io_context == IOCONTEXT_BULKWRITE || + io_context == IOCONTEXT_VACUUM)) + return false; + + if (bktype == B_AUTOVAC_LAUNCHER && io_context == IOCONTEXT_VACUUM) + return false; + + if ((bktype == B_AUTOVAC_WORKER || bktype == B_AUTOVAC_LAUNCHER) && + io_context == IOCONTEXT_BULKWRITE) + return false; + + return true; +} + +/* + * Some BackendTypes will never do certain IOOps and some IOOps should not + * occur in certain IOContexts. Check that the given IOOp is valid for the + * given BackendType in the given IOContext. Note that there are currently no + * cases of an IOOp being invalid for a particular BackendType only within a + * certain IOContext. + */ +bool +pgstat_io_op_valid(BackendType bktype, IOContext io_context, IOObject io_object, IOOp io_op) +{ + bool strategy_io_context; + + /* + * Some BackendTypes should never track IO Operation statistics. + */ + Assert(pgstat_io_op_stats_collected(bktype)); + + /* + * Some BackendTypes will not do certain IOOps. 
+ */ + if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) && + (io_op == IOOP_READ || io_op == IOOP_EVICT)) + return false; + + if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER || + bktype == B_CHECKPOINTER) && io_op == IOOP_EXTEND) + return false; + + /* + * Some IOOps are not valid in certain IOContexts and some IOOps are only + * valid in certain contexts. + */ + if (io_context == IOCONTEXT_BULKREAD && io_op == IOOP_EXTEND) + return false; + + strategy_io_context = io_context == IOCONTEXT_BULKREAD || + io_context == IOCONTEXT_BULKWRITE || io_context == IOCONTEXT_VACUUM; + + /* + * IOOP_REUSE is only relevant when a BufferAccessStrategy is in use. + */ + if (!strategy_io_context && io_op == IOOP_REUSE) + return false; + + /* + * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are + * counted in the IOCONTEXT_NORMAL IOContext. See comment in + * ForwardSyncRequest() for more details. + */ + if (strategy_io_context && io_op == IOOP_FSYNC) + return false; + + /* + * Temporary tables are not logged and thus do not require fsync'ing. + */ + if (io_context == IOCONTEXT_NORMAL && + io_object == IOOBJECT_TEMP_RELATION && io_op == IOOP_FSYNC) + return false; + + return true; +} + +bool +pgstat_expect_io_op(BackendType bktype, IOContext io_context, IOObject io_object, IOOp io_op) +{ + if (!pgstat_io_op_stats_collected(bktype)) + return false; + + if (!pgstat_bktype_io_context_io_object_valid(bktype, io_context, io_object)) + return false; + + if (!pgstat_io_op_valid(bktype, io_context, io_object, io_op)) + return false; + + return true; +} diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 9e2ce6f011..a57e39042f 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -276,6 +276,63 @@ typedef struct PgStat_CheckpointerStats PgStat_Counter buf_fsync_backend; } PgStat_CheckpointerStats; +/* + * Types related to counting IO Operations for various IO Contexts. 
+ * When adding a new value, ensure that the proper assertions are added to + * pgstat_io_context_ops_assert_zero() and pgstat_io_op_assert_zero() (though + * the compiler will remind you about the latter). + */ + +typedef enum IOOp +{ + IOOP_EVICT, + IOOP_EXTEND, + IOOP_FSYNC, + IOOP_READ, + IOOP_REUSE, + IOOP_WRITE, +} IOOp; + +#define IOOP_NUM_TYPES (IOOP_WRITE + 1) + +typedef enum IOObject +{ + IOOBJECT_RELATION, + IOOBJECT_TEMP_RELATION, +} IOObject; + +#define IOOBJECT_NUM_TYPES (IOOBJECT_TEMP_RELATION + 1) + +typedef enum IOContext +{ + IOCONTEXT_BULKREAD, + IOCONTEXT_BULKWRITE, + IOCONTEXT_NORMAL, + IOCONTEXT_VACUUM, +} IOContext; + +#define IOCONTEXT_NUM_TYPES (IOCONTEXT_VACUUM + 1) + +typedef struct PgStat_IOOpCounters +{ + PgStat_Counter evictions; + PgStat_Counter extends; + PgStat_Counter fsyncs; + PgStat_Counter reads; + PgStat_Counter reuses; + PgStat_Counter writes; +} PgStat_IOOpCounters; + +typedef struct PgStat_IOObjectOps +{ + PgStat_IOOpCounters data[IOOBJECT_NUM_TYPES]; +} PgStat_IOObjectOps; + +typedef struct PgStat_IOContextOps +{ + PgStat_IOObjectOps data[IOCONTEXT_NUM_TYPES]; +} PgStat_IOContextOps; + typedef struct PgStat_StatDBEntry { PgStat_Counter n_xact_commit; @@ -453,6 +510,68 @@ extern void pgstat_report_checkpointer(void); extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void); +/* + * Functions in pgstat_io_ops.c + */ + +extern void pgstat_count_io_op(IOOp io_op, IOObject io_object, IOContext io_context); +extern const char *pgstat_io_context_desc(IOContext io_context); +extern const char *pgstat_io_object_desc(IOObject io_object); +extern const char *pgstat_io_op_desc(IOOp io_op); + +/* Validation functions in pgstat_io_ops.c */ +extern bool pgstat_io_op_stats_collected(BackendType bktype); +extern bool pgstat_bktype_io_context_io_object_valid(BackendType bktype, + IOContext io_context, IOObject io_object); +extern bool pgstat_io_op_valid(BackendType bktype, IOContext io_context, + IOObject io_object, IOOp io_op); 
+extern bool pgstat_expect_io_op(BackendType bktype, + IOContext io_context, IOObject io_object, IOOp io_op); + +/* + * Functions to assert that invalid IO Operation counters are zero. + */ +static inline void +pgstat_io_context_ops_assert_zero(PgStat_IOOpCounters *counters) +{ + Assert(counters->evictions == 0); + Assert(counters->extends == 0); + Assert(counters->fsyncs == 0); + Assert(counters->reads == 0); + Assert(counters->reuses == 0); + Assert(counters->writes == 0); +} + +static inline void +pgstat_io_op_assert_zero(PgStat_IOOpCounters *counters, IOOp io_op) +{ + switch (io_op) + { + case IOOP_EVICT: + Assert(counters->evictions == 0); + return; + case IOOP_EXTEND: + Assert(counters->extends == 0); + return; + case IOOP_FSYNC: + Assert(counters->fsyncs == 0); + return; + case IOOP_READ: + Assert(counters->reads == 0); + return; + case IOOP_REUSE: + Assert(counters->reuses == 0); + return; + case IOOP_WRITE: + Assert(counters->writes == 0); + return; + } + + /* Should not reach here */ + Assert(false); +} + + /* * Functions in pgstat_database.c */ diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 7b67250747..0c80ec9230 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -15,6 +15,7 @@ #ifndef BUFMGR_INTERNALS_H #define BUFMGR_INTERNALS_H +#include "pgstat.h" #include "port/atomics.h" #include "storage/buf.h" #include "storage/bufmgr.h" @@ -391,6 +392,7 @@ extern void IssuePendingWritebacks(WritebackContext *context); extern void ScheduleBufferTagForWriteback(WritebackContext *context, BufferTag *tag); /* freelist.c */ +extern IOContext IOContextForStrategy(BufferAccessStrategy bas); extern BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring); extern void StrategyFreeBuffer(BufferDesc *buf); diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index e1bd22441b..206f4c0b3e 100644 --- 
a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -23,7 +23,12 @@ typedef void *Block; -/* Possible arguments for GetAccessStrategy() */ +/* + * Possible arguments for GetAccessStrategy(). + * + * If adding a new BufferAccessStrategyType, also add a new IOContext so + * statistics on IO operations using this strategy are tracked. + */ typedef enum BufferAccessStrategyType { BAS_NORMAL, /* Normal random access */ diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 58daeca831..28362f00a4 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1105,7 +1105,10 @@ ID INFIX INT128 INTERFACE_INFO +IOContext IOFuncSelector +IOObject +IOOp IPCompareMethod ITEM IV @@ -2025,6 +2028,9 @@ PgStat_FetchConsistency PgStat_FunctionCallUsage PgStat_FunctionCounts PgStat_HashKey +PgStat_IOContextOps +PgStat_IOObjectOps +PgStat_IOOpCounters PgStat_Kind PgStat_KindInfo PgStat_LocalState -- 2.38.1