From e87831a0ffe94af54b91285630dd6f1c497c368a Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Wed, 4 Jan 2023 17:20:41 -0500 Subject: [PATCH v45 2/5] pgstat: Infrastructure to track IO operations Introduce "IOOp", an IO operation done by a backend, "IOObject", the target object of the IO, and "IOContext", the context or location of the IO operations on that object. For example, the checkpointer may write a shared buffer out. This would be considered an IOOp "written" on an IOObject IOOBJECT_RELATION in IOContext IOCONTEXT_NORMAL by BackendType "checkpointer". Each IOOp (evict, extend, fsync, read, reuse, and write) can be counted per IOObject (relation, temp relation) per IOContext (normal, bulkread, bulkwrite, or vacuum) through a call to pgstat_count_io_op(). Note that this commit introduces the infrastructure to count IO Operation statistics. A subsequent commit will add calls to pgstat_count_io_op() in the appropriate locations. IOContext IOCONTEXT_NORMAL concerns operations on local and shared buffers, while IOCONTEXT_BULKREAD, IOCONTEXT_BULKWRITE, and IOCONTEXT_VACUUM IOContexts concern IO operations on buffers as part of a BufferAccessStrategy. IOObject IOOBJECT_TEMP_RELATION concerns IO Operations on buffers containing temporary table data, while IOObject IOOBJECT_RELATION concerns IO Operations on buffers containing permanent relation data. Stats on IOOps on all IOObjects in all IOContexts for a given backend are first counted in a backend's local memory and then flushed to shared memory and accumulated with those from all other backends, exited and live. Some BackendTypes will not flush their pending statistics at regular intervals and explicitly call pgstat_flush_io_ops() during the course of normal operations to flush their backend-local IO operation statistics to shared memory in a timely manner. Because not all BackendType, IOOp, IOObject, IOContext combinations are valid, the validity of the stats is checked before flushing pending stats and before reading in the existing stats file to shared memory. The aggregated stats in shared memory could be extended in the future with per-backend stats -- useful for per connection IO statistics and monitoring. Author: Melanie Plageman Reviewed-by: Andres Freund Reviewed-by: Justin Pryzby Reviewed-by: Kyotaro Horiguchi Discussion: https://www.postgresql.org/message-id/flat/20200124195226.lth52iydq2n2uilq%40alap3.anarazel.de --- doc/src/sgml/monitoring.sgml | 2 + src/backend/utils/activity/Makefile | 1 + src/backend/utils/activity/meson.build | 1 + src/backend/utils/activity/pgstat.c | 26 ++ src/backend/utils/activity/pgstat_bgwriter.c | 7 +- .../utils/activity/pgstat_checkpointer.c | 7 +- src/backend/utils/activity/pgstat_io.c | 400 ++++++++++++++++++ src/backend/utils/activity/pgstat_relation.c | 15 +- src/backend/utils/activity/pgstat_shmem.c | 4 + src/backend/utils/activity/pgstat_wal.c | 4 +- src/backend/utils/adt/pgstatfuncs.c | 4 +- src/include/miscadmin.h | 2 + src/include/pgstat.h | 67 +++ src/include/utils/pgstat_internal.h | 32 ++ src/tools/pgindent/typedefs.list | 6 + 15 files changed, 572 insertions(+), 6 deletions(-) create mode 100644 src/backend/utils/activity/pgstat_io.c diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index cf220c3bcb..1691246e76 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -5408,6 +5408,8 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i the pg_stat_bgwriter view, archiver to reset all the counters shown in the pg_stat_archiver view, + io to reset all the counters shown in the + pg_stat_io view, wal to reset all the counters shown in the pg_stat_wal view or recovery_prefetch to reset all the counters shown diff --git a/src/backend/utils/activity/Makefile b/src/backend/utils/activity/Makefile index a80eda3cf4..7d7482dde0 100644 --- a/src/backend/utils/activity/Makefile +++ b/src/backend/utils/activity/Makefile @@ -22,6 +22,7 @@ OBJS = \ pgstat_checkpointer.o \ pgstat_database.o \ pgstat_function.o \ + pgstat_io.o \ pgstat_relation.o \ pgstat_replslot.o \ pgstat_shmem.o \ diff --git a/src/backend/utils/activity/meson.build b/src/backend/utils/activity/meson.build index a2b872c24b..518ee3f798 100644 --- a/src/backend/utils/activity/meson.build +++ b/src/backend/utils/activity/meson.build @@ -9,6 +9,7 @@ backend_sources += files( 'pgstat_checkpointer.c', 'pgstat_database.c', 'pgstat_function.c', + 'pgstat_io.c', 'pgstat_relation.c', 'pgstat_replslot.c', 'pgstat_shmem.c', diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index 0fa5370bcd..608c3b59da 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -72,6 +72,7 @@ * - pgstat_checkpointer.c * - pgstat_database.c * - pgstat_function.c + * - pgstat_io.c * - pgstat_relation.c * - pgstat_replslot.c * - pgstat_slru.c @@ -359,6 +360,15 @@ static const PgStat_KindInfo pgstat_kind_infos[PGSTAT_NUM_KINDS] = { .snapshot_cb = pgstat_checkpointer_snapshot_cb, }, + [PGSTAT_KIND_IO] = { + .name = "io_ops", + + .fixed_amount = true, + + .reset_all_cb = pgstat_io_reset_all_cb, + .snapshot_cb = pgstat_io_snapshot_cb, + }, + [PGSTAT_KIND_SLRU] = { .name = "slru", @@ -582,6 +592,7 @@ pgstat_report_stat(bool force) /* Don't expend a clock check if nothing to do */ if (dlist_is_empty(&pgStatPending) && + !have_iostats && !have_slrustats && !pgstat_have_pending_wal()) { @@ -628,6 +639,9 @@ pgstat_report_stat(bool force) /* flush database / relation / function / ... stats */ partial_flush |= pgstat_flush_pending_entries(nowait); + /* flush IO stats */ + partial_flush |= pgstat_flush_io(nowait); + /* flush wal stats */ partial_flush |= pgstat_flush_wal(nowait); @@ -1322,6 +1336,12 @@ pgstat_write_statsfile(void) pgstat_build_snapshot_fixed(PGSTAT_KIND_CHECKPOINTER); write_chunk_s(fpout, &pgStatLocal.snapshot.checkpointer); + /* + * Write IO stats struct + */ + pgstat_build_snapshot_fixed(PGSTAT_KIND_IO); + write_chunk_s(fpout, &pgStatLocal.snapshot.io); + /* * Write SLRU stats struct */ @@ -1496,6 +1516,12 @@ pgstat_read_statsfile(void) if (!read_chunk_s(fpin, &shmem->checkpointer.stats)) goto error; + /* + * Read IO stats struct + */ + if (!read_chunk_s(fpin, &shmem->io.stats)) + goto error; + /* * Read SLRU stats struct */ diff --git a/src/backend/utils/activity/pgstat_bgwriter.c b/src/backend/utils/activity/pgstat_bgwriter.c index 9247f2dda2..92be384b0d 100644 --- a/src/backend/utils/activity/pgstat_bgwriter.c +++ b/src/backend/utils/activity/pgstat_bgwriter.c @@ -24,7 +24,7 @@ PgStat_BgWriterStats PendingBgWriterStats = {0}; /* - * Report bgwriter statistics + * Report bgwriter and IO statistics */ void pgstat_report_bgwriter(void) @@ -56,6 +56,11 @@ pgstat_report_bgwriter(void) * Clear out the statistics buffer, so it can be re-used. */ MemSet(&PendingBgWriterStats, 0, sizeof(PendingBgWriterStats)); + + /* + * Report IO statistics + */ + pgstat_flush_io(false); } /* diff --git a/src/backend/utils/activity/pgstat_checkpointer.c b/src/backend/utils/activity/pgstat_checkpointer.c index 3e9ab45103..26dec112f6 100644 --- a/src/backend/utils/activity/pgstat_checkpointer.c +++ b/src/backend/utils/activity/pgstat_checkpointer.c @@ -24,7 +24,7 @@ PgStat_CheckpointerStats PendingCheckpointerStats = {0}; /* - * Report checkpointer statistics + * Report checkpointer and IO statistics */ void pgstat_report_checkpointer(void) @@ -62,6 +62,11 @@ pgstat_report_checkpointer(void) * Clear out the statistics buffer, so it can be re-used. */ MemSet(&PendingCheckpointerStats, 0, sizeof(PendingCheckpointerStats)); + + /* + * Report IO statistics + */ + pgstat_flush_io(false); } /* diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c new file mode 100644 index 0000000000..8eac7d9e53 --- /dev/null +++ b/src/backend/utils/activity/pgstat_io.c @@ -0,0 +1,400 @@ +/* ------------------------------------------------------------------------- + * + * pgstat_io.c + * Implementation of IO statistics. + * + * This file contains the implementation of IO statistics. It is kept separate + * from pgstat.c to enforce the line between the statistics access / storage + * implementation and the details about individual types of statistics. + * + * Copyright (c) 2021-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/activity/pgstat_io.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "utils/pgstat_internal.h" + + +static PgStat_BackendIO PendingIOStats; +bool have_iostats = false; + + +void +pgstat_count_io_op(IOOp io_op, IOObject io_object, IOContext io_context) +{ + Assert(io_context < IOCONTEXT_NUM_TYPES); + Assert(io_object < IOOBJECT_NUM_TYPES); + Assert(io_op < IOOP_NUM_TYPES); + Assert(pgstat_tracks_io_op(MyBackendType, io_context, io_object, io_op)); + + PendingIOStats.data[io_context][io_object][io_op]++; + + have_iostats = true; +} + +PgStat_IO * +pgstat_fetch_stat_io(void) +{ + pgstat_snapshot_fixed(PGSTAT_KIND_IO); + + return &pgStatLocal.snapshot.io; +} + +/* + * Flush out locally pending IO statistics + * + * If no stats have been recorded, this function returns false. + * + * If nowait is true, this function returns true if the lock could not be + * acquired. Otherwise, return false. + */ +bool +pgstat_flush_io(bool nowait) +{ + LWLock *bktype_lock; + PgStat_BackendIO *bktype_shstats; + + if (!have_iostats) + return false; + + bktype_lock = &pgStatLocal.shmem->io.locks[MyBackendType]; + bktype_shstats = + &pgStatLocal.shmem->io.stats.stats[MyBackendType]; + + if (!nowait) + LWLockAcquire(bktype_lock, LW_EXCLUSIVE); + else if (!LWLockConditionalAcquire(bktype_lock, LW_EXCLUSIVE)) + return true; + + for (IOContext io_context = IOCONTEXT_FIRST; + io_context < IOCONTEXT_NUM_TYPES; io_context++) + for (IOObject io_object = IOOBJECT_FIRST; + io_object < IOOBJECT_NUM_TYPES; io_object++) + for (IOOp io_op = IOOP_FIRST; + io_op < IOOP_NUM_TYPES; io_op++) + bktype_shstats->data[io_context][io_object][io_op] += + PendingIOStats.data[io_context][io_object][io_op]; + + Assert(pgstat_bktype_io_stats_valid(bktype_shstats, MyBackendType)); + + LWLockRelease(bktype_lock); + + memset(&PendingIOStats, 0, sizeof(PendingIOStats)); + + have_iostats = false; + + return false; +} + +const char * +pgstat_get_io_context_name(IOContext io_context) +{ + switch (io_context) + { + case IOCONTEXT_BULKREAD: + return "bulkread"; + case IOCONTEXT_BULKWRITE: + return "bulkwrite"; + case IOCONTEXT_NORMAL: + return "normal"; + case IOCONTEXT_VACUUM: + return "vacuum"; + } + + elog(ERROR, "unrecognized IOContext value: %d", io_context); + pg_unreachable(); +} + +const char * +pgstat_get_io_object_name(IOObject io_object) +{ + switch (io_object) + { + case IOOBJECT_RELATION: + return "relation"; + case IOOBJECT_TEMP_RELATION: + return "temp relation"; + } + + elog(ERROR, "unrecognized IOObject value: %d", io_object); + pg_unreachable(); +} + +const char * +pgstat_get_io_op_name(IOOp io_op) +{ + switch (io_op) + { + case IOOP_EVICT: + return "evicted"; + case IOOP_EXTEND: + return "extended"; + case IOOP_FSYNC: + return "files synced"; + case IOOP_READ: + return "read"; + case IOOP_REUSE: + return "reused"; + case IOOP_WRITE: + return "written"; + } + + elog(ERROR, "unrecognized IOOp value: %d", io_op); + pg_unreachable(); +} + +void +pgstat_io_reset_all_cb(TimestampTz ts) +{ + for (int i = 0; i < BACKEND_NUM_TYPES; i++) + { + LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i]; + PgStat_BackendIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i]; + + LWLockAcquire(bktype_lock, LW_EXCLUSIVE); + + /* + * Use the lock in the first BackendType's PgStat_BackendIO to protect + * the reset timestamp as well. + */ + if (i == 0) + pgStatLocal.shmem->io.stats.stat_reset_timestamp = ts; + + memset(bktype_shstats, 0, sizeof(*bktype_shstats)); + LWLockRelease(bktype_lock); + } +} + +void +pgstat_io_snapshot_cb(void) +{ + for (int i = 0; i < BACKEND_NUM_TYPES; i++) + { + LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i]; + PgStat_BackendIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i]; + PgStat_BackendIO *bktype_snap = &pgStatLocal.snapshot.io.stats[i]; + + LWLockAcquire(bktype_lock, LW_SHARED); + + /* + * Use the lock in the first BackendType's PgStat_BackendIO to protect + * the reset timestamp as well. + */ + if (i == 0) + pgStatLocal.snapshot.io.stat_reset_timestamp = + pgStatLocal.shmem->io.stats.stat_reset_timestamp; + + /* using struct assignment due to better type safety */ + *bktype_snap = *bktype_shstats; + LWLockRelease(bktype_lock); + } +} + +/* +* IO statistics are not collected for all BackendTypes. +* +* The following BackendTypes do not participate in the cumulative stats +* subsystem or do not perform IO on which we currently track: +* - Syslogger because it is not connected to shared memory +* - Archiver because most relevant archiving IO is delegated to a +* specialized command or module +* - WAL Receiver and WAL Writer IO is not tracked in pg_stat_io for now +* +* Function returns true if BackendType participates in the cumulative stats +* subsystem for IO and false if it does not. +*/ +bool +pgstat_tracks_io_bktype(BackendType bktype) +{ + /* + * List every type so that new backend types trigger a warning about + * needing to adjust this switch. + */ + switch (bktype) + { + case B_INVALID: + case B_ARCHIVER: + case B_LOGGER: + case B_WAL_RECEIVER: + case B_WAL_WRITER: + return false; + + case B_AUTOVAC_LAUNCHER: + case B_AUTOVAC_WORKER: + case B_BACKEND: + case B_BG_WORKER: + case B_BG_WRITER: + case B_CHECKPOINTER: + case B_STANDALONE_BACKEND: + case B_STARTUP: + case B_WAL_SENDER: + return true; + } + + return false; +} + +/* + * Some BackendTypes do not perform IO in certain IOContexts. Some IOObjects + * are never operated on in some IOContexts. Check that the given BackendType + * is expected to do IO in the given IOContext and that the given IOObject is + * expected to be operated on in the given IOContext. + */ +bool +pgstat_tracks_io_object(BackendType bktype, IOContext io_context, + IOObject io_object) +{ + bool no_temp_rel; + + /* + * Some BackendTypes should never track IO statistics. + */ + if (!pgstat_tracks_io_bktype(bktype)) + return false; + + /* + * Currently, IO on temporary relations can only occur in the + * IOCONTEXT_NORMAL IOContext. + */ + if (io_context != IOCONTEXT_NORMAL && + io_object == IOOBJECT_TEMP_RELATION) + return false; + + /* + * In core Postgres, only regular backends and WAL Sender processes + * executing queries will use local buffers and operate on temporary + * relations. Parallel workers will not use local buffers (see + * InitLocalBuffers()); however, extensions leveraging background workers + * have no such limitation, so track IO on IOOBJECT_TEMP_RELATION for + * BackendType B_BG_WORKER. + */ + no_temp_rel = bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER || + bktype == B_CHECKPOINTER || bktype == B_AUTOVAC_WORKER || + bktype == B_STANDALONE_BACKEND || bktype == B_STARTUP; + + if (no_temp_rel && io_context == IOCONTEXT_NORMAL && + io_object == IOOBJECT_TEMP_RELATION) + return false; + + /* + * Some BackendTypes do not currently perform any IO in certain + * IOContexts, and, while it may not be inherently incorrect for them to + * do so, excluding those rows from the view makes the view easier to use. + */ + if ((bktype == B_CHECKPOINTER || bktype == B_BG_WRITER) && + (io_context == IOCONTEXT_BULKREAD || + io_context == IOCONTEXT_BULKWRITE || + io_context == IOCONTEXT_VACUUM)) + return false; + + if (bktype == B_AUTOVAC_LAUNCHER && io_context == IOCONTEXT_VACUUM) + return false; + + if ((bktype == B_AUTOVAC_WORKER || bktype == B_AUTOVAC_LAUNCHER) && + io_context == IOCONTEXT_BULKWRITE) + return false; + + return true; +} + +/* + * Some BackendTypes will never do certain IOOps and some IOOps should not + * occur in certain IOContexts. Check that the given IOOp is valid for the + * given BackendType in the given IOContext. Note that there are currently no + * cases of an IOOp being invalid for a particular BackendType only within a + * certain IOContext. + */ +bool +pgstat_tracks_io_op(BackendType bktype, IOContext io_context, + IOObject io_object, IOOp io_op) +{ + bool strategy_io_context; + + /* if (io_context, io_object) will never collect stats, we're done */ + if (!pgstat_tracks_io_object(bktype, io_context, io_object)) + return false; + + /* + * Some BackendTypes will not do certain IOOps. + */ + if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) && + (io_op == IOOP_READ || io_op == IOOP_EVICT)) + return false; + + if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER || + bktype == B_CHECKPOINTER) && io_op == IOOP_EXTEND) + return false; + + /* + * Some IOOps are not valid in certain IOContexts and some IOOps are only + * valid in certain contexts. + */ + if (io_context == IOCONTEXT_BULKREAD && io_op == IOOP_EXTEND) + return false; + + strategy_io_context = io_context == IOCONTEXT_BULKREAD || + io_context == IOCONTEXT_BULKWRITE || io_context == IOCONTEXT_VACUUM; + + /* + * IOOP_REUSE is only relevant when a BufferAccessStrategy is in use. + */ + if (!strategy_io_context && io_op == IOOP_REUSE) + return false; + + /* + * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are + * counted in the IOCONTEXT_NORMAL IOContext. See comment in + * register_dirty_segment() for more details. + */ + if (strategy_io_context && io_op == IOOP_FSYNC) + return false; + + /* + * Temporary tables are not logged and thus do not require fsync'ing. + */ + if (io_context == IOCONTEXT_NORMAL && + io_object == IOOBJECT_TEMP_RELATION && io_op == IOOP_FSYNC) + return false; + + return true; +} + +/* + * Check that stats have not been counted for any combination of IOContext, + * IOObject, and IOOp which are not tracked for the passed-in BackendType. The + * passed-in PgStat_BackendIO must contain stats from the BackendType specified + * by the second parameter. Caller is responsible for locking the passed-in + * PgStat_BackendIO, if needed. + */ +bool +pgstat_bktype_io_stats_valid(PgStat_BackendIO *backend_io, + BackendType bktype) +{ + bool bktype_tracked = pgstat_tracks_io_bktype(bktype); + + for (IOContext io_context = IOCONTEXT_FIRST; + io_context < IOCONTEXT_NUM_TYPES; io_context++) + { + for (IOObject io_object = IOOBJECT_FIRST; + io_object < IOOBJECT_NUM_TYPES; io_object++) + { + /* + * Don't bother trying to skip to the next loop iteration if + * pgstat_tracks_io_object() would return false here. We still + * need to validate that each counter is zero anyway. + */ + for (IOOp io_op = IOOP_FIRST; io_op < IOOP_NUM_TYPES; io_op++) + { + if ((!bktype_tracked || !pgstat_tracks_io_op(bktype, io_context, io_object, io_op)) && + backend_io->data[io_context][io_object][io_op] != 0) + return false; + } + } + } + + return true; +} diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index 2e20b93c20..f793ac1516 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -206,7 +206,7 @@ pgstat_drop_relation(Relation rel) } /* - * Report that the table was just vacuumed. + * Report that the table was just vacuumed and flush IO statistics. */ void pgstat_report_vacuum(Oid tableoid, bool shared, @@ -258,10 +258,18 @@ pgstat_report_vacuum(Oid tableoid, bool shared, } pgstat_unlock_entry(entry_ref); + + /* + * Flush IO statistics now. pgstat_report_stat() will flush IO stats, + * however this will not be called until after an entire autovacuum cycle + * is done -- which will likely vacuum many relations -- or until the + * VACUUM command has processed all tables and committed. + */ + pgstat_flush_io(false); } /* - * Report that the table was just analyzed. + * Report that the table was just analyzed and flush IO statistics. * * Caller must provide new live- and dead-tuples estimates, as well as a * flag indicating whether to reset the mod_since_analyze counter. @@ -341,6 +349,9 @@ pgstat_report_analyze(Relation rel, } pgstat_unlock_entry(entry_ref); + + /* see pgstat_report_vacuum() */ + pgstat_flush_io(false); } /* diff --git a/src/backend/utils/activity/pgstat_shmem.c b/src/backend/utils/activity/pgstat_shmem.c index c1506b53d0..09fffd0e82 100644 --- a/src/backend/utils/activity/pgstat_shmem.c +++ b/src/backend/utils/activity/pgstat_shmem.c @@ -202,6 +202,10 @@ StatsShmemInit(void) LWLockInitialize(&ctl->checkpointer.lock, LWTRANCHE_PGSTATS_DATA); LWLockInitialize(&ctl->slru.lock, LWTRANCHE_PGSTATS_DATA); LWLockInitialize(&ctl->wal.lock, LWTRANCHE_PGSTATS_DATA); + + for (int i = 0; i < BACKEND_NUM_TYPES; i++) + LWLockInitialize(&ctl->io.locks[i], + LWTRANCHE_PGSTATS_DATA); } else { diff --git a/src/backend/utils/activity/pgstat_wal.c b/src/backend/utils/activity/pgstat_wal.c index e7a82b5fed..e8598b2f4e 100644 --- a/src/backend/utils/activity/pgstat_wal.c +++ b/src/backend/utils/activity/pgstat_wal.c @@ -34,7 +34,7 @@ static WalUsage prevWalUsage; /* * Calculate how much WAL usage counters have increased and update - * shared statistics. + * shared WAL and IO statistics. * * Must be called by processes that generate WAL, that do not call * pgstat_report_stat(), like walwriter. @@ -43,6 +43,8 @@ void pgstat_report_wal(bool force) { pgstat_flush_wal(force); + + pgstat_flush_io(force); } /* diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 58bd1360b9..42b890b806 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1593,6 +1593,8 @@ pg_stat_reset_shared(PG_FUNCTION_ARGS) pgstat_reset_of_kind(PGSTAT_KIND_BGWRITER); pgstat_reset_of_kind(PGSTAT_KIND_CHECKPOINTER); } + else if (strcmp(target, "io") == 0) + pgstat_reset_of_kind(PGSTAT_KIND_IO); else if (strcmp(target, "recovery_prefetch") == 0) XLogPrefetchResetStats(); else if (strcmp(target, "wal") == 0) @@ -1601,7 +1603,7 @@ pg_stat_reset_shared(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("unrecognized reset target: \"%s\"", target), - errhint("Target must be \"archiver\", \"bgwriter\", \"recovery_prefetch\", or \"wal\"."))); + errhint("Target must be \"archiver\", \"io\", \"bgwriter\", \"recovery_prefetch\", or \"wal\"."))); PG_RETURN_VOID(); } diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 0ffeefc437..0aaf600a78 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -331,6 +331,8 @@ typedef enum BackendType B_WAL_WRITER, } BackendType; +#define BACKEND_NUM_TYPES (B_WAL_WRITER + 1) + extern PGDLLIMPORT BackendType MyBackendType; extern const char *GetBackendTypeDesc(BackendType backendType); diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 5e3326a3b9..ea7e19c48d 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -48,6 +48,7 @@ typedef enum PgStat_Kind PGSTAT_KIND_ARCHIVER, PGSTAT_KIND_BGWRITER, PGSTAT_KIND_CHECKPOINTER, + PGSTAT_KIND_IO, PGSTAT_KIND_SLRU, PGSTAT_KIND_WAL, } PgStat_Kind; @@ -276,6 +277,55 @@ typedef struct PgStat_CheckpointerStats PgStat_Counter buf_fsync_backend; } PgStat_CheckpointerStats; + +/* + * Types related to counting IO operations + */ +typedef enum IOContext +{ + IOCONTEXT_BULKREAD, + IOCONTEXT_BULKWRITE, + IOCONTEXT_NORMAL, + IOCONTEXT_VACUUM, +} IOContext; + +#define IOCONTEXT_FIRST IOCONTEXT_BULKREAD +#define IOCONTEXT_NUM_TYPES (IOCONTEXT_VACUUM + 1) + +typedef enum IOObject +{ + IOOBJECT_RELATION, + IOOBJECT_TEMP_RELATION, +} IOObject; + +#define IOOBJECT_FIRST IOOBJECT_RELATION +#define IOOBJECT_NUM_TYPES (IOOBJECT_TEMP_RELATION + 1) + +typedef enum IOOp +{ + IOOP_EVICT, + IOOP_EXTEND, + IOOP_FSYNC, + IOOP_READ, + IOOP_REUSE, + IOOP_WRITE, +} IOOp; + +#define IOOP_FIRST IOOP_EVICT +#define IOOP_NUM_TYPES (IOOP_WRITE + 1) + +typedef struct PgStat_BackendIO +{ + PgStat_Counter data[IOCONTEXT_NUM_TYPES][IOOBJECT_NUM_TYPES][IOOP_NUM_TYPES]; +} PgStat_BackendIO; + +typedef struct PgStat_IO +{ + TimestampTz stat_reset_timestamp; + PgStat_BackendIO stats[BACKEND_NUM_TYPES]; +} PgStat_IO; + + typedef struct PgStat_StatDBEntry { PgStat_Counter xact_commit; @@ -453,6 +503,23 @@ extern void pgstat_report_checkpointer(void); extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void); +/* + * Functions in pgstat_io.c + */ + +extern void pgstat_count_io_op(IOOp io_op, IOObject io_object, IOContext io_context); +extern PgStat_IO *pgstat_fetch_stat_io(void); +extern const char *pgstat_get_io_context_name(IOContext io_context); +extern const char *pgstat_get_io_object_name(IOObject io_object); +extern const char *pgstat_get_io_op_name(IOOp io_op); + +extern bool pgstat_tracks_io_bktype(BackendType bktype); +extern bool pgstat_tracks_io_object(BackendType bktype, + IOContext io_context, IOObject io_object); +extern bool pgstat_tracks_io_op(BackendType bktype, IOContext io_context, + IOObject io_object, IOOp io_op); + + /* * Functions in pgstat_database.c */ diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h index 12fd51f1ae..bf8e4c3b8b 100644 --- a/src/include/utils/pgstat_internal.h +++ b/src/include/utils/pgstat_internal.h @@ -329,6 +329,17 @@ typedef struct PgStatShared_Checkpointer PgStat_CheckpointerStats reset_offset; } PgStatShared_Checkpointer; +/* shared version of PgStat_IO */ +typedef struct PgStatShared_IO +{ + /* + * locks[i] protects stats.stats[i]. locks[0] also protects + * stats.stat_reset_timestamp. + */ + LWLock locks[BACKEND_NUM_TYPES]; + PgStat_IO stats; +} PgStatShared_IO; + typedef struct PgStatShared_SLRU { /* lock protects ->stats */ @@ -419,6 +430,7 @@ typedef struct PgStat_ShmemControl PgStatShared_Archiver archiver; PgStatShared_BgWriter bgwriter; PgStatShared_Checkpointer checkpointer; + PgStatShared_IO io; PgStatShared_SLRU slru; PgStatShared_Wal wal; } PgStat_ShmemControl; @@ -442,6 +454,8 @@ typedef struct PgStat_Snapshot PgStat_CheckpointerStats checkpointer; + PgStat_IO io; + PgStat_SLRUStats slru[SLRU_NUM_ELEMENTS]; PgStat_WalStats wal; @@ -549,6 +563,17 @@ extern void pgstat_database_reset_timestamp_cb(PgStatShared_Common *header, Time extern bool pgstat_function_flush_cb(PgStat_EntryRef *entry_ref, bool nowait); +/* + * Functions in pgstat_io.c + */ + +extern void pgstat_io_reset_all_cb(TimestampTz ts); +extern void pgstat_io_snapshot_cb(void); +extern bool pgstat_flush_io(bool nowait); +extern bool pgstat_bktype_io_stats_valid(PgStat_BackendIO *context_ops, + BackendType bktype); + + /* * Functions in pgstat_relation.c */ @@ -643,6 +668,13 @@ extern void pgstat_create_transactional(PgStat_Kind kind, Oid dboid, Oid objoid) extern PGDLLIMPORT PgStat_LocalState pgStatLocal; +/* + * Variables in pgstat_io.c + */ + +extern PGDLLIMPORT bool have_iostats; + + /* * Variables in pgstat_slru.c */ diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 23bafec5f7..7b66b1bc89 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1106,7 +1106,10 @@ ID INFIX INT128 INTERFACE_INFO +IOContext IOFuncSelector +IOObject +IOOp IPCompareMethod ITEM IV @@ -2016,6 +2019,7 @@ PgStatShared_Common PgStatShared_Database PgStatShared_Function PgStatShared_HashEntry +PgStatShared_IO PgStatShared_Relation PgStatShared_ReplSlot PgStatShared_SLRU @@ -2033,6 +2037,8 @@ PgStat_FetchConsistency PgStat_FunctionCallUsage PgStat_FunctionCounts PgStat_HashKey +PgStat_IO +PgStat_BackendIO PgStat_Kind PgStat_KindInfo PgStat_LocalState -- 2.34.1