From 7cdd6fc78ed82180a705ab9667714f80d08c5f7d Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Mon, 24 Apr 2023 18:21:54 -0400 Subject: [PATCH v4] Add writeback to pg_stat_io 28e626bde00 added the notion of IOOps but neglected to include writeback. With the addition of IO timing to pg_stat_io in ac8d53dae5, the omission of writeback caused some confusion. Checkpointer write timing in pg_stat_io often differed greatly from the write timing written to the log. To fix this, add IOOp IOOP_WRITEBACK and track writebacks and writeback timing in pg_stat_io. Reviewed-by: Kyotaro Horiguchi Reported-by: Andres Freund Discussion: https://postgr.es/m/20230419172326.dhgyo4wrrhulovt6%40awork3.anarazel.de --- doc/src/sgml/monitoring.sgml | 26 ++++++++++++++++++++++++++ src/backend/catalog/system_views.sql | 2 ++ src/backend/postmaster/bgwriter.c | 10 ++++++++-- src/backend/storage/buffer/buf_init.c | 12 +++++++++--- src/backend/storage/buffer/bufmgr.c | 20 ++++++++++++++++++-- src/backend/utils/adt/pgstatfuncs.c | 5 +++++ src/include/catalog/pg_proc.dat | 6 +++--- src/include/pgstat.h | 3 ++- src/include/storage/buf_internals.h | 6 +++++- src/test/regress/expected/rules.out | 4 +++- src/test/regress/expected/stats.out | 4 ++-- src/test/regress/sql/stats.sql | 4 ++-- 12 files changed, 85 insertions(+), 17 deletions(-) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 99f7f95c39..27b6f1a0a0 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -3867,6 +3867,32 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i + + + + writebacks bigint + + + Number of units of size op_bytes which the backend + requested the kernel write out to permanent storage. + + + + + + + + writeback_time double precision + + + Time spent in writeback operations in milliseconds (if + is enabled, otherwise zero). This + includes the time spent queueing write-out requests and, potentially, + the time spent to write out the dirty data. + + + + diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 48aacf66ee..d0c932ad0e 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1131,6 +1131,8 @@ SELECT b.read_time, b.writes, b.write_time, + b.writebacks, + b.writeback_time, b.extends, b.extend_time, b.op_bytes, diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index caad642ec9..dbf866f262 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -129,7 +129,12 @@ BackgroundWriterMain(void) ALLOCSET_DEFAULT_SIZES); MemoryContextSwitchTo(bgwriter_context); - WritebackContextInit(&wb_context, &bgwriter_flush_after); + /* + * bgwriter will only request writeback for permanent relations in + * IOCONTEXT_NORMAL. + */ + WritebackContextInit(&wb_context, IOOBJECT_RELATION, IOCONTEXT_NORMAL, + &bgwriter_flush_after); /* * If an exception is encountered, processing resumes here. @@ -185,7 +190,8 @@ BackgroundWriterMain(void) MemoryContextResetAndDeleteChildren(bgwriter_context); /* re-initialize to avoid repeated errors causing problems */ - WritebackContextInit(&wb_context, &bgwriter_flush_after); + WritebackContextInit(&wb_context, IOOBJECT_RELATION, IOCONTEXT_NORMAL, + &bgwriter_flush_after); /* Now we can allow interrupts again */ RESUME_INTERRUPTS(); diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c index 0057443f0c..a7182fe95a 100644 --- a/src/backend/storage/buffer/buf_init.c +++ b/src/backend/storage/buffer/buf_init.c @@ -145,9 +145,15 @@ InitBufferPool(void) /* Init other shared buffer-management stuff */ StrategyInitialize(!foundDescs); - /* Initialize per-backend file flush context */ - WritebackContextInit(&BackendWritebackContext, - &backend_flush_after); + /* + * Initialize per-backend file flush context. IOContext is initialized to + * IOCONTEXT_NORMAL because this is the most common context. IOObject is + * initialized to IOOBJECT_RELATION because writeback is currently only + * requested for permanent relations in shared buffers. The backend can + * overwrite these as appropriate. + */ + WritebackContextInit(&BackendWritebackContext, IOOBJECT_RELATION, + IOCONTEXT_NORMAL, &backend_flush_after); } /* diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 1fa689052e..116910cdfe 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -1685,6 +1685,8 @@ again: FlushBuffer(buf_hdr, NULL, IOOBJECT_RELATION, io_context); LWLockRelease(content_lock); + BackendWritebackContext.io_object = IOOBJECT_RELATION; + BackendWritebackContext.io_context = io_context; ScheduleBufferTagForWriteback(&BackendWritebackContext, &buf_hdr->tag); } @@ -2555,7 +2557,12 @@ BufferSync(int flags) if (num_to_scan == 0) return; /* nothing to do */ - WritebackContextInit(&wb_context, &checkpoint_flush_after); + /* + * Checkpointer will only request writeback of permanent relations in + * IOCONTEXT_NORMAL. + */ + WritebackContextInit(&wb_context, IOOBJECT_RELATION, IOCONTEXT_NORMAL, + &checkpoint_flush_after); TRACE_POSTGRESQL_BUFFER_SYNC_START(NBuffers, num_to_scan); @@ -5433,10 +5440,13 @@ ts_ckpt_progress_comparator(Datum a, Datum b, void *arg) * writeback control will be performed. */ void -WritebackContextInit(WritebackContext *context, int *max_pending) +WritebackContextInit(WritebackContext *context, IOObject io_object, + IOContext io_context, int *max_pending) { Assert(*max_pending <= WRITEBACK_MAX_PENDING_FLUSHES); + context->io_object = io_object; + context->io_context = io_context; context->max_pending = max_pending; context->nr_pending = 0; } @@ -5491,6 +5501,7 @@ ScheduleBufferTagForWriteback(WritebackContext *context, BufferTag *tag) void IssuePendingWritebacks(WritebackContext *context) { + instr_time io_start; int i; if (context->nr_pending == 0) @@ -5502,6 +5513,8 @@ IssuePendingWritebacks(WritebackContext *context) */ sort_pending_writebacks(context->pending_writebacks, context->nr_pending); + io_start = pgstat_prepare_io_time(); + /* * Coalesce neighbouring writes, but nothing else. For that we iterate * through the, now sorted, array of pending flushes, and look forward to @@ -5555,6 +5568,9 @@ IssuePendingWritebacks(WritebackContext *context) smgrwriteback(reln, BufTagGetForkNum(&tag), tag.blockNum, nblocks); } + pgstat_count_io_op_time(context->io_object, context->io_context, + IOOP_WRITEBACK, io_start, context->nr_pending); + context->nr_pending = 0; } diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 0d57b6a7c0..70da0a2de1 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1268,6 +1268,8 @@ typedef enum io_stat_col IO_COL_READ_TIME, IO_COL_WRITES, IO_COL_WRITE_TIME, + IO_COL_WRITEBACKS, + IO_COL_WRITEBACK_TIME, IO_COL_EXTENDS, IO_COL_EXTEND_TIME, IO_COL_CONVERSION, @@ -1303,6 +1305,8 @@ pgstat_get_io_op_index(IOOp io_op) return IO_COL_REUSES; case IOOP_WRITE: return IO_COL_WRITES; + case IOOP_WRITEBACK: + return IO_COL_WRITEBACKS; } elog(ERROR, "unrecognized IOOp value: %d", io_op); @@ -1322,6 +1326,7 @@ pgstat_get_io_time_index(IOOp io_op) { case IOOP_READ: case IOOP_WRITE: + case IOOP_WRITEBACK: case IOOP_EXTEND: case IOOP_FSYNC: return pgstat_get_io_op_index(io_op) + 1; diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index b2bc81b15f..e766b9b600 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5760,9 +5760,9 @@ proname => 'pg_stat_get_io', provolatile => 'v', prorows => '30', proretset => 't', proparallel => 'r', prorettype => 'record', proargtypes => '', - proallargtypes => '{text,text,text,int8,float8,int8,float8,int8,float8,int8,int8,int8,int8,int8,float8,timestamptz}', - proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}', - proargnames => '{backend_type,object,context,reads,read_time,writes,write_time,extends,extend_time,op_bytes,hits,evictions,reuses,fsyncs,fsync_time,stats_reset}', + proallargtypes => '{text,text,text,int8,float8,int8,float8,int8,float8,int8,float8,int8,int8,int8,int8,int8,float8,timestamptz}', + proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}', + proargnames => '{backend_type,object,context,reads,read_time,writes,write_time,writebacks,writeback_time,extends,extend_time,op_bytes,hits,evictions,reuses,fsyncs,fsync_time,stats_reset}', prosrc => 'pg_stat_get_io' }, { oid => '1136', descr => 'statistics: information about WAL activity', diff --git a/src/include/pgstat.h b/src/include/pgstat.h index fff4ad5b6d..57a2c0866a 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -300,9 +300,10 @@ typedef enum IOOp IOOP_READ, IOOP_REUSE, IOOP_WRITE, + IOOP_WRITEBACK, } IOOp; -#define IOOP_NUM_TYPES (IOOP_WRITE + 1) +#define IOOP_NUM_TYPES (IOOP_WRITEBACK + 1) typedef struct PgStat_BktypeIO { diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 136cf8fbaf..0921710605 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -295,6 +295,9 @@ typedef struct PendingWriteback /* struct forward declared in bufmgr.h */ typedef struct WritebackContext { + IOObject io_object; + IOContext io_context; + /* pointer to the max number of writeback requests to coalesce */ int *max_pending; @@ -387,7 +390,8 @@ extern PGDLLIMPORT CkptSortItem *CkptBufferIds; * Internal buffer management routines */ /* bufmgr.c */ -extern void WritebackContextInit(WritebackContext *context, int *max_pending); +extern void WritebackContextInit(WritebackContext *context, IOObject io_object, + IOContext io_context, int *max_pending); extern void IssuePendingWritebacks(WritebackContext *context); extern void ScheduleBufferTagForWriteback(WritebackContext *context, BufferTag *tag); diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 69957687fe..0bd6db2bf8 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1887,6 +1887,8 @@ pg_stat_io| SELECT backend_type, read_time, writes, write_time, + writebacks, + writeback_time, extends, extend_time, op_bytes, @@ -1896,7 +1898,7 @@ pg_stat_io| SELECT backend_type, fsyncs, fsync_time, stats_reset - FROM pg_stat_get_io() b(backend_type, object, context, reads, read_time, writes, write_time, extends, extend_time, op_bytes, hits, evictions, reuses, fsyncs, fsync_time, stats_reset); + FROM pg_stat_get_io() b(backend_type, object, context, reads, read_time, writes, write_time, writebacks, writeback_time, extends, extend_time, op_bytes, hits, evictions, reuses, fsyncs, fsync_time, stats_reset); pg_stat_progress_analyze| SELECT s.pid, s.datid, d.datname, diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out index 813d6d39ea..26ff27911a 100644 --- a/src/test/regress/expected/stats.out +++ b/src/test/regress/expected/stats.out @@ -1384,7 +1384,7 @@ SELECT pg_stat_have_stats('io', 0, 0); t (1 row) -SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset +SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(writebacks) + sum(hits) AS io_stats_pre_reset FROM pg_stat_io \gset SELECT pg_stat_reset_shared('io'); pg_stat_reset_shared @@ -1392,7 +1392,7 @@ SELECT pg_stat_reset_shared('io'); (1 row) -SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset +SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(writebacks) + sum(hits) AS io_stats_post_reset FROM pg_stat_io \gset SELECT :io_stats_post_reset < :io_stats_pre_reset; ?column? diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql index 99a28bb79c..257e91d10d 100644 --- a/src/test/regress/sql/stats.sql +++ b/src/test/regress/sql/stats.sql @@ -694,10 +694,10 @@ SELECT :io_sum_bulkwrite_strategy_extends_after > :io_sum_bulkwrite_strategy_ext -- Test IO stats reset SELECT pg_stat_have_stats('io', 0, 0); -SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset +SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(writebacks) + sum(hits) AS io_stats_pre_reset FROM pg_stat_io \gset SELECT pg_stat_reset_shared('io'); -SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset +SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(writebacks) + sum(hits) AS io_stats_post_reset FROM pg_stat_io \gset SELECT :io_stats_post_reset < :io_stats_pre_reset; -- 2.37.2