From 6ddf6724030ee0c09bd8027252e244b78440514f Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Fri, 14 Mar 2025 11:39:34 -0400 Subject: [PATCH v2.8 26/38] aio: Basic read_stream adjustments for real AIO Adapt the read stream logic for real AIO: - If AIO is enabled, we shouldn't issue advice, but if it isn't, we should continue issuing advice - AIO benefits from reading ahead with direct IO - While in read_stream_look_ahead(), we can use AIO batch submission mode for increased efficiency There is one comment talking about max_ios logic with "real asynchronous I/O" that I am not sure about, so I left it alone for now. There are further improvements we should consider, e.g. waiting to issue IOs until we can issue multiple IOs at once. But that's left for a future change, since it would involve additional heuristics. --- src/backend/storage/aio/read_stream.c | 39 ++++++++++++++++++++------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/src/backend/storage/aio/read_stream.c b/src/backend/storage/aio/read_stream.c index 0f1525f46c5..b44cc358f29 100644 --- a/src/backend/storage/aio/read_stream.c +++ b/src/backend/storage/aio/read_stream.c @@ -72,6 +72,7 @@ #include "postgres.h" #include "miscadmin.h" +#include "storage/aio.h" #include "storage/fd.h" #include "storage/smgr.h" #include "storage/read_stream.h" @@ -99,6 +100,7 @@ struct ReadStream int16 pinned_buffers; int16 distance; int16 initialized_buffers; + bool sync_mode; bool advice_enabled; bool temporary; @@ -420,6 +422,13 @@ read_stream_start_pending_read(ReadStream *stream) static void read_stream_look_ahead(ReadStream *stream) { + /* + * Allow amortizing the cost of submitting IO over multiple IOs. This + * requires that we don't do any operations that could lead to a deadlock + * with staged-but-unsubmitted IO. + */ + pgaio_enter_batchmode(); + while (stream->ios_in_progress < stream->max_ios && stream->pinned_buffers + stream->pending_read_nblocks < stream->distance) { @@ -467,6 +476,7 @@ read_stream_look_ahead(ReadStream *stream) { /* We've hit the buffer or I/O limit. Rewind and stop here. */ read_stream_unget_block(stream, blocknum); + pgaio_exit_batchmode(); return; } } @@ -501,6 +511,8 @@ read_stream_look_ahead(ReadStream *stream) * time. */ Assert(stream->pinned_buffers > 0 || stream->distance == 0); + + pgaio_exit_batchmode(); } /* @@ -556,12 +568,12 @@ read_stream_begin_impl(int flags, max_ios = get_tablespace_io_concurrency(tablespace_id); /* - * XXX Since we don't have asynchronous I/O yet, if direct I/O is enabled - * then just behave as though I/O concurrency is set to 0. Otherwise we - * would look ahead pinning many buffers for no benefit, for lack of - * advice and AIO. + * If real asynchronous I/O is disabled, and direct I/O is enabled, just + * behave as though I/O concurrency is set to 0. Otherwise we would look + * ahead pinning many buffers for no benefit, as the advice-based + * readahead doesn't support direct I/O. */ - if (io_direct_flags & IO_DIRECT_DATA) + if (io_method == IOMETHOD_SYNC && (io_direct_flags & IO_DIRECT_DATA)) max_ios = 0; /* Cap to INT16_MAX to avoid overflowing below */ @@ -641,15 +653,19 @@ read_stream_begin_impl(int flags, stream->per_buffer_data = (void *) MAXALIGN(&stream->ios[Max(1, max_ios)]); + stream->sync_mode = io_method == IOMETHOD_SYNC; + #ifdef USE_PREFETCH /* - * This system supports prefetching advice. We can use it as long as - * direct I/O isn't enabled, the caller hasn't promised sequential access - * (overriding our detection heuristics), and max_ios hasn't been set to - * zero. + * This system supports prefetching advice. + * + * Issue advice only if AIO is not used, direct I/O isn't enabled, the + * caller hasn't promised sequential access (overriding our detection + * heuristics), and max_ios hasn't been set to zero. */ - if ((io_direct_flags & IO_DIRECT_DATA) == 0 && + if (stream->sync_mode && + (io_direct_flags & IO_DIRECT_DATA) == 0 && (flags & READ_STREAM_SEQUENTIAL) == 0 && max_ios > 0) stream->advice_enabled = true; @@ -659,6 +675,9 @@ read_stream_begin_impl(int flags, * For now, max_ios = 0 is interpreted as max_ios = 1 with advice disabled * above. If we had real asynchronous I/O we might need a slightly * different definition. + * + * FIXME: Not sure what different definition we would need? I guess we + * could add the READ_BUFFERS_SYNCHRONOUSLY flag automatically? */ if (max_ios == 0) max_ios = 1; -- 2.48.1.76.g4e746b1a31.dirty