From 40ec3844785b610c2c9aa66af644a9f9d4fbb5da Mon Sep 17 00:00:00 2001
From: Andres Freund
Date: Wed, 22 Jan 2025 16:09:51 -0500
Subject: [PATCH v2.5 24/30] bufmgr: Implement AIO write support

As of this commit there are no users of these AIO facilities; those will come
in later commits.

Author:
Reviewed-By:
Discussion: https://postgr.es/m/
Backpatch:
---
 src/include/storage/aio.h              |   2 +
 src/include/storage/bufmgr.h           |   2 +
 src/backend/storage/aio/aio_callback.c |   2 +
 src/backend/storage/buffer/bufmgr.c    | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 118 insertions(+)

diff --git a/src/include/storage/aio.h b/src/include/storage/aio.h
index ce5c18424bd..912bce87197 100644
--- a/src/include/storage/aio.h
+++ b/src/include/storage/aio.h
@@ -180,8 +180,10 @@ typedef enum PgAioHandleCallbackID
 	PGAIO_HCB_MD_WRITEV,
 
 	PGAIO_HCB_SHARED_BUFFER_READV,
+	PGAIO_HCB_SHARED_BUFFER_WRITEV,
 
 	PGAIO_HCB_LOCAL_BUFFER_READV,
+	PGAIO_HCB_LOCAL_BUFFER_WRITEV,
 } PgAioHandleCallbackID;
 
 
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index beeb4c47c1c..7d8618b0b85 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -186,7 +186,9 @@ extern PGDLLIMPORT int32 *LocalRefCount;
 
 struct PgAioHandleCallbacks;
 extern const struct PgAioHandleCallbacks aio_shared_buffer_readv_cb;
+extern const struct PgAioHandleCallbacks aio_shared_buffer_writev_cb;
 extern const struct PgAioHandleCallbacks aio_local_buffer_readv_cb;
+extern const struct PgAioHandleCallbacks aio_local_buffer_writev_cb;
 
 
 /* upper limit for effective_io_concurrency */
diff --git a/src/backend/storage/aio/aio_callback.c b/src/backend/storage/aio/aio_callback.c
index f0414085eba..00098504efb 100644
--- a/src/backend/storage/aio/aio_callback.c
+++ b/src/backend/storage/aio/aio_callback.c
@@ -44,8 +44,10 @@ static const PgAioHandleCallbacksEntry aio_handle_cbs[] = {
 	CALLBACK_ENTRY(PGAIO_HCB_MD_WRITEV, aio_md_writev_cb),
 
 	CALLBACK_ENTRY(PGAIO_HCB_SHARED_BUFFER_READV, aio_shared_buffer_readv_cb),
+	CALLBACK_ENTRY(PGAIO_HCB_SHARED_BUFFER_WRITEV, aio_shared_buffer_writev_cb),
 
 	CALLBACK_ENTRY(PGAIO_HCB_LOCAL_BUFFER_READV, aio_local_buffer_readv_cb),
+	CALLBACK_ENTRY(PGAIO_HCB_LOCAL_BUFFER_WRITEV, aio_local_buffer_writev_cb),
 #undef CALLBACK_ENTRY
 };
 
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index efd5b6601ad..0944914bb80 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -6514,6 +6514,52 @@ SharedBufferCompleteRead(int buf_off, Buffer buffer, uint8 flags, bool failed)
 	return result;
 }
 
+/*
+ * Complete an AIO write of a single shared buffer.
+ *
+ * Terminates the buffer's IO via TerminateBufferIO(), asking for the dirty
+ * flag to be cleared and, when 'failed' is set, for BM_IO_ERROR to be set.
+ * If 'release_lock' is true, the buffer's content lock - which the IO's
+ * initiator disowned - is released here.
+ *
+ * The return value is currently always 0.
+ */
+static uint64
+BufferCompleteWriteShared(Buffer buffer, bool release_lock, bool failed)
+{
+	BufferDesc *bufHdr;
+	uint64		result = 0;
+
+	Assert(BufferIsValid(buffer));
+
+	bufHdr = GetBufferDescriptor(buffer - 1);
+
+#ifdef USE_ASSERT_CHECKING
+	{
+		uint32		buf_state = pg_atomic_read_u32(&bufHdr->state);
+
+		Assert(buf_state & BM_VALID);
+		Assert(buf_state & BM_TAG_VALID);
+		Assert(buf_state & BM_IO_IN_PROGRESS);
+		Assert(buf_state & BM_DIRTY);
+	}
+#endif
+
+	TerminateBufferIO(bufHdr, /* clear_dirty = */ true,
+					  failed ? BM_IO_ERROR : 0,
+					  /* forget_owner = */ false,
+					  /* syncio = */ false);
+
+	/*
+	 * The initiator of IO is not managing the lock (i.e. called
+	 * LWLockDisown()), we are.
+	 */
+	if (release_lock)
+		LWLockReleaseDisowned(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
+
+	return result;
+}
+
 /*
  * Helper to prepare IO on shared buffers for execution, shared between reads
  * and writes.
@@ -6594,6 +6640,16 @@ shared_buffer_readv_stage(PgAioHandle *ioh, uint8 cb_data)
 	shared_buffer_stage_common(ioh, false);
 }
 
+/*
+ * Stage callback for AIO writes of shared buffers. Delegates to the helper
+ * shared with reads, passing true where the read variant passes false.
+ */
+static void
+shared_buffer_writev_stage(PgAioHandle *ioh, uint8 cb_data)
+{
+	shared_buffer_stage_common(ioh, true);
+}
+
 static void
 buffer_readv_report(PgAioResult result, const PgAioTargetData *target_data, int elevel)
 {
@@ -6680,6 +6736,41 @@ shared_buffer_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 c
 	return buffer_readv_complete_common(ioh, prior_result, false, cb_data);
 }
 
+/*
+ * Shared completion callback for AIO writes of shared buffers.
+ *
+ * Completes the write of every buffer listed in the handle's data array.
+ * Error and short-write handling is not implemented yet (see the FIXMEs):
+ * each buffer is completed as if it had been written successfully, and
+ * prior_result is returned unchanged.
+ */
+static PgAioResult
+shared_buffer_writev_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_data)
+{
+	PgAioResult result = prior_result;
+	uint64	   *io_data;
+	uint8		handle_data_len;
+
+	ereport(DEBUG5,
+			errmsg("%s: %d %d", __func__, prior_result.status, prior_result.result),
+			errhidestmt(true), errhidecontext(true));
+
+	io_data = pgaio_io_get_handle_data(ioh, &handle_data_len);
+
+	/* FIXME: handle outright errors */
+
+	for (int io_data_off = 0; io_data_off < handle_data_len; io_data_off++)
+	{
+		Buffer		buf = (Buffer) io_data[io_data_off];
+
+		/* FIXME: handle short writes / failures */
+		/* FIXME: ioh->target_data.shared_buffer.release_lock */
+		BufferCompleteWriteShared(buf, true, false);
+	}
+
+	return result;
+}
+
 /*
  * Helper to stage IO on local buffers for execution, shared between reads
  * and writes.
@@ -6724,6 +6815,16 @@ local_buffer_readv_complete(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb
 	return buffer_readv_complete_common(ioh, prior_result, true, cb_data);
 }
 
+static void
+local_buffer_writev_stage(PgAioHandle *ioh, uint8 cb_data)
+{
+	/*
+	 * Currently this is unreachable as the only write support is for
+	 * checkpointer / bgwriter, which don't deal with local buffers.
+	 */
+	elog(ERROR, "not yet");
+}
+
 
 /* readv callback is is passed READ_BUFFERS_* flags as callback data */
 const struct PgAioHandleCallbacks aio_shared_buffer_readv_cb = {
@@ -6732,6 +6833,12 @@ const struct PgAioHandleCallbacks aio_shared_buffer_readv_cb = {
 	.report = buffer_readv_report,
 };
 
+/* callbacks for AIO writes of shared buffers */
+const struct PgAioHandleCallbacks aio_shared_buffer_writev_cb = {
+	.stage = shared_buffer_writev_stage,
+	.complete_shared = shared_buffer_writev_complete,
+};
+
 /* readv callback is is passed READ_BUFFERS_* flags as callback data */
 const struct PgAioHandleCallbacks aio_local_buffer_readv_cb = {
 	.stage = local_buffer_readv_stage,
@@ -6745,3 +6852,8 @@ const struct PgAioHandleCallbacks aio_local_buffer_readv_cb = {
 	.complete_local = local_buffer_readv_complete,
 	.report = buffer_readv_report,
 };
+
+/* callbacks for AIO writes of local buffers, which can't be staged yet */
+const struct PgAioHandleCallbacks aio_local_buffer_writev_cb = {
+	.stage = local_buffer_writev_stage,
+};
-- 
2.48.1.76.g4e746b1a31.dirty