From d0ce90d0d0c73ee7abc1ec017c6772f5e4a8d3e6 Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy Date: Sat, 26 Mar 2022 04:55:12 +0000 Subject: [PATCH v16] pg_walinspect --- contrib/Makefile | 1 + contrib/pg_walinspect/.gitignore | 4 + contrib/pg_walinspect/Makefile | 26 + contrib/pg_walinspect/pg_walinspect--1.0.sql | 132 ++++ contrib/pg_walinspect/pg_walinspect.c | 654 +++++++++++++++++++ contrib/pg_walinspect/pg_walinspect.control | 5 + src/backend/access/transam/xlogreader.c | 13 +- src/backend/access/transam/xlogutils.c | 33 + src/bin/pg_waldump/pg_waldump.c | 5 + src/include/access/xlog.h | 2 +- src/include/access/xlog_internal.h | 2 +- src/include/access/xlogreader.h | 2 - src/include/access/xlogutils.h | 4 + 13 files changed, 872 insertions(+), 11 deletions(-) create mode 100644 contrib/pg_walinspect/.gitignore create mode 100644 contrib/pg_walinspect/Makefile create mode 100644 contrib/pg_walinspect/pg_walinspect--1.0.sql create mode 100644 contrib/pg_walinspect/pg_walinspect.c create mode 100644 contrib/pg_walinspect/pg_walinspect.control diff --git a/contrib/Makefile b/contrib/Makefile index 332b486ecc..bbf220407b 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -41,6 +41,7 @@ SUBDIRS = \ pgrowlocks \ pgstattuple \ pg_visibility \ + pg_walinspect \ postgres_fdw \ seg \ spi \ diff --git a/contrib/pg_walinspect/.gitignore b/contrib/pg_walinspect/.gitignore new file mode 100644 index 0000000000..5dcb3ff972 --- /dev/null +++ b/contrib/pg_walinspect/.gitignore @@ -0,0 +1,4 @@ +# Generated subdirectories +/log/ +/results/ +/tmp_check/ diff --git a/contrib/pg_walinspect/Makefile b/contrib/pg_walinspect/Makefile new file mode 100644 index 0000000000..c92a97447f --- /dev/null +++ b/contrib/pg_walinspect/Makefile @@ -0,0 +1,26 @@ +# contrib/pg_walinspect/Makefile + +MODULE_big = pg_walinspect +OBJS = \ + $(WIN32RES) \ + pg_walinspect.o +PGFILEDESC = "pg_walinspect - functions to inspect contents of PostgreSQL Write-Ahead Log" + +PG_CPPFLAGS = -I$(libpq_srcdir) +SHLIB_LINK_INTERNAL = $(libpq) + +EXTENSION = pg_walinspect +DATA = pg_walinspect--1.0.sql + +REGRESS = pg_walinspect + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/pg_walinspect +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/pg_walinspect/pg_walinspect--1.0.sql b/contrib/pg_walinspect/pg_walinspect--1.0.sql new file mode 100644 index 0000000000..9b192dd2c1 --- /dev/null +++ b/contrib/pg_walinspect/pg_walinspect--1.0.sql @@ -0,0 +1,132 @@ +/* contrib/pg_walinspect/pg_walinspect--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION pg_walinspect" to load this file. \quit + +-- +-- pg_get_wal_record() +-- +CREATE FUNCTION pg_get_wal_record(IN in_lsn pg_lsn, + OUT start_lsn pg_lsn, + OUT end_lsn pg_lsn, + OUT prev_lsn pg_lsn, + OUT record_length int4, + OUT record bytea +) +AS 'MODULE_PATHNAME', 'pg_get_wal_record' +LANGUAGE C STRICT PARALLEL SAFE; + +REVOKE EXECUTE ON FUNCTION pg_get_wal_record(pg_lsn) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pg_get_wal_record(pg_lsn) TO pg_read_server_files; + +-- +-- pg_get_wal_record_info() +-- +CREATE FUNCTION pg_get_wal_record_info(IN in_lsn pg_lsn, + OUT start_lsn pg_lsn, + OUT end_lsn pg_lsn, + OUT prev_lsn pg_lsn, + OUT xid xid, + OUT resource_manager text, + OUT record_length int4, + OUT fpi_length int4, + OUT description text, + OUT block_ref text, + OUT data_length int4, + OUT data bytea +) +AS 'MODULE_PATHNAME', 'pg_get_wal_record_info' +LANGUAGE C STRICT PARALLEL SAFE; + +REVOKE EXECUTE ON FUNCTION pg_get_wal_record_info(pg_lsn) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pg_get_wal_record_info(pg_lsn) TO pg_read_server_files; + +-- +-- pg_get_wal_records_info() +-- +CREATE FUNCTION pg_get_wal_records_info(IN start_lsn pg_lsn, + IN end_lsn pg_lsn, + OUT start_lsn pg_lsn, + OUT end_lsn pg_lsn, + OUT prev_lsn pg_lsn, + OUT xid xid, + OUT resource_manager text, + OUT record_length int4, + OUT fpi_length int4, + OUT description text, + OUT block_ref text, + OUT data_length int4, + OUT data bytea +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'pg_get_wal_records_info' +LANGUAGE C STRICT PARALLEL SAFE; + +REVOKE EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pg_get_wal_records_info(pg_lsn, pg_lsn) TO pg_read_server_files; + +-- +-- pg_get_wal_records_info_till_end_of_wal() +-- +CREATE FUNCTION pg_get_wal_records_info_till_end_of_wal(IN start_lsn pg_lsn, + OUT start_lsn pg_lsn, + OUT end_lsn pg_lsn, + OUT prev_lsn pg_lsn, + OUT xid xid, + OUT resource_manager text, + OUT record_length int4, + OUT fpi_length int4, + OUT description text, + OUT block_ref text, + OUT data_length int4, + OUT data bytea +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'pg_get_wal_records_info_till_end_of_wal' +LANGUAGE C STRICT PARALLEL SAFE; + +REVOKE EXECUTE ON FUNCTION pg_get_wal_records_info_till_end_of_wal(pg_lsn) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pg_get_wal_records_info_till_end_of_wal(pg_lsn) TO pg_read_server_files; + +-- +-- pg_get_wal_stats() +-- +CREATE FUNCTION pg_get_wal_stats(IN start_lsn pg_lsn, + IN end_lsn pg_lsn, + OUT resource_manager text, + OUT count int8, + OUT count_percentage float4, + OUT record_size int8, + OUT record_size_percentage float4, + OUT fpi_size int8, + OUT fpi_size_percentage float4, + OUT combined_size int8, + OUT combined_size_percentage float4 +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'pg_get_wal_stats' +LANGUAGE C STRICT PARALLEL SAFE; + +REVOKE EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pg_get_wal_stats(pg_lsn, pg_lsn) TO pg_read_server_files; + +-- +-- pg_get_wal_stats_till_end_of_wal() +-- +CREATE FUNCTION pg_get_wal_stats_till_end_of_wal(IN start_lsn pg_lsn, + OUT resource_manager text, + OUT count int8, + OUT count_percentage float4, + OUT record_size int8, + OUT record_size_percentage float4, + OUT fpi_size int8, + OUT fpi_size_percentage float4, + OUT combined_size int8, + OUT combined_size_percentage float4 +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'pg_get_wal_stats_till_end_of_wal' +LANGUAGE C STRICT PARALLEL SAFE; + +REVOKE EXECUTE ON FUNCTION pg_get_wal_stats_till_end_of_wal(pg_lsn) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pg_get_wal_stats_till_end_of_wal(pg_lsn) TO pg_read_server_files; diff --git a/contrib/pg_walinspect/pg_walinspect.c b/contrib/pg_walinspect/pg_walinspect.c new file mode 100644 index 0000000000..adf7fc5890 --- /dev/null +++ b/contrib/pg_walinspect/pg_walinspect.c @@ -0,0 +1,654 @@ +/*------------------------------------------------------------------------- + * + * pg_walinspect.c + * Functions to inspect contents of PostgreSQL Write-Ahead Log + * + * Copyright (c) 2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/pg_walinspect/pg_walinspect.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/xlog.h" +#include "access/xlog_internal.h" +#include "access/xlogreader.h" +#include "access/xlogrecovery.h" +#include "access/xlogstats.h" +#include "access/xlogutils.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/pg_lsn.h" + +/* + * NOTE: For any code change or issue fix here, it is highly recommended to + * give a thought about doing the same in pg_waldump tool as well. + */ + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(pg_get_wal_record); +PG_FUNCTION_INFO_V1(pg_get_wal_record_info); +PG_FUNCTION_INFO_V1(pg_get_wal_records_info); +PG_FUNCTION_INFO_V1(pg_get_wal_records_info_till_end_of_wal); +PG_FUNCTION_INFO_V1(pg_get_wal_stats); +PG_FUNCTION_INFO_V1(pg_get_wal_stats_till_end_of_wal); + +typedef void (*GetWALDetailsCB) (FunctionCallInfo fcinfo, + XLogRecPtr start_lsn, + XLogRecPtr end_lsn); + +static bool IsFutureLSN(XLogRecPtr lsn, XLogRecPtr *curr_lsn); +static XLogReaderState *InitXLogReaderState(XLogRecPtr lsn, + XLogRecPtr *first_record); +static XLogRecord *ReadNextXLogRecord(XLogReaderState *xlogreader, + XLogRecPtr first_record); +static Datum GetWALRecordInternal(FunctionCallInfo fcinfo, Datum *values, + bool *nulls, uint32 ncols, bool get_info); +static void GetXLogRecordInfo(XLogReaderState *record, XLogRecPtr lsn, + Datum *values, bool *nulls, uint32 ncols); +static void GetWALDetailsGuts(FunctionCallInfo fcinfo, bool till_end_of_wal, + GetWALDetailsCB wal_details_cb); +static void GetWALRecordsInfo(FunctionCallInfo fcinfo, XLogRecPtr start_lsn, + XLogRecPtr end_lsn); +static void GetXLogSummaryStats(XLogStats * stats, ReturnSetInfo *rsinfo, + Datum *values, bool *nulls, uint32 ncols); +static void FillXLogStatsRow(const char *name, uint64 n, uint64 total_count, + uint64 rec_len, uint64 total_rec_len, + uint64 fpi_len, uint64 total_fpi_len, + uint64 tot_len, uint64 total_len, + Datum *values, bool *nulls, uint32 ncols); +static void GetWalStats(FunctionCallInfo fcinfo, XLogRecPtr start_lsn, + XLogRecPtr end_lsn); + +/* + * Check if the given LSN is in future. Also, return the LSN up to which the + * server has WAL. + */ +static bool +IsFutureLSN(XLogRecPtr lsn, XLogRecPtr *curr_lsn) +{ + /* + * We determine the current LSN of the server similar to how page_read + * callback read_local_xlog_page_no_wait does. + */ + if (!RecoveryInProgress()) + *curr_lsn = GetFlushRecPtr(NULL); + else + *curr_lsn = GetXLogReplayRecPtr(NULL); + + Assert(!XLogRecPtrIsInvalid(*curr_lsn)); + + if (lsn >= *curr_lsn) + return true; + + return false; +} + +/* + * Intialize WAL reader and identify first valid LSN. + */ +static XLogReaderState * +InitXLogReaderState(XLogRecPtr lsn, XLogRecPtr *first_record) +{ + XLogReaderState *xlogreader; + + /* + * Reading WAL below the first page of the first sgements isn't allowed. + * This is a bootstrap WAL page and the page_read callback fails to read + * it. + */ + if (lsn < XLOG_BLCKSZ) + ereport(ERROR, + (errmsg("could not read WAL at LSN %X/%X", + LSN_FORMAT_ARGS(lsn)))); + + xlogreader = XLogReaderAllocate(wal_segment_size, NULL, + XL_ROUTINE(.page_read = &read_local_xlog_page_no_wait, + .segment_open = &wal_segment_open, + .segment_close = &wal_segment_close), + NULL); + + if (xlogreader == NULL) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"), + errdetail("Failed while allocating a WAL reading processor."))); + + /* First find a valid recptr to start from. */ + *first_record = XLogFindNextRecord(xlogreader, lsn); + + if (XLogRecPtrIsInvalid(*first_record)) + ereport(ERROR, + (errmsg("could not find a valid record after %X/%X", + LSN_FORMAT_ARGS(lsn)))); + + return xlogreader; +} + +/* + * Read next WAL record. + */ +static XLogRecord * +ReadNextXLogRecord(XLogReaderState *xlogreader, XLogRecPtr first_record) +{ + XLogRecord *record; + char *errormsg; + + record = XLogReadRecord(xlogreader, &errormsg); + + if (record == NULL) + { + if (errormsg) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read WAL at %X/%X: %s", + LSN_FORMAT_ARGS(first_record), errormsg))); + else + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read WAL at %X/%X", + LSN_FORMAT_ARGS(first_record)))); + } + + return record; +} + +/* + * Get WAL record info. + */ +static void +GetXLogRecordInfo(XLogReaderState *record, XLogRecPtr lsn, + Datum *values, bool *nulls, uint32 ncols) +{ + const char *id; + const RmgrData *desc; + uint32 fpi_len = 0; + StringInfoData rec_desc; + StringInfoData rec_blk_ref; + bytea *data; + char *main_data; + uint32 main_data_len; + int i = 0; + + desc = &RmgrTable[XLogRecGetRmid(record)]; + initStringInfo(&rec_desc); + id = desc->rm_identify(XLogRecGetInfo(record)); + + if (id == NULL) + appendStringInfo(&rec_desc, "UNKNOWN (%x) ", XLogRecGetInfo(record) & ~XLR_INFO_MASK); + else + appendStringInfo(&rec_desc, "%s ", id); + + desc->rm_desc(&rec_desc, record); + + /* Block references. */ + initStringInfo(&rec_blk_ref); + XLogRecGetBlockRefInfo(record, NULL, &fpi_len, true, &rec_blk_ref); + + main_data_len = XLogRecGetDataLen(record); + data = (bytea *) palloc(main_data_len + VARHDRSZ); + SET_VARSIZE(data, main_data_len + VARHDRSZ); + main_data = VARDATA(data); + memcpy(main_data, XLogRecGetData(record), main_data_len); + + values[i++] = LSNGetDatum(lsn); + values[i++] = LSNGetDatum(record->EndRecPtr - 1); + values[i++] = LSNGetDatum(XLogRecGetPrev(record)); + values[i++] = TransactionIdGetDatum(XLogRecGetXid(record)); + values[i++] = CStringGetTextDatum(desc->rm_name); + values[i++] = UInt32GetDatum(XLogRecGetTotalLen(record)); + values[i++] = UInt32GetDatum(fpi_len); + values[i++] = CStringGetTextDatum(rec_desc.data); + values[i++] = CStringGetTextDatum(rec_blk_ref.data); + values[i++] = UInt32GetDatum(main_data_len); + values[i++] = PointerGetDatum(data); + + Assert(i == ncols); +} + +/* + * Get WAL record data or info. + */ +Datum +GetWALRecordInternal(FunctionCallInfo fcinfo, Datum *values, bool *nulls, + uint32 ncols, bool get_info) +{ + XLogRecPtr lsn; + XLogRecPtr curr_lsn; + XLogRecord *record; + XLogRecPtr first_record; + XLogReaderState *xlogreader; + TupleDesc tupdesc; + HeapTuple tuple; + Datum result; + + lsn = PG_GETARG_LSN(0); + + if (IsFutureLSN(lsn, &curr_lsn)) + { + /* + * GetFlushRecPtr or GetXLogReplayRecPtr gives "end+1" LSN of the last + * record flushed or replayed respectively. But let's use the LSN up + * to "end" in user facing message. + */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot accept future input LSN"), + errdetail("Last WAL record on the database system ends at LSN %X/%X.", + LSN_FORMAT_ARGS(curr_lsn - 1)))); + } + + /* Build a tuple descriptor for our result type. */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + xlogreader = InitXLogReaderState(lsn, &first_record); + + Assert(xlogreader); + + record = ReadNextXLogRecord(xlogreader, first_record); + + MemSet(values, 0, ncols); + MemSet(nulls, 0, ncols); + + if (get_info) + { + GetXLogRecordInfo(xlogreader, first_record, values, nulls, + ncols); + } + else + { + bytea *rec; + uint32 rec_len; + char *rec_data; + int i = 0; + + rec_len = XLogRecGetTotalLen(xlogreader); + + Assert(rec_len > 0); + + rec = (bytea *) palloc(rec_len + VARHDRSZ); + SET_VARSIZE(rec, rec_len + VARHDRSZ); + rec_data = VARDATA(rec); + + memcpy(rec_data, record, rec_len); + + values[i++] = LSNGetDatum(first_record); + values[i++] = LSNGetDatum(xlogreader->EndRecPtr - 1); + values[i++] = LSNGetDatum(XLogRecGetPrev(xlogreader)); + values[i++] = UInt32GetDatum(rec_len); + values[i++] = PointerGetDatum(rec); + + Assert(i == ncols); + } + + XLogReaderFree(xlogreader); + + tuple = heap_form_tuple(tupdesc, values, nulls); + result = HeapTupleGetDatum(tuple); + + return result; +} + +/* + * Get WAL record. + * + * This function emits an error if a future WAL LSN i.e. WAL LSN the database + * system doesn't know about is specified. + */ +Datum +pg_get_wal_record(PG_FUNCTION_ARGS) +{ +#define PG_GET_WAL_RECORD_COLS 5 + Datum result; + Datum values[PG_GET_WAL_RECORD_COLS]; + bool nulls[PG_GET_WAL_RECORD_COLS]; + + result = GetWALRecordInternal(fcinfo, values, nulls, + PG_GET_WAL_RECORD_COLS, + false); + + PG_RETURN_DATUM(result); +#undef PG_GET_WAL_RECORD_COLS +} + +/* + * Get WAL record info and data. + * + * This function emits an error if a future WAL LSN i.e. WAL LSN the database + * system doesn't know about is specified. + */ +Datum +pg_get_wal_record_info(PG_FUNCTION_ARGS) +{ +#define PG_GET_WAL_RECORD_INFO_COLS 11 + Datum result; + Datum values[PG_GET_WAL_RECORD_INFO_COLS]; + bool nulls[PG_GET_WAL_RECORD_INFO_COLS]; + + result = GetWALRecordInternal(fcinfo, values, nulls, + PG_GET_WAL_RECORD_INFO_COLS, + true); + + PG_RETURN_DATUM(result); +#undef PG_GET_WAL_RECORD_INFO_COLS +} + +/* + * Get WAL details such as record info, stats using the passed in callback. + */ +static void +GetWALDetailsGuts(FunctionCallInfo fcinfo, bool till_end_of_wal, + GetWALDetailsCB wal_details_cb) +{ + XLogRecPtr start_lsn; + XLogRecPtr end_lsn; + XLogRecPtr curr_lsn; + + start_lsn = PG_GETARG_LSN(0); + + /* If not till end of wal, end_lsn would have been specified. */ + if (!till_end_of_wal) + end_lsn = PG_GETARG_LSN(1); + + if (IsFutureLSN(start_lsn, &curr_lsn)) + { + /* + * GetFlushRecPtr or GetXLogReplayRecPtr gives "end+1" LSN of the last + * record flushed or replayed respectively. But let's use the LSN up + * to "end" in user facing message. + */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot accept future start LSN"), + errdetail("Last WAL record on the database system ends at LSN %X/%X.", + LSN_FORMAT_ARGS(curr_lsn - 1)))); + } + + if (!till_end_of_wal && end_lsn >= curr_lsn) + { + /* + * GetFlushRecPtr or GetXLogReplayRecPtr gives "end+1" LSN of the last + * record flushed or replayed respectively. But let's use the LSN up + * to "end" in user facing message. + */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot accept future end LSN"), + errdetail("Last WAL record on the database system ends at LSN %X/%X.", + LSN_FORMAT_ARGS(curr_lsn - 1)))); + } + else if (till_end_of_wal) + { + /* + * GetFlushRecPtr or GetXLogReplayRecPtr gives "end+1" LSN of the last + * record flushed or replayed respectively. But let's use the LSN up to + * "end". + */ + end_lsn = curr_lsn - 1; + } + + if (start_lsn >= end_lsn) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("WAL start LSN must be less than end LSN"))); + + wal_details_cb(fcinfo, start_lsn, end_lsn); +} + +/* + * Get info and data of all WAL records between start LSN and end LSN. + */ +static void +GetWALRecordsInfo(FunctionCallInfo fcinfo, XLogRecPtr start_lsn, + XLogRecPtr end_lsn) +{ +#define PG_GET_WAL_RECORDS_INFO_COLS 11 + XLogRecPtr first_record; + XLogReaderState *xlogreader; + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + Datum values[PG_GET_WAL_RECORDS_INFO_COLS]; + bool nulls[PG_GET_WAL_RECORDS_INFO_COLS]; + + SetSingleFuncCall(fcinfo, 0); + + xlogreader = InitXLogReaderState(start_lsn, &first_record); + + Assert(xlogreader); + + MemSet(values, 0, sizeof(values)); + MemSet(nulls, 0, sizeof(nulls)); + + for (;;) + { + (void) ReadNextXLogRecord(xlogreader, first_record); + + /* + * Let's not show the record info if it is spanning more than the + * end_lsn. EndRecPtr is "end+1" of the last read record, hence + * use "end" here. + */ + if ((xlogreader->EndRecPtr - 1) <= end_lsn) + { + GetXLogRecordInfo(xlogreader, xlogreader->currRecPtr, values, nulls, + PG_GET_WAL_RECORDS_INFO_COLS); + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, + values, nulls); + } + + /* Exit loop if read up to end_lsn. */ + if (xlogreader->EndRecPtr >= end_lsn) + break; + + CHECK_FOR_INTERRUPTS(); + } + + XLogReaderFree(xlogreader); + +#undef PG_GET_WAL_RECORDS_INFO_COLS +} + +/* + * Get info and data of all WAL records between start LSN and end LSN. + * + * This function emits an error if a future start or end WAL LSN i.e. WAL LSN + * the database system doesn't know about is specified. + */ +Datum +pg_get_wal_records_info(PG_FUNCTION_ARGS) +{ + GetWALDetailsGuts(fcinfo, false, GetWALRecordsInfo); + + PG_RETURN_VOID(); +} + +/* + * Get info and data of all WAL records from start LSN till end of WAL. + * + * This function emits an error if a future start i.e. WAL LSN the database + * system doesn't know about is specified. + */ +Datum +pg_get_wal_records_info_till_end_of_wal(PG_FUNCTION_ARGS) +{ + GetWALDetailsGuts(fcinfo, true, GetWALRecordsInfo); + + PG_RETURN_VOID(); +} + +/* + * Fill single row of record counts and sizes for an rmgr or record. + */ +static void +FillXLogStatsRow(const char *name, + uint64 n, uint64 total_count, + uint64 rec_len, uint64 total_rec_len, + uint64 fpi_len, uint64 total_fpi_len, + uint64 tot_len, uint64 total_len, + Datum *values, bool *nulls, uint32 ncols) +{ + double n_pct, + rec_len_pct, + fpi_len_pct, + tot_len_pct; + int i = 0; + + n_pct = 0; + if (total_count != 0) + n_pct = 100 * (double) n / total_count; + + rec_len_pct = 0; + if (total_rec_len != 0) + rec_len_pct = 100 * (double) rec_len / total_rec_len; + + fpi_len_pct = 0; + if (total_fpi_len != 0) + fpi_len_pct = 100 * (double) fpi_len / total_fpi_len; + + tot_len_pct = 0; + if (total_len != 0) + tot_len_pct = 100 * (double) tot_len / total_len; + + values[i++] = CStringGetTextDatum(name); + values[i++] = Int64GetDatum(n); + values[i++] = Float4GetDatum(n_pct); + values[i++] = Int64GetDatum(rec_len); + values[i++] = Float4GetDatum(rec_len_pct); + values[i++] = Int64GetDatum(fpi_len); + values[i++] = Float4GetDatum(fpi_len_pct); + values[i++] = Int64GetDatum(tot_len); + values[i++] = Float4GetDatum(tot_len_pct); + + Assert(i == ncols); +} + +/* + * Get summary statistics about the records seen so far. + */ +static void +GetXLogSummaryStats(XLogStats *stats, ReturnSetInfo *rsinfo, + Datum *values, bool *nulls, uint32 ncols) +{ + uint64 total_count = 0; + uint64 total_rec_len = 0; + uint64 total_fpi_len = 0; + uint64 total_len = 0; + int ri; + + /* + * Each row shows its percentages of the total, so make a first pass to + * calculate column totals. + */ + for (ri = 0; ri < RM_NEXT_ID; ri++) + { + total_count += stats->rmgr_stats[ri].count; + total_rec_len += stats->rmgr_stats[ri].rec_len; + total_fpi_len += stats->rmgr_stats[ri].fpi_len; + } + total_len = total_rec_len + total_fpi_len; + + for (ri = 0; ri < RM_NEXT_ID; ri++) + { + uint64 count; + uint64 rec_len; + uint64 fpi_len; + uint64 tot_len; + const RmgrData *desc = &RmgrTable[ri]; + + count = stats->rmgr_stats[ri].count; + rec_len = stats->rmgr_stats[ri].rec_len; + fpi_len = stats->rmgr_stats[ri].fpi_len; + tot_len = rec_len + fpi_len; + + FillXLogStatsRow(desc->rm_name, count, total_count, rec_len, + total_rec_len, fpi_len, total_fpi_len, tot_len, + total_len, values, nulls, ncols); + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, + values, nulls); + } +} + +/* + * Get WAL stats between start LSN and end LSN. + */ +static void +GetWalStats(FunctionCallInfo fcinfo, XLogRecPtr start_lsn, + XLogRecPtr end_lsn) +{ +#define PG_GET_WAL_STATS_COLS 9 + XLogRecPtr first_record; + XLogReaderState *xlogreader; + XLogStats stats; + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + Datum values[PG_GET_WAL_STATS_COLS]; + bool nulls[PG_GET_WAL_STATS_COLS]; + + SetSingleFuncCall(fcinfo, 0); + + xlogreader = InitXLogReaderState(start_lsn, &first_record); + + MemSet(&stats, 0, sizeof(stats)); + + for (;;) + { + (void) ReadNextXLogRecord(xlogreader, first_record); + + /* + * Let's not show the record info if it is spanning more than the + * end_lsn. EndRecPtr is "end+1" of the last read record, hence + * use "end" here. + */ + if ((xlogreader->EndRecPtr - 1) <= end_lsn) + XLogRecStoreStats(&stats, xlogreader); + + /* Exit loop if read up to end_lsn. */ + if (xlogreader->EndRecPtr >= end_lsn) + break; + + CHECK_FOR_INTERRUPTS(); + } + + XLogReaderFree(xlogreader); + + MemSet(values, 0, sizeof(values)); + MemSet(nulls, 0, sizeof(nulls)); + + GetXLogSummaryStats(&stats, rsinfo, values, nulls, + PG_GET_WAL_STATS_COLS); + +#undef PG_GET_WAL_STATS_COLS +} + +/* + * Get stats of all WAL records between start LSN and end LSN. + * + * This function emits an error if a future start or end WAL LSN i.e. WAL LSN + * the database system doesn't know about is specified. + */ +Datum +pg_get_wal_stats(PG_FUNCTION_ARGS) +{ + GetWALDetailsGuts(fcinfo, false, GetWalStats); + + PG_RETURN_VOID(); +} + +/* + * Get stats of all WAL records from start LSN till end of WAL. + * + * This function emits an error if a future start i.e. WAL LSN the database + * system doesn't know about is specified. + */ +Datum +pg_get_wal_stats_till_end_of_wal(PG_FUNCTION_ARGS) +{ + GetWALDetailsGuts(fcinfo, true, GetWalStats); + + PG_RETURN_VOID(); +} diff --git a/contrib/pg_walinspect/pg_walinspect.control b/contrib/pg_walinspect/pg_walinspect.control new file mode 100644 index 0000000000..017e56a2bb --- /dev/null +++ b/contrib/pg_walinspect/pg_walinspect.control @@ -0,0 +1,5 @@ +# pg_walinspect extension +comment = 'functions to inspect contents of PostgreSQL Write-Ahead Log' +default_version = '1.0' +module_pathname = '$libdir/pg_walinspect' +relocatable = true diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index e437c42992..585c94c488 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -1320,13 +1320,6 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr, return true; } -#ifdef FRONTEND -/* - * Functions that are currently not needed in the backend, but are better - * implemented inside xlogreader.c because of the internal facilities available - * here. - */ - /* * Find the first record with an lsn >= RecPtr. * @@ -1447,6 +1440,12 @@ err: return InvalidXLogRecPtr; } +#ifdef FRONTEND +/* + * Functions that are currently not needed in the backend, but are better + * implemented inside xlogreader.c because of the internal facilities available + * here. + */ #endif /* FRONTEND */ /* diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index 8c1b8216be..4a4ef8a338 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -85,6 +85,10 @@ typedef struct xl_missing_dir static HTAB *missing_dir_tab = NULL; +static int +read_local_xlog_page_guts(XLogReaderState *state, XLogRecPtr targetPagePtr, + int reqLen, XLogRecPtr targetRecPtr, + char *cur_page, bool wait_for_wal); /* * Keep track of a directory that wasn't found while replaying database @@ -1008,6 +1012,31 @@ wal_segment_close(XLogReaderState *state) int read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *cur_page) +{ + return read_local_xlog_page_guts(state, targetPagePtr, reqLen, + targetRecPtr, cur_page, true); +} + +/* + * Same as read_local_xlog_page except that it doesn't wait for future WAL + * to be available. + */ +int +read_local_xlog_page_no_wait(XLogReaderState *state, XLogRecPtr targetPagePtr, + int reqLen, XLogRecPtr targetRecPtr, + char *cur_page) +{ + return read_local_xlog_page_guts(state, targetPagePtr, reqLen, + targetRecPtr, cur_page, false); +} + +/* + * Implementation of read_local_xlog_page and its no wait version. + */ +static int +read_local_xlog_page_guts(XLogReaderState *state, XLogRecPtr targetPagePtr, + int reqLen, XLogRecPtr targetRecPtr, + char *cur_page, bool wait_for_wal) { XLogRecPtr read_upto, loc; @@ -1063,6 +1092,10 @@ read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, if (loc <= read_upto) break; + /* If asked, let's not wait for future WAL. */ + if (!wait_for_wal) + break; + CHECK_FOR_INTERRUPTS(); pg_usleep(1000L); } diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c index f314d33ebf..cad5b3594a 100644 --- a/src/bin/pg_waldump/pg_waldump.c +++ b/src/bin/pg_waldump/pg_waldump.c @@ -27,6 +27,11 @@ #include "getopt_long.h" #include "rmgrdesc.h" +/* + * NOTE: For any code change or issue fix here, it is highly recommended to + * give a thought about doing the same in pg_walinspect contrib module as well. + */ + static const char *progname; static int WalSegSz; diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 09f6464331..3e644372f9 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -31,7 +31,7 @@ extern XLogRecPtr XactLastRecEnd; extern PGDLLIMPORT XLogRecPtr XactLastCommitEnd; /* these variables are GUC parameters related to XLOG */ -extern int wal_segment_size; +extern PGDLLIMPORT int wal_segment_size; extern int min_wal_size_mb; extern int max_wal_size_mb; extern int wal_keep_size_mb; diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index d7c35c37c4..2985c75361 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -319,7 +319,7 @@ typedef struct RmgrData struct XLogRecordBuffer *buf); } RmgrData; -extern const RmgrData RmgrTable[]; +extern PGDLLIMPORT const RmgrData RmgrTable[]; /* * Exported to support xlog switching from checkpointer diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h index f4388cc9be..b4c7d93787 100644 --- a/src/include/access/xlogreader.h +++ b/src/include/access/xlogreader.h @@ -340,9 +340,7 @@ extern void XLogReaderSetDecodeBuffer(XLogReaderState *state, /* Position the XLogReader to given record */ extern void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr); -#ifdef FRONTEND extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr); -#endif /* FRONTEND */ /* Return values from XLogPageReadCB. */ typedef enum XLogPageReadResult diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h index 8d48f003b0..40cdb1b947 100644 --- a/src/include/access/xlogutils.h +++ b/src/include/access/xlogutils.h @@ -96,6 +96,10 @@ extern void FreeFakeRelcacheEntry(Relation fakerel); extern int read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *cur_page); +extern int read_local_xlog_page_no_wait(XLogReaderState *state, + XLogRecPtr targetPagePtr, int reqLen, + XLogRecPtr targetRecPtr, + char *cur_page); extern void wal_segment_open(XLogReaderState *state, XLogSegNo nextSegNo, TimeLineID *tli_p); -- 2.25.1