Re: [HACKERS] Restricting maximum keep segments by repslots - Mailing list pgsql-hackers
From | Kyotaro HORIGUCHI |
---|---|
Subject | Re: [HACKERS] Restricting maximum keep segments by repslots |
Date | |
Msg-id | 20171109.173128.220115527.horiguchi.kyotaro@lab.ntt.co.jp Whole thread Raw |
In response to | Re: [HACKERS] Restricting maximum keep segments by repslots (Kyotaro HORIGUCHI <horiguchi.kyotaro@lab.ntt.co.jp>) |
Responses |
Re: [HACKERS] Restricting maximum keep segments by repslots
|
List | pgsql-hackers |
Oops! The previous patch forgot the default case and crashes. At Wed, 08 Nov 2017 13:14:31 +0900 (Tokyo Standard Time), Kyotaro HORIGUCHI <horiguchi.kyotaro@lab.ntt.co.jp> wrote in <20171108.131431.170534842.horiguchi.kyotaro@lab.ntt.co.jp> > > I don't think 'distance' is a good metric - that's going to continually > > change. Why not store the LSN that's available and provide a function > > that computes this? Or just rely on the lsn - lsn operator? > > It seems reasonable. The 'secured minimum LSN' is common among > all slots so showing it in the view may look a bit stupid but I > don't find another suitable place for it. distance = 0 meant the > state that the slot is living but insecured in the previous patch > and that information is lost by changing 'distance' to > 'min_secure_lsn'. > > Thus I changed the 'live' column to 'status' and show that status > in text representation. > > status: secured | insecured | broken > > So this looks like the following (max_slot_wal_keep_size = 8MB, > which is a half of the default segment size) > > -- slots whose required WAL is surely available > select restart_lsn, status, min_secure_lsn, pg_current_wal_lsn() from pg_replication_slots; > restart_lsn | status | min_secure_lsn | pg_current_wal_lsn > ------------+---------+----------------+-------------------- > 0/1A000060 | secured | 0/1A000000 | 0/1B42BC78 > > -- slots whose required WAL is still available but insecured > restart_lsn | status | min_secure_lsn | pg_current_wal_lsn > ------------+-----------+----------------+-------------------- > 0/1A000060 | insecured | 0/1C000000 | 0/1D76C948 > > -- slots whose required WAL is lost > # We should have seen the log 'Some replication slots have lost...' 
> > restart_lsn | status | min_secure_lsn | pg_current_wal_lsn > ------------+--------+----------------+-------------------- > 0/1A000060 | broken | 0/1C000000 | 0/1D76C9F0 > > > I noticed that I had dropped the segment fragment of > max_slot_wal_keep_size when calculating in the routines. The > current patch honors the fragment part of max_slot_wal_keep_size. I changed IsLsnStillAvailable to return meaningful values regardless of whether max_slot_wal_keep_size is set or not. # I had been forgetting to count the version for the latest several # patches. I give this one version '4', as the successor of the last # numbered patch. -- Kyotaro Horiguchi NTT Open Source Software Center From 109f056e257aba70dddc8d466767ed0a1db371e2 Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi <horiguchi.kyotaro@lab.ntt.co.jp> Date: Tue, 28 Feb 2017 11:39:48 +0900 Subject: [PATCH 1/2] Add WAL relief vent for replication slots Adds a capability to limit the number of segments kept by replication slots by a GUC variable. ---src/backend/access/transam/xlog.c | 39 +++++++++++++++++++++++++++src/backend/utils/misc/guc.c | 11 ++++++++src/backend/utils/misc/postgresql.conf.sample | 1 +src/include/access/xlog.h | 1+4 files changed, 52 insertions(+) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index dd028a1..cfdae39 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -105,6 +105,7 @@ int wal_level = WAL_LEVEL_MINIMAL;int CommitDelay = 0; /* precommit delay inmicroseconds */int CommitSiblings = 5; /* # concurrent xacts needed to sleep */int wal_retrieve_retry_interval= 5000; +int max_slot_wal_keep_size_mb = 0;#ifdef WAL_DEBUGbool XLOG_DEBUG = false; @@ -9432,9 +9433,47 @@ KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo) if (max_replication_slots > 0 && keep != InvalidXLogRecPtr) { XLogSegNo slotSegNo; + int slotlimitsegs; + uint64 recptroff; + uint64 slotlimitbytes; + uint64 slotlimitfragment; + + recptroff = XLogSegmentOffset(recptr, 
wal_segment_size); + slotlimitbytes = 1024 * 1024 * max_slot_wal_keep_size_mb; + slotlimitfragment = XLogSegmentOffset(slotlimitbytes, + wal_segment_size); + + /* calculate segments to keep by max_slot_wal_keep_size_mb */ + slotlimitsegs = ConvertToXSegs(max_slot_wal_keep_size_mb, + wal_segment_size); + /* honor the fragment */ + if (recptroff < slotlimitfragment) + slotlimitsegs++; XLByteToSeg(keep, slotSegNo, wal_segment_size); + /* + * ignore slots if too many wal segments are kept. + * max_slot_wal_keep_size is just accumulated on wal_keep_segments. + */ + if (max_slot_wal_keep_size_mb > 0 && slotSegNo + slotlimitsegs < segno) + { + segno = segno - slotlimitsegs; /* must be positive */ + + /* + * warn only if the checkpoint flushes the required segment. + * we assume here that *logSegNo is calculated keep location. + */ + if (slotSegNo < *logSegNo) + ereport(WARNING, + (errmsg ("restart LSN of replication slots is ignored by checkpoint"), + errdetail("Some replication slots have lost required WAL segnents to continue by up to %ld segments.", + (segno < *logSegNo ? 
segno : *logSegNo) - slotSegNo))); + + /* emergency vent */ + slotSegNo = segno; + } + if (slotSegNo <= 0) segno = 1; else if (slotSegNo < segno) diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 65372d7..511023a 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -2368,6 +2368,17 @@ static struct config_int ConfigureNamesInt[] = }, { + {"max_slot_wal_keep_size", PGC_SIGHUP, REPLICATION_SENDING, + gettext_noop("Sets the maximum size of extra WALs kept by replication slots."), + NULL, + GUC_UNIT_MB + }, + &max_slot_wal_keep_size_mb, + 0, 0, INT_MAX, + NULL, NULL, NULL + }, + + { {"wal_sender_timeout", PGC_SIGHUP, REPLICATION_SENDING, gettext_noop("Sets the maximum time to waitfor WAL replication."), NULL, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 368b280..e76c73a 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -234,6 +234,7 @@#max_wal_senders = 10 # max number of walsender processes # (change requires restart)#wal_keep_segments= 0 # in logfile segments; 0 disables +#max_slot_wal_keep_size = 0 # measured in bytes; 0 disables#wal_sender_timeout = 60s # in milliseconds; 0 disables#max_replication_slots= 10 # max number of replication slots diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 0f2b8bd..f0c0255 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -98,6 +98,7 @@ extern int wal_segment_size;extern int min_wal_size_mb;extern int max_wal_size_mb;extern int wal_keep_segments; +extern int max_slot_wal_keep_size_mb;extern int XLOGbuffers;extern int XLogArchiveTimeout;extern int wal_retrieve_retry_interval; -- 2.9.2 From 67f73c35b0c1c97bd2fff80139bfd3b7142f6bee Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi <horiguchi.kyotaro@lab.ntt.co.jp> Date: Thu, 7 Sep 2017 19:13:22 +0900 Subject: [PATCH 2/2] Add monitoring aid for 
max_replication_slots. Adds two columns "live" and "distance" to pg_replication_slots. When max_slot_wal_keep_size is set, long-disconnected slots may lose sync. The two columns show how long a slot can live on or how many bytes a slot has lost if max_slot_wal_keep_size is set. ---src/backend/access/transam/xlog.c | 128 ++++++++++++++++++++++++++++++++++-src/backend/catalog/system_views.sql | 4 +-src/backend/replication/slotfuncs.c | 25 ++++++-src/include/access/xlog.h | 1 +src/include/catalog/pg_proc.h | 2 +-src/test/regress/expected/rules.out | 6 +-6 files changed, 160 insertions(+),6 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index cfdae39..be53e0f 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -9402,6 +9402,128 @@ CreateRestartPoint(int flags) return true;} + +/* + * Returns the segment number of the oldest file in XLOG directory. + */ +static XLogSegNo +GetOldestXLogFileSegNo(void) +{ + DIR *xldir; + struct dirent *xlde; + XLogSegNo segno = 0; + + xldir = AllocateDir(XLOGDIR); + if (xldir == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open write-ahead log directory \"%s\": %m", + XLOGDIR))); + + while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL) + { + TimeLineID tli; + XLogSegNo fsegno; + + /* Ignore files that are not XLOG segments */ + if (!IsXLogFileName(xlde->d_name) && + !IsPartialXLogFileName(xlde->d_name)) + continue; + + XLogFromFileName(xlde->d_name, &tli, &fsegno, wal_segment_size); + + /* get minimum segment ignorig timeline ID */ + if (segno == 0 || fsegno < segno) + segno = fsegno; + } + + return segno; +} + +/* + * Check if the record on the given restartLSN is present in XLOG files. + * + * Returns true if it is present. If minSecureLSN is given, it receives the + * LSN at the beginning of the oldest existing WAL segment. 
+ */ +bool +IsLsnStillAvaiable(XLogRecPtr restartLSN, XLogRecPtr *minSecureLSN) +{ + XLogRecPtr currpos; + XLogSegNo currSeg; + XLogSegNo restartSeg; + XLogSegNo tailSeg; + XLogSegNo oldestSeg; + uint64 keepSegs; + + currpos = GetXLogWriteRecPtr(); + + SpinLockAcquire(&XLogCtl->info_lck); + oldestSeg = XLogCtl->lastRemovedSegNo; + SpinLockRelease(&XLogCtl->info_lck); + + /* + * oldestSeg is zero before at least one segment has been removed since + * startup. Use oldest segno taken from file names. + */ + if (oldestSeg == 0) + { + static XLogSegNo oldestFileSeg = 0; + + if (oldestFileSeg == 0) + oldestFileSeg = GetOldestXLogFileSegNo(); + /* let it have the same meaning with lastRemovedSegNo here */ + oldestSeg = oldestFileSeg - 1; + } + + /* oldest segment is just after the last removed segment */ + oldestSeg++; + + XLByteToSeg(currpos, currSeg, wal_segment_size); + XLByteToSeg(restartLSN, restartSeg, wal_segment_size); + + + if (minSecureLSN) + { + if (max_slot_wal_keep_size_mb > 0) + { + uint64 slotlimitbytes = 1024 * 1024 * max_slot_wal_keep_size_mb; + uint64 slotlimitfragment = XLogSegmentOffset(slotlimitbytes, + wal_segment_size); + uint64 currposoff = XLogSegmentOffset(currpos, wal_segment_size); + + /* Calculate keep segments. Must be in sync with KeepLogSeg. 
*/ + Assert(wal_keep_segments >= 0); + Assert(max_slot_wal_keep_size_mb >= 0); + + keepSegs = wal_keep_segments + + ConvertToXSegs(max_slot_wal_keep_size_mb, wal_segment_size); + if (currposoff < slotlimitfragment) + keepSegs++; + + /* + * calculate the oldest segment that will be kept by + * wal_keep_segments and max_slot_wal_keep_size_mb + */ + if (currSeg < keepSegs) + tailSeg = 0; + else + tailSeg = currSeg - keepSegs; + + } + else + { + /* all requred segments are secured in this case */ + XLogRecPtr keep = XLogGetReplicationSlotMinimumLSN(); + XLByteToSeg(keep, tailSeg, wal_segment_size); + } + + XLogSegNoOffsetToRecPtr(tailSeg, 0, *minSecureLSN, wal_segment_size); + } + + return oldestSeg <= restartSeg; +} +/* * Retreat *logSegNo to the last segment that we need to retain because of * either wal_keep_segments or replication slots. @@ -9429,7 +9551,11 @@ KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo) segno = segno - wal_keep_segments; } - /* then check whether slots limit removal further */ + /* + * then check whether slots limit removal further + * should be consistent with IsLsnStillAvaiable(). 
+ */ + if (max_replication_slots > 0 && keep != InvalidXLogRecPtr) { XLogSegNo slotSegNo; diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index dc40cde..6512ac3 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -793,7 +793,9 @@ CREATE VIEW pg_replication_slots AS L.xmin, L.catalog_xmin, L.restart_lsn, - L.confirmed_flush_lsn + L.confirmed_flush_lsn, + L.status, + L.min_secure_lsn FROM pg_get_replication_slots() AS L LEFT JOIN pg_database D ON (L.datoid = D.oid); diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c index ab776e8..200a478 100644 --- a/src/backend/replication/slotfuncs.c +++ b/src/backend/replication/slotfuncs.c @@ -182,7 +182,7 @@ pg_drop_replication_slot(PG_FUNCTION_ARGS)Datumpg_get_replication_slots(PG_FUNCTION_ARGS){ -#define PG_GET_REPLICATION_SLOTS_COLS 11 +#define PG_GET_REPLICATION_SLOTS_COLS 13 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; TupleDesc tupdesc; Tuplestorestate *tupstore; @@ -304,6 +304,29 @@ pg_get_replication_slots(PG_FUNCTION_ARGS) else nulls[i++] = true; + if (restart_lsn == InvalidXLogRecPtr) + { + values[i++] = CStringGetTextDatum("unknown"); + values[i++] = LSNGetDatum(InvalidXLogRecPtr); + } + else + { + XLogRecPtr min_secure_lsn; + char *status = "borken"; + + if (BoolGetDatum(IsLsnStillAvaiable(restart_lsn, + &min_secure_lsn))) + { + if (min_secure_lsn <= restart_lsn) + status = "secured"; + else + status = "insecured"; + } + + values[i++] = CStringGetTextDatum(status); + values[i++] = LSNGetDatum(min_secure_lsn); + } + tuplestore_putvalues(tupstore, tupdesc, values, nulls); } LWLockRelease(ReplicationSlotControlLock); diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index f0c0255..a316ead 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -269,6 +269,7 @@ extern void ShutdownXLOG(int code, Datum arg);extern void InitXLOGAccess(void);extern void 
CreateCheckPoint(intflags);extern bool CreateRestartPoint(int flags); +extern bool IsLsnStillAvaiable(XLogRecPtr restartLSN, XLogRecPtr *minSecureLSN);extern void XLogPutNextOid(Oid nextOid);externXLogRecPtr XLogRestorePoint(const char *rpName);extern void UpdateFullPageWrites(void); diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 93c031a..d03fd6f 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -5340,7 +5340,7 @@ DATA(insert OID = 3779 ( pg_create_physical_replication_slot PGNSP PGUID 12 1 0DESCR("create a physicalreplication slot");DATA(insert OID = 3780 ( pg_drop_replication_slot PGNSP PGUID 12 1 0 0 0 f f f f t f v u 1 02278 "19" _null_ _null_ _null_ _null_ _null_ pg_drop_replication_slot _null_ _null_ _null_ ));DESCR("drop a replicationslot"); -DATA(insert OID = 3781 ( pg_get_replication_slots PGNSP PGUID 12 1 10 0 0 f f f f f t s s 0 0 2249 "" "{19,19,25,26,16,16,23,28,28,3220,3220}""{o,o,o,o,o,o,o,o,o,o,o}" "{slot_name,plugin,slot_type,datoid,temporary,active,active_pid,xmin,catalog_xmin,restart_lsn,confirmed_flush_lsn}" _null__null_ pg_get_replication_slots _null_ _null_ _null_ )); +DATA(insert OID = 3781 ( pg_get_replication_slots PGNSP PGUID 12 1 10 0 0 f f f f f t s s 0 0 2249 "" "{19,19,25,26,16,16,23,28,28,3220,3220,25,3220}""{o,o,o,o,o,o,o,o,o,o,o,o,o}" "{slot_name,plugin,slot_type,datoid,temporary,active,active_pid,xmin,catalog_xmin,restart_lsn,confirmed_flush_lsn,status,min_secure_lsn}" _null__null_ pg_get_replication_slots _null_ _null_ _null_ ));DESCR("information about replication slots currently in use");DATA(insertOID = 3786 ( pg_create_logical_replication_slot PGNSP PGUID 12 1 0 0 0 f f f f t f v u 3 0 2249 "19 1916" "{19,19,16,25,3220}" "{i,i,i,o,o}" "{slot_name,plugin,temporary,slot_name,lsn}" _null_ _null_ pg_create_logical_replication_slot_null_ _null_ _null_ ));DESCR("set up a logical replication slot"); diff --git a/src/test/regress/expected/rules.out 
b/src/test/regress/expected/rules.out index f1c1b44..d9d74a3 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1451,8 +1451,10 @@ pg_replication_slots| SELECT l.slot_name, l.xmin, l.catalog_xmin, l.restart_lsn, - l.confirmed_flush_lsn - FROM (pg_get_replication_slots() l(slot_name, plugin, slot_type, datoid, temporary, active, active_pid, xmin, catalog_xmin,restart_lsn, confirmed_flush_lsn) + l.confirmed_flush_lsn, + l.status, + l.min_secure_lsn + FROM (pg_get_replication_slots() l(slot_name, plugin, slot_type, datoid, temporary, active, active_pid, xmin, catalog_xmin,restart_lsn, confirmed_flush_lsn, status, min_secure_lsn) LEFT JOIN pg_database d ON ((l.datoid = d.oid)));pg_roles|SELECT pg_authid.rolname, pg_authid.rolsuper, -- 2.9.2 -- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers
pgsql-hackers by date: