From 7838a2217cde2431b2897906fdc0d9b9a8f848e6 Mon Sep 17 00:00:00 2001 From: Vignesh Date: Mon, 3 Feb 2025 10:30:51 +0530 Subject: [PATCH v20250416 5/5] Documentation for sequence synchronization feature. Documentation for sequence synchronization feature. --- doc/src/sgml/catalogs.sgml | 25 ++- doc/src/sgml/config.sgml | 16 +- doc/src/sgml/logical-replication.sgml | 241 ++++++++++++++++++++-- doc/src/sgml/monitoring.sgml | 5 +- doc/src/sgml/ref/alter_subscription.sgml | 55 ++++- doc/src/sgml/ref/create_subscription.sgml | 6 + doc/src/sgml/system-views.sgml | 67 ++++++ 7 files changed, 372 insertions(+), 43 deletions(-) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index cbd4e40a320..4f149656836 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -8155,16 +8155,19 @@ SCRAM-SHA-256$<iteration count>:&l - The catalog pg_subscription_rel contains the - state for each replicated relation in each subscription. This is a - many-to-many mapping. + The catalog pg_subscription_rel stores the + state of each replicated table and sequence for each subscription. This + is a many-to-many mapping. - This catalog only contains tables known to the subscription after running - either CREATE SUBSCRIPTION or - ALTER SUBSCRIPTION ... REFRESH - PUBLICATION. + This catalog only contains tables and sequences known to the subscription + after running + CREATE SUBSCRIPTION or + + ALTER SUBSCRIPTION ... REFRESH PUBLICATION or + + ALTER SUBSCRIPTION ... REFRESH PUBLICATION SEQUENCES. 
@@ -8198,7 +8201,7 @@ SCRAM-SHA-256$<iteration count>:&l (references pg_class.oid) - Reference to relation + Reference to table or sequence @@ -8209,9 +8212,9 @@ SCRAM-SHA-256$<iteration count>:&l State code: i = initialize, - d = data is being copied, - f = finished table copy, - s = synchronized, + d = data is being copied (not applicable for sequences), + f = finished table copy (not applicable for sequences), + s = synchronized (not applicable for sequences), r = ready (normal replication) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index c1674c22cb2..daab5686b76 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -5168,9 +5168,9 @@ ANY num_sync ( num_sync ( num_sync ( . @@ -1786,6 +1789,201 @@ test_sub=# SELECT * from tab_gen_to_gen; + + Replicating Sequences + + + To replicate sequences from a publisher to a subscriber, first publish them + using + CREATE PUBLICATION ... FOR ALL SEQUENCES. + + + + At the subscriber side: + + + + use CREATE SUBSCRIPTION + to initially synchronize the published sequences. + + + + + use + ALTER SUBSCRIPTION ... REFRESH PUBLICATION + to synchronize only newly added sequences. + + + + + use + ALTER SUBSCRIPTION ... REFRESH PUBLICATION SEQUENCES + to re-synchronize all sequences. + + + + + + + A new sequence synchronization worker will be started + after executing any of the above subscriber commands, and will exit once the + sequences are synchronized. + + + The ability to launch a sequence synchronization worker is limited by the + + max_sync_workers_per_subscription + configuration. + + + + Sequence Definition Mismatches + + + During sequence synchronization, the sequence definitions of the publisher + and the subscriber are compared. A WARNING is logged if any differences + are detected. + + + + To resolve this, use + ALTER SEQUENCE + to align the subscriber's sequence parameters with those of the publisher. + Then, execute + ALTER SUBSCRIPTION ... REFRESH PUBLICATION SEQUENCES. 
+ + + + + Refreshing Stale Sequences + + Subscriber-side sequence values may frequently become out of sync due to + updates on the publisher. + + + To verify, compare the sequence values between the publisher and + subscriber, and if necessary, execute + + ALTER SUBSCRIPTION ... REFRESH PUBLICATION SEQUENCES. + + + + + Examples + + + Create some sequences on the publisher. + +test_pub=# CREATE SEQUENCE s1 START WITH 10 INCREMENT BY 1; +CREATE SEQUENCE +test_pub=# CREATE SEQUENCE s2 START WITH 100 INCREMENT BY 10; +CREATE SEQUENCE + + + + Create the same sequences on the subscriber. + +test_sub=# CREATE SEQUENCE s1 START WITH 10 INCREMENT BY 1; +CREATE SEQUENCE +test_sub=# CREATE SEQUENCE s2 START WITH 100 INCREMENT BY 10; +CREATE SEQUENCE + + + + Update the sequences at the publisher side a few times. + +test_pub=# SELECT nextval('s1'); + nextval +--------- + 10 +(1 row) +test_pub=# SELECT nextval('s1'); + nextval +--------- + 11 +(1 row) +test_pub=# SELECT nextval('s2'); + nextval +--------- + 100 +(1 row) +test_pub=# SELECT nextval('s2'); + nextval +--------- + 110 +(1 row) + + + + Create a publication for the sequences. + +test_pub=# CREATE PUBLICATION pub1 FOR ALL SEQUENCES; +CREATE PUBLICATION + + + + Subscribe to the publication. + +test_sub=# CREATE SUBSCRIPTION sub1 +test_sub-# CONNECTION 'host=localhost dbname=test_pub application_name=sub1' +test_sub-# PUBLICATION pub1; +CREATE SUBSCRIPTION + + + + Observe that initial sequence values are synchronized. + +test_sub=# SELECT * FROM s1; + last_value | log_cnt | is_called +------------+---------+----------- + 11 | 31 | t +(1 row) + +test_sub=# SELECT * FROM s2; + last_value | log_cnt | is_called +------------+---------+----------- + 110 | 31 | t +(1 row) + + + + Update the sequences at the publisher side. 
+ +test_pub=# SELECT nextval('s1'); + nextval +--------- + 12 +(1 row) +test_pub=# SELECT nextval('s2'); + nextval +--------- + 120 +(1 row) + + + + Re-synchronize all the sequences at the subscriber side using + + ALTER SUBSCRIPTION ... REFRESH PUBLICATION SEQUENCES. + +test_sub=# ALTER SUBSCRIPTION sub1 REFRESH PUBLICATION SEQUENCES; +ALTER SUBSCRIPTION + +test_sub=# SELECT * FROM s1; + last_value | log_cnt | is_called +------------+---------+----------- + 12 | 30 | t +(1 row) + +test_sub=# SELECT * FROM s2; + last_value | log_cnt | is_called +------------+---------+----------- + 120 | 30 | t +(1 row) + + + + Conflicts @@ -2115,16 +2313,22 @@ CONTEXT: processing remote data for replication origin "pg_16395" during "INSER - Sequence data is not replicated. The data in serial or identity columns - backed by sequences will of course be replicated as part of the table, - but the sequence itself would still show the start value on the - subscriber. If the subscriber is used as a read-only database, then this - should typically not be a problem. If, however, some kind of switchover - or failover to the subscriber database is intended, then the sequences - would need to be updated to the latest values, either by copying the - current data from the publisher (perhaps - using pg_dump) or by determining a sufficiently high - value from the tables themselves. + Incremental sequence changes are not replicated. The data in serial or + identity columns backed by sequences will of course be replicated as part + of the table, but the sequences themselves do not replicate ongoing changes. + On the subscriber, a sequence will retain the last value it synchronized + from the publisher either during the initial + CREATE SUBSCRIPTION or + ALTER SUBSCRIPTION ... REFRESH PUBLICATION SEQUENCES. + If the subscriber is used as a read-only database, then this should + typically not be a problem. 
If, however, some kind of switchover or + failover to the subscriber database is intended, then the sequences would + need to be updated to the latest values, either by executing + + ALTER SUBSCRIPTION ... REFRESH PUBLICATION SEQUENCES + or by copying the current data from the publisher (perhaps using + pg_dump) or by determining a sufficiently high value + from the tables themselves. @@ -2442,8 +2646,8 @@ CONTEXT: processing remote data for replication origin "pg_16395" during "INSER max_logical_replication_workers must be set to at least the number of subscriptions (for leader apply - workers), plus some reserve for the table synchronization workers and - parallel apply workers. + workers), plus some reserve for the parallel apply workers, table synchronization workers, and a sequence + synchronization worker. @@ -2456,8 +2660,9 @@ CONTEXT: processing remote data for replication origin "pg_16395" during "INSER max_sync_workers_per_subscription - controls the amount of parallelism of the initial data copy during the - subscription initialization or when new tables are added. + controls how many tables can be synchronized in parallel during + subscription initialization or when new tables are added. One additional + worker is also needed for sequence synchronization. diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index c421d89edff..f5680347a1f 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -2025,8 +2025,9 @@ description | Waiting for a newly initialized WAL file to reach durable storage Type of the subscription worker process. Possible types are - apply, parallel apply, and - table synchronization. + apply, parallel apply, + table synchronization, and + sequence synchronization. 
diff --git a/doc/src/sgml/ref/alter_subscription.sgml b/doc/src/sgml/ref/alter_subscription.sgml index fdc648d007f..c474e37c03e 100644 --- a/doc/src/sgml/ref/alter_subscription.sgml +++ b/doc/src/sgml/ref/alter_subscription.sgml @@ -26,6 +26,7 @@ ALTER SUBSCRIPTION name SET PUBLICA ALTER SUBSCRIPTION name ADD PUBLICATION publication_name [, ...] [ WITH ( publication_option [= value] [, ... ] ) ] ALTER SUBSCRIPTION name DROP PUBLICATION publication_name [, ...] [ WITH ( publication_option [= value] [, ... ] ) ] ALTER SUBSCRIPTION name REFRESH PUBLICATION [ WITH ( refresh_option [= value] [, ... ] ) ] +ALTER SUBSCRIPTION name REFRESH PUBLICATION SEQUENCES ALTER SUBSCRIPTION name ENABLE ALTER SUBSCRIPTION name DISABLE ALTER SUBSCRIPTION name SET ( subscription_parameter [= value] [, ... ] ) @@ -67,6 +68,7 @@ ALTER SUBSCRIPTION name RENAME TO < Commands ALTER SUBSCRIPTION ... REFRESH PUBLICATION, + ALTER SUBSCRIPTION ... REFRESH PUBLICATION SEQUENCES, ALTER SUBSCRIPTION ... {SET|ADD|DROP} PUBLICATION ... with refresh option as true, ALTER SUBSCRIPTION ... SET (failover = true|false) and @@ -158,30 +160,51 @@ ALTER SUBSCRIPTION name RENAME TO < REFRESH PUBLICATION - Fetch missing table information from publisher. This will start + Fetch missing table information from the publisher. This will start replication of tables that were added to the subscribed-to publications since CREATE SUBSCRIPTION or the last invocation of REFRESH PUBLICATION. + + Also, fetch missing sequence information from the publisher. + + + + The system catalog pg_subscription_rel + is updated to record all tables and sequences known to the subscription + that are still part of the publication. + + refresh_option specifies additional options for the - refresh operation. The supported options are: + refresh operation. The only supported option is: copy_data (boolean) - Specifies whether to copy pre-existing data in the publications - that are being subscribed to when the replication starts. 
- The default is true. + Specifies whether to copy pre-existing data for tables and synchronize + sequences in the publications that are being subscribed to when the replication + starts. The default is true. Previously subscribed tables are not copied, even if a table's row filter WHERE clause has since been modified. + + Previously subscribed sequences are not re-synchronized. To do that, + see + ALTER SUBSCRIPTION ... REFRESH PUBLICATION SEQUENCES. + + + See for recommendations on how + to handle any warnings about sequence definition differences between + the publisher and the subscriber, which might occur when + copy_data = true. + See for details of how copy_data = true can interact with the @@ -200,6 +223,28 @@ ALTER SUBSCRIPTION name RENAME TO < + + REFRESH PUBLICATION SEQUENCES + + + Fetch missing sequence information from the publisher, then re-synchronize + sequence data with the publisher. Unlike + ALTER SUBSCRIPTION ... REFRESH PUBLICATION, which + only synchronizes newly added sequences, REFRESH PUBLICATION SEQUENCES + will re-synchronize the sequence data for all subscribed sequences. + + + See for + recommendations on how to handle any warnings about sequence definition + differences between the publisher and the subscriber. + + + See for recommendations on how to + identify and handle out-of-sync sequences. + + + + ENABLE diff --git a/doc/src/sgml/ref/create_subscription.sgml b/doc/src/sgml/ref/create_subscription.sgml index 57dec28a5df..44308515bbb 100644 --- a/doc/src/sgml/ref/create_subscription.sgml +++ b/doc/src/sgml/ref/create_subscription.sgml @@ -263,6 +263,12 @@ CREATE SUBSCRIPTION subscription_namecopy_data = true can interact with the origin parameter. + + See + for recommendations on how to handle any warnings about sequence + definition differences between the publisher and the subscriber, + which might occur when copy_data = true. 
+ diff --git a/doc/src/sgml/system-views.sgml b/doc/src/sgml/system-views.sgml index 737e7489b78..21edf725843 100644 --- a/doc/src/sgml/system-views.sgml +++ b/doc/src/sgml/system-views.sgml @@ -131,6 +131,11 @@ prepared transactions + + pg_publication_sequences + publications and information of their associated sequences + + pg_publication_tables publications and information of their associated tables @@ -2475,6 +2480,68 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx + + <structname>pg_publication_sequences</structname> + + + pg_publication_sequences + + + + The view pg_publication_sequences provides + information about the mapping between publications and the + sequences they contain. + + +
+ <structname>pg_publication_sequences</structname> Columns + + + + + Column Type + + + Description + + + + + + + + pubname name + (references pg_publication.pubname) + + + Name of publication + + + + + + schemaname name + (references pg_namespace.nspname) + + + Name of schema containing sequence + + + + + + sequencename name + (references pg_class.relname) + + + Name of sequence + + + + +
+ + <structname>pg_publication_tables</structname> -- 2.43.0