From c762de3139baa3142f8385c9dff361e78ac742d6 Mon Sep 17 00:00:00 2001 From: Melih Mutlu Date: Mon, 8 Aug 2022 14:14:44 +0300 Subject: [PATCH v15] Allow logical replication to copy tables in binary format If the binary option is enabled in a subscription, then copy tables in binary format during table synchronization. Without this patch, tables are copied in text format even if the subscription is created with the binary option enabled. This patch allows logical replication to operate in binary format starting from the initial sync. Copying tables in binary format may reduce the time spent, depending on column types. Discussion: https://postgr.es/m/CAGPVpCQvAziCLknEnygY0v1-KBtg%2BOm-9JHJYZOnNPKFJPompw%40mail.gmail.com --- doc/src/sgml/logical-replication.sgml | 5 +- doc/src/sgml/ref/alter_subscription.sgml | 4 + doc/src/sgml/ref/create_subscription.sgml | 25 ++- src/backend/replication/logical/tablesync.c | 15 +- src/test/subscription/t/014_binary.pl | 170 ++++++++++++++++++-- 5 files changed, 202 insertions(+), 17 deletions(-) diff --git a/doc/src/sgml/logical-replication.sgml b/doc/src/sgml/logical-replication.sgml index 6b0e300adc..fa2d9383e8 100644 --- a/doc/src/sgml/logical-replication.sgml +++ b/doc/src/sgml/logical-replication.sgml @@ -251,7 +251,10 @@ column of type bigint. The target table can also have additional columns not provided by the published table. Any such columns will be filled with the default value as specified in the definition of the - target table. + target table. However, logical replication in binary format is more restrictive. + See the binary option of + CREATE SUBSCRIPTION + for more details. diff --git a/doc/src/sgml/ref/alter_subscription.sgml b/doc/src/sgml/ref/alter_subscription.sgml index 964fcbb8ff..507ff5127f 100644 --- a/doc/src/sgml/ref/alter_subscription.sgml +++ b/doc/src/sgml/ref/alter_subscription.sgml @@ -177,6 +177,10 @@ ALTER SUBSCRIPTION name RENAME TO < how copy_data = true can interact with the origin parameter. 
+ + See the binary option of + CREATE SUBSCRIPTION for details about copying pre-existing data in binary format. + diff --git a/doc/src/sgml/ref/create_subscription.sgml b/doc/src/sgml/ref/create_subscription.sgml index 51c45f17c7..c9c3f2aeac 100644 --- a/doc/src/sgml/ref/create_subscription.sgml +++ b/doc/src/sgml/ref/create_subscription.sgml @@ -189,11 +189,20 @@ CREATE SUBSCRIPTION subscription_namebinary (boolean) - Specifies whether the subscription will request the publisher to - send the data in binary format (as opposed to text). - The default is false. - Even when this option is enabled, only data types having - binary send and receive functions will be transferred in binary. + Specifies whether the subscription will request the publisher to send + the data in binary format (as opposed to text). The default is + false. Any initial table synchronization copy + (see copy_data) also uses the same format. Binary + format can be faster than the text format, but it is less portable + across machine architectures and PostgreSQL versions. Binary format + is very data type specific; for example, it will not allow copying + from a smallint column to an integer column, even though that would + work fine in text format. Even when this option is enabled, only data + types having binary send and receive functions will be transferred in + binary. Note that the initial synchronization requires all data types + to have binary send and receive functions; otherwise the synchronization + will fail (see CREATE TYPE for more about + send/receive functions). @@ -203,6 +212,12 @@ CREATE SUBSCRIPTION subscription_namebinary option cannot be used. + + + If the publisher is a PostgreSQL version + before 14, then any initial table synchronization will use text format + even if binary = true. 
+ diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c index 07eea504ba..3a6a01a428 100644 --- a/src/backend/replication/logical/tablesync.c +++ b/src/backend/replication/logical/tablesync.c @@ -101,6 +101,7 @@ #include "catalog/pg_type.h" #include "commands/copy.h" #include "miscadmin.h" +#include "nodes/makefuncs.h" #include "parser/parse_relation.h" #include "pgstat.h" #include "replication/logicallauncher.h" @@ -1090,6 +1091,7 @@ copy_table(Relation rel) CopyFromState cstate; List *attnamelist; ParseState *pstate; + List *options = NIL; /* Get the publisher relation info. */ fetch_remote_table_info(get_namespace_name(RelationGetNamespace(rel)), @@ -1168,6 +1170,17 @@ copy_table(Relation rel) appendStringInfoString(&cmd, ") TO STDOUT"); } + + /* + * The binary option for replication is supported since v14 + */ + if (walrcv_server_version(LogRepWorkerWalRcvConn) >= 140000 && + MySubscription->binary) + { + appendStringInfo(&cmd, " WITH (FORMAT binary)"); + options = lappend(options, makeDefElem("format", (Node *) makeString("binary"), -1)); + } + res = walrcv_exec(LogRepWorkerWalRcvConn, cmd.data, 0, NULL); pfree(cmd.data); if (res->status != WALRCV_OK_COPY_OUT) @@ -1184,7 +1197,7 @@ copy_table(Relation rel) NULL, false, false); attnamelist = make_copy_attnamelist(relmapentry); - cstate = BeginCopyFrom(pstate, rel, NULL, NULL, false, copy_read_data, attnamelist, NIL); + cstate = BeginCopyFrom(pstate, rel, NULL, NULL, false, copy_read_data, attnamelist, options); /* Do the copy */ (void) CopyFrom(cstate); diff --git a/src/test/subscription/t/014_binary.pl b/src/test/subscription/t/014_binary.pl index e53e23da3e..87d6d117b3 100644 --- a/src/test/subscription/t/014_binary.pl +++ b/src/test/subscription/t/014_binary.pl @@ -40,35 +40,62 @@ $node_subscriber->safe_psql('postgres', $ddl); $node_publisher->safe_psql('postgres', "CREATE PUBLICATION tpub FOR ALL TABLES"); +# Insert some content before creating a subscription 
+$node_publisher->safe_psql( + 'postgres', qq( + INSERT INTO public.test_numerical (a, b, c, d) VALUES + (1, 1.2, 1.3, 10), + (2, 2.2, 2.3, 20); + INSERT INTO public.test_arrays (a, b, c) VALUES + ('{1,2,3}', '{1.1, 1.2, 1.3}', '{"one", "two", "three"}'), + ('{3,1,2}', '{1.3, 1.1, 1.2}', '{"three", "one", "two"}'); + )); + my $publisher_connstring = $node_publisher->connstr . ' dbname=postgres'; $node_subscriber->safe_psql('postgres', "CREATE SUBSCRIPTION tsub CONNECTION '$publisher_connstring' " . "PUBLICATION tpub WITH (slot_name = tpub_slot, binary = true)"); +# Ensure the COPY command is executed in binary format on the publisher +$node_publisher->wait_for_log(qr/LOG: ( [a-z0-9]+:)? COPY (.+)? TO STDOUT WITH \(FORMAT binary\)/); + # Ensure nodes are in sync with each other $node_subscriber->wait_for_subscription_sync($node_publisher, 'tsub'); +my $sync_check = qq( + SELECT a, b, c, d FROM test_numerical ORDER BY a; + SELECT a, b, c FROM test_arrays ORDER BY a; +); + +# Check the synced data on the subscriber +my $result = $node_subscriber->safe_psql('postgres', $sync_check); + +is( $result, '1|1.2|1.3|10 +2|2.2|2.3|20 +{1,2,3}|{1.1,1.2,1.3}|{one,two,three} +{3,1,2}|{1.3,1.1,1.2}|{three,one,two}', 'check synced data on subscriber'); + # Insert some content and make sure it's replicated across $node_publisher->safe_psql( 'postgres', qq( INSERT INTO public.test_arrays (a, b, c) VALUES - ('{1,2,3}', '{1.1, 1.2, 1.3}', '{"one", "two", "three"}'), - ('{3,1,2}', '{1.3, 1.1, 1.2}', '{"three", "one", "two"}'); + ('{2,1,3}', '{1.2, 1.1, 1.3}', '{"two", "one", "three"}'), + ('{1,3,2}', '{1.1, 1.3, 1.2}', '{"one", "three", "two"}'); INSERT INTO public.test_numerical (a, b, c, d) VALUES - (1, 1.2, 1.3, 10), - (2, 2.2, 2.3, 20), - (3, 3.2, 3.3, 30); + (3, 3.2, 3.3, 30), + (4, 4.2, 4.3, 40); )); $node_publisher->wait_for_catchup('tsub'); -my $result = $node_subscriber->safe_psql('postgres', +$result = $node_subscriber->safe_psql('postgres', "SELECT a, b, c, d FROM 
test_numerical ORDER BY a"); is( $result, '1|1.2|1.3|10 2|2.2|2.3|20 -3|3.2|3.3|30', 'check replicated data on subscriber'); +3|3.2|3.3|30 +4|4.2|4.3|40', 'check replicated data on subscriber'); # Test updates as well $node_publisher->safe_psql( @@ -83,6 +110,8 @@ $result = $node_subscriber->safe_psql('postgres', "SELECT a, b, c FROM test_arrays ORDER BY a"); is( $result, '{1,2,3}|{42,1.2,1.3}| +{1,3,2}|{42,1.3,1.2}| +{2,1,3}|{42,1.1,1.3}| {3,1,2}|{42,1.1,1.2}|', 'check updated replicated data on subscriber'); $result = $node_subscriber->safe_psql('postgres', @@ -90,7 +119,8 @@ $result = $node_subscriber->safe_psql('postgres', is( $result, '1|42||10 2|42||20 -3|42||30', 'check updated replicated data on subscriber'); +3|42||30 +4|42||40', 'check updated replicated data on subscriber'); # Test to reset back to text formatting, and then to binary again $node_subscriber->safe_psql('postgres', @@ -99,7 +129,7 @@ $node_subscriber->safe_psql('postgres', $node_publisher->safe_psql( 'postgres', qq( INSERT INTO public.test_numerical (a, b, c, d) VALUES - (4, 4.2, 4.3, 40); + (5, 5.2, 5.3, 50); )); $node_publisher->wait_for_catchup('tsub'); @@ -110,7 +140,8 @@ $result = $node_subscriber->safe_psql('postgres', is( $result, '1|42||10 2|42||20 3|42||30 -4|4.2|4.3|40', 'check replicated data on subscriber'); +4|42||40 +5|5.2|5.3|50', 'check replicated data on subscriber'); $node_subscriber->safe_psql('postgres', "ALTER SUBSCRIPTION tsub SET (binary = true);"); @@ -127,9 +158,128 @@ $result = $node_subscriber->safe_psql('postgres', "SELECT a, b, c FROM test_arrays ORDER BY a"); is( $result, '{1,2,3}|{42,1.2,1.3}| +{1,3,2}|{42,1.3,1.2}| +{2,1,3}|{42,1.1,1.3}| {2,3,1}|{1.2,1.3,1.1}|{two,three,one} {3,1,2}|{42,1.1,1.2}|', 'check replicated data on subscriber'); +# Create a custom type without send/rcv functions +$ddl = qq( + CREATE TYPE myvarchar; + CREATE FUNCTION myvarcharin(cstring, oid, integer) RETURNS myvarchar + LANGUAGE internal IMMUTABLE PARALLEL SAFE STRICT AS 'varcharin'; 
+ CREATE FUNCTION myvarcharout(myvarchar) RETURNS cstring + LANGUAGE internal IMMUTABLE PARALLEL SAFE STRICT AS 'varcharout'; + CREATE TYPE myvarchar ( + input = myvarcharin, + output = myvarcharout); + CREATE TABLE public.test_myvarchar ( + a myvarchar + );); + +$node_publisher->safe_psql('postgres', $ddl); +$node_subscriber->safe_psql('postgres', $ddl); + +# Insert some initial data +$node_publisher->safe_psql( + 'postgres', qq( + INSERT INTO public.test_myvarchar (a) VALUES + ('a'); + )); + +# Refresh the publication to trigger the tablesync +$node_subscriber->safe_psql('postgres', "ALTER SUBSCRIPTION tsub REFRESH PUBLICATION"); + +# It should fail +$node_subscriber->wait_for_log(qr/ERROR: ( [A-Z0-9]+:)? no binary input function available for type/); + +# Create and set send/rcv functions for the custom type +$ddl = qq( + CREATE FUNCTION myvarcharsend(myvarchar) RETURNS bytea + LANGUAGE internal STABLE PARALLEL SAFE STRICT AS 'varcharsend'; + CREATE FUNCTION myvarcharrecv(internal, oid, integer) RETURNS myvarchar + LANGUAGE internal STABLE PARALLEL SAFE STRICT AS 'varcharrecv'; + ALTER TYPE myvarchar SET ( + send = myvarcharsend, + receive = myvarcharrecv + );); + +$node_publisher->safe_psql('postgres', $ddl); +$node_subscriber->safe_psql('postgres', $ddl); + +# Now tablesync should succeed +$node_subscriber->wait_for_subscription_sync($node_publisher, 'tsub'); + +# Check the synced data on the subscriber +$result = $node_subscriber->safe_psql('postgres', 'SELECT a FROM test_myvarchar;'); + +is( $result, 'a', 'check synced data on subscriber with custom type'); + +# Test syncing tables with different column order +$node_publisher->safe_psql( + 'postgres', qq( + CREATE TABLE public.test_col_order ( + a bigint, b int + ); + INSERT INTO public.test_col_order (a,b) + VALUES (1,2),(3,4); + )); + +$node_subscriber->safe_psql( + 'postgres', qq( + CREATE TABLE public.test_col_order ( + b int, a bigint + ); + ALTER SUBSCRIPTION tsub REFRESH PUBLICATION; + )); + +# Ensure 
nodes are in sync with each other +$node_subscriber->wait_for_subscription_sync($node_publisher, 'tsub'); + +# Check the synced data on the subscriber +$result = $node_subscriber->safe_psql('postgres', + 'SELECT a,b FROM public.test_col_order ORDER BY a;'); + +is( $result, '1|2 +3|4', 'check synced data on subscriber for different column order'); + +# Test syncing tables with mismatching column types +$node_publisher->safe_psql( + 'postgres', qq( + CREATE TABLE public.test_mismatching_types ( + a bigint PRIMARY KEY + ); + INSERT INTO public.test_mismatching_types (a) + VALUES (1), (2); + )); + +$node_subscriber->safe_psql( + 'postgres', qq( + CREATE TABLE public.test_mismatching_types ( + a int PRIMARY KEY + ); + ALTER SUBSCRIPTION tsub REFRESH PUBLICATION; + )); + +# Cannot sync due to type mismatch +$node_subscriber->wait_for_log(qr/ERROR: ( [A-Z0-9]+:)? incorrect binary data format/); + +# Setting binary to false should allow syncing +$node_subscriber->safe_psql( + 'postgres', qq( + ALTER SUBSCRIPTION tsub SET (binary = false);)); + +# Ensure the COPY command is executed in text format on the publisher +$node_publisher->wait_for_log(qr/LOG: ( [a-z0-9]+:)? COPY (.+)? TO STDOUT\n/); + +$node_subscriber->wait_for_subscription_sync($node_publisher, 'tsub'); + +# Check the synced data on the subscriber +$result = $node_subscriber->safe_psql('postgres', 'SELECT a FROM test_mismatching_types ORDER BY a;'); + +is( $result, '1 +2', 'check synced data on subscriber with binary = false'); + $node_subscriber->stop('fast'); $node_publisher->stop('fast'); -- 2.25.1