From 1654e0fb1e02c1f4f08daf50968865ba0bcc830f Mon Sep 17 00:00:00 2001 From: Melih Mutlu Date: Mon, 8 Aug 2022 14:14:44 +0300 Subject: [PATCH v12] Allow logical replication to copy table in binary If binary option is enabled in a subscription, then copy tables in binary format during table synchronization. Without this patch, tables are copied in text format even if the subscription is created with binary option enabled. This patch allows logical replication to operate in binary format starting from the initial sync. When binary format is beneficial to use, allowing the subscription to copy tables in binary in the table sync phase may reduce the time spent on copy depending on column types. Discussion: https://postgr.es/m/CAGPVpCQvAziCLknEnygY0v1-KBtg%2BOm-9JHJYZOnNPKFJPompw%40mail.gmail.com --- doc/src/sgml/logical-replication.sgml | 9 +- doc/src/sgml/ref/create_subscription.sgml | 16 +- src/backend/replication/logical/tablesync.c | 17 +- src/test/subscription/meson.build | 1 + src/test/subscription/t/032_binary_copy.pl | 193 ++++++++++++++++++++ 5 files changed, 226 insertions(+), 10 deletions(-) create mode 100644 src/test/subscription/t/032_binary_copy.pl diff --git a/doc/src/sgml/logical-replication.sgml b/doc/src/sgml/logical-replication.sgml index 1bd5660c87..83caf40a05 100644 --- a/doc/src/sgml/logical-replication.sgml +++ b/doc/src/sgml/logical-replication.sgml @@ -241,10 +241,11 @@ types of the columns do not need to match, as long as the text representation of the data can be converted to the target type. For example, you can replicate from a column of type integer to a - column of type bigint. The target table can also have - additional columns not provided by the published table. Any such columns - will be filled with the default value as specified in the definition of the - target table. + column of type bigint. However, replication in binary format is + type-specific and, because of its restrictions, does not allow replicating + data between different types. 
The target table can also have additional + columns not provided by the published table. Any such columns will be filled + with the default value as specified in the definition of the target table. diff --git a/doc/src/sgml/ref/create_subscription.sgml b/doc/src/sgml/ref/create_subscription.sgml index 51c45f17c7..5be8848376 100644 --- a/doc/src/sgml/ref/create_subscription.sgml +++ b/doc/src/sgml/ref/create_subscription.sgml @@ -189,11 +189,17 @@ CREATE SUBSCRIPTION subscription_namebinary (boolean) - Specifies whether the subscription will request the publisher to - send the data in binary format (as opposed to text). - The default is false. - Even when this option is enabled, only data types having - binary send and receive functions will be transferred in binary. + Specifies whether the subscription will both copy the initial data to + synchronize relations and request the publisher to send the data in + binary format (as opposed to text). The default is false. + Binary format can be faster than the text format, but it is less portable + across machine architectures and PostgreSQL versions. Binary format is + also very data type specific; unlike text format, it does not allow copying + between different column types. Even when this option is enabled, + only data types having binary send and receive functions will be + transferred in binary. Note that the initial synchronization requires + all data types to have binary send and receive functions; otherwise, + the synchronization will fail. 
diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c index 07eea504ba..0a1ff28ae6 100644 --- a/src/backend/replication/logical/tablesync.c +++ b/src/backend/replication/logical/tablesync.c @@ -101,6 +101,7 @@ #include "catalog/pg_type.h" #include "commands/copy.h" #include "miscadmin.h" +#include "nodes/makefuncs.h" #include "parser/parse_relation.h" #include "pgstat.h" #include "replication/logicallauncher.h" @@ -1090,6 +1091,7 @@ copy_table(Relation rel) CopyFromState cstate; List *attnamelist; ParseState *pstate; + List *options = NIL; /* Get the publisher relation info. */ fetch_remote_table_info(get_namespace_name(RelationGetNamespace(rel)), @@ -1168,6 +1170,19 @@ copy_table(Relation rel) appendStringInfoString(&cmd, ") TO STDOUT"); } + + /* + * Ask for a binary COPY only when the subscription has binary enabled + * and the publisher reports server version 14 or later; the matching + * "format" option makes the local COPY FROM parse the stream as binary. + */ + if (walrcv_server_version(LogRepWorkerWalRcvConn) >= 140000 && + MySubscription->binary) + { + appendStringInfoString(&cmd, " WITH (FORMAT binary)"); + options = lappend(options, makeDefElem("format", (Node *) makeString("binary"), -1)); + } + res = walrcv_exec(LogRepWorkerWalRcvConn, cmd.data, 0, NULL); pfree(cmd.data); if (res->status != WALRCV_OK_COPY_OUT) @@ -1184,7 +1199,7 @@ copy_table(Relation rel) NULL, false, false); attnamelist = make_copy_attnamelist(relmapentry); - cstate = BeginCopyFrom(pstate, rel, NULL, NULL, false, copy_read_data, attnamelist, NIL); + cstate = BeginCopyFrom(pstate, rel, NULL, NULL, false, copy_read_data, attnamelist, options); /* Do the copy */ (void) CopyFrom(cstate); diff --git a/src/test/subscription/meson.build b/src/test/subscription/meson.build index 3db0fdfd96..f9ab6eb7e1 100644 --- a/src/test/subscription/meson.build +++ b/src/test/subscription/meson.build @@ -38,6 +38,7 @@ tests += { 't/029_on_error.pl', 't/030_origin.pl', 't/031_column_list.pl', + 
't/032_binary_copy.pl', 't/100_bugs.pl', ], }, diff --git a/src/test/subscription/t/032_binary_copy.pl b/src/test/subscription/t/032_binary_copy.pl new file mode 100644 index 0000000000..656cabc0fe --- /dev/null +++ b/src/test/subscription/t/032_binary_copy.pl @@ -0,0 +1,193 @@ + +# Copyright (c) 2023, PostgreSQL Global Development Group + +# Test that initial table synchronization uses binary COPY when the subscription has binary = true +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Create and initialize a publisher node +my $node_publisher = PostgreSQL::Test::Cluster->new('publisher'); +$node_publisher->init(allows_streaming => 'logical'); +$node_publisher->start; + +# Create and initialize a subscriber node +my $node_subscriber = PostgreSQL::Test::Cluster->new('subscriber'); +$node_subscriber->init(allows_streaming => 'logical'); +$node_subscriber->start; + +# Create identical tables and types on both publisher and subscriber +my $ddl = qq( + CREATE TABLE public.test_numerical ( + a INTEGER PRIMARY KEY, + b NUMERIC, + c FLOAT, + d BIGINT + ); + CREATE TABLE public.test_arrays ( + a INTEGER[] PRIMARY KEY, + b NUMERIC[], + c TEXT[] + ); + CREATE TABLE public.test_range_array ( + a INTEGER PRIMARY KEY, + b TSTZRANGE, + c int8range[] + ); + CREATE TYPE public.test_comp_basic_t AS (a FLOAT, b TEXT, c INTEGER); + CREATE TABLE public.test_one_comp ( + a INTEGER PRIMARY KEY, + b public.test_comp_basic_t + );); + +$node_publisher->safe_psql('postgres', $ddl); +$node_subscriber->safe_psql('postgres', $ddl); + +# Publish all tables +$node_publisher->safe_psql('postgres', + "CREATE PUBLICATION tpub FOR ALL TABLES"); + +# Insert rows before creating the subscription so the initial table sync has data to copy +$node_publisher->safe_psql( + 'postgres', qq( + INSERT INTO public.test_numerical (a, b, c, d) VALUES + (1, 1.2, 1.3, 10), + (2, 2.2, 2.3, 20); + INSERT INTO public.test_arrays (a, b, c) VALUES + ('{1,2,3}', '{1.1, 1.2, 1.3}', '{"one", "two", "three"}'), + ('{3,1,2}', '{1.3, 1.1, 1.2}', '{"three", "one", 
"two"}'); + INSERT INTO test_range_array (a, b, c) VALUES + (1, tstzrange('Mon Aug 04 00:00:00 2014 CEST'::timestamptz, 'infinity'), '{"[1,2]", "[10,20]"}'), + (2, tstzrange('Sat Aug 02 00:00:00 2014 CEST'::timestamptz, 'Mon Aug 04 00:00:00 2014 CEST'::timestamptz), '{"[2,3]", "[20,30]"}'); + INSERT INTO test_one_comp (a, b) VALUES + (1, ROW(1.0, 'a', 1)), + (2, ROW(2.0, 'b', 2)); + )); + +# Create the subscription with binary = true +my $publisher_connstring = $node_publisher->connstr . ' dbname=postgres'; +$node_subscriber->safe_psql('postgres', + "CREATE SUBSCRIPTION tsub CONNECTION '$publisher_connstring' " + . "PUBLICATION tpub WITH (slot_name = tpub_slot, binary = true)"); + +# Ensure the publisher log shows a COPY command issued in binary format +$node_publisher->wait_for_log(qr/LOG: ( [a-z0-9]+:)? COPY (.+)? TO STDOUT WITH \(FORMAT binary\)/); + +# Ensure nodes are in sync with each other +$node_subscriber->wait_for_subscription_sync($node_publisher, 'tsub'); + +my $sync_check = qq( + SET timezone = '+2'; + SELECT a, b, c, d FROM test_numerical ORDER BY a; + SELECT a, b, c FROM test_arrays ORDER BY a; + SELECT a, b, c FROM test_range_array ORDER BY a; + SELECT a, b FROM test_one_comp ORDER BY a; +); + +# Check the synced data on the subscriber +my $result = $node_subscriber->safe_psql('postgres', $sync_check); + +is( $result, '1|1.2|1.3|10 +2|2.2|2.3|20 +{1,2,3}|{1.1,1.2,1.3}|{one,two,three} +{3,1,2}|{1.3,1.1,1.2}|{three,one,two} +1|["2014-08-04 00:00:00+02",infinity)|{"[1,3)","[10,21)"} +2|["2014-08-02 00:00:00+02","2014-08-04 00:00:00+02")|{"[2,4)","[20,31)"} +1|(1,a,1) +2|(2,b,2)', 'check synced data on subscriber'); + +# Create a custom type with only text input/output functions (no binary send/receive) +$ddl = qq( + CREATE TYPE myvarchar; + CREATE FUNCTION myvarcharin(cstring, oid, integer) RETURNS myvarchar + LANGUAGE internal IMMUTABLE PARALLEL SAFE STRICT AS 'varcharin'; + CREATE FUNCTION myvarcharout(myvarchar) RETURNS cstring + LANGUAGE internal IMMUTABLE PARALLEL SAFE STRICT AS 
'varcharout'; + CREATE TYPE myvarchar ( + input = myvarcharin, + output = myvarcharout); + CREATE TABLE public.test_myvarchar ( + a myvarchar + );); + +$node_publisher->safe_psql('postgres', $ddl); +$node_subscriber->safe_psql('postgres', $ddl); + +# Insert some initial data +$node_publisher->safe_psql( + 'postgres', qq( + INSERT INTO public.test_myvarchar (a) VALUES + ('a'); + )); + +# Refresh the subscription so the newly created table gets a tablesync +$node_subscriber->safe_psql('postgres', "ALTER SUBSCRIPTION tsub REFRESH PUBLICATION"); + +# The tablesync should fail: myvarchar has no binary receive function yet +$node_subscriber->wait_for_log(qr/ERROR: ( [A-Z0-9]+:)? no binary input function available for type/); + +# Create binary send/receive functions and attach them to the custom type +$ddl = qq( + CREATE FUNCTION myvarcharsend(myvarchar) RETURNS bytea + LANGUAGE internal STABLE PARALLEL SAFE STRICT AS 'varcharsend'; + CREATE FUNCTION myvarcharrecv(internal, oid, integer) RETURNS myvarchar + LANGUAGE internal STABLE PARALLEL SAFE STRICT AS 'varcharrecv'; + ALTER TYPE myvarchar SET ( + send = myvarcharsend, + receive = myvarcharrecv + );); + +$node_publisher->safe_psql('postgres', $ddl); +$node_subscriber->safe_psql('postgres', $ddl); + +# Now the tablesync should succeed +$node_subscriber->wait_for_subscription_sync($node_publisher, 'tsub'); + +# Check the synced data on the subscriber +$result = $node_subscriber->safe_psql('postgres', 'SELECT a FROM test_myvarchar;'); + +is( $result, 'a', 'check synced data on subscriber with custom type'); + +# Test syncing a table whose column type differs: bigint on publisher, int on subscriber +$node_publisher->safe_psql( + 'postgres', qq( + CREATE TABLE public.test_mismatching_types ( + a bigint PRIMARY KEY + ); + INSERT INTO public.test_mismatching_types (a) + VALUES (1), (2); + )); + +$node_subscriber->safe_psql( + 'postgres', qq( + CREATE TABLE public.test_mismatching_types ( + a int PRIMARY KEY + ); + ALTER SUBSCRIPTION tsub REFRESH PUBLICATION; + )); + +# The binary tablesync should fail because of the column type mismatch +$node_subscriber->wait_for_log(qr/ERROR: ( [A-Z0-9]+:)? 
incorrect binary data format/); + +# Setting binary to false should let the table sync succeed +$node_subscriber->safe_psql( + 'postgres', qq( + ALTER SUBSCRIPTION tsub SET (binary = false);)); + +# Ensure the publisher log shows a COPY command with no format clause after TO STDOUT +$node_publisher->wait_for_log(qr/LOG: ( [a-z0-9]+:)? COPY (.+)? TO STDOUT\n/); + +$node_subscriber->wait_for_subscription_sync($node_publisher, 'tsub'); + +# Check the synced data on the subscriber +$result = $node_subscriber->safe_psql('postgres', 'SELECT a FROM test_mismatching_types ORDER BY a;'); + +is( $result, '1 +2', 'check synced data on subscriber with binary = false'); + +$node_subscriber->stop('fast'); +$node_publisher->stop('fast'); + +done_testing(); -- 2.25.1